diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f4427939ca82bcb59a2d979a135f36fcc50e8c66
--- /dev/null
+++ b/README.md
@@ -0,0 +1,63 @@
+---
+tags:
+- trl
+- sft
+- generated_from_trainer
+datasets:
+- generator
+model-index:
+- name: zephyr-2b-gemma-dft
+ results: []
+---
+
+
+
+[Visualize in Weights & Biases](https://zebra.wandb.io/cto/distillm/runs/kje9xugl)
+# zephyr-2b-gemma-dft
+
+This model is a fine-tuned version of a local Gemma checkpoint (`data/gemma-2b`) on the generator dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0000
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 0.0 | 0.9982 | 270 | 0.0000 |
+
+
+### Framework versions
+
+- Transformers 4.42.4
+- Pytorch 2.3.1+cu121
+- Datasets 2.20.0
+- Tokenizers 0.19.1
diff --git a/all_results.json b/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..9617c3c3dd71f666bade349341271aa97164c552
--- /dev/null
+++ b/all_results.json
@@ -0,0 +1,9 @@
+{
+ "epoch": 0.9981515711645101,
+ "total_flos": 5.260333472022528e+16,
+ "train_loss": 1.8035727003330572e-05,
+ "train_runtime": 2689.0507,
+ "train_samples": 6750,
+ "train_samples_per_second": 3.217,
+ "train_steps_per_second": 0.1
+}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..84f1d7c33bb5754c7e336f67c5e4b38e9d1f3010
--- /dev/null
+++ b/config.json
@@ -0,0 +1,29 @@
+{
+ "_name_or_path": "data/gemma-2b",
+ "architectures": [
+ "GemmaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 2,
+ "eos_token_id": 1,
+ "head_dim": 256,
+ "hidden_act": "gelu",
+ "hidden_activation": "gelu_pytorch_tanh",
+ "hidden_size": 2048,
+ "initializer_range": 0.02,
+ "intermediate_size": 16384,
+ "max_position_embeddings": 8192,
+ "model_type": "gemma",
+ "num_attention_heads": 8,
+ "num_hidden_layers": 18,
+ "num_key_value_heads": 1,
+ "pad_token_id": 0,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.42.4",
+ "use_cache": false,
+ "vocab_size": 256000
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c20913bfa6d3576264545acb67eae5f4818d0d32
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,7 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 2,
+ "eos_token_id": 1,
+ "pad_token_id": 0,
+ "transformers_version": "4.42.4"
+}
diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4abfbd0b78a619f71fbd892f42a9006f31675957
--- /dev/null
+++ b/model-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:166c2cc2ac8f29542b11f7812cc6cdded64345b787b9b74e50c194790584c068
+size 4945242264
diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7a5f53bf0e00f73dcc86f33b73a82782c05f53e2
--- /dev/null
+++ b/model-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:403d33ecb881a9ae12700a261993eb4f1ad36a361fbb628ffaa31bf3ff3456e1
+size 67121608
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..b8fcee94bedd135b67735027dc217936f7444ab2
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,171 @@
+{
+ "metadata": {
+ "total_size": 5012344832
+ },
+ "weight_map": {
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.norm.weight": "model-00002-of-00002.safetensors"
+ }
+}
diff --git a/runs/Jul16_18-12-23_ale-distillm-4-0-0/events.out.tfevents.1721149966.ale-distillm-4-0-0.7658.0 b/runs/Jul16_18-12-23_ale-distillm-4-0-0/events.out.tfevents.1721149966.ale-distillm-4-0-0.7658.0
new file mode 100644
index 0000000000000000000000000000000000000000..80fdc97cd5df7132437bc7b2d958be1bffe3bff4
--- /dev/null
+++ b/runs/Jul16_18-12-23_ale-distillm-4-0-0/events.out.tfevents.1721149966.ale-distillm-4-0-0.7658.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3fd7babffa0e9fdf4182b38c8071b38d38e57db672c81819305ab7b2c25308b
+size 5055
diff --git a/runs/Jul16_18-20-11_ale-distillm-4-0-0/events.out.tfevents.1721150424.ale-distillm-4-0-0.7949.0 b/runs/Jul16_18-20-11_ale-distillm-4-0-0/events.out.tfevents.1721150424.ale-distillm-4-0-0.7949.0
new file mode 100644
index 0000000000000000000000000000000000000000..0770ba0990cc1dbc383b923ee8f79db331cc0393
--- /dev/null
+++ b/runs/Jul16_18-20-11_ale-distillm-4-0-0/events.out.tfevents.1721150424.ale-distillm-4-0-0.7949.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dea8980e272d221ff59bdececffe023d523e0f5c7ea68a8138516094410725c
+size 5055
diff --git a/runs/Jul16_18-22-23_ale-distillm-4-0-0/events.out.tfevents.1721150560.ale-distillm-4-0-0.8244.0 b/runs/Jul16_18-22-23_ale-distillm-4-0-0/events.out.tfevents.1721150560.ale-distillm-4-0-0.8244.0
new file mode 100644
index 0000000000000000000000000000000000000000..45ec30b40f9916e55ad0cca409508d23db4222d1
--- /dev/null
+++ b/runs/Jul16_18-22-23_ale-distillm-4-0-0/events.out.tfevents.1721150560.ale-distillm-4-0-0.8244.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ec26a1e5af465720c232ff6afd65bcf34cb3b4236f6949ac2c779afd12ca9e1
+size 5055
diff --git a/runs/Jul17_11-03-52_ale-distillm-4-0-0/events.out.tfevents.1721210651.ale-distillm-4-0-0.8479.0 b/runs/Jul17_11-03-52_ale-distillm-4-0-0/events.out.tfevents.1721210651.ale-distillm-4-0-0.8479.0
new file mode 100644
index 0000000000000000000000000000000000000000..55413cb50e0feaeb65b37a51a2e9612f6511e4de
--- /dev/null
+++ b/runs/Jul17_11-03-52_ale-distillm-4-0-0/events.out.tfevents.1721210651.ale-distillm-4-0-0.8479.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab42917c254853050095e429ea9fa59b50da47494cc10f674f45746b5af02157
+size 5055
diff --git a/runs/Jul17_11-04-55_ale-distillm-4-0-0/events.out.tfevents.1721210714.ale-distillm-4-0-0.8662.0 b/runs/Jul17_11-04-55_ale-distillm-4-0-0/events.out.tfevents.1721210714.ale-distillm-4-0-0.8662.0
new file mode 100644
index 0000000000000000000000000000000000000000..ae79f04bc1e355994b0ac429dd1a8f92f261bc17
--- /dev/null
+++ b/runs/Jul17_11-04-55_ale-distillm-4-0-0/events.out.tfevents.1721210714.ale-distillm-4-0-0.8662.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd05f663af210ef2de49565416459d12abe15960a1d0ca0ef8e93f9a27e60667
+size 5055
diff --git a/runs/Jul17_11-27-53_ale-distillm-4-0-0/events.out.tfevents.1721212093.ale-distillm-4-0-0.9186.0 b/runs/Jul17_11-27-53_ale-distillm-4-0-0/events.out.tfevents.1721212093.ale-distillm-4-0-0.9186.0
new file mode 100644
index 0000000000000000000000000000000000000000..c6bb15d281117e5672e252fe5b78da1125683ca8
--- /dev/null
+++ b/runs/Jul17_11-27-53_ale-distillm-4-0-0/events.out.tfevents.1721212093.ale-distillm-4-0-0.9186.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7092880334181484ee43a723e7d22e66b6de34a8cbe72a7dcf9d52af122ab904
+size 5055
diff --git a/runs/Jul17_11-28-42_ale-distillm-4-0-0/events.out.tfevents.1721212137.ale-distillm-4-0-0.9386.0 b/runs/Jul17_11-28-42_ale-distillm-4-0-0/events.out.tfevents.1721212137.ale-distillm-4-0-0.9386.0
new file mode 100644
index 0000000000000000000000000000000000000000..c58150989f0a9e2251431c2fa745daf30afbf373
--- /dev/null
+++ b/runs/Jul17_11-28-42_ale-distillm-4-0-0/events.out.tfevents.1721212137.ale-distillm-4-0-0.9386.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10974222cd7e984ba0a009524eb7a3965f59d8f6208cbedeabbac4ee4fa19ea8
+size 5055
diff --git a/runs/Jul17_11-36-32_ale-distillm-4-0-0/events.out.tfevents.1721212618.ale-distillm-4-0-0.10015.0 b/runs/Jul17_11-36-32_ale-distillm-4-0-0/events.out.tfevents.1721212618.ale-distillm-4-0-0.10015.0
new file mode 100644
index 0000000000000000000000000000000000000000..f62030103beefa3ba353b08df2107d93c579801a
--- /dev/null
+++ b/runs/Jul17_11-36-32_ale-distillm-4-0-0/events.out.tfevents.1721212618.ale-distillm-4-0-0.10015.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62fdc7be20c86a69d309ce6af7a7c50639a02b8e7de9d509ac308c0b23e82404
+size 5087
diff --git a/runs/Jul17_12-10-33_ale-distillm-4-0-0/events.out.tfevents.1721214649.ale-distillm-4-0-0.10368.0 b/runs/Jul17_12-10-33_ale-distillm-4-0-0/events.out.tfevents.1721214649.ale-distillm-4-0-0.10368.0
new file mode 100644
index 0000000000000000000000000000000000000000..f3b4b854a0558c3618e3866b4a2bc99ad511ea69
--- /dev/null
+++ b/runs/Jul17_12-10-33_ale-distillm-4-0-0/events.out.tfevents.1721214649.ale-distillm-4-0-0.10368.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d504a55c4df859f7b471970c71509272d6e3c76a47e81f8c2827d94fe569a30
+size 5087
diff --git a/runs/Jul17_12-21-11_ale-distillm-4-0-0/events.out.tfevents.1721215291.ale-distillm-4-0-0.10647.0 b/runs/Jul17_12-21-11_ale-distillm-4-0-0/events.out.tfevents.1721215291.ale-distillm-4-0-0.10647.0
new file mode 100644
index 0000000000000000000000000000000000000000..add49c2fd64d39a6146367cb550d01f1ad77eecf
--- /dev/null
+++ b/runs/Jul17_12-21-11_ale-distillm-4-0-0/events.out.tfevents.1721215291.ale-distillm-4-0-0.10647.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de0fd73ff50ab253c90f3faea2b74cca9edbd46576b1426bce9795a85bf1f192
+size 5087
diff --git a/runs/Jul17_12-24-53_ale-distillm-4-0-0/events.out.tfevents.1721215508.ale-distillm-4-0-0.10860.0 b/runs/Jul17_12-24-53_ale-distillm-4-0-0/events.out.tfevents.1721215508.ale-distillm-4-0-0.10860.0
new file mode 100644
index 0000000000000000000000000000000000000000..fcb235eeca110c083f8876575138757256806e2c
--- /dev/null
+++ b/runs/Jul17_12-24-53_ale-distillm-4-0-0/events.out.tfevents.1721215508.ale-distillm-4-0-0.10860.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d89929bcc3fd481899f8559e297cf7bb0cdfc326d73f26b2f7e3effe48cab54a
+size 5087
diff --git a/runs/Jul17_12-34-51_ale-distillm-4-0-0/events.out.tfevents.1721216106.ale-distillm-4-0-0.11082.0 b/runs/Jul17_12-34-51_ale-distillm-4-0-0/events.out.tfevents.1721216106.ale-distillm-4-0-0.11082.0
new file mode 100644
index 0000000000000000000000000000000000000000..fd79166f2ad2ed724909f4400c4fce279bef7358
--- /dev/null
+++ b/runs/Jul17_12-34-51_ale-distillm-4-0-0/events.out.tfevents.1721216106.ale-distillm-4-0-0.11082.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e331c25188f28d5bc3abbb566730a28f1f2e55951a6977142f82c12a8d02eba
+size 5087
diff --git a/runs/Jul17_12-49-34_ale-distillm-4-0-0/events.out.tfevents.1721216989.ale-distillm-4-0-0.11354.0 b/runs/Jul17_12-49-34_ale-distillm-4-0-0/events.out.tfevents.1721216989.ale-distillm-4-0-0.11354.0
new file mode 100644
index 0000000000000000000000000000000000000000..3f937129632a7e52161f1f3bd4c7cbcecfe0115f
--- /dev/null
+++ b/runs/Jul17_12-49-34_ale-distillm-4-0-0/events.out.tfevents.1721216989.ale-distillm-4-0-0.11354.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e6ca33ae652b061367a63f8f1b6fc552bade2c1b46865945c26ab75da85d45c
+size 5087
diff --git a/runs/Jul17_13-09-32_ale-distillm-4-0-0/events.out.tfevents.1721218187.ale-distillm-4-0-0.11829.0 b/runs/Jul17_13-09-32_ale-distillm-4-0-0/events.out.tfevents.1721218187.ale-distillm-4-0-0.11829.0
new file mode 100644
index 0000000000000000000000000000000000000000..9d918f93f2af648a9043373c421d975092f2e753
--- /dev/null
+++ b/runs/Jul17_13-09-32_ale-distillm-4-0-0/events.out.tfevents.1721218187.ale-distillm-4-0-0.11829.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d5066ae9eb68702efa52f40e4f1da54ac877df37ee04c6e08a1b3221e51f4b0
+size 5087
diff --git a/runs/Jul17_13-12-55_ale-distillm-4-0-0/events.out.tfevents.1721218390.ale-distillm-4-0-0.12123.0 b/runs/Jul17_13-12-55_ale-distillm-4-0-0/events.out.tfevents.1721218390.ale-distillm-4-0-0.12123.0
new file mode 100644
index 0000000000000000000000000000000000000000..5db1d4e090ed0bcf9f2015dd27f0c635d60a4b6c
--- /dev/null
+++ b/runs/Jul17_13-12-55_ale-distillm-4-0-0/events.out.tfevents.1721218390.ale-distillm-4-0-0.12123.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfec3c1845089a56f9c4052ac42d774681dd9de81d94f1c3a011a19b4411385b
+size 5087
diff --git a/runs/Jul17_13-13-43_ale-distillm-4-0-0/events.out.tfevents.1721218443.ale-distillm-4-0-0.12398.0 b/runs/Jul17_13-13-43_ale-distillm-4-0-0/events.out.tfevents.1721218443.ale-distillm-4-0-0.12398.0
new file mode 100644
index 0000000000000000000000000000000000000000..be4085d112f0ecf1f0684e268413b33a897b8d5e
--- /dev/null
+++ b/runs/Jul17_13-13-43_ale-distillm-4-0-0/events.out.tfevents.1721218443.ale-distillm-4-0-0.12398.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a02770839a18eafe9c4ccd86088dfe776131f259dc5cefe54cafdf6e2af6a80
+size 5087
diff --git a/runs/Jul17_13-15-39_ale-distillm-4-0-0/events.out.tfevents.1721218553.ale-distillm-4-0-0.12607.0 b/runs/Jul17_13-15-39_ale-distillm-4-0-0/events.out.tfevents.1721218553.ale-distillm-4-0-0.12607.0
new file mode 100644
index 0000000000000000000000000000000000000000..ec94d6318e28830b7471eae04d9a645583940ace
--- /dev/null
+++ b/runs/Jul17_13-15-39_ale-distillm-4-0-0/events.out.tfevents.1721218553.ale-distillm-4-0-0.12607.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82b3eab5c1ae70676d9c87154ba38f758706702098852d920488668a4b087883
+size 5087
diff --git a/runs/Jul17_14-00-44_ale-distillm-4-0-0/events.out.tfevents.1721221259.ale-distillm-4-0-0.12881.0 b/runs/Jul17_14-00-44_ale-distillm-4-0-0/events.out.tfevents.1721221259.ale-distillm-4-0-0.12881.0
new file mode 100644
index 0000000000000000000000000000000000000000..fda9fe34b34170253611ab3ca1d7a18f6c7f3111
--- /dev/null
+++ b/runs/Jul17_14-00-44_ale-distillm-4-0-0/events.out.tfevents.1721221259.ale-distillm-4-0-0.12881.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e75cf2449c6b5e5f670f83bedafe082f8af03fabe57bb4a87069cf184d66e24
+size 5086
diff --git a/runs/Jul17_14-03-37_ale-distillm-4-0-0/events.out.tfevents.1721221432.ale-distillm-4-0-0.13176.0 b/runs/Jul17_14-03-37_ale-distillm-4-0-0/events.out.tfevents.1721221432.ale-distillm-4-0-0.13176.0
new file mode 100644
index 0000000000000000000000000000000000000000..c90d3f149c101676697eaa7779455de6517360ed
--- /dev/null
+++ b/runs/Jul17_14-03-37_ale-distillm-4-0-0/events.out.tfevents.1721221432.ale-distillm-4-0-0.13176.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b62346c4be5f816886cd8150e9b5cea91e925ef1fe0b69e429afac7f246bd9d8
+size 5293
diff --git a/runs/Jul17_14-04-49_ale-distillm-4-0-0/events.out.tfevents.1721221504.ale-distillm-4-0-0.13609.0 b/runs/Jul17_14-04-49_ale-distillm-4-0-0/events.out.tfevents.1721221504.ale-distillm-4-0-0.13609.0
new file mode 100644
index 0000000000000000000000000000000000000000..25c730b996a745e68928f5697c2309d058b1c409
--- /dev/null
+++ b/runs/Jul17_14-04-49_ale-distillm-4-0-0/events.out.tfevents.1721221504.ale-distillm-4-0-0.13609.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d0752a22e6ccc4555c0088bba4b252394e7d820fa8705a44de664a8976d91b0
+size 10683
diff --git a/runs/Jul17_14-34-01_ale-distillm-8-0-0/events.out.tfevents.1721223728.ale-distillm-8-0-0.2624.0 b/runs/Jul17_14-34-01_ale-distillm-8-0-0/events.out.tfevents.1721223728.ale-distillm-8-0-0.2624.0
new file mode 100644
index 0000000000000000000000000000000000000000..d6ebe26938d14a408fa8f03c66ef73bdc4b3a816
--- /dev/null
+++ b/runs/Jul17_14-34-01_ale-distillm-8-0-0/events.out.tfevents.1721223728.ale-distillm-8-0-0.2624.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7df9453a58a3ca60873c86a217f0bae83aa115c813e28a46964b8ba7e71c2017
+size 5086
diff --git a/runs/Jul17_15-05-14_ale-distillm-8-0-0/events.out.tfevents.1721225135.ale-distillm-8-0-0.3160.0 b/runs/Jul17_15-05-14_ale-distillm-8-0-0/events.out.tfevents.1721225135.ale-distillm-8-0-0.3160.0
new file mode 100644
index 0000000000000000000000000000000000000000..5c72f15bd3ce1aa3d160e2bf8bd5f6f7e3a8b237
--- /dev/null
+++ b/runs/Jul17_15-05-14_ale-distillm-8-0-0/events.out.tfevents.1721225135.ale-distillm-8-0-0.3160.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f4878ae53ad6ee4ba5b16b14ec9d342a6fbbad39fd2128a4f0ba2ecbde8fe61
+size 5086
diff --git a/runs/Jul17_15-07-31_ale-distillm-8-0-0/events.out.tfevents.1721225298.ale-distillm-8-0-0.3493.0 b/runs/Jul17_15-07-31_ale-distillm-8-0-0/events.out.tfevents.1721225298.ale-distillm-8-0-0.3493.0
new file mode 100644
index 0000000000000000000000000000000000000000..a7a97bb726c18f314e996773b0fdec38004b2e34
--- /dev/null
+++ b/runs/Jul17_15-07-31_ale-distillm-8-0-0/events.out.tfevents.1721225298.ale-distillm-8-0-0.3493.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a00ba0003f0a4cde97006a9e21f770f96592121961afb70d388006836c199d7
+size 5055
diff --git a/runs/Jul17_15-09-08_ale-distillm-8-0-0/events.out.tfevents.1721225375.ale-distillm-8-0-0.3899.0 b/runs/Jul17_15-09-08_ale-distillm-8-0-0/events.out.tfevents.1721225375.ale-distillm-8-0-0.3899.0
new file mode 100644
index 0000000000000000000000000000000000000000..a6541945b5b237e8641467f13e0c43246db15816
--- /dev/null
+++ b/runs/Jul17_15-09-08_ale-distillm-8-0-0/events.out.tfevents.1721225375.ale-distillm-8-0-0.3899.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bdd1f0813c8b60d1f4b34ce47b8ea4e5c7d1b43860e78c077825e7ea9cf4317
+size 5086
diff --git a/runs/Jul17_15-12-13_ale-distillm-8-0-0/events.out.tfevents.1721225558.ale-distillm-8-0-0.4304.0 b/runs/Jul17_15-12-13_ale-distillm-8-0-0/events.out.tfevents.1721225558.ale-distillm-8-0-0.4304.0
new file mode 100644
index 0000000000000000000000000000000000000000..9f7962fe3da8f3b8d8356cad20be78552156fed8
--- /dev/null
+++ b/runs/Jul17_15-12-13_ale-distillm-8-0-0/events.out.tfevents.1721225558.ale-distillm-8-0-0.4304.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a0ca0aaee6a6430d4f9b1066718e40063917f12e389ef25a4abafdbeb985755
+size 5086
diff --git a/runs/Jul17_15-33-25_ale-distillm-8-0-0/events.out.tfevents.1721226819.ale-distillm-8-0-0.5027.0 b/runs/Jul17_15-33-25_ale-distillm-8-0-0/events.out.tfevents.1721226819.ale-distillm-8-0-0.5027.0
new file mode 100644
index 0000000000000000000000000000000000000000..b8e35db5a67d86e834c6651d2495c86f89c8a701
--- /dev/null
+++ b/runs/Jul17_15-33-25_ale-distillm-8-0-0/events.out.tfevents.1721226819.ale-distillm-8-0-0.5027.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8070ed496d5878317b955ed8a5d1816159305f6bbe985116856e738db1bc7213
+size 5087
diff --git a/runs/Jul17_15-37-19_ale-distillm-8-0-0/events.out.tfevents.1721227053.ale-distillm-8-0-0.5333.0 b/runs/Jul17_15-37-19_ale-distillm-8-0-0/events.out.tfevents.1721227053.ale-distillm-8-0-0.5333.0
new file mode 100644
index 0000000000000000000000000000000000000000..757dc9a2a5c722de254eb6168ef4945baf364f64
--- /dev/null
+++ b/runs/Jul17_15-37-19_ale-distillm-8-0-0/events.out.tfevents.1721227053.ale-distillm-8-0-0.5333.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f33fb6cda0b4825a7ea029b5d0605168b9d5953d56bbdc777e0328e70df2b15
+size 5087
diff --git a/runs/Jul17_15-40-39_ale-distillm-8-0-0/events.out.tfevents.1721227253.ale-distillm-8-0-0.5662.0 b/runs/Jul17_15-40-39_ale-distillm-8-0-0/events.out.tfevents.1721227253.ale-distillm-8-0-0.5662.0
new file mode 100644
index 0000000000000000000000000000000000000000..cd691554cc3df8554e58379743ab8ffccb14e9ef
--- /dev/null
+++ b/runs/Jul17_15-40-39_ale-distillm-8-0-0/events.out.tfevents.1721227253.ale-distillm-8-0-0.5662.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cac9787042f38c64ac3b2e1613e873a2742f4a51ff3d01ce6f6d46c3d3d9fb75
+size 5086
diff --git a/runs/Jul18_08-21-26_ale-distillm-8-0-0/events.out.tfevents.1721287323.ale-distillm-8-0-0.6210.0 b/runs/Jul18_08-21-26_ale-distillm-8-0-0/events.out.tfevents.1721287323.ale-distillm-8-0-0.6210.0
new file mode 100644
index 0000000000000000000000000000000000000000..8a70b42df3a53a9be2b9b7351e03f8b752016ff3
--- /dev/null
+++ b/runs/Jul18_08-21-26_ale-distillm-8-0-0/events.out.tfevents.1721287323.ale-distillm-8-0-0.6210.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6cc02a9efbe2cf2bfcf8b86f7413ed2343329ef3b2e8ef5a0a5ad12a5229fd7
+size 5086
diff --git a/runs/Jul18_08-31-58_ale-distillm-8-0-0/events.out.tfevents.1721287933.ale-distillm-8-0-0.6609.0 b/runs/Jul18_08-31-58_ale-distillm-8-0-0/events.out.tfevents.1721287933.ale-distillm-8-0-0.6609.0
new file mode 100644
index 0000000000000000000000000000000000000000..e4d7aca01457ff6a0189ae421b53eb33f222c379
--- /dev/null
+++ b/runs/Jul18_08-31-58_ale-distillm-8-0-0/events.out.tfevents.1721287933.ale-distillm-8-0-0.6609.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f94b89bc6517903a538b4226559f1c7b3514d65825523c35b092168e38fc933
+size 5086
diff --git a/runs/Jul18_08-40-57_ale-distillm-8-0-0/events.out.tfevents.1721288501.ale-distillm-8-0-0.6938.0 b/runs/Jul18_08-40-57_ale-distillm-8-0-0/events.out.tfevents.1721288501.ale-distillm-8-0-0.6938.0
new file mode 100644
index 0000000000000000000000000000000000000000..0bf375d965b6261c291dd261641b9cff2aea67f2
--- /dev/null
+++ b/runs/Jul18_08-40-57_ale-distillm-8-0-0/events.out.tfevents.1721288501.ale-distillm-8-0-0.6938.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:181ebcd6a237e3dc441423c263d92f59515b9fe360230c3975ea5b4884bf3968
+size 5087
diff --git a/runs/Jul18_08-43-48_ale-distillm-8-0-0/events.out.tfevents.1721288655.ale-distillm-8-0-0.7340.0 b/runs/Jul18_08-43-48_ale-distillm-8-0-0/events.out.tfevents.1721288655.ale-distillm-8-0-0.7340.0
new file mode 100644
index 0000000000000000000000000000000000000000..f66ccb5aaaf9bcff5d452722bec039f8d62637f9
--- /dev/null
+++ b/runs/Jul18_08-43-48_ale-distillm-8-0-0/events.out.tfevents.1721288655.ale-distillm-8-0-0.7340.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92b329679593a913b57ebbd352b51290c373d722856a81414310179ddb390e31
+size 5087
diff --git a/runs/Jul18_08-46-16_ale-distillm-8-0-0/events.out.tfevents.1721288790.ale-distillm-8-0-0.7769.0 b/runs/Jul18_08-46-16_ale-distillm-8-0-0/events.out.tfevents.1721288790.ale-distillm-8-0-0.7769.0
new file mode 100644
index 0000000000000000000000000000000000000000..cb22f96a9c62d404d54922dea0ddf93de59f788f
--- /dev/null
+++ b/runs/Jul18_08-46-16_ale-distillm-8-0-0/events.out.tfevents.1721288790.ale-distillm-8-0-0.7769.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:613b4ea765180d4441669aec54fc86fdaf2ec2c47e4d93c4087d06a81851c06a
+size 5087
diff --git a/runs/Jul18_09-00-59_ale-distillm-8-0-0/events.out.tfevents.1721289676.ale-distillm-8-0-0.8302.0 b/runs/Jul18_09-00-59_ale-distillm-8-0-0/events.out.tfevents.1721289676.ale-distillm-8-0-0.8302.0
new file mode 100644
index 0000000000000000000000000000000000000000..19f2c078b516751637e2491a763f82907c0912dd
--- /dev/null
+++ b/runs/Jul18_09-00-59_ale-distillm-8-0-0/events.out.tfevents.1721289676.ale-distillm-8-0-0.8302.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54270265a73720dc8cfccae09b7f417ddf47f864ed882690595f45c97f7c99e7
+size 5087
diff --git a/runs/Jul18_09-05-16_ale-distillm-8-0-0/events.out.tfevents.1721289930.ale-distillm-8-0-0.8653.0 b/runs/Jul18_09-05-16_ale-distillm-8-0-0/events.out.tfevents.1721289930.ale-distillm-8-0-0.8653.0
new file mode 100644
index 0000000000000000000000000000000000000000..8612d58bc5523913b18f8180f50c88287da03dc1
--- /dev/null
+++ b/runs/Jul18_09-05-16_ale-distillm-8-0-0/events.out.tfevents.1721289930.ale-distillm-8-0-0.8653.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f27c23d77905906192737e0a47e380d3e8696e8cac96a8195e191aa8985a3be
+size 5087
diff --git a/runs/Jul18_09-10-17_ale-distillm-8-0-0/events.out.tfevents.1721290244.ale-distillm-8-0-0.8992.0 b/runs/Jul18_09-10-17_ale-distillm-8-0-0/events.out.tfevents.1721290244.ale-distillm-8-0-0.8992.0
new file mode 100644
index 0000000000000000000000000000000000000000..fbf0f536f26bac54bbe9d1bf06a3cbf4f1a57999
--- /dev/null
+++ b/runs/Jul18_09-10-17_ale-distillm-8-0-0/events.out.tfevents.1721290244.ale-distillm-8-0-0.8992.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6d4da68d9d2699e78cb8e1ed236a71d9a17fe9961352b90c0c297887b31cda8
+size 5087
diff --git a/runs/Jul18_09-31-51_ale-distillm-8-0-0/events.out.tfevents.1721291539.ale-distillm-8-0-0.9393.0 b/runs/Jul18_09-31-51_ale-distillm-8-0-0/events.out.tfevents.1721291539.ale-distillm-8-0-0.9393.0
new file mode 100644
index 0000000000000000000000000000000000000000..5931610635d0107ec289a594db45b17cfe329652
--- /dev/null
+++ b/runs/Jul18_09-31-51_ale-distillm-8-0-0/events.out.tfevents.1721291539.ale-distillm-8-0-0.9393.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bbb367b444b118e93749b3754682a00b7a4f939f4cae26dfe31092003da1458
+size 5087
diff --git a/runs/Jul18_10-30-23_ale-distillm-8-0-0/events.out.tfevents.1721295036.ale-distillm-8-0-0.9995.0 b/runs/Jul18_10-30-23_ale-distillm-8-0-0/events.out.tfevents.1721295036.ale-distillm-8-0-0.9995.0
new file mode 100644
index 0000000000000000000000000000000000000000..00438b741518f8d796198f670f90c1037b6ebb59
--- /dev/null
+++ b/runs/Jul18_10-30-23_ale-distillm-8-0-0/events.out.tfevents.1721295036.ale-distillm-8-0-0.9995.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64ab4b2fafc194e75ea4b22c184da37e276a5dfc0f455985cf5b11bd30fc3e42
+size 5087
diff --git a/runs/Jul18_10-32-15_ale-distillm-8-0-0/events.out.tfevents.1721295149.ale-distillm-8-0-0.10329.0 b/runs/Jul18_10-32-15_ale-distillm-8-0-0/events.out.tfevents.1721295149.ale-distillm-8-0-0.10329.0
new file mode 100644
index 0000000000000000000000000000000000000000..f7c723f2d71fe960b3219e46df585b6a6ac15f27
--- /dev/null
+++ b/runs/Jul18_10-32-15_ale-distillm-8-0-0/events.out.tfevents.1721295149.ale-distillm-8-0-0.10329.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e07330b91d46f77e9e070fbc84a9935b72c420fe737aa7230e5520a95874354
+size 5087
diff --git a/runs/Jul18_10-33-27_ale-distillm-8-0-0/events.out.tfevents.1721295221.ale-distillm-8-0-0.10663.0 b/runs/Jul18_10-33-27_ale-distillm-8-0-0/events.out.tfevents.1721295221.ale-distillm-8-0-0.10663.0
new file mode 100644
index 0000000000000000000000000000000000000000..e558e5b64bb6fd7eeb2f0b3c48b33f26db3d0a0f
--- /dev/null
+++ b/runs/Jul18_10-33-27_ale-distillm-8-0-0/events.out.tfevents.1721295221.ale-distillm-8-0-0.10663.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f817bea72f99381b3353b349c888a22643e26c8da63fab5aea868192bfd4a84b
+size 5086
diff --git a/runs/Jul18_10-50-46_ale-distillm-8-0-0/events.out.tfevents.1721296272.ale-distillm-8-0-0.11268.0 b/runs/Jul18_10-50-46_ale-distillm-8-0-0/events.out.tfevents.1721296272.ale-distillm-8-0-0.11268.0
new file mode 100644
index 0000000000000000000000000000000000000000..b2f3d7cac326fdc367229f601aaf3f2edeb04e48
--- /dev/null
+++ b/runs/Jul18_10-50-46_ale-distillm-8-0-0/events.out.tfevents.1721296272.ale-distillm-8-0-0.11268.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c32a5d43ca36b20dba38d667c2bc977e48f4f07b7435862ddfba35334bd24574
+size 5087
diff --git a/runs/Jul18_10-51-50_ale-distillm-8-0-0/events.out.tfevents.1721296327.ale-distillm-8-0-0.11641.0 b/runs/Jul18_10-51-50_ale-distillm-8-0-0/events.out.tfevents.1721296327.ale-distillm-8-0-0.11641.0
new file mode 100644
index 0000000000000000000000000000000000000000..009462036f601e9c0a7ce663322e4366870fc4db
--- /dev/null
+++ b/runs/Jul18_10-51-50_ale-distillm-8-0-0/events.out.tfevents.1721296327.ale-distillm-8-0-0.11641.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b213f9e2452b5046a74bdf9a7f70307de998700b372bc1dc810065f5b2592b30
+size 5087
diff --git a/runs/Jul18_10-53-15_ale-distillm-8-0-0/events.out.tfevents.1721296411.ale-distillm-8-0-0.11938.0 b/runs/Jul18_10-53-15_ale-distillm-8-0-0/events.out.tfevents.1721296411.ale-distillm-8-0-0.11938.0
new file mode 100644
index 0000000000000000000000000000000000000000..6889e8bafd6f2a5a6f22a3927adf22ccae530814
--- /dev/null
+++ b/runs/Jul18_10-53-15_ale-distillm-8-0-0/events.out.tfevents.1721296411.ale-distillm-8-0-0.11938.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a269ac728419edd9c0e60c2638e07b6913f57d195aaea804dd4d6f57811581a
+size 5087
diff --git a/runs/Jul18_10-54-14_ale-distillm-8-0-0/events.out.tfevents.1721296481.ale-distillm-8-0-0.12233.0 b/runs/Jul18_10-54-14_ale-distillm-8-0-0/events.out.tfevents.1721296481.ale-distillm-8-0-0.12233.0
new file mode 100644
index 0000000000000000000000000000000000000000..1ed28b9566d3dc89fbaa8e803ce37e01aa355443
--- /dev/null
+++ b/runs/Jul18_10-54-14_ale-distillm-8-0-0/events.out.tfevents.1721296481.ale-distillm-8-0-0.12233.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2ee3abf24bb7a8fc0f23857ec7545e12432a28750cb08fe0707bd5d4f61fd36
+size 5087
diff --git a/runs/Jul19_10-10-27_ale-distillm-8-0-0/events.out.tfevents.1721380242.ale-distillm-8-0-0.15591.0 b/runs/Jul19_10-10-27_ale-distillm-8-0-0/events.out.tfevents.1721380242.ale-distillm-8-0-0.15591.0
new file mode 100644
index 0000000000000000000000000000000000000000..5ce49f3f488a8df4afa450b757b4e4b7fb7bc696
--- /dev/null
+++ b/runs/Jul19_10-10-27_ale-distillm-8-0-0/events.out.tfevents.1721380242.ale-distillm-8-0-0.15591.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d10a2bbbfc4759cfbb2ae08080697fc743796fa13e1d1b3a717ff6fcb0a0c4de
+size 5087
diff --git a/runs/Jul19_10-13-58_ale-distillm-8-0-0/events.out.tfevents.1721380452.ale-distillm-8-0-0.15929.0 b/runs/Jul19_10-13-58_ale-distillm-8-0-0/events.out.tfevents.1721380452.ale-distillm-8-0-0.15929.0
new file mode 100644
index 0000000000000000000000000000000000000000..4c18b1f3149f938ae23fc51d15f53b5a0a90a614
--- /dev/null
+++ b/runs/Jul19_10-13-58_ale-distillm-8-0-0/events.out.tfevents.1721380452.ale-distillm-8-0-0.15929.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:700ac27894268518bb81c208180497b18ea24e13f4c46fb32d2be0575d991c63
+size 5087
diff --git a/runs/Jul19_10-16-02_ale-distillm-8-0-0/events.out.tfevents.1721380576.ale-distillm-8-0-0.16239.0 b/runs/Jul19_10-16-02_ale-distillm-8-0-0/events.out.tfevents.1721380576.ale-distillm-8-0-0.16239.0
new file mode 100644
index 0000000000000000000000000000000000000000..b94e8dfaae5b4d679baea0d08d2a163c0a4378ed
--- /dev/null
+++ b/runs/Jul19_10-16-02_ale-distillm-8-0-0/events.out.tfevents.1721380576.ale-distillm-8-0-0.16239.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b79e585fabd2e42be91abbd982beae3ddc330ff46e2f7c383548cfb88250e415
+size 5087
diff --git a/runs/Jul19_10-20-47_ale-distillm-8-0-0/events.out.tfevents.1721380862.ale-distillm-8-0-0.16558.0 b/runs/Jul19_10-20-47_ale-distillm-8-0-0/events.out.tfevents.1721380862.ale-distillm-8-0-0.16558.0
new file mode 100644
index 0000000000000000000000000000000000000000..2031738c35bc699666e8208d7a023df9cfd1ffdf
--- /dev/null
+++ b/runs/Jul19_10-20-47_ale-distillm-8-0-0/events.out.tfevents.1721380862.ale-distillm-8-0-0.16558.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a43a9bbb7673c5aba5a5b527749bd8529e5170247d20ca8fc8f6e3fe8364d23
+size 5087
diff --git a/runs/Jul19_10-33-24_ale-distillm-8-0-0/events.out.tfevents.1721381631.ale-distillm-8-0-0.16908.0 b/runs/Jul19_10-33-24_ale-distillm-8-0-0/events.out.tfevents.1721381631.ale-distillm-8-0-0.16908.0
new file mode 100644
index 0000000000000000000000000000000000000000..35f9e2a669f2e09accf2534c64dc51816fd9c4e4
--- /dev/null
+++ b/runs/Jul19_10-33-24_ale-distillm-8-0-0/events.out.tfevents.1721381631.ale-distillm-8-0-0.16908.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7232dbc6bc4f5372b79d952bbaf0136bbd43d5ae5e20c54cd9af1e9c10118984
+size 5549
diff --git a/runs/Jul19_10-42-11_ale-distillm-8-0-0/events.out.tfevents.1721382157.ale-distillm-8-0-0.17404.0 b/runs/Jul19_10-42-11_ale-distillm-8-0-0/events.out.tfevents.1721382157.ale-distillm-8-0-0.17404.0
new file mode 100644
index 0000000000000000000000000000000000000000..55d2c5e10d48a2f2418a212f5267ffcd29dfdbf0
--- /dev/null
+++ b/runs/Jul19_10-42-11_ale-distillm-8-0-0/events.out.tfevents.1721382157.ale-distillm-8-0-0.17404.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d678f0b48bb658aae1128bd0b77f8e25f6a510c18c782455db8a8a9c3dae5c4d
+size 5087
diff --git a/runs/Jul19_10-43-38_ale-distillm-8-0-0/events.out.tfevents.1721382232.ale-distillm-8-0-0.17772.0 b/runs/Jul19_10-43-38_ale-distillm-8-0-0/events.out.tfevents.1721382232.ale-distillm-8-0-0.17772.0
new file mode 100644
index 0000000000000000000000000000000000000000..4dd0ad319be67c804162b4b3bd8a931acda78ae9
--- /dev/null
+++ b/runs/Jul19_10-43-38_ale-distillm-8-0-0/events.out.tfevents.1721382232.ale-distillm-8-0-0.17772.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5f14961ca7d7a411fe05c9b87d95c17ef7ba60c7b405e094319451bced51baf
+size 5087
diff --git a/runs/Jul19_10-45-38_ale-distillm-8-0-0/events.out.tfevents.1721382353.ale-distillm-8-0-0.18083.0 b/runs/Jul19_10-45-38_ale-distillm-8-0-0/events.out.tfevents.1721382353.ale-distillm-8-0-0.18083.0
new file mode 100644
index 0000000000000000000000000000000000000000..1d768840398bb53bc99989c75a2ac5aa335ada4a
--- /dev/null
+++ b/runs/Jul19_10-45-38_ale-distillm-8-0-0/events.out.tfevents.1721382353.ale-distillm-8-0-0.18083.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0c460fb6952717ac7596b9e5e747219e78f7169df84adba23c5d867b6489924
+size 5087
diff --git a/runs/Jul19_10-50-36_ale-distillm-8-0-0/events.out.tfevents.1721382650.ale-distillm-8-0-0.18394.0 b/runs/Jul19_10-50-36_ale-distillm-8-0-0/events.out.tfevents.1721382650.ale-distillm-8-0-0.18394.0
new file mode 100644
index 0000000000000000000000000000000000000000..45326b821ca6ee7a3ba5ddb193bc48e0436d495e
--- /dev/null
+++ b/runs/Jul19_10-50-36_ale-distillm-8-0-0/events.out.tfevents.1721382650.ale-distillm-8-0-0.18394.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7609e93f280481f97d89771504a82fa5bb9a178d85547dfb5dbc9b06020b193e
+size 5087
diff --git a/runs/Jul19_10-53-41_ale-distillm-8-0-0/events.out.tfevents.1721382836.ale-distillm-8-0-0.18705.0 b/runs/Jul19_10-53-41_ale-distillm-8-0-0/events.out.tfevents.1721382836.ale-distillm-8-0-0.18705.0
new file mode 100644
index 0000000000000000000000000000000000000000..8c63e29f1dc81c451278fce9537684016547c4a1
--- /dev/null
+++ b/runs/Jul19_10-53-41_ale-distillm-8-0-0/events.out.tfevents.1721382836.ale-distillm-8-0-0.18705.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae350c11643e140886352f006544b08b07dd66e53b7d043141ee606d3924ec85
+size 5087
diff --git a/runs/Jul19_10-55-10_ale-distillm-8-0-0/events.out.tfevents.1721382924.ale-distillm-8-0-0.18996.0 b/runs/Jul19_10-55-10_ale-distillm-8-0-0/events.out.tfevents.1721382924.ale-distillm-8-0-0.18996.0
new file mode 100644
index 0000000000000000000000000000000000000000..73a8938c29131cbce7c3aba95d2a0b0e64400ec8
--- /dev/null
+++ b/runs/Jul19_10-55-10_ale-distillm-8-0-0/events.out.tfevents.1721382924.ale-distillm-8-0-0.18996.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f19e69c2a79c465cf86a7f2a1e7c975e4081666b81988c700960fc69c7a8983
+size 5087
diff --git a/runs/Jul23_10-53-59_ale-distillm-8-0-0/events.out.tfevents.1721728470.ale-distillm-8-0-0.2319.0 b/runs/Jul23_10-53-59_ale-distillm-8-0-0/events.out.tfevents.1721728470.ale-distillm-8-0-0.2319.0
new file mode 100644
index 0000000000000000000000000000000000000000..7996f09196df651870384147746bbc3cb29cd632
--- /dev/null
+++ b/runs/Jul23_10-53-59_ale-distillm-8-0-0/events.out.tfevents.1721728470.ale-distillm-8-0-0.2319.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0d43b2b46e72ed640fb19d05446eda031b2d4a78504e962cc01cf7d99e03b8f
+size 5087
diff --git a/runs/Jul23_10-55-28_ale-distillm-8-0-0/events.out.tfevents.1721728545.ale-distillm-8-0-0.2761.0 b/runs/Jul23_10-55-28_ale-distillm-8-0-0/events.out.tfevents.1721728545.ale-distillm-8-0-0.2761.0
new file mode 100644
index 0000000000000000000000000000000000000000..1d0b7bea1f4039fc2ae7e9ea600012a6edbe226e
--- /dev/null
+++ b/runs/Jul23_10-55-28_ale-distillm-8-0-0/events.out.tfevents.1721728545.ale-distillm-8-0-0.2761.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:224dbfd35060b8c31e67d1f4497ac050198ac286ae085f8ba966fe351cdb6e2c
+size 5087
diff --git a/runs/Jul23_10-57-38_ale-distillm-8-0-0/events.out.tfevents.1721728674.ale-distillm-8-0-0.3240.0 b/runs/Jul23_10-57-38_ale-distillm-8-0-0/events.out.tfevents.1721728674.ale-distillm-8-0-0.3240.0
new file mode 100644
index 0000000000000000000000000000000000000000..a450461b00a9c4bb18211b642213c8e2f5a2b432
--- /dev/null
+++ b/runs/Jul23_10-57-38_ale-distillm-8-0-0/events.out.tfevents.1721728674.ale-distillm-8-0-0.3240.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af4db612723656fa5a7a750e9689eceb2118898836b884166215d0c1320f5889
+size 5087
diff --git a/runs/Jul23_10-59-23_ale-distillm-8-0-0/events.out.tfevents.1721728777.ale-distillm-8-0-0.3699.0 b/runs/Jul23_10-59-23_ale-distillm-8-0-0/events.out.tfevents.1721728777.ale-distillm-8-0-0.3699.0
new file mode 100644
index 0000000000000000000000000000000000000000..dcdbb499e82fe29888f5d24668e31365227c080b
--- /dev/null
+++ b/runs/Jul23_10-59-23_ale-distillm-8-0-0/events.out.tfevents.1721728777.ale-distillm-8-0-0.3699.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a401b1113f8f0cf4b49d45a8189aba41f93a44d61145b6a1113bbe05d2988f02
+size 5087
diff --git a/runs/Jul23_11-00-52_ale-distillm-8-0-0/events.out.tfevents.1721728868.ale-distillm-8-0-0.4046.0 b/runs/Jul23_11-00-52_ale-distillm-8-0-0/events.out.tfevents.1721728868.ale-distillm-8-0-0.4046.0
new file mode 100644
index 0000000000000000000000000000000000000000..bc782c07444dde6f738d883cd5cc68b27b8d55a8
--- /dev/null
+++ b/runs/Jul23_11-00-52_ale-distillm-8-0-0/events.out.tfevents.1721728868.ale-distillm-8-0-0.4046.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c917a4c4bdb15242893d01b938af8d150ebcfb19b90d5be1e47e3aad850ed3d
+size 5087
diff --git a/runs/Jul23_11-01-54_ale-distillm-8-0-0/events.out.tfevents.1721728931.ale-distillm-8-0-0.4429.0 b/runs/Jul23_11-01-54_ale-distillm-8-0-0/events.out.tfevents.1721728931.ale-distillm-8-0-0.4429.0
new file mode 100644
index 0000000000000000000000000000000000000000..b3342646ca811e5238642a06ddf628c780e859e2
--- /dev/null
+++ b/runs/Jul23_11-01-54_ale-distillm-8-0-0/events.out.tfevents.1721728931.ale-distillm-8-0-0.4429.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b85b00c83e8de3480c0bdf7ed229515e6ca30d466a1a2f0edbf7fee69ab6f663
+size 5087
diff --git a/runs/Jul23_11-14-27_ale-distillm-8-0-0/events.out.tfevents.1721729685.ale-distillm-8-0-0.5879.0 b/runs/Jul23_11-14-27_ale-distillm-8-0-0/events.out.tfevents.1721729685.ale-distillm-8-0-0.5879.0
new file mode 100644
index 0000000000000000000000000000000000000000..3f2cc9fdc1a9db6d809316ebe8477d80bbb64998
--- /dev/null
+++ b/runs/Jul23_11-14-27_ale-distillm-8-0-0/events.out.tfevents.1721729685.ale-distillm-8-0-0.5879.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a36cde30b957e5c7d4c937f1e180f1d0425f994400180ea17b7cd91c8df28450
+size 5087
diff --git a/runs/Jul23_11-15-56_ale-distillm-8-0-0/events.out.tfevents.1721729773.ale-distillm-8-0-0.6343.0 b/runs/Jul23_11-15-56_ale-distillm-8-0-0/events.out.tfevents.1721729773.ale-distillm-8-0-0.6343.0
new file mode 100644
index 0000000000000000000000000000000000000000..79b37fd094a46e16ce7a670677dc8f2cdeac7c33
--- /dev/null
+++ b/runs/Jul23_11-15-56_ale-distillm-8-0-0/events.out.tfevents.1721729773.ale-distillm-8-0-0.6343.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71d3358b42f22b8399b3b769f443ba8b807b683402fd4a64b0227c2550eb82d3
+size 5087
diff --git a/runs/Jul23_11-18-02_ale-distillm-8-0-0/events.out.tfevents.1721729897.ale-distillm-8-0-0.6864.0 b/runs/Jul23_11-18-02_ale-distillm-8-0-0/events.out.tfevents.1721729897.ale-distillm-8-0-0.6864.0
new file mode 100644
index 0000000000000000000000000000000000000000..64dc1dba0a3f3c68ebe2ad11aa6be45cd95b0f36
--- /dev/null
+++ b/runs/Jul23_11-18-02_ale-distillm-8-0-0/events.out.tfevents.1721729897.ale-distillm-8-0-0.6864.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea72d65027a5482125b8c420bd80e67acb95e9892e8087df3c3717a05eb19ca9
+size 5087
diff --git a/runs/Jul23_11-19-54_ale-distillm-8-0-0/events.out.tfevents.1721730013.ale-distillm-8-0-0.7383.0 b/runs/Jul23_11-19-54_ale-distillm-8-0-0/events.out.tfevents.1721730013.ale-distillm-8-0-0.7383.0
new file mode 100644
index 0000000000000000000000000000000000000000..a49fcceb06c6b99007334acb3732c7934214776e
--- /dev/null
+++ b/runs/Jul23_11-19-54_ale-distillm-8-0-0/events.out.tfevents.1721730013.ale-distillm-8-0-0.7383.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e43cf317c67aba7f0be452d68e7672df2e5dc788543343e49b0c1fdf11994cc1
+size 5087
diff --git a/runs/Jul23_11-22-17_ale-distillm-8-0-0/events.out.tfevents.1721730153.ale-distillm-8-0-0.7915.0 b/runs/Jul23_11-22-17_ale-distillm-8-0-0/events.out.tfevents.1721730153.ale-distillm-8-0-0.7915.0
new file mode 100644
index 0000000000000000000000000000000000000000..4b770b7ea89ff12a995a72d90d0a84e6cd5386c8
--- /dev/null
+++ b/runs/Jul23_11-22-17_ale-distillm-8-0-0/events.out.tfevents.1721730153.ale-distillm-8-0-0.7915.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7103917c286a5497f9c8d65c8a972baeea5e9129a81e08d4e07df0020faac615
+size 5087
diff --git a/runs/Jul23_11-25-57_ale-distillm-8-0-0/events.out.tfevents.1721730384.ale-distillm-8-0-0.8561.0 b/runs/Jul23_11-25-57_ale-distillm-8-0-0/events.out.tfevents.1721730384.ale-distillm-8-0-0.8561.0
new file mode 100644
index 0000000000000000000000000000000000000000..f17609eea1ce28d842ee9762422e0666249e7235
--- /dev/null
+++ b/runs/Jul23_11-25-57_ale-distillm-8-0-0/events.out.tfevents.1721730384.ale-distillm-8-0-0.8561.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e84f40d1205e6c55161cf3a3b0ba82d9f9ddc3a85493e02a20bfb0df424cd8c5
+size 11316
diff --git a/runs/Jul23_11-54-58_ale-distillm-8-0-0/events.out.tfevents.1721732116.ale-distillm-8-0-0.16409.0 b/runs/Jul23_11-54-58_ale-distillm-8-0-0/events.out.tfevents.1721732116.ale-distillm-8-0-0.16409.0
new file mode 100644
index 0000000000000000000000000000000000000000..f52cc9de2550e0531ae1869707d01f54600eb8b2
--- /dev/null
+++ b/runs/Jul23_11-54-58_ale-distillm-8-0-0/events.out.tfevents.1721732116.ale-distillm-8-0-0.16409.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78faaf8f4d3b827198d0737d7ceec9dc3108f2cb2043e6a9f766724326b4de15
+size 16380
diff --git a/runs/Jul23_13-23-17_ale-distillm-8-0-0/events.out.tfevents.1721737415.ale-distillm-8-0-0.33165.0 b/runs/Jul23_13-23-17_ale-distillm-8-0-0/events.out.tfevents.1721737415.ale-distillm-8-0-0.33165.0
new file mode 100644
index 0000000000000000000000000000000000000000..25242be18843285c11472b7d43f808d324bda55a
--- /dev/null
+++ b/runs/Jul23_13-23-17_ale-distillm-8-0-0/events.out.tfevents.1721737415.ale-distillm-8-0-0.33165.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b7e38d9fb72fbc518a1c8dc66d492fbf2247e90a44feadbd2d3fa364db4e444
+size 16380
diff --git a/runs/Jul23_14-10-34_ale-distillm-8-0-0/events.out.tfevents.1721740252.ale-distillm-8-0-0.46360.0 b/runs/Jul23_14-10-34_ale-distillm-8-0-0/events.out.tfevents.1721740252.ale-distillm-8-0-0.46360.0
new file mode 100644
index 0000000000000000000000000000000000000000..06842e7b6ce85cc04a494db6061c656e3941433d
--- /dev/null
+++ b/runs/Jul23_14-10-34_ale-distillm-8-0-0/events.out.tfevents.1721740252.ale-distillm-8-0-0.46360.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72a5f82d0f07c3452c3f151bcd45ee726ea8d00f011700b6d6378d90a4af082b
+size 17005
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..fa665d525e921bb8f3a1e7d18c7ed6fd16348fd1
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,34 @@
+{
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>"
+ ],
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..eaa6221f1e83e2d3737feafd66674c0768929e58
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:322a5f52ab5cab196761ab397a022d6fa3a2e1418585e532bb6efb2fedd2ae94
+size 17477501
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3208450a3248ebed7a0266609fcf02b3110d3573
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,70 @@
+{
+ "add_bos_token": false,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "106": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "107": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>"
+ ],
+ "bos_token": "",
+ "chat_template": "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "",
+ "legacy": null,
+ "model_max_length": 2048,
+ "pad_token": "",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "GemmaTokenizer",
+ "unk_token": "",
+ "use_default_system_prompt": false
+}
diff --git a/train_results.json b/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..9617c3c3dd71f666bade349341271aa97164c552
--- /dev/null
+++ b/train_results.json
@@ -0,0 +1,9 @@
+{
+ "epoch": 0.9981515711645101,
+ "total_flos": 5.260333472022528e+16,
+ "train_loss": 1.8035727003330572e-05,
+ "train_runtime": 2689.0507,
+ "train_samples": 6750,
+ "train_samples_per_second": 3.217,
+ "train_steps_per_second": 0.1
+}
\ No newline at end of file
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e7409658ff05baa4937f04b64e4955f8b211938
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,428 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.9981515711645101,
+ "eval_steps": 500,
+ "global_step": 270,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.018484288354898338,
+ "grad_norm": 0.00118255615234375,
+ "learning_rate": 3.7037037037037037e-06,
+ "loss": 0.0001,
+ "step": 5
+ },
+ {
+ "epoch": 0.036968576709796676,
+ "grad_norm": 0.000579833984375,
+ "learning_rate": 7.4074074074074075e-06,
+ "loss": 0.0001,
+ "step": 10
+ },
+ {
+ "epoch": 0.05545286506469501,
+ "grad_norm": 0.000614166259765625,
+ "learning_rate": 1.1111111111111113e-05,
+ "loss": 0.0,
+ "step": 15
+ },
+ {
+ "epoch": 0.07393715341959335,
+ "grad_norm": 0.000850677490234375,
+ "learning_rate": 1.4814814814814815e-05,
+ "loss": 0.0,
+ "step": 20
+ },
+ {
+ "epoch": 0.09242144177449169,
+ "grad_norm": 0.000446319580078125,
+ "learning_rate": 1.851851851851852e-05,
+ "loss": 0.0,
+ "step": 25
+ },
+ {
+ "epoch": 0.11090573012939002,
+ "grad_norm": 0.00075531005859375,
+ "learning_rate": 1.9992479525042305e-05,
+ "loss": 0.0,
+ "step": 30
+ },
+ {
+ "epoch": 0.12939001848428835,
+ "grad_norm": 0.0003108978271484375,
+ "learning_rate": 1.9946562024066018e-05,
+ "loss": 0.0,
+ "step": 35
+ },
+ {
+ "epoch": 0.1478743068391867,
+ "grad_norm": 0.0003833770751953125,
+ "learning_rate": 1.9859096633447965e-05,
+ "loss": 0.0,
+ "step": 40
+ },
+ {
+ "epoch": 0.16635859519408502,
+ "grad_norm": 0.000499725341796875,
+ "learning_rate": 1.973044870579824e-05,
+ "loss": 0.0,
+ "step": 45
+ },
+ {
+ "epoch": 0.18484288354898337,
+ "grad_norm": 0.000118255615234375,
+ "learning_rate": 1.95611556177388e-05,
+ "loss": 0.0,
+ "step": 50
+ },
+ {
+ "epoch": 0.2033271719038817,
+ "grad_norm": 7.724761962890625e-05,
+ "learning_rate": 1.93519245252219e-05,
+ "loss": 0.0,
+ "step": 55
+ },
+ {
+ "epoch": 0.22181146025878004,
+ "grad_norm": 0.0002689361572265625,
+ "learning_rate": 1.9103629409661468e-05,
+ "loss": 0.0,
+ "step": 60
+ },
+ {
+ "epoch": 0.24029574861367836,
+ "grad_norm": 8.821487426757812e-05,
+ "learning_rate": 1.881730742721608e-05,
+ "loss": 0.0,
+ "step": 65
+ },
+ {
+ "epoch": 0.2587800369685767,
+ "grad_norm": 0.00016307830810546875,
+ "learning_rate": 1.8494154576472976e-05,
+ "loss": 0.0,
+ "step": 70
+ },
+ {
+ "epoch": 0.27726432532347506,
+ "grad_norm": 9.441375732421875e-05,
+ "learning_rate": 1.8135520702629677e-05,
+ "loss": 0.0,
+ "step": 75
+ },
+ {
+ "epoch": 0.2957486136783734,
+ "grad_norm": 0.0002689361572265625,
+ "learning_rate": 1.7742903859041324e-05,
+ "loss": 0.0,
+ "step": 80
+ },
+ {
+ "epoch": 0.3142329020332717,
+ "grad_norm": 5.793571472167969e-05,
+ "learning_rate": 1.7317944049686125e-05,
+ "loss": 0.0,
+ "step": 85
+ },
+ {
+ "epoch": 0.33271719038817005,
+ "grad_norm": 0.00018596649169921875,
+ "learning_rate": 1.686241637868734e-05,
+ "loss": 0.0,
+ "step": 90
+ },
+ {
+ "epoch": 0.3512014787430684,
+ "grad_norm": 0.00012111663818359375,
+ "learning_rate": 1.637822363550706e-05,
+ "loss": 0.0,
+ "step": 95
+ },
+ {
+ "epoch": 0.36968576709796674,
+ "grad_norm": 0.00013446807861328125,
+ "learning_rate": 1.586738834678418e-05,
+ "loss": 0.0,
+ "step": 100
+ },
+ {
+ "epoch": 0.38817005545286504,
+ "grad_norm": 6.246566772460938e-05,
+ "learning_rate": 1.5332044328016916e-05,
+ "loss": 0.0,
+ "step": 105
+ },
+ {
+ "epoch": 0.4066543438077634,
+ "grad_norm": 0.00013828277587890625,
+ "learning_rate": 1.4774427770379492e-05,
+ "loss": 0.0,
+ "step": 110
+ },
+ {
+ "epoch": 0.42513863216266173,
+ "grad_norm": 0.0001544952392578125,
+ "learning_rate": 1.4196867899904292e-05,
+ "loss": 0.0,
+ "step": 115
+ },
+ {
+ "epoch": 0.4436229205175601,
+ "grad_norm": 0.00011968612670898438,
+ "learning_rate": 1.3601777248047105e-05,
+ "loss": 0.0,
+ "step": 120
+ },
+ {
+ "epoch": 0.46210720887245843,
+ "grad_norm": 6.532669067382812e-05,
+ "learning_rate": 1.2991641574276419e-05,
+ "loss": 0.0,
+ "step": 125
+ },
+ {
+ "epoch": 0.4805914972273567,
+ "grad_norm": 9.965896606445312e-05,
+ "learning_rate": 1.2369009482781191e-05,
+ "loss": 0.0,
+ "step": 130
+ },
+ {
+ "epoch": 0.49907578558225507,
+ "grad_norm": 5.698204040527344e-05,
+ "learning_rate": 1.1736481776669307e-05,
+ "loss": 0.0,
+ "step": 135
+ },
+ {
+ "epoch": 0.5175600739371534,
+ "grad_norm": 4.124641418457031e-05,
+ "learning_rate": 1.1096700594125318e-05,
+ "loss": 0.0,
+ "step": 140
+ },
+ {
+ "epoch": 0.5360443622920518,
+ "grad_norm": 9.5367431640625e-05,
+ "learning_rate": 1.0452338371907065e-05,
+ "loss": 0.0,
+ "step": 145
+ },
+ {
+ "epoch": 0.5545286506469501,
+ "grad_norm": 5.698204040527344e-05,
+ "learning_rate": 9.806086682281759e-06,
+ "loss": 0.0,
+ "step": 150
+ },
+ {
+ "epoch": 0.5730129390018485,
+ "grad_norm": 9.441375732421875e-05,
+ "learning_rate": 9.160644990030932e-06,
+ "loss": 0.0,
+ "step": 155
+ },
+ {
+ "epoch": 0.5914972273567468,
+ "grad_norm": 0.00011777877807617188,
+ "learning_rate": 8.518709376487515e-06,
+ "loss": 0.0,
+ "step": 160
+ },
+ {
+ "epoch": 0.609981515711645,
+ "grad_norm": 0.0001583099365234375,
+ "learning_rate": 7.882961277705897e-06,
+ "loss": 0.0,
+ "step": 165
+ },
+ {
+ "epoch": 0.6284658040665434,
+ "grad_norm": 0.000141143798828125,
+ "learning_rate": 7.256056283806987e-06,
+ "loss": 0.0,
+ "step": 170
+ },
+ {
+ "epoch": 0.6469500924214417,
+ "grad_norm": 0.0001697540283203125,
+ "learning_rate": 6.640613046284581e-06,
+ "loss": 0.0,
+ "step": 175
+ },
+ {
+ "epoch": 0.6654343807763401,
+ "grad_norm": 0.000431060791015625,
+ "learning_rate": 6.039202339608432e-06,
+ "loss": 0.0,
+ "step": 180
+ },
+ {
+ "epoch": 0.6839186691312384,
+ "grad_norm": 8.106231689453125e-05,
+ "learning_rate": 5.454336322814995e-06,
+ "loss": 0.0,
+ "step": 185
+ },
+ {
+ "epoch": 0.7024029574861368,
+ "grad_norm": 9.918212890625e-05,
+ "learning_rate": 4.888458045941269e-06,
+ "loss": 0.0,
+ "step": 190
+ },
+ {
+ "epoch": 0.7208872458410351,
+ "grad_norm": 4.76837158203125e-05,
+ "learning_rate": 4.343931245134616e-06,
+ "loss": 0.0,
+ "step": 195
+ },
+ {
+ "epoch": 0.7393715341959335,
+ "grad_norm": 0.00012493133544921875,
+ "learning_rate": 3.823030469065431e-06,
+ "loss": 0.0,
+ "step": 200
+ },
+ {
+ "epoch": 0.7578558225508318,
+ "grad_norm": 5.340576171875e-05,
+ "learning_rate": 3.3279315778858034e-06,
+ "loss": 0.0,
+ "step": 205
+ },
+ {
+ "epoch": 0.7763401109057301,
+ "grad_norm": 7.05718994140625e-05,
+ "learning_rate": 2.8607026544210115e-06,
+ "loss": 0.0,
+ "step": 210
+ },
+ {
+ "epoch": 0.7948243992606284,
+ "grad_norm": 0.0002117156982421875,
+ "learning_rate": 2.423295365558821e-06,
+ "loss": 0.0,
+ "step": 215
+ },
+ {
+ "epoch": 0.8133086876155268,
+ "grad_norm": 4.38690185546875e-05,
+ "learning_rate": 2.01753680992107e-06,
+ "loss": 0.0,
+ "step": 220
+ },
+ {
+ "epoch": 0.8317929759704251,
+ "grad_norm": 0.00010395050048828125,
+ "learning_rate": 1.6451218858706374e-06,
+ "loss": 0.0,
+ "step": 225
+ },
+ {
+ "epoch": 0.8502772643253235,
+ "grad_norm": 0.00018978118896484375,
+ "learning_rate": 1.307606211733522e-06,
+ "loss": 0.0,
+ "step": 230
+ },
+ {
+ "epoch": 0.8687615526802218,
+ "grad_norm": 5.53131103515625e-05,
+ "learning_rate": 1.0063996278090704e-06,
+ "loss": 0.0,
+ "step": 235
+ },
+ {
+ "epoch": 0.8872458410351202,
+ "grad_norm": 6.389617919921875e-05,
+ "learning_rate": 7.427603073110967e-07,
+ "loss": 0.0,
+ "step": 240
+ },
+ {
+ "epoch": 0.9057301293900185,
+ "grad_norm": 0.00011205673217773438,
+ "learning_rate": 5.177895008392353e-07,
+ "loss": 0.0,
+ "step": 245
+ },
+ {
+ "epoch": 0.9242144177449169,
+ "grad_norm": 0.000293731689453125,
+ "learning_rate": 3.3242693633337986e-07,
+ "loss": 0.0,
+ "step": 250
+ },
+ {
+ "epoch": 0.9426987060998152,
+ "grad_norm": 0.000179290771484375,
+ "learning_rate": 1.874468937261531e-07,
+ "loss": 0.0,
+ "step": 255
+ },
+ {
+ "epoch": 0.9611829944547134,
+ "grad_norm": 0.002349853515625,
+ "learning_rate": 8.345497068998897e-08,
+ "loss": 0.0,
+ "step": 260
+ },
+ {
+ "epoch": 0.9796672828096118,
+ "grad_norm": 6.437301635742188e-05,
+ "learning_rate": 2.088555298867978e-08,
+ "loss": 0.0,
+ "step": 265
+ },
+ {
+ "epoch": 0.9981515711645101,
+ "grad_norm": 9.1552734375e-05,
+ "learning_rate": 0.0,
+ "loss": 0.0,
+ "step": 270
+ },
+ {
+ "epoch": 0.9981515711645101,
+ "eval_loss": 1.3726342331210617e-05,
+ "eval_runtime": 52.2001,
+ "eval_samples_per_second": 18.601,
+ "eval_steps_per_second": 2.337,
+ "step": 270
+ },
+ {
+ "epoch": 0.9981515711645101,
+ "step": 270,
+ "total_flos": 5.260333472022528e+16,
+ "train_loss": 1.8035727003330572e-05,
+ "train_runtime": 2689.0507,
+ "train_samples_per_second": 3.217,
+ "train_steps_per_second": 0.1
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 270,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": false,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 5.260333472022528e+16,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..310e63f0641f02aa4da090789d8cf25e6ef879e4
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48db1adcd7121cc0952bac83481593034c426cd3d56b3e7a70068bc17b1bad10
+size 5304