diff --git a/prompt-a/onechar/README.md b/prompt-a/onechar/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d
--- /dev/null
+++ b/prompt-a/onechar/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.6.0.dev0
diff --git a/prompt-a/onechar/adapter_config.json b/prompt-a/onechar/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e
--- /dev/null
+++ b/prompt-a/onechar/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.01,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/prompt-a/onechar/adapter_model.bin b/prompt-a/onechar/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..efc4e51d138ecf5c5d8072972b34be04ca33ebd9
--- /dev/null
+++ b/prompt-a/onechar/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f5ec19329cacf861536c142e358f467bfaf4ef5352429c4826c33d208347a3d
+size 125374989
diff --git a/prompt-a/onechar/added_tokens.json b/prompt-a/onechar/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..e41416ddd79948246ea2dced6800ea3cd531c424
--- /dev/null
+++ b/prompt-a/onechar/added_tokens.json
@@ -0,0 +1,3 @@
+{
+ "[PAD]": 32000
+}
diff --git a/prompt-a/onechar/checkpoint-100/README.md b/prompt-a/onechar/checkpoint-100/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.6.0.dev0
diff --git a/prompt-a/onechar/checkpoint-100/adapter_config.json b/prompt-a/onechar/checkpoint-100/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.01,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/prompt-a/onechar/checkpoint-100/adapter_model.bin b/prompt-a/onechar/checkpoint-100/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3e5a35295386cee85b173a0b18ac1814a09a59c7
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ff610c0b53c8af0e9c984b4ed0725380085f987aa69e16c979a257578cfabcb
+size 125374989
diff --git a/prompt-a/onechar/checkpoint-100/adapter_model/README.md b/prompt-a/onechar/checkpoint-100/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.6.0.dev0
diff --git a/prompt-a/onechar/checkpoint-100/adapter_model/adapter_config.json b/prompt-a/onechar/checkpoint-100/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.01,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/prompt-a/onechar/checkpoint-100/adapter_model/adapter_model.bin b/prompt-a/onechar/checkpoint-100/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3e5a35295386cee85b173a0b18ac1814a09a59c7
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ff610c0b53c8af0e9c984b4ed0725380085f987aa69e16c979a257578cfabcb
+size 125374989
diff --git a/prompt-a/onechar/checkpoint-100/optimizer.pt b/prompt-a/onechar/checkpoint-100/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..98f21b5e00fe7df2f858f515322260407beb6c1a
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7f1f403d38dc995a8098ed409cbd760e111bd882f7a68df45055032258b88b6
+size 250681597
diff --git a/prompt-a/onechar/checkpoint-100/rng_state_0.pth b/prompt-a/onechar/checkpoint-100/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bd46f14b9d704beaf43d0a60fb0fcee7e7a4dc55
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4102fd8971c12e358d8aa24117ab73918cc1aaac07c4678184c87371a7cce1c6
+size 21687
diff --git a/prompt-a/onechar/checkpoint-100/rng_state_1.pth b/prompt-a/onechar/checkpoint-100/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2bd2a4cfe2f0c0839a7bd28c30a0ee7f68e10cbf
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a35f8c8b1beb2b76ef2ca1aa345895c6db951b1551c9042228eb0ea741ce12c
+size 21687
diff --git a/prompt-a/onechar/checkpoint-100/rng_state_2.pth b/prompt-a/onechar/checkpoint-100/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3590ccc56e14f086598123a24161f3c9c03da99d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa20b68c489c5a2cd2428b1f3e5072cdf8f2417980be271f9db72c4049028bb3
+size 21687
diff --git a/prompt-a/onechar/checkpoint-100/rng_state_3.pth b/prompt-a/onechar/checkpoint-100/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..183a796b307b0d8ac5ffdd09477b826d21994e35
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a828f6b225941a386fb560abfcd7d4dee497153fe1cf40b8a87e5174aa75bed3
+size 21687
diff --git a/prompt-a/onechar/checkpoint-100/rng_state_4.pth b/prompt-a/onechar/checkpoint-100/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d2d0fd052bcabc0b74f4b45f5eb2edd37d5a2f8c
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1af5550599dc6617b8105e29364818d1dbea0b5d63b4e86bd3c1697f930ae248
+size 21687
diff --git a/prompt-a/onechar/checkpoint-100/rng_state_5.pth b/prompt-a/onechar/checkpoint-100/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..abefa9a599bf1e9a29ee7d719924e925ba5adfbd
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b13498715ccaef6df8b69c2dab24d427d1152ac7f5c20f3a7aa0579d6df2f93
+size 21687
diff --git a/prompt-a/onechar/checkpoint-100/rng_state_6.pth b/prompt-a/onechar/checkpoint-100/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f09479d402f769953a69a982820f048bb9f12252
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0a13f52d9df555a3b9fac609325509aebf8adff3267140830db81111f0c8f4a
+size 21687
diff --git a/prompt-a/onechar/checkpoint-100/rng_state_7.pth b/prompt-a/onechar/checkpoint-100/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9c25123d780ea1d5371035e42eb4152cad9da645
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7afe0df70969c4c1a997671975d84b8bdde138351b49fb8396fec85cf845301
+size 21687
diff --git a/prompt-a/onechar/checkpoint-100/scheduler.pt b/prompt-a/onechar/checkpoint-100/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b6a3fa656e2f9813d7a5c39d48b47d4029c97391
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:781197e4490645acd969affd7bc482901bb63fc73f416de5de064f3460ca305a
+size 627
diff --git a/prompt-a/onechar/checkpoint-100/trainer_state.json b/prompt-a/onechar/checkpoint-100/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..07ea426f8fdd78d21d1df9ade93a28fea40a5029
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/trainer_state.json
@@ -0,0 +1,659 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.9523809523809523,
+ "eval_steps": 20,
+ "global_step": 100,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.01,
+ "learning_rate": 6.5e-06,
+ "loss": 2.1535,
+ "step": 1
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 1.3e-05,
+ "loss": 2.1328,
+ "step": 2
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 1.9499999999999996e-05,
+ "loss": 2.0918,
+ "step": 3
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 2.6e-05,
+ "loss": 2.1194,
+ "step": 4
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.25e-05,
+ "loss": 2.2342,
+ "step": 5
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 3.899999999999999e-05,
+ "loss": 2.1168,
+ "step": 6
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 4.5499999999999995e-05,
+ "loss": 2.1761,
+ "step": 7
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 5.2e-05,
+ "loss": 2.0764,
+ "step": 8
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 5.85e-05,
+ "loss": 2.1711,
+ "step": 9
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 6.5e-05,
+ "loss": 2.0885,
+ "step": 10
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 6.499599055565396e-05,
+ "loss": 2.0196,
+ "step": 11
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 6.498396321188627e-05,
+ "loss": 2.1799,
+ "step": 12
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 6.496392093626402e-05,
+ "loss": 2.1322,
+ "step": 13
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 6.493586867391882e-05,
+ "loss": 2.1486,
+ "step": 14
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 6.489981334632665e-05,
+ "loss": 2.1351,
+ "step": 15
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 6.48557638496001e-05,
+ "loss": 2.1366,
+ "step": 16
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 6.480373105229334e-05,
+ "loss": 2.0727,
+ "step": 17
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 6.474372779272053e-05,
+ "loss": 2.1123,
+ "step": 18
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 6.467576887578811e-05,
+ "loss": 2.0652,
+ "step": 19
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 6.459987106934198e-05,
+ "loss": 2.0008,
+ "step": 20
+ },
+ {
+ "epoch": 0.19,
+ "eval_loss": 2.0444116592407227,
+ "eval_runtime": 2.798,
+ "eval_samples_per_second": 3.574,
+ "eval_steps_per_second": 0.715,
+ "step": 20
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 6.451605310003014e-05,
+ "loss": 2.0769,
+ "step": 21
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 6.442433564868238e-05,
+ "loss": 1.9949,
+ "step": 22
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 6.432474134520738e-05,
+ "loss": 2.1341,
+ "step": 23
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 6.421729476300929e-05,
+ "loss": 2.1433,
+ "step": 24
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 6.410202241292448e-05,
+ "loss": 2.0168,
+ "step": 25
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 6.39789527366805e-05,
+ "loss": 2.0403,
+ "step": 26
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 6.384811609987843e-05,
+ "loss": 2.1983,
+ "step": 27
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 6.370954478450064e-05,
+ "loss": 2.0625,
+ "step": 28
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 6.356327298094572e-05,
+ "loss": 2.204,
+ "step": 29
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 6.340933677959248e-05,
+ "loss": 2.118,
+ "step": 30
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 6.324777416189522e-05,
+ "loss": 2.0997,
+ "step": 31
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 6.307862499101232e-05,
+ "loss": 2.1154,
+ "step": 32
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 6.290193100197069e-05,
+ "loss": 2.0845,
+ "step": 33
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 6.271773579136816e-05,
+ "loss": 2.0052,
+ "step": 34
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 6.252608480661681e-05,
+ "loss": 1.956,
+ "step": 35
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 6.232702533472938e-05,
+ "loss": 2.0958,
+ "step": 36
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 6.212060649065197e-05,
+ "loss": 2.0634,
+ "step": 37
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 6.190687920514563e-05,
+ "loss": 2.0033,
+ "step": 38
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 6.168589621222e-05,
+ "loss": 2.1426,
+ "step": 39
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 6.145771203612195e-05,
+ "loss": 1.9823,
+ "step": 40
+ },
+ {
+ "epoch": 0.38,
+ "eval_loss": 2.0226035118103027,
+ "eval_runtime": 2.8049,
+ "eval_samples_per_second": 3.565,
+ "eval_steps_per_second": 0.713,
+ "step": 40
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 6.122238297788254e-05,
+ "loss": 2.2016,
+ "step": 41
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 6.097996710142556e-05,
+ "loss": 2.1008,
+ "step": 42
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 6.0730524219241214e-05,
+ "loss": 2.0821,
+ "step": 43
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 6.047411587762816e-05,
+ "loss": 2.092,
+ "step": 44
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 6.0210805341507994e-05,
+ "loss": 1.9617,
+ "step": 45
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 5.994065757881549e-05,
+ "loss": 1.9358,
+ "step": 46
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 5.966373924446878e-05,
+ "loss": 2.0573,
+ "step": 47
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 5.938011866392325e-05,
+ "loss": 2.0731,
+ "step": 48
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 5.9089865816313253e-05,
+ "loss": 2.2842,
+ "step": 49
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 5.8793052317185785e-05,
+ "loss": 2.2145,
+ "step": 50
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 5.848975140083044e-05,
+ "loss": 1.9923,
+ "step": 51
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 5.818003790220993e-05,
+ "loss": 2.1065,
+ "step": 52
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 5.786398823849571e-05,
+ "loss": 2.0009,
+ "step": 53
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 5.754168039021315e-05,
+ "loss": 1.9785,
+ "step": 54
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 5.7213193882001006e-05,
+ "loss": 1.9504,
+ "step": 55
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 5.687860976298993e-05,
+ "loss": 2.1977,
+ "step": 56
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 5.6538010586804816e-05,
+ "loss": 2.0423,
+ "step": 57
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 5.619148039119587e-05,
+ "loss": 2.0536,
+ "step": 58
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 5.5839104677303635e-05,
+ "loss": 2.0286,
+ "step": 59
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 5.548097038856279e-05,
+ "loss": 2.1143,
+ "step": 60
+ },
+ {
+ "epoch": 0.57,
+ "eval_loss": 2.0128190517425537,
+ "eval_runtime": 2.8105,
+ "eval_samples_per_second": 3.558,
+ "eval_steps_per_second": 0.712,
+ "step": 60
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 5.511716588925021e-05,
+ "loss": 2.1397,
+ "step": 61
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 5.474778094268238e-05,
+ "loss": 2.0601,
+ "step": 62
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 5.437290668906763e-05,
+ "loss": 2.1707,
+ "step": 63
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 5.399263562301868e-05,
+ "loss": 2.0649,
+ "step": 64
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 5.360706157073096e-05,
+ "loss": 2.0314,
+ "step": 65
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 5.321627966683241e-05,
+ "loss": 2.1277,
+ "step": 66
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 5.2820386330910424e-05,
+ "loss": 2.0534,
+ "step": 67
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 5.2419479243721734e-05,
+ "loss": 1.9976,
+ "step": 68
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 5.2013657323091226e-05,
+ "loss": 2.0262,
+ "step": 69
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 5.160302069950537e-05,
+ "loss": 2.0093,
+ "step": 70
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 5.118767069140655e-05,
+ "loss": 2.1088,
+ "step": 71
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 5.076770978019424e-05,
+ "loss": 1.9841,
+ "step": 72
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 5.034324158493928e-05,
+ "loss": 2.0081,
+ "step": 73
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 4.991437083681739e-05,
+ "loss": 2.0179,
+ "step": 74
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 4.948120335326833e-05,
+ "loss": 2.1304,
+ "step": 75
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 4.904384601188706e-05,
+ "loss": 1.9213,
+ "step": 76
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 4.860240672405324e-05,
+ "loss": 2.2153,
+ "step": 77
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 4.815699440830574e-05,
+ "loss": 1.9365,
+ "step": 78
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 4.770771896346863e-05,
+ "loss": 2.021,
+ "step": 79
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 4.725469124153526e-05,
+ "loss": 2.0327,
+ "step": 80
+ },
+ {
+ "epoch": 0.76,
+ "eval_loss": 2.0059409141540527,
+ "eval_runtime": 2.8077,
+ "eval_samples_per_second": 3.562,
+ "eval_steps_per_second": 0.712,
+ "step": 80
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 4.679802302031724e-05,
+ "loss": 2.1576,
+ "step": 81
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 4.6337826975864856e-05,
+ "loss": 1.9468,
+ "step": 82
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 4.587421665466603e-05,
+ "loss": 2.0795,
+ "step": 83
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 4.540730644563036e-05,
+ "loss": 2.0566,
+ "step": 84
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 4.4937211551865415e-05,
+ "loss": 2.0819,
+ "step": 85
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 4.4464047962252035e-05,
+ "loss": 2.0542,
+ "step": 86
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 4.398793242282586e-05,
+ "loss": 2.0006,
+ "step": 87
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 4.350898240797197e-05,
+ "loss": 1.9953,
+ "step": 88
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 4.3027316091439854e-05,
+ "loss": 2.067,
+ "step": 89
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 4.254305231718579e-05,
+ "loss": 1.9909,
+ "step": 90
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 4.205631057004987e-05,
+ "loss": 1.9436,
+ "step": 91
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 4.156721094627495e-05,
+ "loss": 2.0643,
+ "step": 92
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 4.107587412387462e-05,
+ "loss": 2.1244,
+ "step": 93
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 4.058242133285779e-05,
+ "loss": 1.9795,
+ "step": 94
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 4.008697432531693e-05,
+ "loss": 2.1568,
+ "step": 95
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 3.9589655345387633e-05,
+ "loss": 1.9829,
+ "step": 96
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 3.909058709908665e-05,
+ "loss": 2.031,
+ "step": 97
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 3.858989272403605e-05,
+ "loss": 2.2042,
+ "step": 98
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 3.8087695759080804e-05,
+ "loss": 1.9775,
+ "step": 99
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 3.75841201138075e-05,
+ "loss": 2.0274,
+ "step": 100
+ },
+ {
+ "epoch": 0.95,
+ "eval_loss": 2.0003483295440674,
+ "eval_runtime": 2.8075,
+ "eval_samples_per_second": 3.562,
+ "eval_steps_per_second": 0.712,
+ "step": 100
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 210,
+ "num_train_epochs": 2,
+ "save_steps": 50,
+ "total_flos": 8.146151922047386e+16,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/prompt-a/onechar/checkpoint-100/training_args.bin b/prompt-a/onechar/checkpoint-100/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f6a9833d8c69d00318139a8fc6e1126e7d7ac62f
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-100/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ff85b6b2e99bcdbf030f4c883beac4553b8cad32f31f2d286263119b723c469
+size 4411
diff --git a/prompt-a/onechar/checkpoint-150/README.md b/prompt-a/onechar/checkpoint-150/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.6.0.dev0
diff --git a/prompt-a/onechar/checkpoint-150/adapter_config.json b/prompt-a/onechar/checkpoint-150/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.01,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/prompt-a/onechar/checkpoint-150/adapter_model.bin b/prompt-a/onechar/checkpoint-150/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e414f833dc304b909af390a644e13234fd7a4fd4
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16b699052f2d128f83c6010ef6f104f26dcbf6cb4325eedcc9a7b58bc08c5c9f
+size 125374989
diff --git a/prompt-a/onechar/checkpoint-150/adapter_model/README.md b/prompt-a/onechar/checkpoint-150/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.6.0.dev0
diff --git a/prompt-a/onechar/checkpoint-150/adapter_model/adapter_config.json b/prompt-a/onechar/checkpoint-150/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.01,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/prompt-a/onechar/checkpoint-150/adapter_model/adapter_model.bin b/prompt-a/onechar/checkpoint-150/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e414f833dc304b909af390a644e13234fd7a4fd4
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16b699052f2d128f83c6010ef6f104f26dcbf6cb4325eedcc9a7b58bc08c5c9f
+size 125374989
diff --git a/prompt-a/onechar/checkpoint-150/optimizer.pt b/prompt-a/onechar/checkpoint-150/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..177e712799bb777cd0a70695d411358ee65709e0
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6697d2772f8bdabc0de5308ce64fed55e96f213ec392cc350ff9e35b3419011a
+size 250681597
diff --git a/prompt-a/onechar/checkpoint-150/rng_state_0.pth b/prompt-a/onechar/checkpoint-150/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b8467bf58301adeddac4918aad90f7ed7b11fe90
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39b0267998bb7a1cfb283b31e1dd7afbe75c84407c53c4f1c298028d9daacc6e
+size 21687
diff --git a/prompt-a/onechar/checkpoint-150/rng_state_1.pth b/prompt-a/onechar/checkpoint-150/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fe874c140f820e3dd8b1fcf490184f21c33f9083
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50e5dc96be0a674ddf476b916d068682bfc8d9f342c0213d1ba280d171cbc1d0
+size 21687
diff --git a/prompt-a/onechar/checkpoint-150/rng_state_2.pth b/prompt-a/onechar/checkpoint-150/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bfecd933d4dfc73fa03780506049ffc66241c9e9
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f1ec8428ac308b6c6ffb63f0d2137a6c43b3b0f57031e333c2cde00dfbfedff
+size 21687
diff --git a/prompt-a/onechar/checkpoint-150/rng_state_3.pth b/prompt-a/onechar/checkpoint-150/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2b4461d7ff31a559100fe0f29a3ed023e9f190e7
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:832989103ea74caa2d18f41014c7ec87b33f665d85bbfabeac5e440a24a4e438
+size 21687
diff --git a/prompt-a/onechar/checkpoint-150/rng_state_4.pth b/prompt-a/onechar/checkpoint-150/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1e060ecb0e09581d9813d16528d78d01ddb7208b
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7c038cf0ec9ace1d9317b463a053999ebbd8efbc6568a0c63333109129be6de
+size 21687
diff --git a/prompt-a/onechar/checkpoint-150/rng_state_5.pth b/prompt-a/onechar/checkpoint-150/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..468210913429d5e73e28c4e107836a98b798fe35
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea0b0a35fbb56436b89de780a77c2850d7ceb494df91befbfc0be4b4388f8c31
+size 21687
diff --git a/prompt-a/onechar/checkpoint-150/rng_state_6.pth b/prompt-a/onechar/checkpoint-150/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..67e29ae9b2de9144ee1f89b7e33419d34897faac
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24a1fd82be4a9e4def96881742849b0f5444737a5c55f1de3d18ff9b31a835e5
+size 21687
diff --git a/prompt-a/onechar/checkpoint-150/rng_state_7.pth b/prompt-a/onechar/checkpoint-150/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f23a7558687babd9d76c519f71934f1a3809b9ca
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b924ccabce3c839d1b33edc854332d317e917f50d4f5defffe6a8f741a9641b0
+size 21687
diff --git a/prompt-a/onechar/checkpoint-150/scheduler.pt b/prompt-a/onechar/checkpoint-150/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9f71458a6d417fb79c6a17e12f00039ad26d70b1
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9abe1c80a7b715c51418717c88a5924cc27c160619ed0f1dec7010dc1687509
+size 627
diff --git a/prompt-a/onechar/checkpoint-150/trainer_state.json b/prompt-a/onechar/checkpoint-150/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..20d3af0e4c74485de30937314168f0399691f24b
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/trainer_state.json
@@ -0,0 +1,975 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.4285714285714286,
+ "eval_steps": 20,
+ "global_step": 150,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.01,
+ "learning_rate": 6.5e-06,
+ "loss": 2.1535,
+ "step": 1
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 1.3e-05,
+ "loss": 2.1328,
+ "step": 2
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 1.9499999999999996e-05,
+ "loss": 2.0918,
+ "step": 3
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 2.6e-05,
+ "loss": 2.1194,
+ "step": 4
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.25e-05,
+ "loss": 2.2342,
+ "step": 5
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 3.899999999999999e-05,
+ "loss": 2.1168,
+ "step": 6
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 4.5499999999999995e-05,
+ "loss": 2.1761,
+ "step": 7
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 5.2e-05,
+ "loss": 2.0764,
+ "step": 8
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 5.85e-05,
+ "loss": 2.1711,
+ "step": 9
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 6.5e-05,
+ "loss": 2.0885,
+ "step": 10
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 6.499599055565396e-05,
+ "loss": 2.0196,
+ "step": 11
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 6.498396321188627e-05,
+ "loss": 2.1799,
+ "step": 12
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 6.496392093626402e-05,
+ "loss": 2.1322,
+ "step": 13
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 6.493586867391882e-05,
+ "loss": 2.1486,
+ "step": 14
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 6.489981334632665e-05,
+ "loss": 2.1351,
+ "step": 15
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 6.48557638496001e-05,
+ "loss": 2.1366,
+ "step": 16
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 6.480373105229334e-05,
+ "loss": 2.0727,
+ "step": 17
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 6.474372779272053e-05,
+ "loss": 2.1123,
+ "step": 18
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 6.467576887578811e-05,
+ "loss": 2.0652,
+ "step": 19
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 6.459987106934198e-05,
+ "loss": 2.0008,
+ "step": 20
+ },
+ {
+ "epoch": 0.19,
+ "eval_loss": 2.0444116592407227,
+ "eval_runtime": 2.798,
+ "eval_samples_per_second": 3.574,
+ "eval_steps_per_second": 0.715,
+ "step": 20
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 6.451605310003014e-05,
+ "loss": 2.0769,
+ "step": 21
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 6.442433564868238e-05,
+ "loss": 1.9949,
+ "step": 22
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 6.432474134520738e-05,
+ "loss": 2.1341,
+ "step": 23
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 6.421729476300929e-05,
+ "loss": 2.1433,
+ "step": 24
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 6.410202241292448e-05,
+ "loss": 2.0168,
+ "step": 25
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 6.39789527366805e-05,
+ "loss": 2.0403,
+ "step": 26
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 6.384811609987843e-05,
+ "loss": 2.1983,
+ "step": 27
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 6.370954478450064e-05,
+ "loss": 2.0625,
+ "step": 28
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 6.356327298094572e-05,
+ "loss": 2.204,
+ "step": 29
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 6.340933677959248e-05,
+ "loss": 2.118,
+ "step": 30
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 6.324777416189522e-05,
+ "loss": 2.0997,
+ "step": 31
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 6.307862499101232e-05,
+ "loss": 2.1154,
+ "step": 32
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 6.290193100197069e-05,
+ "loss": 2.0845,
+ "step": 33
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 6.271773579136816e-05,
+ "loss": 2.0052,
+ "step": 34
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 6.252608480661681e-05,
+ "loss": 1.956,
+ "step": 35
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 6.232702533472938e-05,
+ "loss": 2.0958,
+ "step": 36
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 6.212060649065197e-05,
+ "loss": 2.0634,
+ "step": 37
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 6.190687920514563e-05,
+ "loss": 2.0033,
+ "step": 38
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 6.168589621222e-05,
+ "loss": 2.1426,
+ "step": 39
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 6.145771203612195e-05,
+ "loss": 1.9823,
+ "step": 40
+ },
+ {
+ "epoch": 0.38,
+ "eval_loss": 2.0226035118103027,
+ "eval_runtime": 2.8049,
+ "eval_samples_per_second": 3.565,
+ "eval_steps_per_second": 0.713,
+ "step": 40
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 6.122238297788254e-05,
+ "loss": 2.2016,
+ "step": 41
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 6.097996710142556e-05,
+ "loss": 2.1008,
+ "step": 42
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 6.0730524219241214e-05,
+ "loss": 2.0821,
+ "step": 43
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 6.047411587762816e-05,
+ "loss": 2.092,
+ "step": 44
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 6.0210805341507994e-05,
+ "loss": 1.9617,
+ "step": 45
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 5.994065757881549e-05,
+ "loss": 1.9358,
+ "step": 46
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 5.966373924446878e-05,
+ "loss": 2.0573,
+ "step": 47
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 5.938011866392325e-05,
+ "loss": 2.0731,
+ "step": 48
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 5.9089865816313253e-05,
+ "loss": 2.2842,
+ "step": 49
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 5.8793052317185785e-05,
+ "loss": 2.2145,
+ "step": 50
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 5.848975140083044e-05,
+ "loss": 1.9923,
+ "step": 51
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 5.818003790220993e-05,
+ "loss": 2.1065,
+ "step": 52
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 5.786398823849571e-05,
+ "loss": 2.0009,
+ "step": 53
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 5.754168039021315e-05,
+ "loss": 1.9785,
+ "step": 54
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 5.7213193882001006e-05,
+ "loss": 1.9504,
+ "step": 55
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 5.687860976298993e-05,
+ "loss": 2.1977,
+ "step": 56
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 5.6538010586804816e-05,
+ "loss": 2.0423,
+ "step": 57
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 5.619148039119587e-05,
+ "loss": 2.0536,
+ "step": 58
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 5.5839104677303635e-05,
+ "loss": 2.0286,
+ "step": 59
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 5.548097038856279e-05,
+ "loss": 2.1143,
+ "step": 60
+ },
+ {
+ "epoch": 0.57,
+ "eval_loss": 2.0128190517425537,
+ "eval_runtime": 2.8105,
+ "eval_samples_per_second": 3.558,
+ "eval_steps_per_second": 0.712,
+ "step": 60
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 5.511716588925021e-05,
+ "loss": 2.1397,
+ "step": 61
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 5.474778094268238e-05,
+ "loss": 2.0601,
+ "step": 62
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 5.437290668906763e-05,
+ "loss": 2.1707,
+ "step": 63
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 5.399263562301868e-05,
+ "loss": 2.0649,
+ "step": 64
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 5.360706157073096e-05,
+ "loss": 2.0314,
+ "step": 65
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 5.321627966683241e-05,
+ "loss": 2.1277,
+ "step": 66
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 5.2820386330910424e-05,
+ "loss": 2.0534,
+ "step": 67
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 5.2419479243721734e-05,
+ "loss": 1.9976,
+ "step": 68
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 5.2013657323091226e-05,
+ "loss": 2.0262,
+ "step": 69
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 5.160302069950537e-05,
+ "loss": 2.0093,
+ "step": 70
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 5.118767069140655e-05,
+ "loss": 2.1088,
+ "step": 71
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 5.076770978019424e-05,
+ "loss": 1.9841,
+ "step": 72
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 5.034324158493928e-05,
+ "loss": 2.0081,
+ "step": 73
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 4.991437083681739e-05,
+ "loss": 2.0179,
+ "step": 74
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 4.948120335326833e-05,
+ "loss": 2.1304,
+ "step": 75
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 4.904384601188706e-05,
+ "loss": 1.9213,
+ "step": 76
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 4.860240672405324e-05,
+ "loss": 2.2153,
+ "step": 77
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 4.815699440830574e-05,
+ "loss": 1.9365,
+ "step": 78
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 4.770771896346863e-05,
+ "loss": 2.021,
+ "step": 79
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 4.725469124153526e-05,
+ "loss": 2.0327,
+ "step": 80
+ },
+ {
+ "epoch": 0.76,
+ "eval_loss": 2.0059409141540527,
+ "eval_runtime": 2.8077,
+ "eval_samples_per_second": 3.562,
+ "eval_steps_per_second": 0.712,
+ "step": 80
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 4.679802302031724e-05,
+ "loss": 2.1576,
+ "step": 81
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 4.6337826975864856e-05,
+ "loss": 1.9468,
+ "step": 82
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 4.587421665466603e-05,
+ "loss": 2.0795,
+ "step": 83
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 4.540730644563036e-05,
+ "loss": 2.0566,
+ "step": 84
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 4.4937211551865415e-05,
+ "loss": 2.0819,
+ "step": 85
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 4.4464047962252035e-05,
+ "loss": 2.0542,
+ "step": 86
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 4.398793242282586e-05,
+ "loss": 2.0006,
+ "step": 87
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 4.350898240797197e-05,
+ "loss": 1.9953,
+ "step": 88
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 4.3027316091439854e-05,
+ "loss": 2.067,
+ "step": 89
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 4.254305231718579e-05,
+ "loss": 1.9909,
+ "step": 90
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 4.205631057004987e-05,
+ "loss": 1.9436,
+ "step": 91
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 4.156721094627495e-05,
+ "loss": 2.0643,
+ "step": 92
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 4.107587412387462e-05,
+ "loss": 2.1244,
+ "step": 93
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 4.058242133285779e-05,
+ "loss": 1.9795,
+ "step": 94
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 4.008697432531693e-05,
+ "loss": 2.1568,
+ "step": 95
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 3.9589655345387633e-05,
+ "loss": 1.9829,
+ "step": 96
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 3.909058709908665e-05,
+ "loss": 2.031,
+ "step": 97
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 3.858989272403605e-05,
+ "loss": 2.2042,
+ "step": 98
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 3.8087695759080804e-05,
+ "loss": 1.9775,
+ "step": 99
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 3.75841201138075e-05,
+ "loss": 2.0274,
+ "step": 100
+ },
+ {
+ "epoch": 0.95,
+ "eval_loss": 2.0003483295440674,
+ "eval_runtime": 2.8075,
+ "eval_samples_per_second": 3.562,
+ "eval_steps_per_second": 0.712,
+ "step": 100
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 3.707929003797143e-05,
+ "loss": 2.0745,
+ "step": 101
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 3.6573330090839886e-05,
+ "loss": 1.9933,
+ "step": 102
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 3.6066365110458966e-05,
+ "loss": 2.127,
+ "step": 103
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 3.555852018285172e-05,
+ "loss": 2.0499,
+ "step": 104
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 3.504992061115496e-05,
+ "loss": 1.9391,
+ "step": 105
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 3.4540691884702686e-05,
+ "loss": 2.0216,
+ "step": 106
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 3.4030959648063384e-05,
+ "loss": 2.0055,
+ "step": 107
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 3.3520849670039165e-05,
+ "loss": 2.0721,
+ "step": 108
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 3.301048781263417e-05,
+ "loss": 2.0274,
+ "step": 109
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 3.25e-05,
+ "loss": 1.9373,
+ "step": 110
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 3.198951218736583e-05,
+ "loss": 1.9034,
+ "step": 111
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 3.147915032996083e-05,
+ "loss": 1.8759,
+ "step": 112
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 3.096904035193661e-05,
+ "loss": 2.0572,
+ "step": 113
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 3.0459308115297312e-05,
+ "loss": 2.0072,
+ "step": 114
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 2.9950079388845042e-05,
+ "loss": 1.9129,
+ "step": 115
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 2.944147981714828e-05,
+ "loss": 2.0966,
+ "step": 116
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 2.8933634889541028e-05,
+ "loss": 2.0903,
+ "step": 117
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 2.8426669909160105e-05,
+ "loss": 2.0917,
+ "step": 118
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 2.7920709962028562e-05,
+ "loss": 2.089,
+ "step": 119
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 2.741587988619249e-05,
+ "loss": 1.9964,
+ "step": 120
+ },
+ {
+ "epoch": 1.14,
+ "eval_loss": 1.9988892078399658,
+ "eval_runtime": 2.8188,
+ "eval_samples_per_second": 3.548,
+ "eval_steps_per_second": 0.71,
+ "step": 120
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 2.6912304240919183e-05,
+ "loss": 1.9754,
+ "step": 121
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 2.641010727596394e-05,
+ "loss": 2.1508,
+ "step": 122
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 2.5909412900913353e-05,
+ "loss": 2.0331,
+ "step": 123
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 2.5410344654612374e-05,
+ "loss": 1.9607,
+ "step": 124
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 2.4913025674683083e-05,
+ "loss": 2.0356,
+ "step": 125
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 2.4417578667142222e-05,
+ "loss": 2.0417,
+ "step": 126
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 2.3924125876125387e-05,
+ "loss": 1.9246,
+ "step": 127
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 2.343278905372505e-05,
+ "loss": 2.0911,
+ "step": 128
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 2.2943689429950124e-05,
+ "loss": 1.9324,
+ "step": 129
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 2.2456947682814208e-05,
+ "loss": 1.9987,
+ "step": 130
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 2.1972683908560147e-05,
+ "loss": 2.0121,
+ "step": 131
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 2.149101759202803e-05,
+ "loss": 2.0002,
+ "step": 132
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 2.1012067577174147e-05,
+ "loss": 2.0464,
+ "step": 133
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 2.0535952037747966e-05,
+ "loss": 2.1422,
+ "step": 134
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 2.0062788448134583e-05,
+ "loss": 1.9796,
+ "step": 135
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 1.959269355436963e-05,
+ "loss": 1.9683,
+ "step": 136
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 1.9125783345333963e-05,
+ "loss": 2.1488,
+ "step": 137
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 1.866217302413514e-05,
+ "loss": 1.9852,
+ "step": 138
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 1.8201976979682756e-05,
+ "loss": 2.0087,
+ "step": 139
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 1.774530875846473e-05,
+ "loss": 1.9724,
+ "step": 140
+ },
+ {
+ "epoch": 1.33,
+ "eval_loss": 1.9960596561431885,
+ "eval_runtime": 2.8086,
+ "eval_samples_per_second": 3.56,
+ "eval_steps_per_second": 0.712,
+ "step": 140
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 1.7292281036531367e-05,
+ "loss": 2.0022,
+ "step": 141
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 1.6843005591694247e-05,
+ "loss": 2.0827,
+ "step": 142
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 1.639759327594675e-05,
+ "loss": 2.0052,
+ "step": 143
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 1.595615398811293e-05,
+ "loss": 1.9938,
+ "step": 144
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 1.551879664673166e-05,
+ "loss": 1.996,
+ "step": 145
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 1.5085629163182601e-05,
+ "loss": 2.0065,
+ "step": 146
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 1.4656758415060712e-05,
+ "loss": 2.0048,
+ "step": 147
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 1.4232290219805762e-05,
+ "loss": 1.8846,
+ "step": 148
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 1.3812329308593457e-05,
+ "loss": 1.9582,
+ "step": 149
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 1.3396979300494626e-05,
+ "loss": 2.0509,
+ "step": 150
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 210,
+ "num_train_epochs": 2,
+ "save_steps": 50,
+ "total_flos": 1.2206169951043584e+17,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/prompt-a/onechar/checkpoint-150/training_args.bin b/prompt-a/onechar/checkpoint-150/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f6a9833d8c69d00318139a8fc6e1126e7d7ac62f
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-150/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ff85b6b2e99bcdbf030f4c883beac4553b8cad32f31f2d286263119b723c469
+size 4411
diff --git a/prompt-a/onechar/checkpoint-200/README.md b/prompt-a/onechar/checkpoint-200/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.6.0.dev0
diff --git a/prompt-a/onechar/checkpoint-200/adapter_config.json b/prompt-a/onechar/checkpoint-200/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.01,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/prompt-a/onechar/checkpoint-200/adapter_model.bin b/prompt-a/onechar/checkpoint-200/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a0c25d83f98c218fd56f87d85148d5cf35374ec3
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0aeeda740ab6bb8ee51e6b7ecd7a2848397fd35d1b4db657df6d8fa4dac2f048
+size 125374989
diff --git a/prompt-a/onechar/checkpoint-200/adapter_model/README.md b/prompt-a/onechar/checkpoint-200/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.6.0.dev0
diff --git a/prompt-a/onechar/checkpoint-200/adapter_model/adapter_config.json b/prompt-a/onechar/checkpoint-200/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.01,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/prompt-a/onechar/checkpoint-200/adapter_model/adapter_model.bin b/prompt-a/onechar/checkpoint-200/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a0c25d83f98c218fd56f87d85148d5cf35374ec3
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0aeeda740ab6bb8ee51e6b7ecd7a2848397fd35d1b4db657df6d8fa4dac2f048
+size 125374989
diff --git a/prompt-a/onechar/checkpoint-200/optimizer.pt b/prompt-a/onechar/checkpoint-200/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c60174b7c83a2c0e7fc2090096f3b87ffcd4656f
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:696142d361db2f7734fc31c617f47091a5690b7733ed8c4a21b5b59a2ee213a8
+size 250681597
diff --git a/prompt-a/onechar/checkpoint-200/rng_state_0.pth b/prompt-a/onechar/checkpoint-200/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..da50e71000e6c1d1a4d7832253866f3044b66e7d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02519202233b2f973159654f70ae8d8d74816c44a0c980e2ec2bc67267e89056
+size 21687
diff --git a/prompt-a/onechar/checkpoint-200/rng_state_1.pth b/prompt-a/onechar/checkpoint-200/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ee7cfdbe994f9b04e1c752162cd3e359411fad28
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7022483dc2b7110805d232d89f98606595b39d55b0309c029f39838461bfa05
+size 21687
diff --git a/prompt-a/onechar/checkpoint-200/rng_state_2.pth b/prompt-a/onechar/checkpoint-200/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..56091f7ff7f2320b878a237a667fdba0f8dcd55f
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbee7570d88f852abb0c601b61e086fab06332833e7e65aa5e6cac17720c5a40
+size 21687
diff --git a/prompt-a/onechar/checkpoint-200/rng_state_3.pth b/prompt-a/onechar/checkpoint-200/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0307838519b9ac446131ad02102aefe833d22d3d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4322ba034422871591d2ff0c9e49f1a15c37d9e090b2554c996dfb31f3b0c71d
+size 21687
diff --git a/prompt-a/onechar/checkpoint-200/rng_state_4.pth b/prompt-a/onechar/checkpoint-200/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2cf045240097b80d2a27a13f154ee5a4972751df
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c7ff253aeb6baa0280c93c9623117398efc12932c877d511ea6a61c4dd3478d
+size 21687
diff --git a/prompt-a/onechar/checkpoint-200/rng_state_5.pth b/prompt-a/onechar/checkpoint-200/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ef5dae1f5582adbd029a6fc88fd6f98d159052a4
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9057a2b942467901cae269a7b56adc9305f3acecaaedd936c935eb0ca0f0c35b
+size 21687
diff --git a/prompt-a/onechar/checkpoint-200/rng_state_6.pth b/prompt-a/onechar/checkpoint-200/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a4cacbeda939117b3d98b8c588543ea6e86caaeb
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eecbe567bf4fcdbe022499d24c3a234ab459267ae14e5a586b3d94ba684f0166
+size 21687
diff --git a/prompt-a/onechar/checkpoint-200/rng_state_7.pth b/prompt-a/onechar/checkpoint-200/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6c7f151ca1dbc055be461ae2c2de74d8541f6947
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e6642c45077b1e8796a45aeec8e8477096bc2c474d725637e5efa8f6b04382e
+size 21687
diff --git a/prompt-a/onechar/checkpoint-200/scheduler.pt b/prompt-a/onechar/checkpoint-200/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b81124515ec46fd0d4546de54a49624b0ef04afe
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e64216f3a6fcd04f261085b97281e9e1c4d72c568c3b04b427986c8bef072d8
+size 627
diff --git a/prompt-a/onechar/checkpoint-200/trainer_state.json b/prompt-a/onechar/checkpoint-200/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c476ece0f529f280a41e965fa8f39533b74c14ab
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/trainer_state.json
@@ -0,0 +1,1299 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.9047619047619047,
+ "eval_steps": 20,
+ "global_step": 200,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.01,
+ "learning_rate": 6.5e-06,
+ "loss": 2.1535,
+ "step": 1
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 1.3e-05,
+ "loss": 2.1328,
+ "step": 2
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 1.9499999999999996e-05,
+ "loss": 2.0918,
+ "step": 3
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 2.6e-05,
+ "loss": 2.1194,
+ "step": 4
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.25e-05,
+ "loss": 2.2342,
+ "step": 5
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 3.899999999999999e-05,
+ "loss": 2.1168,
+ "step": 6
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 4.5499999999999995e-05,
+ "loss": 2.1761,
+ "step": 7
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 5.2e-05,
+ "loss": 2.0764,
+ "step": 8
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 5.85e-05,
+ "loss": 2.1711,
+ "step": 9
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 6.5e-05,
+ "loss": 2.0885,
+ "step": 10
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 6.499599055565396e-05,
+ "loss": 2.0196,
+ "step": 11
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 6.498396321188627e-05,
+ "loss": 2.1799,
+ "step": 12
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 6.496392093626402e-05,
+ "loss": 2.1322,
+ "step": 13
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 6.493586867391882e-05,
+ "loss": 2.1486,
+ "step": 14
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 6.489981334632665e-05,
+ "loss": 2.1351,
+ "step": 15
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 6.48557638496001e-05,
+ "loss": 2.1366,
+ "step": 16
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 6.480373105229334e-05,
+ "loss": 2.0727,
+ "step": 17
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 6.474372779272053e-05,
+ "loss": 2.1123,
+ "step": 18
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 6.467576887578811e-05,
+ "loss": 2.0652,
+ "step": 19
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 6.459987106934198e-05,
+ "loss": 2.0008,
+ "step": 20
+ },
+ {
+ "epoch": 0.19,
+ "eval_loss": 2.0444116592407227,
+ "eval_runtime": 2.798,
+ "eval_samples_per_second": 3.574,
+ "eval_steps_per_second": 0.715,
+ "step": 20
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 6.451605310003014e-05,
+ "loss": 2.0769,
+ "step": 21
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 6.442433564868238e-05,
+ "loss": 1.9949,
+ "step": 22
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 6.432474134520738e-05,
+ "loss": 2.1341,
+ "step": 23
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 6.421729476300929e-05,
+ "loss": 2.1433,
+ "step": 24
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 6.410202241292448e-05,
+ "loss": 2.0168,
+ "step": 25
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 6.39789527366805e-05,
+ "loss": 2.0403,
+ "step": 26
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 6.384811609987843e-05,
+ "loss": 2.1983,
+ "step": 27
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 6.370954478450064e-05,
+ "loss": 2.0625,
+ "step": 28
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 6.356327298094572e-05,
+ "loss": 2.204,
+ "step": 29
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 6.340933677959248e-05,
+ "loss": 2.118,
+ "step": 30
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 6.324777416189522e-05,
+ "loss": 2.0997,
+ "step": 31
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 6.307862499101232e-05,
+ "loss": 2.1154,
+ "step": 32
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 6.290193100197069e-05,
+ "loss": 2.0845,
+ "step": 33
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 6.271773579136816e-05,
+ "loss": 2.0052,
+ "step": 34
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 6.252608480661681e-05,
+ "loss": 1.956,
+ "step": 35
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 6.232702533472938e-05,
+ "loss": 2.0958,
+ "step": 36
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 6.212060649065197e-05,
+ "loss": 2.0634,
+ "step": 37
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 6.190687920514563e-05,
+ "loss": 2.0033,
+ "step": 38
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 6.168589621222e-05,
+ "loss": 2.1426,
+ "step": 39
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 6.145771203612195e-05,
+ "loss": 1.9823,
+ "step": 40
+ },
+ {
+ "epoch": 0.38,
+ "eval_loss": 2.0226035118103027,
+ "eval_runtime": 2.8049,
+ "eval_samples_per_second": 3.565,
+ "eval_steps_per_second": 0.713,
+ "step": 40
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 6.122238297788254e-05,
+ "loss": 2.2016,
+ "step": 41
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 6.097996710142556e-05,
+ "loss": 2.1008,
+ "step": 42
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 6.0730524219241214e-05,
+ "loss": 2.0821,
+ "step": 43
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 6.047411587762816e-05,
+ "loss": 2.092,
+ "step": 44
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 6.0210805341507994e-05,
+ "loss": 1.9617,
+ "step": 45
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 5.994065757881549e-05,
+ "loss": 1.9358,
+ "step": 46
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 5.966373924446878e-05,
+ "loss": 2.0573,
+ "step": 47
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 5.938011866392325e-05,
+ "loss": 2.0731,
+ "step": 48
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 5.9089865816313253e-05,
+ "loss": 2.2842,
+ "step": 49
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 5.8793052317185785e-05,
+ "loss": 2.2145,
+ "step": 50
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 5.848975140083044e-05,
+ "loss": 1.9923,
+ "step": 51
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 5.818003790220993e-05,
+ "loss": 2.1065,
+ "step": 52
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 5.786398823849571e-05,
+ "loss": 2.0009,
+ "step": 53
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 5.754168039021315e-05,
+ "loss": 1.9785,
+ "step": 54
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 5.7213193882001006e-05,
+ "loss": 1.9504,
+ "step": 55
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 5.687860976298993e-05,
+ "loss": 2.1977,
+ "step": 56
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 5.6538010586804816e-05,
+ "loss": 2.0423,
+ "step": 57
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 5.619148039119587e-05,
+ "loss": 2.0536,
+ "step": 58
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 5.5839104677303635e-05,
+ "loss": 2.0286,
+ "step": 59
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 5.548097038856279e-05,
+ "loss": 2.1143,
+ "step": 60
+ },
+ {
+ "epoch": 0.57,
+ "eval_loss": 2.0128190517425537,
+ "eval_runtime": 2.8105,
+ "eval_samples_per_second": 3.558,
+ "eval_steps_per_second": 0.712,
+ "step": 60
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 5.511716588925021e-05,
+ "loss": 2.1397,
+ "step": 61
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 5.474778094268238e-05,
+ "loss": 2.0601,
+ "step": 62
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 5.437290668906763e-05,
+ "loss": 2.1707,
+ "step": 63
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 5.399263562301868e-05,
+ "loss": 2.0649,
+ "step": 64
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 5.360706157073096e-05,
+ "loss": 2.0314,
+ "step": 65
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 5.321627966683241e-05,
+ "loss": 2.1277,
+ "step": 66
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 5.2820386330910424e-05,
+ "loss": 2.0534,
+ "step": 67
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 5.2419479243721734e-05,
+ "loss": 1.9976,
+ "step": 68
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 5.2013657323091226e-05,
+ "loss": 2.0262,
+ "step": 69
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 5.160302069950537e-05,
+ "loss": 2.0093,
+ "step": 70
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 5.118767069140655e-05,
+ "loss": 2.1088,
+ "step": 71
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 5.076770978019424e-05,
+ "loss": 1.9841,
+ "step": 72
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 5.034324158493928e-05,
+ "loss": 2.0081,
+ "step": 73
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 4.991437083681739e-05,
+ "loss": 2.0179,
+ "step": 74
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 4.948120335326833e-05,
+ "loss": 2.1304,
+ "step": 75
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 4.904384601188706e-05,
+ "loss": 1.9213,
+ "step": 76
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 4.860240672405324e-05,
+ "loss": 2.2153,
+ "step": 77
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 4.815699440830574e-05,
+ "loss": 1.9365,
+ "step": 78
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 4.770771896346863e-05,
+ "loss": 2.021,
+ "step": 79
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 4.725469124153526e-05,
+ "loss": 2.0327,
+ "step": 80
+ },
+ {
+ "epoch": 0.76,
+ "eval_loss": 2.0059409141540527,
+ "eval_runtime": 2.8077,
+ "eval_samples_per_second": 3.562,
+ "eval_steps_per_second": 0.712,
+ "step": 80
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 4.679802302031724e-05,
+ "loss": 2.1576,
+ "step": 81
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 4.6337826975864856e-05,
+ "loss": 1.9468,
+ "step": 82
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 4.587421665466603e-05,
+ "loss": 2.0795,
+ "step": 83
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 4.540730644563036e-05,
+ "loss": 2.0566,
+ "step": 84
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 4.4937211551865415e-05,
+ "loss": 2.0819,
+ "step": 85
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 4.4464047962252035e-05,
+ "loss": 2.0542,
+ "step": 86
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 4.398793242282586e-05,
+ "loss": 2.0006,
+ "step": 87
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 4.350898240797197e-05,
+ "loss": 1.9953,
+ "step": 88
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 4.3027316091439854e-05,
+ "loss": 2.067,
+ "step": 89
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 4.254305231718579e-05,
+ "loss": 1.9909,
+ "step": 90
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 4.205631057004987e-05,
+ "loss": 1.9436,
+ "step": 91
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 4.156721094627495e-05,
+ "loss": 2.0643,
+ "step": 92
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 4.107587412387462e-05,
+ "loss": 2.1244,
+ "step": 93
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 4.058242133285779e-05,
+ "loss": 1.9795,
+ "step": 94
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 4.008697432531693e-05,
+ "loss": 2.1568,
+ "step": 95
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 3.9589655345387633e-05,
+ "loss": 1.9829,
+ "step": 96
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 3.909058709908665e-05,
+ "loss": 2.031,
+ "step": 97
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 3.858989272403605e-05,
+ "loss": 2.2042,
+ "step": 98
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 3.8087695759080804e-05,
+ "loss": 1.9775,
+ "step": 99
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 3.75841201138075e-05,
+ "loss": 2.0274,
+ "step": 100
+ },
+ {
+ "epoch": 0.95,
+ "eval_loss": 2.0003483295440674,
+ "eval_runtime": 2.8075,
+ "eval_samples_per_second": 3.562,
+ "eval_steps_per_second": 0.712,
+ "step": 100
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 3.707929003797143e-05,
+ "loss": 2.0745,
+ "step": 101
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 3.6573330090839886e-05,
+ "loss": 1.9933,
+ "step": 102
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 3.6066365110458966e-05,
+ "loss": 2.127,
+ "step": 103
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 3.555852018285172e-05,
+ "loss": 2.0499,
+ "step": 104
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 3.504992061115496e-05,
+ "loss": 1.9391,
+ "step": 105
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 3.4540691884702686e-05,
+ "loss": 2.0216,
+ "step": 106
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 3.4030959648063384e-05,
+ "loss": 2.0055,
+ "step": 107
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 3.3520849670039165e-05,
+ "loss": 2.0721,
+ "step": 108
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 3.301048781263417e-05,
+ "loss": 2.0274,
+ "step": 109
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 3.25e-05,
+ "loss": 1.9373,
+ "step": 110
+ },
+ {
+ "epoch": 1.06,
+ "learning_rate": 3.198951218736583e-05,
+ "loss": 1.9034,
+ "step": 111
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 3.147915032996083e-05,
+ "loss": 1.8759,
+ "step": 112
+ },
+ {
+ "epoch": 1.08,
+ "learning_rate": 3.096904035193661e-05,
+ "loss": 2.0572,
+ "step": 113
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 3.0459308115297312e-05,
+ "loss": 2.0072,
+ "step": 114
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 2.9950079388845042e-05,
+ "loss": 1.9129,
+ "step": 115
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 2.944147981714828e-05,
+ "loss": 2.0966,
+ "step": 116
+ },
+ {
+ "epoch": 1.11,
+ "learning_rate": 2.8933634889541028e-05,
+ "loss": 2.0903,
+ "step": 117
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 2.8426669909160105e-05,
+ "loss": 2.0917,
+ "step": 118
+ },
+ {
+ "epoch": 1.13,
+ "learning_rate": 2.7920709962028562e-05,
+ "loss": 2.089,
+ "step": 119
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 2.741587988619249e-05,
+ "loss": 1.9964,
+ "step": 120
+ },
+ {
+ "epoch": 1.14,
+ "eval_loss": 1.9988892078399658,
+ "eval_runtime": 2.8188,
+ "eval_samples_per_second": 3.548,
+ "eval_steps_per_second": 0.71,
+ "step": 120
+ },
+ {
+ "epoch": 1.15,
+ "learning_rate": 2.6912304240919183e-05,
+ "loss": 1.9754,
+ "step": 121
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 2.641010727596394e-05,
+ "loss": 2.1508,
+ "step": 122
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 2.5909412900913353e-05,
+ "loss": 2.0331,
+ "step": 123
+ },
+ {
+ "epoch": 1.18,
+ "learning_rate": 2.5410344654612374e-05,
+ "loss": 1.9607,
+ "step": 124
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 2.4913025674683083e-05,
+ "loss": 2.0356,
+ "step": 125
+ },
+ {
+ "epoch": 1.2,
+ "learning_rate": 2.4417578667142222e-05,
+ "loss": 2.0417,
+ "step": 126
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 2.3924125876125387e-05,
+ "loss": 1.9246,
+ "step": 127
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 2.343278905372505e-05,
+ "loss": 2.0911,
+ "step": 128
+ },
+ {
+ "epoch": 1.23,
+ "learning_rate": 2.2943689429950124e-05,
+ "loss": 1.9324,
+ "step": 129
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 2.2456947682814208e-05,
+ "loss": 1.9987,
+ "step": 130
+ },
+ {
+ "epoch": 1.25,
+ "learning_rate": 2.1972683908560147e-05,
+ "loss": 2.0121,
+ "step": 131
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 2.149101759202803e-05,
+ "loss": 2.0002,
+ "step": 132
+ },
+ {
+ "epoch": 1.27,
+ "learning_rate": 2.1012067577174147e-05,
+ "loss": 2.0464,
+ "step": 133
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 2.0535952037747966e-05,
+ "loss": 2.1422,
+ "step": 134
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 2.0062788448134583e-05,
+ "loss": 1.9796,
+ "step": 135
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 1.959269355436963e-05,
+ "loss": 1.9683,
+ "step": 136
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 1.9125783345333963e-05,
+ "loss": 2.1488,
+ "step": 137
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 1.866217302413514e-05,
+ "loss": 1.9852,
+ "step": 138
+ },
+ {
+ "epoch": 1.32,
+ "learning_rate": 1.8201976979682756e-05,
+ "loss": 2.0087,
+ "step": 139
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 1.774530875846473e-05,
+ "loss": 1.9724,
+ "step": 140
+ },
+ {
+ "epoch": 1.33,
+ "eval_loss": 1.9960596561431885,
+ "eval_runtime": 2.8086,
+ "eval_samples_per_second": 3.56,
+ "eval_steps_per_second": 0.712,
+ "step": 140
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 1.7292281036531367e-05,
+ "loss": 2.0022,
+ "step": 141
+ },
+ {
+ "epoch": 1.35,
+ "learning_rate": 1.6843005591694247e-05,
+ "loss": 2.0827,
+ "step": 142
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 1.639759327594675e-05,
+ "loss": 2.0052,
+ "step": 143
+ },
+ {
+ "epoch": 1.37,
+ "learning_rate": 1.595615398811293e-05,
+ "loss": 1.9938,
+ "step": 144
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 1.551879664673166e-05,
+ "loss": 1.996,
+ "step": 145
+ },
+ {
+ "epoch": 1.39,
+ "learning_rate": 1.5085629163182601e-05,
+ "loss": 2.0065,
+ "step": 146
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 1.4656758415060712e-05,
+ "loss": 2.0048,
+ "step": 147
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 1.4232290219805762e-05,
+ "loss": 1.8846,
+ "step": 148
+ },
+ {
+ "epoch": 1.42,
+ "learning_rate": 1.3812329308593457e-05,
+ "loss": 1.9582,
+ "step": 149
+ },
+ {
+ "epoch": 1.43,
+ "learning_rate": 1.3396979300494626e-05,
+ "loss": 2.0509,
+ "step": 150
+ },
+ {
+ "epoch": 1.44,
+ "learning_rate": 1.2986342676908771e-05,
+ "loss": 2.0542,
+ "step": 151
+ },
+ {
+ "epoch": 1.45,
+ "learning_rate": 1.258052075627827e-05,
+ "loss": 2.0184,
+ "step": 152
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 1.217961366908959e-05,
+ "loss": 1.9188,
+ "step": 153
+ },
+ {
+ "epoch": 1.47,
+ "learning_rate": 1.1783720333167582e-05,
+ "loss": 2.0601,
+ "step": 154
+ },
+ {
+ "epoch": 1.48,
+ "learning_rate": 1.1392938429269033e-05,
+ "loss": 1.9857,
+ "step": 155
+ },
+ {
+ "epoch": 1.49,
+ "learning_rate": 1.1007364376981318e-05,
+ "loss": 2.053,
+ "step": 156
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 1.0627093310932374e-05,
+ "loss": 2.1052,
+ "step": 157
+ },
+ {
+ "epoch": 1.5,
+ "learning_rate": 1.0252219057317615e-05,
+ "loss": 1.9195,
+ "step": 158
+ },
+ {
+ "epoch": 1.51,
+ "learning_rate": 9.882834110749784e-06,
+ "loss": 1.9893,
+ "step": 159
+ },
+ {
+ "epoch": 1.52,
+ "learning_rate": 9.519029611437207e-06,
+ "loss": 2.178,
+ "step": 160
+ },
+ {
+ "epoch": 1.52,
+ "eval_loss": 1.9954354763031006,
+ "eval_runtime": 2.8093,
+ "eval_samples_per_second": 3.56,
+ "eval_steps_per_second": 0.712,
+ "step": 160
+ },
+ {
+ "epoch": 1.53,
+ "learning_rate": 9.160895322696367e-06,
+ "loss": 2.138,
+ "step": 161
+ },
+ {
+ "epoch": 1.54,
+ "learning_rate": 8.80851960880413e-06,
+ "loss": 2.0366,
+ "step": 162
+ },
+ {
+ "epoch": 1.55,
+ "learning_rate": 8.46198941319518e-06,
+ "loss": 1.9642,
+ "step": 163
+ },
+ {
+ "epoch": 1.56,
+ "learning_rate": 8.121390237010062e-06,
+ "loss": 1.9771,
+ "step": 164
+ },
+ {
+ "epoch": 1.57,
+ "learning_rate": 7.786806117998994e-06,
+ "loss": 2.0494,
+ "step": 165
+ },
+ {
+ "epoch": 1.58,
+ "learning_rate": 7.458319609786853e-06,
+ "loss": 2.0378,
+ "step": 166
+ },
+ {
+ "epoch": 1.59,
+ "learning_rate": 7.136011761504281e-06,
+ "loss": 2.0146,
+ "step": 167
+ },
+ {
+ "epoch": 1.6,
+ "learning_rate": 6.819962097790061e-06,
+ "loss": 2.157,
+ "step": 168
+ },
+ {
+ "epoch": 1.61,
+ "learning_rate": 6.510248599169555e-06,
+ "loss": 2.0941,
+ "step": 169
+ },
+ {
+ "epoch": 1.62,
+ "learning_rate": 6.206947682814211e-06,
+ "loss": 1.9445,
+ "step": 170
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 5.9101341836867315e-06,
+ "loss": 2.0506,
+ "step": 171
+ },
+ {
+ "epoch": 1.64,
+ "learning_rate": 5.619881336076736e-06,
+ "loss": 2.0635,
+ "step": 172
+ },
+ {
+ "epoch": 1.65,
+ "learning_rate": 5.336260755531223e-06,
+ "loss": 1.9494,
+ "step": 173
+ },
+ {
+ "epoch": 1.66,
+ "learning_rate": 5.059342421184516e-06,
+ "loss": 1.9675,
+ "step": 174
+ },
+ {
+ "epoch": 1.67,
+ "learning_rate": 4.789194658492004e-06,
+ "loss": 1.9892,
+ "step": 175
+ },
+ {
+ "epoch": 1.68,
+ "learning_rate": 4.525884122371835e-06,
+ "loss": 2.0223,
+ "step": 176
+ },
+ {
+ "epoch": 1.69,
+ "learning_rate": 4.269475780758789e-06,
+ "loss": 2.2695,
+ "step": 177
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 4.02003289857444e-06,
+ "loss": 2.0977,
+ "step": 178
+ },
+ {
+ "epoch": 1.7,
+ "learning_rate": 3.777617022117462e-06,
+ "loss": 1.9732,
+ "step": 179
+ },
+ {
+ "epoch": 1.71,
+ "learning_rate": 3.5422879638780466e-06,
+ "loss": 2.0136,
+ "step": 180
+ },
+ {
+ "epoch": 1.71,
+ "eval_loss": 1.9940338134765625,
+ "eval_runtime": 2.8151,
+ "eval_samples_per_second": 3.552,
+ "eval_steps_per_second": 0.71,
+ "step": 180
+ },
+ {
+ "epoch": 1.72,
+ "learning_rate": 3.3141037877799946e-06,
+ "loss": 2.0513,
+ "step": 181
+ },
+ {
+ "epoch": 1.73,
+ "learning_rate": 3.0931207948543707e-06,
+ "loss": 2.0869,
+ "step": 182
+ },
+ {
+ "epoch": 1.74,
+ "learning_rate": 2.8793935093480276e-06,
+ "loss": 2.0547,
+ "step": 183
+ },
+ {
+ "epoch": 1.75,
+ "learning_rate": 2.6729746652706126e-06,
+ "loss": 2.0177,
+ "step": 184
+ },
+ {
+ "epoch": 1.76,
+ "learning_rate": 2.473915193383181e-06,
+ "loss": 2.0071,
+ "step": 185
+ },
+ {
+ "epoch": 1.77,
+ "learning_rate": 2.282264208631831e-06,
+ "loss": 1.9302,
+ "step": 186
+ },
+ {
+ "epoch": 1.78,
+ "learning_rate": 2.0980689980293135e-06,
+ "loss": 2.0888,
+ "step": 187
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 1.9213750089876725e-06,
+ "loss": 2.0167,
+ "step": 188
+ },
+ {
+ "epoch": 1.8,
+ "learning_rate": 1.7522258381047776e-06,
+ "loss": 2.0818,
+ "step": 189
+ },
+ {
+ "epoch": 1.81,
+ "learning_rate": 1.5906632204075102e-06,
+ "loss": 1.9876,
+ "step": 190
+ },
+ {
+ "epoch": 1.82,
+ "learning_rate": 1.436727019054271e-06,
+ "loss": 2.0801,
+ "step": 191
+ },
+ {
+ "epoch": 1.83,
+ "learning_rate": 1.2904552154993503e-06,
+ "loss": 2.0012,
+ "step": 192
+ },
+ {
+ "epoch": 1.84,
+ "learning_rate": 1.1518839001215623e-06,
+ "loss": 1.8813,
+ "step": 193
+ },
+ {
+ "epoch": 1.85,
+ "learning_rate": 1.02104726331949e-06,
+ "loss": 1.9551,
+ "step": 194
+ },
+ {
+ "epoch": 1.86,
+ "learning_rate": 8.979775870755119e-07,
+ "loss": 2.1161,
+ "step": 195
+ },
+ {
+ "epoch": 1.87,
+ "learning_rate": 7.827052369907083e-07,
+ "loss": 2.0543,
+ "step": 196
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 6.752586547926095e-07,
+ "loss": 2.2174,
+ "step": 197
+ },
+ {
+ "epoch": 1.89,
+ "learning_rate": 5.756643513176201e-07,
+ "loss": 2.0583,
+ "step": 198
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 4.839468999698492e-07,
+ "loss": 2.1223,
+ "step": 199
+ },
+ {
+ "epoch": 1.9,
+ "learning_rate": 4.00128930658026e-07,
+ "loss": 1.9051,
+ "step": 200
+ },
+ {
+ "epoch": 1.9,
+ "eval_loss": 1.994101881980896,
+ "eval_runtime": 2.8081,
+ "eval_samples_per_second": 3.561,
+ "eval_steps_per_second": 0.712,
+ "step": 200
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 210,
+ "num_train_epochs": 2,
+ "save_steps": 50,
+ "total_flos": 1.6296572813416858e+17,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/prompt-a/onechar/checkpoint-200/training_args.bin b/prompt-a/onechar/checkpoint-200/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f6a9833d8c69d00318139a8fc6e1126e7d7ac62f
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-200/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ff85b6b2e99bcdbf030f4c883beac4553b8cad32f31f2d286263119b723c469
+size 4411
diff --git a/prompt-a/onechar/checkpoint-50/README.md b/prompt-a/onechar/checkpoint-50/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.6.0.dev0
diff --git a/prompt-a/onechar/checkpoint-50/adapter_config.json b/prompt-a/onechar/checkpoint-50/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.01,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/prompt-a/onechar/checkpoint-50/adapter_model.bin b/prompt-a/onechar/checkpoint-50/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3aca02acd8592afbcd83191047871316bbc87745
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:651e76d071000edf080354af6eaea8018c5f0bfb064c7d4bc9761cc218039b1d
+size 125374989
diff --git a/prompt-a/onechar/checkpoint-50/adapter_model/README.md b/prompt-a/onechar/checkpoint-50/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.6.0.dev0
diff --git a/prompt-a/onechar/checkpoint-50/adapter_model/adapter_config.json b/prompt-a/onechar/checkpoint-50/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.01,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "down_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/prompt-a/onechar/checkpoint-50/adapter_model/adapter_model.bin b/prompt-a/onechar/checkpoint-50/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3aca02acd8592afbcd83191047871316bbc87745
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:651e76d071000edf080354af6eaea8018c5f0bfb064c7d4bc9761cc218039b1d
+size 125374989
diff --git a/prompt-a/onechar/checkpoint-50/optimizer.pt b/prompt-a/onechar/checkpoint-50/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f07700ec79cc78a41fda44cb8dff398ff97161fe
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0a18365f569c60809b831c3392febacee180b7721e2d7b1cf4cb20e6f88b338
+size 250681597
diff --git a/prompt-a/onechar/checkpoint-50/rng_state_0.pth b/prompt-a/onechar/checkpoint-50/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9657e25b8e560ff5f327c8dc11c766083e1e5307
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed89856c64d26a78541a35ec221bc2b0be749157188567670657c394a266b758
+size 21687
diff --git a/prompt-a/onechar/checkpoint-50/rng_state_1.pth b/prompt-a/onechar/checkpoint-50/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..17628a4144b1543dd247eb43ef8f854dfd794dc4
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:820498a0470c610380dcc9c728864f0b01a35ed840d8891fb4254551faeec64b
+size 21687
diff --git a/prompt-a/onechar/checkpoint-50/rng_state_2.pth b/prompt-a/onechar/checkpoint-50/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..413eedd7ed80e5e7349f4991e5b3a857d00c0465
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6d665d7aece15556da73ea687f3341f188a352b9cec96326d64275d71bccb8a
+size 21687
diff --git a/prompt-a/onechar/checkpoint-50/rng_state_3.pth b/prompt-a/onechar/checkpoint-50/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..46d754b888ae7ac87e43d125ba7c3cf144c212a1
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c3e7a9ae730eadfbb508e64610c31d6077c0215ca3becee01ebe5e807184933
+size 21687
diff --git a/prompt-a/onechar/checkpoint-50/rng_state_4.pth b/prompt-a/onechar/checkpoint-50/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2f9140564c11dc24aed0031713090a7cda2e1b6b
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a2ac6d81e0c93a656974dece1a97367d2f4eac41c40a26082f99352fe5f8755
+size 21687
diff --git a/prompt-a/onechar/checkpoint-50/rng_state_5.pth b/prompt-a/onechar/checkpoint-50/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c2af78b776c7d5748ad4a413c6edbddefb70fa00
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d69c5eee44666d590cc14929efb25ad70713ba9ab3fc44c2a73c778b88573d3
+size 21687
diff --git a/prompt-a/onechar/checkpoint-50/rng_state_6.pth b/prompt-a/onechar/checkpoint-50/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d6bcd1ae0a9ebb65d683d1809b8f812933a28730
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ea24e03d2ca68e580c8f995bde7ff41c5238275b75912db4e6c66d3909b4910
+size 21687
diff --git a/prompt-a/onechar/checkpoint-50/rng_state_7.pth b/prompt-a/onechar/checkpoint-50/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b42b22d6a68c5070be6a2c10ef3bfe64a477aa6d
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc2e7e1fa600c6c6a79b3937b0656f80b525198bcde9f9b7fabc55d0a8b6e12f
+size 21687
diff --git a/prompt-a/onechar/checkpoint-50/scheduler.pt b/prompt-a/onechar/checkpoint-50/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99b00e04b3c739592c28a943e82e4828c5722494
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5364a6b480c49bfcbbc6891f5b91141d4093514b963940d64fc410e4731b6c5e
+size 627
diff --git a/prompt-a/onechar/checkpoint-50/trainer_state.json b/prompt-a/onechar/checkpoint-50/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a5af08db0c012d4cd296bbc43bc34d3f248885eb
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/trainer_state.json
@@ -0,0 +1,335 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.47619047619047616,
+ "eval_steps": 20,
+ "global_step": 50,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.01,
+ "learning_rate": 6.5e-06,
+ "loss": 2.1535,
+ "step": 1
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 1.3e-05,
+ "loss": 2.1328,
+ "step": 2
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 1.9499999999999996e-05,
+ "loss": 2.0918,
+ "step": 3
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 2.6e-05,
+ "loss": 2.1194,
+ "step": 4
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.25e-05,
+ "loss": 2.2342,
+ "step": 5
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 3.899999999999999e-05,
+ "loss": 2.1168,
+ "step": 6
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 4.5499999999999995e-05,
+ "loss": 2.1761,
+ "step": 7
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 5.2e-05,
+ "loss": 2.0764,
+ "step": 8
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 5.85e-05,
+ "loss": 2.1711,
+ "step": 9
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 6.5e-05,
+ "loss": 2.0885,
+ "step": 10
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 6.499599055565396e-05,
+ "loss": 2.0196,
+ "step": 11
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 6.498396321188627e-05,
+ "loss": 2.1799,
+ "step": 12
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 6.496392093626402e-05,
+ "loss": 2.1322,
+ "step": 13
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 6.493586867391882e-05,
+ "loss": 2.1486,
+ "step": 14
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 6.489981334632665e-05,
+ "loss": 2.1351,
+ "step": 15
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 6.48557638496001e-05,
+ "loss": 2.1366,
+ "step": 16
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 6.480373105229334e-05,
+ "loss": 2.0727,
+ "step": 17
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 6.474372779272053e-05,
+ "loss": 2.1123,
+ "step": 18
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 6.467576887578811e-05,
+ "loss": 2.0652,
+ "step": 19
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 6.459987106934198e-05,
+ "loss": 2.0008,
+ "step": 20
+ },
+ {
+ "epoch": 0.19,
+ "eval_loss": 2.0444116592407227,
+ "eval_runtime": 2.798,
+ "eval_samples_per_second": 3.574,
+ "eval_steps_per_second": 0.715,
+ "step": 20
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 6.451605310003014e-05,
+ "loss": 2.0769,
+ "step": 21
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 6.442433564868238e-05,
+ "loss": 1.9949,
+ "step": 22
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 6.432474134520738e-05,
+ "loss": 2.1341,
+ "step": 23
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 6.421729476300929e-05,
+ "loss": 2.1433,
+ "step": 24
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 6.410202241292448e-05,
+ "loss": 2.0168,
+ "step": 25
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 6.39789527366805e-05,
+ "loss": 2.0403,
+ "step": 26
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 6.384811609987843e-05,
+ "loss": 2.1983,
+ "step": 27
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 6.370954478450064e-05,
+ "loss": 2.0625,
+ "step": 28
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 6.356327298094572e-05,
+ "loss": 2.204,
+ "step": 29
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 6.340933677959248e-05,
+ "loss": 2.118,
+ "step": 30
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 6.324777416189522e-05,
+ "loss": 2.0997,
+ "step": 31
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 6.307862499101232e-05,
+ "loss": 2.1154,
+ "step": 32
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 6.290193100197069e-05,
+ "loss": 2.0845,
+ "step": 33
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 6.271773579136816e-05,
+ "loss": 2.0052,
+ "step": 34
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 6.252608480661681e-05,
+ "loss": 1.956,
+ "step": 35
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 6.232702533472938e-05,
+ "loss": 2.0958,
+ "step": 36
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 6.212060649065197e-05,
+ "loss": 2.0634,
+ "step": 37
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 6.190687920514563e-05,
+ "loss": 2.0033,
+ "step": 38
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 6.168589621222e-05,
+ "loss": 2.1426,
+ "step": 39
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 6.145771203612195e-05,
+ "loss": 1.9823,
+ "step": 40
+ },
+ {
+ "epoch": 0.38,
+ "eval_loss": 2.0226035118103027,
+ "eval_runtime": 2.8049,
+ "eval_samples_per_second": 3.565,
+ "eval_steps_per_second": 0.713,
+ "step": 40
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 6.122238297788254e-05,
+ "loss": 2.2016,
+ "step": 41
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 6.097996710142556e-05,
+ "loss": 2.1008,
+ "step": 42
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 6.0730524219241214e-05,
+ "loss": 2.0821,
+ "step": 43
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 6.047411587762816e-05,
+ "loss": 2.092,
+ "step": 44
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 6.0210805341507994e-05,
+ "loss": 1.9617,
+ "step": 45
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 5.994065757881549e-05,
+ "loss": 1.9358,
+ "step": 46
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 5.966373924446878e-05,
+ "loss": 2.0573,
+ "step": 47
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 5.938011866392325e-05,
+ "loss": 2.0731,
+ "step": 48
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 5.9089865816313253e-05,
+ "loss": 2.2842,
+ "step": 49
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 5.8793052317185785e-05,
+ "loss": 2.2145,
+ "step": 50
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 210,
+ "num_train_epochs": 2,
+ "save_steps": 50,
+ "total_flos": 4.125056651650662e+16,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/prompt-a/onechar/checkpoint-50/training_args.bin b/prompt-a/onechar/checkpoint-50/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f6a9833d8c69d00318139a8fc6e1126e7d7ac62f
--- /dev/null
+++ b/prompt-a/onechar/checkpoint-50/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ff85b6b2e99bcdbf030f4c883beac4553b8cad32f31f2d286263119b723c469
+size 4411
diff --git a/prompt-a/onechar/config.json b/prompt-a/onechar/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..99be7fbf147c96ef93ed720729ea90e8e21bdfca
--- /dev/null
+++ b/prompt-a/onechar/config.json
@@ -0,0 +1,38 @@
+{
+ "_name_or_path": "meta-llama/Llama-2-13b-hf",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "intermediate_size": 13824,
+ "max_position_embeddings": 4096,
+ "model_type": "llama",
+ "num_attention_heads": 40,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 40,
+ "pretraining_tp": 1,
+ "quantization_config": {
+ "bnb_4bit_compute_dtype": "bfloat16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "llm_int8_enable_fp32_cpu_offload": false,
+ "llm_int8_has_fp16_weight": false,
+ "llm_int8_skip_modules": null,
+ "llm_int8_threshold": 6.0,
+ "load_in_4bit": true,
+ "load_in_8bit": false,
+ "quant_method": "bitsandbytes"
+ },
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float16",
+ "transformers_version": "4.34.0.dev0",
+ "use_cache": false,
+ "vocab_size": 32001
+}
diff --git a/prompt-a/onechar/special_tokens_map.json b/prompt-a/onechar/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e
--- /dev/null
+++ b/prompt-a/onechar/special_tokens_map.json
@@ -0,0 +1,6 @@
+{
+ "bos_token": "",
+ "eos_token": "",
+ "pad_token": "[PAD]",
+ "unk_token": ""
+}
diff --git a/prompt-a/onechar/tokenizer.model b/prompt-a/onechar/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/prompt-a/onechar/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/prompt-a/onechar/tokenizer_config.json b/prompt-a/onechar/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..77849a9a3ca128f00bc3febdc9b39391a4ef43a3
--- /dev/null
+++ b/prompt-a/onechar/tokenizer_config.json
@@ -0,0 +1,39 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "legacy": false,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "trust_remote_code": false,
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "use_default_system_prompt": true,
+ "use_fast": true
+}