diff --git a/prompt-a/onechar/README.md b/prompt-a/onechar/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/prompt-a/onechar/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/prompt-a/onechar/adapter_config.json b/prompt-a/onechar/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e --- /dev/null +++ b/prompt-a/onechar/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.01, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/prompt-a/onechar/adapter_model.bin b/prompt-a/onechar/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..efc4e51d138ecf5c5d8072972b34be04ca33ebd9 --- /dev/null +++ b/prompt-a/onechar/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5ec19329cacf861536c142e358f467bfaf4ef5352429c4826c33d208347a3d +size 125374989 diff --git a/prompt-a/onechar/added_tokens.json b/prompt-a/onechar/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..e41416ddd79948246ea2dced6800ea3cd531c424 --- /dev/null +++ b/prompt-a/onechar/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32000 +} diff --git a/prompt-a/onechar/checkpoint-100/README.md b/prompt-a/onechar/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/prompt-a/onechar/checkpoint-100/adapter_config.json b/prompt-a/onechar/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.01, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/prompt-a/onechar/checkpoint-100/adapter_model.bin b/prompt-a/onechar/checkpoint-100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e5a35295386cee85b173a0b18ac1814a09a59c7 --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff610c0b53c8af0e9c984b4ed0725380085f987aa69e16c979a257578cfabcb +size 125374989 diff --git a/prompt-a/onechar/checkpoint-100/adapter_model/README.md b/prompt-a/onechar/checkpoint-100/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/prompt-a/onechar/checkpoint-100/adapter_model/adapter_config.json b/prompt-a/onechar/checkpoint-100/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.01, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/prompt-a/onechar/checkpoint-100/adapter_model/adapter_model.bin b/prompt-a/onechar/checkpoint-100/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e5a35295386cee85b173a0b18ac1814a09a59c7 --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff610c0b53c8af0e9c984b4ed0725380085f987aa69e16c979a257578cfabcb +size 125374989 diff --git a/prompt-a/onechar/checkpoint-100/optimizer.pt b/prompt-a/onechar/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..98f21b5e00fe7df2f858f515322260407beb6c1a --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f1f403d38dc995a8098ed409cbd760e111bd882f7a68df45055032258b88b6 +size 250681597 diff --git a/prompt-a/onechar/checkpoint-100/rng_state_0.pth b/prompt-a/onechar/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..bd46f14b9d704beaf43d0a60fb0fcee7e7a4dc55 --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4102fd8971c12e358d8aa24117ab73918cc1aaac07c4678184c87371a7cce1c6 +size 21687 diff --git a/prompt-a/onechar/checkpoint-100/rng_state_1.pth b/prompt-a/onechar/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..2bd2a4cfe2f0c0839a7bd28c30a0ee7f68e10cbf --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a35f8c8b1beb2b76ef2ca1aa345895c6db951b1551c9042228eb0ea741ce12c +size 21687 diff --git a/prompt-a/onechar/checkpoint-100/rng_state_2.pth b/prompt-a/onechar/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3590ccc56e14f086598123a24161f3c9c03da99d --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa20b68c489c5a2cd2428b1f3e5072cdf8f2417980be271f9db72c4049028bb3 +size 21687 diff --git a/prompt-a/onechar/checkpoint-100/rng_state_3.pth b/prompt-a/onechar/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..183a796b307b0d8ac5ffdd09477b826d21994e35 --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a828f6b225941a386fb560abfcd7d4dee497153fe1cf40b8a87e5174aa75bed3 +size 21687 diff --git a/prompt-a/onechar/checkpoint-100/rng_state_4.pth b/prompt-a/onechar/checkpoint-100/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2d0fd052bcabc0b74f4b45f5eb2edd37d5a2f8c --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1af5550599dc6617b8105e29364818d1dbea0b5d63b4e86bd3c1697f930ae248 +size 21687 diff --git a/prompt-a/onechar/checkpoint-100/rng_state_5.pth b/prompt-a/onechar/checkpoint-100/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..abefa9a599bf1e9a29ee7d719924e925ba5adfbd --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b13498715ccaef6df8b69c2dab24d427d1152ac7f5c20f3a7aa0579d6df2f93 +size 21687 diff --git a/prompt-a/onechar/checkpoint-100/rng_state_6.pth b/prompt-a/onechar/checkpoint-100/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..f09479d402f769953a69a982820f048bb9f12252 --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a13f52d9df555a3b9fac609325509aebf8adff3267140830db81111f0c8f4a +size 21687 diff --git a/prompt-a/onechar/checkpoint-100/rng_state_7.pth b/prompt-a/onechar/checkpoint-100/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c25123d780ea1d5371035e42eb4152cad9da645 --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7afe0df70969c4c1a997671975d84b8bdde138351b49fb8396fec85cf845301 +size 21687 diff --git a/prompt-a/onechar/checkpoint-100/scheduler.pt b/prompt-a/onechar/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6a3fa656e2f9813d7a5c39d48b47d4029c97391 --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781197e4490645acd969affd7bc482901bb63fc73f416de5de064f3460ca305a +size 627 diff --git a/prompt-a/onechar/checkpoint-100/trainer_state.json b/prompt-a/onechar/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..07ea426f8fdd78d21d1df9ade93a28fea40a5029 --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/trainer_state.json @@ -0,0 +1,659 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9523809523809523, + "eval_steps": 20, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 6.5e-06, + "loss": 2.1535, + "step": 1 + }, + { + "epoch": 0.02, + "learning_rate": 1.3e-05, + "loss": 2.1328, + "step": 2 + }, + { + "epoch": 0.03, + "learning_rate": 1.9499999999999996e-05, + "loss": 2.0918, + "step": 3 + }, + { + "epoch": 0.04, + "learning_rate": 2.6e-05, + "loss": 2.1194, + "step": 4 + }, + { + "epoch": 0.05, + "learning_rate": 3.25e-05, + "loss": 2.2342, + "step": 5 + }, + { + "epoch": 0.06, + "learning_rate": 3.899999999999999e-05, + "loss": 2.1168, + "step": 6 + }, + { + "epoch": 0.07, + "learning_rate": 4.5499999999999995e-05, + "loss": 2.1761, + "step": 7 + }, + { + "epoch": 0.08, + "learning_rate": 5.2e-05, + "loss": 2.0764, + "step": 8 + }, + { + "epoch": 0.09, + "learning_rate": 5.85e-05, + "loss": 2.1711, + "step": 9 + }, + { + "epoch": 0.1, + "learning_rate": 6.5e-05, + "loss": 2.0885, + "step": 10 + }, + { + "epoch": 0.1, + "learning_rate": 6.499599055565396e-05, + "loss": 2.0196, + "step": 11 + }, + { + "epoch": 0.11, + "learning_rate": 6.498396321188627e-05, + "loss": 2.1799, + "step": 12 + }, + { + "epoch": 0.12, + "learning_rate": 6.496392093626402e-05, + "loss": 2.1322, + "step": 13 + }, + { + "epoch": 0.13, + "learning_rate": 6.493586867391882e-05, + "loss": 2.1486, + "step": 14 + }, + { + "epoch": 0.14, + "learning_rate": 6.489981334632665e-05, + "loss": 2.1351, + "step": 15 + }, + { + "epoch": 0.15, + "learning_rate": 6.48557638496001e-05, + "loss": 2.1366, + "step": 16 + }, + { + "epoch": 0.16, + "learning_rate": 6.480373105229334e-05, + "loss": 2.0727, + "step": 17 + }, + { + "epoch": 0.17, + "learning_rate": 6.474372779272053e-05, + "loss": 2.1123, + "step": 18 + }, + { + "epoch": 0.18, + "learning_rate": 6.467576887578811e-05, + "loss": 2.0652, + "step": 19 + }, + { + "epoch": 0.19, + "learning_rate": 6.459987106934198e-05, + "loss": 2.0008, + "step": 20 + }, + { + "epoch": 0.19, + "eval_loss": 2.0444116592407227, + "eval_runtime": 2.798, + "eval_samples_per_second": 3.574, + "eval_steps_per_second": 0.715, + "step": 20 + }, + { + "epoch": 0.2, + "learning_rate": 6.451605310003014e-05, + "loss": 2.0769, + "step": 21 + }, + { + "epoch": 0.21, + "learning_rate": 6.442433564868238e-05, + "loss": 1.9949, + "step": 22 + }, + { + "epoch": 0.22, + "learning_rate": 6.432474134520738e-05, + "loss": 2.1341, + "step": 23 + }, + { + "epoch": 0.23, + "learning_rate": 6.421729476300929e-05, + "loss": 2.1433, + "step": 24 + }, + { + "epoch": 0.24, + "learning_rate": 6.410202241292448e-05, + "loss": 2.0168, + "step": 25 + }, + { + "epoch": 0.25, + "learning_rate": 6.39789527366805e-05, + "loss": 2.0403, + "step": 26 + }, + { + "epoch": 0.26, + "learning_rate": 6.384811609987843e-05, + "loss": 2.1983, + "step": 27 + }, + { + "epoch": 0.27, + "learning_rate": 6.370954478450064e-05, + "loss": 2.0625, + "step": 28 + }, + { + "epoch": 0.28, + "learning_rate": 6.356327298094572e-05, + "loss": 2.204, + "step": 29 + }, + { + "epoch": 0.29, + "learning_rate": 6.340933677959248e-05, + "loss": 2.118, + "step": 30 + }, + { + "epoch": 0.3, + "learning_rate": 6.324777416189522e-05, + "loss": 2.0997, + "step": 31 + }, + { + "epoch": 0.3, + "learning_rate": 6.307862499101232e-05, + "loss": 2.1154, + "step": 32 + }, + { + "epoch": 0.31, + "learning_rate": 6.290193100197069e-05, + "loss": 2.0845, + "step": 33 + }, + { + "epoch": 0.32, + "learning_rate": 6.271773579136816e-05, + "loss": 2.0052, + "step": 34 + }, + { + "epoch": 0.33, + "learning_rate": 6.252608480661681e-05, + "loss": 1.956, + "step": 35 + }, + { + "epoch": 0.34, + "learning_rate": 6.232702533472938e-05, + "loss": 2.0958, + "step": 36 + }, + { + "epoch": 0.35, + "learning_rate": 6.212060649065197e-05, + "loss": 2.0634, + "step": 37 + }, + { + "epoch": 0.36, + "learning_rate": 6.190687920514563e-05, + "loss": 2.0033, + "step": 38 + }, + { + "epoch": 0.37, + "learning_rate": 6.168589621222e-05, + "loss": 2.1426, + "step": 39 + }, + { + "epoch": 0.38, + "learning_rate": 6.145771203612195e-05, + "loss": 1.9823, + "step": 40 + }, + { + "epoch": 0.38, + "eval_loss": 2.0226035118103027, + "eval_runtime": 2.8049, + "eval_samples_per_second": 3.565, + "eval_steps_per_second": 0.713, + "step": 40 + }, + { + "epoch": 0.39, + "learning_rate": 6.122238297788254e-05, + "loss": 2.2016, + "step": 41 + }, + { + "epoch": 0.4, + "learning_rate": 6.097996710142556e-05, + "loss": 2.1008, + "step": 42 + }, + { + "epoch": 0.41, + "learning_rate": 6.0730524219241214e-05, + "loss": 2.0821, + "step": 43 + }, + { + "epoch": 0.42, + "learning_rate": 6.047411587762816e-05, + "loss": 2.092, + "step": 44 + }, + { + "epoch": 0.43, + "learning_rate": 6.0210805341507994e-05, + "loss": 1.9617, + "step": 45 + }, + { + "epoch": 0.44, + "learning_rate": 5.994065757881549e-05, + "loss": 1.9358, + "step": 46 + }, + { + "epoch": 0.45, + "learning_rate": 5.966373924446878e-05, + "loss": 2.0573, + "step": 47 + }, + { + "epoch": 0.46, + "learning_rate": 5.938011866392325e-05, + "loss": 2.0731, + "step": 48 + }, + { + "epoch": 0.47, + "learning_rate": 5.9089865816313253e-05, + "loss": 2.2842, + "step": 49 + }, + { + "epoch": 0.48, + "learning_rate": 5.8793052317185785e-05, + "loss": 2.2145, + "step": 50 + }, + { + "epoch": 0.49, + "learning_rate": 5.848975140083044e-05, + "loss": 1.9923, + "step": 51 + }, + { + "epoch": 0.5, + "learning_rate": 5.818003790220993e-05, + "loss": 2.1065, + "step": 52 + }, + { + "epoch": 0.5, + "learning_rate": 5.786398823849571e-05, + "loss": 2.0009, + "step": 53 + }, + { + "epoch": 0.51, + "learning_rate": 5.754168039021315e-05, + "loss": 1.9785, + "step": 54 + }, + { + "epoch": 0.52, + "learning_rate": 5.7213193882001006e-05, + "loss": 1.9504, + "step": 55 + }, + { + "epoch": 0.53, + "learning_rate": 5.687860976298993e-05, + "loss": 2.1977, + "step": 56 + }, + { + "epoch": 0.54, + "learning_rate": 5.6538010586804816e-05, + "loss": 2.0423, + "step": 57 + }, + { + "epoch": 0.55, + "learning_rate": 5.619148039119587e-05, + "loss": 2.0536, + "step": 58 + }, + { + "epoch": 0.56, + "learning_rate": 5.5839104677303635e-05, + "loss": 2.0286, + "step": 59 + }, + { + "epoch": 0.57, + "learning_rate": 5.548097038856279e-05, + "loss": 2.1143, + "step": 60 + }, + { + "epoch": 0.57, + "eval_loss": 2.0128190517425537, + "eval_runtime": 2.8105, + "eval_samples_per_second": 3.558, + "eval_steps_per_second": 0.712, + "step": 60 + }, + { + "epoch": 0.58, + "learning_rate": 5.511716588925021e-05, + "loss": 2.1397, + "step": 61 + }, + { + "epoch": 0.59, + "learning_rate": 5.474778094268238e-05, + "loss": 2.0601, + "step": 62 + }, + { + "epoch": 0.6, + "learning_rate": 5.437290668906763e-05, + "loss": 2.1707, + "step": 63 + }, + { + "epoch": 0.61, + "learning_rate": 5.399263562301868e-05, + "loss": 2.0649, + "step": 64 + }, + { + "epoch": 0.62, + "learning_rate": 5.360706157073096e-05, + "loss": 2.0314, + "step": 65 + }, + { + "epoch": 0.63, + "learning_rate": 5.321627966683241e-05, + "loss": 2.1277, + "step": 66 + }, + { + "epoch": 0.64, + "learning_rate": 5.2820386330910424e-05, + "loss": 2.0534, + "step": 67 + }, + { + "epoch": 0.65, + "learning_rate": 5.2419479243721734e-05, + "loss": 1.9976, + "step": 68 + }, + { + "epoch": 0.66, + "learning_rate": 5.2013657323091226e-05, + "loss": 2.0262, + "step": 69 + }, + { + "epoch": 0.67, + "learning_rate": 5.160302069950537e-05, + "loss": 2.0093, + "step": 70 + }, + { + "epoch": 0.68, + "learning_rate": 5.118767069140655e-05, + "loss": 2.1088, + "step": 71 + }, + { + "epoch": 0.69, + "learning_rate": 5.076770978019424e-05, + "loss": 1.9841, + "step": 72 + }, + { + "epoch": 0.7, + "learning_rate": 5.034324158493928e-05, + "loss": 2.0081, + "step": 73 + }, + { + "epoch": 0.7, + "learning_rate": 4.991437083681739e-05, + "loss": 2.0179, + "step": 74 + }, + { + "epoch": 0.71, + "learning_rate": 4.948120335326833e-05, + "loss": 2.1304, + "step": 75 + }, + { + "epoch": 0.72, + "learning_rate": 4.904384601188706e-05, + "loss": 1.9213, + "step": 76 + }, + { + "epoch": 0.73, + "learning_rate": 4.860240672405324e-05, + "loss": 2.2153, + "step": 77 + }, + { + "epoch": 0.74, + "learning_rate": 4.815699440830574e-05, + "loss": 1.9365, + "step": 78 + }, + { + "epoch": 0.75, + "learning_rate": 4.770771896346863e-05, + "loss": 2.021, + "step": 79 + }, + { + "epoch": 0.76, + "learning_rate": 4.725469124153526e-05, + "loss": 2.0327, + "step": 80 + }, + { + "epoch": 0.76, + "eval_loss": 2.0059409141540527, + "eval_runtime": 2.8077, + "eval_samples_per_second": 3.562, + "eval_steps_per_second": 0.712, + "step": 80 + }, + { + "epoch": 0.77, + "learning_rate": 4.679802302031724e-05, + "loss": 2.1576, + "step": 81 + }, + { + "epoch": 0.78, + "learning_rate": 4.6337826975864856e-05, + "loss": 1.9468, + "step": 82 + }, + { + "epoch": 0.79, + "learning_rate": 4.587421665466603e-05, + "loss": 2.0795, + "step": 83 + }, + { + "epoch": 0.8, + "learning_rate": 4.540730644563036e-05, + "loss": 2.0566, + "step": 84 + }, + { + "epoch": 0.81, + "learning_rate": 4.4937211551865415e-05, + "loss": 2.0819, + "step": 85 + }, + { + "epoch": 0.82, + "learning_rate": 4.4464047962252035e-05, + "loss": 2.0542, + "step": 86 + }, + { + "epoch": 0.83, + "learning_rate": 4.398793242282586e-05, + "loss": 2.0006, + "step": 87 + }, + { + "epoch": 0.84, + "learning_rate": 4.350898240797197e-05, + "loss": 1.9953, + "step": 88 + }, + { + "epoch": 0.85, + "learning_rate": 4.3027316091439854e-05, + "loss": 2.067, + "step": 89 + }, + { + "epoch": 0.86, + "learning_rate": 4.254305231718579e-05, + "loss": 1.9909, + "step": 90 + }, + { + "epoch": 0.87, + "learning_rate": 4.205631057004987e-05, + "loss": 1.9436, + "step": 91 + }, + { + "epoch": 0.88, + "learning_rate": 4.156721094627495e-05, + "loss": 2.0643, + "step": 92 + }, + { + "epoch": 0.89, + "learning_rate": 4.107587412387462e-05, + "loss": 2.1244, + "step": 93 + }, + { + "epoch": 0.9, + "learning_rate": 4.058242133285779e-05, + "loss": 1.9795, + "step": 94 + }, + { + "epoch": 0.9, + "learning_rate": 4.008697432531693e-05, + "loss": 2.1568, + "step": 95 + }, + { + "epoch": 0.91, + "learning_rate": 3.9589655345387633e-05, + "loss": 1.9829, + "step": 96 + }, + { + "epoch": 0.92, + "learning_rate": 3.909058709908665e-05, + "loss": 2.031, + "step": 97 + }, + { + "epoch": 0.93, + "learning_rate": 3.858989272403605e-05, + "loss": 2.2042, + "step": 98 + }, + { + "epoch": 0.94, + "learning_rate": 3.8087695759080804e-05, + "loss": 1.9775, + "step": 99 + }, + { + "epoch": 0.95, + "learning_rate": 3.75841201138075e-05, + "loss": 2.0274, + "step": 100 + }, + { + "epoch": 0.95, + "eval_loss": 2.0003483295440674, + "eval_runtime": 2.8075, + "eval_samples_per_second": 3.562, + "eval_steps_per_second": 0.712, + "step": 100 + } + ], + "logging_steps": 1, + "max_steps": 210, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 8.146151922047386e+16, + "trial_name": null, + "trial_params": null +} diff --git a/prompt-a/onechar/checkpoint-100/training_args.bin b/prompt-a/onechar/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6a9833d8c69d00318139a8fc6e1126e7d7ac62f --- /dev/null +++ b/prompt-a/onechar/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff85b6b2e99bcdbf030f4c883beac4553b8cad32f31f2d286263119b723c469 +size 4411 diff --git a/prompt-a/onechar/checkpoint-150/README.md b/prompt-a/onechar/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/prompt-a/onechar/checkpoint-150/adapter_config.json b/prompt-a/onechar/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.01, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/prompt-a/onechar/checkpoint-150/adapter_model.bin b/prompt-a/onechar/checkpoint-150/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e414f833dc304b909af390a644e13234fd7a4fd4 --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b699052f2d128f83c6010ef6f104f26dcbf6cb4325eedcc9a7b58bc08c5c9f +size 125374989 diff --git a/prompt-a/onechar/checkpoint-150/adapter_model/README.md b/prompt-a/onechar/checkpoint-150/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/prompt-a/onechar/checkpoint-150/adapter_model/adapter_config.json b/prompt-a/onechar/checkpoint-150/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.01, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/prompt-a/onechar/checkpoint-150/adapter_model/adapter_model.bin b/prompt-a/onechar/checkpoint-150/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e414f833dc304b909af390a644e13234fd7a4fd4 --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b699052f2d128f83c6010ef6f104f26dcbf6cb4325eedcc9a7b58bc08c5c9f +size 125374989 diff --git a/prompt-a/onechar/checkpoint-150/optimizer.pt b/prompt-a/onechar/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..177e712799bb777cd0a70695d411358ee65709e0 --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6697d2772f8bdabc0de5308ce64fed55e96f213ec392cc350ff9e35b3419011a +size 250681597 diff --git a/prompt-a/onechar/checkpoint-150/rng_state_0.pth b/prompt-a/onechar/checkpoint-150/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..b8467bf58301adeddac4918aad90f7ed7b11fe90 --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b0267998bb7a1cfb283b31e1dd7afbe75c84407c53c4f1c298028d9daacc6e +size 21687 diff --git a/prompt-a/onechar/checkpoint-150/rng_state_1.pth b/prompt-a/onechar/checkpoint-150/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe874c140f820e3dd8b1fcf490184f21c33f9083 --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50e5dc96be0a674ddf476b916d068682bfc8d9f342c0213d1ba280d171cbc1d0 +size 21687 diff --git a/prompt-a/onechar/checkpoint-150/rng_state_2.pth b/prompt-a/onechar/checkpoint-150/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..bfecd933d4dfc73fa03780506049ffc66241c9e9 --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f1ec8428ac308b6c6ffb63f0d2137a6c43b3b0f57031e333c2cde00dfbfedff +size 21687 diff --git a/prompt-a/onechar/checkpoint-150/rng_state_3.pth b/prompt-a/onechar/checkpoint-150/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..2b4461d7ff31a559100fe0f29a3ed023e9f190e7 --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832989103ea74caa2d18f41014c7ec87b33f665d85bbfabeac5e440a24a4e438 +size 21687 diff --git a/prompt-a/onechar/checkpoint-150/rng_state_4.pth b/prompt-a/onechar/checkpoint-150/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..1e060ecb0e09581d9813d16528d78d01ddb7208b --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c038cf0ec9ace1d9317b463a053999ebbd8efbc6568a0c63333109129be6de +size 21687 diff --git a/prompt-a/onechar/checkpoint-150/rng_state_5.pth b/prompt-a/onechar/checkpoint-150/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..468210913429d5e73e28c4e107836a98b798fe35 --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0b0a35fbb56436b89de780a77c2850d7ceb494df91befbfc0be4b4388f8c31 +size 21687 diff --git a/prompt-a/onechar/checkpoint-150/rng_state_6.pth b/prompt-a/onechar/checkpoint-150/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..67e29ae9b2de9144ee1f89b7e33419d34897faac --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a1fd82be4a9e4def96881742849b0f5444737a5c55f1de3d18ff9b31a835e5 +size 21687 diff --git a/prompt-a/onechar/checkpoint-150/rng_state_7.pth b/prompt-a/onechar/checkpoint-150/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..f23a7558687babd9d76c519f71934f1a3809b9ca --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b924ccabce3c839d1b33edc854332d317e917f50d4f5defffe6a8f741a9641b0 +size 21687 diff --git a/prompt-a/onechar/checkpoint-150/scheduler.pt b/prompt-a/onechar/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f71458a6d417fb79c6a17e12f00039ad26d70b1 --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9abe1c80a7b715c51418717c88a5924cc27c160619ed0f1dec7010dc1687509 +size 627 diff --git a/prompt-a/onechar/checkpoint-150/trainer_state.json b/prompt-a/onechar/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..20d3af0e4c74485de30937314168f0399691f24b --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/trainer_state.json @@ -0,0 +1,975 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4285714285714286, + "eval_steps": 20, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 6.5e-06, + "loss": 2.1535, + "step": 1 + }, + { + "epoch": 0.02, + "learning_rate": 1.3e-05, + "loss": 2.1328, + "step": 2 + }, + { + "epoch": 0.03, + "learning_rate": 1.9499999999999996e-05, + "loss": 2.0918, + "step": 3 + }, + { + "epoch": 0.04, + "learning_rate": 2.6e-05, + "loss": 2.1194, + "step": 4 + }, + { + "epoch": 0.05, + "learning_rate": 3.25e-05, + "loss": 2.2342, + "step": 5 + }, + { + "epoch": 0.06, + "learning_rate": 3.899999999999999e-05, + "loss": 2.1168, + "step": 6 + }, + { + "epoch": 0.07, + "learning_rate": 4.5499999999999995e-05, + "loss": 2.1761, + "step": 7 + }, + { + "epoch": 0.08, + "learning_rate": 5.2e-05, + "loss": 2.0764, + "step": 8 + }, + { + "epoch": 0.09, + "learning_rate": 5.85e-05, + "loss": 2.1711, + "step": 9 + }, + { + "epoch": 0.1, + "learning_rate": 6.5e-05, + "loss": 2.0885, + "step": 10 + }, + { + "epoch": 0.1, + "learning_rate": 6.499599055565396e-05, + "loss": 2.0196, + "step": 11 + }, + { + "epoch": 0.11, + "learning_rate": 6.498396321188627e-05, + "loss": 2.1799, + "step": 12 + }, + { + "epoch": 0.12, + "learning_rate": 6.496392093626402e-05, + "loss": 2.1322, + "step": 13 + }, + { + "epoch": 0.13, + "learning_rate": 6.493586867391882e-05, + "loss": 2.1486, + "step": 14 + }, + { + "epoch": 0.14, + "learning_rate": 6.489981334632665e-05, + "loss": 2.1351, + "step": 15 + }, + { + "epoch": 0.15, + "learning_rate": 6.48557638496001e-05, + "loss": 2.1366, + "step": 16 + }, + { + "epoch": 0.16, + "learning_rate": 6.480373105229334e-05, + "loss": 2.0727, + "step": 17 + }, + { + "epoch": 0.17, + "learning_rate": 6.474372779272053e-05, + "loss": 2.1123, + "step": 18 + }, + { + "epoch": 0.18, + "learning_rate": 6.467576887578811e-05, + "loss": 2.0652, + "step": 19 + }, + { + "epoch": 0.19, + "learning_rate": 6.459987106934198e-05, + "loss": 2.0008, + "step": 20 + }, + { + "epoch": 0.19, + "eval_loss": 2.0444116592407227, + "eval_runtime": 2.798, + "eval_samples_per_second": 3.574, + "eval_steps_per_second": 0.715, + "step": 20 + }, + { + "epoch": 0.2, + "learning_rate": 6.451605310003014e-05, + "loss": 2.0769, + "step": 21 + }, + { + "epoch": 0.21, + "learning_rate": 6.442433564868238e-05, + "loss": 1.9949, + "step": 22 + }, + { + "epoch": 0.22, + "learning_rate": 6.432474134520738e-05, + "loss": 2.1341, + "step": 23 + }, + { + "epoch": 0.23, + "learning_rate": 6.421729476300929e-05, + "loss": 2.1433, + "step": 24 + }, + { + "epoch": 0.24, + "learning_rate": 6.410202241292448e-05, + "loss": 2.0168, + "step": 25 + }, + { + "epoch": 0.25, + "learning_rate": 6.39789527366805e-05, + "loss": 2.0403, + "step": 26 + }, + { + "epoch": 0.26, + "learning_rate": 6.384811609987843e-05, + "loss": 2.1983, + "step": 27 + }, + { + "epoch": 0.27, + "learning_rate": 6.370954478450064e-05, + "loss": 2.0625, + "step": 28 + }, + { + "epoch": 0.28, + "learning_rate": 6.356327298094572e-05, + "loss": 2.204, + "step": 29 + }, + { + "epoch": 0.29, + "learning_rate": 6.340933677959248e-05, + "loss": 2.118, + "step": 30 + }, + { + "epoch": 0.3, + "learning_rate": 6.324777416189522e-05, + "loss": 2.0997, + "step": 31 + }, + { + "epoch": 0.3, + "learning_rate": 6.307862499101232e-05, + "loss": 2.1154, + "step": 32 + }, + { + "epoch": 0.31, + "learning_rate": 6.290193100197069e-05, + "loss": 2.0845, + "step": 33 + }, + { + "epoch": 0.32, + "learning_rate": 6.271773579136816e-05, + "loss": 2.0052, + "step": 34 + }, + { + "epoch": 0.33, + "learning_rate": 6.252608480661681e-05, + "loss": 1.956, + "step": 35 + }, + { + "epoch": 0.34, + "learning_rate": 6.232702533472938e-05, + "loss": 2.0958, + "step": 36 + }, + { + "epoch": 0.35, + "learning_rate": 6.212060649065197e-05, + "loss": 2.0634, + "step": 37 + }, + { + "epoch": 0.36, + "learning_rate": 6.190687920514563e-05, + "loss": 2.0033, + "step": 38 + }, + { + "epoch": 0.37, + "learning_rate": 6.168589621222e-05, + "loss": 2.1426, + "step": 39 + }, + { + "epoch": 0.38, + "learning_rate": 6.145771203612195e-05, + "loss": 1.9823, + "step": 40 + }, + { + "epoch": 0.38, + "eval_loss": 2.0226035118103027, + "eval_runtime": 2.8049, + "eval_samples_per_second": 3.565, + "eval_steps_per_second": 0.713, + "step": 40 + }, + { + "epoch": 0.39, + "learning_rate": 6.122238297788254e-05, + "loss": 2.2016, + "step": 41 + }, + { + "epoch": 0.4, + "learning_rate": 6.097996710142556e-05, + "loss": 2.1008, + "step": 42 + }, + { + "epoch": 0.41, + "learning_rate": 6.0730524219241214e-05, + "loss": 2.0821, + "step": 43 + }, + { + "epoch": 0.42, + "learning_rate": 6.047411587762816e-05, + "loss": 2.092, + "step": 44 + }, + { + "epoch": 0.43, + "learning_rate": 6.0210805341507994e-05, + "loss": 1.9617, + "step": 45 + }, + { + "epoch": 0.44, + "learning_rate": 5.994065757881549e-05, + "loss": 1.9358, + "step": 46 + }, + { + "epoch": 0.45, + "learning_rate": 5.966373924446878e-05, + "loss": 2.0573, + "step": 47 + }, + { + "epoch": 0.46, + "learning_rate": 5.938011866392325e-05, + "loss": 2.0731, + "step": 48 + }, + { + "epoch": 0.47, + "learning_rate": 5.9089865816313253e-05, + "loss": 2.2842, + "step": 49 + }, + { + "epoch": 0.48, + "learning_rate": 5.8793052317185785e-05, + "loss": 2.2145, + "step": 50 + }, + { + "epoch": 0.49, + "learning_rate": 5.848975140083044e-05, + "loss": 1.9923, + "step": 51 + }, + { + "epoch": 0.5, + "learning_rate": 5.818003790220993e-05, + "loss": 2.1065, + "step": 52 + }, + { + "epoch": 0.5, + "learning_rate": 5.786398823849571e-05, + "loss": 2.0009, + "step": 53 + }, + { + "epoch": 0.51, + "learning_rate": 5.754168039021315e-05, + "loss": 1.9785, + "step": 54 + }, + { + "epoch": 0.52, + "learning_rate": 5.7213193882001006e-05, + "loss": 1.9504, + "step": 55 + }, + { + "epoch": 0.53, + "learning_rate": 5.687860976298993e-05, + "loss": 2.1977, + "step": 56 + }, + { + "epoch": 0.54, + "learning_rate": 5.6538010586804816e-05, + "loss": 2.0423, + "step": 57 + }, + { + "epoch": 0.55, + "learning_rate": 5.619148039119587e-05, + "loss": 2.0536, + "step": 58 + }, + { + "epoch": 0.56, + "learning_rate": 5.5839104677303635e-05, + "loss": 2.0286, + "step": 59 + }, + { + "epoch": 0.57, + "learning_rate": 5.548097038856279e-05, + "loss": 2.1143, + "step": 60 + }, + { + "epoch": 0.57, + "eval_loss": 2.0128190517425537, + "eval_runtime": 2.8105, + "eval_samples_per_second": 3.558, + "eval_steps_per_second": 0.712, + "step": 60 + }, + { + "epoch": 0.58, + "learning_rate": 5.511716588925021e-05, + "loss": 2.1397, + "step": 61 + }, + { + "epoch": 0.59, + "learning_rate": 5.474778094268238e-05, + "loss": 2.0601, + "step": 62 + }, + { + "epoch": 0.6, + "learning_rate": 5.437290668906763e-05, + "loss": 2.1707, + "step": 63 + }, + { + "epoch": 0.61, + "learning_rate": 5.399263562301868e-05, + "loss": 2.0649, + "step": 64 + }, + { + "epoch": 0.62, + "learning_rate": 5.360706157073096e-05, + "loss": 2.0314, + "step": 65 + }, + { + "epoch": 0.63, + "learning_rate": 5.321627966683241e-05, + "loss": 2.1277, + "step": 66 + }, + { + "epoch": 0.64, + "learning_rate": 5.2820386330910424e-05, + "loss": 2.0534, + "step": 67 + }, + { + "epoch": 0.65, + "learning_rate": 5.2419479243721734e-05, + "loss": 1.9976, + "step": 68 + }, + { + "epoch": 0.66, + "learning_rate": 5.2013657323091226e-05, + "loss": 2.0262, + "step": 69 + }, + { + "epoch": 0.67, + "learning_rate": 5.160302069950537e-05, + "loss": 2.0093, + "step": 70 + }, + { + "epoch": 0.68, + "learning_rate": 5.118767069140655e-05, + "loss": 2.1088, + "step": 71 + }, + { + "epoch": 0.69, + "learning_rate": 5.076770978019424e-05, + "loss": 1.9841, + "step": 72 + }, + { + "epoch": 0.7, + "learning_rate": 5.034324158493928e-05, + "loss": 2.0081, + "step": 73 + }, + { + "epoch": 0.7, + "learning_rate": 4.991437083681739e-05, + "loss": 2.0179, + "step": 74 + }, + { + "epoch": 0.71, + "learning_rate": 4.948120335326833e-05, + "loss": 2.1304, + "step": 75 + }, + { + "epoch": 0.72, + "learning_rate": 4.904384601188706e-05, + "loss": 1.9213, + "step": 76 + }, + { + "epoch": 0.73, + "learning_rate": 4.860240672405324e-05, + "loss": 2.2153, + "step": 77 + }, + { + "epoch": 0.74, + "learning_rate": 4.815699440830574e-05, + "loss": 1.9365, + "step": 78 + }, + { + "epoch": 0.75, + "learning_rate": 4.770771896346863e-05, + "loss": 2.021, + "step": 79 + }, + { + "epoch": 0.76, + "learning_rate": 4.725469124153526e-05, + "loss": 2.0327, + "step": 80 + }, + { + "epoch": 0.76, + "eval_loss": 2.0059409141540527, + "eval_runtime": 2.8077, + "eval_samples_per_second": 3.562, + "eval_steps_per_second": 0.712, + "step": 80 + }, + { + "epoch": 0.77, + "learning_rate": 4.679802302031724e-05, + "loss": 2.1576, + "step": 81 + }, + { + "epoch": 0.78, + "learning_rate": 4.6337826975864856e-05, + "loss": 1.9468, + "step": 82 + }, + { + "epoch": 0.79, + "learning_rate": 4.587421665466603e-05, + "loss": 2.0795, + "step": 83 + }, + { + "epoch": 0.8, + "learning_rate": 4.540730644563036e-05, + "loss": 2.0566, + "step": 84 + }, + { + "epoch": 0.81, + "learning_rate": 4.4937211551865415e-05, + "loss": 2.0819, + "step": 85 + }, + { + "epoch": 0.82, + "learning_rate": 4.4464047962252035e-05, + "loss": 2.0542, + "step": 86 + }, + { + "epoch": 0.83, + "learning_rate": 4.398793242282586e-05, + "loss": 2.0006, + "step": 87 + }, + { + "epoch": 0.84, + "learning_rate": 4.350898240797197e-05, + "loss": 1.9953, + "step": 88 + }, + { + "epoch": 0.85, + "learning_rate": 4.3027316091439854e-05, + "loss": 2.067, + "step": 89 + }, + { + "epoch": 0.86, + "learning_rate": 4.254305231718579e-05, + "loss": 1.9909, + "step": 90 + }, + { + "epoch": 0.87, + "learning_rate": 4.205631057004987e-05, + "loss": 1.9436, + "step": 91 + }, + { + "epoch": 0.88, + "learning_rate": 4.156721094627495e-05, + "loss": 2.0643, + "step": 92 + }, + { + "epoch": 0.89, + "learning_rate": 4.107587412387462e-05, + "loss": 2.1244, + "step": 93 + }, + { + "epoch": 0.9, + "learning_rate": 4.058242133285779e-05, + "loss": 1.9795, + "step": 94 + }, + { + "epoch": 0.9, + "learning_rate": 4.008697432531693e-05, + "loss": 2.1568, + "step": 95 + }, + { + "epoch": 0.91, + "learning_rate": 3.9589655345387633e-05, + "loss": 1.9829, + "step": 96 + }, + { + "epoch": 0.92, + "learning_rate": 3.909058709908665e-05, + "loss": 2.031, + "step": 97 + }, + { + "epoch": 0.93, + "learning_rate": 3.858989272403605e-05, + "loss": 2.2042, + "step": 98 + }, + { + "epoch": 0.94, + "learning_rate": 3.8087695759080804e-05, + "loss": 1.9775, + "step": 99 + }, + { + "epoch": 0.95, + "learning_rate": 3.75841201138075e-05, + "loss": 2.0274, + "step": 100 + }, + { + "epoch": 0.95, + "eval_loss": 2.0003483295440674, + "eval_runtime": 2.8075, + "eval_samples_per_second": 3.562, + "eval_steps_per_second": 0.712, + "step": 100 + }, + { + "epoch": 0.96, + "learning_rate": 3.707929003797143e-05, + "loss": 2.0745, + "step": 101 + }, + { + "epoch": 0.97, + "learning_rate": 3.6573330090839886e-05, + "loss": 1.9933, + "step": 102 + }, + { + "epoch": 0.98, + "learning_rate": 3.6066365110458966e-05, + "loss": 2.127, + "step": 103 + }, + { + "epoch": 0.99, + "learning_rate": 3.555852018285172e-05, + "loss": 2.0499, + "step": 104 + }, + { + "epoch": 1.0, + "learning_rate": 3.504992061115496e-05, + "loss": 1.9391, + "step": 105 + }, + { + "epoch": 1.01, + "learning_rate": 3.4540691884702686e-05, + "loss": 2.0216, + "step": 106 + }, + { + "epoch": 1.02, + "learning_rate": 3.4030959648063384e-05, + "loss": 2.0055, + "step": 107 + }, + { + "epoch": 1.03, + "learning_rate": 3.3520849670039165e-05, + "loss": 2.0721, + "step": 108 + }, + { + "epoch": 1.04, + "learning_rate": 3.301048781263417e-05, + "loss": 2.0274, + "step": 109 + }, + { + "epoch": 1.05, + "learning_rate": 3.25e-05, + "loss": 1.9373, + "step": 110 + }, + { + "epoch": 1.06, + "learning_rate": 3.198951218736583e-05, + "loss": 1.9034, + "step": 111 + }, + { + "epoch": 1.07, + "learning_rate": 3.147915032996083e-05, + "loss": 1.8759, + "step": 112 + }, + { + "epoch": 1.08, + "learning_rate": 3.096904035193661e-05, + "loss": 2.0572, + "step": 113 + }, + { + "epoch": 1.09, + "learning_rate": 3.0459308115297312e-05, + "loss": 2.0072, + "step": 114 + }, + { + "epoch": 1.1, + "learning_rate": 2.9950079388845042e-05, + "loss": 1.9129, + "step": 115 + }, + { + "epoch": 1.1, + "learning_rate": 2.944147981714828e-05, + "loss": 2.0966, + "step": 116 + }, + { + "epoch": 1.11, + "learning_rate": 2.8933634889541028e-05, + "loss": 2.0903, + "step": 117 + }, + { + "epoch": 1.12, + "learning_rate": 2.8426669909160105e-05, + "loss": 2.0917, + "step": 118 + }, + { + "epoch": 1.13, + "learning_rate": 2.7920709962028562e-05, + "loss": 2.089, + "step": 119 + }, + { + "epoch": 1.14, + "learning_rate": 2.741587988619249e-05, + "loss": 1.9964, + "step": 120 + }, + { + "epoch": 1.14, + "eval_loss": 1.9988892078399658, + "eval_runtime": 2.8188, + "eval_samples_per_second": 3.548, + "eval_steps_per_second": 0.71, + "step": 120 + }, + { + "epoch": 1.15, + "learning_rate": 2.6912304240919183e-05, + "loss": 1.9754, + "step": 121 + }, + { + "epoch": 1.16, + "learning_rate": 2.641010727596394e-05, + "loss": 2.1508, + "step": 122 + }, + { + "epoch": 1.17, + "learning_rate": 2.5909412900913353e-05, + "loss": 2.0331, + "step": 123 + }, + { + "epoch": 1.18, + "learning_rate": 2.5410344654612374e-05, + "loss": 1.9607, + "step": 124 + }, + { + "epoch": 1.19, + "learning_rate": 2.4913025674683083e-05, + "loss": 2.0356, + "step": 125 + }, + { + "epoch": 1.2, + "learning_rate": 2.4417578667142222e-05, + "loss": 2.0417, + "step": 126 + }, + { + "epoch": 1.21, + "learning_rate": 2.3924125876125387e-05, + "loss": 1.9246, + "step": 127 + }, + { + "epoch": 1.22, + "learning_rate": 2.343278905372505e-05, + "loss": 2.0911, + "step": 128 + }, + { + "epoch": 1.23, + "learning_rate": 2.2943689429950124e-05, + "loss": 1.9324, + "step": 129 + }, + { + "epoch": 1.24, + "learning_rate": 2.2456947682814208e-05, + "loss": 1.9987, + "step": 130 + }, + { + "epoch": 1.25, + "learning_rate": 2.1972683908560147e-05, + "loss": 2.0121, + "step": 131 + }, + { + "epoch": 1.26, + "learning_rate": 2.149101759202803e-05, + "loss": 2.0002, + "step": 132 + }, + { + "epoch": 1.27, + "learning_rate": 2.1012067577174147e-05, + "loss": 2.0464, + "step": 133 + }, + { + "epoch": 1.28, + "learning_rate": 2.0535952037747966e-05, + "loss": 2.1422, + "step": 134 + }, + { + "epoch": 1.29, + "learning_rate": 2.0062788448134583e-05, + "loss": 1.9796, + "step": 135 + }, + { + "epoch": 1.3, + "learning_rate": 1.959269355436963e-05, + "loss": 1.9683, + "step": 136 + }, + { + "epoch": 1.3, + "learning_rate": 1.9125783345333963e-05, + "loss": 2.1488, + "step": 137 + }, + { + "epoch": 1.31, + "learning_rate": 1.866217302413514e-05, + "loss": 1.9852, + "step": 138 + }, + { + "epoch": 1.32, + "learning_rate": 1.8201976979682756e-05, + "loss": 2.0087, + "step": 139 + }, + { + "epoch": 1.33, + "learning_rate": 1.774530875846473e-05, + "loss": 1.9724, + "step": 140 + }, + { + "epoch": 1.33, + "eval_loss": 1.9960596561431885, + "eval_runtime": 2.8086, + "eval_samples_per_second": 3.56, + "eval_steps_per_second": 0.712, + "step": 140 + }, + { + "epoch": 1.34, + "learning_rate": 1.7292281036531367e-05, + "loss": 2.0022, + "step": 141 + }, + { + "epoch": 1.35, + "learning_rate": 1.6843005591694247e-05, + "loss": 2.0827, + "step": 142 + }, + { + "epoch": 1.36, + "learning_rate": 1.639759327594675e-05, + "loss": 2.0052, + "step": 143 + }, + { + "epoch": 1.37, + "learning_rate": 1.595615398811293e-05, + "loss": 1.9938, + "step": 144 + }, + { + "epoch": 1.38, + "learning_rate": 1.551879664673166e-05, + "loss": 1.996, + "step": 145 + }, + { + "epoch": 1.39, + "learning_rate": 1.5085629163182601e-05, + "loss": 2.0065, + "step": 146 + }, + { + "epoch": 1.4, + "learning_rate": 1.4656758415060712e-05, + "loss": 2.0048, + "step": 147 + }, + { + "epoch": 1.41, + "learning_rate": 1.4232290219805762e-05, + "loss": 1.8846, + "step": 148 + }, + { + "epoch": 1.42, + "learning_rate": 1.3812329308593457e-05, + "loss": 1.9582, + "step": 149 + }, + { + "epoch": 1.43, + "learning_rate": 1.3396979300494626e-05, + "loss": 2.0509, + "step": 150 + } + ], + "logging_steps": 1, + "max_steps": 210, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.2206169951043584e+17, + "trial_name": null, + "trial_params": null +} diff --git a/prompt-a/onechar/checkpoint-150/training_args.bin b/prompt-a/onechar/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6a9833d8c69d00318139a8fc6e1126e7d7ac62f --- /dev/null +++ b/prompt-a/onechar/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff85b6b2e99bcdbf030f4c883beac4553b8cad32f31f2d286263119b723c469 +size 4411 diff --git a/prompt-a/onechar/checkpoint-200/README.md b/prompt-a/onechar/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/prompt-a/onechar/checkpoint-200/adapter_config.json b/prompt-a/onechar/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.01, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/prompt-a/onechar/checkpoint-200/adapter_model.bin b/prompt-a/onechar/checkpoint-200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0c25d83f98c218fd56f87d85148d5cf35374ec3 --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aeeda740ab6bb8ee51e6b7ecd7a2848397fd35d1b4db657df6d8fa4dac2f048 +size 125374989 diff --git a/prompt-a/onechar/checkpoint-200/adapter_model/README.md b/prompt-a/onechar/checkpoint-200/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/prompt-a/onechar/checkpoint-200/adapter_model/adapter_config.json b/prompt-a/onechar/checkpoint-200/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.01, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/prompt-a/onechar/checkpoint-200/adapter_model/adapter_model.bin b/prompt-a/onechar/checkpoint-200/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0c25d83f98c218fd56f87d85148d5cf35374ec3 --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aeeda740ab6bb8ee51e6b7ecd7a2848397fd35d1b4db657df6d8fa4dac2f048 +size 125374989 diff --git a/prompt-a/onechar/checkpoint-200/optimizer.pt b/prompt-a/onechar/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c60174b7c83a2c0e7fc2090096f3b87ffcd4656f --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696142d361db2f7734fc31c617f47091a5690b7733ed8c4a21b5b59a2ee213a8 +size 250681597 diff --git a/prompt-a/onechar/checkpoint-200/rng_state_0.pth b/prompt-a/onechar/checkpoint-200/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..da50e71000e6c1d1a4d7832253866f3044b66e7d --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02519202233b2f973159654f70ae8d8d74816c44a0c980e2ec2bc67267e89056 +size 21687 diff --git a/prompt-a/onechar/checkpoint-200/rng_state_1.pth b/prompt-a/onechar/checkpoint-200/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee7cfdbe994f9b04e1c752162cd3e359411fad28 --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7022483dc2b7110805d232d89f98606595b39d55b0309c029f39838461bfa05 +size 21687 diff --git a/prompt-a/onechar/checkpoint-200/rng_state_2.pth b/prompt-a/onechar/checkpoint-200/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..56091f7ff7f2320b878a237a667fdba0f8dcd55f --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbee7570d88f852abb0c601b61e086fab06332833e7e65aa5e6cac17720c5a40 +size 21687 diff --git a/prompt-a/onechar/checkpoint-200/rng_state_3.pth b/prompt-a/onechar/checkpoint-200/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..0307838519b9ac446131ad02102aefe833d22d3d --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4322ba034422871591d2ff0c9e49f1a15c37d9e090b2554c996dfb31f3b0c71d +size 21687 diff --git a/prompt-a/onechar/checkpoint-200/rng_state_4.pth b/prompt-a/onechar/checkpoint-200/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..2cf045240097b80d2a27a13f154ee5a4972751df --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c7ff253aeb6baa0280c93c9623117398efc12932c877d511ea6a61c4dd3478d +size 21687 diff --git a/prompt-a/onechar/checkpoint-200/rng_state_5.pth b/prompt-a/onechar/checkpoint-200/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef5dae1f5582adbd029a6fc88fd6f98d159052a4 --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9057a2b942467901cae269a7b56adc9305f3acecaaedd936c935eb0ca0f0c35b +size 21687 diff --git a/prompt-a/onechar/checkpoint-200/rng_state_6.pth b/prompt-a/onechar/checkpoint-200/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..a4cacbeda939117b3d98b8c588543ea6e86caaeb --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eecbe567bf4fcdbe022499d24c3a234ab459267ae14e5a586b3d94ba684f0166 +size 21687 diff --git a/prompt-a/onechar/checkpoint-200/rng_state_7.pth b/prompt-a/onechar/checkpoint-200/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c7f151ca1dbc055be461ae2c2de74d8541f6947 --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6642c45077b1e8796a45aeec8e8477096bc2c474d725637e5efa8f6b04382e +size 21687 diff --git a/prompt-a/onechar/checkpoint-200/scheduler.pt b/prompt-a/onechar/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b81124515ec46fd0d4546de54a49624b0ef04afe --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e64216f3a6fcd04f261085b97281e9e1c4d72c568c3b04b427986c8bef072d8 +size 627 diff --git a/prompt-a/onechar/checkpoint-200/trainer_state.json b/prompt-a/onechar/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c476ece0f529f280a41e965fa8f39533b74c14ab --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/trainer_state.json @@ -0,0 +1,1299 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9047619047619047, + "eval_steps": 20, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 6.5e-06, + "loss": 2.1535, + "step": 1 + }, + { + "epoch": 0.02, + "learning_rate": 1.3e-05, + "loss": 2.1328, + "step": 2 + }, + { + "epoch": 0.03, + "learning_rate": 1.9499999999999996e-05, + "loss": 2.0918, + "step": 3 + }, + { + "epoch": 0.04, + "learning_rate": 2.6e-05, + "loss": 2.1194, + "step": 4 + }, + { + "epoch": 0.05, + "learning_rate": 3.25e-05, + "loss": 2.2342, + "step": 5 + }, + { + "epoch": 0.06, + "learning_rate": 3.899999999999999e-05, + "loss": 2.1168, + "step": 6 + }, + { + "epoch": 0.07, + "learning_rate": 4.5499999999999995e-05, + "loss": 2.1761, + "step": 7 + }, + { + "epoch": 0.08, + "learning_rate": 5.2e-05, + "loss": 2.0764, + "step": 8 + }, + { + "epoch": 0.09, + "learning_rate": 5.85e-05, + "loss": 2.1711, + "step": 9 + }, + { + "epoch": 0.1, + "learning_rate": 6.5e-05, + "loss": 2.0885, + "step": 10 + }, + { + "epoch": 0.1, + "learning_rate": 6.499599055565396e-05, + "loss": 2.0196, + "step": 11 + }, + { + "epoch": 0.11, + "learning_rate": 6.498396321188627e-05, + "loss": 2.1799, + "step": 12 + }, + { + "epoch": 0.12, + "learning_rate": 6.496392093626402e-05, + "loss": 2.1322, + "step": 13 + }, + { + "epoch": 0.13, + "learning_rate": 6.493586867391882e-05, + "loss": 2.1486, + "step": 14 + }, + { + "epoch": 0.14, + "learning_rate": 6.489981334632665e-05, + "loss": 2.1351, + "step": 15 + }, + { + "epoch": 0.15, + "learning_rate": 6.48557638496001e-05, + "loss": 2.1366, + "step": 16 + }, + { + "epoch": 0.16, + "learning_rate": 6.480373105229334e-05, + "loss": 2.0727, + "step": 17 + }, + { + "epoch": 0.17, + "learning_rate": 6.474372779272053e-05, + "loss": 2.1123, + "step": 18 + }, + { + "epoch": 0.18, + "learning_rate": 6.467576887578811e-05, + "loss": 2.0652, + "step": 19 + }, + { + "epoch": 0.19, + "learning_rate": 6.459987106934198e-05, + "loss": 2.0008, + "step": 20 + }, + { + "epoch": 0.19, + "eval_loss": 2.0444116592407227, + "eval_runtime": 2.798, + "eval_samples_per_second": 3.574, + "eval_steps_per_second": 0.715, + "step": 20 + }, + { + "epoch": 0.2, + "learning_rate": 6.451605310003014e-05, + "loss": 2.0769, + "step": 21 + }, + { + "epoch": 0.21, + "learning_rate": 6.442433564868238e-05, + "loss": 1.9949, + "step": 22 + }, + { + "epoch": 0.22, + "learning_rate": 6.432474134520738e-05, + "loss": 2.1341, + "step": 23 + }, + { + "epoch": 0.23, + "learning_rate": 6.421729476300929e-05, + "loss": 2.1433, + "step": 24 + }, + { + "epoch": 0.24, + "learning_rate": 6.410202241292448e-05, + "loss": 2.0168, + "step": 25 + }, + { + "epoch": 0.25, + "learning_rate": 6.39789527366805e-05, + "loss": 2.0403, + "step": 26 + }, + { + "epoch": 0.26, + "learning_rate": 6.384811609987843e-05, + "loss": 2.1983, + "step": 27 + }, + { + "epoch": 0.27, + "learning_rate": 6.370954478450064e-05, + "loss": 2.0625, + "step": 28 + }, + { + "epoch": 0.28, + "learning_rate": 6.356327298094572e-05, + "loss": 2.204, + "step": 29 + }, + { + "epoch": 0.29, + "learning_rate": 6.340933677959248e-05, + "loss": 2.118, + "step": 30 + }, + { + "epoch": 0.3, + "learning_rate": 6.324777416189522e-05, + "loss": 2.0997, + "step": 31 + }, + { + "epoch": 0.3, + "learning_rate": 6.307862499101232e-05, + "loss": 2.1154, + "step": 32 + }, + { + "epoch": 0.31, + "learning_rate": 6.290193100197069e-05, + "loss": 2.0845, + "step": 33 + }, + { + "epoch": 0.32, + "learning_rate": 6.271773579136816e-05, + "loss": 2.0052, + "step": 34 + }, + { + "epoch": 0.33, + "learning_rate": 6.252608480661681e-05, + "loss": 1.956, + "step": 35 + }, + { + "epoch": 0.34, + "learning_rate": 6.232702533472938e-05, + "loss": 2.0958, + "step": 36 + }, + { + "epoch": 0.35, + "learning_rate": 6.212060649065197e-05, + "loss": 2.0634, + "step": 37 + }, + { + "epoch": 0.36, + "learning_rate": 6.190687920514563e-05, + "loss": 2.0033, + "step": 38 + }, + { + "epoch": 0.37, + "learning_rate": 6.168589621222e-05, + "loss": 2.1426, + "step": 39 + }, + { + "epoch": 0.38, + "learning_rate": 6.145771203612195e-05, + "loss": 1.9823, + "step": 40 + }, + { + "epoch": 0.38, + "eval_loss": 2.0226035118103027, + "eval_runtime": 2.8049, + "eval_samples_per_second": 3.565, + "eval_steps_per_second": 0.713, + "step": 40 + }, + { + "epoch": 0.39, + "learning_rate": 6.122238297788254e-05, + "loss": 2.2016, + "step": 41 + }, + { + "epoch": 0.4, + "learning_rate": 6.097996710142556e-05, + "loss": 2.1008, + "step": 42 + }, + { + "epoch": 0.41, + "learning_rate": 6.0730524219241214e-05, + "loss": 2.0821, + "step": 43 + }, + { + "epoch": 0.42, + "learning_rate": 6.047411587762816e-05, + "loss": 2.092, + "step": 44 + }, + { + "epoch": 0.43, + "learning_rate": 6.0210805341507994e-05, + "loss": 1.9617, + "step": 45 + }, + { + "epoch": 0.44, + "learning_rate": 5.994065757881549e-05, + "loss": 1.9358, + "step": 46 + }, + { + "epoch": 0.45, + "learning_rate": 5.966373924446878e-05, + "loss": 2.0573, + "step": 47 + }, + { + "epoch": 0.46, + "learning_rate": 5.938011866392325e-05, + "loss": 2.0731, + "step": 48 + }, + { + "epoch": 0.47, + "learning_rate": 5.9089865816313253e-05, + "loss": 2.2842, + "step": 49 + }, + { + "epoch": 0.48, + "learning_rate": 5.8793052317185785e-05, + "loss": 2.2145, + "step": 50 + }, + { + "epoch": 0.49, + "learning_rate": 5.848975140083044e-05, + "loss": 1.9923, + "step": 51 + }, + { + "epoch": 0.5, + "learning_rate": 5.818003790220993e-05, + "loss": 2.1065, + "step": 52 + }, + { + "epoch": 0.5, + "learning_rate": 5.786398823849571e-05, + "loss": 2.0009, + "step": 53 + }, + { + "epoch": 0.51, + "learning_rate": 5.754168039021315e-05, + "loss": 1.9785, + "step": 54 + }, + { + "epoch": 0.52, + "learning_rate": 5.7213193882001006e-05, + "loss": 1.9504, + "step": 55 + }, + { + "epoch": 0.53, + "learning_rate": 5.687860976298993e-05, + "loss": 2.1977, + "step": 56 + }, + { + "epoch": 0.54, + "learning_rate": 5.6538010586804816e-05, + "loss": 2.0423, + "step": 57 + }, + { + "epoch": 0.55, + "learning_rate": 5.619148039119587e-05, + "loss": 2.0536, + "step": 58 + }, + { + "epoch": 0.56, + "learning_rate": 5.5839104677303635e-05, + "loss": 2.0286, + "step": 59 + }, + { + "epoch": 0.57, + "learning_rate": 5.548097038856279e-05, + "loss": 2.1143, + "step": 60 + }, + { + "epoch": 0.57, + "eval_loss": 2.0128190517425537, + "eval_runtime": 2.8105, + "eval_samples_per_second": 3.558, + "eval_steps_per_second": 0.712, + "step": 60 + }, + { + "epoch": 0.58, + "learning_rate": 5.511716588925021e-05, + "loss": 2.1397, + "step": 61 + }, + { + "epoch": 0.59, + "learning_rate": 5.474778094268238e-05, + "loss": 2.0601, + "step": 62 + }, + { + "epoch": 0.6, + "learning_rate": 5.437290668906763e-05, + "loss": 2.1707, + "step": 63 + }, + { + "epoch": 0.61, + "learning_rate": 5.399263562301868e-05, + "loss": 2.0649, + "step": 64 + }, + { + "epoch": 0.62, + "learning_rate": 5.360706157073096e-05, + "loss": 2.0314, + "step": 65 + }, + { + "epoch": 0.63, + "learning_rate": 5.321627966683241e-05, + "loss": 2.1277, + "step": 66 + }, + { + "epoch": 0.64, + "learning_rate": 5.2820386330910424e-05, + "loss": 2.0534, + "step": 67 + }, + { + "epoch": 0.65, + "learning_rate": 5.2419479243721734e-05, + "loss": 1.9976, + "step": 68 + }, + { + "epoch": 0.66, + "learning_rate": 5.2013657323091226e-05, + "loss": 2.0262, + "step": 69 + }, + { + "epoch": 0.67, + "learning_rate": 5.160302069950537e-05, + "loss": 2.0093, + "step": 70 + }, + { + "epoch": 0.68, + "learning_rate": 5.118767069140655e-05, + "loss": 2.1088, + "step": 71 + }, + { + "epoch": 0.69, + "learning_rate": 5.076770978019424e-05, + "loss": 1.9841, + "step": 72 + }, + { + "epoch": 0.7, + "learning_rate": 5.034324158493928e-05, + "loss": 2.0081, + "step": 73 + }, + { + "epoch": 0.7, + "learning_rate": 4.991437083681739e-05, + "loss": 2.0179, + "step": 74 + }, + { + "epoch": 0.71, + "learning_rate": 4.948120335326833e-05, + "loss": 2.1304, + "step": 75 + }, + { + "epoch": 0.72, + "learning_rate": 4.904384601188706e-05, + "loss": 1.9213, + "step": 76 + }, + { + "epoch": 0.73, + "learning_rate": 4.860240672405324e-05, + "loss": 2.2153, + "step": 77 + }, + { + "epoch": 0.74, + "learning_rate": 4.815699440830574e-05, + "loss": 1.9365, + "step": 78 + }, + { + "epoch": 0.75, + "learning_rate": 4.770771896346863e-05, + "loss": 2.021, + "step": 79 + }, + { + "epoch": 0.76, + "learning_rate": 4.725469124153526e-05, + "loss": 2.0327, + "step": 80 + }, + { + "epoch": 0.76, + "eval_loss": 2.0059409141540527, + "eval_runtime": 2.8077, + "eval_samples_per_second": 3.562, + "eval_steps_per_second": 0.712, + "step": 80 + }, + { + "epoch": 0.77, + "learning_rate": 4.679802302031724e-05, + "loss": 2.1576, + "step": 81 + }, + { + "epoch": 0.78, + "learning_rate": 4.6337826975864856e-05, + "loss": 1.9468, + "step": 82 + }, + { + "epoch": 0.79, + "learning_rate": 4.587421665466603e-05, + "loss": 2.0795, + "step": 83 + }, + { + "epoch": 0.8, + "learning_rate": 4.540730644563036e-05, + "loss": 2.0566, + "step": 84 + }, + { + "epoch": 0.81, + "learning_rate": 4.4937211551865415e-05, + "loss": 2.0819, + "step": 85 + }, + { + "epoch": 0.82, + "learning_rate": 4.4464047962252035e-05, + "loss": 2.0542, + "step": 86 + }, + { + "epoch": 0.83, + "learning_rate": 4.398793242282586e-05, + "loss": 2.0006, + "step": 87 + }, + { + "epoch": 0.84, + "learning_rate": 4.350898240797197e-05, + "loss": 1.9953, + "step": 88 + }, + { + "epoch": 0.85, + "learning_rate": 4.3027316091439854e-05, + "loss": 2.067, + "step": 89 + }, + { + "epoch": 0.86, + "learning_rate": 4.254305231718579e-05, + "loss": 1.9909, + "step": 90 + }, + { + "epoch": 0.87, + "learning_rate": 4.205631057004987e-05, + "loss": 1.9436, + "step": 91 + }, + { + "epoch": 0.88, + "learning_rate": 4.156721094627495e-05, + "loss": 2.0643, + "step": 92 + }, + { + "epoch": 0.89, + "learning_rate": 4.107587412387462e-05, + "loss": 2.1244, + "step": 93 + }, + { + "epoch": 0.9, + "learning_rate": 4.058242133285779e-05, + "loss": 1.9795, + "step": 94 + }, + { + "epoch": 0.9, + "learning_rate": 4.008697432531693e-05, + "loss": 2.1568, + "step": 95 + }, + { + "epoch": 0.91, + "learning_rate": 3.9589655345387633e-05, + "loss": 1.9829, + "step": 96 + }, + { + "epoch": 0.92, + "learning_rate": 3.909058709908665e-05, + "loss": 2.031, + "step": 97 + }, + { + "epoch": 0.93, + "learning_rate": 3.858989272403605e-05, + "loss": 2.2042, + "step": 98 + }, + { + "epoch": 0.94, + "learning_rate": 3.8087695759080804e-05, + "loss": 1.9775, + "step": 99 + }, + { + "epoch": 0.95, + "learning_rate": 3.75841201138075e-05, + "loss": 2.0274, + "step": 100 + }, + { + "epoch": 0.95, + "eval_loss": 2.0003483295440674, + "eval_runtime": 2.8075, + "eval_samples_per_second": 3.562, + "eval_steps_per_second": 0.712, + "step": 100 + }, + { + "epoch": 0.96, + "learning_rate": 3.707929003797143e-05, + "loss": 2.0745, + "step": 101 + }, + { + "epoch": 0.97, + "learning_rate": 3.6573330090839886e-05, + "loss": 1.9933, + "step": 102 + }, + { + "epoch": 0.98, + "learning_rate": 3.6066365110458966e-05, + "loss": 2.127, + "step": 103 + }, + { + "epoch": 0.99, + "learning_rate": 3.555852018285172e-05, + "loss": 2.0499, + "step": 104 + }, + { + "epoch": 1.0, + "learning_rate": 3.504992061115496e-05, + "loss": 1.9391, + "step": 105 + }, + { + "epoch": 1.01, + "learning_rate": 3.4540691884702686e-05, + "loss": 2.0216, + "step": 106 + }, + { + "epoch": 1.02, + "learning_rate": 3.4030959648063384e-05, + "loss": 2.0055, + "step": 107 + }, + { + "epoch": 1.03, + "learning_rate": 3.3520849670039165e-05, + "loss": 2.0721, + "step": 108 + }, + { + "epoch": 1.04, + "learning_rate": 3.301048781263417e-05, + "loss": 2.0274, + "step": 109 + }, + { + "epoch": 1.05, + "learning_rate": 3.25e-05, + "loss": 1.9373, + "step": 110 + }, + { + "epoch": 1.06, + "learning_rate": 3.198951218736583e-05, + "loss": 1.9034, + "step": 111 + }, + { + "epoch": 1.07, + "learning_rate": 3.147915032996083e-05, + "loss": 1.8759, + "step": 112 + }, + { + "epoch": 1.08, + "learning_rate": 3.096904035193661e-05, + "loss": 2.0572, + "step": 113 + }, + { + "epoch": 1.09, + "learning_rate": 3.0459308115297312e-05, + "loss": 2.0072, + "step": 114 + }, + { + "epoch": 1.1, + "learning_rate": 2.9950079388845042e-05, + "loss": 1.9129, + "step": 115 + }, + { + "epoch": 1.1, + "learning_rate": 2.944147981714828e-05, + "loss": 2.0966, + "step": 116 + }, + { + "epoch": 1.11, + "learning_rate": 2.8933634889541028e-05, + "loss": 2.0903, + "step": 117 + }, + { + "epoch": 1.12, + "learning_rate": 2.8426669909160105e-05, + "loss": 2.0917, + "step": 118 + }, + { + "epoch": 1.13, + "learning_rate": 2.7920709962028562e-05, + "loss": 2.089, + "step": 119 + }, + { + "epoch": 1.14, + "learning_rate": 2.741587988619249e-05, + "loss": 1.9964, + "step": 120 + }, + { + "epoch": 1.14, + "eval_loss": 1.9988892078399658, + "eval_runtime": 2.8188, + "eval_samples_per_second": 3.548, + "eval_steps_per_second": 0.71, + "step": 120 + }, + { + "epoch": 1.15, + "learning_rate": 2.6912304240919183e-05, + "loss": 1.9754, + "step": 121 + }, + { + "epoch": 1.16, + "learning_rate": 2.641010727596394e-05, + "loss": 2.1508, + "step": 122 + }, + { + "epoch": 1.17, + "learning_rate": 2.5909412900913353e-05, + "loss": 2.0331, + "step": 123 + }, + { + "epoch": 1.18, + "learning_rate": 2.5410344654612374e-05, + "loss": 1.9607, + "step": 124 + }, + { + "epoch": 1.19, + "learning_rate": 2.4913025674683083e-05, + "loss": 2.0356, + "step": 125 + }, + { + "epoch": 1.2, + "learning_rate": 2.4417578667142222e-05, + "loss": 2.0417, + "step": 126 + }, + { + "epoch": 1.21, + "learning_rate": 2.3924125876125387e-05, + "loss": 1.9246, + "step": 127 + }, + { + "epoch": 1.22, + "learning_rate": 2.343278905372505e-05, + "loss": 2.0911, + "step": 128 + }, + { + "epoch": 1.23, + "learning_rate": 2.2943689429950124e-05, + "loss": 1.9324, + "step": 129 + }, + { + "epoch": 1.24, + "learning_rate": 2.2456947682814208e-05, + "loss": 1.9987, + "step": 130 + }, + { + "epoch": 1.25, + "learning_rate": 2.1972683908560147e-05, + "loss": 2.0121, + "step": 131 + }, + { + "epoch": 1.26, + "learning_rate": 2.149101759202803e-05, + "loss": 2.0002, + "step": 132 + }, + { + "epoch": 1.27, + "learning_rate": 2.1012067577174147e-05, + "loss": 2.0464, + "step": 133 + }, + { + "epoch": 1.28, + "learning_rate": 2.0535952037747966e-05, + "loss": 2.1422, + "step": 134 + }, + { + "epoch": 1.29, + "learning_rate": 2.0062788448134583e-05, + "loss": 1.9796, + "step": 135 + }, + { + "epoch": 1.3, + "learning_rate": 1.959269355436963e-05, + "loss": 1.9683, + "step": 136 + }, + { + "epoch": 1.3, + "learning_rate": 1.9125783345333963e-05, + "loss": 2.1488, + "step": 137 + }, + { + "epoch": 1.31, + "learning_rate": 1.866217302413514e-05, + "loss": 1.9852, + "step": 138 + }, + { + "epoch": 1.32, + "learning_rate": 1.8201976979682756e-05, + "loss": 2.0087, + "step": 139 + }, + { + "epoch": 1.33, + "learning_rate": 1.774530875846473e-05, + "loss": 1.9724, + "step": 140 + }, + { + "epoch": 1.33, + "eval_loss": 1.9960596561431885, + "eval_runtime": 2.8086, + "eval_samples_per_second": 3.56, + "eval_steps_per_second": 0.712, + "step": 140 + }, + { + "epoch": 1.34, + "learning_rate": 1.7292281036531367e-05, + "loss": 2.0022, + "step": 141 + }, + { + "epoch": 1.35, + "learning_rate": 1.6843005591694247e-05, + "loss": 2.0827, + "step": 142 + }, + { + "epoch": 1.36, + "learning_rate": 1.639759327594675e-05, + "loss": 2.0052, + "step": 143 + }, + { + "epoch": 1.37, + "learning_rate": 1.595615398811293e-05, + "loss": 1.9938, + "step": 144 + }, + { + "epoch": 1.38, + "learning_rate": 1.551879664673166e-05, + "loss": 1.996, + "step": 145 + }, + { + "epoch": 1.39, + "learning_rate": 1.5085629163182601e-05, + "loss": 2.0065, + "step": 146 + }, + { + "epoch": 1.4, + "learning_rate": 1.4656758415060712e-05, + "loss": 2.0048, + "step": 147 + }, + { + "epoch": 1.41, + "learning_rate": 1.4232290219805762e-05, + "loss": 1.8846, + "step": 148 + }, + { + "epoch": 1.42, + "learning_rate": 1.3812329308593457e-05, + "loss": 1.9582, + "step": 149 + }, + { + "epoch": 1.43, + "learning_rate": 1.3396979300494626e-05, + "loss": 2.0509, + "step": 150 + }, + { + "epoch": 1.44, + "learning_rate": 1.2986342676908771e-05, + "loss": 2.0542, + "step": 151 + }, + { + "epoch": 1.45, + "learning_rate": 1.258052075627827e-05, + "loss": 2.0184, + "step": 152 + }, + { + "epoch": 1.46, + "learning_rate": 1.217961366908959e-05, + "loss": 1.9188, + "step": 153 + }, + { + "epoch": 1.47, + "learning_rate": 1.1783720333167582e-05, + "loss": 2.0601, + "step": 154 + }, + { + "epoch": 1.48, + "learning_rate": 1.1392938429269033e-05, + "loss": 1.9857, + "step": 155 + }, + { + "epoch": 1.49, + "learning_rate": 1.1007364376981318e-05, + "loss": 2.053, + "step": 156 + }, + { + "epoch": 1.5, + "learning_rate": 1.0627093310932374e-05, + "loss": 2.1052, + "step": 157 + }, + { + "epoch": 1.5, + "learning_rate": 1.0252219057317615e-05, + "loss": 1.9195, + "step": 158 + }, + { + "epoch": 1.51, + "learning_rate": 9.882834110749784e-06, + "loss": 1.9893, + "step": 159 + }, + { + "epoch": 1.52, + "learning_rate": 9.519029611437207e-06, + "loss": 2.178, + "step": 160 + }, + { + "epoch": 1.52, + "eval_loss": 1.9954354763031006, + "eval_runtime": 2.8093, + "eval_samples_per_second": 3.56, + "eval_steps_per_second": 0.712, + "step": 160 + }, + { + "epoch": 1.53, + "learning_rate": 9.160895322696367e-06, + "loss": 2.138, + "step": 161 + }, + { + "epoch": 1.54, + "learning_rate": 8.80851960880413e-06, + "loss": 2.0366, + "step": 162 + }, + { + "epoch": 1.55, + "learning_rate": 8.46198941319518e-06, + "loss": 1.9642, + "step": 163 + }, + { + "epoch": 1.56, + "learning_rate": 8.121390237010062e-06, + "loss": 1.9771, + "step": 164 + }, + { + "epoch": 1.57, + "learning_rate": 7.786806117998994e-06, + "loss": 2.0494, + "step": 165 + }, + { + "epoch": 1.58, + "learning_rate": 7.458319609786853e-06, + "loss": 2.0378, + "step": 166 + }, + { + "epoch": 1.59, + "learning_rate": 7.136011761504281e-06, + "loss": 2.0146, + "step": 167 + }, + { + "epoch": 1.6, + "learning_rate": 6.819962097790061e-06, + "loss": 2.157, + "step": 168 + }, + { + "epoch": 1.61, + "learning_rate": 6.510248599169555e-06, + "loss": 2.0941, + "step": 169 + }, + { + "epoch": 1.62, + "learning_rate": 6.206947682814211e-06, + "loss": 1.9445, + "step": 170 + }, + { + "epoch": 1.63, + "learning_rate": 5.9101341836867315e-06, + "loss": 2.0506, + "step": 171 + }, + { + "epoch": 1.64, + "learning_rate": 5.619881336076736e-06, + "loss": 2.0635, + "step": 172 + }, + { + "epoch": 1.65, + "learning_rate": 5.336260755531223e-06, + "loss": 1.9494, + "step": 173 + }, + { + "epoch": 1.66, + "learning_rate": 5.059342421184516e-06, + "loss": 1.9675, + "step": 174 + }, + { + "epoch": 1.67, + "learning_rate": 4.789194658492004e-06, + "loss": 1.9892, + "step": 175 + }, + { + "epoch": 1.68, + "learning_rate": 4.525884122371835e-06, + "loss": 2.0223, + "step": 176 + }, + { + "epoch": 1.69, + "learning_rate": 4.269475780758789e-06, + "loss": 2.2695, + "step": 177 + }, + { + "epoch": 1.7, + "learning_rate": 4.02003289857444e-06, + "loss": 2.0977, + "step": 178 + }, + { + "epoch": 1.7, + "learning_rate": 3.777617022117462e-06, + "loss": 1.9732, + "step": 179 + }, + { + "epoch": 1.71, + "learning_rate": 3.5422879638780466e-06, + "loss": 2.0136, + "step": 180 + }, + { + "epoch": 1.71, + "eval_loss": 1.9940338134765625, + "eval_runtime": 2.8151, + "eval_samples_per_second": 3.552, + "eval_steps_per_second": 0.71, + "step": 180 + }, + { + "epoch": 1.72, + "learning_rate": 3.3141037877799946e-06, + "loss": 2.0513, + "step": 181 + }, + { + "epoch": 1.73, + "learning_rate": 3.0931207948543707e-06, + "loss": 2.0869, + "step": 182 + }, + { + "epoch": 1.74, + "learning_rate": 2.8793935093480276e-06, + "loss": 2.0547, + "step": 183 + }, + { + "epoch": 1.75, + "learning_rate": 2.6729746652706126e-06, + "loss": 2.0177, + "step": 184 + }, + { + "epoch": 1.76, + "learning_rate": 2.473915193383181e-06, + "loss": 2.0071, + "step": 185 + }, + { + "epoch": 1.77, + "learning_rate": 2.282264208631831e-06, + "loss": 1.9302, + "step": 186 + }, + { + "epoch": 1.78, + "learning_rate": 2.0980689980293135e-06, + "loss": 2.0888, + "step": 187 + }, + { + "epoch": 1.79, + "learning_rate": 1.9213750089876725e-06, + "loss": 2.0167, + "step": 188 + }, + { + "epoch": 1.8, + "learning_rate": 1.7522258381047776e-06, + "loss": 2.0818, + "step": 189 + }, + { + "epoch": 1.81, + "learning_rate": 1.5906632204075102e-06, + "loss": 1.9876, + "step": 190 + }, + { + "epoch": 1.82, + "learning_rate": 1.436727019054271e-06, + "loss": 2.0801, + "step": 191 + }, + { + "epoch": 1.83, + "learning_rate": 1.2904552154993503e-06, + "loss": 2.0012, + "step": 192 + }, + { + "epoch": 1.84, + "learning_rate": 1.1518839001215623e-06, + "loss": 1.8813, + "step": 193 + }, + { + "epoch": 1.85, + "learning_rate": 1.02104726331949e-06, + "loss": 1.9551, + "step": 194 + }, + { + "epoch": 1.86, + "learning_rate": 8.979775870755119e-07, + "loss": 2.1161, + "step": 195 + }, + { + "epoch": 1.87, + "learning_rate": 7.827052369907083e-07, + "loss": 2.0543, + "step": 196 + }, + { + "epoch": 1.88, + "learning_rate": 6.752586547926095e-07, + "loss": 2.2174, + "step": 197 + }, + { + "epoch": 1.89, + "learning_rate": 5.756643513176201e-07, + "loss": 2.0583, + "step": 198 + }, + { + "epoch": 1.9, + "learning_rate": 4.839468999698492e-07, + "loss": 2.1223, + "step": 199 + }, + { + "epoch": 1.9, + "learning_rate": 4.00128930658026e-07, + "loss": 1.9051, + "step": 200 + }, + { + "epoch": 1.9, + "eval_loss": 1.994101881980896, + "eval_runtime": 2.8081, + "eval_samples_per_second": 3.561, + "eval_steps_per_second": 0.712, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 210, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 1.6296572813416858e+17, + "trial_name": null, + "trial_params": null +} diff --git a/prompt-a/onechar/checkpoint-200/training_args.bin b/prompt-a/onechar/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6a9833d8c69d00318139a8fc6e1126e7d7ac62f --- /dev/null +++ b/prompt-a/onechar/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff85b6b2e99bcdbf030f4c883beac4553b8cad32f31f2d286263119b723c469 +size 4411 diff --git a/prompt-a/onechar/checkpoint-50/README.md b/prompt-a/onechar/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/prompt-a/onechar/checkpoint-50/adapter_config.json b/prompt-a/onechar/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.01, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/prompt-a/onechar/checkpoint-50/adapter_model.bin b/prompt-a/onechar/checkpoint-50/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3aca02acd8592afbcd83191047871316bbc87745 --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651e76d071000edf080354af6eaea8018c5f0bfb064c7d4bc9761cc218039b1d +size 125374989 diff --git a/prompt-a/onechar/checkpoint-50/adapter_model/README.md b/prompt-a/onechar/checkpoint-50/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/prompt-a/onechar/checkpoint-50/adapter_model/adapter_config.json b/prompt-a/onechar/checkpoint-50/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57ccb0546533f3c17ba88615f0c0d4f7c0860f9e --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.01, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/prompt-a/onechar/checkpoint-50/adapter_model/adapter_model.bin b/prompt-a/onechar/checkpoint-50/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3aca02acd8592afbcd83191047871316bbc87745 --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651e76d071000edf080354af6eaea8018c5f0bfb064c7d4bc9761cc218039b1d +size 125374989 diff --git a/prompt-a/onechar/checkpoint-50/optimizer.pt b/prompt-a/onechar/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f07700ec79cc78a41fda44cb8dff398ff97161fe --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0a18365f569c60809b831c3392febacee180b7721e2d7b1cf4cb20e6f88b338 +size 250681597 diff --git a/prompt-a/onechar/checkpoint-50/rng_state_0.pth b/prompt-a/onechar/checkpoint-50/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..9657e25b8e560ff5f327c8dc11c766083e1e5307 --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed89856c64d26a78541a35ec221bc2b0be749157188567670657c394a266b758 +size 21687 diff --git a/prompt-a/onechar/checkpoint-50/rng_state_1.pth b/prompt-a/onechar/checkpoint-50/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..17628a4144b1543dd247eb43ef8f854dfd794dc4 --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:820498a0470c610380dcc9c728864f0b01a35ed840d8891fb4254551faeec64b +size 21687 diff --git a/prompt-a/onechar/checkpoint-50/rng_state_2.pth b/prompt-a/onechar/checkpoint-50/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..413eedd7ed80e5e7349f4991e5b3a857d00c0465 --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6d665d7aece15556da73ea687f3341f188a352b9cec96326d64275d71bccb8a +size 21687 diff --git a/prompt-a/onechar/checkpoint-50/rng_state_3.pth b/prompt-a/onechar/checkpoint-50/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..46d754b888ae7ac87e43d125ba7c3cf144c212a1 --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c3e7a9ae730eadfbb508e64610c31d6077c0215ca3becee01ebe5e807184933 +size 21687 diff --git a/prompt-a/onechar/checkpoint-50/rng_state_4.pth b/prompt-a/onechar/checkpoint-50/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f9140564c11dc24aed0031713090a7cda2e1b6b --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2ac6d81e0c93a656974dece1a97367d2f4eac41c40a26082f99352fe5f8755 +size 21687 diff --git a/prompt-a/onechar/checkpoint-50/rng_state_5.pth b/prompt-a/onechar/checkpoint-50/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..c2af78b776c7d5748ad4a413c6edbddefb70fa00 --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d69c5eee44666d590cc14929efb25ad70713ba9ab3fc44c2a73c778b88573d3 +size 21687 diff --git a/prompt-a/onechar/checkpoint-50/rng_state_6.pth b/prompt-a/onechar/checkpoint-50/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6bcd1ae0a9ebb65d683d1809b8f812933a28730 --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea24e03d2ca68e580c8f995bde7ff41c5238275b75912db4e6c66d3909b4910 +size 21687 diff --git a/prompt-a/onechar/checkpoint-50/rng_state_7.pth b/prompt-a/onechar/checkpoint-50/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b42b22d6a68c5070be6a2c10ef3bfe64a477aa6d --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc2e7e1fa600c6c6a79b3937b0656f80b525198bcde9f9b7fabc55d0a8b6e12f +size 21687 diff --git a/prompt-a/onechar/checkpoint-50/scheduler.pt b/prompt-a/onechar/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..99b00e04b3c739592c28a943e82e4828c5722494 --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5364a6b480c49bfcbbc6891f5b91141d4093514b963940d64fc410e4731b6c5e +size 627 diff --git a/prompt-a/onechar/checkpoint-50/trainer_state.json b/prompt-a/onechar/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a5af08db0c012d4cd296bbc43bc34d3f248885eb --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/trainer_state.json @@ -0,0 +1,335 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.47619047619047616, + "eval_steps": 20, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 6.5e-06, + "loss": 2.1535, + "step": 1 + }, + { + "epoch": 0.02, + "learning_rate": 1.3e-05, + "loss": 2.1328, + "step": 2 + }, + { + "epoch": 0.03, + "learning_rate": 1.9499999999999996e-05, + "loss": 2.0918, + "step": 3 + }, + { + "epoch": 0.04, + "learning_rate": 2.6e-05, + "loss": 2.1194, + "step": 4 + }, + { + "epoch": 0.05, + "learning_rate": 3.25e-05, + "loss": 2.2342, + "step": 5 + }, + { + "epoch": 0.06, + "learning_rate": 3.899999999999999e-05, + "loss": 2.1168, + "step": 6 + }, + { + "epoch": 0.07, + "learning_rate": 4.5499999999999995e-05, + "loss": 2.1761, + "step": 7 + }, + { + "epoch": 0.08, + "learning_rate": 5.2e-05, + "loss": 2.0764, + "step": 8 + }, + { + "epoch": 0.09, + "learning_rate": 5.85e-05, + "loss": 2.1711, + "step": 9 + }, + { + "epoch": 0.1, + "learning_rate": 6.5e-05, + "loss": 2.0885, + "step": 10 + }, + { + "epoch": 0.1, + "learning_rate": 6.499599055565396e-05, + "loss": 2.0196, + "step": 11 + }, + { + "epoch": 0.11, + "learning_rate": 6.498396321188627e-05, + "loss": 2.1799, + "step": 12 + }, + { + "epoch": 0.12, + "learning_rate": 6.496392093626402e-05, + "loss": 2.1322, + "step": 13 + }, + { + "epoch": 0.13, + "learning_rate": 6.493586867391882e-05, + "loss": 2.1486, + "step": 14 + }, + { + "epoch": 0.14, + "learning_rate": 6.489981334632665e-05, + "loss": 2.1351, + "step": 15 + }, + { + "epoch": 0.15, + "learning_rate": 6.48557638496001e-05, + "loss": 2.1366, + "step": 16 + }, + { + "epoch": 0.16, + "learning_rate": 6.480373105229334e-05, + "loss": 2.0727, + "step": 17 + }, + { + "epoch": 0.17, + "learning_rate": 6.474372779272053e-05, + "loss": 2.1123, + "step": 18 + }, + { + "epoch": 0.18, + "learning_rate": 6.467576887578811e-05, + "loss": 2.0652, + "step": 19 + }, + { + "epoch": 0.19, + "learning_rate": 6.459987106934198e-05, + "loss": 2.0008, + "step": 20 + }, + { + "epoch": 0.19, + "eval_loss": 2.0444116592407227, + "eval_runtime": 2.798, + "eval_samples_per_second": 3.574, + "eval_steps_per_second": 0.715, + "step": 20 + }, + { + "epoch": 0.2, + "learning_rate": 6.451605310003014e-05, + "loss": 2.0769, + "step": 21 + }, + { + "epoch": 0.21, + "learning_rate": 6.442433564868238e-05, + "loss": 1.9949, + "step": 22 + }, + { + "epoch": 0.22, + "learning_rate": 6.432474134520738e-05, + "loss": 2.1341, + "step": 23 + }, + { + "epoch": 0.23, + "learning_rate": 6.421729476300929e-05, + "loss": 2.1433, + "step": 24 + }, + { + "epoch": 0.24, + "learning_rate": 6.410202241292448e-05, + "loss": 2.0168, + "step": 25 + }, + { + "epoch": 0.25, + "learning_rate": 6.39789527366805e-05, + "loss": 2.0403, + "step": 26 + }, + { + "epoch": 0.26, + "learning_rate": 6.384811609987843e-05, + "loss": 2.1983, + "step": 27 + }, + { + "epoch": 0.27, + "learning_rate": 6.370954478450064e-05, + "loss": 2.0625, + "step": 28 + }, + { + "epoch": 0.28, + "learning_rate": 6.356327298094572e-05, + "loss": 2.204, + "step": 29 + }, + { + "epoch": 0.29, + "learning_rate": 6.340933677959248e-05, + "loss": 2.118, + "step": 30 + }, + { + "epoch": 0.3, + "learning_rate": 6.324777416189522e-05, + "loss": 2.0997, + "step": 31 + }, + { + "epoch": 0.3, + "learning_rate": 6.307862499101232e-05, + "loss": 2.1154, + "step": 32 + }, + { + "epoch": 0.31, + "learning_rate": 6.290193100197069e-05, + "loss": 2.0845, + "step": 33 + }, + { + "epoch": 0.32, + "learning_rate": 6.271773579136816e-05, + "loss": 2.0052, + "step": 34 + }, + { + "epoch": 0.33, + "learning_rate": 6.252608480661681e-05, + "loss": 1.956, + "step": 35 + }, + { + "epoch": 0.34, + "learning_rate": 6.232702533472938e-05, + "loss": 2.0958, + "step": 36 + }, + { + "epoch": 0.35, + "learning_rate": 6.212060649065197e-05, + "loss": 2.0634, + "step": 37 + }, + { + "epoch": 0.36, + "learning_rate": 6.190687920514563e-05, + "loss": 2.0033, + "step": 38 + }, + { + "epoch": 0.37, + "learning_rate": 6.168589621222e-05, + "loss": 2.1426, + "step": 39 + }, + { + "epoch": 0.38, + "learning_rate": 6.145771203612195e-05, + "loss": 1.9823, + "step": 40 + }, + { + "epoch": 0.38, + "eval_loss": 2.0226035118103027, + "eval_runtime": 2.8049, + "eval_samples_per_second": 3.565, + "eval_steps_per_second": 0.713, + "step": 40 + }, + { + "epoch": 0.39, + "learning_rate": 6.122238297788254e-05, + "loss": 2.2016, + "step": 41 + }, + { + "epoch": 0.4, + "learning_rate": 6.097996710142556e-05, + "loss": 2.1008, + "step": 42 + }, + { + "epoch": 0.41, + "learning_rate": 6.0730524219241214e-05, + "loss": 2.0821, + "step": 43 + }, + { + "epoch": 0.42, + "learning_rate": 6.047411587762816e-05, + "loss": 2.092, + "step": 44 + }, + { + "epoch": 0.43, + "learning_rate": 6.0210805341507994e-05, + "loss": 1.9617, + "step": 45 + }, + { + "epoch": 0.44, + "learning_rate": 5.994065757881549e-05, + "loss": 1.9358, + "step": 46 + }, + { + "epoch": 0.45, + "learning_rate": 5.966373924446878e-05, + "loss": 2.0573, + "step": 47 + }, + { + "epoch": 0.46, + "learning_rate": 5.938011866392325e-05, + "loss": 2.0731, + "step": 48 + }, + { + "epoch": 0.47, + "learning_rate": 5.9089865816313253e-05, + "loss": 2.2842, + "step": 49 + }, + { + "epoch": 0.48, + "learning_rate": 5.8793052317185785e-05, + "loss": 2.2145, + "step": 50 + } + ], + "logging_steps": 1, + "max_steps": 210, + "num_train_epochs": 2, + "save_steps": 50, + "total_flos": 4.125056651650662e+16, + "trial_name": null, + "trial_params": null +} diff --git a/prompt-a/onechar/checkpoint-50/training_args.bin b/prompt-a/onechar/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6a9833d8c69d00318139a8fc6e1126e7d7ac62f --- /dev/null +++ b/prompt-a/onechar/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff85b6b2e99bcdbf030f4c883beac4553b8cad32f31f2d286263119b723c469 +size 4411 diff --git a/prompt-a/onechar/config.json b/prompt-a/onechar/config.json new file mode 100644 index 0000000000000000000000000000000000000000..99be7fbf147c96ef93ed720729ea90e8e21bdfca --- /dev/null +++ b/prompt-a/onechar/config.json @@ -0,0 +1,38 @@ +{ + "_name_or_path": "meta-llama/Llama-2-13b-hf", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 40, + "num_key_value_heads": 40, + "pretraining_tp": 1, + "quantization_config": { + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "llm_int8_enable_fp32_cpu_offload": false, + "llm_int8_has_fp16_weight": false, + "llm_int8_skip_modules": null, + "llm_int8_threshold": 6.0, + "load_in_4bit": true, + "load_in_8bit": false, + "quant_method": "bitsandbytes" + }, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.34.0.dev0", + "use_cache": false, + "vocab_size": 32001 +} diff --git a/prompt-a/onechar/special_tokens_map.json b/prompt-a/onechar/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e --- /dev/null +++ b/prompt-a/onechar/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "" +} diff --git a/prompt-a/onechar/tokenizer.model b/prompt-a/onechar/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/prompt-a/onechar/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/prompt-a/onechar/tokenizer_config.json b/prompt-a/onechar/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..77849a9a3ca128f00bc3febdc9b39391a4ef43a3 --- /dev/null +++ b/prompt-a/onechar/tokenizer_config.json @@ -0,0 +1,39 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "use_default_system_prompt": true, + "use_fast": true +}