Model save

Files changed (9) hide show

README.md CHANGED Viewed

@@ -2,11 +2,10 @@
 license: apache-2.0
 library_name: peft
 tags:
-- alignment-handbook
 - generated_from_trainer
 base_model: mistralai/Mistral-7B-v0.1
 datasets:
-- HuggingFaceH4/ultrachat_200k
 model-index:
 - name: Test_with_new_script
   results: []
@@ -17,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 # Test_with_new_script
-This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrachat_200k dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.9985
 ## Model description
@@ -55,7 +54,7 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.0052        | 1.0   | 22   | 0.9985          |
 ### Framework versions

 license: apache-2.0
 library_name: peft
 tags:
 - generated_from_trainer
 base_model: mistralai/Mistral-7B-v0.1
 datasets:
+- generator
 model-index:
 - name: Test_with_new_script
   results: []
 # Test_with_new_script
+This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.9987
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 1.0053        | 1.0   | 22   | 0.9987          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "gate_proj",
-    "q_proj",
     "v_proj",
-    "k_proj",
     "o_proj",
-    "up_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
     "gate_proj",
+    "up_proj",
     "v_proj",
     "o_proj",
+    "down_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67010e2a89699ee21c974ed33ddbce0d2b58366053107584dc70fbfcdebfadc0
 size 83946192

 version https://git-lfs.github.com/spec/v1
+oid sha256:79e3290b10cd48b06f8e4c3717bcf1e5e588fbb91f09c87a04308168b73d4eb2
 size 83946192

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
     "epoch": 1.0,
-    "eval_loss": 0.9985440969467163,
-    "eval_runtime": 10.4315,
     "eval_samples": 154,
-    "eval_samples_per_second": 14.763,
-    "eval_steps_per_second": 0.959,
-    "train_loss": 1.0528265129436145,
-    "train_runtime": 330.3819,
     "train_samples": 1383,
-    "train_samples_per_second": 4.186,
     "train_steps_per_second": 0.067
 }

 {
     "epoch": 1.0,
+    "eval_loss": 0.9987491965293884,
+    "eval_runtime": 10.4357,
     "eval_samples": 154,
+    "eval_samples_per_second": 14.757,
+    "eval_steps_per_second": 0.958,
+    "train_loss": 1.0528897643089294,
+    "train_runtime": 330.7732,
     "train_samples": 1383,
+    "train_samples_per_second": 4.181,
     "train_steps_per_second": 0.067
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "eval_loss": 0.9985440969467163,
-    "eval_runtime": 10.4315,
     "eval_samples": 154,
-    "eval_samples_per_second": 14.763,
-    "eval_steps_per_second": 0.959
 }

 {
     "epoch": 1.0,
+    "eval_loss": 0.9987491965293884,
+    "eval_runtime": 10.4357,
     "eval_samples": 154,
+    "eval_samples_per_second": 14.757,
+    "eval_steps_per_second": 0.958
 }

mm_projector.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:45b9a8fa1054e5d16c0a47352800b7953c51d48a0adb004c95f427c7e37f6481
+size 16795126

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 1.0528265129436145,
-    "train_runtime": 330.3819,
     "train_samples": 1383,
-    "train_samples_per_second": 4.186,
     "train_steps_per_second": 0.067
 }

 {
     "epoch": 1.0,
+    "train_loss": 1.0528897643089294,
+    "train_runtime": 330.7732,
     "train_samples": 1383,
+    "train_samples_per_second": 4.181,
     "train_steps_per_second": 0.067
 }

trainer_state.json CHANGED Viewed

@@ -17,42 +17,42 @@
     {
       "epoch": 0.23,
       "learning_rate": 0.00019458172417006347,
-      "loss": 1.1529,
       "step": 5
     },
     {
       "epoch": 0.45,
       "learning_rate": 0.00014016954246529696,
-      "loss": 1.06,
       "step": 10
     },
     {
       "epoch": 0.68,
       "learning_rate": 5.983045753470308e-05,
-      "loss": 1.0093,
       "step": 15
     },
     {
       "epoch": 0.91,
       "learning_rate": 5.418275829936537e-06,
-      "loss": 1.0052,
       "step": 20
     },
     {
       "epoch": 1.0,
-      "eval_loss": 0.9985440969467163,
-      "eval_runtime": 10.4369,
-      "eval_samples_per_second": 14.755,
-      "eval_steps_per_second": 0.958,
       "step": 22
     },
     {
       "epoch": 1.0,
       "step": 22,
       "total_flos": 1.2907203290936115e+17,
-      "train_loss": 1.0528265129436145,
-      "train_runtime": 330.3819,
-      "train_samples_per_second": 4.186,
       "train_steps_per_second": 0.067
     }
   ],

     {
       "epoch": 0.23,
       "learning_rate": 0.00019458172417006347,
+      "loss": 1.1528,
       "step": 5
     },
     {
       "epoch": 0.45,
       "learning_rate": 0.00014016954246529696,
+      "loss": 1.0601,
       "step": 10
     },
     {
       "epoch": 0.68,
       "learning_rate": 5.983045753470308e-05,
+      "loss": 1.0094,
       "step": 15
     },
     {
       "epoch": 0.91,
       "learning_rate": 5.418275829936537e-06,
+      "loss": 1.0053,
       "step": 20
     },
     {
       "epoch": 1.0,
+      "eval_loss": 0.9987491965293884,
+      "eval_runtime": 10.4464,
+      "eval_samples_per_second": 14.742,
+      "eval_steps_per_second": 0.957,
       "step": 22
     },
     {
       "epoch": 1.0,
       "step": 22,
       "total_flos": 1.2907203290936115e+17,
+      "train_loss": 1.0528897643089294,
+      "train_runtime": 330.7732,
+      "train_samples_per_second": 4.181,
       "train_steps_per_second": 0.067
     }
   ],

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4ec1a30ffc3e3543c6f3726587f9e4f89d6d59b28a1239701a7135804e54a0d
 size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:451e31abe3ca5507ccd26609911512762408195c872245e20da2022bdd8ebc40
 size 4856