ai-maker-space/llama381binstruct_summarize_short

Browse files

Files changed (5)

README.md +23 -23
adapter_config.json +3 -3
adapter_model.safetensors +1 -1
runs/Sep19_21-25-07_2da8595c7f48/events.out.tfevents.1726781412.2da8595c7f48.30005.0 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.6176
 ## Model description
@@ -50,28 +50,28 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 1.6519        | 1.25  | 25   | 1.8402          |
-| 0.7039        | 2.5   | 50   | 1.7272          |
-| 0.3793        | 3.75  | 75   | 1.7206          |
-| 0.1875        | 5.0   | 100  | 1.7496          |
-| 0.115         | 6.25  | 125  | 1.9070          |
-| 0.027         | 7.5   | 150  | 2.2242          |
-| 0.0211        | 8.75  | 175  | 2.2373          |
-| 0.0227        | 10.0  | 200  | 2.3110          |
-| 0.0138        | 11.25 | 225  | 2.3410          |
-| 0.0053        | 12.5  | 250  | 2.4058          |
-| 0.0037        | 13.75 | 275  | 2.4090          |
-| 0.0036        | 15.0  | 300  | 2.4907          |
-| 0.0024        | 16.25 | 325  | 2.5411          |
-| 0.0024        | 17.5  | 350  | 2.5661          |
-| 0.0021        | 18.75 | 375  | 2.5838          |
-| 0.0021        | 20.0  | 400  | 2.5969          |
-| 0.0018        | 21.25 | 425  | 2.6064          |
-| 0.0017        | 22.5  | 450  | 2.6128          |
-| 0.0015        | 23.75 | 475  | 2.6165          |
-| 0.0019        | 25.0  | 500  | 2.6176          |
 ### Framework versions

 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.3076
 ## Model description
 ### Training results
+| Training Loss | Epoch   | Step | Validation Loss |
+|:-------------:|:-------:|:----:|:---------------:|
+| 0.146         | 1.4706  | 25   | 2.0147          |
+| 0.1176        | 2.9412  | 50   | 1.8274          |
+| 0.0611        | 4.4118  | 75   | 1.8771          |
+| 0.0417        | 5.8824  | 100  | 1.9553          |
+| 0.0388        | 7.3529  | 125  | 1.8213          |
+| 0.0209        | 8.8235  | 150  | 2.0744          |
+| 0.0198        | 10.2941 | 175  | 2.0470          |
+| 0.0103        | 11.7647 | 200  | 2.1113          |
+| 0.0089        | 13.2353 | 225  | 2.0668          |
+| 0.0062        | 14.7059 | 250  | 2.0936          |
+| 0.0082        | 16.1765 | 275  | 2.0592          |
+| 0.0044        | 17.6471 | 300  | 2.1819          |
+| 0.0025        | 19.1176 | 325  | 2.2406          |
+| 0.0021        | 20.5882 | 350  | 2.2534          |
+| 0.0021        | 22.0588 | 375  | 2.2745          |
+| 0.0018        | 23.5294 | 400  | 2.2877          |
+| 0.0018        | 25.0    | 425  | 2.2974          |
+| 0.0016        | 26.4706 | 450  | 2.3034          |
+| 0.0017        | 27.9412 | 475  | 2.3066          |
+| 0.0016        | 29.4118 | 500  | 2.3076          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
     "gate_proj",
-    "o_proj",
     "up_proj",
     "v_proj",
     "down_proj",
-    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "gate_proj",
     "up_proj",
     "v_proj",
     "down_proj",
+    "o_proj",
+    "k_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1921e999e4db2f18dbe49e05ef1ca86d9ac428277eee7dae829d147957b21fa0
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:af468446596f2485ce1cf44d82f1a518d25977c66e6038ebf583bfc1323835cc
 size 167832240

runs/Sep19_21-25-07_2da8595c7f48/events.out.tfevents.1726781412.2da8595c7f48.30005.0 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07c62d62a7e50f218f27652090c64069294f38119cffd089a9c0e7b72768d9ef
+size 36024

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ada2542ec51c15f92754649911fd9f955f4e8905ab09072a796ade592b52c20b
 size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d7e107c4fb49ed9ad0a36a9ea56389064b74ecd9bd00f19faabf5e86d8c5f55
 size 5496