ldldld committed on
Commit
014badd
1 Parent(s): b9b4032

ai-maker-space/llama381binstruct_summarize_short

Browse files
README.md CHANGED
@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 2.6176
24
 
25
  ## Model description
26
 
@@ -50,28 +50,28 @@ The following hyperparameters were used during training:
50
 
51
  ### Training results
52
 
53
- | Training Loss | Epoch | Step | Validation Loss |
54
- |:-------------:|:-----:|:----:|:---------------:|
55
- | 1.6519 | 1.25 | 25 | 1.8402 |
56
- | 0.7039 | 2.5 | 50 | 1.7272 |
57
- | 0.3793 | 3.75 | 75 | 1.7206 |
58
- | 0.1875 | 5.0 | 100 | 1.7496 |
59
- | 0.115 | 6.25 | 125 | 1.9070 |
60
- | 0.027 | 7.5 | 150 | 2.2242 |
61
- | 0.0211 | 8.75 | 175 | 2.2373 |
62
- | 0.0227 | 10.0 | 200 | 2.3110 |
63
- | 0.0138 | 11.25 | 225 | 2.3410 |
64
- | 0.0053 | 12.5 | 250 | 2.4058 |
65
- | 0.0037 | 13.75 | 275 | 2.4090 |
66
- | 0.0036 | 15.0 | 300 | 2.4907 |
67
- | 0.0024 | 16.25 | 325 | 2.5411 |
68
- | 0.0024 | 17.5 | 350 | 2.5661 |
69
- | 0.0021 | 18.75 | 375 | 2.5838 |
70
- | 0.0021 | 20.0 | 400 | 2.5969 |
71
- | 0.0018 | 21.25 | 425 | 2.6064 |
72
- | 0.0017 | 22.5 | 450 | 2.6128 |
73
- | 0.0015 | 23.75 | 475 | 2.6165 |
74
- | 0.0019 | 25.0 | 500 | 2.6176 |
75
 
76
 
77
  ### Framework versions
 
20
 
21
  This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 2.3076
24
 
25
  ## Model description
26
 
 
50
 
51
  ### Training results
52
 
53
+ | Training Loss | Epoch | Step | Validation Loss |
54
+ |:-------------:|:-------:|:----:|:---------------:|
55
+ | 0.146 | 1.4706 | 25 | 2.0147 |
56
+ | 0.1176 | 2.9412 | 50 | 1.8274 |
57
+ | 0.0611 | 4.4118 | 75 | 1.8771 |
58
+ | 0.0417 | 5.8824 | 100 | 1.9553 |
59
+ | 0.0388 | 7.3529 | 125 | 1.8213 |
60
+ | 0.0209 | 8.8235 | 150 | 2.0744 |
61
+ | 0.0198 | 10.2941 | 175 | 2.0470 |
62
+ | 0.0103 | 11.7647 | 200 | 2.1113 |
63
+ | 0.0089 | 13.2353 | 225 | 2.0668 |
64
+ | 0.0062 | 14.7059 | 250 | 2.0936 |
65
+ | 0.0082 | 16.1765 | 275 | 2.0592 |
66
+ | 0.0044 | 17.6471 | 300 | 2.1819 |
67
+ | 0.0025 | 19.1176 | 325 | 2.2406 |
68
+ | 0.0021 | 20.5882 | 350 | 2.2534 |
69
+ | 0.0021 | 22.0588 | 375 | 2.2745 |
70
+ | 0.0018 | 23.5294 | 400 | 2.2877 |
71
+ | 0.0018 | 25.0 | 425 | 2.2974 |
72
+ | 0.0016 | 26.4706 | 450 | 2.3034 |
73
+ | 0.0017 | 27.9412 | 475 | 2.3066 |
74
+ | 0.0016 | 29.4118 | 500 | 2.3076 |
75
 
76
 
77
  ### Framework versions
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
  "gate_proj",
25
- "o_proj",
26
  "up_proj",
27
  "v_proj",
28
  "down_proj",
29
- "k_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "gate_proj",
 
24
  "up_proj",
25
  "v_proj",
26
  "down_proj",
27
+ "o_proj",
28
+ "k_proj",
29
+ "q_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1921e999e4db2f18dbe49e05ef1ca86d9ac428277eee7dae829d147957b21fa0
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af468446596f2485ce1cf44d82f1a518d25977c66e6038ebf583bfc1323835cc
3
  size 167832240
runs/Sep19_21-25-07_2da8595c7f48/events.out.tfevents.1726781412.2da8595c7f48.30005.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c62d62a7e50f218f27652090c64069294f38119cffd089a9c0e7b72768d9ef
3
+ size 36024
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ada2542ec51c15f92754649911fd9f955f4e8905ab09072a796ade592b52c20b
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d7e107c4fb49ed9ad0a36a9ea56389064b74ecd9bd00f19faabf5e86d8c5f55
3
  size 5496