---
license: other
license_name: other
license_link: LICENSE
---
Model merged with the [Reborn Merge Method](https://medium.com/@puffanddmx82/reborn-elevating-model-adaptation-with-merging-for-superior-nlp-performance-f604e8e307b2).

Keep in mind that this merge's accuracy on the questions you care about may vary.

Could this merge serve as a base for my future merge work?

I hope this merged model combines knowledge and grammar well enough that it doesn't just give strange, nonsensical answers. Then I can cook up some cool new food with the next merge...

P.S. The above is not meant to suggest that any of the source models is strange; it means I could be doing the merge wrong. I hope there is no misunderstanding.

I am open to collaboration and more, if you are interested.
## Reborn Merge Information

```
[models info]
reference_model_name = "MLP-KTLim/llama-3-Korean-Bllossom-8B"
base_model_name = "NousResearch/Meta-Llama-3-8B-Instruct"
target_model_name = "maum-ai/Llama-3-MAAL-8B-Instruct-v0.1"

[interpolating mismatch part vocab]
Interpolating tensor 'model.embed_tokens.weight' to match the shape: torch.Size([145088, 4096]) vs torch.Size([128256, 4096])
Interpolating tensor 'lm_head.weight' to match the shape: torch.Size([145088, 4096]) vs torch.Size([128256, 4096])
Interpolating tensor 'model.embed_tokens.weight' to match the shape: torch.Size([128256, 4096]) vs torch.Size([128257, 4096])
Interpolating tensor 'lm_head.weight' to match the shape: torch.Size([128256, 4096]) vs torch.Size([128257, 4096])
```
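The log above reflects the step that reconciles vocab-size mismatches before the weights are combined: the embedding and output-head matrices are resized so all three models share one shape. Below is a minimal PyTorch sketch of how such a resize can be done; it is my illustration under assumptions, not the actual Reborn merge code, and `interpolate_vocab` is a hypothetical helper name.

```python
import torch
import torch.nn.functional as F

def interpolate_vocab(weight: torch.Tensor, target_rows: int) -> torch.Tensor:
    """Resize a [vocab, hidden] matrix to [target_rows, hidden] by linear
    interpolation along the vocabulary axis."""
    # F.interpolate expects [batch, channels, length]; treat the hidden
    # dimension as channels and the vocab axis as the length to resample.
    x = weight.T.unsqueeze(0)                    # [1, hidden, vocab]
    x = F.interpolate(x, size=target_rows, mode="linear", align_corners=True)
    return x.squeeze(0).T.contiguous()           # [target_rows, hidden]

# Toy shapes for illustration; the logged tensors (e.g. resizing
# [145088, 4096] to [128256, 4096]) would be handled the same way.
src = torch.randn(16, 8)
print(interpolate_vocab(src, 12).shape)  # torch.Size([12, 8])
```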

## Ollama Create

```
jaylee@lees-MacBook-Pro-2 % ./ollama create Joah -f ./gguf/Joah-Llama-3-MAAL-MLP-KoEn-8B-Reborn/Modelfile_Q5_K_M
transferring model data
creating model layer
creating template layer
creating system layer
creating parameters layer
creating config layer
using already created layer sha256:4eadb53f0c70683aeab133c60d76b8ffc9f41ca5d49524d4b803c19e5ce7e3a5
using already created layer sha256:8ab4849b038cf0abc5b1c9b8ee1443dca6b93a045c2272180d985126eb40bf6f
writing layer sha256:ae2974c64ea5d6f488eeb1b10717a270f48fb3452432589db6f5e60472ae96ac
writing layer sha256:74ef6315972b317734fe01e7e1ad5b49fce1fa8ed3978cb66501ecb8c3a2e984
writing layer sha256:83882a5e957b8ce0d454f26bcedb2819413b49d6b967b28d60edb8ac61edfa58
writing manifest
success
```
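After a successful create, the model can be tried from the same terminal. A usage sketch, assuming the `Joah` name registered by the create command above:

```
./ollama run Joah "Hello! Please introduce yourself."
```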

## Modelfile

```
FROM joah-llama-3-maal-mlp-koen-8b-reborn-Q5_K_M.gguf
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>

{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>

{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>

{{ .Response }}<|eot_id|>"""

SYSTEM """
친절한 챗봇으로서 상대방의 요청에 최대한 자세하고 친절하게 답하자. 모든 대답은 한국어(Korean)으로 대답해줘.
"""

PARAMETER num_keep 24
PARAMETER temperature 0.7
PARAMETER num_predict 3000
PARAMETER stop "<|start_header_id|>"
PARAMETER stop "<|end_header_id|>"
PARAMETER stop "<|eot_id|>"
```
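For reference, the Korean SYSTEM prompt translates roughly as: "As a friendly chatbot, answer the other person's requests as kindly and in as much detail as possible. Give every answer in Korean."

Once registered, the model can also be queried through Ollama's local REST API. A hedged sketch using the standard `/api/generate` endpoint with the model name from above:

```
curl http://localhost:11434/api/generate -d '{
  "model": "Joah",
  "prompt": "Please introduce yourself.",
  "stream": false
}'
```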

## Citation

**Language Model**
```text
@misc{bllossom,
  author = {ChangSu Choi and Yongbin Jeong and Seoyoon Park and InHo Won and HyeonSeok Lim and SangMin Kim and Yejee Kang and Chanhyuk Yoon and Jaewan Park and Yiseul Lee and HyeJin Lee and Younggyun Hahm and Hansaem Kim and KyungTae Lim},
  title = {Optimizing Language Augmentation for Multilingual Large Language Models: A Case Study on Korean},
  year = {2024},
  journal = {LREC-COLING 2024},
  paperLink = {\url{https://arxiv.org/pdf/2403.10882}}
}

@article{llama3modelcard,
  title = {Llama 3 Model Card},
  author = {AI@Meta},
  year = {2024},
  url = {https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md}
}
```