A 4-bit AWQ version of the [lightblue/Karasu-Mixtral-8x22B-v0.1](https://huggingface.co/lightblue/Karasu-Mixtral-8x22B-v0.1) model.
Quantized using the following code:
```python
from awq import AutoAWQForCausalLM
import pandas as pd
from transformers import AutoTokenizer
from tqdm.auto import tqdm

model_path = '/workspace/llm_training/axolotl/mixtral_8x22B_training/merged_model_multiling'
quant_path = '/workspace/llm_training/axolotl/mixtral_8x22B_training/merged_model_multiling-awq'

# The same dataset as in lightblue/gpt4_conversations_multilingual,
# stored as JSON Lines in the ShareGPT format
df = pd.read_json(
    "/workspace/llm_training/axolotl/mixtral_8x22B_training/sharegpt4_multilingual.json",
    lines=True)

# Map ShareGPT role names to the role names the chat template expects
role_map = {
    "human": "user",
    "gpt": "assistant",
}

df["messages"] = df.conversations.apply(lambda x: [{"role": role_map[y["from"]], "content": y["value"]} for y in x])

# Render each conversation with the model's chat template to produce
# plain-text calibration examples
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
examples = [
    tokenizer.apply_chat_template(
        x, tokenize=False, add_generation_prompt=False
    ) for x in tqdm(df["messages"])
]

# 4-bit quantization with group size 128 and the GEMM kernel
quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }

# Load model
model = AutoAWQForCausalLM.from_pretrained(model_path)

# Quantize on the chat-formatted calibration data
model.quantize(tokenizer, quant_config=quant_config, calib_data=examples)

# Save quantized model
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)
```
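
For inference, the quantized checkpoint can be loaded with AutoAWQ. Below is a minimal sketch; the repository id and the prompt are illustrative assumptions, not part of the quantization code above:

```python
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

# Assumed to be this repository's id; a local path to the
# quantized checkpoint works as well
quant_path = "lightblue/Karasu-Mixtral-8x22B-v0.1-AWQ"

model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
tokenizer = AutoTokenizer.from_pretrained(quant_path)

# Build a prompt with the model's chat template
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello! Who are you?"}],
    tokenize=False,
    add_generation_prompt=True,
)

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

The checkpoint should also work with inference engines that support AWQ, such as vLLM with `quantization="awq"`.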