Upload folder using huggingface_hub

llm_config.py  ADDED  (+727 -0)
@@ -0,0 +1,727 @@

DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\
"""

DEFAULT_SYSTEM_PROMPT_CHINESE = """\
你是一个乐于助人、尊重他人以及诚实可靠的助手。在安全的情况下,始终尽可能有帮助地回答。 您的回答不应包含任何有害、不道德、种族主义、性别歧视、有毒、危险或非法的内容。请确保您的回答在社会上是公正的和积极的。
如果一个问题没有任何意义或与事实不符,请解释原因,而不是回答错误的问题。如果您不知道问题的答案,请不要分享虚假信息。另外,答案请使用中文。\
"""

DEFAULT_SYSTEM_PROMPT_JAPANESE = """\
あなたは親切で、礼儀正しく、誠実なアシスタントです。 常に安全を保ちながら、できるだけ役立つように答えてください。 回答には、有害、非倫理的、人種差別的、性差別的、有毒、危険、または違法なコンテンツを含めてはいけません。 回答は社会的に偏見がなく、本質的に前向きなものであることを確認してください。
質問が意味をなさない場合、または事実に一貫性がない場合は、正しくないことに答えるのではなく、その理由を説明してください。 質問の答えがわからない場合は、誤った情報を共有しないでください。\
"""

DEFAULT_RAG_PROMPT = """\
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\
"""

DEFAULT_RAG_PROMPT_CHINESE = """\
基于以下已知信息,请简洁并专业地回答用户的问题。如果无法从中得到答案,请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息"。不允许在答案中添加编造成分。另外,答案请使用中文。\
"""


def red_pijama_partial_text_processor(partial_text, new_text):
    if new_text == "<":
        return partial_text

    partial_text += new_text
    return partial_text.split("<bot>:")[-1]


def llama_partial_text_processor(partial_text, new_text):
    new_text = new_text.replace("[INST]", "").replace("[/INST]", "")
    partial_text += new_text
    return partial_text


def chatglm_partial_text_processor(partial_text, new_text):
    new_text = new_text.strip()
    new_text = new_text.replace("[[训练时间]]", "2023年")
    partial_text += new_text
    return partial_text


def youri_partial_text_processor(partial_text, new_text):
    new_text = new_text.replace("システム:", "")
    partial_text += new_text
    return partial_text


def internlm_partial_text_processor(partial_text, new_text):
    partial_text += new_text
    return partial_text.split("<|im_end|>")[0]

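
# The *_partial_text_processor helpers above are meant for token streaming: a
# UI accumulates decoded chunks, and each helper strips that model family's
# control markers from the running text. A minimal sketch (the chunk values
# are illustrative, not part of this module):
#
#   text = ""
#   for chunk in ["Hello", " world", "<|im_end|>"]:
#       text = internlm_partial_text_processor(text, chunk)
#   # text == "Hello world"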

def phi_completion_to_prompt(completion):
    return f"<|system|><|end|><|user|>{completion}<|end|><|assistant|>\n"


def llama3_completion_to_prompt(completion):
    return f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{completion}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"


def qwen_completion_to_prompt(completion):
    return f"<|im_start|>system\n<|im_end|>\n<|im_start|>user\n{completion}<|im_end|>\n<|im_start|>assistant\n"

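
# Each completion_to_prompt helper wraps a bare completion string in the chat
# markup its model family expects, with an empty system turn. For example,
# qwen_completion_to_prompt("Hi") evaluates to:
#   "<|im_start|>system\n<|im_end|>\n<|im_start|>user\nHi<|im_end|>\n<|im_start|>assistant\n"
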
SUPPORTED_LLM_MODELS = {
    "English": {
        "qwen2-0.5b-instruct": {
            "model_id": "Qwen/Qwen2-0.5B-Instruct",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT,
            "stop_tokens": ["<|im_end|>", "<|endoftext|>"],
            "completion_to_prompt": qwen_completion_to_prompt,
        },
        "tiny-llama-1b-chat": {
            "model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            "remote_code": False,
            "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}</s>\n",
            "history_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}</s> \n",
            "current_message_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}",
            "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT}</s>"""
            + """
<|user|>
Question: {input}
Context: {context}
Answer: </s>
<|assistant|>""",
        },
        "llama-3.2-1b-instruct": {
            "model_id": "meta-llama/Llama-3.2-1B-Instruct",
            "stop_tokens": ["<|eot_id|>"],
            "has_chat_template": True,
            "start_message": " <|start_header_id|>system<|end_header_id|>\n\n" + DEFAULT_SYSTEM_PROMPT + "<|eot_id|>",
            "history_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}<|eot_id|>",
            "current_message_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}",
            "rag_prompt_template": f"<|start_header_id|>system<|end_header_id|>\n\n{DEFAULT_RAG_PROMPT}<|eot_id|>"
            + """<|start_header_id|>user<|end_header_id|>


Question: {input}
Context: {context}
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|>


""",
            "completion_to_prompt": llama3_completion_to_prompt,
        },
        "llama-3.2-3b-instruct": {
            "model_id": "meta-llama/Llama-3.2-3B-Instruct",
            "stop_tokens": ["<|eot_id|>"],
            "has_chat_template": True,
            "start_message": " <|start_header_id|>system<|end_header_id|>\n\n" + DEFAULT_SYSTEM_PROMPT + "<|eot_id|>",
            "history_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}<|eot_id|>",
            "current_message_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}",
            "rag_prompt_template": f"<|start_header_id|>system<|end_header_id|>\n\n{DEFAULT_RAG_PROMPT}<|eot_id|>"
            + """<|start_header_id|>user<|end_header_id|>


Question: {input}
Context: {context}
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|>


""",
            "completion_to_prompt": llama3_completion_to_prompt,
        },
        "qwen2-1.5b-instruct": {
            "model_id": "Qwen/Qwen2-1.5B-Instruct",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT,
            "stop_tokens": ["<|im_end|>", "<|endoftext|>"],
            "completion_to_prompt": qwen_completion_to_prompt,
        },
        "gemma-2b-it": {
            "model_id": "google/gemma-2b-it",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT + ", ",
            "history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>",
            "current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}",
            "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""
            + """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""",
        },
        "gemma-2-2b-it": {
            "model_id": "google/gemma-2-2b-it",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT + ", ",
            "history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>",
            "current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}",
            "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""
            + """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""",
        },
        "red-pajama-3b-chat": {
            "model_id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1",
            "remote_code": False,
            "start_message": "",
            "history_template": "\n<human>:{user}\n<bot>:{assistant}",
            "stop_tokens": [29, 0],
            "partial_text_processor": red_pijama_partial_text_processor,
            "current_message_template": "\n<human>:{user}\n<bot>:{assistant}",
            "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT}"""
            + """
<human>: Question: {input}
Context: {context}
Answer: <bot>""",
        },
        "qwen2-7b-instruct": {
            "model_id": "Qwen/Qwen2-7B-Instruct",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT + ", ",
            "rag_prompt_template": f"""<|im_start|>system
{DEFAULT_RAG_PROMPT}<|im_end|>"""
            + """
<|im_start|>user
Question: {input}
Context: {context}
Answer: <|im_end|>
<|im_start|>assistant
""",
            "completion_to_prompt": qwen_completion_to_prompt,
        },
        "gemma-7b-it": {
            "model_id": "google/gemma-7b-it",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT + ", ",
            "history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>",
            "current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}",
            "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""
            + """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""",
        },
        "gemma-2-9b-it": {
            "model_id": "google/gemma-2-9b-it",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT + ", ",
            "history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>",
            "current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}",
            "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},"""
            + """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""",
        },
        "llama-2-chat-7b": {
            "model_id": "meta-llama/Llama-2-7b-chat-hf",
            "remote_code": False,
            "start_message": f"<s>[INST] <<SYS>>\n{DEFAULT_SYSTEM_PROMPT}\n<</SYS>>\n\n",
            "history_template": "{user}[/INST]{assistant}</s><s>[INST]",
            "current_message_template": "{user} [/INST]{assistant}",
            "tokenizer_kwargs": {"add_special_tokens": False},
            "partial_text_processor": llama_partial_text_processor,
            "rag_prompt_template": f"""[INST]Human: <<SYS>> {DEFAULT_RAG_PROMPT}<</SYS>>"""
            + """
Question: {input}
Context: {context}
Answer: [/INST]""",
        },
        "llama-3-8b-instruct": {
            "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
            "remote_code": False,
            "stop_tokens": ["<|eot_id|>", "<|end_of_text|>"],
            "has_chat_template": True,
            "start_message": " <|start_header_id|>system<|end_header_id|>\n\n" + DEFAULT_SYSTEM_PROMPT + "<|eot_id|>",
            "history_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}<|eot_id|>",
            "current_message_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}",
            "rag_prompt_template": f"<|start_header_id|>system<|end_header_id|>\n\n{DEFAULT_RAG_PROMPT}<|eot_id|>"
            + """<|start_header_id|>user<|end_header_id|>


Question: {input}
Context: {context}
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|>


""",
            "completion_to_prompt": llama3_completion_to_prompt,
        },
        "llama-3.1-8b-instruct": {
            "model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
            "remote_code": False,
            "stop_tokens": ["<|eot_id|>", "<|end_of_text|>"],
            "has_chat_template": True,
            "start_message": " <|start_header_id|>system<|end_header_id|>\n\n" + DEFAULT_SYSTEM_PROMPT + "<|eot_id|>",
            "history_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}<|eot_id|>",
            "current_message_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}",
            "rag_prompt_template": f"<|start_header_id|>system<|end_header_id|>\n\n{DEFAULT_RAG_PROMPT}<|eot_id|>"
            + """<|start_header_id|>user<|end_header_id|>


Question: {input}
Context: {context}
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|>


""",
            "completion_to_prompt": llama3_completion_to_prompt,
        },
        "mistral-7b-instruct": {
            "model_id": "mistralai/Mistral-7B-Instruct-v0.1",
            "remote_code": False,
            "start_message": f"<s>[INST] <<SYS>>\n{DEFAULT_SYSTEM_PROMPT}\n<</SYS>>\n\n",
            "history_template": "{user}[/INST]{assistant}</s><s>[INST]",
            "current_message_template": "{user} [/INST]{assistant}",
            "tokenizer_kwargs": {"add_special_tokens": False},
            "partial_text_processor": llama_partial_text_processor,
            "rag_prompt_template": f"""<s> [INST] {DEFAULT_RAG_PROMPT} [/INST] </s>"""
            + """
[INST] Question: {input}
Context: {context}
Answer: [/INST]""",
        },
        "zephyr-7b-beta": {
            "model_id": "HuggingFaceH4/zephyr-7b-beta",
            "remote_code": False,
            "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}</s>\n",
            "history_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}</s> \n",
            "current_message_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}",
            "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT}</s>"""
            + """
<|user|>
Question: {input}
Context: {context}
Answer: </s>
<|assistant|>""",
        },
        "notus-7b-v1": {
            "model_id": "argilla/notus-7b-v1",
            "remote_code": False,
            "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}</s>\n",
            "history_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}</s> \n",
            "current_message_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}",
            "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT}</s>"""
            + """
<|user|>
Question: {input}
Context: {context}
Answer: </s>
<|assistant|>""",
        },
        "neural-chat-7b-v3-3": {
            "model_id": "Intel/neural-chat-7b-v3-3",
            "remote_code": False,
            "start_message": f"<s>[INST] <<SYS>>\n{DEFAULT_SYSTEM_PROMPT}\n<</SYS>>\n\n",
            "history_template": "{user}[/INST]{assistant}</s><s>[INST]",
            "current_message_template": "{user} [/INST]{assistant}",
            "tokenizer_kwargs": {"add_special_tokens": False},
            "partial_text_processor": llama_partial_text_processor,
            "rag_prompt_template": f"""<s> [INST] {DEFAULT_RAG_PROMPT} [/INST] </s>"""
            + """
[INST] Question: {input}
Context: {context}
Answer: [/INST]""",
        },
        "phi-3-mini-instruct": {
            "model_id": "microsoft/Phi-3-mini-4k-instruct",
            "remote_code": True,
            "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}<|end|>\n",
            "history_template": "<|user|>\n{user}<|end|> \n<|assistant|>\n{assistant}<|end|>\n",
            "current_message_template": "<|user|>\n{user}<|end|> \n<|assistant|>\n{assistant}",
            "stop_tokens": ["<|end|>"],
            "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT}<|end|>"""
            + """
<|user|>
Question: {input}
Context: {context}
Answer: <|end|>
<|assistant|>""",
            "completion_to_prompt": phi_completion_to_prompt,
        },
        "phi-3.5-mini-instruct": {
            "model_id": "microsoft/Phi-3.5-mini-instruct",
            "remote_code": True,
            "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}<|end|>\n",
            "history_template": "<|user|>\n{user}<|end|> \n<|assistant|>\n{assistant}<|end|>\n",
            "current_message_template": "<|user|>\n{user}<|end|> \n<|assistant|>\n{assistant}",
            "stop_tokens": ["<|end|>"],
            "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT}<|end|>"""
            + """
<|user|>
Question: {input}
Context: {context}
Answer: <|end|>
<|assistant|>""",
            "completion_to_prompt": phi_completion_to_prompt,
        },
    },
    "Chinese": {
        "qwen2-0.5b-instruct": {
            "model_id": "Qwen/Qwen2-0.5B-Instruct",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE,
            "stop_tokens": ["<|im_end|>", "<|endoftext|>"],
            "completion_to_prompt": qwen_completion_to_prompt,
        },
        "qwen2-1.5b-instruct": {
            "model_id": "Qwen/Qwen2-1.5B-Instruct",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE,
            "stop_tokens": ["<|im_end|>", "<|endoftext|>"],
            "completion_to_prompt": qwen_completion_to_prompt,
        },
        "qwen2-7b-instruct": {
            "model_id": "Qwen/Qwen2-7B-Instruct",
            "remote_code": False,
            "stop_tokens": ["<|im_end|>", "<|endoftext|>"],
            "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE,
            "rag_prompt_template": f"""<|im_start|>system
{DEFAULT_RAG_PROMPT_CHINESE}<|im_end|>"""
            + """
<|im_start|>user
问题: {input}
已知内容: {context}
回答: <|im_end|>
<|im_start|>assistant
""",
            "completion_to_prompt": qwen_completion_to_prompt,
        },
        "qwen-7b-chat": {
            "model_id": "Qwen/Qwen-7B-Chat",
            "remote_code": True,
            "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT_CHINESE}<|im_end|>",
            "history_template": "<|im_start|>user\n{user}<|im_end|><|im_start|>assistant\n{assistant}<|im_end|>",
            "current_message_template": "<|im_start|>user\n{user}<|im_end|><|im_start|>assistant\n{assistant}",
            "stop_tokens": ["<|im_end|>", "<|endoftext|>"],
            "revision": "2abd8e5777bb4ce9c8ab4be7dbbd0fe4526db78d",
            "rag_prompt_template": f"""<|im_start|>system
{DEFAULT_RAG_PROMPT_CHINESE}<|im_end|>"""
            + """
<|im_start|>user
问题: {input}
已知内容: {context}
回答: <|im_end|>
<|im_start|>assistant
""",
        },
        "chatglm3-6b": {
            "model_id": "THUDM/chatglm3-6b",
            "remote_code": True,
            "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE,
            "tokenizer_kwargs": {"add_special_tokens": False},
            "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE}"""
            + """
问题: {input}
已知内容: {context}
回答:
""",
        },
        "glm-4-9b-chat": {
            "model_id": "THUDM/glm-4-9b-chat",
            "remote_code": True,
            "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE,
            "tokenizer_kwargs": {"add_special_tokens": False},
            "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE}"""
            + """
问题: {input}
已知内容: {context}
回答:
""",
        },
        "baichuan2-7b-chat": {
            "model_id": "baichuan-inc/Baichuan2-7B-Chat",
            "remote_code": True,
            "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE,
            "tokenizer_kwargs": {"add_special_tokens": False},
            "stop_tokens": ["<unk>", "</s>"],
            "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE}"""
            + """
问题: {input}
已知内容: {context}
回答:
""",
        },
        "minicpm-2b-dpo": {
            "model_id": "openbmb/MiniCPM-2B-dpo-fp16",
            "remote_code": True,
            "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE,
        },
        "internlm2-chat-1.8b": {
            "model_id": "internlm/internlm2-chat-1_8b",
            "remote_code": True,
            "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE,
            "stop_tokens": ["</s>", "<|im_end|>"],
            "partial_text_processor": internlm_partial_text_processor,
        },
        "qwen1.5-1.8b-chat": {
            "model_id": "Qwen/Qwen1.5-1.8B-Chat",
            "remote_code": False,
            "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE,
            "stop_tokens": ["<|im_end|>", "<|endoftext|>"],
            "rag_prompt_template": f"""<|im_start|>system
{DEFAULT_RAG_PROMPT_CHINESE}<|im_end|>"""
            + """
<|im_start|>user
问题: {input}
已知内容: {context}
回答: <|im_end|>
<|im_start|>assistant
""",
        },
    },
    "Japanese": {
        "youri-7b-chat": {
            "model_id": "rinna/youri-7b-chat",
            "remote_code": False,
            "start_message": f"設定: {DEFAULT_SYSTEM_PROMPT_JAPANESE}\n",
            "history_template": "ユーザー: {user}\nシステム: {assistant}\n",
            "current_message_template": "ユーザー: {user}\nシステム: {assistant}",
            "tokenizer_kwargs": {"add_special_tokens": False},
            "partial_text_processor": youri_partial_text_processor,
        },
    },
}

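# Usage sketch: configs are keyed by interface language, then by model name.
# The field names below come from the entries above; filling a RAG template
# with str.format is illustrative (downstream code may use its own templating):
#
#   model_config = SUPPORTED_LLM_MODELS["English"]["tiny-llama-1b-chat"]
#   prompt = model_config["rag_prompt_template"].format(
#       input="What is OpenVINO?",
#       context="...retrieved passages...",
#   )
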
SUPPORTED_EMBEDDING_MODELS = {
    "English": {
        "bge-small-en-v1.5": {
            "model_id": "BAAI/bge-small-en-v1.5",
            "mean_pooling": False,
            "normalize_embeddings": True,
        },
        "bge-large-en-v1.5": {
            "model_id": "BAAI/bge-large-en-v1.5",
            "mean_pooling": False,
            "normalize_embeddings": True,
        },
        "bge-m3": {
            "model_id": "BAAI/bge-m3",
            "mean_pooling": False,
            "normalize_embeddings": True,
        },
    },
    "Chinese": {
        "bge-small-zh-v1.5": {
            "model_id": "BAAI/bge-small-zh-v1.5",
            "mean_pooling": False,
            "normalize_embeddings": True,
        },
        "bge-large-zh-v1.5": {
            "model_id": "BAAI/bge-large-zh-v1.5",
            "mean_pooling": False,
            "normalize_embeddings": True,
        },
        "bge-m3": {
            "model_id": "BAAI/bge-m3",
            "mean_pooling": False,
            "normalize_embeddings": True,
        },
    },
}


SUPPORTED_RERANK_MODELS = {
    "bge-reranker-v2-m3": {"model_id": "BAAI/bge-reranker-v2-m3"},
    "bge-reranker-large": {"model_id": "BAAI/bge-reranker-large"},
    "bge-reranker-base": {"model_id": "BAAI/bge-reranker-base"},
}

compression_configs = {
    "zephyr-7b-beta": {
        "sym": True,
        "group_size": 64,
        "ratio": 0.6,
    },
    "mistral-7b": {
        "sym": True,
        "group_size": 64,
        "ratio": 0.6,
    },
    "minicpm-2b-dpo": {
        "sym": True,
        "group_size": 64,
        "ratio": 0.6,
    },
    "gemma-2b-it": {
        "sym": True,
        "group_size": 64,
        "ratio": 0.6,
    },
    "notus-7b-v1": {
        "sym": True,
        "group_size": 64,
        "ratio": 0.6,
    },
    "neural-chat-7b-v3-1": {
        "sym": True,
        "group_size": 64,
        "ratio": 0.6,
    },
    "llama-2-chat-7b": {
        "sym": True,
        "group_size": 128,
        "ratio": 0.8,
    },
    "llama-3-8b-instruct": {
        "sym": True,
        "group_size": 128,
        "ratio": 0.8,
    },
    "gemma-7b-it": {
        "sym": True,
        "group_size": 128,
        "ratio": 0.8,
    },
    "chatglm2-6b": {
        "sym": True,
        "group_size": 128,
        "ratio": 0.72,
    },
    "qwen-7b-chat": {"sym": True, "group_size": 128, "ratio": 0.6},
    "red-pajama-3b-chat": {
        "sym": False,
        "group_size": 128,
        "ratio": 0.5,
    },
    "llama-3.2-3b-instruct": {"sym": False, "group_size": 64, "ratio": 1.0, "dataset": "wikitext2", "awq": True, "all_layers": True, "scale_estimation": True},
    "llama-3.2-1b-instruct": {"sym": False, "group_size": 64, "ratio": 1.0, "dataset": "wikitext2", "awq": True, "all_layers": True, "scale_estimation": True},
    "default": {
        "sym": False,
        "group_size": 128,
        "ratio": 0.8,
    },
}

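# Reading of the knobs above, based on the flags assembled in
# get_optimum_cli_command below: "group_size" and "ratio" map to optimum-cli's
# --group-size and --ratio weight-compression options, "sym" adds --sym
# (symmetric quantization), and "awq" / "scale_estimation" / "all_layers"
# enable the corresponding data-aware compression flags used by the
# Llama 3.2 entries.
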

def get_optimum_cli_command(model_id, weight_format, output_dir, compression_options=None, enable_awq=False, trust_remote_code=False):
    base_command = "optimum-cli export openvino --model {} --task text-generation-with-past --weight-format {}"
    command = base_command.format(model_id, weight_format)
    if compression_options:
        compression_args = " --group-size {} --ratio {}".format(compression_options["group_size"], compression_options["ratio"])
        if compression_options["sym"]:
            compression_args += " --sym"
        if enable_awq or compression_options.get("awq", False):
            compression_args += " --awq --dataset wikitext2 --num-samples 128"
            if compression_options.get("scale_estimation", False):
                compression_args += " --scale-estimation"
        if compression_options.get("all_layers", False):
            compression_args += " --all-layers"

        command = command + compression_args
    if trust_remote_code:
        command += " --trust-remote-code"

    command += " {}".format(output_dir)
    return command

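# For example, with the zephyr settings above (the output directory is
# illustrative), the generated command is:
#
#   get_optimum_cli_command(
#       "HuggingFaceH4/zephyr-7b-beta", "int4", "zephyr/INT4_compressed_weights",
#       compression_configs["zephyr-7b-beta"],
#   )
#   # -> "optimum-cli export openvino --model HuggingFaceH4/zephyr-7b-beta
#   #     --task text-generation-with-past --weight-format int4
#   #     --group-size 64 --ratio 0.6 --sym zephyr/INT4_compressed_weights"
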

default_language = "English"

SUPPORTED_OPTIMIZATIONS = ["INT4", "INT4-AWQ", "INT8", "FP16"]


def get_llm_selection_widget(languages=list(SUPPORTED_LLM_MODELS), models=SUPPORTED_LLM_MODELS[default_language], show_preconverted_checkbox=True):
    import ipywidgets as widgets

    lang_dropdown = widgets.Dropdown(options=languages or [])

    # Model dropdown whose options depend on the selected language
    model_dropdown = widgets.Dropdown(options=models)

    def dropdown_handler(change):
        global default_language
        default_language = change.new
        # Update the dependent model dropdown to match the newly selected language
        model_dropdown.options = SUPPORTED_LLM_MODELS[change.new]

    lang_dropdown.observe(dropdown_handler, names="value")
    compression_dropdown = widgets.Dropdown(options=SUPPORTED_OPTIMIZATIONS)
    preconverted_checkbox = widgets.Checkbox(value=True)

    form_items = []

    if languages:
        form_items.append(widgets.Box([widgets.Label(value="Language:"), lang_dropdown]))
    form_items.extend(
        [
            widgets.Box([widgets.Label(value="Model:"), model_dropdown]),
            widgets.Box([widgets.Label(value="Compression:"), compression_dropdown]),
        ]
    )
    if show_preconverted_checkbox:
        form_items.append(widgets.Box([widgets.Label(value="Use preconverted models:"), preconverted_checkbox]))

    form = widgets.Box(
        form_items,
        layout=widgets.Layout(
            display="flex",
            flex_flow="column",
            border="solid 1px",
            width="30%",
            padding="1%",
        ),
    )
    return form, lang_dropdown, model_dropdown, compression_dropdown, preconverted_checkbox

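# In a notebook, display the returned form and read the selections back from
# the widget handles (sketch, assuming a Jupyter context; with dict-valued
# options, model_dropdown.value is the selected model's config dict):
#
#   form, lang_dd, model_dd, compression_dd, preconverted_cb = get_llm_selection_widget()
#   display(form)
#   model_configuration = model_dd.value
#   precision = compression_dd.value  # e.g. "INT4"
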

def convert_tokenizer(model_id, remote_code, model_dir):
    import openvino as ov
    from transformers import AutoTokenizer
    from openvino_tokenizers import convert_tokenizer

    hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=remote_code)
    ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)
    ov.save_model(ov_tokenizer, model_dir / "openvino_tokenizer.xml")
    ov.save_model(ov_detokenizer, model_dir / "openvino_detokenizer.xml")


def convert_and_compress_model(model_id, model_config, precision, use_preconverted=True):
    from pathlib import Path
    from IPython.display import Markdown, display
    import subprocess  # nosec - disable B404:import-subprocess check
    import platform

    pt_model_id = model_config["model_id"]
    pt_model_name = model_id.split("-")[0]
    model_subdir = precision if precision == "FP16" else precision + "_compressed_weights"
    model_dir = Path(pt_model_name) / model_subdir
    remote_code = model_config.get("remote_code", False)
    if (model_dir / "openvino_model.xml").exists():
        print(f"✅ {precision} {model_id} model already converted and can be found in {model_dir}")

        if not (model_dir / "openvino_tokenizer.xml").exists() or not (model_dir / "openvino_detokenizer.xml").exists():
            convert_tokenizer(pt_model_id, remote_code, model_dir)
        return model_dir
    if use_preconverted:
        OV_ORG = "OpenVINO"
        pt_model_name = pt_model_id.split("/")[-1]
        ov_model_name = pt_model_name + f"-{precision.lower()}-ov"
        ov_model_hub_id = f"{OV_ORG}/{ov_model_name}"
        import huggingface_hub as hf_hub

        hub_api = hf_hub.HfApi()
        if hub_api.repo_exists(ov_model_hub_id):
            print(f"⌛ Found preconverted {precision} {model_id}. Download started. It may take some time.")
            hf_hub.snapshot_download(ov_model_hub_id, local_dir=model_dir)
            print(f"✅ {precision} {model_id} model downloaded and can be found in {model_dir}")
            return model_dir

    model_compression_params = {}
    if "INT4" in precision:
        model_compression_params = compression_configs.get(model_id, compression_configs["default"])
    weight_format = precision.split("-")[0].lower()
    optimum_cli_command = get_optimum_cli_command(pt_model_id, weight_format, model_dir, model_compression_params, "AWQ" in precision, remote_code)
    print(f"⌛ {model_id} conversion to {precision} started. It may take some time.")
    display(Markdown("**Export command:**"))
    display(Markdown(f"`{optimum_cli_command}`"))
    subprocess.run(optimum_cli_command.split(" "), shell=(platform.system() == "Windows"), check=True)
    print(f"✅ {precision} {model_id} model converted and can be found in {model_dir}")
    return model_dir

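# End-to-end sketch (model name and language as defined in SUPPORTED_LLM_MODELS;
# conversion requires optimum-intel with OpenVINO export support installed):
#
#   config = SUPPORTED_LLM_MODELS["English"]["qwen2-0.5b-instruct"]
#   model_dir = convert_and_compress_model("qwen2-0.5b-instruct", config, "INT4")
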

def compare_model_size(model_dir):
    fp16_weights = model_dir.parent / "FP16" / "openvino_model.bin"
    int8_weights = model_dir.parent / "INT8_compressed_weights" / "openvino_model.bin"
    int4_weights = model_dir.parent / "INT4_compressed_weights" / "openvino_model.bin"
    int4_awq_weights = model_dir.parent / "INT4-AWQ_compressed_weights" / "openvino_model.bin"

    if fp16_weights.exists():
        print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB")
    for precision, compressed_weights in zip(["INT8", "INT4", "INT4-AWQ"], [int8_weights, int4_weights, int4_awq_weights]):
        if compressed_weights.exists():
            print(f"Size of model with {precision} compressed weights is {compressed_weights.stat().st_size / 1024 / 1024:.2f} MB")
        if compressed_weights.exists() and fp16_weights.exists():
            print(f"Compression rate for {precision} model: {fp16_weights.stat().st_size / compressed_weights.stat().st_size:.3f}")
|