|
DEFAULT_SYSTEM_PROMPT = """\ |
|
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. |
|
If a question does not make any sense or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\ |
|
""" |
|
|
|
DEFAULT_SYSTEM_PROMPT_CHINESE = """\ |
|
你是一个乐于助人、尊重他人以及诚实可靠的助手。在安全的情况下,始终尽可能有帮助地回答。 您的回答不应包含任何有害、不道德、种族主义、性别歧视、有毒、危险或非法的内容。请确保您的回答在社会上是公正的和积极的。 |
|
如果一个问题没有任何意义或与事实不符,请解释原因,而不是回答错误的问题。如果您不知道问题的答案,请不要分享虚假信息。另外,答案请使用中文。\ |
|
""" |
|
|
|
DEFAULT_SYSTEM_PROMPT_JAPANESE = """\ |
|
あなたは親切で、礼儀正しく、誠実なアシスタントです。 常に安全を保ちながら、できるだけ役立つように答えてください。 回答には、有害、非倫理的、人種差別的、性差別的、有毒、危険、または違法なコンテンツを含めてはいけません。 回答は社会的に偏見がなく、本質的に前向きなものであることを確認してください。 |
|
質問が意味をなさない場合、または事実に一貫性がない場合は、正しくないことに答えるのではなく、その理由を説明してください。 質問の答えがわからない場合は、誤った情報を共有しないでください。\ |
|
""" |
|
|
|
DEFAULT_RAG_PROMPT = """\ |
|
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\ |
|
""" |
|
|
|
DEFAULT_RAG_PROMPT_CHINESE = """\ |
|
基于以下已知信息,请简洁并专业地回答用户的问题。如果无法从中得到答案,请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息"。不允许在答案中添加编造成分。另外,答案请使用中文。\ |
|
""" |
|
|
|
|
|
def red_pijama_partial_text_processor(partial_text, new_text): |
|
if new_text == "<": |
|
return partial_text |
|
|
|
partial_text += new_text |
|
return partial_text.split("<bot>:")[-1] |
|
|
|
|
|
def llama_partial_text_processor(partial_text, new_text): |
|
new_text = new_text.replace("[INST]", "").replace("[/INST]", "") |
|
partial_text += new_text |
|
return partial_text |
|
|
|
|
|
def chatglm_partial_text_processor(partial_text, new_text): |
|
new_text = new_text.strip() |
|
new_text = new_text.replace("[[训练时间]]", "2023年") |
|
partial_text += new_text |
|
return partial_text |
|
|
|
|
|
def youri_partial_text_processor(partial_text, new_text): |
|
new_text = new_text.replace("システム:", "") |
|
partial_text += new_text |
|
return partial_text |
|
|
|
|
|
def internlm_partial_text_processor(partial_text, new_text): |
|
partial_text += new_text |
|
return partial_text.split("<|im_end|>")[0] |
|
|
|
|
|
def phi_completion_to_prompt(completion): |
|
return f"<|system|><|end|><|user|>{completion}<|end|><|assistant|>\n" |
|
|
|
|
|
def llama3_completion_to_prompt(completion): |
|
return f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{completion}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" |
|
|
|
|
|
def qwen_completion_to_prompt(completion): |
|
return f"<|im_start|>system\n<|im_end|>\n<|im_start|>user\n{completion}<|im_end|>\n<|im_start|>assistant\n" |
|
|
|
|
|
SUPPORTED_LLM_MODELS = { |
|
"English": { |
|
"qwen2-0.5b-instruct": { |
|
"model_id": "Qwen/Qwen2-0.5B-Instruct", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT, |
|
"stop_tokens": ["<|im_end|>", "<|endoftext|>"], |
|
"completion_to_prompt": qwen_completion_to_prompt, |
|
}, |
|
"tiny-llama-1b-chat": { |
|
"model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", |
|
"remote_code": False, |
|
"start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}</s>\n", |
|
"history_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}</s> \n", |
|
"current_message_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}", |
|
"rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }</s>""" |
|
+ """ |
|
<|user|> |
|
Question: {input} |
|
Context: {context} |
|
Answer: </s> |
|
<|assistant|>""", |
|
}, |
|
"llama-3.2-1b-instruct": { |
|
"model_id": "meta-llama/Llama-3.2-1B-Instruct", |
|
"start_message": DEFAULT_SYSTEM_PROMPT, |
|
"stop_tokens": ["<|eot_id|>"], |
|
"has_chat_template": True, |
|
"start_message": " <|start_header_id|>system<|end_header_id|>\n\n" + DEFAULT_SYSTEM_PROMPT + "<|eot_id|>", |
|
"history_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}<|eot_id|>", |
|
"current_message_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}", |
|
"rag_prompt_template": f"<|start_header_id|>system<|end_header_id|>\n\n{DEFAULT_RAG_PROMPT}<|eot_id|>" |
|
+ """<|start_header_id|>user<|end_header_id|> |
|
|
|
|
|
Question: {input} |
|
Context: {context} |
|
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|> |
|
|
|
|
|
""", |
|
"completion_to_prompt": llama3_completion_to_prompt, |
|
}, |
|
"llama-3.2-3b-instruct": { |
|
"model_id": "meta-llama/Llama-3.2-3B-Instruct", |
|
"start_message": DEFAULT_SYSTEM_PROMPT, |
|
"stop_tokens": ["<|eot_id|>"], |
|
"has_chat_template": True, |
|
"start_message": " <|start_header_id|>system<|end_header_id|>\n\n" + DEFAULT_SYSTEM_PROMPT + "<|eot_id|>", |
|
"history_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}<|eot_id|>", |
|
"current_message_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}", |
|
"rag_prompt_template": f"<|start_header_id|>system<|end_header_id|>\n\n{DEFAULT_RAG_PROMPT}<|eot_id|>" |
|
+ """<|start_header_id|>user<|end_header_id|> |
|
|
|
|
|
Question: {input} |
|
Context: {context} |
|
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|> |
|
|
|
|
|
""", |
|
"completion_to_prompt": llama3_completion_to_prompt, |
|
}, |
|
"qwen2-1.5b-instruct": { |
|
"model_id": "Qwen/Qwen2-1.5B-Instruct", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT, |
|
"stop_tokens": ["<|im_end|>", "<|endoftext|>"], |
|
"completion_to_prompt": qwen_completion_to_prompt, |
|
}, |
|
"gemma-2b-it": { |
|
"model_id": "google/gemma-2b-it", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT + ", ", |
|
"history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>", |
|
"current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}", |
|
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" |
|
+ """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""", |
|
}, |
|
"gemma-2-2b-it": { |
|
"model_id": "google/gemma-2-2b-it", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT + ", ", |
|
"history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>", |
|
"current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}", |
|
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" |
|
+ """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""", |
|
}, |
|
"red-pajama-3b-chat": { |
|
"model_id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", |
|
"remote_code": False, |
|
"start_message": "", |
|
"history_template": "\n<human>:{user}\n<bot>:{assistant}", |
|
"stop_tokens": [29, 0], |
|
"partial_text_processor": red_pijama_partial_text_processor, |
|
"current_message_template": "\n<human>:{user}\n<bot>:{assistant}", |
|
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT }""" |
|
+ """ |
|
<human>: Question: {input} |
|
Context: {context} |
|
Answer: <bot>""", |
|
}, |
|
"qwen2-7b-instruct": { |
|
"model_id": "Qwen/Qwen2-7B-Instruct", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT + ", ", |
|
"rag_prompt_template": f"""<|im_start|>system |
|
{DEFAULT_RAG_PROMPT }<|im_end|>""" |
|
+ """ |
|
<|im_start|>user |
|
Question: {input} |
|
Context: {context} |
|
Answer: <|im_end|> |
|
<|im_start|>assistant |
|
""", |
|
"completion_to_prompt": qwen_completion_to_prompt, |
|
}, |
|
"gemma-7b-it": { |
|
"model_id": "google/gemma-7b-it", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT + ", ", |
|
"history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>", |
|
"current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}", |
|
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" |
|
+ """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""", |
|
}, |
|
"gemma-2-9b-it": { |
|
"model_id": "google/gemma-2-9b-it", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT + ", ", |
|
"history_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}<end_of_turn>", |
|
"current_message_template": "<start_of_turn>user{user}<end_of_turn><start_of_turn>model{assistant}", |
|
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" |
|
+ """<start_of_turn>user{input}<end_of_turn><start_of_turn>context{context}<end_of_turn><start_of_turn>model""", |
|
}, |
|
"llama-2-chat-7b": { |
|
"model_id": "meta-llama/Llama-2-7b-chat-hf", |
|
"remote_code": False, |
|
"start_message": f"<s>[INST] <<SYS>>\n{DEFAULT_SYSTEM_PROMPT }\n<</SYS>>\n\n", |
|
"history_template": "{user}[/INST]{assistant}</s><s>[INST]", |
|
"current_message_template": "{user} [/INST]{assistant}", |
|
"tokenizer_kwargs": {"add_special_tokens": False}, |
|
"partial_text_processor": llama_partial_text_processor, |
|
"rag_prompt_template": f"""[INST]Human: <<SYS>> {DEFAULT_RAG_PROMPT }<</SYS>>""" |
|
+ """ |
|
Question: {input} |
|
Context: {context} |
|
Answer: [/INST]""", |
|
}, |
|
"llama-3-8b-instruct": { |
|
"model_id": "meta-llama/Meta-Llama-3-8B-Instruct", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT, |
|
"stop_tokens": ["<|eot_id|>", "<|end_of_text|>"], |
|
"has_chat_template": True, |
|
"start_message": " <|start_header_id|>system<|end_header_id|>\n\n" + DEFAULT_SYSTEM_PROMPT + "<|eot_id|>", |
|
"history_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}<|eot_id|>", |
|
"current_message_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}", |
|
"rag_prompt_template": f"<|start_header_id|>system<|end_header_id|>\n\n{DEFAULT_RAG_PROMPT}<|eot_id|>" |
|
+ """<|start_header_id|>user<|end_header_id|> |
|
|
|
|
|
Question: {input} |
|
Context: {context} |
|
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|> |
|
|
|
|
|
""", |
|
"completion_to_prompt": llama3_completion_to_prompt, |
|
}, |
|
"llama-3.1-8b-instruct": { |
|
"model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT, |
|
"stop_tokens": ["<|eot_id|>", "<|end_of_text|>"], |
|
"has_chat_template": True, |
|
"start_message": " <|start_header_id|>system<|end_header_id|>\n\n" + DEFAULT_SYSTEM_PROMPT + "<|eot_id|>", |
|
"history_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}<|eot_id|>", |
|
"current_message_template": "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{assistant}", |
|
"rag_prompt_template": f"<|start_header_id|>system<|end_header_id|>\n\n{DEFAULT_RAG_PROMPT}<|eot_id|>" |
|
+ """<|start_header_id|>user<|end_header_id|> |
|
|
|
|
|
Question: {input} |
|
Context: {context} |
|
Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|> |
|
|
|
|
|
""", |
|
"completion_to_prompt": llama3_completion_to_prompt, |
|
}, |
|
"mistral-7b-instruct": { |
|
"model_id": "mistralai/Mistral-7B-Instruct-v0.1", |
|
"remote_code": False, |
|
"start_message": f"<s>[INST] <<SYS>>\n{DEFAULT_SYSTEM_PROMPT }\n<</SYS>>\n\n", |
|
"history_template": "{user}[/INST]{assistant}</s><s>[INST]", |
|
"current_message_template": "{user} [/INST]{assistant}", |
|
"tokenizer_kwargs": {"add_special_tokens": False}, |
|
"partial_text_processor": llama_partial_text_processor, |
|
"rag_prompt_template": f"""<s> [INST] {DEFAULT_RAG_PROMPT } [/INST] </s>""" |
|
+ """ |
|
[INST] Question: {input} |
|
Context: {context} |
|
Answer: [/INST]""", |
|
}, |
|
"zephyr-7b-beta": { |
|
"model_id": "HuggingFaceH4/zephyr-7b-beta", |
|
"remote_code": False, |
|
"start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}</s>\n", |
|
"history_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}</s> \n", |
|
"current_message_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}", |
|
"rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }</s>""" |
|
+ """ |
|
<|user|> |
|
Question: {input} |
|
Context: {context} |
|
Answer: </s> |
|
<|assistant|>""", |
|
}, |
|
"notus-7b-v1": { |
|
"model_id": "argilla/notus-7b-v1", |
|
"remote_code": False, |
|
"start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}</s>\n", |
|
"history_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}</s> \n", |
|
"current_message_template": "<|user|>\n{user}</s> \n<|assistant|>\n{assistant}", |
|
"rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }</s>""" |
|
+ """ |
|
<|user|> |
|
Question: {input} |
|
Context: {context} |
|
Answer: </s> |
|
<|assistant|>""", |
|
}, |
|
"neural-chat-7b-v3-3": { |
|
"model_id": "Intel/neural-chat-7b-v3-3", |
|
"remote_code": False, |
|
"start_message": f"<s>[INST] <<SYS>>\n{DEFAULT_SYSTEM_PROMPT }\n<</SYS>>\n\n", |
|
"history_template": "{user}[/INST]{assistant}</s><s>[INST]", |
|
"current_message_template": "{user} [/INST]{assistant}", |
|
"tokenizer_kwargs": {"add_special_tokens": False}, |
|
"partial_text_processor": llama_partial_text_processor, |
|
"rag_prompt_template": f"""<s> [INST] {DEFAULT_RAG_PROMPT } [/INST] </s>""" |
|
+ """ |
|
[INST] Question: {input} |
|
Context: {context} |
|
Answer: [/INST]""", |
|
}, |
|
"phi-3-mini-instruct": { |
|
"model_id": "microsoft/Phi-3-mini-4k-instruct", |
|
"remote_code": True, |
|
"start_message": "<|system|>\n{DEFAULT_SYSTEM_PROMPT}<|end|>\n", |
|
"history_template": "<|user|>\n{user}<|end|> \n<|assistant|>\n{assistant}<|end|>\n", |
|
"current_message_template": "<|user|>\n{user}<|end|> \n<|assistant|>\n{assistant}", |
|
"stop_tokens": ["<|end|>"], |
|
"rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }<|end|>""" |
|
+ """ |
|
<|user|> |
|
Question: {input} |
|
Context: {context} |
|
Answer: <|end|> |
|
<|assistant|>""", |
|
"completion_to_prompt": phi_completion_to_prompt, |
|
}, |
|
"phi-3.5-mini-instruct": { |
|
"model_id": "microsoft/Phi-3.5-mini-instruct", |
|
"remote_code": True, |
|
"start_message": "<|system|>\n{DEFAULT_SYSTEM_PROMPT}<|end|>\n", |
|
"history_template": "<|user|>\n{user}<|end|> \n<|assistant|>\n{assistant}<|end|>\n", |
|
"current_message_template": "<|user|>\n{user}<|end|> \n<|assistant|>\n{assistant}", |
|
"stop_tokens": ["<|end|>"], |
|
"rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }<|end|>""" |
|
+ """ |
|
<|user|> |
|
Question: {input} |
|
Context: {context} |
|
Answer: <|end|> |
|
<|assistant|>""", |
|
"completion_to_prompt": phi_completion_to_prompt, |
|
}, |
|
}, |
|
"Chinese": { |
|
"qwen2-0.5b-instruct": { |
|
"model_id": "Qwen/Qwen2-0.5B-Instruct", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, |
|
"stop_tokens": ["<|im_end|>", "<|endoftext|>"], |
|
"completion_to_prompt": qwen_completion_to_prompt, |
|
}, |
|
"qwen2-1.5b-instruct": { |
|
"model_id": "Qwen/Qwen2-1.5B-Instruct", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, |
|
"stop_tokens": ["<|im_end|>", "<|endoftext|>"], |
|
"completion_to_prompt": qwen_completion_to_prompt, |
|
}, |
|
"qwen2-7b-instruct": { |
|
"model_id": "Qwen/Qwen2-7B-Instruct", |
|
"remote_code": False, |
|
"stop_tokens": ["<|im_end|>", "<|endoftext|>"], |
|
"start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, |
|
"rag_prompt_template": f"""<|im_start|>system |
|
{DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" |
|
+ """ |
|
<|im_start|>user |
|
问题: {input} |
|
已知内容: {context} |
|
回答: <|im_end|> |
|
<|im_start|>assistant |
|
""", |
|
"completion_to_prompt": qwen_completion_to_prompt, |
|
}, |
|
"qwen-7b-chat": { |
|
"model_id": "Qwen/Qwen-7B-Chat", |
|
"remote_code": True, |
|
"start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT_CHINESE }<|im_end|>", |
|
"history_template": "<|im_start|>user\n{user}<im_end><|im_start|>assistant\n{assistant}<|im_end|>", |
|
"current_message_template": '"<|im_start|>user\n{user}<im_end><|im_start|>assistant\n{assistant}', |
|
"stop_tokens": ["<|im_end|>", "<|endoftext|>"], |
|
"revision": "2abd8e5777bb4ce9c8ab4be7dbbd0fe4526db78d", |
|
"rag_prompt_template": f"""<|im_start|>system |
|
{DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" |
|
+ """ |
|
<|im_start|>user |
|
问题: {input} |
|
已知内容: {context} |
|
回答: <|im_end|> |
|
<|im_start|>assistant |
|
""", |
|
}, |
|
"chatglm3-6b": { |
|
"model_id": "THUDM/chatglm3-6b", |
|
"remote_code": True, |
|
"start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, |
|
"tokenizer_kwargs": {"add_special_tokens": False}, |
|
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE }""" |
|
+ """ |
|
问题: {input} |
|
已知内容: {context} |
|
回答: |
|
""", |
|
}, |
|
"glm-4-9b-chat": { |
|
"model_id": "THUDM/glm-4-9b-chat", |
|
"remote_code": True, |
|
"start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, |
|
"tokenizer_kwargs": {"add_special_tokens": False}, |
|
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE }""" |
|
+ """ |
|
问题: {input} |
|
已知内容: {context} |
|
回答: |
|
""", |
|
}, |
|
"baichuan2-7b-chat": { |
|
"model_id": "baichuan-inc/Baichuan2-7B-Chat", |
|
"remote_code": True, |
|
"start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, |
|
"tokenizer_kwargs": {"add_special_tokens": False}, |
|
"stop_tokens": ["<unk>", "</s>"], |
|
"rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE }""" |
|
+ """ |
|
问题: {input} |
|
已知内容: {context} |
|
回答: |
|
""", |
|
}, |
|
"minicpm-2b-dpo": { |
|
"model_id": "openbmb/MiniCPM-2B-dpo-fp16", |
|
"remote_code": True, |
|
"start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, |
|
}, |
|
"internlm2-chat-1.8b": { |
|
"model_id": "internlm/internlm2-chat-1_8b", |
|
"remote_code": True, |
|
"start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, |
|
"stop_tokens": ["</s>", "<|im_end|>"], |
|
"partial_text_processor": internlm_partial_text_processor, |
|
}, |
|
"qwen1.5-1.8b-chat": { |
|
"model_id": "Qwen/Qwen1.5-1.8B-Chat", |
|
"remote_code": False, |
|
"start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, |
|
"stop_tokens": ["<|im_end|>", "<|endoftext|>"], |
|
"rag_prompt_template": f"""<|im_start|>system |
|
{DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" |
|
+ """ |
|
<|im_start|>user |
|
问题: {input} |
|
已知内容: {context} |
|
回答: <|im_end|> |
|
<|im_start|>assistant |
|
""", |
|
}, |
|
}, |
|
"Japanese": { |
|
"youri-7b-chat": { |
|
"model_id": "rinna/youri-7b-chat", |
|
"remote_code": False, |
|
"start_message": f"設定: {DEFAULT_SYSTEM_PROMPT_JAPANESE}\n", |
|
"history_template": "ユーザー: {user}\nシステム: {assistant}\n", |
|
"current_message_template": "ユーザー: {user}\nシステム: {assistant}", |
|
"tokenizer_kwargs": {"add_special_tokens": False}, |
|
"partial_text_processor": youri_partial_text_processor, |
|
}, |
|
}, |
|
} |
|
|
|
SUPPORTED_EMBEDDING_MODELS = { |
|
"English": { |
|
"bge-small-en-v1.5": { |
|
"model_id": "BAAI/bge-small-en-v1.5", |
|
"mean_pooling": False, |
|
"normalize_embeddings": True, |
|
}, |
|
"bge-large-en-v1.5": { |
|
"model_id": "BAAI/bge-large-en-v1.5", |
|
"mean_pooling": False, |
|
"normalize_embeddings": True, |
|
}, |
|
"bge-m3": { |
|
"model_id": "BAAI/bge-m3", |
|
"mean_pooling": False, |
|
"normalize_embeddings": True, |
|
}, |
|
}, |
|
"Chinese": { |
|
"bge-small-zh-v1.5": { |
|
"model_id": "BAAI/bge-small-zh-v1.5", |
|
"mean_pooling": False, |
|
"normalize_embeddings": True, |
|
}, |
|
"bge-large-zh-v1.5": { |
|
"model_id": "BAAI/bge-large-zh-v1.5", |
|
"mean_pooling": False, |
|
"normalize_embeddings": True, |
|
}, |
|
"bge-m3": { |
|
"model_id": "BAAI/bge-m3", |
|
"mean_pooling": False, |
|
"normalize_embeddings": True, |
|
}, |
|
}, |
|
} |
|
|
|
|
|
SUPPORTED_RERANK_MODELS = { |
|
"bge-reranker-v2-m3": {"model_id": "BAAI/bge-reranker-v2-m3"}, |
|
"bge-reranker-large": {"model_id": "BAAI/bge-reranker-large"}, |
|
"bge-reranker-base": {"model_id": "BAAI/bge-reranker-base"}, |
|
} |
|
|
|
compression_configs = { |
|
"zephyr-7b-beta": { |
|
"sym": True, |
|
"group_size": 64, |
|
"ratio": 0.6, |
|
}, |
|
"mistral-7b": { |
|
"sym": True, |
|
"group_size": 64, |
|
"ratio": 0.6, |
|
}, |
|
"minicpm-2b-dpo": { |
|
"sym": True, |
|
"group_size": 64, |
|
"ratio": 0.6, |
|
}, |
|
"gemma-2b-it": { |
|
"sym": True, |
|
"group_size": 64, |
|
"ratio": 0.6, |
|
}, |
|
"notus-7b-v1": { |
|
"sym": True, |
|
"group_size": 64, |
|
"ratio": 0.6, |
|
}, |
|
"neural-chat-7b-v3-1": { |
|
"sym": True, |
|
"group_size": 64, |
|
"ratio": 0.6, |
|
}, |
|
"llama-2-chat-7b": { |
|
"sym": True, |
|
"group_size": 128, |
|
"ratio": 0.8, |
|
}, |
|
"llama-3-8b-instruct": { |
|
"sym": True, |
|
"group_size": 128, |
|
"ratio": 0.8, |
|
}, |
|
"gemma-7b-it": { |
|
"sym": True, |
|
"group_size": 128, |
|
"ratio": 0.8, |
|
}, |
|
"chatglm2-6b": { |
|
"sym": True, |
|
"group_size": 128, |
|
"ratio": 0.72, |
|
}, |
|
"qwen-7b-chat": {"sym": True, "group_size": 128, "ratio": 0.6}, |
|
"red-pajama-3b-chat": { |
|
"sym": False, |
|
"group_size": 128, |
|
"ratio": 0.5, |
|
}, |
|
"llama-3.2-3b-instruct": {"sym": False, "group_size": 64, "ratio": 1.0, "dataset": "wikitext2", "awq": True, "all_layers": True, "scale_estimation": True}, |
|
"llama-3.2-1b-instruct": {"sym": False, "group_size": 64, "ratio": 1.0, "dataset": "wikitext2", "awq": True, "all_layers": True, "scale_estimation": True}, |
|
"default": { |
|
"sym": False, |
|
"group_size": 128, |
|
"ratio": 0.8, |
|
}, |
|
} |
|
|
|
|
|
def get_optimum_cli_command(model_id, weight_format, output_dir, compression_options=None, enable_awq=False, trust_remote_code=False): |
|
base_command = "optimum-cli export openvino --model {} --task text-generation-with-past --weight-format {}" |
|
command = base_command.format(model_id, weight_format) |
|
if compression_options: |
|
compression_args = " --group-size {} --ratio {}".format(compression_options["group_size"], compression_options["ratio"]) |
|
if compression_options["sym"]: |
|
compression_args += " --sym" |
|
if enable_awq or compression_options.get("awq", False): |
|
compression_args += " --awq --dataset wikitext2 --num-samples 128" |
|
if compression_options.get("scale_estimation", False): |
|
compression_args += " --scale-estimation" |
|
if compression_options.get("all_layers", False): |
|
compression_args += " --all-layers" |
|
|
|
command = command + compression_args |
|
if trust_remote_code: |
|
command += " --trust-remote-code" |
|
|
|
command += " {}".format(output_dir) |
|
return command |
|
|
|
|
|
default_language = "English" |
|
|
|
SUPPORTED_OPTIMIZATIONS = ["INT4", "INT4-AWQ", "INT8", "FP16"] |
|
|
|
|
|
def get_llm_selection_widget(languages=list(SUPPORTED_LLM_MODELS), models=SUPPORTED_LLM_MODELS[default_language], show_preconverted_checkbox=True): |
|
import ipywidgets as widgets |
|
|
|
lang_dropdown = widgets.Dropdown(options=languages or []) |
|
|
|
|
|
|
|
model_dropdown = widgets.Dropdown(options=models) |
|
|
|
def dropdown_handler(change): |
|
global default_language |
|
default_language = change.new |
|
|
|
model_dropdown.options = SUPPORTED_LLM_MODELS[change.new] |
|
|
|
lang_dropdown.observe(dropdown_handler, names="value") |
|
compression_dropdown = widgets.Dropdown(options=SUPPORTED_OPTIMIZATIONS) |
|
preconverted_checkbox = widgets.Checkbox(value=True) |
|
|
|
form_items = [] |
|
|
|
if languages: |
|
form_items.append(widgets.Box([widgets.Label(value="Language:"), lang_dropdown])) |
|
form_items.extend( |
|
[ |
|
widgets.Box([widgets.Label(value="Model:"), model_dropdown]), |
|
widgets.Box([widgets.Label(value="Compression:"), compression_dropdown]), |
|
] |
|
) |
|
if show_preconverted_checkbox: |
|
form_items.append(widgets.Box([widgets.Label(value="Use preconverted models:"), preconverted_checkbox])) |
|
|
|
form = widgets.Box( |
|
form_items, |
|
layout=widgets.Layout( |
|
display="flex", |
|
flex_flow="column", |
|
border="solid 1px", |
|
|
|
width="30%", |
|
padding="1%", |
|
), |
|
) |
|
return form, lang_dropdown, model_dropdown, compression_dropdown, preconverted_checkbox |
|
|
|
|
|
def convert_tokenizer(model_id, remote_code, model_dir): |
|
import openvino as ov |
|
from transformers import AutoTokenizer |
|
from openvino_tokenizers import convert_tokenizer |
|
|
|
hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=remote_code) |
|
ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True) |
|
ov.save_model(ov_tokenizer, model_dir / "openvino_tokenizer.xml") |
|
ov.save_model(ov_detokenizer, model_dir / "openvino_detokenizer.xml") |
|
|
|
|
|
def convert_and_compress_model(model_id, model_config, precision, use_preconverted=True): |
|
from pathlib import Path |
|
from IPython.display import Markdown, display |
|
import subprocess |
|
import platform |
|
|
|
pt_model_id = model_config["model_id"] |
|
pt_model_name = model_id.split("-")[0] |
|
model_subdir = precision if precision == "FP16" else precision + "_compressed_weights" |
|
model_dir = Path(pt_model_name) / model_subdir |
|
remote_code = model_config.get("remote_code", False) |
|
if (model_dir / "openvino_model.xml").exists(): |
|
print(f"✅ {precision} {model_id} model already converted and can be found in {model_dir}") |
|
|
|
if not (model_dir / "openvino_tokenizer.xml").exists() or not (model_dir / "openvino_detokenizer.xml").exists(): |
|
convert_tokenizer(pt_model_id, remote_code, model_dir) |
|
return model_dir |
|
if use_preconverted: |
|
OV_ORG = "OpenVINO" |
|
pt_model_name = pt_model_id.split("/")[-1] |
|
ov_model_name = pt_model_name + f"-{precision.lower()}-ov" |
|
ov_model_hub_id = f"{OV_ORG}/{ov_model_name}" |
|
import huggingface_hub as hf_hub |
|
|
|
hub_api = hf_hub.HfApi() |
|
if hub_api.repo_exists(ov_model_hub_id): |
|
print(f"⌛Found preconverted {precision} {model_id}. Downloading model started. It may takes some time.") |
|
hf_hub.snapshot_download(ov_model_hub_id, local_dir=model_dir) |
|
print(f"✅ {precision} {model_id} model downloaded and can be found in {model_dir}") |
|
return model_dir |
|
|
|
model_compression_params = {} |
|
if "INT4" in precision: |
|
model_compression_params = compression_configs.get(model_id, compression_configs["default"]) |
|
weight_format = precision.split("-")[0].lower() |
|
optimum_cli_command = get_optimum_cli_command(pt_model_id, weight_format, model_dir, model_compression_params, "AWQ" in precision, remote_code) |
|
print(f"⌛ {model_id} conversion to {precision} started. It may takes some time.") |
|
display(Markdown("**Export command:**")) |
|
display(Markdown(f"`{optimum_cli_command}`")) |
|
subprocess.run(optimum_cli_command.split(" "), shell=(platform.system() == "Windows"), check=True) |
|
print(f"✅ {precision} {model_id} model converted and can be found in {model_dir}") |
|
return model_dir |
|
|
|
|
|
def compare_model_size(model_dir): |
|
fp16_weights = model_dir.parent / "FP16" / "openvino_model.bin" |
|
int8_weights = model_dir.parent / "INT8_compressed_weights" / "openvino_model.bin" |
|
int4_weights = model_dir.parent / "INT4_compressed_weights" / "openvino_model.bin" |
|
int4_awq_weights = model_dir.parent / "INT4-AWQ_compressed_weights" / "openvino_model.bin" |
|
|
|
if fp16_weights.exists(): |
|
print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB") |
|
for precision, compressed_weights in zip(["INT8", "INT4", "INT4-AWQ"], [int8_weights, int4_weights, int4_awq_weights]): |
|
if compressed_weights.exists(): |
|
print(f"Size of model with {precision} compressed weights is {compressed_weights.stat().st_size / 1024 / 1024:.2f} MB") |
|
if compressed_weights.exists() and fp16_weights.exists(): |
|
print(f"Compression rate for {precision} model: {fp16_weights.stat().st_size / compressed_weights.stat().st_size:.3f}") |
|
|