shenzhi-wang committed
Commit 7af785d • 1 Parent(s): 9dfec53
Update README.md
README.md CHANGED
@@ -11,7 +11,6 @@ tags:
   - orpo
 ---
 
-❗️❗️❗️We are still uploading the GGUF file. Due to the network problem and the large size of this GGUF file, it may take some time. We are really sorry for that. If you want to use our q4 GGUF, you can use the [ollama q4 model](https://ollama.com/wangshenzhi/llama3-70b-chinese-chat-ollama-q4) first.
 
 🔥 This repo contains the official q4_0 GGUF files for [shenzhi-wang/Llama3-70B-Chinese-Chat](https://huggingface.co/shenzhi-wang/Llama3-70B-Chinese-Chat).
 
@@ -82,32 +81,35 @@ C-Eval Hard is a distinct benchmark that comprises 8 difficult subjects in math,
 # 3. Usage
 
 ```python
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from llama_cpp import Llama
 
-model_id = "shenzhi-wang/Llama3-70B-Chinese-Chat"
-
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(
-    model_id, torch_dtype="auto", device_map="auto"
+model = Llama(
+    "/Your/Path/To/GGUF/File",
+    verbose=False,
+    n_gpu_layers=-1,
 )
 
+system_prompt = "You are a helpful assistant."
+
+def generate_reponse(_model, _messages, _max_tokens=8192):
+    _output = _model.create_chat_completion(
+        _messages,
+        stop=["<|eot_id|>", "<|end_of_text|>"],
+        max_tokens=_max_tokens,
+    )["choices"][0]["message"]["content"]
+    return _output
+
+# The following are some examples
+
 messages = [
+    {
+        "role": "system",
+        "content": system_prompt,
+    },
     {"role": "user", "content": "写一首诗吧"},
 ]
 
-input_ids = tokenizer.apply_chat_template(
-    messages, add_generation_prompt=True, return_tensors="pt"
-).to(model.device)
-
-outputs = model.generate(
-    input_ids,
-    max_new_tokens=8192,
-    do_sample=True,
-    temperature=0.6,
-    top_p=0.9,
-)
-response = outputs[0][input_ids.shape[-1]:]
-print(tokenizer.decode(response, skip_special_tokens=True))
+print(generate_reponse(model, messages))
 ```
 
 # 4. Examples
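For reference, the `generate_reponse` helper added in this commit returns a plain string, so it can also drive a multi-turn chat by appending each reply back onto `messages`. The sketch below is illustrative only: it assumes `model`, `system_prompt`, and `generate_reponse` are already defined exactly as in the new README snippet, and the follow-up prompt is made up for the example.

```python
# Multi-turn usage sketch built on the snippet added in this commit.
# Assumes `model`, `system_prompt`, and `generate_reponse` from that code.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": "写一首诗吧"},  # same first prompt as the README ("write a poem")
]

# First reply from the q4_0 GGUF model.
first_reply = generate_reponse(model, messages)
print(first_reply)

# Feed the assistant's reply back in and ask a follow-up in the same conversation.
messages.append({"role": "assistant", "content": first_reply})
messages.append({"role": "user", "content": "Now explain the poem in English."})
print(generate_reponse(model, messages))
```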