facat committed on
Commit
85ab8ff
1 Parent(s): 3c06b51

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +21 -12
README.md CHANGED
@@ -13,7 +13,11 @@ pipeline_tag: text-generation
13
 
14
  # 🐷SUS-Chat: Instruction tuning done right
15
 
 
 
 
16
 
 
17
 
18
  <div align="center">
19
 
@@ -83,14 +87,15 @@ pipeline_tag: text-generation
83
  # Introduction
84
 
85
  <img src="https://hackmd.io/_uploads/HJlDtzhBa.png" id="fig-sus"
86
- alt="Figure 1: DALL·E 2023-12-01 11.03.28 - An imposing, majestic wild boar combined with elements of a futuristic transformer robot. The boar itself should be intricately blended with these tra" />
87
 
88
  **SUS-Chat** is a 34B bilingual Chinese-English dialogue model, jointly
89
  released by the **Southern University of Science and Technology** and
90
- **Cognitive Computing and Natural Language Center of International Digital Economy Academy (IDEA-CCNL)**. The SUS-Chat-34B model has
91
- been fine-tuned on millions of high-quality, multilingual instruction
92
- data. While maintaining the strong language capabilities of the base
93
- model, the SUS-Chat-34B model has improved the model’s response to human
 
94
  instructions through high-quality instruction fine-tuning and excels at
95
  imitating human thought processes through chains of thought. It
96
  introduces inter-instruction attention sharing in long texts, expanding
@@ -142,7 +147,7 @@ similar scale and achieved the most advanced comprehensive performance.
142
 
143
  <img
144
  src="https://github.com/SUSTech-IDEA/SUS-Chat/raw/main/assets/radar.png"
145
- id="fig-bench" alt="Figure 2: Benchmark" />
146
 
147
  # Usage
148
 
@@ -174,10 +179,12 @@ model = AutoModelForCausalLM.from_pretrained(
174
 
175
  messages = [{"role": "user", "content": "hi"}]
176
 
177
- input_ids = tokenizer.encode(chat_template(messages), return_tensors="pt").to("cuda")
178
- output_ids = model.generate(input_ids.to("cuda"))
 
 
179
  response = tokenizer.decode(
180
- output_ids[0][input_ids.shape[1] :], skip_special_tokens=True
181
  )
182
 
183
  messages.append({"role": "assistant", "content": response})
@@ -186,10 +193,12 @@ messages.append({"role": "assistant", "content": response})
186
 
187
  messages.append({"role": "user", "content": "What is the capital of China?"})
188
 
189
- input_ids = tokenizer.encode(chat_template(messages), return_tensors="pt").to("cuda")
190
- output_ids = model.generate(input_ids.to("cuda"))
 
 
191
  response = tokenizer.decode(
192
- output_ids[0][input_ids.shape[1] :], skip_special_tokens=True
193
  )
194
 
195
  messages.append({"role": "assistant", "content": response})
 
13
 
14
  # 🐷SUS-Chat: Instruction tuning done right
15
 
16
+ <p align="left">
17
+ <a href="README_CN.md">中文</a>&nbsp; | &nbsp;English&nbsp;
18
+ </p>
19
 
20
+ <br><br>
21
 
22
  <div align="center">
23
 
 
87
  # Introduction
88
 
89
  <img src="https://hackmd.io/_uploads/HJlDtzhBa.png" id="fig-sus"
90
+ alt="Figure 1: DALL·E 2023-12-01 11.03.28 - An imposing, majestic wild boar combined with elements of a futuristic transformer robot. The boar itself should be intricately blended with these tra" />
91
 
92
  **SUS-Chat** is a 34B bilingual Chinese-English dialogue model, jointly
93
  released by the **Southern University of Science and Technology** and
94
+ **Cognitive Computing and Natural Language Center of International
95
+ Digital Economy Academy (IDEA-CCNL)**. The SUS-Chat-34B model has been
96
+ fine-tuned on millions of high-quality, multilingual instruction data.
97
+ While maintaining the strong language capabilities of the base model,
98
+ the SUS-Chat-34B model has improved the model’s response to human
99
  instructions through high-quality instruction fine-tuning and excels at
100
  imitating human thought processes through chains of thought. It
101
  introduces inter-instruction attention sharing in long texts, expanding
 
147
 
148
  <img
149
  src="https://github.com/SUSTech-IDEA/SUS-Chat/raw/main/assets/radar.png"
150
+ id="fig-bench" alt="Figure 2: Benchmark" />
151
 
152
  # Usage
153
 
 
179
 
180
  messages = [{"role": "user", "content": "hi"}]
181
 
182
+ input_ids = tokenizer.encode(
183
+ chat_template(messages), return_tensors="pt", add_special_tokens=False
184
+ ).to("cuda")
185
+ output_ids = model.generate(input_ids.to("cuda"), max_length=256)
186
  response = tokenizer.decode(
187
+ output_ids[0][input_ids.shape[1] :], skip_special_tokens=False
188
  )
189
 
190
  messages.append({"role": "assistant", "content": response})
 
193
 
194
  messages.append({"role": "user", "content": "What is the capital of China?"})
195
 
196
+ input_ids = tokenizer.encode(
197
+ chat_template(messages), return_tensors="pt", add_special_tokens=False
198
+ ).to("cuda")
199
+ output_ids = model.generate(input_ids.to("cuda"), max_length=256)
200
  response = tokenizer.decode(
201
+ output_ids[0][input_ids.shape[1] :], skip_special_tokens=False
202
  )
203
 
204
  messages.append({"role": "assistant", "content": response})