Update README.md
Browse files
README.md
CHANGED
@@ -99,7 +99,6 @@ def preprocess(
|
|
99 |
# Apply prompt templates
|
100 |
input_ids, attention_masks = [], []
|
101 |
for i, source in enumerate(sources):
|
102 |
-
## system_message
|
103 |
messages = [
|
104 |
{"role": "user",
|
105 |
"content": "\n\n".join(source)}
|
@@ -109,6 +108,7 @@ def preprocess(
|
|
109 |
input_id = model_inputs['input_ids'][0]
|
110 |
attention_mask = model_inputs['attention_mask'][0]
|
111 |
if len(input_id) > max_len:
|
|
|
112 |
diff = len(input_id) - max_len
|
113 |
input_id = input_id[:-5-diff] + input_id[-5:]
|
114 |
attention_mask = attention_mask[:-5-diff] + attention_mask[-5:]
|
|
|
99 |
# Apply prompt templates
|
100 |
input_ids, attention_masks = [], []
|
101 |
for i, source in enumerate(sources):
|
|
|
102 |
messages = [
|
103 |
{"role": "user",
|
104 |
"content": "\n\n".join(source)}
|
|
|
108 |
input_id = model_inputs['input_ids'][0]
|
109 |
attention_mask = model_inputs['attention_mask'][0]
|
110 |
if len(input_id) > max_len:
|
111 |
+
## on overflow, drop the excess tokens just before the end and keep the last five tokens: <|im_end|>(151645), \n(198), <|im_start|>(151644), assistant(77091), \n(198)
|
112 |
diff = len(input_id) - max_len
|
113 |
input_id = input_id[:-5-diff] + input_id[-5:]
|
114 |
attention_mask = attention_mask[:-5-diff] + attention_mask[-5:]
|