euclaise committed on
Commit
3842fa9
1 Parent(s): 5b3c04b

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +15 -0
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```
2
# Bridge sentence placed between the source passage and the dialogue turn.
pre_text = "The following is an interaction between a user and an AI assistant that is related to the above text."


def ds_map_fn(row):
    """Tokenize one dataset row into ``input_ids`` and ``labels``.

    The prompt is laid out as::

        [[[Title]]] <title>
        [[[Content]]] <context>

        <pre_text>

        [[[User]]] 

    and the supervised continuation as::

        <question>
        [[[Assistant]]] <answer><eos>

    Args:
        row: Mapping with string values under the keys ``'title'``,
            ``'context'``, ``'question'`` and ``'answer'``. Each value is
            stripped of surrounding whitespace before use.

    Returns:
        A dict with two equal-length lists of token ids:

        * ``'input_ids'`` -- prompt tokens, continuation tokens, then the
          tokenizer's EOS id.
        * ``'labels'`` -- the same sequence with every prompt position
          replaced by ``-100``.

    Note:
        Relies on module-level ``tokenizer`` and ``pre_text``. The question
        is part of the continuation, so its tokens are kept in ``labels``
        alongside the answer's.
    """
    prompt_text = (
        f"[[[Title]]] {row['title'].strip()}\n"
        f"[[[Content]]] {row['context'].strip()}\n\n"
        + pre_text
        + "\n\n[[[User]]] "
    )
    target_text = f"{row['question'].strip()}\n[[[Assistant]]] {row['answer'].strip()}"

    # Prompt and continuation are encoded separately so the prompt length in
    # tokens is known exactly for masking; no special tokens are added here
    # because EOS is appended by hand below.
    prompt_ids = tokenizer.encode(prompt_text, add_special_tokens=False)
    target_ids = tokenizer.encode(target_text, add_special_tokens=False)

    completion_ids = target_ids + [tokenizer.eos_token_id]

    # -100 is presumably the loss ignore index of the training setup (it is
    # the default ``ignore_index`` of torch's CrossEntropyLoss) -- confirm.
    return {
        'input_ids': prompt_ids + completion_ids,
        'labels': [-100] * len(prompt_ids) + completion_ids,
    }


# Replace every original column with the tokenized fields produced above.
ds = ds.map(ds_map_fn, remove_columns=ds.column_names)
15
+ ```