indiejoseph committed on
Commit
cc54c06
1 Parent(s): 1ac3d28

Training in progress, step 100

Browse files
config.json CHANGED
@@ -25,5 +25,5 @@
25
  "torch_dtype": "bfloat16",
26
  "transformers_version": "4.40.2",
27
  "use_cache": false,
28
- "vocab_size": 67264
29
  }
 
25
  "torch_dtype": "bfloat16",
26
  "transformers_version": "4.40.2",
27
  "use_cache": false,
28
+ "vocab_size": 67456
29
  }
special_tokens_map.json CHANGED
@@ -1,29 +1,36 @@
1
  {
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<|startoftext|>",
4
  "lstrip": false,
5
- "normalized": true,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
  "content": "<|endoftext|>",
11
  "lstrip": false,
12
- "normalized": true,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
  "content": "<unk>",
18
  "lstrip": false,
19
- "normalized": true,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
26
- "normalized": true,
27
  "rstrip": false,
28
  "single_word": false
29
  }
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|Human|>",
6
+ "<|Assistant|>",
7
+ "<|System|>"
8
+ ],
9
  "bos_token": {
10
  "content": "<|startoftext|>",
11
  "lstrip": false,
12
+ "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "eos_token": {
17
  "content": "<|endoftext|>",
18
  "lstrip": false,
19
+ "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "pad_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
26
+ "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
  "unk_token": {
31
  "content": "<unk>",
32
  "lstrip": false,
33
+ "normalized": false,
34
  "rstrip": false,
35
  "single_word": false
36
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
3
- size 1033105
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:701049bc2838520e8fb6561477505175af8d06be4952e321e32dbf8e9bb5c01a
3
+ size 1083475
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cfdd030a8208cf8c9173609e5ac4fab5b8b28eb906afae280afada0ac13990e
3
- size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea4f6392478906c1907983d5a47aabb917753dd4e2aad9f36addcc2ec38ce27b
3
+ size 5048