File size: 1,686 Bytes
dc6c577
 
 
 
 
 
af62b73
dc6c577
 
 
 
 
 
 
 
af62b73
be9d154
dc6c577
 
 
 
 
 
be9d154
af62b73
be9d154
dc6c577
 
 
be9d154
 
dc6c577
be9d154
af62b73
be9d154
dc6c577
 
 
be9d154
 
dc6c577
be9d154
dc6c577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e16c918
dc6c577
 
 
 
 
e7e81f8
dc6c577
210ee5b
236f06d
 
 
 
 
e7e81f8
51e2ff0
be9d154
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 1,
      "content": "<|endoftext|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 0,
      "content": "<|beginoftext|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "<|unknown|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 8,
      "content": "<|padding|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "NFC"
  },
  "pre_tokenizer": {
    "type": "ByteLevel",
    "add_prefix_space": false,
    "trim_offsets": true,
    "use_regex": true
  },
  "post_processor": {
    "type": "ByteLevel",
    "add_prefix_space": false,
    "trim_offsets": true,
    "use_regex": true
  },
  "decoder": {
    "type": "ByteLevel",
    "add_prefix_space": false,
    "trim_offsets": true,
    "use_regex": true
  },
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": "<|unknown|>",
    "continuing_subword_prefix": null,
    "end_of_word_suffix": null,
    "fuse_unk": false,
    "byte_fallback": false,
    "vocab": {
      "<|endoftext|>": 1,
      "<|padding|>": 8,
      "<|unknown|>": 2,
      "a": 3,
      "c": 4,
      "g": 5,
      "t": 6,
      "n": 7, 
      "<|beginoftext|>" : 0
    },
    "merges": []
  }
}