|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<UNK>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "<PAD>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "<CLS>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 3, |
|
"content": "<SEP>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 4, |
|
"content": "<MASK>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 5, |
|
"content": "<EOS>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 6, |
|
"content": "<MOLECULAR_ENTITY>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 7, |
|
"content": "<GLOBAL_INTERACTION_ATTRIBUTES>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 8, |
|
"content": "<MOLECULAR_ENTITY_ANTIGEN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 9, |
|
"content": "<MOLECULAR_ENTITY_EPITOPE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 10, |
|
"content": "<MOLECULAR_ENTITY_ANTIBODY_HEAVY_CHAIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 11, |
|
"content": "<MOLECULAR_ENTITY_ANTIBODY_LIGHT_CHAIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 12, |
|
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CHAIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 13, |
|
"content": "<MOLECULAR_ENTITY_TCR_BETA_VDJ>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 14, |
|
"content": "<MOLECULAR_ENTITY_TCR_BETA_CDR3>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 15, |
|
"content": "<BINDING_AFFINITY_CLASS>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 16, |
|
"content": "<DECODER_START>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 17, |
|
"content": "<BINDING>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 18, |
|
"content": "<FILLIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 19, |
|
"content": "<REORDER>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 20, |
|
"content": "<TOAA>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 21, |
|
"content": "<ACTIVE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 22, |
|
"content": "<GENESEQ>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 23, |
|
"content": "<INCREASE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 24, |
|
"content": "<DECREASE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 25, |
|
"content": "<STRUCTURE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 26, |
|
"content": "<DISTANCE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 27, |
|
"content": "<SOLUBILITY>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 28, |
|
"content": "<TOXICITY>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 29, |
|
"content": "<AB>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 30, |
|
"content": "<ISACTIVE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 31, |
|
"content": "<ISSYNTHETIC>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 32, |
|
"content": "<PENETR>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 33, |
|
"content": "<ABSORPTION>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 34, |
|
"content": "<DISTRIBUTION>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 35, |
|
"content": "<METABOLISM>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 36, |
|
"content": "<EXCRETION>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 37, |
|
"content": "<FLUORESCENCE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 38, |
|
"content": "<STABILITY>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 39, |
|
"content": "<DISORDER>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 40, |
|
"content": "<DISEASE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 41, |
|
"content": "<BINARY>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 42, |
|
"content": "<REGRESSION>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 43, |
|
"content": "<ORGANISM>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 44, |
|
"content": "<0>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 45, |
|
"content": "<1>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 46, |
|
"content": "<2>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 47, |
|
"content": "<3>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 48, |
|
"content": "<4>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 49, |
|
"content": "<5>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 50, |
|
"content": "<6>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 51, |
|
"content": "<7>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 52, |
|
"content": "<8>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 53, |
|
"content": "<9>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 54, |
|
"content": "<.>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 55, |
|
"content": "<YES>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 56, |
|
"content": "<NO>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 57, |
|
"content": "<SENTINEL_ID_0>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 58, |
|
"content": "<SENTINEL_ID_1>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 59, |
|
"content": "<SENTINEL_ID_2>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 60, |
|
"content": "<SENTINEL_ID_3>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 61, |
|
"content": "<SENTINEL_ID_4>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 62, |
|
"content": "<SENTINEL_ID_5>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 63, |
|
"content": "<SENTINEL_ID_6>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 64, |
|
"content": "<SENTINEL_ID_7>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 65, |
|
"content": "<SENTINEL_ID_8>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 66, |
|
"content": "<SENTINEL_ID_9>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 67, |
|
"content": "<SENTINEL_ID_10>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 68, |
|
"content": "<SENTINEL_ID_11>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 69, |
|
"content": "<SENTINEL_ID_12>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 70, |
|
"content": "<SENTINEL_ID_13>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 71, |
|
"content": "<SENTINEL_ID_14>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 72, |
|
"content": "<SENTINEL_ID_15>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 73, |
|
"content": "<SENTINEL_ID_16>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 74, |
|
"content": "<SENTINEL_ID_17>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 75, |
|
"content": "<SENTINEL_ID_18>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 76, |
|
"content": "<SENTINEL_ID_19>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 77, |
|
"content": "<SENTINEL_ID_20>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 78, |
|
"content": "<SENTINEL_ID_21>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 79, |
|
"content": "<SENTINEL_ID_22>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 80, |
|
"content": "<SENTINEL_ID_23>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 81, |
|
"content": "<SENTINEL_ID_24>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 82, |
|
"content": "<SENTINEL_ID_25>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 83, |
|
"content": "<SENTINEL_ID_26>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 84, |
|
"content": "<SENTINEL_ID_27>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 85, |
|
"content": "<SENTINEL_ID_28>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 86, |
|
"content": "<SENTINEL_ID_29>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 87, |
|
"content": "<SENTINEL_ID_30>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 88, |
|
"content": "<SENTINEL_ID_31>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 89, |
|
"content": "<SENTINEL_ID_32>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 90, |
|
"content": "<SENTINEL_ID_33>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 91, |
|
"content": "<SENTINEL_ID_34>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 92, |
|
"content": "<SENTINEL_ID_35>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 93, |
|
"content": "<SENTINEL_ID_36>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 94, |
|
"content": "<SENTINEL_ID_37>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 95, |
|
"content": "<SENTINEL_ID_38>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 96, |
|
"content": "<SENTINEL_ID_39>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 97, |
|
"content": "<SENTINEL_ID_40>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 98, |
|
"content": "<SENTINEL_ID_41>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 99, |
|
"content": "<SENTINEL_ID_42>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 100, |
|
"content": "<SENTINEL_ID_43>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 101, |
|
"content": "<SENTINEL_ID_44>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 102, |
|
"content": "<SENTINEL_ID_45>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 103, |
|
"content": "<SENTINEL_ID_46>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 104, |
|
"content": "<SENTINEL_ID_47>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 105, |
|
"content": "<SENTINEL_ID_48>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 106, |
|
"content": "<SENTINEL_ID_49>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 107, |
|
"content": "<SENTINEL_ID_50>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 108, |
|
"content": "<SENTINEL_ID_51>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 109, |
|
"content": "<SENTINEL_ID_52>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 110, |
|
"content": "<SENTINEL_ID_53>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 111, |
|
"content": "<SENTINEL_ID_54>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 112, |
|
"content": "<SENTINEL_ID_55>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 113, |
|
"content": "<SENTINEL_ID_56>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 114, |
|
"content": "<SENTINEL_ID_57>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 115, |
|
"content": "<SENTINEL_ID_58>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 116, |
|
"content": "<SENTINEL_ID_59>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 117, |
|
"content": "<SENTINEL_ID_60>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 118, |
|
"content": "<SENTINEL_ID_61>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 119, |
|
"content": "<SENTINEL_ID_62>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 120, |
|
"content": "<SENTINEL_ID_63>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 121, |
|
"content": "<SENTINEL_ID_64>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 122, |
|
"content": "<SENTINEL_ID_65>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 123, |
|
"content": "<SENTINEL_ID_66>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 124, |
|
"content": "<SENTINEL_ID_67>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 125, |
|
"content": "<SENTINEL_ID_68>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 126, |
|
"content": "<SENTINEL_ID_69>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 127, |
|
"content": "<SENTINEL_ID_70>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 128, |
|
"content": "<SENTINEL_ID_71>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 129, |
|
"content": "<SENTINEL_ID_72>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 130, |
|
"content": "<SENTINEL_ID_73>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 131, |
|
"content": "<SENTINEL_ID_74>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 132, |
|
"content": "<SENTINEL_ID_75>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 133, |
|
"content": "<SENTINEL_ID_76>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 134, |
|
"content": "<SENTINEL_ID_77>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 135, |
|
"content": "<SENTINEL_ID_78>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 136, |
|
"content": "<SENTINEL_ID_79>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 137, |
|
"content": "<SENTINEL_ID_80>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 138, |
|
"content": "<SENTINEL_ID_81>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 139, |
|
"content": "<SENTINEL_ID_82>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 140, |
|
"content": "<SENTINEL_ID_83>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 141, |
|
"content": "<SENTINEL_ID_84>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 142, |
|
"content": "<SENTINEL_ID_85>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 143, |
|
"content": "<SENTINEL_ID_86>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 144, |
|
"content": "<SENTINEL_ID_87>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 145, |
|
"content": "<SENTINEL_ID_88>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 146, |
|
"content": "<SENTINEL_ID_89>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 147, |
|
"content": "<SENTINEL_ID_90>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 148, |
|
"content": "<SENTINEL_ID_91>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 149, |
|
"content": "<SENTINEL_ID_92>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 150, |
|
"content": "<SENTINEL_ID_93>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 151, |
|
"content": "<SENTINEL_ID_94>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 152, |
|
"content": "<SENTINEL_ID_95>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 153, |
|
"content": "<SENTINEL_ID_96>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 154, |
|
"content": "<SENTINEL_ID_97>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 155, |
|
"content": "<SENTINEL_ID_98>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 156, |
|
"content": "<SENTINEL_ID_99>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 157, |
|
"content": "<SENTINEL_ID_100>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 158, |
|
"content": "<SENTINEL_ID_101>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 159, |
|
"content": "<SENTINEL_ID_102>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 160, |
|
"content": "<SENTINEL_ID_103>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 161, |
|
"content": "<SENTINEL_ID_104>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 162, |
|
"content": "<SENTINEL_ID_105>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 163, |
|
"content": "<SENTINEL_ID_106>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 164, |
|
"content": "<SENTINEL_ID_107>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 165, |
|
"content": "<SENTINEL_ID_108>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 166, |
|
"content": "<SENTINEL_ID_109>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 167, |
|
"content": "<SENTINEL_ID_110>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 168, |
|
"content": "<SENTINEL_ID_111>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 169, |
|
"content": "<SENTINEL_ID_112>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 170, |
|
"content": "<SENTINEL_ID_113>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 171, |
|
"content": "<SENTINEL_ID_114>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 172, |
|
"content": "<SENTINEL_ID_115>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 173, |
|
"content": "<SENTINEL_ID_116>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 174, |
|
"content": "<SENTINEL_ID_117>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 175, |
|
"content": "<SENTINEL_ID_118>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 176, |
|
"content": "<SENTINEL_ID_119>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 177, |
|
"content": "<SENTINEL_ID_120>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 178, |
|
"content": "<SENTINEL_ID_121>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 179, |
|
"content": "<SENTINEL_ID_122>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 180, |
|
"content": "<SENTINEL_ID_123>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 181, |
|
"content": "<SENTINEL_ID_124>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 182, |
|
"content": "<SENTINEL_ID_125>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 183, |
|
"content": "<SENTINEL_ID_126>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 184, |
|
"content": "<SENTINEL_ID_127>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 185, |
|
"content": "<SENTINEL_ID_128>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 186, |
|
"content": "<SENTINEL_ID_129>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 187, |
|
"content": "<SENTINEL_ID_130>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 188, |
|
"content": "<SENTINEL_ID_131>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 189, |
|
"content": "<SENTINEL_ID_132>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 190, |
|
"content": "<SENTINEL_ID_133>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 191, |
|
"content": "<SENTINEL_ID_134>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 192, |
|
"content": "<SENTINEL_ID_135>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 193, |
|
"content": "<SENTINEL_ID_136>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 194, |
|
"content": "<SENTINEL_ID_137>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 195, |
|
"content": "<SENTINEL_ID_138>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 196, |
|
"content": "<SENTINEL_ID_139>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 197, |
|
"content": "<SENTINEL_ID_140>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 198, |
|
"content": "<SENTINEL_ID_141>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 199, |
|
"content": "<SENTINEL_ID_142>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 200, |
|
"content": "<SENTINEL_ID_143>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 201, |
|
"content": "<SENTINEL_ID_144>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 202, |
|
"content": "<SENTINEL_ID_145>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 203, |
|
"content": "<SENTINEL_ID_146>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 204, |
|
"content": "<SENTINEL_ID_147>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 205, |
|
"content": "<SENTINEL_ID_148>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 206, |
|
"content": "<SENTINEL_ID_149>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 207, |
|
"content": "<SENTINEL_ID_150>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 208, |
|
"content": "<SENTINEL_ID_151>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 209, |
|
"content": "<SENTINEL_ID_152>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 210, |
|
"content": "<SENTINEL_ID_153>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 211, |
|
"content": "<SENTINEL_ID_154>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 212, |
|
"content": "<SENTINEL_ID_155>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 213, |
|
"content": "<SENTINEL_ID_156>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 214, |
|
"content": "<SENTINEL_ID_157>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 215, |
|
"content": "<SENTINEL_ID_158>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 216, |
|
"content": "<SENTINEL_ID_159>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 217, |
|
"content": "<SENTINEL_ID_160>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 218, |
|
"content": "<SENTINEL_ID_161>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 219, |
|
"content": "<SENTINEL_ID_162>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 220, |
|
"content": "<SENTINEL_ID_163>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 221, |
|
"content": "<SENTINEL_ID_164>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 222, |
|
"content": "<SENTINEL_ID_165>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 223, |
|
"content": "<SENTINEL_ID_166>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 224, |
|
"content": "<SENTINEL_ID_167>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 225, |
|
"content": "<SENTINEL_ID_168>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 226, |
|
"content": "<SENTINEL_ID_169>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 227, |
|
"content": "<SENTINEL_ID_170>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 228, |
|
"content": "<SENTINEL_ID_171>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 229, |
|
"content": "<SENTINEL_ID_172>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 230, |
|
"content": "<SENTINEL_ID_173>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 231, |
|
"content": "<SENTINEL_ID_174>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 232, |
|
"content": "<SENTINEL_ID_175>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 233, |
|
"content": "<SENTINEL_ID_176>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 234, |
|
"content": "<SENTINEL_ID_177>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 235, |
|
"content": "<SENTINEL_ID_178>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 236, |
|
"content": "<SENTINEL_ID_179>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 237, |
|
"content": "<SENTINEL_ID_180>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 238, |
|
"content": "<SENTINEL_ID_181>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 239, |
|
"content": "<SENTINEL_ID_182>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 240, |
|
"content": "<SENTINEL_ID_183>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 241, |
|
"content": "<SENTINEL_ID_184>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 242, |
|
"content": "<SENTINEL_ID_185>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 243, |
|
"content": "<SENTINEL_ID_186>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 244, |
|
"content": "<SENTINEL_ID_187>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 245, |
|
"content": "<SENTINEL_ID_188>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 246, |
|
"content": "<SENTINEL_ID_189>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 247, |
|
"content": "<SENTINEL_ID_190>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 248, |
|
"content": "<SENTINEL_ID_191>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 249, |
|
"content": "<SENTINEL_ID_192>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 250, |
|
"content": "<SENTINEL_ID_193>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 251, |
|
"content": "<SENTINEL_ID_194>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 252, |
|
"content": "<SENTINEL_ID_195>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 253, |
|
"content": "<SENTINEL_ID_196>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 254, |
|
"content": "<SENTINEL_ID_197>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 255, |
|
"content": "<SENTINEL_ID_198>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 256, |
|
"content": "<SENTINEL_ID_199>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 257, |
|
"content": "<MOLECULAR_ENTITY_TYPE_ANTIGEN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 258, |
|
"content": "<MOLECULAR_ENTITY_TYPE_ANTIBODY_LIGHT_CHAIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 259, |
|
"content": "<MOLECULAR_ENTITY_TYPE_ANTIBODY_HEAVY_CHAIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 260, |
|
"content": "<ATTRIBUTE_ORGANISM>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 261, |
|
"content": "<ATTRIBUTE_ORGANISM_HUMAN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 262, |
|
"content": "<ATTRIBUTE_ORGANISM_RABBIT>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 263, |
|
"content": "<ATTRIBUTE_ORGANISM_RAT>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 264, |
|
"content": "<ATTRIBUTE_ORGANISM_MOUSE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 265, |
|
"content": "<ATTRIBUTE_ORGANISM_MONKEY>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 266, |
|
"content": "<ATTRIBUTE_ORGANISM_CAMEL>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 267, |
|
"content": "<EPITOPE_PARATOPE_PREDICTION>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 268, |
|
"content": "<MOLECULAR_ENTITY_ANTIBODY_HEAVY_CHAIN_CDR1>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 269, |
|
"content": "<MOLECULAR_ENTITY_ANTIBODY_LIGHT_CHAIN_CDR3>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 270, |
|
"content": "<MOLECULAR_ENTITY_ANTIBODY_HEAVY_CHAIN_CDR3>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 271, |
|
"content": "<MOLECULAR_ENTITY_ANTIBODY_LIGHT_CHAIN_CDR2>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 272, |
|
"content": "<MOLECULAR_ENTITY_ANTIBODY_HEAVY_CHAIN_CDR2>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 273, |
|
"content": "<MOLECULAR_ENTITY_ANTIBODY_LIGHT_CHAIN_CDR1>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 274, |
|
"content": "<MOLECULAR_ENTITY_GENERAL_PROTEIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 275, |
|
"content": "<TIMESTEP>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 276, |
|
"content": "<DIFFUSION>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 277, |
|
"content": "<SEQUENCE_NATURAL_END>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 278, |
|
"content": "<SMILES_SEQUENCE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 279, |
|
"content": "<SELFIES_SEQUENCE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 280, |
|
"content": "<AMINO_ACID_SEQUENCE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 281, |
|
"content": "<GENERAL_AFFINITY_CLASS>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 282, |
|
"content": "<BACKSPACE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 283, |
|
"content": "<SEQUENCE_NATURAL_START>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 284, |
|
"content": "<NOOP>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 285, |
|
"content": "<TARGETED_ANTIBODY_DESIGN_ENCODER_ONLY_MODE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 286, |
|
"content": "<MOLECULAR_ENTITY_SMALL_MOLECULE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 287, |
|
"content": "<MOLECULAR_ENTITY_CELL_GENE_EXPRESSION_RANKED>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 288, |
|
"content": "<CELL_TYPE_CLASS>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 289, |
|
"content": "<TISSUE_TYPE_CLASS>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 290, |
|
"content": "<CORRUPTED_AREA_START>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 291, |
|
"content": "<CORRUPTED_AREA_END>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 292, |
|
"content": "<MOLECULAR_ENTITY_MUTATED_PROTEIN_CHAIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 293, |
|
"content": "<MOLECULAR_ENTITY_PROTEIN_CHAIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 294, |
|
"content": "<COMPLEX_ENTITY>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 295, |
|
"content": "<ALTERNATIVE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 296, |
|
"content": "<CDR3_REGION>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 297, |
|
"content": "<GENERAL_CHAIN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 298, |
|
"content": "<SUBMOLECULAR_ENTITY>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 299, |
|
"content": "<MUTATED>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 300, |
|
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 301, |
|
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 302, |
|
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 303, |
|
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 304, |
|
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 305, |
|
"content": "<SCALAR>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 306, |
|
"content": "<VECTOR>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 307, |
|
"content": "<MASKED_SCALAR>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 308, |
|
"content": "<MASKED_VECTOR>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 309, |
|
"content": "<AUTOENCODER_LATENT_LOG_VARIANCE>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 310, |
|
"content": "<AUTOENCODER_LATENT_MEAN>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 311, |
|
"content": "<AUTOENCODER_LATENT_SAMPLED_Z>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 312, |
|
"content": "<AUTOENCODER_TASK>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 313, |
|
"content": "<DECODED_FROM_LATENT>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": { |
|
"type": "Sequence", |
|
"pretokenizers": [ |
|
{ |
|
"type": "Split", |
|
"pattern": { |
|
"Regex": "<.*?>|\\S" |
|
}, |
|
"behavior": "Removed", |
|
"invert": true |
|
} |
|
] |
|
}, |
|
"post_processor": null, |
|
"decoder": null, |
|
"model": { |
|
"type": "WordLevel", |
|
"vocab": { |
|
"<UNK>": 0, |
|
"<PAD>": 1, |
|
"<CLS>": 2, |
|
"<SEP>": 3, |
|
"<MASK>": 4, |
|
"<EOS>": 5, |
|
"<MOLECULAR_ENTITY>": 6, |
|
"<GLOBAL_INTERACTION_ATTRIBUTES>": 7, |
|
"<MOLECULAR_ENTITY_ANTIGEN>": 8, |
|
"<MOLECULAR_ENTITY_EPITOPE>": 9, |
|
"<MOLECULAR_ENTITY_ANTIBODY_HEAVY_CHAIN>": 10, |
|
"<MOLECULAR_ENTITY_ANTIBODY_LIGHT_CHAIN>": 11, |
|
"<MOLECULAR_ENTITY_TCR_ALPHA_CHAIN>": 12, |
|
"<MOLECULAR_ENTITY_TCR_BETA_VDJ>": 13, |
|
"<MOLECULAR_ENTITY_TCR_BETA_CDR3>": 14, |
|
"<BINDING_AFFINITY_CLASS>": 15, |
|
"<DECODER_START>": 16, |
|
"<BINDING>": 17, |
|
"<FILLIN>": 18, |
|
"<REORDER>": 19, |
|
"<TOAA>": 20, |
|
"<ACTIVE>": 21, |
|
"<GENESEQ>": 22, |
|
"<INCREASE>": 23, |
|
"<DECREASE>": 24, |
|
"<STRUCTURE>": 25, |
|
"<DISTANCE>": 26, |
|
"<SOLUBILITY>": 27, |
|
"<TOXICITY>": 28, |
|
"<AB>": 29, |
|
"<ISACTIVE>": 30, |
|
"<ISSYNTHETIC>": 31, |
|
"<PENETR>": 32, |
|
"<ABSORPTION>": 33, |
|
"<DISTRIBUTION>": 34, |
|
"<METABOLISM>": 35, |
|
"<EXCRETION>": 36, |
|
"<FLUORESCENCE>": 37, |
|
"<STABILITY>": 38, |
|
"<DISORDER>": 39, |
|
"<DISEASE>": 40, |
|
"<BINARY>": 41, |
|
"<REGRESSION>": 42, |
|
"<ORGANISM>": 43, |
|
"<0>": 44, |
|
"<1>": 45, |
|
"<2>": 46, |
|
"<3>": 47, |
|
"<4>": 48, |
|
"<5>": 49, |
|
"<6>": 50, |
|
"<7>": 51, |
|
"<8>": 52, |
|
"<9>": 53, |
|
"<.>": 54, |
|
"<YES>": 55, |
|
"<NO>": 56, |
|
"<SENTINEL_ID_0>": 57, |
|
"<SENTINEL_ID_1>": 58, |
|
"<SENTINEL_ID_2>": 59, |
|
"<SENTINEL_ID_3>": 60, |
|
"<SENTINEL_ID_4>": 61, |
|
"<SENTINEL_ID_5>": 62, |
|
"<SENTINEL_ID_6>": 63, |
|
"<SENTINEL_ID_7>": 64, |
|
"<SENTINEL_ID_8>": 65, |
|
"<SENTINEL_ID_9>": 66, |
|
"<SENTINEL_ID_10>": 67, |
|
"<SENTINEL_ID_11>": 68, |
|
"<SENTINEL_ID_12>": 69, |
|
"<SENTINEL_ID_13>": 70, |
|
"<SENTINEL_ID_14>": 71, |
|
"<SENTINEL_ID_15>": 72, |
|
"<SENTINEL_ID_16>": 73, |
|
"<SENTINEL_ID_17>": 74, |
|
"<SENTINEL_ID_18>": 75, |
|
"<SENTINEL_ID_19>": 76, |
|
"<SENTINEL_ID_20>": 77, |
|
"<SENTINEL_ID_21>": 78, |
|
"<SENTINEL_ID_22>": 79, |
|
"<SENTINEL_ID_23>": 80, |
|
"<SENTINEL_ID_24>": 81, |
|
"<SENTINEL_ID_25>": 82, |
|
"<SENTINEL_ID_26>": 83, |
|
"<SENTINEL_ID_27>": 84, |
|
"<SENTINEL_ID_28>": 85, |
|
"<SENTINEL_ID_29>": 86, |
|
"<SENTINEL_ID_30>": 87, |
|
"<SENTINEL_ID_31>": 88, |
|
"<SENTINEL_ID_32>": 89, |
|
"<SENTINEL_ID_33>": 90, |
|
"<SENTINEL_ID_34>": 91, |
|
"<SENTINEL_ID_35>": 92, |
|
"<SENTINEL_ID_36>": 93, |
|
"<SENTINEL_ID_37>": 94, |
|
"<SENTINEL_ID_38>": 95, |
|
"<SENTINEL_ID_39>": 96, |
|
"<SENTINEL_ID_40>": 97, |
|
"<SENTINEL_ID_41>": 98, |
|
"<SENTINEL_ID_42>": 99, |
|
"<SENTINEL_ID_43>": 100, |
|
"<SENTINEL_ID_44>": 101, |
|
"<SENTINEL_ID_45>": 102, |
|
"<SENTINEL_ID_46>": 103, |
|
"<SENTINEL_ID_47>": 104, |
|
"<SENTINEL_ID_48>": 105, |
|
"<SENTINEL_ID_49>": 106, |
|
"<SENTINEL_ID_50>": 107, |
|
"<SENTINEL_ID_51>": 108, |
|
"<SENTINEL_ID_52>": 109, |
|
"<SENTINEL_ID_53>": 110, |
|
"<SENTINEL_ID_54>": 111, |
|
"<SENTINEL_ID_55>": 112, |
|
"<SENTINEL_ID_56>": 113, |
|
"<SENTINEL_ID_57>": 114, |
|
"<SENTINEL_ID_58>": 115, |
|
"<SENTINEL_ID_59>": 116, |
|
"<SENTINEL_ID_60>": 117, |
|
"<SENTINEL_ID_61>": 118, |
|
"<SENTINEL_ID_62>": 119, |
|
"<SENTINEL_ID_63>": 120, |
|
"<SENTINEL_ID_64>": 121, |
|
"<SENTINEL_ID_65>": 122, |
|
"<SENTINEL_ID_66>": 123, |
|
"<SENTINEL_ID_67>": 124, |
|
"<SENTINEL_ID_68>": 125, |
|
"<SENTINEL_ID_69>": 126, |
|
"<SENTINEL_ID_70>": 127, |
|
"<SENTINEL_ID_71>": 128, |
|
"<SENTINEL_ID_72>": 129, |
|
"<SENTINEL_ID_73>": 130, |
|
"<SENTINEL_ID_74>": 131, |
|
"<SENTINEL_ID_75>": 132, |
|
"<SENTINEL_ID_76>": 133, |
|
"<SENTINEL_ID_77>": 134, |
|
"<SENTINEL_ID_78>": 135, |
|
"<SENTINEL_ID_79>": 136, |
|
"<SENTINEL_ID_80>": 137, |
|
"<SENTINEL_ID_81>": 138, |
|
"<SENTINEL_ID_82>": 139, |
|
"<SENTINEL_ID_83>": 140, |
|
"<SENTINEL_ID_84>": 141, |
|
"<SENTINEL_ID_85>": 142, |
|
"<SENTINEL_ID_86>": 143, |
|
"<SENTINEL_ID_87>": 144, |
|
"<SENTINEL_ID_88>": 145, |
|
"<SENTINEL_ID_89>": 146, |
|
"<SENTINEL_ID_90>": 147, |
|
"<SENTINEL_ID_91>": 148, |
|
"<SENTINEL_ID_92>": 149, |
|
"<SENTINEL_ID_93>": 150, |
|
"<SENTINEL_ID_94>": 151, |
|
"<SENTINEL_ID_95>": 152, |
|
"<SENTINEL_ID_96>": 153, |
|
"<SENTINEL_ID_97>": 154, |
|
"<SENTINEL_ID_98>": 155, |
|
"<SENTINEL_ID_99>": 156, |
|
"<SENTINEL_ID_100>": 157, |
|
"<SENTINEL_ID_101>": 158, |
|
"<SENTINEL_ID_102>": 159, |
|
"<SENTINEL_ID_103>": 160, |
|
"<SENTINEL_ID_104>": 161, |
|
"<SENTINEL_ID_105>": 162, |
|
"<SENTINEL_ID_106>": 163, |
|
"<SENTINEL_ID_107>": 164, |
|
"<SENTINEL_ID_108>": 165, |
|
"<SENTINEL_ID_109>": 166, |
|
"<SENTINEL_ID_110>": 167, |
|
"<SENTINEL_ID_111>": 168, |
|
"<SENTINEL_ID_112>": 169, |
|
"<SENTINEL_ID_113>": 170, |
|
"<SENTINEL_ID_114>": 171, |
|
"<SENTINEL_ID_115>": 172, |
|
"<SENTINEL_ID_116>": 173, |
|
"<SENTINEL_ID_117>": 174, |
|
"<SENTINEL_ID_118>": 175, |
|
"<SENTINEL_ID_119>": 176, |
|
"<SENTINEL_ID_120>": 177, |
|
"<SENTINEL_ID_121>": 178, |
|
"<SENTINEL_ID_122>": 179, |
|
"<SENTINEL_ID_123>": 180, |
|
"<SENTINEL_ID_124>": 181, |
|
"<SENTINEL_ID_125>": 182, |
|
"<SENTINEL_ID_126>": 183, |
|
"<SENTINEL_ID_127>": 184, |
|
"<SENTINEL_ID_128>": 185, |
|
"<SENTINEL_ID_129>": 186, |
|
"<SENTINEL_ID_130>": 187, |
|
"<SENTINEL_ID_131>": 188, |
|
"<SENTINEL_ID_132>": 189, |
|
"<SENTINEL_ID_133>": 190, |
|
"<SENTINEL_ID_134>": 191, |
|
"<SENTINEL_ID_135>": 192, |
|
"<SENTINEL_ID_136>": 193, |
|
"<SENTINEL_ID_137>": 194, |
|
"<SENTINEL_ID_138>": 195, |
|
"<SENTINEL_ID_139>": 196, |
|
"<SENTINEL_ID_140>": 197, |
|
"<SENTINEL_ID_141>": 198, |
|
"<SENTINEL_ID_142>": 199, |
|
"<SENTINEL_ID_143>": 200, |
|
"<SENTINEL_ID_144>": 201, |
|
"<SENTINEL_ID_145>": 202, |
|
"<SENTINEL_ID_146>": 203, |
|
"<SENTINEL_ID_147>": 204, |
|
"<SENTINEL_ID_148>": 205, |
|
"<SENTINEL_ID_149>": 206, |
|
"<SENTINEL_ID_150>": 207, |
|
"<SENTINEL_ID_151>": 208, |
|
"<SENTINEL_ID_152>": 209, |
|
"<SENTINEL_ID_153>": 210, |
|
"<SENTINEL_ID_154>": 211, |
|
"<SENTINEL_ID_155>": 212, |
|
"<SENTINEL_ID_156>": 213, |
|
"<SENTINEL_ID_157>": 214, |
|
"<SENTINEL_ID_158>": 215, |
|
"<SENTINEL_ID_159>": 216, |
|
"<SENTINEL_ID_160>": 217, |
|
"<SENTINEL_ID_161>": 218, |
|
"<SENTINEL_ID_162>": 219, |
|
"<SENTINEL_ID_163>": 220, |
|
"<SENTINEL_ID_164>": 221, |
|
"<SENTINEL_ID_165>": 222, |
|
"<SENTINEL_ID_166>": 223, |
|
"<SENTINEL_ID_167>": 224, |
|
"<SENTINEL_ID_168>": 225, |
|
"<SENTINEL_ID_169>": 226, |
|
"<SENTINEL_ID_170>": 227, |
|
"<SENTINEL_ID_171>": 228, |
|
"<SENTINEL_ID_172>": 229, |
|
"<SENTINEL_ID_173>": 230, |
|
"<SENTINEL_ID_174>": 231, |
|
"<SENTINEL_ID_175>": 232, |
|
"<SENTINEL_ID_176>": 233, |
|
"<SENTINEL_ID_177>": 234, |
|
"<SENTINEL_ID_178>": 235, |
|
"<SENTINEL_ID_179>": 236, |
|
"<SENTINEL_ID_180>": 237, |
|
"<SENTINEL_ID_181>": 238, |
|
"<SENTINEL_ID_182>": 239, |
|
"<SENTINEL_ID_183>": 240, |
|
"<SENTINEL_ID_184>": 241, |
|
"<SENTINEL_ID_185>": 242, |
|
"<SENTINEL_ID_186>": 243, |
|
"<SENTINEL_ID_187>": 244, |
|
"<SENTINEL_ID_188>": 245, |
|
"<SENTINEL_ID_189>": 246, |
|
"<SENTINEL_ID_190>": 247, |
|
"<SENTINEL_ID_191>": 248, |
|
"<SENTINEL_ID_192>": 249, |
|
"<SENTINEL_ID_193>": 250, |
|
"<SENTINEL_ID_194>": 251, |
|
"<SENTINEL_ID_195>": 252, |
|
"<SENTINEL_ID_196>": 253, |
|
"<SENTINEL_ID_197>": 254, |
|
"<SENTINEL_ID_198>": 255, |
|
"<SENTINEL_ID_199>": 256, |
|
"<MOLECULAR_ENTITY_TYPE_ANTIGEN>": 257, |
|
"<MOLECULAR_ENTITY_TYPE_ANTIBODY_LIGHT_CHAIN>": 258, |
|
"<MOLECULAR_ENTITY_TYPE_ANTIBODY_HEAVY_CHAIN>": 259, |
|
"<ATTRIBUTE_ORGANISM>": 260, |
|
"<ATTRIBUTE_ORGANISM_HUMAN>": 261, |
|
"<ATTRIBUTE_ORGANISM_RABBIT>": 262, |
|
"<ATTRIBUTE_ORGANISM_RAT>": 263, |
|
"<ATTRIBUTE_ORGANISM_MOUSE>": 264, |
|
"<ATTRIBUTE_ORGANISM_MONKEY>": 265, |
|
"<ATTRIBUTE_ORGANISM_CAMEL>": 266, |
|
"<EPITOPE_PARATOPE_PREDICTION>": 267, |
|
"<MOLECULAR_ENTITY_ANTIBODY_HEAVY_CHAIN_CDR1>": 268, |
|
"<MOLECULAR_ENTITY_ANTIBODY_LIGHT_CHAIN_CDR3>": 269, |
|
"<MOLECULAR_ENTITY_ANTIBODY_HEAVY_CHAIN_CDR3>": 270, |
|
"<MOLECULAR_ENTITY_ANTIBODY_LIGHT_CHAIN_CDR2>": 271, |
|
"<MOLECULAR_ENTITY_ANTIBODY_HEAVY_CHAIN_CDR2>": 272, |
|
"<MOLECULAR_ENTITY_ANTIBODY_LIGHT_CHAIN_CDR1>": 273, |
|
"<MOLECULAR_ENTITY_GENERAL_PROTEIN>": 274, |
|
"<TIMESTEP>": 275, |
|
"<DIFFUSION>": 276, |
|
"<SEQUENCE_NATURAL_END>": 277, |
|
"<SMILES_SEQUENCE>": 278, |
|
"<SELFIES_SEQUENCE>": 279, |
|
"<AMINO_ACID_SEQUENCE>": 280, |
|
"<GENERAL_AFFINITY_CLASS>": 281, |
|
"<BACKSPACE>": 282, |
|
"<SEQUENCE_NATURAL_START>": 283, |
|
"<NOOP>": 284, |
|
"<TARGETED_ANTIBODY_DESIGN_ENCODER_ONLY_MODE>": 285, |
|
"<MOLECULAR_ENTITY_SMALL_MOLECULE>": 286, |
|
"<MOLECULAR_ENTITY_CELL_GENE_EXPRESSION_RANKED>": 287, |
|
"<CELL_TYPE_CLASS>": 288, |
|
"<TISSUE_TYPE_CLASS>": 289, |
|
"<CORRUPTED_AREA_START>": 290, |
|
"<CORRUPTED_AREA_END>": 291, |
|
"<MOLECULAR_ENTITY_MUTATED_PROTEIN_CHAIN>": 292, |
|
"<MOLECULAR_ENTITY_PROTEIN_CHAIN>": 293, |
|
"<COMPLEX_ENTITY>": 294, |
|
"<ALTERNATIVE>": 295, |
|
"<CDR3_REGION>": 296, |
|
"<GENERAL_CHAIN>": 297, |
|
"<SUBMOLECULAR_ENTITY>": 298, |
|
"<MUTATED>": 299, |
|
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300, |
|
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301, |
|
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302, |
|
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303, |
|
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304, |
|
"<SCALAR>": 305, |
|
"<VECTOR>": 306, |
|
"<MASKED_SCALAR>": 307, |
|
"<MASKED_VECTOR>": 308, |
|
"<AUTOENCODER_LATENT_LOG_VARIANCE>": 309, |
|
"<AUTOENCODER_LATENT_MEAN>": 310, |
|
"<AUTOENCODER_LATENT_SAMPLED_Z>": 311, |
|
"<AUTOENCODER_TASK>": 312, |
|
"<DECODED_FROM_LATENT>": 313, |
|
"A": 501, |
|
"B": 502, |
|
"C": 503, |
|
"D": 504, |
|
"E": 505, |
|
"F": 506, |
|
"G": 507, |
|
"H": 508, |
|
"I": 509, |
|
"K": 510, |
|
"L": 511, |
|
"M": 512, |
|
"N": 513, |
|
"O": 514, |
|
"P": 515, |
|
"Q": 516, |
|
"R": 517, |
|
"S": 518, |
|
"T": 519, |
|
"U": 520, |
|
"V": 521, |
|
"W": 522, |
|
"X": 523, |
|
"Y": 524, |
|
"Z": 525, |
|
":": 526 |
|
}, |
|
"unk_token": "<UNK>" |
|
} |
|
} |