File size: 967 Bytes
d24b5bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# Tokenizer configuration.
#
# `tokenizers_info` is a list with one mapping per sub-tokenizer. Every entry
# carries:
#   name               - label of the sub-tokenizer; also embedded in its
#                        delimiters (<start_NAME> / <end_NAME>)
#   tokenizer_id       - integer id, unique per entry (0..3 here)
#   json_path          - relative path to the tokenizer JSON file
#   modular_json_path  - second path key; in this file it always points to the
#                        same JSON as json_path
#   start_delimiter /
#   end_delimiter      - strings that open and close a span for this entry
#
# NOTE(review): paths start with "./" - presumably resolved relative to this
# file's directory; confirm with the loader.
tokenizers_info:
- name: AA
  tokenizer_id: 0
  json_path: ./t5_tokenizer_AA_special.json
  modular_json_path: ./t5_tokenizer_AA_special.json
  start_delimiter: <start_AA>
  end_delimiter: <end_AA>
- name: SMILES
  tokenizer_id: 1
  json_path: ./bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
  modular_json_path: ./bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
  start_delimiter: <start_SMILES>
  end_delimiter: <end_SMILES>
- name: CELL_ATTRIBUTES
  tokenizer_id: 2
  json_path: ./cell_attributes_tokenizer.json
  modular_json_path: ./cell_attributes_tokenizer.json
  start_delimiter: <start_CELL_ATTRIBUTES>
  end_delimiter: <end_CELL_ATTRIBUTES>
- name: GENE
  tokenizer_id: 3
  json_path: ./gene_tokenizer.json
  modular_json_path: ./gene_tokenizer.json
  start_delimiter: <start_GENE>
  end_delimiter: <end_GENE>
  # NOTE(review): nesting was reconstructed; this key is kept on the GENE
  # entry because it directly follows that entry's fields. Presumably the
  # lowest token id assigned to this tokenizer - confirm against the loader.
  minimal_token_id: 5000

# Top-level id limits. NOTE(review): presumably upper bounds for all token ids
# and for special-token ids respectively - confirm against the loader.
max_possible_token_id: 100000
max_special_token_id: 500
|