# Hosting-page header captured together with this file (not part of the YAML data):
#   SagiPolaczek's picture
#   Push model using huggingface_hub.
#   765e006 verified
#   raw
#   history blame
#   967 Bytes
# Tokenizer configuration.
#
# `tokenizers_info` is a list with one entry per tokenizer. Every entry gives a
# name, a numeric tokenizer_id, two JSON paths (json_path and modular_json_path,
# which are identical in every entry below), and a start/end delimiter string.
# All paths are written relative ("./"); what they are resolved against is
# decided by the consumer of this file.
tokenizers_info:
- name: AA
  tokenizer_id: 0
  json_path: ./t5_tokenizer_AA_special.json
  modular_json_path: ./t5_tokenizer_AA_special.json
  start_delimiter: <start_AA>
  end_delimiter: <end_AA>
- name: SMILES
  tokenizer_id: 1
  json_path: ./bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
  modular_json_path: ./bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json
  start_delimiter: <start_SMILES>
  end_delimiter: <end_SMILES>
- name: CELL_ATTRIBUTES
  tokenizer_id: 2
  json_path: ./cell_attributes_tokenizer.json
  modular_json_path: ./cell_attributes_tokenizer.json
  start_delimiter: <start_CELL_ATTRIBUTES>
  end_delimiter: <end_CELL_ATTRIBUTES>
- name: GENE
  tokenizer_id: 3
  json_path: ./gene_tokenizer.json
  modular_json_path: ./gene_tokenizer.json
  start_delimiter: <start_GENE>
  end_delimiter: <end_GENE>
  # NOTE(review): indentation was missing in the copy this was restored from.
  # This key is nested inside the GENE entry (and the two max_* keys below are
  # left at top level) because a flush sequence with 2-space entry keys and
  # this one key nested adds up exactly to the 967 bytes reported for the
  # file. Confirm against the consumer's schema.
  minimal_token_id: 5000

# Global limits (top-level keys, not per-tokenizer).
max_possible_token_id: 100000
max_special_token_id: 500