{
  "output_root": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38",
  "files": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38/train_valid_test_split_output/2020-12-23-1905-daf4-a0e0/train.txt",
  "vocab_name": "roberta-ca",
  "clean_text": true,
  "handle_chinese_chars": true,
  "strip_accents": false,
  "lowercase": false,
  "vocab_size": 52000,
  "limit_alphabet": 1000,
  "show_progress": true,
  "min_frequency": 2,
  "extra_tokens": [],
  "reserve_tokens": 0,
  "tokenizer": "bbpe",
  "commit_hash": "daf4d660ec8a4b28d2bc29b3063779100ab85796\n"
}