File size: 707 Bytes
8630abf cada4a6 8630abf cada4a6 499c5e2 cada4a6 8630abf cada4a6 8630abf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
{
"output_root": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38",
"files": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38/train_valid_test_split_output/2020-12-23-1905-daf4-a0e0/train.txt",
"vocab_name": "roberta-ca",
"clean_text": true,
"handle_chinese_chars": true,
"strip_accents": false,
"lowercase": false,
"vocab_size": 52000,
"limit_alphabet": 1000,
"show_progress": true,
"min_frequency": 2,
"extra_tokens": [],
"reserve_tokens": 0,
"tokenizer": "bbpe",
"commit_hash": "daf4d660ec8a4b28d2bc29b3063779100ab85796\n"
} |