File size: 707 Bytes
8630abf
cada4a6
 
 
 
 
 
8630abf
cada4a6
499c5e2
cada4a6
 
 
8630abf
cada4a6
 
8630abf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
{
    "output_root": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38",
    "files": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38/train_valid_test_split_output/2020-12-23-1905-daf4-a0e0/train.txt",
    "vocab_name": "roberta-ca",
    "clean_text": true,
    "handle_chinese_chars": true,
    "strip_accents": false,
    "lowercase": false,
    "vocab_size": 52000,
    "limit_alphabet": 1000,
    "show_progress": true,
    "min_frequency": 2,
    "extra_tokens": [],
    "reserve_tokens": 0,
    "tokenizer": "bbpe",
    "commit_hash": "daf4d660ec8a4b28d2bc29b3063779100ab85796"
}