{
    "query_token_id": "[unused0]",
    "doc_token_id": "[unused1]",
    "query_token": "[Q]",
    "doc_token": "[D]",
    "ncells": null,
    "centroid_score_threshold": null,
    "ndocs": null,
    "load_index_with_mmap": false,
    "index_path": null,
    "nbits": 1,
    "kmeans_niters": 20,
    "resume": false,
    "similarity": "cosine",
    "bsize": 4,
    "accumsteps": 1,
    "lr": 3e-6,
    "maxsteps": 400000,
    "save_every": null,
    "warmup": 20000,
    "warmup_bert": null,
    "relu": false,
    "nway": 64,
    "use_ib_negatives": false,
    "reranker": false,
    "distillation_alpha": 1.0,
    "ignore_scores": false,
    "model_name": null,
    "query_maxlen": 512,
    "attend_to_mask_tokens": false,
    "interaction": "colbert",
    "dim": 128,
    "doc_maxlen": 128,
    "mask_punctuation": true,
    "checkpoint": "\/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir",
    "triples": "\/future\/u\/okhattab\/root\/unit\/experiments\/2021.10\/downstream.distillation.round2.2_score\/round2.nway6.cosine.ib\/examples.64.json",
    "collection": "\/future\/u\/okhattab\/data\/MSMARCO\/collection.tsv",
    "queries": "\/future\/u\/okhattab\/data\/MSMARCO\/queries.train.tsv",
    "index_name": null,
    "overwrite": false,
    "root": "\/future\/u\/okhattab\/root\/unit\/experiments",
    "experiment": "2021.10",
    "index_root": null,
    "name": "kldR2.nway64.ib",
    "rank": 0,
    "nranks": 4,
    "amp": true,
    "gpus": 8,
    "meta": {
        "hostname": "gamma",
        "git_branch": "main",
        "git_hash": "58087227c8ead73499b761c7f7a569844d01d248",
        "git_commit_datetime": "2023-10-16 12:32:12+08:00",
        "current_datetime": "Apr 21, 2024 ; 12:29AM UTC (+0000)",
        "cmd": "supervised-fine-tune2.py --model_name_or_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-reason-hf --colbert_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir --bf16 True --data_path error_20.json --output_dir \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/ --cache_dir \/home\/jwang\/.cache --model_max_length 32768 --use_flash_attn True --low_rank_training True --num_train_epochs 1 --per_device_train_batch_size 1 --per_device_eval_batch_size 2 --gradient_accumulation_steps 8 --save_strategy epoch --save_total_limit 5 --learning_rate 1e-5 --weight_decay 0.0 --warmup_steps 20 --lr_scheduler_type constant_with_warmup --logging_steps 1 --deepspeed ds_configs\/stage2.json --tf32 True",
        "version": "colbert-v0.4"
    }
}