{ "query_token_id": "[unused0]", "doc_token_id": "[unused1]", "query_token": "[Q]", "doc_token": "[D]", "ncells": null, "centroid_score_threshold": null, "ndocs": null, "load_index_with_mmap": false, "index_path": null, "nbits": 1, "kmeans_niters": 20, "resume": false, "similarity": "cosine", "bsize": 4, "accumsteps": 1, "lr": 3e-6, "maxsteps": 400000, "save_every": null, "warmup": 20000, "warmup_bert": null, "relu": false, "nway": 64, "use_ib_negatives": false, "reranker": false, "distillation_alpha": 1.0, "ignore_scores": false, "model_name": null, "query_maxlen": 512, "attend_to_mask_tokens": false, "interaction": "colbert", "dim": 128, "doc_maxlen": 128, "mask_punctuation": true, "checkpoint": "\/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir", "triples": "\/future\/u\/okhattab\/root\/unit\/experiments\/2021.10\/downstream.distillation.round2.2_score\/round2.nway6.cosine.ib\/examples.64.json", "collection": "\/future\/u\/okhattab\/data\/MSMARCO\/collection.tsv", "queries": "\/future\/u\/okhattab\/data\/MSMARCO\/queries.train.tsv", "index_name": null, "overwrite": false, "root": "\/future\/u\/okhattab\/root\/unit\/experiments", "experiment": "2021.10", "index_root": null, "name": "kldR2.nway64.ib", "rank": 0, "nranks": 4, "amp": true, "gpus": 8, "meta": { "hostname": "gamma", "git_branch": "main", "git_hash": "58087227c8ead73499b761c7f7a569844d01d248", "git_commit_datetime": "2023-10-16 12:32:12+08:00", "current_datetime": "Apr 21, 2024 ; 12:29AM UTC (+0000)", "cmd": "supervised-fine-tune2.py --model_name_or_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-reason-hf --colbert_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir --bf16 True --data_path error_20.json --output_dir \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/ --cache_dir \/home\/jwang\/.cache --model_max_length 32768 --use_flash_attn True --low_rank_training True --num_train_epochs 1 --per_device_train_batch_size 1 --per_device_eval_batch_size 2 --gradient_accumulation_steps 8 --save_strategy epoch --save_total_limit 5 --learning_rate 1e-5 --weight_decay 0.0 --warmup_steps 20 --lr_scheduler_type constant_with_warmup --logging_steps 1 --deepspeed ds_configs\/stage2.json --tf32 True", "version": "colbert-v0.4" } }