JMLR / ir /artifact.metadata
akemiH's picture
Add Model
832155e
raw
history blame contribute delete
No virus
2.61 kB
{
"query_token_id": "[unused0]",
"doc_token_id": "[unused1]",
"query_token": "[Q]",
"doc_token": "[D]",
"ncells": null,
"centroid_score_threshold": null,
"ndocs": null,
"load_index_with_mmap": false,
"index_path": null,
"nbits": 1,
"kmeans_niters": 20,
"resume": false,
"similarity": "cosine",
"bsize": 4,
"accumsteps": 1,
"lr": 3e-6,
"maxsteps": 400000,
"save_every": null,
"warmup": 20000,
"warmup_bert": null,
"relu": false,
"nway": 64,
"use_ib_negatives": false,
"reranker": false,
"distillation_alpha": 1.0,
"ignore_scores": false,
"model_name": null,
"query_maxlen": 512,
"attend_to_mask_tokens": false,
"interaction": "colbert",
"dim": 128,
"doc_maxlen": 128,
"mask_punctuation": true,
"checkpoint": "\/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir",
"triples": "\/future\/u\/okhattab\/root\/unit\/experiments\/2021.10\/downstream.distillation.round2.2_score\/round2.nway6.cosine.ib\/examples.64.json",
"collection": "\/future\/u\/okhattab\/data\/MSMARCO\/collection.tsv",
"queries": "\/future\/u\/okhattab\/data\/MSMARCO\/queries.train.tsv",
"index_name": null,
"overwrite": false,
"root": "\/future\/u\/okhattab\/root\/unit\/experiments",
"experiment": "2021.10",
"index_root": null,
"name": "kldR2.nway64.ib",
"rank": 0,
"nranks": 4,
"amp": true,
"gpus": 8,
"meta": {
"hostname": "gamma",
"git_branch": "main",
"git_hash": "58087227c8ead73499b761c7f7a569844d01d248",
"git_commit_datetime": "2023-10-16 12:32:12+08:00",
"current_datetime": "Apr 21, 2024 ; 12:29AM UTC (+0000)",
"cmd": "supervised-fine-tune2.py --model_name_or_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-reason-hf --colbert_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir --bf16 True --data_path error_20.json --output_dir \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/ --cache_dir \/home\/jwang\/.cache --model_max_length 32768 --use_flash_attn True --low_rank_training True --num_train_epochs 1 --per_device_train_batch_size 1 --per_device_eval_batch_size 2 --gradient_accumulation_steps 8 --save_strategy epoch --save_total_limit 5 --learning_rate 1e-5 --weight_decay 0.0 --warmup_steps 20 --lr_scheduler_type constant_with_warmup --logging_steps 1 --deepspeed ds_configs\/stage2.json --tf32 True",
"version": "colbert-v0.4"
}
}