{ "model_type": "llama", "alpha_pattern": {}, "auto_mapping": null, "base_model_name_or_path": "unsloth/llama-3-8b-bnb-4bit", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "layer_replication": null, "layers_pattern": null, "layers_to_transform": null, "loftq_config": {}, "lora_alpha": 16, "lora_dropout": 0, "megatron_config": null, "megatron_core": "megatron.core", "modules_to_save": [ "lm_head", "embed_tokens" ], "peft_type": "LORA", "r": 16, "rank_pattern": {}, "revision": "unsloth", "target_modules": [ "k_proj", "gate_proj", "q_proj", "up_proj", "o_proj", "down_proj", "v_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, "use_rslora": false, "hidden_size": 4096, "num_attention_heads": 32, "num_hidden_layers": 24, "intermediate_size": 16384, "max_position_embeddings": 512, "vocab_size": 32000, "layer_norm_eps": 1e-5, "initializer_range": 0.02, "train_batch_size": 2, "gradient_accumulation_steps": 4, "warmup_steps": 5, "max_steps": 60, "learning_rate": 0.0002, "fp16": true, "bf16": false, "logging_steps": 1, "optim": "adamw_8bit", "weight_decay": 0.01, "lr_scheduler_type": "linear", "seed": 3407, "output_dir": "outputs" }