End of training

Browse files

Files changed (4) hide show

README.md +50 -0
config.json +52 -0
mm_projector.bin +3 -0
trainer_state.json +27 -0

README.md ADDED Viewed

	@@ -0,0 +1,50 @@

+---
+license: apache-2.0
+base_model: microsoft/llava-med-v1.5-mistral-7b
+tags:
+- generated_from_trainer
+model-index:
+- name: llava-med-v1.5-mistral-7b-pretrain
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# llava-med-v1.5-mistral-7b-pretrain
+This model is a fine-tuned version of [microsoft/llava-med-v1.5-mistral-7b](https://huggingface.co/microsoft/llava-med-v1.5-mistral-7b) on an unknown dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- optimizer: Adam with betas=(0.9, 0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.03
+- num_epochs: 1.0
+- mixed_precision_training: Native AMP
+### Framework versions
+- Transformers 4.37.2
+- PyTorch 2.0.1+cu117
+- Datasets 2.16.0
+- Tokenizers 0.15.1

config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "_name_or_path": "microsoft/llava-med-v1.5-mistral-7b",
+  "architectures": [
+    "LlavaMistralForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "feature_outs": "encoder+decoder",
+  "freeze_mm_mlp_adapter": false,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "image_aspect_ratio": "square",
+  "img_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "mm_hidden_size": 1024,
+  "mm_patch_merge_type": "flat",
+  "mm_projector_lr": null,
+  "mm_projector_type": "mlp2x_gelu",
+  "mm_use_im_patch_token": false,
+  "mm_use_im_start_end": false,
+  "mm_vision_select_feature": "patch",
+  "mm_vision_select_layer": -2,
+  "mm_vision_tower": "hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224",
+  "model_type": "llava_llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "proj_vis_to_txt_tokens": false,
+  "prompt_segtok_w_instruct": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "segtok_posembed": "sincos",
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "tokenizer_model_max_length": 2048,
+  "tokenizer_padding_side": "right",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.37.2",
+  "tune_mm_mlp_adapter": true,
+  "tune_vision_tokenizer": "none",
+  "use_cache": true,
+  "use_mm_proj": true,
+  "vision_backbone": "convnextlarge",
+  "vision_tokenizer_lr": null,
+  "vocab_size": 32000
+}

mm_projector.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4238151288ef5f7e701b9441ace2172a1be10808190624e688a3755b0f81fc97
+size 83920509

trainer_state.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.5509478613073355e-05,
+  "eval_steps": 500,
+  "global_step": 9,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 1.6854879487611667e-09,
+      "loss": 2.313,
+      "step": 1
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 197761,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 19776,
+  "total_flos": 7717069559808.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}