rkotari
/

tinyllama

+---
+license: apache-2.0
+library_name: peft
+tags:
+- trl
+- sft
+- generated_from_trainer
+base_model: PY007/TinyLlama-1.1B-step-50K-105b
+model-index:
+- name: tinyllama
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# tinyllama
+This model is a fine-tuned version of [PY007/TinyLlama-1.1B-step-50K-105b](https://huggingface.co/PY007/TinyLlama-1.1B-step-50K-105b) on an unknown dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.002
+- train_batch_size: 3
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 6
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.03
+- training_steps: 200
+### Training results
+No evaluation results were recorded during training.
+### Framework versions
+- PEFT 0.11.2.dev0
+- Transformers 4.41.2
+- Pytorch 2.3.0+cu121
+- Datasets 2.19.1
+- Tokenizers 0.19.1

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 500,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

trainer_state.json ADDED Viewed

	@@ -0,0 +1,182 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.150537634408602,
+  "eval_steps": 500,
+  "global_step": 200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.10752688172043011,
+      "grad_norm": 0.35747238993644714,
+      "learning_rate": 0.0019979028262377117,
+      "loss": 2.7578,
+      "step": 10
+    },
+    {
+      "epoch": 0.21505376344086022,
+      "grad_norm": 0.31179729104042053,
+      "learning_rate": 0.001974410524646926,
+      "loss": 2.6817,
+      "step": 20
+    },
+    {
+      "epoch": 0.3225806451612903,
+      "grad_norm": 0.29704058170318604,
+      "learning_rate": 0.0019254212296427042,
+      "loss": 2.472,
+      "step": 30
+    },
+    {
+      "epoch": 0.43010752688172044,
+      "grad_norm": 0.31029245257377625,
+      "learning_rate": 0.0018522168236559692,
+      "loss": 2.606,
+      "step": 40
+    },
+    {
+      "epoch": 0.5376344086021505,
+      "grad_norm": 0.2981964945793152,
+      "learning_rate": 0.0017567128158176952,
+      "loss": 2.5843,
+      "step": 50
+    },
+    {
+      "epoch": 0.6451612903225806,
+      "grad_norm": 0.3093157708644867,
+      "learning_rate": 0.00164140821963114,
+      "loss": 2.4672,
+      "step": 60
+    },
+    {
+      "epoch": 0.7526881720430108,
+      "grad_norm": 0.2779924273490906,
+      "learning_rate": 0.001509320162328763,
+      "loss": 2.4794,
+      "step": 70
+    },
+    {
+      "epoch": 0.8602150537634409,
+      "grad_norm": 0.2921682894229889,
+      "learning_rate": 0.0013639049369634877,
+      "loss": 2.5483,
+      "step": 80
+    },
+    {
+      "epoch": 0.967741935483871,
+      "grad_norm": 0.2675682008266449,
+      "learning_rate": 0.0012089675630312753,
+      "loss": 2.5437,
+      "step": 90
+    },
+    {
+      "epoch": 1.075268817204301,
+      "grad_norm": 0.3106231093406677,
+      "learning_rate": 0.0010485622221144484,
+      "loss": 2.5045,
+      "step": 100
+    },
+    {
+      "epoch": 1.1827956989247312,
+      "grad_norm": 0.3767760992050171,
+      "learning_rate": 0.0008868861738047158,
+      "loss": 2.3468,
+      "step": 110
+    },
+    {
+      "epoch": 1.2903225806451613,
+      "grad_norm": 0.382185161113739,
+      "learning_rate": 0.0007281699277636571,
+      "loss": 2.2433,
+      "step": 120
+    },
+    {
+      "epoch": 1.3978494623655915,
+      "grad_norm": 0.44693300127983093,
+      "learning_rate": 0.0005765665457425102,
+      "loss": 2.1934,
+      "step": 130
+    },
+    {
+      "epoch": 1.5053763440860215,
+      "grad_norm": 0.37442219257354736,
+      "learning_rate": 0.0004360429701490934,
+      "loss": 2.2445,
+      "step": 140
+    },
+    {
+      "epoch": 1.6129032258064515,
+      "grad_norm": 0.36704185605049133,
+      "learning_rate": 0.00031027622272189573,
+      "loss": 2.2128,
+      "step": 150
+    },
+    {
+      "epoch": 1.7204301075268817,
+      "grad_norm": 0.3427051901817322,
+      "learning_rate": 0.0002025571894372794,
+      "loss": 2.2675,
+      "step": 160
+    },
+    {
+      "epoch": 1.827956989247312,
+      "grad_norm": 0.3787896931171417,
+      "learning_rate": 0.00011570450926997656,
+      "loss": 2.2544,
+      "step": 170
+    },
+    {
+      "epoch": 1.935483870967742,
+      "grad_norm": 0.3463006317615509,
+      "learning_rate": 5.199082004372957e-05,
+      "loss": 2.1877,
+      "step": 180
+    },
+    {
+      "epoch": 2.043010752688172,
+      "grad_norm": 0.33030927181243896,
+      "learning_rate": 1.3083291266109298e-05,
+      "loss": 2.2705,
+      "step": 190
+    },
+    {
+      "epoch": 2.150537634408602,
+      "grad_norm": 0.33233529329299927,
+      "learning_rate": 0.0,
+      "loss": 2.0534,
+      "step": 200
+    },
+    {
+      "epoch": 2.150537634408602,
+      "step": 200,
+      "total_flos": 1207440721170432.0,
+      "train_loss": 2.3959710121154787,
+      "train_runtime": 104.1427,
+      "train_samples_per_second": 11.523,
+      "train_steps_per_second": 1.92
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 200,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1207440721170432.0,
+  "train_batch_size": 3,
+  "trial_name": null,
+  "trial_params": null
+}