Model save

Files changed (8) hide show

README.md CHANGED Viewed

@@ -2,14 +2,12 @@
 license: mit
 library_name: peft
 tags:
-- alignment-handbook
-- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
 base_model: microsoft/phi-2
 datasets:
-- HuggingFaceH4/ultrachat_200k
 model-index:
 - name: phi-2-sft-lora-chat
   results: []
@@ -20,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 # phi-2-sft-lora-chat
-This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on the HuggingFaceH4/ultrachat_200k dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.2272
 ## Model description
@@ -53,13 +51,13 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 0.02
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.2627        | 0.02  | 111  | 1.2272          |
 ### Framework versions

 license: mit
 library_name: peft
 tags:
 - trl
 - sft
 - generated_from_trainer
 base_model: microsoft/phi-2
 datasets:
+- generator
 model-index:
 - name: phi-2-sft-lora-chat
   results: []
 # phi-2-sft-lora-chat
+This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.2234
 ## Model description
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1.0
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 1.2513        | 1.0   | 5520 | 1.2234          |
 ### Framework versions

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:caf01da28ac318fbad4608a3f01733fe18dfc50affbbad55a912855b5fecadab
 size 335579632

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f0671cfaa4dfb46dcf468d952b75d2f83e8b139a51bae719f4ae4df95aae3c2
 size 335579632

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 0.02,
-    "eval_loss": 1.2271578311920166,
-    "eval_runtime": 1802.0046,
     "eval_samples": 23110,
-    "eval_samples_per_second": 16.275,
-    "eval_steps_per_second": 1.356,
-    "train_loss": 1.2791897563246992,
-    "train_runtime": 2444.0756,
     "train_samples": 207865,
-    "train_samples_per_second": 2.168,
-    "train_steps_per_second": 0.045
 }

 {
+    "epoch": 1.0,
+    "eval_loss": 1.2233707904815674,
+    "eval_runtime": 1795.3542,
     "eval_samples": 23110,
+    "eval_samples_per_second": 16.335,
+    "eval_steps_per_second": 1.361,
+    "train_loss": 1.269419441966043,
+    "train_runtime": 32984.8618,
     "train_samples": 207865,
+    "train_samples_per_second": 8.033,
+    "train_steps_per_second": 0.167
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.02,
-    "eval_loss": 1.2271578311920166,
-    "eval_runtime": 1802.0046,
     "eval_samples": 23110,
-    "eval_samples_per_second": 16.275,
-    "eval_steps_per_second": 1.356
 }

 {
+    "epoch": 1.0,
+    "eval_loss": 1.2233707904815674,
+    "eval_runtime": 1795.3542,
     "eval_samples": 23110,
+    "eval_samples_per_second": 16.335,
+    "eval_steps_per_second": 1.361
 }

runs/May19_00-28-02_gpu4-119-5/events.out.tfevents.1716042533.gpu4-119-5.2683548.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:902ad1ce605ebe107d529854b3626c62387a7e556bd920837f10f4801d867579
-size 174268

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d2120054a4336ce958b841f9bc8edf8205d067fe70fe40323b378486b5ecd28
+size 175521

runs/May19_00-28-02_gpu4-119-5/events.out.tfevents.1716077313.gpu4-119-5.2683548.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:db4c20ea532d3ca1d938b820504b2400e44f7e0955ac2bbfbd6e738322c33009
+size 359

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.02,
-    "train_loss": 1.2791897563246992,
-    "train_runtime": 2444.0756,
     "train_samples": 207865,
-    "train_samples_per_second": 2.168,
-    "train_steps_per_second": 0.045
 }

 {
+    "epoch": 1.0,
+    "train_loss": 1.269419441966043,
+    "train_runtime": 32984.8618,
     "train_samples": 207865,
+    "train_samples_per_second": 8.033,
+    "train_steps_per_second": 0.167
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff