Upload folder using huggingface_hub

Files changed (16) hide show

README.md CHANGED Viewed

@@ -3,19 +3,19 @@ license: other
 base_model: 01-ai/Yi-1.5-6B
 tags:
 - llama-factory
-- full
 - generated_from_trainer
 model-index:
-- name: cantonesellm-test2
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# cantonesellm-test2
-This model is a fine-tuned version of [01-ai/Yi-1.5-6B](https://huggingface.co/01-ai/Yi-1.5-6B) on the yue_wiki dataset.
 ## Model description
@@ -34,12 +34,12 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 5e-05
-- train_batch_size: 4
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 8
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
@@ -51,7 +51,7 @@ The following hyperparameters were used during training:
 ### Framework versions
-- Transformers 4.41.0
-- Pytorch 2.3.0+cu121
-- Datasets 2.19.1
 - Tokenizers 0.19.1

 base_model: 01-ai/Yi-1.5-6B
 tags:
 - llama-factory
+- freeze
 - generated_from_trainer
 model-index:
+- name: yi-1.5-6b-yub-vocab-expanded
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# yi-1.5-6b-yub-vocab-expanded
+This model is a fine-tuned version of [01-ai/Yi-1.5-6B](https://huggingface.co/01-ai/Yi-1.5-6B) that has undergone layer-freezing training on a 300M-token Cantonese dataset in order to train new word embeddings for the expanded vocabulary. This model has not undergone continued pre-training; therefore, it is not recommended to be used for further pre-training.
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 16
+- total_train_batch_size: 128
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
 ### Framework versions
+- Transformers 4.41.2
+- Pytorch 2.1.1+cu121
+- Datasets 2.15.0
 - Tokenizers 0.19.1

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.999839871897518,
-    "total_flos": 1.7832492586531553e+18,
-    "train_loss": 2.216206028322468,
-    "train_runtime": 33247.722,
-    "train_samples_per_second": 0.751,
-    "train_steps_per_second": 0.094
 }

 {
+    "epoch": 0.9996843204390374,
+    "total_flos": 1.1741952995733012e+19,
+    "train_loss": 2.4089672912015,
+    "train_runtime": 234538.8638,
+    "train_samples_per_second": 1.405,
+    "train_steps_per_second": 0.011
 }

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "01-ai/Yi-1.5-6B",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -24,7 +24,7 @@
   "rope_theta": 5000000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.41.0",
   "use_cache": false,
-  "vocab_size": 66816
-}

 {
+  "_name_or_path": "/notebooks/cantonese/llm/models/cantonesellm-cpt-freeze-stag4",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "rope_theta": 5000000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.41.2",
   "use_cache": false,
+  "vocab_size": 64960
+}

generation_config.json CHANGED Viewed

@@ -3,5 +3,5 @@
   "bos_token_id": 1,
   "eos_token_id": 2,
   "pad_token_id": 0,
-  "transformers_version": "4.41.0"
 }

   "bos_token_id": 1,
   "eos_token_id": 2,
   "pad_token_id": 0,
+  "transformers_version": "4.41.2"
 }

input_embeddings.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:dae04d5b3dc1f774ad3ef3dca400667e0fde0759e99ec177b99ff3bea95b33db
+size 532153609

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8aca168e800fc4bc6ef18e349bbb1e5c0f5456a8b4c8b345e016bc95f6409c82
-size 4955780096

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f0af518c6f76698587e49a83d16120edd6182bb7a5c8342a0e7963d4cb803bc
+size 4940575744

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:422bf2d07af06d1c9f987ae7b99a02013215cbd9059dc4006797c282d5719648
-size 2235659240

 version https://git-lfs.github.com/spec/v1
+oid sha256:88e33b7ea19ad5973387ea98297dec865216dc05db45c13a21f8157171ace697
+size 2220454888

model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 12168208384
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",

 {
   "metadata": {
+    "total_size": 12137799680
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",

output_embeddings.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:403d104a28113d0f501a4e21457233d4a617bf87f8777be9db2c3f3e8e0e2133
+size 532153614

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba0a535b31eac5f7a44161b924c584c357e3db0c0fd4cd2bfb6ecdec62adf2c6
-size 1072327

 version https://git-lfs.github.com/spec/v1
+oid sha256:67909bb0045622af428982dee9b3f1033cf5c4bca5c9423a028d3748364ee14f
+size 1044277

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.999839871897518,
-    "total_flos": 1.7832492586531553e+18,
-    "train_loss": 2.216206028322468,
-    "train_runtime": 33247.722,
-    "train_samples_per_second": 0.751,
-    "train_steps_per_second": 0.094
 }

 {
+    "epoch": 0.9996843204390374,
+    "total_flos": 1.1741952995733012e+19,
+    "train_loss": 2.4089672912015,
+    "train_runtime": 234538.8638,
+    "train_samples_per_second": 1.405,
+    "train_steps_per_second": 0.011
 }

trainer_log.jsonl CHANGED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cfcaba803f70d2e48e007e00ba3539b0d01c0595bdaf7f2718436b7171e8b0e
-size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb43ea03774dbe8fe1db1935888be2a5b022c32feeeccc53f0cae49079f4809f
+size 5368

training_loss.png CHANGED Viewed