Training in progress, step 100

Files changed (6) hide show

config.json CHANGED Viewed

@@ -25,5 +25,5 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.40.2",
   "use_cache": false,
-  "vocab_size": 67264
 }

   "torch_dtype": "bfloat16",
   "transformers_version": "4.40.2",
   "use_cache": false,
+  "vocab_size": 67456
 }

special_tokens_map.json CHANGED Viewed

@@ -1,29 +1,36 @@
 {
   "bos_token": {
     "content": "<|startoftext|>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
     "content": "<unk>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   }

 {
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|Human|>",
+    "<|Assistant|>",
+    "<|System|>"
+  ],
   "bos_token": {
     "content": "<|startoftext|>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
     "content": "<unk>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
-size 1033105

 version https://git-lfs.github.com/spec/v1
+oid sha256:701049bc2838520e8fb6561477505175af8d06be4952e321e32dbf8e9bb5c01a
+size 1083475

tokenizer_config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cfdd030a8208cf8c9173609e5ac4fab5b8b28eb906afae280afada0ac13990e
-size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea4f6392478906c1907983d5a47aabb917753dd4e2aad9f36addcc2ec38ce27b
+size 5048