yzhuang
/

phi-1_5_fictional

@@ -1,12 +1,12 @@
 ---
 license: mit
-base_model: microsoft/phi-2
 tags:
 - trl
 - sft
 - generated_from_trainer
 datasets:
-- mmlu_no_train
 model-index:
 - name: phi-1_5_fictional
   results: []
@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
 # phi-1_5_fictional
-This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on the mmlu_no_train dataset.
 ## Model description

 ---
 license: mit
+base_model: microsoft/phi-1_5
 tags:
 - trl
 - sft
 - generated_from_trainer
 datasets:
+- generator
 model-index:
 - name: phi-1_5_fictional
   results: []
 # phi-1_5_fictional
+This model is a fine-tuned version of [microsoft/phi-1_5](https://huggingface.co/microsoft/phi-1_5) on the generator dataset.
 ## Model description

config.json CHANGED Viewed

@@ -1,29 +1,29 @@
 {
-  "_name_or_path": "microsoft/phi-2",
   "architectures": [
     "PhiForCausalLM"
   ],
   "attention_dropout": 0.0,
   "auto_map": {
-    "AutoConfig": "microsoft/phi-2--configuration_phi.PhiConfig",
-    "AutoModelForCausalLM": "microsoft/phi-2--modeling_phi.PhiForCausalLM"
   },
   "bos_token_id": null,
   "embd_pdrop": 0.0,
   "eos_token_id": null,
   "hidden_act": "gelu_new",
-  "hidden_size": 2560,
   "initializer_range": 0.02,
-  "intermediate_size": 10240,
   "layer_norm_eps": 1e-05,
   "max_position_embeddings": 2048,
   "model_type": "phi",
   "num_attention_heads": 32,
-  "num_hidden_layers": 32,
   "num_key_value_heads": 32,
-  "partial_rotary_factor": 0.4,
   "qk_layernorm": false,
-  "resid_pdrop": 0.1,
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,

 {
+  "_name_or_path": "microsoft/phi-1_5",
   "architectures": [
     "PhiForCausalLM"
   ],
   "attention_dropout": 0.0,
   "auto_map": {
+    "AutoConfig": "microsoft/phi-1_5--configuration_phi.PhiConfig",
+    "AutoModelForCausalLM": "microsoft/phi-1_5--modeling_phi.PhiForCausalLM"
   },
   "bos_token_id": null,
   "embd_pdrop": 0.0,
   "eos_token_id": null,
   "hidden_act": "gelu_new",
+  "hidden_size": 2048,
   "initializer_range": 0.02,
+  "intermediate_size": 8192,
   "layer_norm_eps": 1e-05,
   "max_position_embeddings": 2048,
   "model_type": "phi",
   "num_attention_heads": 32,
+  "num_hidden_layers": 24,
   "num_key_value_heads": 32,
+  "partial_rotary_factor": 0.5,
   "qk_layernorm": false,
+  "resid_pdrop": 0.0,
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 2048,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79d025ca65776f089d1a829390dbc13f8c79145a91279815572d254a01126199
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:51b0f613e17501a407a0270dce55758fb7861c64a82eccb2997da2fa7070ae45
 size 4664