Text Generation
Safetensors
9 languages
mistral
conversational
Epiculous committed on
Commit
dd108ef
1 Parent(s): 2fb66c7

Update README.md

Files changed (1)
  1. README.md +93 -0
README.md CHANGED
@@ -28,6 +28,99 @@ Crimson Dawn was trained with the Mistral Instruct template, therefore it should
  "[INST] Prompt goes here [/INST]"
  ```

+ ### Current Top Sampler Settings
+ ```json
+ {
+     "temp": 1.25,
+     "temperature_last": true,
+     "top_p": 1,
+     "top_k": -1,
+     "top_a": 0,
+     "tfs": 1,
+     "epsilon_cutoff": 0,
+     "eta_cutoff": 0,
+     "typical_p": 1,
+     "min_p": 0.3,
+     "rep_pen": 1,
+     "rep_pen_range": 0,
+     "rep_pen_decay": 0,
+     "rep_pen_slope": 1,
+     "no_repeat_ngram_size": 0,
+     "penalty_alpha": 0,
+     "num_beams": 1,
+     "length_penalty": 1,
+     "min_length": 0,
+     "encoder_rep_pen": 1,
+     "freq_pen": 0,
+     "presence_pen": 0,
+     "skew": 0,
+     "do_sample": true,
+     "early_stopping": false,
+     "dynatemp": false,
+     "min_temp": 0,
+     "max_temp": 2,
+     "dynatemp_exponent": 1,
+     "smoothing_factor": 0,
+     "smoothing_curve": 1,
+     "dry_allowed_length": 2,
+     "dry_multiplier": 0,
+     "dry_base": 1.75,
+     "dry_sequence_breakers": "[\"\\n\", \":\", \"\\\"\", \"*\"]",
+     "dry_penalty_last_n": 0,
+     "add_bos_token": true,
+     "ban_eos_token": false,
+     "skip_special_tokens": true,
+     "mirostat_mode": 0,
+     "mirostat_tau": 5,
+     "mirostat_eta": 0.1,
+     "guidance_scale": 1,
+     "negative_prompt": "",
+     "grammar_string": "",
+     "json_schema": {},
+     "banned_tokens": "",
+     "sampler_priority": [
+         "temperature",
+         "dynamic_temperature",
+         "quadratic_sampling",
+         "top_k",
+         "top_p",
+         "typical_p",
+         "epsilon_cutoff",
+         "eta_cutoff",
+         "tfs",
+         "top_a",
+         "min_p",
+         "mirostat"
+     ],
+     "samplers": [
+         "top_k",
+         "tfs_z",
+         "typical_p",
+         "top_p",
+         "min_p",
+         "temperature"
+     ],
+     "ignore_eos_token": false,
+     "spaces_between_special_tokens": true,
+     "speculative_ngram": false,
+     "sampler_order": [
+         5,
+         6,
+         0,
+         1,
+         2,
+         3,
+         4
+     ],
+     "logit_bias": [],
+     "ignore_eos_token_aphrodite": false,
+     "spaces_between_special_tokens_aphrodite": true,
+     "rep_pen_size": 0,
+     "genamt": 1024,
+     "max_length": 16384
+ }
+ ```
+
  ## Training
  Training was done in two runs of 2 epochs each on 2x [NVIDIA A6000 GPUs](https://www.nvidia.com/en-us/design-visualization/rtx-a6000/) using LoRA. A two-phase approach was used: the base model was first trained for 2 epochs on RP data, and the resulting LoRA was applied to the base. The modified base was then trained for 2 epochs on instruct data, and the new instruct LoRA was applied to the modified base, resulting in what you see here.
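
Read as a standard LoRA train-then-merge workflow, the two-phase process described in the Training section might look roughly like the sketch below using `peft`. This is an illustration only, not the card's actual training code: the base model path, adapter paths, and output names are placeholders, and the training loops themselves are omitted.

```python
# Illustrative sketch of the two-phase LoRA flow described in the Training
# section; all paths are placeholders and the training loops are omitted.
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Load the original base model (placeholder id; not named in this hunk).
base = AutoModelForCausalLM.from_pretrained("path/to/base-model")

# Phase 1: a LoRA trained for 2 epochs on RP data is applied to the base
# and merged into its weights, producing the "modified base".
rp_base = PeftModel.from_pretrained(base, "path/to/rp-lora").merge_and_unload()
rp_base.save_pretrained("modified-base-rp")

# Phase 2: a second LoRA trained for 2 epochs on instruct data against the
# modified base is applied and merged the same way, giving the final model.
final = PeftModel.from_pretrained(rp_base, "path/to/instruct-lora").merge_and_unload()
final.save_pretrained("crimson-dawn-final")
```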
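
The sampler preset above uses SillyTavern/Aphrodite-style keys; with `top_p` at 1, `top_k` disabled, and all penalties left neutral, the values that actually shape sampling are `temp` 1.25 and `min_p` 0.3. As a rough illustration (not from this commit) of carrying those values over to a plain `transformers` generation call; the repo id is an assumption, and `min_p` requires a recent `transformers` release:

```python
# Rough mapping of the key preset values onto transformers' generate();
# the model id below is an assumption, not taken from this commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Epiculous/Crimson_Dawn-v0.1"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# Mistral Instruct template, as shown at the top of this hunk.
prompt = "[INST] Prompt goes here [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

output = model.generate(
    **inputs,
    do_sample=True,
    temperature=1.25,     # "temp"
    min_p=0.3,            # "min_p"; needs a recent transformers version
    top_p=1.0,            # effectively off
    top_k=0,              # the preset's -1 means "off"; 0 disables it here
    max_new_tokens=1024,  # "genamt"
)
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```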