Delta-Vector committed on
Commit c0854c3
1 Parent(s): ba39bfa

Upload ./README.md with huggingface_hub

Files changed (1)
  1. README.md +187 -0
README.md ADDED
@@ -0,0 +1,187 @@
---
library_name: transformers
license: llama3
base_model: arcee-ai/Llama-3.1-SuperNova-Lite
tags:
- generated_from_trainer
model-index:
- name: henbane-8b-r3
  results: []
---
### EXL2 quant (measurement.json in the main branch)
---
### Check the repository revisions for the quantized versions
---
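
The quantized weights live in the repository's revisions rather than in `main`, so a specific quant can be pulled by passing its revision name to `huggingface_hub`. This is a minimal sketch, not a verified command for this exact repo: the repo id and revision name below are placeholders, so check the repository's revision list for the real branch names.

```python
# Hedged sketch: repo_id and revision are placeholders -- substitute the actual
# quant repository and the revision/branch that holds the bits-per-weight you want.
from huggingface_hub import snapshot_download

local_path = snapshot_download(
    repo_id="Delta-Vector/henbane-8b-r3-exl2",  # hypothetical repo id
    revision="main",                            # replace with the quant revision name
    local_dir="henbane-8b-r3-exl2",
)
print(f"Quant downloaded to: {local_path}")
```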

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.4.1`
```yaml
base_model: arcee-ai/Llama-3.1-SuperNova-Lite
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

#trust_remote_code: true

plugins:
- axolotl.integrations.liger.LigerPlugin
liger_rope: true
liger_rms_norm: true
liger_swiglu: true
liger_fused_linear_cross_entropy: true

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
- path: Gryphe/Sonnet3.5-SlimOrcaDedupCleaned
  type: chat_template
- path: Nitral-AI/Cybersecurity-ShareGPT
  type: chat_template
- path: Nitral-AI/Medical_Instruct-ShareGPT
  type: chat_template
- path: Nitral-AI/Olympiad_Math-ShareGPT
  type: chat_template
- path: anthracite-org/kalo_opus_misc_240827
  type: chat_template
- path: NewEden/Claude-Instruct-5k
  type: chat_template
- path: lodrick-the-lafted/kalo-opus-instruct-3k-filtered
  type: chat_template
- path: anthracite-org/kalo-opus-instruct-22k-no-refusal
  type: chat_template
- path: Epiculous/Synthstruct-Gens-v1.1-Filtered-n-Cleaned
  type: chat_template
- path: Epiculous/SynthRP-Gens-v1.1-Filtered-n-Cleaned
  type: chat_template
- path: anthracite-org/kalo_misc_part2
  type: chat_template
- path: anthracite-org/kalo_misc_part2
  type: chat_template
- path: Nitral-AI/Creative_Writing-ShareGPT
  type: chat_template
- path: NewEden/Gryphe-Sonnet3.5-Charcard-Roleplay-unfiltered
  type: chat_template

chat_template: llama3
shuffle_merged_datasets: true
default_system_message: "You are an assistant that responds to the user."
dataset_prepared_path: prepared_dataset_memorycore
val_set_size: 0.0
output_dir: ./henbane-8b-r3

sequence_len: 8192
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len:

adapter:
lora_model_dir:
lora_r:
lora_alpha:
lora_dropout:
lora_target_linear:
lora_fan_in_fan_out:

wandb_project: henbane-8b-r3
wandb_entity:
wandb_watch:
wandb_name: henbane-8b-r3
wandb_log_model:

gradient_accumulation_steps: 32
micro_batch_size: 1
num_epochs: 2
optimizer: paged_adamw_8bit
lr_scheduler: cosine
#learning_rate: 3e-5
learning_rate: 1e-5

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 5
evals_per_epoch:
eval_table_size:
eval_max_new_tokens:
saves_per_epoch: 2
debug:
deepspeed: /workspace/axolotl/deepspeed_configs/zero2.json
weight_decay: 0.05
fsdp:
fsdp_config:
special_tokens:
  pad_token: <|finetune_right_pad_id|>
  eos_token: <|eot_id|>

```

</details><br>

# henbane-8b-r3

This model is a fine-tuned version of [arcee-ai/Llama-3.1-SuperNova-Lite](https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite), trained on the mix of instruction, roleplay, and creative-writing datasets listed in the axolotl config above.
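
Below is a minimal generation sketch with `transformers`, assuming the full-precision weights and the Llama 3 chat template used during training; the repo id is a placeholder and the sampling settings are only illustrative.

```python
# Hedged sketch: repo_id is a placeholder; adjust dtype/device settings to your hardware.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "Delta-Vector/henbane-8b-r3"  # hypothetical repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [
    # Matches the default_system_message from the training config.
    {"role": "system", "content": "You are an assistant that responds to the user."},
    {"role": "user", "content": "Summarize what you were trained to do in two sentences."},
]
# chat_template: llama3 was set in the config, so the tokenizer's built-in template applies.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(input_ids, max_new_tokens=256, do_sample=True, temperature=0.8)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```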

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 1e-05
- train_batch_size: 1
- eval_batch_size: 1
- seed: 42
- distributed_type: multi-GPU
- num_devices: 2
- gradient_accumulation_steps: 32
- total_train_batch_size: 64
- total_eval_batch_size: 2
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_steps: 5
- num_epochs: 2
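
For reference, the total train batch size above is the product of the per-device micro batch size, the gradient accumulation steps, and the number of devices; a quick check of that arithmetic:

```python
# Effective (total) train batch size implied by the values listed above.
micro_batch_size = 1
gradient_accumulation_steps = 32
num_devices = 2

total_train_batch_size = micro_batch_size * gradient_accumulation_steps * num_devices
assert total_train_batch_size == 64
```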

### Training results



### Framework versions

- Transformers 4.45.0.dev0
- Pytorch 2.4.0+cu121
- Datasets 2.19.1
- Tokenizers 0.19.1