---
license: apache-2.0
library_name: peft
tags:
- axolotl
- generated_from_trainer
base_model: mistralai/Mistral-7B-v0.3
model-index:
- name: mistral-sql-create-context-lora
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.4.1`
```yaml
base_model: mistralai/Mistral-7B-v0.3
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: b-mc2/sql-create-context
    type:
      # Each JSONL line contains question, context, and answer fields.
      # These get mapped to axolotl's instruction, input, and output tags.
      field_instruction: question
      field_input: context
      field_output: answer
      # Format is used by axolotl to generate the prompt.
      format: |-
        [INST] Using the schema context below, generate a SQL query that answers the question.
        {input}
        {instruction} [/INST]

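# For illustration, a single (hypothetical) row rendered through the format above
# would look like:
#   [INST] Using the schema context below, generate a SQL query that answers the question.
#   CREATE TABLE head (age INTEGER)
#   How many heads of the departments are older than 56? [/INST]
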
tokens: # add new control tokens from the dataset to the model
  - "[INST]"
  - " [/INST]"
  - "[SQL]"
  - " [/SQL]"

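# Note: these added tokens grow the vocabulary, which is why embed_tokens and
# lm_head are listed under lora_modules_to_save further down.
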
dataset_prepared_path:
val_set_size: 0.05
output_dir: ./outputs/mistral-sql-create-context-lora
hub_model_id: ahmedsamirio/mistral-sql-create-context-lora

# This is set to 4096 in the modal config, why?
# Since I'm using sample packing, decreasing the sequence length would create
# smaller packed batches, which fit better into memory.
sequence_len: 8192

# These are set to false in the modal example, why? (Modal also uses FSDP, which might be a reason.)
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true

adapter: lora
lora_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:

lora_modules_to_save: # required when adding new tokens to LLaMA/Mistral
  - embed_tokens
  - lm_head

lora_target_modules:
  - gate_proj
  - down_proj
  - up_proj
  - q_proj
  - v_proj
  - k_proj
  - o_proj

wandb_project: mistral-sql-create-context
wandb_entity: ahmedsamirio
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 2
micro_batch_size: 4
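# => effective batch size per device: 2 (accumulation) x 4 (micro batch) = 8 samples per optimizer step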
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

# What is this? Per the axolotl example configs: the loss watchdog aborts the run
# if the training loss stays above the threshold for `patience` consecutive steps,
# catching runs whose loss has blown up.
loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3

warmup_steps: 10
evals_per_epoch: 4
eval_table_size:

# This wasn't set in the modal config.
eval_max_new_tokens: 128
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"
```

</details>
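
Below is a minimal inference sketch (not part of the auto-generated card). It assumes the tokenizer and adapter weights were pushed to `ahmedsamirio/mistral-sql-create-context-lora` (the `hub_model_id` above), and it reuses the prompt format from the config; the schema and question are illustrative.

```python
# Minimal sketch: load the LoRA adapter on top of the base model and run one query.
# Assumes the tokenizer (with the added [INST]/[SQL] control tokens) lives in the
# adapter repo; the schema and question below are illustrative, not from the dataset.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "mistralai/Mistral-7B-v0.3"
adapter_id = "ahmedsamirio/mistral-sql-create-context-lora"

tokenizer = AutoTokenizer.from_pretrained(adapter_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.bfloat16, device_map="auto"
)
# The config adds new tokens, so the embedding matrices must be resized before
# the adapter (which saved embed_tokens and lm_head) is loaded on top.
model.resize_token_embeddings(len(tokenizer))
model = PeftModel.from_pretrained(model, adapter_id)

# Prompt template copied from the `format` field in the config above.
prompt = (
    "[INST] Using the schema context below, generate a SQL query that answers the question.\n"
    "CREATE TABLE head (age INTEGER)\n"
    "How many heads of the departments are older than 56? [/INST]"
)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=128)
# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```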