ironrock committed on
Commit
4bb424c
1 Parent(s): b443144

Upload folder using huggingface_hub

Browse files
Files changed (49) hide show
  1. README.md +62 -54
  2. checkpoint-180/README.md +202 -0
  3. checkpoint-180/adapter_config.json +29 -0
  4. checkpoint-180/adapter_model.safetensors +3 -0
  5. checkpoint-180/optimizer.pt +3 -0
  6. checkpoint-180/rng_state.pth +3 -0
  7. checkpoint-180/scheduler.pt +3 -0
  8. checkpoint-180/special_tokens_map.json +24 -0
  9. checkpoint-180/tokenizer.json +0 -0
  10. checkpoint-180/tokenizer.model +3 -0
  11. checkpoint-180/tokenizer_config.json +49 -0
  12. checkpoint-180/trainer_state.json +387 -0
  13. checkpoint-180/training_args.bin +3 -0
  14. checkpoint-270/README.md +202 -0
  15. checkpoint-270/adapter_config.json +29 -0
  16. checkpoint-270/adapter_model.safetensors +3 -0
  17. checkpoint-270/optimizer.pt +3 -0
  18. checkpoint-270/rng_state.pth +3 -0
  19. checkpoint-270/scheduler.pt +3 -0
  20. checkpoint-270/special_tokens_map.json +24 -0
  21. checkpoint-270/tokenizer.json +0 -0
  22. checkpoint-270/tokenizer.model +3 -0
  23. checkpoint-270/tokenizer_config.json +49 -0
  24. checkpoint-270/trainer_state.json +570 -0
  25. checkpoint-270/training_args.bin +3 -0
  26. checkpoint-360/README.md +202 -0
  27. checkpoint-360/adapter_config.json +29 -0
  28. checkpoint-360/adapter_model.safetensors +3 -0
  29. checkpoint-360/optimizer.pt +3 -0
  30. checkpoint-360/rng_state.pth +3 -0
  31. checkpoint-360/scheduler.pt +3 -0
  32. checkpoint-360/special_tokens_map.json +24 -0
  33. checkpoint-360/tokenizer.json +0 -0
  34. checkpoint-360/tokenizer.model +3 -0
  35. checkpoint-360/tokenizer_config.json +49 -0
  36. checkpoint-360/trainer_state.json +753 -0
  37. checkpoint-360/training_args.bin +3 -0
  38. checkpoint-90/README.md +202 -0
  39. checkpoint-90/adapter_config.json +29 -0
  40. checkpoint-90/adapter_model.safetensors +3 -0
  41. checkpoint-90/optimizer.pt +3 -0
  42. checkpoint-90/rng_state.pth +3 -0
  43. checkpoint-90/scheduler.pt +3 -0
  44. checkpoint-90/special_tokens_map.json +24 -0
  45. checkpoint-90/tokenizer.json +0 -0
  46. checkpoint-90/tokenizer.model +3 -0
  47. checkpoint-90/tokenizer_config.json +49 -0
  48. checkpoint-90/trainer_state.json +204 -0
  49. checkpoint-90/training_args.bin +3 -0
README.md CHANGED
@@ -1,83 +1,91 @@
1
  ---
2
- library_name: peft
 
3
  tags:
4
- - trl
5
- - dpo
6
- - generated_from_trainer
7
  base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
8
  model-index:
9
- - name: WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO
10
  results: []
 
11
  ---
12
 
13
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
- should probably proofread and complete it, then remove this comment. -->
15
 
16
- # WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO
 
17
 
18
- This model is a fine-tuned version of [Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged](https://huggingface.co/Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.4260
21
- - Rewards/chosen: 0.9172
22
- - Rewards/rejected: -0.6078
23
- - Rewards/accuracies: 0.4643
24
- - Rewards/margins: 1.5251
25
- - Logps/rejected: -103.4404
26
- - Logps/chosen: -46.9008
27
- - Logits/rejected: -1.8652
28
- - Logits/chosen: -1.8327
29
 
30
- ## Model description
31
 
32
- More information needed
33
 
34
- ## Intended uses & limitations
35
 
36
- More information needed
37
 
38
- ## Training and evaluation data
 
 
 
 
39
 
40
- More information needed
 
 
 
 
 
 
 
 
 
41
 
42
- ## Training procedure
 
 
 
43
 
44
  ### Training hyperparameters
45
 
46
  The following hyperparameters were used during training:
47
  - learning_rate: 5e-06
48
- - train_batch_size: 2
49
- - eval_batch_size: 2
50
- - seed: 42
51
  - gradient_accumulation_steps: 2
 
52
  - total_train_batch_size: 4
53
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
- - lr_scheduler_type: linear
55
- - lr_scheduler_warmup_ratio: 0.03
56
- - training_steps: 366
57
- - mixed_precision_training: Native AMP
58
 
59
  ### Training results
60
 
61
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.6635 | 0.49 | 30 | 0.6524 | 0.0904 | 0.0036 | 0.4643 | 0.0867 | -97.3259 | -55.1696 | -1.8044 | -1.7832 |
64
- | 0.6026 | 0.98 | 60 | 0.5891 | 0.2506 | 0.0024 | 0.4643 | 0.2482 | -97.3380 | -53.5672 | -1.8099 | -1.7878 |
65
- | 0.5387 | 1.46 | 90 | 0.5295 | 0.4396 | -0.0275 | 0.4643 | 0.4671 | -97.6369 | -51.6775 | -1.8181 | -1.7943 |
66
- | 0.6033 | 1.95 | 120 | 0.4960 | 0.5751 | -0.0659 | 0.4643 | 0.6410 | -98.0210 | -50.3219 | -1.8261 | -1.8009 |
67
- | 0.5042 | 2.44 | 150 | 0.4709 | 0.6967 | -0.1479 | 0.4643 | 0.8446 | -98.8407 | -49.1060 | -1.8331 | -1.8059 |
68
- | 0.5087 | 2.93 | 180 | 0.4542 | 0.7878 | -0.2428 | 0.4643 | 1.0306 | -99.7900 | -48.1955 | -1.8425 | -1.8136 |
69
- | 0.4874 | 3.41 | 210 | 0.4428 | 0.8442 | -0.3560 | 0.4643 | 1.2002 | -100.9220 | -47.6315 | -1.8520 | -1.8219 |
70
- | 0.4229 | 3.9 | 240 | 0.4358 | 0.8750 | -0.4390 | 0.4643 | 1.3140 | -101.7521 | -47.3229 | -1.8575 | -1.8266 |
71
- | 0.5295 | 4.39 | 270 | 0.4313 | 0.9026 | -0.4960 | 0.4643 | 1.3986 | -102.3219 | -47.0471 | -1.8607 | -1.8289 |
72
- | 0.5466 | 4.88 | 300 | 0.4291 | 0.9119 | -0.5384 | 0.4643 | 1.4503 | -102.7461 | -46.9544 | -1.8629 | -1.8309 |
73
- | 0.4339 | 5.37 | 330 | 0.4268 | 0.9152 | -0.5900 | 0.4643 | 1.5052 | -103.2623 | -46.9216 | -1.8644 | -1.8320 |
74
- | 0.5438 | 5.85 | 360 | 0.4260 | 0.9172 | -0.6078 | 0.4643 | 1.5251 | -103.4404 | -46.9008 | -1.8652 | -1.8327 |
75
-
76
-
77
  ### Framework versions
78
 
79
- - PEFT 0.10.0
80
- - Transformers 4.38.2
81
- - Pytorch 2.1.0+cu118
82
- - Datasets 2.18.0
83
- - Tokenizers 0.15.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ license: mit
3
+ library_name: "trl"
4
  tags:
5
+ - DPO
6
+ - WeniGPT
 
7
  base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
8
  model-index:
9
+ - name: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO
10
  results: []
11
+ language: ['pt']
12
  ---
13
 
14
+ # Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO
 
15
 
16
+ This model is a fine-tuned version of [Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged] on the dataset Weni/wenigpt-agent-dpo-1.0.0 with the DPO trainer. It is part of the WeniGPT project for [Weni](https://weni.ai/).
17
+ Description: Experiment on DPO with other hyperparameters and best SFT model of WeniGPT
18
 
 
19
  It achieves the following results on the evaluation set:
20
+ {'eval_loss': 0.42603132128715515, 'eval_runtime': 8.1364, 'eval_samples_per_second': 3.441, 'eval_steps_per_second': 1.721, 'eval_rewards/chosen': 0.9172464609146118, 'eval_rewards/rejected': -0.6078222990036011, 'eval_rewards/accuracies': 0.4642857015132904, 'eval_rewards/margins': 1.5250685214996338, 'eval_logps/rejected': -103.44039154052734, 'eval_logps/chosen': -46.90084457397461, 'eval_logits/rejected': -1.8652076721191406, 'eval_logits/chosen': -1.832722544670105, 'epoch': 5.95}
 
 
 
 
 
 
 
 
21
 
22
+ ## Intended uses & limitations
23
 
24
+ This model has not been trained to avoid specific instructions.
25
 
26
+ ## Training procedure
27
 
28
+ Finetuning was done on the model Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged with the following prompt:
29
 
30
+ ```
31
+ ---------------------
32
+ System_prompt:
33
+ Agora você se chama {name}, você é {occupation} e seu objetivo é {chatbot_goal}. O adjetivo que mais define a sua personalidade é {adjective} e você se comporta da seguinte forma:
34
+ {instructions_formatted}
35
 
36
+ {context_statement}
37
+
38
+ Lista de requisitos:
39
+ - Responda de forma natural, mas nunca fale sobre um assunto fora do contexto.
40
+ - Nunca traga informações do seu próprio conhecimento.
41
+ - Repito é crucial que você responda usando apenas informações do contexto.
42
+ - Nunca mencione o contexto fornecido.
43
+ - Nunca mencione a pergunta fornecida.
44
+ - Gere a resposta mais útil possível para a pergunta usando informações do conexto acima.
45
+ - Nunca elabore sobre o porque e como você fez a tarefa, apenas responda.
46
 
47
+
48
+ ---------------------
49
+
50
+ ```
51
 
52
  ### Training hyperparameters
53
 
54
  The following hyperparameters were used during training:
55
  - learning_rate: 5e-06
56
+ - per_device_train_batch_size: 2
57
+ - per_device_eval_batch_size: 2
 
58
  - gradient_accumulation_steps: 2
59
+ - num_gpus: 1
60
  - total_train_batch_size: 4
61
+ - optimizer: AdamW
62
+ - lr_scheduler_type: cosine
63
+ - num_steps: 366
64
+ - quantization_type: bitsandbytes
65
+ - LoRA: ("\n - bits: 4\n - use_exllama: True\n - device_map: auto\n - use_cache: False\n - lora_r: 8\n - lora_alpha: 16\n - lora_dropout: 0.05\n - bias: none\n - target_modules: ['v_proj', 'q_proj']\n - task_type: CAUSAL_LM",)
66
 
67
  ### Training results
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  ### Framework versions
70
 
71
+ - transformers==4.38.2
72
+ - datasets==2.18.0
73
+ - peft==0.10.0
74
+ - safetensors==0.4.2
75
+ - evaluate==0.4.1
76
+ - bitsandbytes==0.43
77
+ - huggingface_hub==0.22.2
78
+ - seqeval==1.2.2
79
+ - optimum==1.18.1
80
+ - auto-gptq==0.7.1
81
+ - gpustat==1.1.1
82
+ - deepspeed==0.14.0
83
+ - wandb==0.16.6
84
+ - trl==0.8.1
85
+ - accelerate==0.29.2
86
+ - coloredlogs==15.0.1
87
+ - traitlets==5.14.2
88
+ - autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.4/autoawq-0.2.4+cu118-cp310-cp310-linux_x86_64.whl
89
+
90
+ ### Hardware
91
+ - Cloud provider: runpod.io
checkpoint-180/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.10.0
checkpoint-180/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "v_proj",
24
+ "q_proj"
25
+ ],
26
+ "task_type": "CAUSAL_LM",
27
+ "use_dora": false,
28
+ "use_rslora": false
29
+ }
checkpoint-180/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f0a1d782152f7cbe522064e8176b567d6386e5430121f5ed5c8d91fe866a0c1
3
+ size 13648432
checkpoint-180/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2233350e52278376f6ee073c87883c98fb2c90cbd76f0e35943aa63dd3ebee
3
+ size 27370618
checkpoint-180/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48ee9b73399c28d7e668360bf1d5a4d11095c4738bf96c13f7bb6fbff59f8ccb
3
+ size 14244
checkpoint-180/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14732432661203392f17c828aeaa967e3fc0c59a7193aef58b74af5f304be609
3
+ size 1064
checkpoint-180/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-180/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-180/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-180/tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "additional_special_tokens": [],
31
+ "bos_token": "<s>",
32
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token }}{% endif %}{% endfor %}",
33
+ "clean_up_tokenization_spaces": false,
34
+ "eos_token": "</s>",
35
+ "legacy": true,
36
+ "max_lenght": 8192,
37
+ "max_length": 8192,
38
+ "model_max_length": 1000000000000000019884624838656,
39
+ "pad_token": "<unk>",
40
+ "padding": true,
41
+ "sp_model_kwargs": {},
42
+ "spaces_between_special_tokens": false,
43
+ "stride": 0,
44
+ "tokenizer_class": "LlamaTokenizer",
45
+ "truncation_side": "right",
46
+ "truncation_strategy": "longest_first",
47
+ "unk_token": "<unk>",
48
+ "use_default_system_prompt": false
49
+ }
checkpoint-180/trainer_state.json ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4541548192501068,
3
+ "best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-180",
4
+ "epoch": 2.926829268292683,
5
+ "eval_steps": 30,
6
+ "global_step": 180,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.16,
13
+ "grad_norm": 8.378021240234375,
14
+ "learning_rate": 4.0909090909090915e-06,
15
+ "logits/chosen": -1.830958604812622,
16
+ "logits/rejected": -1.8507845401763916,
17
+ "logps/chosen": -28.701984405517578,
18
+ "logps/rejected": -54.28569793701172,
19
+ "loss": 0.6924,
20
+ "rewards/accuracies": 0.20000000298023224,
21
+ "rewards/chosen": 0.0008967495523393154,
22
+ "rewards/margins": 0.0014666033675894141,
23
+ "rewards/rejected": -0.0005698538152500987,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.33,
28
+ "grad_norm": 5.193418502807617,
29
+ "learning_rate": 4.887323943661972e-06,
30
+ "logits/chosen": -1.7550897598266602,
31
+ "logits/rejected": -1.770708680152893,
32
+ "logps/chosen": -47.344207763671875,
33
+ "logps/rejected": -64.0368423461914,
34
+ "loss": 0.6852,
35
+ "rewards/accuracies": 0.4000000059604645,
36
+ "rewards/chosen": 0.017231885343790054,
37
+ "rewards/margins": 0.01606021076440811,
38
+ "rewards/rejected": 0.0011716745793819427,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.49,
43
+ "grad_norm": 7.308932304382324,
44
+ "learning_rate": 4.746478873239437e-06,
45
+ "logits/chosen": -1.781267762184143,
46
+ "logits/rejected": -1.8114898204803467,
47
+ "logps/chosen": -54.274559020996094,
48
+ "logps/rejected": -95.20500183105469,
49
+ "loss": 0.6635,
50
+ "rewards/accuracies": 0.5,
51
+ "rewards/chosen": 0.0641159638762474,
52
+ "rewards/margins": 0.061691801995038986,
53
+ "rewards/rejected": 0.0024241588544100523,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.49,
58
+ "eval_logits/chosen": -1.7831767797470093,
59
+ "eval_logits/rejected": -1.8043663501739502,
60
+ "eval_logps/chosen": -55.16960906982422,
61
+ "eval_logps/rejected": -97.32585144042969,
62
+ "eval_loss": 0.6523757576942444,
63
+ "eval_rewards/accuracies": 0.4642857015132904,
64
+ "eval_rewards/chosen": 0.09036973863840103,
65
+ "eval_rewards/margins": 0.08673857897520065,
66
+ "eval_rewards/rejected": 0.0036311547737568617,
67
+ "eval_runtime": 8.141,
68
+ "eval_samples_per_second": 3.439,
69
+ "eval_steps_per_second": 1.72,
70
+ "step": 30
71
+ },
72
+ {
73
+ "epoch": 0.65,
74
+ "grad_norm": 0.0,
75
+ "learning_rate": 4.6056338028169015e-06,
76
+ "logits/chosen": -1.889905333518982,
77
+ "logits/rejected": -1.9024461507797241,
78
+ "logps/chosen": -27.918941497802734,
79
+ "logps/rejected": -42.093284606933594,
80
+ "loss": 0.668,
81
+ "rewards/accuracies": 0.25,
82
+ "rewards/chosen": 0.054457180202007294,
83
+ "rewards/margins": 0.0539846234023571,
84
+ "rewards/rejected": 0.0004725646285805851,
85
+ "step": 40
86
+ },
87
+ {
88
+ "epoch": 0.81,
89
+ "grad_norm": 8.53225326538086,
90
+ "learning_rate": 4.464788732394367e-06,
91
+ "logits/chosen": -1.8278567790985107,
92
+ "logits/rejected": -1.849957823753357,
93
+ "logps/chosen": -43.8238639831543,
94
+ "logps/rejected": -68.02179718017578,
95
+ "loss": 0.6358,
96
+ "rewards/accuracies": 0.3499999940395355,
97
+ "rewards/chosen": 0.13941256701946259,
98
+ "rewards/margins": 0.13133978843688965,
99
+ "rewards/rejected": 0.008072790689766407,
100
+ "step": 50
101
+ },
102
+ {
103
+ "epoch": 0.98,
104
+ "grad_norm": 9.436968803405762,
105
+ "learning_rate": 4.3239436619718315e-06,
106
+ "logits/chosen": -1.805991768836975,
107
+ "logits/rejected": -1.8437427282333374,
108
+ "logps/chosen": -43.8873291015625,
109
+ "logps/rejected": -95.2943115234375,
110
+ "loss": 0.6026,
111
+ "rewards/accuracies": 0.44999998807907104,
112
+ "rewards/chosen": 0.18793432414531708,
113
+ "rewards/margins": 0.21308371424674988,
114
+ "rewards/rejected": -0.025149401277303696,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.98,
119
+ "eval_logits/chosen": -1.7877694368362427,
120
+ "eval_logits/rejected": -1.8098936080932617,
121
+ "eval_logps/chosen": -53.567203521728516,
122
+ "eval_logps/rejected": -97.33795928955078,
123
+ "eval_loss": 0.5890871286392212,
124
+ "eval_rewards/accuracies": 0.4642857015132904,
125
+ "eval_rewards/chosen": 0.25061002373695374,
126
+ "eval_rewards/margins": 0.2481890469789505,
127
+ "eval_rewards/rejected": 0.002420984674245119,
128
+ "eval_runtime": 8.1404,
129
+ "eval_samples_per_second": 3.44,
130
+ "eval_steps_per_second": 1.72,
131
+ "step": 60
132
+ },
133
+ {
134
+ "epoch": 1.14,
135
+ "grad_norm": 0.0,
136
+ "learning_rate": 4.183098591549296e-06,
137
+ "logits/chosen": -1.8344879150390625,
138
+ "logits/rejected": -1.8489716053009033,
139
+ "logps/chosen": -40.38930892944336,
140
+ "logps/rejected": -60.9084358215332,
141
+ "loss": 0.6031,
142
+ "rewards/accuracies": 0.375,
143
+ "rewards/chosen": 0.19739331305027008,
144
+ "rewards/margins": 0.22638121247291565,
145
+ "rewards/rejected": -0.028987903147935867,
146
+ "step": 70
147
+ },
148
+ {
149
+ "epoch": 1.3,
150
+ "grad_norm": 5.49536657333374,
151
+ "learning_rate": 4.042253521126761e-06,
152
+ "logits/chosen": -1.7903095483779907,
153
+ "logits/rejected": -1.8362411260604858,
154
+ "logps/chosen": -44.288116455078125,
155
+ "logps/rejected": -90.21073913574219,
156
+ "loss": 0.5357,
157
+ "rewards/accuracies": 0.4749999940395355,
158
+ "rewards/chosen": 0.34061312675476074,
159
+ "rewards/margins": 0.40679749846458435,
160
+ "rewards/rejected": -0.06618441641330719,
161
+ "step": 80
162
+ },
163
+ {
164
+ "epoch": 1.46,
165
+ "grad_norm": 13.401692390441895,
166
+ "learning_rate": 3.901408450704225e-06,
167
+ "logits/chosen": -1.8004281520843506,
168
+ "logits/rejected": -1.8247934579849243,
169
+ "logps/chosen": -42.32465362548828,
170
+ "logps/rejected": -70.9749984741211,
171
+ "loss": 0.5387,
172
+ "rewards/accuracies": 0.4749999940395355,
173
+ "rewards/chosen": 0.3678433299064636,
174
+ "rewards/margins": 0.4186524450778961,
175
+ "rewards/rejected": -0.05080908536911011,
176
+ "step": 90
177
+ },
178
+ {
179
+ "epoch": 1.46,
180
+ "eval_logits/chosen": -1.7943389415740967,
181
+ "eval_logits/rejected": -1.8181126117706299,
182
+ "eval_logps/chosen": -51.677486419677734,
183
+ "eval_logps/rejected": -97.63689422607422,
184
+ "eval_loss": 0.529485821723938,
185
+ "eval_rewards/accuracies": 0.4642857015132904,
186
+ "eval_rewards/chosen": 0.4395819306373596,
187
+ "eval_rewards/margins": 0.4670555889606476,
188
+ "eval_rewards/rejected": -0.027473628520965576,
189
+ "eval_runtime": 8.1412,
190
+ "eval_samples_per_second": 3.439,
191
+ "eval_steps_per_second": 1.72,
192
+ "step": 90
193
+ },
194
+ {
195
+ "epoch": 1.63,
196
+ "grad_norm": 5.040858745574951,
197
+ "learning_rate": 3.7605633802816903e-06,
198
+ "logits/chosen": -1.8601042032241821,
199
+ "logits/rejected": -1.8790462017059326,
200
+ "logps/chosen": -43.77570343017578,
201
+ "logps/rejected": -70.64997863769531,
202
+ "loss": 0.5466,
203
+ "rewards/accuracies": 0.4000000059604645,
204
+ "rewards/chosen": 0.36673134565353394,
205
+ "rewards/margins": 0.42903366684913635,
206
+ "rewards/rejected": -0.06230226159095764,
207
+ "step": 100
208
+ },
209
+ {
210
+ "epoch": 1.79,
211
+ "grad_norm": 11.182683944702148,
212
+ "learning_rate": 3.6197183098591553e-06,
213
+ "logits/chosen": -1.8602203130722046,
214
+ "logits/rejected": -1.8786903619766235,
215
+ "logps/chosen": -29.601736068725586,
216
+ "logps/rejected": -66.1338882446289,
217
+ "loss": 0.6003,
218
+ "rewards/accuracies": 0.2750000059604645,
219
+ "rewards/chosen": 0.3122637867927551,
220
+ "rewards/margins": 0.2756831645965576,
221
+ "rewards/rejected": 0.03658062964677811,
222
+ "step": 110
223
+ },
224
+ {
225
+ "epoch": 1.95,
226
+ "grad_norm": 3.9169583320617676,
227
+ "learning_rate": 3.47887323943662e-06,
228
+ "logits/chosen": -1.8304624557495117,
229
+ "logits/rejected": -1.8451646566390991,
230
+ "logps/chosen": -31.413599014282227,
231
+ "logps/rejected": -56.841880798339844,
232
+ "loss": 0.6033,
233
+ "rewards/accuracies": 0.2750000059604645,
234
+ "rewards/chosen": 0.21899382770061493,
235
+ "rewards/margins": 0.2744571566581726,
236
+ "rewards/rejected": -0.05546332150697708,
237
+ "step": 120
238
+ },
239
+ {
240
+ "epoch": 1.95,
241
+ "eval_logits/chosen": -1.80086350440979,
242
+ "eval_logits/rejected": -1.8260576725006104,
243
+ "eval_logps/chosen": -50.32191848754883,
244
+ "eval_logps/rejected": -98.02101135253906,
245
+ "eval_loss": 0.49604225158691406,
246
+ "eval_rewards/accuracies": 0.4642857015132904,
247
+ "eval_rewards/chosen": 0.5751391053199768,
248
+ "eval_rewards/margins": 0.6410244107246399,
249
+ "eval_rewards/rejected": -0.0658852607011795,
250
+ "eval_runtime": 8.1445,
251
+ "eval_samples_per_second": 3.438,
252
+ "eval_steps_per_second": 1.719,
253
+ "step": 120
254
+ },
255
+ {
256
+ "epoch": 2.11,
257
+ "grad_norm": 1.4047716856002808,
258
+ "learning_rate": 3.338028169014085e-06,
259
+ "logits/chosen": -1.8776130676269531,
260
+ "logits/rejected": -1.8995519876480103,
261
+ "logps/chosen": -22.69371795654297,
262
+ "logps/rejected": -53.5282096862793,
263
+ "loss": 0.5611,
264
+ "rewards/accuracies": 0.25,
265
+ "rewards/chosen": 0.35938918590545654,
266
+ "rewards/margins": 0.5045264959335327,
267
+ "rewards/rejected": -0.14513733983039856,
268
+ "step": 130
269
+ },
270
+ {
271
+ "epoch": 2.28,
272
+ "grad_norm": 0.7528722882270813,
273
+ "learning_rate": 3.1971830985915496e-06,
274
+ "logits/chosen": -1.8126357793807983,
275
+ "logits/rejected": -1.832371711730957,
276
+ "logps/chosen": -38.33379364013672,
277
+ "logps/rejected": -67.96979522705078,
278
+ "loss": 0.5142,
279
+ "rewards/accuracies": 0.375,
280
+ "rewards/chosen": 0.5568062663078308,
281
+ "rewards/margins": 0.6818712949752808,
282
+ "rewards/rejected": -0.12506499886512756,
283
+ "step": 140
284
+ },
285
+ {
286
+ "epoch": 2.44,
287
+ "grad_norm": 3.405579090118408,
288
+ "learning_rate": 3.056338028169014e-06,
289
+ "logits/chosen": -1.8196109533309937,
290
+ "logits/rejected": -1.8556429147720337,
291
+ "logps/chosen": -36.78864669799805,
292
+ "logps/rejected": -83.05890655517578,
293
+ "loss": 0.5042,
294
+ "rewards/accuracies": 0.4000000059604645,
295
+ "rewards/chosen": 0.542107105255127,
296
+ "rewards/margins": 0.6411095857620239,
297
+ "rewards/rejected": -0.09900249540805817,
298
+ "step": 150
299
+ },
300
+ {
301
+ "epoch": 2.44,
302
+ "eval_logits/chosen": -1.805869698524475,
303
+ "eval_logits/rejected": -1.8330577611923218,
304
+ "eval_logps/chosen": -49.10601043701172,
305
+ "eval_logps/rejected": -98.84068298339844,
306
+ "eval_loss": 0.4709201455116272,
307
+ "eval_rewards/accuracies": 0.4642857015132904,
308
+ "eval_rewards/chosen": 0.6967297196388245,
309
+ "eval_rewards/margins": 0.8445812463760376,
310
+ "eval_rewards/rejected": -0.1478516012430191,
311
+ "eval_runtime": 8.1382,
312
+ "eval_samples_per_second": 3.441,
313
+ "eval_steps_per_second": 1.72,
314
+ "step": 150
315
+ },
316
+ {
317
+ "epoch": 2.6,
318
+ "grad_norm": 7.778740882873535,
319
+ "learning_rate": 2.915492957746479e-06,
320
+ "logits/chosen": -1.848589301109314,
321
+ "logits/rejected": -1.8790754079818726,
322
+ "logps/chosen": -36.49171447753906,
323
+ "logps/rejected": -72.55968475341797,
324
+ "loss": 0.4927,
325
+ "rewards/accuracies": 0.44999998807907104,
326
+ "rewards/chosen": 0.49555450677871704,
327
+ "rewards/margins": 0.6891830563545227,
328
+ "rewards/rejected": -0.1936284601688385,
329
+ "step": 160
330
+ },
331
+ {
332
+ "epoch": 2.76,
333
+ "grad_norm": 4.058627605438232,
334
+ "learning_rate": 2.774647887323944e-06,
335
+ "logits/chosen": -1.812421441078186,
336
+ "logits/rejected": -1.8415311574935913,
337
+ "logps/chosen": -45.62999725341797,
338
+ "logps/rejected": -87.85527038574219,
339
+ "loss": 0.4541,
340
+ "rewards/accuracies": 0.4749999940395355,
341
+ "rewards/chosen": 0.7084562182426453,
342
+ "rewards/margins": 0.9553689956665039,
343
+ "rewards/rejected": -0.24691279232501984,
344
+ "step": 170
345
+ },
346
+ {
347
+ "epoch": 2.93,
348
+ "grad_norm": 0.0,
349
+ "learning_rate": 2.6338028169014084e-06,
350
+ "logits/chosen": -1.8475942611694336,
351
+ "logits/rejected": -1.8678725957870483,
352
+ "logps/chosen": -40.53328323364258,
353
+ "logps/rejected": -64.86616516113281,
354
+ "loss": 0.5087,
355
+ "rewards/accuracies": 0.375,
356
+ "rewards/chosen": 0.5022943019866943,
357
+ "rewards/margins": 0.7252141833305359,
358
+ "rewards/rejected": -0.22291991114616394,
359
+ "step": 180
360
+ },
361
+ {
362
+ "epoch": 2.93,
363
+ "eval_logits/chosen": -1.8136398792266846,
364
+ "eval_logits/rejected": -1.8424787521362305,
365
+ "eval_logps/chosen": -48.19547653198242,
366
+ "eval_logps/rejected": -99.7900161743164,
367
+ "eval_loss": 0.4541548192501068,
368
+ "eval_rewards/accuracies": 0.4642857015132904,
369
+ "eval_rewards/chosen": 0.7877826690673828,
370
+ "eval_rewards/margins": 1.0305674076080322,
371
+ "eval_rewards/rejected": -0.24278469383716583,
372
+ "eval_runtime": 8.1397,
373
+ "eval_samples_per_second": 3.44,
374
+ "eval_steps_per_second": 1.72,
375
+ "step": 180
376
+ }
377
+ ],
378
+ "logging_steps": 10,
379
+ "max_steps": 366,
380
+ "num_input_tokens_seen": 0,
381
+ "num_train_epochs": 6,
382
+ "save_steps": 90,
383
+ "total_flos": 0.0,
384
+ "train_batch_size": 2,
385
+ "trial_name": null,
386
+ "trial_params": null
387
+ }
checkpoint-180/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de569ad7b35da22832fd1fc395ab1c110ac622b038bdfcb0eee757cdc5b4b97b
3
+ size 5304
checkpoint-270/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.10.0
checkpoint-270/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "v_proj",
24
+ "q_proj"
25
+ ],
26
+ "task_type": "CAUSAL_LM",
27
+ "use_dora": false,
28
+ "use_rslora": false
29
+ }
checkpoint-270/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:918068effef33fcd83ee39a7b70c44461e0a5f72909fe72fbba0207e41da5527
3
+ size 13648432
checkpoint-270/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44856c4420546cbc9f5a35b808e67c05528c23d760db27d4eb92ce6a79b5f895
3
+ size 27370618
checkpoint-270/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d3b7102895eb0637b0cab516bd672f216b2bf79078a83eb301011a90444f44c
3
+ size 14244
checkpoint-270/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:754fa30b685f93af9e6d375848220222347b5605c21cd93b54aaa798d6ea3598
3
+ size 1064
checkpoint-270/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-270/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-270/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-270/tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "additional_special_tokens": [],
31
+ "bos_token": "<s>",
32
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token }}{% endif %}{% endfor %}",
33
+ "clean_up_tokenization_spaces": false,
34
+ "eos_token": "</s>",
35
+ "legacy": true,
36
+ "max_lenght": 8192,
37
+ "max_length": 8192,
38
+ "model_max_length": 1000000000000000019884624838656,
39
+ "pad_token": "<unk>",
40
+ "padding": true,
41
+ "sp_model_kwargs": {},
42
+ "spaces_between_special_tokens": false,
43
+ "stride": 0,
44
+ "tokenizer_class": "LlamaTokenizer",
45
+ "truncation_side": "right",
46
+ "truncation_strategy": "longest_first",
47
+ "unk_token": "<unk>",
48
+ "use_default_system_prompt": false
49
+ }
checkpoint-270/trainer_state.json ADDED
@@ -0,0 +1,570 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.43130752444267273,
3
+ "best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-270",
4
+ "epoch": 4.390243902439025,
5
+ "eval_steps": 30,
6
+ "global_step": 270,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.16,
13
+ "grad_norm": 8.378021240234375,
14
+ "learning_rate": 4.0909090909090915e-06,
15
+ "logits/chosen": -1.830958604812622,
16
+ "logits/rejected": -1.8507845401763916,
17
+ "logps/chosen": -28.701984405517578,
18
+ "logps/rejected": -54.28569793701172,
19
+ "loss": 0.6924,
20
+ "rewards/accuracies": 0.20000000298023224,
21
+ "rewards/chosen": 0.0008967495523393154,
22
+ "rewards/margins": 0.0014666033675894141,
23
+ "rewards/rejected": -0.0005698538152500987,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.33,
28
+ "grad_norm": 5.193418502807617,
29
+ "learning_rate": 4.887323943661972e-06,
30
+ "logits/chosen": -1.7550897598266602,
31
+ "logits/rejected": -1.770708680152893,
32
+ "logps/chosen": -47.344207763671875,
33
+ "logps/rejected": -64.0368423461914,
34
+ "loss": 0.6852,
35
+ "rewards/accuracies": 0.4000000059604645,
36
+ "rewards/chosen": 0.017231885343790054,
37
+ "rewards/margins": 0.01606021076440811,
38
+ "rewards/rejected": 0.0011716745793819427,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.49,
43
+ "grad_norm": 7.308932304382324,
44
+ "learning_rate": 4.746478873239437e-06,
45
+ "logits/chosen": -1.781267762184143,
46
+ "logits/rejected": -1.8114898204803467,
47
+ "logps/chosen": -54.274559020996094,
48
+ "logps/rejected": -95.20500183105469,
49
+ "loss": 0.6635,
50
+ "rewards/accuracies": 0.5,
51
+ "rewards/chosen": 0.0641159638762474,
52
+ "rewards/margins": 0.061691801995038986,
53
+ "rewards/rejected": 0.0024241588544100523,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.49,
58
+ "eval_logits/chosen": -1.7831767797470093,
59
+ "eval_logits/rejected": -1.8043663501739502,
60
+ "eval_logps/chosen": -55.16960906982422,
61
+ "eval_logps/rejected": -97.32585144042969,
62
+ "eval_loss": 0.6523757576942444,
63
+ "eval_rewards/accuracies": 0.4642857015132904,
64
+ "eval_rewards/chosen": 0.09036973863840103,
65
+ "eval_rewards/margins": 0.08673857897520065,
66
+ "eval_rewards/rejected": 0.0036311547737568617,
67
+ "eval_runtime": 8.141,
68
+ "eval_samples_per_second": 3.439,
69
+ "eval_steps_per_second": 1.72,
70
+ "step": 30
71
+ },
72
+ {
73
+ "epoch": 0.65,
74
+ "grad_norm": 0.0,
75
+ "learning_rate": 4.6056338028169015e-06,
76
+ "logits/chosen": -1.889905333518982,
77
+ "logits/rejected": -1.9024461507797241,
78
+ "logps/chosen": -27.918941497802734,
79
+ "logps/rejected": -42.093284606933594,
80
+ "loss": 0.668,
81
+ "rewards/accuracies": 0.25,
82
+ "rewards/chosen": 0.054457180202007294,
83
+ "rewards/margins": 0.0539846234023571,
84
+ "rewards/rejected": 0.0004725646285805851,
85
+ "step": 40
86
+ },
87
+ {
88
+ "epoch": 0.81,
89
+ "grad_norm": 8.53225326538086,
90
+ "learning_rate": 4.464788732394367e-06,
91
+ "logits/chosen": -1.8278567790985107,
92
+ "logits/rejected": -1.849957823753357,
93
+ "logps/chosen": -43.8238639831543,
94
+ "logps/rejected": -68.02179718017578,
95
+ "loss": 0.6358,
96
+ "rewards/accuracies": 0.3499999940395355,
97
+ "rewards/chosen": 0.13941256701946259,
98
+ "rewards/margins": 0.13133978843688965,
99
+ "rewards/rejected": 0.008072790689766407,
100
+ "step": 50
101
+ },
102
+ {
103
+ "epoch": 0.98,
104
+ "grad_norm": 9.436968803405762,
105
+ "learning_rate": 4.3239436619718315e-06,
106
+ "logits/chosen": -1.805991768836975,
107
+ "logits/rejected": -1.8437427282333374,
108
+ "logps/chosen": -43.8873291015625,
109
+ "logps/rejected": -95.2943115234375,
110
+ "loss": 0.6026,
111
+ "rewards/accuracies": 0.44999998807907104,
112
+ "rewards/chosen": 0.18793432414531708,
113
+ "rewards/margins": 0.21308371424674988,
114
+ "rewards/rejected": -0.025149401277303696,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.98,
119
+ "eval_logits/chosen": -1.7877694368362427,
120
+ "eval_logits/rejected": -1.8098936080932617,
121
+ "eval_logps/chosen": -53.567203521728516,
122
+ "eval_logps/rejected": -97.33795928955078,
123
+ "eval_loss": 0.5890871286392212,
124
+ "eval_rewards/accuracies": 0.4642857015132904,
125
+ "eval_rewards/chosen": 0.25061002373695374,
126
+ "eval_rewards/margins": 0.2481890469789505,
127
+ "eval_rewards/rejected": 0.002420984674245119,
128
+ "eval_runtime": 8.1404,
129
+ "eval_samples_per_second": 3.44,
130
+ "eval_steps_per_second": 1.72,
131
+ "step": 60
132
+ },
133
+ {
134
+ "epoch": 1.14,
135
+ "grad_norm": 0.0,
136
+ "learning_rate": 4.183098591549296e-06,
137
+ "logits/chosen": -1.8344879150390625,
138
+ "logits/rejected": -1.8489716053009033,
139
+ "logps/chosen": -40.38930892944336,
140
+ "logps/rejected": -60.9084358215332,
141
+ "loss": 0.6031,
142
+ "rewards/accuracies": 0.375,
143
+ "rewards/chosen": 0.19739331305027008,
144
+ "rewards/margins": 0.22638121247291565,
145
+ "rewards/rejected": -0.028987903147935867,
146
+ "step": 70
147
+ },
148
+ {
149
+ "epoch": 1.3,
150
+ "grad_norm": 5.49536657333374,
151
+ "learning_rate": 4.042253521126761e-06,
152
+ "logits/chosen": -1.7903095483779907,
153
+ "logits/rejected": -1.8362411260604858,
154
+ "logps/chosen": -44.288116455078125,
155
+ "logps/rejected": -90.21073913574219,
156
+ "loss": 0.5357,
157
+ "rewards/accuracies": 0.4749999940395355,
158
+ "rewards/chosen": 0.34061312675476074,
159
+ "rewards/margins": 0.40679749846458435,
160
+ "rewards/rejected": -0.06618441641330719,
161
+ "step": 80
162
+ },
163
+ {
164
+ "epoch": 1.46,
165
+ "grad_norm": 13.401692390441895,
166
+ "learning_rate": 3.901408450704225e-06,
167
+ "logits/chosen": -1.8004281520843506,
168
+ "logits/rejected": -1.8247934579849243,
169
+ "logps/chosen": -42.32465362548828,
170
+ "logps/rejected": -70.9749984741211,
171
+ "loss": 0.5387,
172
+ "rewards/accuracies": 0.4749999940395355,
173
+ "rewards/chosen": 0.3678433299064636,
174
+ "rewards/margins": 0.4186524450778961,
175
+ "rewards/rejected": -0.05080908536911011,
176
+ "step": 90
177
+ },
178
+ {
179
+ "epoch": 1.46,
180
+ "eval_logits/chosen": -1.7943389415740967,
181
+ "eval_logits/rejected": -1.8181126117706299,
182
+ "eval_logps/chosen": -51.677486419677734,
183
+ "eval_logps/rejected": -97.63689422607422,
184
+ "eval_loss": 0.529485821723938,
185
+ "eval_rewards/accuracies": 0.4642857015132904,
186
+ "eval_rewards/chosen": 0.4395819306373596,
187
+ "eval_rewards/margins": 0.4670555889606476,
188
+ "eval_rewards/rejected": -0.027473628520965576,
189
+ "eval_runtime": 8.1412,
190
+ "eval_samples_per_second": 3.439,
191
+ "eval_steps_per_second": 1.72,
192
+ "step": 90
193
+ },
194
+ {
195
+ "epoch": 1.63,
196
+ "grad_norm": 5.040858745574951,
197
+ "learning_rate": 3.7605633802816903e-06,
198
+ "logits/chosen": -1.8601042032241821,
199
+ "logits/rejected": -1.8790462017059326,
200
+ "logps/chosen": -43.77570343017578,
201
+ "logps/rejected": -70.64997863769531,
202
+ "loss": 0.5466,
203
+ "rewards/accuracies": 0.4000000059604645,
204
+ "rewards/chosen": 0.36673134565353394,
205
+ "rewards/margins": 0.42903366684913635,
206
+ "rewards/rejected": -0.06230226159095764,
207
+ "step": 100
208
+ },
209
+ {
210
+ "epoch": 1.79,
211
+ "grad_norm": 11.182683944702148,
212
+ "learning_rate": 3.6197183098591553e-06,
213
+ "logits/chosen": -1.8602203130722046,
214
+ "logits/rejected": -1.8786903619766235,
215
+ "logps/chosen": -29.601736068725586,
216
+ "logps/rejected": -66.1338882446289,
217
+ "loss": 0.6003,
218
+ "rewards/accuracies": 0.2750000059604645,
219
+ "rewards/chosen": 0.3122637867927551,
220
+ "rewards/margins": 0.2756831645965576,
221
+ "rewards/rejected": 0.03658062964677811,
222
+ "step": 110
223
+ },
224
+ {
225
+ "epoch": 1.95,
226
+ "grad_norm": 3.9169583320617676,
227
+ "learning_rate": 3.47887323943662e-06,
228
+ "logits/chosen": -1.8304624557495117,
229
+ "logits/rejected": -1.8451646566390991,
230
+ "logps/chosen": -31.413599014282227,
231
+ "logps/rejected": -56.841880798339844,
232
+ "loss": 0.6033,
233
+ "rewards/accuracies": 0.2750000059604645,
234
+ "rewards/chosen": 0.21899382770061493,
235
+ "rewards/margins": 0.2744571566581726,
236
+ "rewards/rejected": -0.05546332150697708,
237
+ "step": 120
238
+ },
239
+ {
240
+ "epoch": 1.95,
241
+ "eval_logits/chosen": -1.80086350440979,
242
+ "eval_logits/rejected": -1.8260576725006104,
243
+ "eval_logps/chosen": -50.32191848754883,
244
+ "eval_logps/rejected": -98.02101135253906,
245
+ "eval_loss": 0.49604225158691406,
246
+ "eval_rewards/accuracies": 0.4642857015132904,
247
+ "eval_rewards/chosen": 0.5751391053199768,
248
+ "eval_rewards/margins": 0.6410244107246399,
249
+ "eval_rewards/rejected": -0.0658852607011795,
250
+ "eval_runtime": 8.1445,
251
+ "eval_samples_per_second": 3.438,
252
+ "eval_steps_per_second": 1.719,
253
+ "step": 120
254
+ },
255
+ {
256
+ "epoch": 2.11,
257
+ "grad_norm": 1.4047716856002808,
258
+ "learning_rate": 3.338028169014085e-06,
259
+ "logits/chosen": -1.8776130676269531,
260
+ "logits/rejected": -1.8995519876480103,
261
+ "logps/chosen": -22.69371795654297,
262
+ "logps/rejected": -53.5282096862793,
263
+ "loss": 0.5611,
264
+ "rewards/accuracies": 0.25,
265
+ "rewards/chosen": 0.35938918590545654,
266
+ "rewards/margins": 0.5045264959335327,
267
+ "rewards/rejected": -0.14513733983039856,
268
+ "step": 130
269
+ },
270
+ {
271
+ "epoch": 2.28,
272
+ "grad_norm": 0.7528722882270813,
273
+ "learning_rate": 3.1971830985915496e-06,
274
+ "logits/chosen": -1.8126357793807983,
275
+ "logits/rejected": -1.832371711730957,
276
+ "logps/chosen": -38.33379364013672,
277
+ "logps/rejected": -67.96979522705078,
278
+ "loss": 0.5142,
279
+ "rewards/accuracies": 0.375,
280
+ "rewards/chosen": 0.5568062663078308,
281
+ "rewards/margins": 0.6818712949752808,
282
+ "rewards/rejected": -0.12506499886512756,
283
+ "step": 140
284
+ },
285
+ {
286
+ "epoch": 2.44,
287
+ "grad_norm": 3.405579090118408,
288
+ "learning_rate": 3.056338028169014e-06,
289
+ "logits/chosen": -1.8196109533309937,
290
+ "logits/rejected": -1.8556429147720337,
291
+ "logps/chosen": -36.78864669799805,
292
+ "logps/rejected": -83.05890655517578,
293
+ "loss": 0.5042,
294
+ "rewards/accuracies": 0.4000000059604645,
295
+ "rewards/chosen": 0.542107105255127,
296
+ "rewards/margins": 0.6411095857620239,
297
+ "rewards/rejected": -0.09900249540805817,
298
+ "step": 150
299
+ },
300
+ {
301
+ "epoch": 2.44,
302
+ "eval_logits/chosen": -1.805869698524475,
303
+ "eval_logits/rejected": -1.8330577611923218,
304
+ "eval_logps/chosen": -49.10601043701172,
305
+ "eval_logps/rejected": -98.84068298339844,
306
+ "eval_loss": 0.4709201455116272,
307
+ "eval_rewards/accuracies": 0.4642857015132904,
308
+ "eval_rewards/chosen": 0.6967297196388245,
309
+ "eval_rewards/margins": 0.8445812463760376,
310
+ "eval_rewards/rejected": -0.1478516012430191,
311
+ "eval_runtime": 8.1382,
312
+ "eval_samples_per_second": 3.441,
313
+ "eval_steps_per_second": 1.72,
314
+ "step": 150
315
+ },
316
+ {
317
+ "epoch": 2.6,
318
+ "grad_norm": 7.778740882873535,
319
+ "learning_rate": 2.915492957746479e-06,
320
+ "logits/chosen": -1.848589301109314,
321
+ "logits/rejected": -1.8790754079818726,
322
+ "logps/chosen": -36.49171447753906,
323
+ "logps/rejected": -72.55968475341797,
324
+ "loss": 0.4927,
325
+ "rewards/accuracies": 0.44999998807907104,
326
+ "rewards/chosen": 0.49555450677871704,
327
+ "rewards/margins": 0.6891830563545227,
328
+ "rewards/rejected": -0.1936284601688385,
329
+ "step": 160
330
+ },
331
+ {
332
+ "epoch": 2.76,
333
+ "grad_norm": 4.058627605438232,
334
+ "learning_rate": 2.774647887323944e-06,
335
+ "logits/chosen": -1.812421441078186,
336
+ "logits/rejected": -1.8415311574935913,
337
+ "logps/chosen": -45.62999725341797,
338
+ "logps/rejected": -87.85527038574219,
339
+ "loss": 0.4541,
340
+ "rewards/accuracies": 0.4749999940395355,
341
+ "rewards/chosen": 0.7084562182426453,
342
+ "rewards/margins": 0.9553689956665039,
343
+ "rewards/rejected": -0.24691279232501984,
344
+ "step": 170
345
+ },
346
+ {
347
+ "epoch": 2.93,
348
+ "grad_norm": 0.0,
349
+ "learning_rate": 2.6338028169014084e-06,
350
+ "logits/chosen": -1.8475942611694336,
351
+ "logits/rejected": -1.8678725957870483,
352
+ "logps/chosen": -40.53328323364258,
353
+ "logps/rejected": -64.86616516113281,
354
+ "loss": 0.5087,
355
+ "rewards/accuracies": 0.375,
356
+ "rewards/chosen": 0.5022943019866943,
357
+ "rewards/margins": 0.7252141833305359,
358
+ "rewards/rejected": -0.22291991114616394,
359
+ "step": 180
360
+ },
361
+ {
362
+ "epoch": 2.93,
363
+ "eval_logits/chosen": -1.8136398792266846,
364
+ "eval_logits/rejected": -1.8424787521362305,
365
+ "eval_logps/chosen": -48.19547653198242,
366
+ "eval_logps/rejected": -99.7900161743164,
367
+ "eval_loss": 0.4541548192501068,
368
+ "eval_rewards/accuracies": 0.4642857015132904,
369
+ "eval_rewards/chosen": 0.7877826690673828,
370
+ "eval_rewards/margins": 1.0305674076080322,
371
+ "eval_rewards/rejected": -0.24278469383716583,
372
+ "eval_runtime": 8.1397,
373
+ "eval_samples_per_second": 3.44,
374
+ "eval_steps_per_second": 1.72,
375
+ "step": 180
376
+ },
377
+ {
378
+ "epoch": 3.09,
379
+ "grad_norm": 2.1618106365203857,
380
+ "learning_rate": 2.4929577464788734e-06,
381
+ "logits/chosen": -1.876151442527771,
382
+ "logits/rejected": -1.9132931232452393,
383
+ "logps/chosen": -38.02617645263672,
384
+ "logps/rejected": -84.4028549194336,
385
+ "loss": 0.4372,
386
+ "rewards/accuracies": 0.44999998807907104,
387
+ "rewards/chosen": 0.723468005657196,
388
+ "rewards/margins": 1.1590527296066284,
389
+ "rewards/rejected": -0.43558478355407715,
390
+ "step": 190
391
+ },
392
+ {
393
+ "epoch": 3.25,
394
+ "grad_norm": 0.0,
395
+ "learning_rate": 2.352112676056338e-06,
396
+ "logits/chosen": -1.8977773189544678,
397
+ "logits/rejected": -1.9120800495147705,
398
+ "logps/chosen": -38.097923278808594,
399
+ "logps/rejected": -55.17757034301758,
400
+ "loss": 0.4778,
401
+ "rewards/accuracies": 0.375,
402
+ "rewards/chosen": 0.5927585959434509,
403
+ "rewards/margins": 0.8746024370193481,
404
+ "rewards/rejected": -0.2818438410758972,
405
+ "step": 200
406
+ },
407
+ {
408
+ "epoch": 3.41,
409
+ "grad_norm": 7.095726013183594,
410
+ "learning_rate": 2.211267605633803e-06,
411
+ "logits/chosen": -1.8508259057998657,
412
+ "logits/rejected": -1.8876402378082275,
413
+ "logps/chosen": -33.23273468017578,
414
+ "logps/rejected": -79.0272445678711,
415
+ "loss": 0.4874,
416
+ "rewards/accuracies": 0.375,
417
+ "rewards/chosen": 0.5798195600509644,
418
+ "rewards/margins": 0.9200228452682495,
419
+ "rewards/rejected": -0.34020328521728516,
420
+ "step": 210
421
+ },
422
+ {
423
+ "epoch": 3.41,
424
+ "eval_logits/chosen": -1.821912407875061,
425
+ "eval_logits/rejected": -1.8520457744598389,
426
+ "eval_logps/chosen": -47.6314697265625,
427
+ "eval_logps/rejected": -100.92195129394531,
428
+ "eval_loss": 0.4427572786808014,
429
+ "eval_rewards/accuracies": 0.4642857015132904,
430
+ "eval_rewards/chosen": 0.8441829681396484,
431
+ "eval_rewards/margins": 1.2001608610153198,
432
+ "eval_rewards/rejected": -0.35597795248031616,
433
+ "eval_runtime": 8.1451,
434
+ "eval_samples_per_second": 3.438,
435
+ "eval_steps_per_second": 1.719,
436
+ "step": 210
437
+ },
438
+ {
439
+ "epoch": 3.58,
440
+ "grad_norm": 4.825575351715088,
441
+ "learning_rate": 2.0704225352112676e-06,
442
+ "logits/chosen": -1.889478325843811,
443
+ "logits/rejected": -1.9106714725494385,
444
+ "logps/chosen": -30.769512176513672,
445
+ "logps/rejected": -68.92756652832031,
446
+ "loss": 0.5277,
447
+ "rewards/accuracies": 0.32499998807907104,
448
+ "rewards/chosen": 0.379320353269577,
449
+ "rewards/margins": 0.6602964401245117,
450
+ "rewards/rejected": -0.28097596764564514,
451
+ "step": 220
452
+ },
453
+ {
454
+ "epoch": 3.74,
455
+ "grad_norm": 5.236915588378906,
456
+ "learning_rate": 1.9295774647887326e-06,
457
+ "logits/chosen": -1.8926284313201904,
458
+ "logits/rejected": -1.9087079763412476,
459
+ "logps/chosen": -36.48774719238281,
460
+ "logps/rejected": -59.29833221435547,
461
+ "loss": 0.5176,
462
+ "rewards/accuracies": 0.32499998807907104,
463
+ "rewards/chosen": 0.6325365304946899,
464
+ "rewards/margins": 0.867927074432373,
465
+ "rewards/rejected": -0.2353905737400055,
466
+ "step": 230
467
+ },
468
+ {
469
+ "epoch": 3.9,
470
+ "grad_norm": 1.3737443685531616,
471
+ "learning_rate": 1.7887323943661974e-06,
472
+ "logits/chosen": -1.7782018184661865,
473
+ "logits/rejected": -1.8105701208114624,
474
+ "logps/chosen": -41.42538833618164,
475
+ "logps/rejected": -93.73129272460938,
476
+ "loss": 0.4229,
477
+ "rewards/accuracies": 0.4749999940395355,
478
+ "rewards/chosen": 0.8450711369514465,
479
+ "rewards/margins": 1.3813583850860596,
480
+ "rewards/rejected": -0.5362871885299683,
481
+ "step": 240
482
+ },
483
+ {
484
+ "epoch": 3.9,
485
+ "eval_logits/chosen": -1.826602816581726,
486
+ "eval_logits/rejected": -1.8575078248977661,
487
+ "eval_logps/chosen": -47.322914123535156,
488
+ "eval_logps/rejected": -101.7520980834961,
489
+ "eval_loss": 0.4358247220516205,
490
+ "eval_rewards/accuracies": 0.4642857015132904,
491
+ "eval_rewards/chosen": 0.8750395178794861,
492
+ "eval_rewards/margins": 1.3140336275100708,
493
+ "eval_rewards/rejected": -0.4389941692352295,
494
+ "eval_runtime": 8.1403,
495
+ "eval_samples_per_second": 3.44,
496
+ "eval_steps_per_second": 1.72,
497
+ "step": 240
498
+ },
499
+ {
500
+ "epoch": 4.07,
501
+ "grad_norm": 1.977386713027954,
502
+ "learning_rate": 1.647887323943662e-06,
503
+ "logits/chosen": -1.875792145729065,
504
+ "logits/rejected": -1.8937476873397827,
505
+ "logps/chosen": -25.06104278564453,
506
+ "logps/rejected": -46.700584411621094,
507
+ "loss": 0.5274,
508
+ "rewards/accuracies": 0.30000001192092896,
509
+ "rewards/chosen": 0.42553478479385376,
510
+ "rewards/margins": 0.7891250252723694,
511
+ "rewards/rejected": -0.3635903000831604,
512
+ "step": 250
513
+ },
514
+ {
515
+ "epoch": 4.23,
516
+ "grad_norm": 3.320791244506836,
517
+ "learning_rate": 1.5070422535211269e-06,
518
+ "logits/chosen": -1.7908179759979248,
519
+ "logits/rejected": -1.8309694528579712,
520
+ "logps/chosen": -54.056663513183594,
521
+ "logps/rejected": -108.03240966796875,
522
+ "loss": 0.3569,
523
+ "rewards/accuracies": 0.574999988079071,
524
+ "rewards/chosen": 1.07839035987854,
525
+ "rewards/margins": 1.643531084060669,
526
+ "rewards/rejected": -0.5651407837867737,
527
+ "step": 260
528
+ },
529
+ {
530
+ "epoch": 4.39,
531
+ "grad_norm": 4.999856948852539,
532
+ "learning_rate": 1.3661971830985919e-06,
533
+ "logits/chosen": -1.9177863597869873,
534
+ "logits/rejected": -1.9476194381713867,
535
+ "logps/chosen": -22.58294105529785,
536
+ "logps/rejected": -61.99756622314453,
537
+ "loss": 0.5295,
538
+ "rewards/accuracies": 0.2750000059604645,
539
+ "rewards/chosen": 0.5058903694152832,
540
+ "rewards/margins": 0.8186748623847961,
541
+ "rewards/rejected": -0.31278449296951294,
542
+ "step": 270
543
+ },
544
+ {
545
+ "epoch": 4.39,
546
+ "eval_logits/chosen": -1.8289211988449097,
547
+ "eval_logits/rejected": -1.860676646232605,
548
+ "eval_logps/chosen": -47.04714584350586,
549
+ "eval_logps/rejected": -102.3218994140625,
550
+ "eval_loss": 0.43130752444267273,
551
+ "eval_rewards/accuracies": 0.4642857015132904,
552
+ "eval_rewards/chosen": 0.9026166200637817,
553
+ "eval_rewards/margins": 1.3985893726348877,
554
+ "eval_rewards/rejected": -0.4959728717803955,
555
+ "eval_runtime": 8.1397,
556
+ "eval_samples_per_second": 3.44,
557
+ "eval_steps_per_second": 1.72,
558
+ "step": 270
559
+ }
560
+ ],
561
+ "logging_steps": 10,
562
+ "max_steps": 366,
563
+ "num_input_tokens_seen": 0,
564
+ "num_train_epochs": 6,
565
+ "save_steps": 90,
566
+ "total_flos": 0.0,
567
+ "train_batch_size": 2,
568
+ "trial_name": null,
569
+ "trial_params": null
570
+ }
checkpoint-270/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de569ad7b35da22832fd1fc395ab1c110ac622b038bdfcb0eee757cdc5b4b97b
3
+ size 5304
checkpoint-360/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.10.0
checkpoint-360/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "v_proj",
24
+ "q_proj"
25
+ ],
26
+ "task_type": "CAUSAL_LM",
27
+ "use_dora": false,
28
+ "use_rslora": false
29
+ }
checkpoint-360/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0c709e037401ede6602043ef6a5abb1f9b33746d7060acc95355c55c0660071
3
+ size 13648432
checkpoint-360/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30955b65c45b0993e250de74aafe58045cbb1b0ea1ca012a6e570504f8394557
3
+ size 27370618
checkpoint-360/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adbac4581908b419c3039e502a4aacf8ebe77d7c5097a659c9e661f2ab321b78
3
+ size 14244
checkpoint-360/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c2a24ffbdf666fc29aa43a0bae8368ec77e666548541499714a3f8dfdd7c88b
3
+ size 1064
checkpoint-360/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-360/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-360/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-360/tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "additional_special_tokens": [],
31
+ "bos_token": "<s>",
32
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token }}{% endif %}{% endfor %}",
33
+ "clean_up_tokenization_spaces": false,
34
+ "eos_token": "</s>",
35
+ "legacy": true,
36
+ "max_lenght": 8192,
37
+ "max_length": 8192,
38
+ "model_max_length": 1000000000000000019884624838656,
39
+ "pad_token": "<unk>",
40
+ "padding": true,
41
+ "sp_model_kwargs": {},
42
+ "spaces_between_special_tokens": false,
43
+ "stride": 0,
44
+ "tokenizer_class": "LlamaTokenizer",
45
+ "truncation_side": "right",
46
+ "truncation_strategy": "longest_first",
47
+ "unk_token": "<unk>",
48
+ "use_default_system_prompt": false
49
+ }
checkpoint-360/trainer_state.json ADDED
@@ -0,0 +1,753 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.42603132128715515,
3
+ "best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-360",
4
+ "epoch": 5.853658536585366,
5
+ "eval_steps": 30,
6
+ "global_step": 360,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.16,
13
+ "grad_norm": 8.378021240234375,
14
+ "learning_rate": 4.0909090909090915e-06,
15
+ "logits/chosen": -1.830958604812622,
16
+ "logits/rejected": -1.8507845401763916,
17
+ "logps/chosen": -28.701984405517578,
18
+ "logps/rejected": -54.28569793701172,
19
+ "loss": 0.6924,
20
+ "rewards/accuracies": 0.20000000298023224,
21
+ "rewards/chosen": 0.0008967495523393154,
22
+ "rewards/margins": 0.0014666033675894141,
23
+ "rewards/rejected": -0.0005698538152500987,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.33,
28
+ "grad_norm": 5.193418502807617,
29
+ "learning_rate": 4.887323943661972e-06,
30
+ "logits/chosen": -1.7550897598266602,
31
+ "logits/rejected": -1.770708680152893,
32
+ "logps/chosen": -47.344207763671875,
33
+ "logps/rejected": -64.0368423461914,
34
+ "loss": 0.6852,
35
+ "rewards/accuracies": 0.4000000059604645,
36
+ "rewards/chosen": 0.017231885343790054,
37
+ "rewards/margins": 0.01606021076440811,
38
+ "rewards/rejected": 0.0011716745793819427,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.49,
43
+ "grad_norm": 7.308932304382324,
44
+ "learning_rate": 4.746478873239437e-06,
45
+ "logits/chosen": -1.781267762184143,
46
+ "logits/rejected": -1.8114898204803467,
47
+ "logps/chosen": -54.274559020996094,
48
+ "logps/rejected": -95.20500183105469,
49
+ "loss": 0.6635,
50
+ "rewards/accuracies": 0.5,
51
+ "rewards/chosen": 0.0641159638762474,
52
+ "rewards/margins": 0.061691801995038986,
53
+ "rewards/rejected": 0.0024241588544100523,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.49,
58
+ "eval_logits/chosen": -1.7831767797470093,
59
+ "eval_logits/rejected": -1.8043663501739502,
60
+ "eval_logps/chosen": -55.16960906982422,
61
+ "eval_logps/rejected": -97.32585144042969,
62
+ "eval_loss": 0.6523757576942444,
63
+ "eval_rewards/accuracies": 0.4642857015132904,
64
+ "eval_rewards/chosen": 0.09036973863840103,
65
+ "eval_rewards/margins": 0.08673857897520065,
66
+ "eval_rewards/rejected": 0.0036311547737568617,
67
+ "eval_runtime": 8.141,
68
+ "eval_samples_per_second": 3.439,
69
+ "eval_steps_per_second": 1.72,
70
+ "step": 30
71
+ },
72
+ {
73
+ "epoch": 0.65,
74
+ "grad_norm": 0.0,
75
+ "learning_rate": 4.6056338028169015e-06,
76
+ "logits/chosen": -1.889905333518982,
77
+ "logits/rejected": -1.9024461507797241,
78
+ "logps/chosen": -27.918941497802734,
79
+ "logps/rejected": -42.093284606933594,
80
+ "loss": 0.668,
81
+ "rewards/accuracies": 0.25,
82
+ "rewards/chosen": 0.054457180202007294,
83
+ "rewards/margins": 0.0539846234023571,
84
+ "rewards/rejected": 0.0004725646285805851,
85
+ "step": 40
86
+ },
87
+ {
88
+ "epoch": 0.81,
89
+ "grad_norm": 8.53225326538086,
90
+ "learning_rate": 4.464788732394367e-06,
91
+ "logits/chosen": -1.8278567790985107,
92
+ "logits/rejected": -1.849957823753357,
93
+ "logps/chosen": -43.8238639831543,
94
+ "logps/rejected": -68.02179718017578,
95
+ "loss": 0.6358,
96
+ "rewards/accuracies": 0.3499999940395355,
97
+ "rewards/chosen": 0.13941256701946259,
98
+ "rewards/margins": 0.13133978843688965,
99
+ "rewards/rejected": 0.008072790689766407,
100
+ "step": 50
101
+ },
102
+ {
103
+ "epoch": 0.98,
104
+ "grad_norm": 9.436968803405762,
105
+ "learning_rate": 4.3239436619718315e-06,
106
+ "logits/chosen": -1.805991768836975,
107
+ "logits/rejected": -1.8437427282333374,
108
+ "logps/chosen": -43.8873291015625,
109
+ "logps/rejected": -95.2943115234375,
110
+ "loss": 0.6026,
111
+ "rewards/accuracies": 0.44999998807907104,
112
+ "rewards/chosen": 0.18793432414531708,
113
+ "rewards/margins": 0.21308371424674988,
114
+ "rewards/rejected": -0.025149401277303696,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.98,
119
+ "eval_logits/chosen": -1.7877694368362427,
120
+ "eval_logits/rejected": -1.8098936080932617,
121
+ "eval_logps/chosen": -53.567203521728516,
122
+ "eval_logps/rejected": -97.33795928955078,
123
+ "eval_loss": 0.5890871286392212,
124
+ "eval_rewards/accuracies": 0.4642857015132904,
125
+ "eval_rewards/chosen": 0.25061002373695374,
126
+ "eval_rewards/margins": 0.2481890469789505,
127
+ "eval_rewards/rejected": 0.002420984674245119,
128
+ "eval_runtime": 8.1404,
129
+ "eval_samples_per_second": 3.44,
130
+ "eval_steps_per_second": 1.72,
131
+ "step": 60
132
+ },
133
+ {
134
+ "epoch": 1.14,
135
+ "grad_norm": 0.0,
136
+ "learning_rate": 4.183098591549296e-06,
137
+ "logits/chosen": -1.8344879150390625,
138
+ "logits/rejected": -1.8489716053009033,
139
+ "logps/chosen": -40.38930892944336,
140
+ "logps/rejected": -60.9084358215332,
141
+ "loss": 0.6031,
142
+ "rewards/accuracies": 0.375,
143
+ "rewards/chosen": 0.19739331305027008,
144
+ "rewards/margins": 0.22638121247291565,
145
+ "rewards/rejected": -0.028987903147935867,
146
+ "step": 70
147
+ },
148
+ {
149
+ "epoch": 1.3,
150
+ "grad_norm": 5.49536657333374,
151
+ "learning_rate": 4.042253521126761e-06,
152
+ "logits/chosen": -1.7903095483779907,
153
+ "logits/rejected": -1.8362411260604858,
154
+ "logps/chosen": -44.288116455078125,
155
+ "logps/rejected": -90.21073913574219,
156
+ "loss": 0.5357,
157
+ "rewards/accuracies": 0.4749999940395355,
158
+ "rewards/chosen": 0.34061312675476074,
159
+ "rewards/margins": 0.40679749846458435,
160
+ "rewards/rejected": -0.06618441641330719,
161
+ "step": 80
162
+ },
163
+ {
164
+ "epoch": 1.46,
165
+ "grad_norm": 13.401692390441895,
166
+ "learning_rate": 3.901408450704225e-06,
167
+ "logits/chosen": -1.8004281520843506,
168
+ "logits/rejected": -1.8247934579849243,
169
+ "logps/chosen": -42.32465362548828,
170
+ "logps/rejected": -70.9749984741211,
171
+ "loss": 0.5387,
172
+ "rewards/accuracies": 0.4749999940395355,
173
+ "rewards/chosen": 0.3678433299064636,
174
+ "rewards/margins": 0.4186524450778961,
175
+ "rewards/rejected": -0.05080908536911011,
176
+ "step": 90
177
+ },
178
+ {
179
+ "epoch": 1.46,
180
+ "eval_logits/chosen": -1.7943389415740967,
181
+ "eval_logits/rejected": -1.8181126117706299,
182
+ "eval_logps/chosen": -51.677486419677734,
183
+ "eval_logps/rejected": -97.63689422607422,
184
+ "eval_loss": 0.529485821723938,
185
+ "eval_rewards/accuracies": 0.4642857015132904,
186
+ "eval_rewards/chosen": 0.4395819306373596,
187
+ "eval_rewards/margins": 0.4670555889606476,
188
+ "eval_rewards/rejected": -0.027473628520965576,
189
+ "eval_runtime": 8.1412,
190
+ "eval_samples_per_second": 3.439,
191
+ "eval_steps_per_second": 1.72,
192
+ "step": 90
193
+ },
194
+ {
195
+ "epoch": 1.63,
196
+ "grad_norm": 5.040858745574951,
197
+ "learning_rate": 3.7605633802816903e-06,
198
+ "logits/chosen": -1.8601042032241821,
199
+ "logits/rejected": -1.8790462017059326,
200
+ "logps/chosen": -43.77570343017578,
201
+ "logps/rejected": -70.64997863769531,
202
+ "loss": 0.5466,
203
+ "rewards/accuracies": 0.4000000059604645,
204
+ "rewards/chosen": 0.36673134565353394,
205
+ "rewards/margins": 0.42903366684913635,
206
+ "rewards/rejected": -0.06230226159095764,
207
+ "step": 100
208
+ },
209
+ {
210
+ "epoch": 1.79,
211
+ "grad_norm": 11.182683944702148,
212
+ "learning_rate": 3.6197183098591553e-06,
213
+ "logits/chosen": -1.8602203130722046,
214
+ "logits/rejected": -1.8786903619766235,
215
+ "logps/chosen": -29.601736068725586,
216
+ "logps/rejected": -66.1338882446289,
217
+ "loss": 0.6003,
218
+ "rewards/accuracies": 0.2750000059604645,
219
+ "rewards/chosen": 0.3122637867927551,
220
+ "rewards/margins": 0.2756831645965576,
221
+ "rewards/rejected": 0.03658062964677811,
222
+ "step": 110
223
+ },
224
+ {
225
+ "epoch": 1.95,
226
+ "grad_norm": 3.9169583320617676,
227
+ "learning_rate": 3.47887323943662e-06,
228
+ "logits/chosen": -1.8304624557495117,
229
+ "logits/rejected": -1.8451646566390991,
230
+ "logps/chosen": -31.413599014282227,
231
+ "logps/rejected": -56.841880798339844,
232
+ "loss": 0.6033,
233
+ "rewards/accuracies": 0.2750000059604645,
234
+ "rewards/chosen": 0.21899382770061493,
235
+ "rewards/margins": 0.2744571566581726,
236
+ "rewards/rejected": -0.05546332150697708,
237
+ "step": 120
238
+ },
239
+ {
240
+ "epoch": 1.95,
241
+ "eval_logits/chosen": -1.80086350440979,
242
+ "eval_logits/rejected": -1.8260576725006104,
243
+ "eval_logps/chosen": -50.32191848754883,
244
+ "eval_logps/rejected": -98.02101135253906,
245
+ "eval_loss": 0.49604225158691406,
246
+ "eval_rewards/accuracies": 0.4642857015132904,
247
+ "eval_rewards/chosen": 0.5751391053199768,
248
+ "eval_rewards/margins": 0.6410244107246399,
249
+ "eval_rewards/rejected": -0.0658852607011795,
250
+ "eval_runtime": 8.1445,
251
+ "eval_samples_per_second": 3.438,
252
+ "eval_steps_per_second": 1.719,
253
+ "step": 120
254
+ },
255
+ {
256
+ "epoch": 2.11,
257
+ "grad_norm": 1.4047716856002808,
258
+ "learning_rate": 3.338028169014085e-06,
259
+ "logits/chosen": -1.8776130676269531,
260
+ "logits/rejected": -1.8995519876480103,
261
+ "logps/chosen": -22.69371795654297,
262
+ "logps/rejected": -53.5282096862793,
263
+ "loss": 0.5611,
264
+ "rewards/accuracies": 0.25,
265
+ "rewards/chosen": 0.35938918590545654,
266
+ "rewards/margins": 0.5045264959335327,
267
+ "rewards/rejected": -0.14513733983039856,
268
+ "step": 130
269
+ },
270
+ {
271
+ "epoch": 2.28,
272
+ "grad_norm": 0.7528722882270813,
273
+ "learning_rate": 3.1971830985915496e-06,
274
+ "logits/chosen": -1.8126357793807983,
275
+ "logits/rejected": -1.832371711730957,
276
+ "logps/chosen": -38.33379364013672,
277
+ "logps/rejected": -67.96979522705078,
278
+ "loss": 0.5142,
279
+ "rewards/accuracies": 0.375,
280
+ "rewards/chosen": 0.5568062663078308,
281
+ "rewards/margins": 0.6818712949752808,
282
+ "rewards/rejected": -0.12506499886512756,
283
+ "step": 140
284
+ },
285
+ {
286
+ "epoch": 2.44,
287
+ "grad_norm": 3.405579090118408,
288
+ "learning_rate": 3.056338028169014e-06,
289
+ "logits/chosen": -1.8196109533309937,
290
+ "logits/rejected": -1.8556429147720337,
291
+ "logps/chosen": -36.78864669799805,
292
+ "logps/rejected": -83.05890655517578,
293
+ "loss": 0.5042,
294
+ "rewards/accuracies": 0.4000000059604645,
295
+ "rewards/chosen": 0.542107105255127,
296
+ "rewards/margins": 0.6411095857620239,
297
+ "rewards/rejected": -0.09900249540805817,
298
+ "step": 150
299
+ },
300
+ {
301
+ "epoch": 2.44,
302
+ "eval_logits/chosen": -1.805869698524475,
303
+ "eval_logits/rejected": -1.8330577611923218,
304
+ "eval_logps/chosen": -49.10601043701172,
305
+ "eval_logps/rejected": -98.84068298339844,
306
+ "eval_loss": 0.4709201455116272,
307
+ "eval_rewards/accuracies": 0.4642857015132904,
308
+ "eval_rewards/chosen": 0.6967297196388245,
309
+ "eval_rewards/margins": 0.8445812463760376,
310
+ "eval_rewards/rejected": -0.1478516012430191,
311
+ "eval_runtime": 8.1382,
312
+ "eval_samples_per_second": 3.441,
313
+ "eval_steps_per_second": 1.72,
314
+ "step": 150
315
+ },
316
+ {
317
+ "epoch": 2.6,
318
+ "grad_norm": 7.778740882873535,
319
+ "learning_rate": 2.915492957746479e-06,
320
+ "logits/chosen": -1.848589301109314,
321
+ "logits/rejected": -1.8790754079818726,
322
+ "logps/chosen": -36.49171447753906,
323
+ "logps/rejected": -72.55968475341797,
324
+ "loss": 0.4927,
325
+ "rewards/accuracies": 0.44999998807907104,
326
+ "rewards/chosen": 0.49555450677871704,
327
+ "rewards/margins": 0.6891830563545227,
328
+ "rewards/rejected": -0.1936284601688385,
329
+ "step": 160
330
+ },
331
+ {
332
+ "epoch": 2.76,
333
+ "grad_norm": 4.058627605438232,
334
+ "learning_rate": 2.774647887323944e-06,
335
+ "logits/chosen": -1.812421441078186,
336
+ "logits/rejected": -1.8415311574935913,
337
+ "logps/chosen": -45.62999725341797,
338
+ "logps/rejected": -87.85527038574219,
339
+ "loss": 0.4541,
340
+ "rewards/accuracies": 0.4749999940395355,
341
+ "rewards/chosen": 0.7084562182426453,
342
+ "rewards/margins": 0.9553689956665039,
343
+ "rewards/rejected": -0.24691279232501984,
344
+ "step": 170
345
+ },
346
+ {
347
+ "epoch": 2.93,
348
+ "grad_norm": 0.0,
349
+ "learning_rate": 2.6338028169014084e-06,
350
+ "logits/chosen": -1.8475942611694336,
351
+ "logits/rejected": -1.8678725957870483,
352
+ "logps/chosen": -40.53328323364258,
353
+ "logps/rejected": -64.86616516113281,
354
+ "loss": 0.5087,
355
+ "rewards/accuracies": 0.375,
356
+ "rewards/chosen": 0.5022943019866943,
357
+ "rewards/margins": 0.7252141833305359,
358
+ "rewards/rejected": -0.22291991114616394,
359
+ "step": 180
360
+ },
361
+ {
362
+ "epoch": 2.93,
363
+ "eval_logits/chosen": -1.8136398792266846,
364
+ "eval_logits/rejected": -1.8424787521362305,
365
+ "eval_logps/chosen": -48.19547653198242,
366
+ "eval_logps/rejected": -99.7900161743164,
367
+ "eval_loss": 0.4541548192501068,
368
+ "eval_rewards/accuracies": 0.4642857015132904,
369
+ "eval_rewards/chosen": 0.7877826690673828,
370
+ "eval_rewards/margins": 1.0305674076080322,
371
+ "eval_rewards/rejected": -0.24278469383716583,
372
+ "eval_runtime": 8.1397,
373
+ "eval_samples_per_second": 3.44,
374
+ "eval_steps_per_second": 1.72,
375
+ "step": 180
376
+ },
377
+ {
378
+ "epoch": 3.09,
379
+ "grad_norm": 2.1618106365203857,
380
+ "learning_rate": 2.4929577464788734e-06,
381
+ "logits/chosen": -1.876151442527771,
382
+ "logits/rejected": -1.9132931232452393,
383
+ "logps/chosen": -38.02617645263672,
384
+ "logps/rejected": -84.4028549194336,
385
+ "loss": 0.4372,
386
+ "rewards/accuracies": 0.44999998807907104,
387
+ "rewards/chosen": 0.723468005657196,
388
+ "rewards/margins": 1.1590527296066284,
389
+ "rewards/rejected": -0.43558478355407715,
390
+ "step": 190
391
+ },
392
+ {
393
+ "epoch": 3.25,
394
+ "grad_norm": 0.0,
395
+ "learning_rate": 2.352112676056338e-06,
396
+ "logits/chosen": -1.8977773189544678,
397
+ "logits/rejected": -1.9120800495147705,
398
+ "logps/chosen": -38.097923278808594,
399
+ "logps/rejected": -55.17757034301758,
400
+ "loss": 0.4778,
401
+ "rewards/accuracies": 0.375,
402
+ "rewards/chosen": 0.5927585959434509,
403
+ "rewards/margins": 0.8746024370193481,
404
+ "rewards/rejected": -0.2818438410758972,
405
+ "step": 200
406
+ },
407
+ {
408
+ "epoch": 3.41,
409
+ "grad_norm": 7.095726013183594,
410
+ "learning_rate": 2.211267605633803e-06,
411
+ "logits/chosen": -1.8508259057998657,
412
+ "logits/rejected": -1.8876402378082275,
413
+ "logps/chosen": -33.23273468017578,
414
+ "logps/rejected": -79.0272445678711,
415
+ "loss": 0.4874,
416
+ "rewards/accuracies": 0.375,
417
+ "rewards/chosen": 0.5798195600509644,
418
+ "rewards/margins": 0.9200228452682495,
419
+ "rewards/rejected": -0.34020328521728516,
420
+ "step": 210
421
+ },
422
+ {
423
+ "epoch": 3.41,
424
+ "eval_logits/chosen": -1.821912407875061,
425
+ "eval_logits/rejected": -1.8520457744598389,
426
+ "eval_logps/chosen": -47.6314697265625,
427
+ "eval_logps/rejected": -100.92195129394531,
428
+ "eval_loss": 0.4427572786808014,
429
+ "eval_rewards/accuracies": 0.4642857015132904,
430
+ "eval_rewards/chosen": 0.8441829681396484,
431
+ "eval_rewards/margins": 1.2001608610153198,
432
+ "eval_rewards/rejected": -0.35597795248031616,
433
+ "eval_runtime": 8.1451,
434
+ "eval_samples_per_second": 3.438,
435
+ "eval_steps_per_second": 1.719,
436
+ "step": 210
437
+ },
438
+ {
439
+ "epoch": 3.58,
440
+ "grad_norm": 4.825575351715088,
441
+ "learning_rate": 2.0704225352112676e-06,
442
+ "logits/chosen": -1.889478325843811,
443
+ "logits/rejected": -1.9106714725494385,
444
+ "logps/chosen": -30.769512176513672,
445
+ "logps/rejected": -68.92756652832031,
446
+ "loss": 0.5277,
447
+ "rewards/accuracies": 0.32499998807907104,
448
+ "rewards/chosen": 0.379320353269577,
449
+ "rewards/margins": 0.6602964401245117,
450
+ "rewards/rejected": -0.28097596764564514,
451
+ "step": 220
452
+ },
453
+ {
454
+ "epoch": 3.74,
455
+ "grad_norm": 5.236915588378906,
456
+ "learning_rate": 1.9295774647887326e-06,
457
+ "logits/chosen": -1.8926284313201904,
458
+ "logits/rejected": -1.9087079763412476,
459
+ "logps/chosen": -36.48774719238281,
460
+ "logps/rejected": -59.29833221435547,
461
+ "loss": 0.5176,
462
+ "rewards/accuracies": 0.32499998807907104,
463
+ "rewards/chosen": 0.6325365304946899,
464
+ "rewards/margins": 0.867927074432373,
465
+ "rewards/rejected": -0.2353905737400055,
466
+ "step": 230
467
+ },
468
+ {
469
+ "epoch": 3.9,
470
+ "grad_norm": 1.3737443685531616,
471
+ "learning_rate": 1.7887323943661974e-06,
472
+ "logits/chosen": -1.7782018184661865,
473
+ "logits/rejected": -1.8105701208114624,
474
+ "logps/chosen": -41.42538833618164,
475
+ "logps/rejected": -93.73129272460938,
476
+ "loss": 0.4229,
477
+ "rewards/accuracies": 0.4749999940395355,
478
+ "rewards/chosen": 0.8450711369514465,
479
+ "rewards/margins": 1.3813583850860596,
480
+ "rewards/rejected": -0.5362871885299683,
481
+ "step": 240
482
+ },
483
+ {
484
+ "epoch": 3.9,
485
+ "eval_logits/chosen": -1.826602816581726,
486
+ "eval_logits/rejected": -1.8575078248977661,
487
+ "eval_logps/chosen": -47.322914123535156,
488
+ "eval_logps/rejected": -101.7520980834961,
489
+ "eval_loss": 0.4358247220516205,
490
+ "eval_rewards/accuracies": 0.4642857015132904,
491
+ "eval_rewards/chosen": 0.8750395178794861,
492
+ "eval_rewards/margins": 1.3140336275100708,
493
+ "eval_rewards/rejected": -0.4389941692352295,
494
+ "eval_runtime": 8.1403,
495
+ "eval_samples_per_second": 3.44,
496
+ "eval_steps_per_second": 1.72,
497
+ "step": 240
498
+ },
499
+ {
500
+ "epoch": 4.07,
501
+ "grad_norm": 1.977386713027954,
502
+ "learning_rate": 1.647887323943662e-06,
503
+ "logits/chosen": -1.875792145729065,
504
+ "logits/rejected": -1.8937476873397827,
505
+ "logps/chosen": -25.06104278564453,
506
+ "logps/rejected": -46.700584411621094,
507
+ "loss": 0.5274,
508
+ "rewards/accuracies": 0.30000001192092896,
509
+ "rewards/chosen": 0.42553478479385376,
510
+ "rewards/margins": 0.7891250252723694,
511
+ "rewards/rejected": -0.3635903000831604,
512
+ "step": 250
513
+ },
514
+ {
515
+ "epoch": 4.23,
516
+ "grad_norm": 3.320791244506836,
517
+ "learning_rate": 1.5070422535211269e-06,
518
+ "logits/chosen": -1.7908179759979248,
519
+ "logits/rejected": -1.8309694528579712,
520
+ "logps/chosen": -54.056663513183594,
521
+ "logps/rejected": -108.03240966796875,
522
+ "loss": 0.3569,
523
+ "rewards/accuracies": 0.574999988079071,
524
+ "rewards/chosen": 1.07839035987854,
525
+ "rewards/margins": 1.643531084060669,
526
+ "rewards/rejected": -0.5651407837867737,
527
+ "step": 260
528
+ },
529
+ {
530
+ "epoch": 4.39,
531
+ "grad_norm": 4.999856948852539,
532
+ "learning_rate": 1.3661971830985919e-06,
533
+ "logits/chosen": -1.9177863597869873,
534
+ "logits/rejected": -1.9476194381713867,
535
+ "logps/chosen": -22.58294105529785,
536
+ "logps/rejected": -61.99756622314453,
537
+ "loss": 0.5295,
538
+ "rewards/accuracies": 0.2750000059604645,
539
+ "rewards/chosen": 0.5058903694152832,
540
+ "rewards/margins": 0.8186748623847961,
541
+ "rewards/rejected": -0.31278449296951294,
542
+ "step": 270
543
+ },
544
+ {
545
+ "epoch": 4.39,
546
+ "eval_logits/chosen": -1.8289211988449097,
547
+ "eval_logits/rejected": -1.860676646232605,
548
+ "eval_logps/chosen": -47.04714584350586,
549
+ "eval_logps/rejected": -102.3218994140625,
550
+ "eval_loss": 0.43130752444267273,
551
+ "eval_rewards/accuracies": 0.4642857015132904,
552
+ "eval_rewards/chosen": 0.9026166200637817,
553
+ "eval_rewards/margins": 1.3985893726348877,
554
+ "eval_rewards/rejected": -0.4959728717803955,
555
+ "eval_runtime": 8.1397,
556
+ "eval_samples_per_second": 3.44,
557
+ "eval_steps_per_second": 1.72,
558
+ "step": 270
559
+ },
560
+ {
561
+ "epoch": 4.55,
562
+ "grad_norm": 0.36910170316696167,
563
+ "learning_rate": 1.2253521126760565e-06,
564
+ "logits/chosen": -1.881696105003357,
565
+ "logits/rejected": -1.9073266983032227,
566
+ "logps/chosen": -33.28648376464844,
567
+ "logps/rejected": -68.246337890625,
568
+ "loss": 0.4962,
569
+ "rewards/accuracies": 0.32499998807907104,
570
+ "rewards/chosen": 0.5577932000160217,
571
+ "rewards/margins": 1.0508588552474976,
572
+ "rewards/rejected": -0.4930656850337982,
573
+ "step": 280
574
+ },
575
+ {
576
+ "epoch": 4.72,
577
+ "grad_norm": 11.332355499267578,
578
+ "learning_rate": 1.084507042253521e-06,
579
+ "logits/chosen": -1.8866857290267944,
580
+ "logits/rejected": -1.900857925415039,
581
+ "logps/chosen": -40.804874420166016,
582
+ "logps/rejected": -71.67508697509766,
583
+ "loss": 0.4851,
584
+ "rewards/accuracies": 0.4000000059604645,
585
+ "rewards/chosen": 0.7005030512809753,
586
+ "rewards/margins": 1.2071340084075928,
587
+ "rewards/rejected": -0.5066308379173279,
588
+ "step": 290
589
+ },
590
+ {
591
+ "epoch": 4.88,
592
+ "grad_norm": 3.657494306564331,
593
+ "learning_rate": 9.43661971830986e-07,
594
+ "logits/chosen": -1.9023106098175049,
595
+ "logits/rejected": -1.9253908395767212,
596
+ "logps/chosen": -18.57657814025879,
597
+ "logps/rejected": -53.88740158081055,
598
+ "loss": 0.5466,
599
+ "rewards/accuracies": 0.25,
600
+ "rewards/chosen": 0.3226935565471649,
601
+ "rewards/margins": 0.6567031145095825,
602
+ "rewards/rejected": -0.33400958776474,
603
+ "step": 300
604
+ },
605
+ {
606
+ "epoch": 4.88,
607
+ "eval_logits/chosen": -1.8308794498443604,
608
+ "eval_logits/rejected": -1.8629435300827026,
609
+ "eval_logps/chosen": -46.95443344116211,
610
+ "eval_logps/rejected": -102.74605560302734,
611
+ "eval_loss": 0.4291366934776306,
612
+ "eval_rewards/accuracies": 0.4642857015132904,
613
+ "eval_rewards/chosen": 0.9118875861167908,
614
+ "eval_rewards/margins": 1.4502772092819214,
615
+ "eval_rewards/rejected": -0.5383896827697754,
616
+ "eval_runtime": 8.1441,
617
+ "eval_samples_per_second": 3.438,
618
+ "eval_steps_per_second": 1.719,
619
+ "step": 300
620
+ },
621
+ {
622
+ "epoch": 5.04,
623
+ "grad_norm": 4.444954872131348,
624
+ "learning_rate": 8.028169014084508e-07,
625
+ "logits/chosen": -1.835021734237671,
626
+ "logits/rejected": -1.858599066734314,
627
+ "logps/chosen": -42.14970016479492,
628
+ "logps/rejected": -86.938720703125,
629
+ "loss": 0.4128,
630
+ "rewards/accuracies": 0.4749999940395355,
631
+ "rewards/chosen": 0.8941621780395508,
632
+ "rewards/margins": 1.572546362876892,
633
+ "rewards/rejected": -0.6783844232559204,
634
+ "step": 310
635
+ },
636
+ {
637
+ "epoch": 5.2,
638
+ "grad_norm": 0.4182775616645813,
639
+ "learning_rate": 6.619718309859155e-07,
640
+ "logits/chosen": -1.8859401941299438,
641
+ "logits/rejected": -1.910548448562622,
642
+ "logps/chosen": -34.28424835205078,
643
+ "logps/rejected": -77.3191146850586,
644
+ "loss": 0.4465,
645
+ "rewards/accuracies": 0.4000000059604645,
646
+ "rewards/chosen": 0.739007294178009,
647
+ "rewards/margins": 1.3678598403930664,
648
+ "rewards/rejected": -0.6288524866104126,
649
+ "step": 320
650
+ },
651
+ {
652
+ "epoch": 5.37,
653
+ "grad_norm": 2.8709957859973656e-06,
654
+ "learning_rate": 5.211267605633803e-07,
655
+ "logits/chosen": -1.7752397060394287,
656
+ "logits/rejected": -1.8195409774780273,
657
+ "logps/chosen": -42.48664855957031,
658
+ "logps/rejected": -97.59371185302734,
659
+ "loss": 0.4339,
660
+ "rewards/accuracies": 0.5,
661
+ "rewards/chosen": 0.8835798501968384,
662
+ "rewards/margins": 1.3967663049697876,
663
+ "rewards/rejected": -0.513186514377594,
664
+ "step": 330
665
+ },
666
+ {
667
+ "epoch": 5.37,
668
+ "eval_logits/chosen": -1.8319826126098633,
669
+ "eval_logits/rejected": -1.864353895187378,
670
+ "eval_logps/chosen": -46.921607971191406,
671
+ "eval_logps/rejected": -103.26231384277344,
672
+ "eval_loss": 0.42683711647987366,
673
+ "eval_rewards/accuracies": 0.4642857015132904,
674
+ "eval_rewards/chosen": 0.9151698350906372,
675
+ "eval_rewards/margins": 1.505185842514038,
676
+ "eval_rewards/rejected": -0.5900159478187561,
677
+ "eval_runtime": 8.1406,
678
+ "eval_samples_per_second": 3.44,
679
+ "eval_steps_per_second": 1.72,
680
+ "step": 330
681
+ },
682
+ {
683
+ "epoch": 5.53,
684
+ "grad_norm": 0.23427560925483704,
685
+ "learning_rate": 3.8028169014084507e-07,
686
+ "logits/chosen": -1.8918192386627197,
687
+ "logits/rejected": -1.9169318675994873,
688
+ "logps/chosen": -35.16785430908203,
689
+ "logps/rejected": -71.60049438476562,
690
+ "loss": 0.4617,
691
+ "rewards/accuracies": 0.375,
692
+ "rewards/chosen": 0.5324742197990417,
693
+ "rewards/margins": 1.1221383810043335,
694
+ "rewards/rejected": -0.5896641612052917,
695
+ "step": 340
696
+ },
697
+ {
698
+ "epoch": 5.69,
699
+ "grad_norm": 0.0,
700
+ "learning_rate": 2.394366197183099e-07,
701
+ "logits/chosen": -1.857642412185669,
702
+ "logits/rejected": -1.888279676437378,
703
+ "logps/chosen": -37.31398010253906,
704
+ "logps/rejected": -90.64387512207031,
705
+ "loss": 0.4569,
706
+ "rewards/accuracies": 0.4000000059604645,
707
+ "rewards/chosen": 0.7042752504348755,
708
+ "rewards/margins": 1.4055907726287842,
709
+ "rewards/rejected": -0.7013154625892639,
710
+ "step": 350
711
+ },
712
+ {
713
+ "epoch": 5.85,
714
+ "grad_norm": 11.415884017944336,
715
+ "learning_rate": 9.859154929577466e-08,
716
+ "logits/chosen": -1.9216959476470947,
717
+ "logits/rejected": -1.9308369159698486,
718
+ "logps/chosen": -25.689884185791016,
719
+ "logps/rejected": -36.790706634521484,
720
+ "loss": 0.5438,
721
+ "rewards/accuracies": 0.25,
722
+ "rewards/chosen": 0.4890199303627014,
723
+ "rewards/margins": 0.789040207862854,
724
+ "rewards/rejected": -0.3000202775001526,
725
+ "step": 360
726
+ },
727
+ {
728
+ "epoch": 5.85,
729
+ "eval_logits/chosen": -1.832722544670105,
730
+ "eval_logits/rejected": -1.8652076721191406,
731
+ "eval_logps/chosen": -46.90084457397461,
732
+ "eval_logps/rejected": -103.44039154052734,
733
+ "eval_loss": 0.42603132128715515,
734
+ "eval_rewards/accuracies": 0.4642857015132904,
735
+ "eval_rewards/chosen": 0.9172464609146118,
736
+ "eval_rewards/margins": 1.5250685214996338,
737
+ "eval_rewards/rejected": -0.6078222990036011,
738
+ "eval_runtime": 8.1439,
739
+ "eval_samples_per_second": 3.438,
740
+ "eval_steps_per_second": 1.719,
741
+ "step": 360
742
+ }
743
+ ],
744
+ "logging_steps": 10,
745
+ "max_steps": 366,
746
+ "num_input_tokens_seen": 0,
747
+ "num_train_epochs": 6,
748
+ "save_steps": 90,
749
+ "total_flos": 0.0,
750
+ "train_batch_size": 2,
751
+ "trial_name": null,
752
+ "trial_params": null
753
+ }
checkpoint-360/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de569ad7b35da22832fd1fc395ab1c110ac622b038bdfcb0eee757cdc5b4b97b
3
+ size 5304
checkpoint-90/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.10.0
checkpoint-90/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "v_proj",
24
+ "q_proj"
25
+ ],
26
+ "task_type": "CAUSAL_LM",
27
+ "use_dora": false,
28
+ "use_rslora": false
29
+ }
checkpoint-90/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1fb6f11ff592b65215364276acd01c77abf99204619c630325cab8c9b35c14
3
+ size 13648432
checkpoint-90/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b2425ba8d1c50c4c0d9e8e6feff1c707824fe534b70de713352a9d5a204adff
3
+ size 27370618
checkpoint-90/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3
3
+ size 14244
checkpoint-90/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d85fbb3ecde61d79df3a528f5e5b18350d9bf186a9590415f6ca273953853e9d
3
+ size 1064
checkpoint-90/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-90/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-90/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-90/tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "additional_special_tokens": [],
31
+ "bos_token": "<s>",
32
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token }}{% endif %}{% endfor %}",
33
+ "clean_up_tokenization_spaces": false,
34
+ "eos_token": "</s>",
35
+ "legacy": true,
36
+ "max_lenght": 8192,
37
+ "max_length": 8192,
38
+ "model_max_length": 1000000000000000019884624838656,
39
+ "pad_token": "<unk>",
40
+ "padding": true,
41
+ "sp_model_kwargs": {},
42
+ "spaces_between_special_tokens": false,
43
+ "stride": 0,
44
+ "tokenizer_class": "LlamaTokenizer",
45
+ "truncation_side": "right",
46
+ "truncation_strategy": "longest_first",
47
+ "unk_token": "<unk>",
48
+ "use_default_system_prompt": false
49
+ }
checkpoint-90/trainer_state.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.529485821723938,
3
+ "best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-90",
4
+ "epoch": 1.4634146341463414,
5
+ "eval_steps": 30,
6
+ "global_step": 90,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.16,
13
+ "grad_norm": 8.378021240234375,
14
+ "learning_rate": 4.0909090909090915e-06,
15
+ "logits/chosen": -1.830958604812622,
16
+ "logits/rejected": -1.8507845401763916,
17
+ "logps/chosen": -28.701984405517578,
18
+ "logps/rejected": -54.28569793701172,
19
+ "loss": 0.6924,
20
+ "rewards/accuracies": 0.20000000298023224,
21
+ "rewards/chosen": 0.0008967495523393154,
22
+ "rewards/margins": 0.0014666033675894141,
23
+ "rewards/rejected": -0.0005698538152500987,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.33,
28
+ "grad_norm": 5.193418502807617,
29
+ "learning_rate": 4.887323943661972e-06,
30
+ "logits/chosen": -1.7550897598266602,
31
+ "logits/rejected": -1.770708680152893,
32
+ "logps/chosen": -47.344207763671875,
33
+ "logps/rejected": -64.0368423461914,
34
+ "loss": 0.6852,
35
+ "rewards/accuracies": 0.4000000059604645,
36
+ "rewards/chosen": 0.017231885343790054,
37
+ "rewards/margins": 0.01606021076440811,
38
+ "rewards/rejected": 0.0011716745793819427,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.49,
43
+ "grad_norm": 7.308932304382324,
44
+ "learning_rate": 4.746478873239437e-06,
45
+ "logits/chosen": -1.781267762184143,
46
+ "logits/rejected": -1.8114898204803467,
47
+ "logps/chosen": -54.274559020996094,
48
+ "logps/rejected": -95.20500183105469,
49
+ "loss": 0.6635,
50
+ "rewards/accuracies": 0.5,
51
+ "rewards/chosen": 0.0641159638762474,
52
+ "rewards/margins": 0.061691801995038986,
53
+ "rewards/rejected": 0.0024241588544100523,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.49,
58
+ "eval_logits/chosen": -1.7831767797470093,
59
+ "eval_logits/rejected": -1.8043663501739502,
60
+ "eval_logps/chosen": -55.16960906982422,
61
+ "eval_logps/rejected": -97.32585144042969,
62
+ "eval_loss": 0.6523757576942444,
63
+ "eval_rewards/accuracies": 0.4642857015132904,
64
+ "eval_rewards/chosen": 0.09036973863840103,
65
+ "eval_rewards/margins": 0.08673857897520065,
66
+ "eval_rewards/rejected": 0.0036311547737568617,
67
+ "eval_runtime": 8.141,
68
+ "eval_samples_per_second": 3.439,
69
+ "eval_steps_per_second": 1.72,
70
+ "step": 30
71
+ },
72
+ {
73
+ "epoch": 0.65,
74
+ "grad_norm": 0.0,
75
+ "learning_rate": 4.6056338028169015e-06,
76
+ "logits/chosen": -1.889905333518982,
77
+ "logits/rejected": -1.9024461507797241,
78
+ "logps/chosen": -27.918941497802734,
79
+ "logps/rejected": -42.093284606933594,
80
+ "loss": 0.668,
81
+ "rewards/accuracies": 0.25,
82
+ "rewards/chosen": 0.054457180202007294,
83
+ "rewards/margins": 0.0539846234023571,
84
+ "rewards/rejected": 0.0004725646285805851,
85
+ "step": 40
86
+ },
87
+ {
88
+ "epoch": 0.81,
89
+ "grad_norm": 8.53225326538086,
90
+ "learning_rate": 4.464788732394367e-06,
91
+ "logits/chosen": -1.8278567790985107,
92
+ "logits/rejected": -1.849957823753357,
93
+ "logps/chosen": -43.8238639831543,
94
+ "logps/rejected": -68.02179718017578,
95
+ "loss": 0.6358,
96
+ "rewards/accuracies": 0.3499999940395355,
97
+ "rewards/chosen": 0.13941256701946259,
98
+ "rewards/margins": 0.13133978843688965,
99
+ "rewards/rejected": 0.008072790689766407,
100
+ "step": 50
101
+ },
102
+ {
103
+ "epoch": 0.98,
104
+ "grad_norm": 9.436968803405762,
105
+ "learning_rate": 4.3239436619718315e-06,
106
+ "logits/chosen": -1.805991768836975,
107
+ "logits/rejected": -1.8437427282333374,
108
+ "logps/chosen": -43.8873291015625,
109
+ "logps/rejected": -95.2943115234375,
110
+ "loss": 0.6026,
111
+ "rewards/accuracies": 0.44999998807907104,
112
+ "rewards/chosen": 0.18793432414531708,
113
+ "rewards/margins": 0.21308371424674988,
114
+ "rewards/rejected": -0.025149401277303696,
115
+ "step": 60
116
+ },
117
+ {
118
+ "epoch": 0.98,
119
+ "eval_logits/chosen": -1.7877694368362427,
120
+ "eval_logits/rejected": -1.8098936080932617,
121
+ "eval_logps/chosen": -53.567203521728516,
122
+ "eval_logps/rejected": -97.33795928955078,
123
+ "eval_loss": 0.5890871286392212,
124
+ "eval_rewards/accuracies": 0.4642857015132904,
125
+ "eval_rewards/chosen": 0.25061002373695374,
126
+ "eval_rewards/margins": 0.2481890469789505,
127
+ "eval_rewards/rejected": 0.002420984674245119,
128
+ "eval_runtime": 8.1404,
129
+ "eval_samples_per_second": 3.44,
130
+ "eval_steps_per_second": 1.72,
131
+ "step": 60
132
+ },
133
+ {
134
+ "epoch": 1.14,
135
+ "grad_norm": 0.0,
136
+ "learning_rate": 4.183098591549296e-06,
137
+ "logits/chosen": -1.8344879150390625,
138
+ "logits/rejected": -1.8489716053009033,
139
+ "logps/chosen": -40.38930892944336,
140
+ "logps/rejected": -60.9084358215332,
141
+ "loss": 0.6031,
142
+ "rewards/accuracies": 0.375,
143
+ "rewards/chosen": 0.19739331305027008,
144
+ "rewards/margins": 0.22638121247291565,
145
+ "rewards/rejected": -0.028987903147935867,
146
+ "step": 70
147
+ },
148
+ {
149
+ "epoch": 1.3,
150
+ "grad_norm": 5.49536657333374,
151
+ "learning_rate": 4.042253521126761e-06,
152
+ "logits/chosen": -1.7903095483779907,
153
+ "logits/rejected": -1.8362411260604858,
154
+ "logps/chosen": -44.288116455078125,
155
+ "logps/rejected": -90.21073913574219,
156
+ "loss": 0.5357,
157
+ "rewards/accuracies": 0.4749999940395355,
158
+ "rewards/chosen": 0.34061312675476074,
159
+ "rewards/margins": 0.40679749846458435,
160
+ "rewards/rejected": -0.06618441641330719,
161
+ "step": 80
162
+ },
163
+ {
164
+ "epoch": 1.46,
165
+ "grad_norm": 13.401692390441895,
166
+ "learning_rate": 3.901408450704225e-06,
167
+ "logits/chosen": -1.8004281520843506,
168
+ "logits/rejected": -1.8247934579849243,
169
+ "logps/chosen": -42.32465362548828,
170
+ "logps/rejected": -70.9749984741211,
171
+ "loss": 0.5387,
172
+ "rewards/accuracies": 0.4749999940395355,
173
+ "rewards/chosen": 0.3678433299064636,
174
+ "rewards/margins": 0.4186524450778961,
175
+ "rewards/rejected": -0.05080908536911011,
176
+ "step": 90
177
+ },
178
+ {
179
+ "epoch": 1.46,
180
+ "eval_logits/chosen": -1.7943389415740967,
181
+ "eval_logits/rejected": -1.8181126117706299,
182
+ "eval_logps/chosen": -51.677486419677734,
183
+ "eval_logps/rejected": -97.63689422607422,
184
+ "eval_loss": 0.529485821723938,
185
+ "eval_rewards/accuracies": 0.4642857015132904,
186
+ "eval_rewards/chosen": 0.4395819306373596,
187
+ "eval_rewards/margins": 0.4670555889606476,
188
+ "eval_rewards/rejected": -0.027473628520965576,
189
+ "eval_runtime": 8.1412,
190
+ "eval_samples_per_second": 3.439,
191
+ "eval_steps_per_second": 1.72,
192
+ "step": 90
193
+ }
194
+ ],
195
+ "logging_steps": 10,
196
+ "max_steps": 366,
197
+ "num_input_tokens_seen": 0,
198
+ "num_train_epochs": 6,
199
+ "save_steps": 90,
200
+ "total_flos": 0.0,
201
+ "train_batch_size": 2,
202
+ "trial_name": null,
203
+ "trial_params": null
204
+ }
checkpoint-90/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de569ad7b35da22832fd1fc395ab1c110ac622b038bdfcb0eee757cdc5b4b97b
3
+ size 5304