Upload folder using huggingface_hub
Browse files- README.md +62 -54
- checkpoint-180/README.md +202 -0
- checkpoint-180/adapter_config.json +29 -0
- checkpoint-180/adapter_model.safetensors +3 -0
- checkpoint-180/optimizer.pt +3 -0
- checkpoint-180/rng_state.pth +3 -0
- checkpoint-180/scheduler.pt +3 -0
- checkpoint-180/special_tokens_map.json +24 -0
- checkpoint-180/tokenizer.json +0 -0
- checkpoint-180/tokenizer.model +3 -0
- checkpoint-180/tokenizer_config.json +49 -0
- checkpoint-180/trainer_state.json +387 -0
- checkpoint-180/training_args.bin +3 -0
- checkpoint-270/README.md +202 -0
- checkpoint-270/adapter_config.json +29 -0
- checkpoint-270/adapter_model.safetensors +3 -0
- checkpoint-270/optimizer.pt +3 -0
- checkpoint-270/rng_state.pth +3 -0
- checkpoint-270/scheduler.pt +3 -0
- checkpoint-270/special_tokens_map.json +24 -0
- checkpoint-270/tokenizer.json +0 -0
- checkpoint-270/tokenizer.model +3 -0
- checkpoint-270/tokenizer_config.json +49 -0
- checkpoint-270/trainer_state.json +570 -0
- checkpoint-270/training_args.bin +3 -0
- checkpoint-360/README.md +202 -0
- checkpoint-360/adapter_config.json +29 -0
- checkpoint-360/adapter_model.safetensors +3 -0
- checkpoint-360/optimizer.pt +3 -0
- checkpoint-360/rng_state.pth +3 -0
- checkpoint-360/scheduler.pt +3 -0
- checkpoint-360/special_tokens_map.json +24 -0
- checkpoint-360/tokenizer.json +0 -0
- checkpoint-360/tokenizer.model +3 -0
- checkpoint-360/tokenizer_config.json +49 -0
- checkpoint-360/trainer_state.json +753 -0
- checkpoint-360/training_args.bin +3 -0
- checkpoint-90/README.md +202 -0
- checkpoint-90/adapter_config.json +29 -0
- checkpoint-90/adapter_model.safetensors +3 -0
- checkpoint-90/optimizer.pt +3 -0
- checkpoint-90/rng_state.pth +3 -0
- checkpoint-90/scheduler.pt +3 -0
- checkpoint-90/special_tokens_map.json +24 -0
- checkpoint-90/tokenizer.json +0 -0
- checkpoint-90/tokenizer.model +3 -0
- checkpoint-90/tokenizer_config.json +49 -0
- checkpoint-90/trainer_state.json +204 -0
- checkpoint-90/training_args.bin +3 -0
README.md
CHANGED
@@ -1,83 +1,91 @@
|
|
1 |
---
|
2 |
-
|
|
|
3 |
tags:
|
4 |
-
-
|
5 |
-
-
|
6 |
-
- generated_from_trainer
|
7 |
base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
|
8 |
model-index:
|
9 |
-
- name: WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO
|
10 |
results: []
|
|
|
11 |
---
|
12 |
|
13 |
-
|
14 |
-
should probably proofread and complete it, then remove this comment. -->
|
15 |
|
16 |
-
|
|
|
17 |
|
18 |
-
This model is a fine-tuned version of [Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged](https://huggingface.co/Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged) on an unknown dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
-
-
|
21 |
-
- Rewards/chosen: 0.9172
|
22 |
-
- Rewards/rejected: -0.6078
|
23 |
-
- Rewards/accuracies: 0.4643
|
24 |
-
- Rewards/margins: 1.5251
|
25 |
-
- Logps/rejected: -103.4404
|
26 |
-
- Logps/chosen: -46.9008
|
27 |
-
- Logits/rejected: -1.8652
|
28 |
-
- Logits/chosen: -1.8327
|
29 |
|
30 |
-
##
|
31 |
|
32 |
-
|
33 |
|
34 |
-
##
|
35 |
|
36 |
-
|
37 |
|
38 |
-
|
|
|
|
|
|
|
|
|
39 |
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
|
|
|
|
|
|
43 |
|
44 |
### Training hyperparameters
|
45 |
|
46 |
The following hyperparameters were used during training:
|
47 |
- learning_rate: 5e-06
|
48 |
-
-
|
49 |
-
-
|
50 |
-
- seed: 42
|
51 |
- gradient_accumulation_steps: 2
|
|
|
52 |
- total_train_batch_size: 4
|
53 |
-
- optimizer:
|
54 |
-
- lr_scheduler_type:
|
55 |
-
-
|
56 |
-
-
|
57 |
-
-
|
58 |
|
59 |
### Training results
|
60 |
|
61 |
-
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
62 |
-
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
63 |
-
| 0.6635 | 0.49 | 30 | 0.6524 | 0.0904 | 0.0036 | 0.4643 | 0.0867 | -97.3259 | -55.1696 | -1.8044 | -1.7832 |
|
64 |
-
| 0.6026 | 0.98 | 60 | 0.5891 | 0.2506 | 0.0024 | 0.4643 | 0.2482 | -97.3380 | -53.5672 | -1.8099 | -1.7878 |
|
65 |
-
| 0.5387 | 1.46 | 90 | 0.5295 | 0.4396 | -0.0275 | 0.4643 | 0.4671 | -97.6369 | -51.6775 | -1.8181 | -1.7943 |
|
66 |
-
| 0.6033 | 1.95 | 120 | 0.4960 | 0.5751 | -0.0659 | 0.4643 | 0.6410 | -98.0210 | -50.3219 | -1.8261 | -1.8009 |
|
67 |
-
| 0.5042 | 2.44 | 150 | 0.4709 | 0.6967 | -0.1479 | 0.4643 | 0.8446 | -98.8407 | -49.1060 | -1.8331 | -1.8059 |
|
68 |
-
| 0.5087 | 2.93 | 180 | 0.4542 | 0.7878 | -0.2428 | 0.4643 | 1.0306 | -99.7900 | -48.1955 | -1.8425 | -1.8136 |
|
69 |
-
| 0.4874 | 3.41 | 210 | 0.4428 | 0.8442 | -0.3560 | 0.4643 | 1.2002 | -100.9220 | -47.6315 | -1.8520 | -1.8219 |
|
70 |
-
| 0.4229 | 3.9 | 240 | 0.4358 | 0.8750 | -0.4390 | 0.4643 | 1.3140 | -101.7521 | -47.3229 | -1.8575 | -1.8266 |
|
71 |
-
| 0.5295 | 4.39 | 270 | 0.4313 | 0.9026 | -0.4960 | 0.4643 | 1.3986 | -102.3219 | -47.0471 | -1.8607 | -1.8289 |
|
72 |
-
| 0.5466 | 4.88 | 300 | 0.4291 | 0.9119 | -0.5384 | 0.4643 | 1.4503 | -102.7461 | -46.9544 | -1.8629 | -1.8309 |
|
73 |
-
| 0.4339 | 5.37 | 330 | 0.4268 | 0.9152 | -0.5900 | 0.4643 | 1.5052 | -103.2623 | -46.9216 | -1.8644 | -1.8320 |
|
74 |
-
| 0.5438 | 5.85 | 360 | 0.4260 | 0.9172 | -0.6078 | 0.4643 | 1.5251 | -103.4404 | -46.9008 | -1.8652 | -1.8327 |
|
75 |
-
|
76 |
-
|
77 |
### Framework versions
|
78 |
|
79 |
-
-
|
80 |
-
-
|
81 |
-
-
|
82 |
-
-
|
83 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
license: mit
|
3 |
+
library_name: "trl"
|
4 |
tags:
|
5 |
+
- DPO
|
6 |
+
- WeniGPT
|
|
|
7 |
base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
|
8 |
model-index:
|
9 |
+
- name: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO
|
10 |
results: []
|
11 |
+
language: ['pt']
|
12 |
---
|
13 |
|
14 |
+
# Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO
|
|
|
15 |
|
16 |
+
This model is a fine-tuned version of [Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged](https://huggingface.co/Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged) on the dataset Weni/wenigpt-agent-dpo-1.0.0 with the DPO trainer. It is part of the WeniGPT project for [Weni](https://weni.ai/).
|
17 |
+
Description: Experiment on DPO with other hyperparameters and best SFT model of WeniGPT
|
18 |
|
|
|
19 |
It achieves the following results on the evaluation set:
|
20 |
+
{'eval_loss': 0.42603132128715515, 'eval_runtime': 8.1364, 'eval_samples_per_second': 3.441, 'eval_steps_per_second': 1.721, 'eval_rewards/chosen': 0.9172464609146118, 'eval_rewards/rejected': -0.6078222990036011, 'eval_rewards/accuracies': 0.4642857015132904, 'eval_rewards/margins': 1.5250685214996338, 'eval_logps/rejected': -103.44039154052734, 'eval_logps/chosen': -46.90084457397461, 'eval_logits/rejected': -1.8652076721191406, 'eval_logits/chosen': -1.832722544670105, 'epoch': 5.95}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
+
## Intended uses & limitations
|
23 |
|
24 |
+
This model has not been trained to avoid specific instructions.
|
25 |
|
26 |
+
## Training procedure
|
27 |
|
28 |
+
Finetuning was done on the model Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged with the following prompt:
|
29 |
|
30 |
+
```
|
31 |
+
---------------------
|
32 |
+
System_prompt:
|
33 |
+
Agora você se chama {name}, você é {occupation} e seu objetivo é {chatbot_goal}. O adjetivo que mais define a sua personalidade é {adjective} e você se comporta da seguinte forma:
|
34 |
+
{instructions_formatted}
|
35 |
|
36 |
+
{context_statement}
|
37 |
+
|
38 |
+
Lista de requisitos:
|
39 |
+
- Responda de forma natural, mas nunca fale sobre um assunto fora do contexto.
|
40 |
+
- Nunca traga informações do seu próprio conhecimento.
|
41 |
+
- Repito é crucial que você responda usando apenas informações do contexto.
|
42 |
+
- Nunca mencione o contexto fornecido.
|
43 |
+
- Nunca mencione a pergunta fornecida.
|
44 |
+
- Gere a resposta mais útil possível para a pergunta usando informações do conexto acima.
|
45 |
+
- Nunca elabore sobre o porque e como você fez a tarefa, apenas responda.
|
46 |
|
47 |
+
|
48 |
+
---------------------
|
49 |
+
|
50 |
+
```
|
51 |
|
52 |
### Training hyperparameters
|
53 |
|
54 |
The following hyperparameters were used during training:
|
55 |
- learning_rate: 5e-06
|
56 |
+
- per_device_train_batch_size: 2
|
57 |
+
- per_device_eval_batch_size: 2
|
|
|
58 |
- gradient_accumulation_steps: 2
|
59 |
+
- num_gpus: 1
|
60 |
- total_train_batch_size: 4
|
61 |
+
- optimizer: AdamW
|
62 |
+
- lr_scheduler_type: cosine
|
63 |
+
- num_steps: 366
|
64 |
+
- quantization_type: bitsandbytes
|
65 |
+
- LoRA: bits: 4, use_exllama: True, device_map: auto, use_cache: False, lora_r: 8, lora_alpha: 16, lora_dropout: 0.05, bias: none, target_modules: ['v_proj', 'q_proj'], task_type: CAUSAL_LM
|
66 |
|
67 |
### Training results
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
### Framework versions
|
70 |
|
71 |
+
- transformers==4.38.2
|
72 |
+
- datasets==2.18.0
|
73 |
+
- peft==0.10.0
|
74 |
+
- safetensors==0.4.2
|
75 |
+
- evaluate==0.4.1
|
76 |
+
- bitsandbytes==0.43
|
77 |
+
- huggingface_hub==0.22.2
|
78 |
+
- seqeval==1.2.2
|
79 |
+
- optimum==1.18.1
|
80 |
+
- auto-gptq==0.7.1
|
81 |
+
- gpustat==1.1.1
|
82 |
+
- deepspeed==0.14.0
|
83 |
+
- wandb==0.16.6
|
84 |
+
- trl==0.8.1
|
85 |
+
- accelerate==0.29.2
|
86 |
+
- coloredlogs==15.0.1
|
87 |
+
- traitlets==5.14.2
|
88 |
+
- autoawq@https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.4/autoawq-0.2.4+cu118-cp310-cp310-linux_x86_64.whl
|
89 |
+
|
90 |
+
### Hardware
|
91 |
+
- Cloud provider: runpod.io
|
checkpoint-180/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.10.0
|
checkpoint-180/adapter_config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 16,
|
14 |
+
"lora_dropout": 0.05,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 8,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": [
|
23 |
+
"v_proj",
|
24 |
+
"q_proj"
|
25 |
+
],
|
26 |
+
"task_type": "CAUSAL_LM",
|
27 |
+
"use_dora": false,
|
28 |
+
"use_rslora": false
|
29 |
+
}
|
checkpoint-180/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f0a1d782152f7cbe522064e8176b567d6386e5430121f5ed5c8d91fe866a0c1
|
3 |
+
size 13648432
|
checkpoint-180/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca2233350e52278376f6ee073c87883c98fb2c90cbd76f0e35943aa63dd3ebee
|
3 |
+
size 27370618
|
checkpoint-180/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48ee9b73399c28d7e668360bf1d5a4d11095c4738bf96c13f7bb6fbff59f8ccb
|
3 |
+
size 14244
|
checkpoint-180/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14732432661203392f17c828aeaa967e3fc0c59a7193aef58b74af5f304be609
|
3 |
+
size 1064
|
checkpoint-180/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<unk>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<unk>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
checkpoint-180/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-180/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
3 |
+
size 493443
|
checkpoint-180/tokenizer_config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"additional_special_tokens": [],
|
31 |
+
"bos_token": "<s>",
|
32 |
+
"chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token }}{% endif %}{% endfor %}",
|
33 |
+
"clean_up_tokenization_spaces": false,
|
34 |
+
"eos_token": "</s>",
|
35 |
+
"legacy": true,
|
36 |
+
"max_lenght": 8192,
|
37 |
+
"max_length": 8192,
|
38 |
+
"model_max_length": 1000000000000000019884624838656,
|
39 |
+
"pad_token": "<unk>",
|
40 |
+
"padding": true,
|
41 |
+
"sp_model_kwargs": {},
|
42 |
+
"spaces_between_special_tokens": false,
|
43 |
+
"stride": 0,
|
44 |
+
"tokenizer_class": "LlamaTokenizer",
|
45 |
+
"truncation_side": "right",
|
46 |
+
"truncation_strategy": "longest_first",
|
47 |
+
"unk_token": "<unk>",
|
48 |
+
"use_default_system_prompt": false
|
49 |
+
}
|
checkpoint-180/trainer_state.json
ADDED
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.4541548192501068,
|
3 |
+
"best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-180",
|
4 |
+
"epoch": 2.926829268292683,
|
5 |
+
"eval_steps": 30,
|
6 |
+
"global_step": 180,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.16,
|
13 |
+
"grad_norm": 8.378021240234375,
|
14 |
+
"learning_rate": 4.0909090909090915e-06,
|
15 |
+
"logits/chosen": -1.830958604812622,
|
16 |
+
"logits/rejected": -1.8507845401763916,
|
17 |
+
"logps/chosen": -28.701984405517578,
|
18 |
+
"logps/rejected": -54.28569793701172,
|
19 |
+
"loss": 0.6924,
|
20 |
+
"rewards/accuracies": 0.20000000298023224,
|
21 |
+
"rewards/chosen": 0.0008967495523393154,
|
22 |
+
"rewards/margins": 0.0014666033675894141,
|
23 |
+
"rewards/rejected": -0.0005698538152500987,
|
24 |
+
"step": 10
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.33,
|
28 |
+
"grad_norm": 5.193418502807617,
|
29 |
+
"learning_rate": 4.887323943661972e-06,
|
30 |
+
"logits/chosen": -1.7550897598266602,
|
31 |
+
"logits/rejected": -1.770708680152893,
|
32 |
+
"logps/chosen": -47.344207763671875,
|
33 |
+
"logps/rejected": -64.0368423461914,
|
34 |
+
"loss": 0.6852,
|
35 |
+
"rewards/accuracies": 0.4000000059604645,
|
36 |
+
"rewards/chosen": 0.017231885343790054,
|
37 |
+
"rewards/margins": 0.01606021076440811,
|
38 |
+
"rewards/rejected": 0.0011716745793819427,
|
39 |
+
"step": 20
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 0.49,
|
43 |
+
"grad_norm": 7.308932304382324,
|
44 |
+
"learning_rate": 4.746478873239437e-06,
|
45 |
+
"logits/chosen": -1.781267762184143,
|
46 |
+
"logits/rejected": -1.8114898204803467,
|
47 |
+
"logps/chosen": -54.274559020996094,
|
48 |
+
"logps/rejected": -95.20500183105469,
|
49 |
+
"loss": 0.6635,
|
50 |
+
"rewards/accuracies": 0.5,
|
51 |
+
"rewards/chosen": 0.0641159638762474,
|
52 |
+
"rewards/margins": 0.061691801995038986,
|
53 |
+
"rewards/rejected": 0.0024241588544100523,
|
54 |
+
"step": 30
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 0.49,
|
58 |
+
"eval_logits/chosen": -1.7831767797470093,
|
59 |
+
"eval_logits/rejected": -1.8043663501739502,
|
60 |
+
"eval_logps/chosen": -55.16960906982422,
|
61 |
+
"eval_logps/rejected": -97.32585144042969,
|
62 |
+
"eval_loss": 0.6523757576942444,
|
63 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
64 |
+
"eval_rewards/chosen": 0.09036973863840103,
|
65 |
+
"eval_rewards/margins": 0.08673857897520065,
|
66 |
+
"eval_rewards/rejected": 0.0036311547737568617,
|
67 |
+
"eval_runtime": 8.141,
|
68 |
+
"eval_samples_per_second": 3.439,
|
69 |
+
"eval_steps_per_second": 1.72,
|
70 |
+
"step": 30
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"epoch": 0.65,
|
74 |
+
"grad_norm": 0.0,
|
75 |
+
"learning_rate": 4.6056338028169015e-06,
|
76 |
+
"logits/chosen": -1.889905333518982,
|
77 |
+
"logits/rejected": -1.9024461507797241,
|
78 |
+
"logps/chosen": -27.918941497802734,
|
79 |
+
"logps/rejected": -42.093284606933594,
|
80 |
+
"loss": 0.668,
|
81 |
+
"rewards/accuracies": 0.25,
|
82 |
+
"rewards/chosen": 0.054457180202007294,
|
83 |
+
"rewards/margins": 0.0539846234023571,
|
84 |
+
"rewards/rejected": 0.0004725646285805851,
|
85 |
+
"step": 40
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 0.81,
|
89 |
+
"grad_norm": 8.53225326538086,
|
90 |
+
"learning_rate": 4.464788732394367e-06,
|
91 |
+
"logits/chosen": -1.8278567790985107,
|
92 |
+
"logits/rejected": -1.849957823753357,
|
93 |
+
"logps/chosen": -43.8238639831543,
|
94 |
+
"logps/rejected": -68.02179718017578,
|
95 |
+
"loss": 0.6358,
|
96 |
+
"rewards/accuracies": 0.3499999940395355,
|
97 |
+
"rewards/chosen": 0.13941256701946259,
|
98 |
+
"rewards/margins": 0.13133978843688965,
|
99 |
+
"rewards/rejected": 0.008072790689766407,
|
100 |
+
"step": 50
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.98,
|
104 |
+
"grad_norm": 9.436968803405762,
|
105 |
+
"learning_rate": 4.3239436619718315e-06,
|
106 |
+
"logits/chosen": -1.805991768836975,
|
107 |
+
"logits/rejected": -1.8437427282333374,
|
108 |
+
"logps/chosen": -43.8873291015625,
|
109 |
+
"logps/rejected": -95.2943115234375,
|
110 |
+
"loss": 0.6026,
|
111 |
+
"rewards/accuracies": 0.44999998807907104,
|
112 |
+
"rewards/chosen": 0.18793432414531708,
|
113 |
+
"rewards/margins": 0.21308371424674988,
|
114 |
+
"rewards/rejected": -0.025149401277303696,
|
115 |
+
"step": 60
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"epoch": 0.98,
|
119 |
+
"eval_logits/chosen": -1.7877694368362427,
|
120 |
+
"eval_logits/rejected": -1.8098936080932617,
|
121 |
+
"eval_logps/chosen": -53.567203521728516,
|
122 |
+
"eval_logps/rejected": -97.33795928955078,
|
123 |
+
"eval_loss": 0.5890871286392212,
|
124 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
125 |
+
"eval_rewards/chosen": 0.25061002373695374,
|
126 |
+
"eval_rewards/margins": 0.2481890469789505,
|
127 |
+
"eval_rewards/rejected": 0.002420984674245119,
|
128 |
+
"eval_runtime": 8.1404,
|
129 |
+
"eval_samples_per_second": 3.44,
|
130 |
+
"eval_steps_per_second": 1.72,
|
131 |
+
"step": 60
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"epoch": 1.14,
|
135 |
+
"grad_norm": 0.0,
|
136 |
+
"learning_rate": 4.183098591549296e-06,
|
137 |
+
"logits/chosen": -1.8344879150390625,
|
138 |
+
"logits/rejected": -1.8489716053009033,
|
139 |
+
"logps/chosen": -40.38930892944336,
|
140 |
+
"logps/rejected": -60.9084358215332,
|
141 |
+
"loss": 0.6031,
|
142 |
+
"rewards/accuracies": 0.375,
|
143 |
+
"rewards/chosen": 0.19739331305027008,
|
144 |
+
"rewards/margins": 0.22638121247291565,
|
145 |
+
"rewards/rejected": -0.028987903147935867,
|
146 |
+
"step": 70
|
147 |
+
},
|
148 |
+
{
|
149 |
+
"epoch": 1.3,
|
150 |
+
"grad_norm": 5.49536657333374,
|
151 |
+
"learning_rate": 4.042253521126761e-06,
|
152 |
+
"logits/chosen": -1.7903095483779907,
|
153 |
+
"logits/rejected": -1.8362411260604858,
|
154 |
+
"logps/chosen": -44.288116455078125,
|
155 |
+
"logps/rejected": -90.21073913574219,
|
156 |
+
"loss": 0.5357,
|
157 |
+
"rewards/accuracies": 0.4749999940395355,
|
158 |
+
"rewards/chosen": 0.34061312675476074,
|
159 |
+
"rewards/margins": 0.40679749846458435,
|
160 |
+
"rewards/rejected": -0.06618441641330719,
|
161 |
+
"step": 80
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 1.46,
|
165 |
+
"grad_norm": 13.401692390441895,
|
166 |
+
"learning_rate": 3.901408450704225e-06,
|
167 |
+
"logits/chosen": -1.8004281520843506,
|
168 |
+
"logits/rejected": -1.8247934579849243,
|
169 |
+
"logps/chosen": -42.32465362548828,
|
170 |
+
"logps/rejected": -70.9749984741211,
|
171 |
+
"loss": 0.5387,
|
172 |
+
"rewards/accuracies": 0.4749999940395355,
|
173 |
+
"rewards/chosen": 0.3678433299064636,
|
174 |
+
"rewards/margins": 0.4186524450778961,
|
175 |
+
"rewards/rejected": -0.05080908536911011,
|
176 |
+
"step": 90
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"epoch": 1.46,
|
180 |
+
"eval_logits/chosen": -1.7943389415740967,
|
181 |
+
"eval_logits/rejected": -1.8181126117706299,
|
182 |
+
"eval_logps/chosen": -51.677486419677734,
|
183 |
+
"eval_logps/rejected": -97.63689422607422,
|
184 |
+
"eval_loss": 0.529485821723938,
|
185 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
186 |
+
"eval_rewards/chosen": 0.4395819306373596,
|
187 |
+
"eval_rewards/margins": 0.4670555889606476,
|
188 |
+
"eval_rewards/rejected": -0.027473628520965576,
|
189 |
+
"eval_runtime": 8.1412,
|
190 |
+
"eval_samples_per_second": 3.439,
|
191 |
+
"eval_steps_per_second": 1.72,
|
192 |
+
"step": 90
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"epoch": 1.63,
|
196 |
+
"grad_norm": 5.040858745574951,
|
197 |
+
"learning_rate": 3.7605633802816903e-06,
|
198 |
+
"logits/chosen": -1.8601042032241821,
|
199 |
+
"logits/rejected": -1.8790462017059326,
|
200 |
+
"logps/chosen": -43.77570343017578,
|
201 |
+
"logps/rejected": -70.64997863769531,
|
202 |
+
"loss": 0.5466,
|
203 |
+
"rewards/accuracies": 0.4000000059604645,
|
204 |
+
"rewards/chosen": 0.36673134565353394,
|
205 |
+
"rewards/margins": 0.42903366684913635,
|
206 |
+
"rewards/rejected": -0.06230226159095764,
|
207 |
+
"step": 100
|
208 |
+
},
|
209 |
+
{
|
210 |
+
"epoch": 1.79,
|
211 |
+
"grad_norm": 11.182683944702148,
|
212 |
+
"learning_rate": 3.6197183098591553e-06,
|
213 |
+
"logits/chosen": -1.8602203130722046,
|
214 |
+
"logits/rejected": -1.8786903619766235,
|
215 |
+
"logps/chosen": -29.601736068725586,
|
216 |
+
"logps/rejected": -66.1338882446289,
|
217 |
+
"loss": 0.6003,
|
218 |
+
"rewards/accuracies": 0.2750000059604645,
|
219 |
+
"rewards/chosen": 0.3122637867927551,
|
220 |
+
"rewards/margins": 0.2756831645965576,
|
221 |
+
"rewards/rejected": 0.03658062964677811,
|
222 |
+
"step": 110
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"epoch": 1.95,
|
226 |
+
"grad_norm": 3.9169583320617676,
|
227 |
+
"learning_rate": 3.47887323943662e-06,
|
228 |
+
"logits/chosen": -1.8304624557495117,
|
229 |
+
"logits/rejected": -1.8451646566390991,
|
230 |
+
"logps/chosen": -31.413599014282227,
|
231 |
+
"logps/rejected": -56.841880798339844,
|
232 |
+
"loss": 0.6033,
|
233 |
+
"rewards/accuracies": 0.2750000059604645,
|
234 |
+
"rewards/chosen": 0.21899382770061493,
|
235 |
+
"rewards/margins": 0.2744571566581726,
|
236 |
+
"rewards/rejected": -0.05546332150697708,
|
237 |
+
"step": 120
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"epoch": 1.95,
|
241 |
+
"eval_logits/chosen": -1.80086350440979,
|
242 |
+
"eval_logits/rejected": -1.8260576725006104,
|
243 |
+
"eval_logps/chosen": -50.32191848754883,
|
244 |
+
"eval_logps/rejected": -98.02101135253906,
|
245 |
+
"eval_loss": 0.49604225158691406,
|
246 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
247 |
+
"eval_rewards/chosen": 0.5751391053199768,
|
248 |
+
"eval_rewards/margins": 0.6410244107246399,
|
249 |
+
"eval_rewards/rejected": -0.0658852607011795,
|
250 |
+
"eval_runtime": 8.1445,
|
251 |
+
"eval_samples_per_second": 3.438,
|
252 |
+
"eval_steps_per_second": 1.719,
|
253 |
+
"step": 120
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"epoch": 2.11,
|
257 |
+
"grad_norm": 1.4047716856002808,
|
258 |
+
"learning_rate": 3.338028169014085e-06,
|
259 |
+
"logits/chosen": -1.8776130676269531,
|
260 |
+
"logits/rejected": -1.8995519876480103,
|
261 |
+
"logps/chosen": -22.69371795654297,
|
262 |
+
"logps/rejected": -53.5282096862793,
|
263 |
+
"loss": 0.5611,
|
264 |
+
"rewards/accuracies": 0.25,
|
265 |
+
"rewards/chosen": 0.35938918590545654,
|
266 |
+
"rewards/margins": 0.5045264959335327,
|
267 |
+
"rewards/rejected": -0.14513733983039856,
|
268 |
+
"step": 130
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 2.28,
|
272 |
+
"grad_norm": 0.7528722882270813,
|
273 |
+
"learning_rate": 3.1971830985915496e-06,
|
274 |
+
"logits/chosen": -1.8126357793807983,
|
275 |
+
"logits/rejected": -1.832371711730957,
|
276 |
+
"logps/chosen": -38.33379364013672,
|
277 |
+
"logps/rejected": -67.96979522705078,
|
278 |
+
"loss": 0.5142,
|
279 |
+
"rewards/accuracies": 0.375,
|
280 |
+
"rewards/chosen": 0.5568062663078308,
|
281 |
+
"rewards/margins": 0.6818712949752808,
|
282 |
+
"rewards/rejected": -0.12506499886512756,
|
283 |
+
"step": 140
|
284 |
+
},
|
285 |
+
{
|
286 |
+
"epoch": 2.44,
|
287 |
+
"grad_norm": 3.405579090118408,
|
288 |
+
"learning_rate": 3.056338028169014e-06,
|
289 |
+
"logits/chosen": -1.8196109533309937,
|
290 |
+
"logits/rejected": -1.8556429147720337,
|
291 |
+
"logps/chosen": -36.78864669799805,
|
292 |
+
"logps/rejected": -83.05890655517578,
|
293 |
+
"loss": 0.5042,
|
294 |
+
"rewards/accuracies": 0.4000000059604645,
|
295 |
+
"rewards/chosen": 0.542107105255127,
|
296 |
+
"rewards/margins": 0.6411095857620239,
|
297 |
+
"rewards/rejected": -0.09900249540805817,
|
298 |
+
"step": 150
|
299 |
+
},
|
300 |
+
{
|
301 |
+
"epoch": 2.44,
|
302 |
+
"eval_logits/chosen": -1.805869698524475,
|
303 |
+
"eval_logits/rejected": -1.8330577611923218,
|
304 |
+
"eval_logps/chosen": -49.10601043701172,
|
305 |
+
"eval_logps/rejected": -98.84068298339844,
|
306 |
+
"eval_loss": 0.4709201455116272,
|
307 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
308 |
+
"eval_rewards/chosen": 0.6967297196388245,
|
309 |
+
"eval_rewards/margins": 0.8445812463760376,
|
310 |
+
"eval_rewards/rejected": -0.1478516012430191,
|
311 |
+
"eval_runtime": 8.1382,
|
312 |
+
"eval_samples_per_second": 3.441,
|
313 |
+
"eval_steps_per_second": 1.72,
|
314 |
+
"step": 150
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"epoch": 2.6,
|
318 |
+
"grad_norm": 7.778740882873535,
|
319 |
+
"learning_rate": 2.915492957746479e-06,
|
320 |
+
"logits/chosen": -1.848589301109314,
|
321 |
+
"logits/rejected": -1.8790754079818726,
|
322 |
+
"logps/chosen": -36.49171447753906,
|
323 |
+
"logps/rejected": -72.55968475341797,
|
324 |
+
"loss": 0.4927,
|
325 |
+
"rewards/accuracies": 0.44999998807907104,
|
326 |
+
"rewards/chosen": 0.49555450677871704,
|
327 |
+
"rewards/margins": 0.6891830563545227,
|
328 |
+
"rewards/rejected": -0.1936284601688385,
|
329 |
+
"step": 160
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"epoch": 2.76,
|
333 |
+
"grad_norm": 4.058627605438232,
|
334 |
+
"learning_rate": 2.774647887323944e-06,
|
335 |
+
"logits/chosen": -1.812421441078186,
|
336 |
+
"logits/rejected": -1.8415311574935913,
|
337 |
+
"logps/chosen": -45.62999725341797,
|
338 |
+
"logps/rejected": -87.85527038574219,
|
339 |
+
"loss": 0.4541,
|
340 |
+
"rewards/accuracies": 0.4749999940395355,
|
341 |
+
"rewards/chosen": 0.7084562182426453,
|
342 |
+
"rewards/margins": 0.9553689956665039,
|
343 |
+
"rewards/rejected": -0.24691279232501984,
|
344 |
+
"step": 170
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"epoch": 2.93,
|
348 |
+
"grad_norm": 0.0,
|
349 |
+
"learning_rate": 2.6338028169014084e-06,
|
350 |
+
"logits/chosen": -1.8475942611694336,
|
351 |
+
"logits/rejected": -1.8678725957870483,
|
352 |
+
"logps/chosen": -40.53328323364258,
|
353 |
+
"logps/rejected": -64.86616516113281,
|
354 |
+
"loss": 0.5087,
|
355 |
+
"rewards/accuracies": 0.375,
|
356 |
+
"rewards/chosen": 0.5022943019866943,
|
357 |
+
"rewards/margins": 0.7252141833305359,
|
358 |
+
"rewards/rejected": -0.22291991114616394,
|
359 |
+
"step": 180
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 2.93,
|
363 |
+
"eval_logits/chosen": -1.8136398792266846,
|
364 |
+
"eval_logits/rejected": -1.8424787521362305,
|
365 |
+
"eval_logps/chosen": -48.19547653198242,
|
366 |
+
"eval_logps/rejected": -99.7900161743164,
|
367 |
+
"eval_loss": 0.4541548192501068,
|
368 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
369 |
+
"eval_rewards/chosen": 0.7877826690673828,
|
370 |
+
"eval_rewards/margins": 1.0305674076080322,
|
371 |
+
"eval_rewards/rejected": -0.24278469383716583,
|
372 |
+
"eval_runtime": 8.1397,
|
373 |
+
"eval_samples_per_second": 3.44,
|
374 |
+
"eval_steps_per_second": 1.72,
|
375 |
+
"step": 180
|
376 |
+
}
|
377 |
+
],
|
378 |
+
"logging_steps": 10,
|
379 |
+
"max_steps": 366,
|
380 |
+
"num_input_tokens_seen": 0,
|
381 |
+
"num_train_epochs": 6,
|
382 |
+
"save_steps": 90,
|
383 |
+
"total_flos": 0.0,
|
384 |
+
"train_batch_size": 2,
|
385 |
+
"trial_name": null,
|
386 |
+
"trial_params": null
|
387 |
+
}
|
checkpoint-180/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de569ad7b35da22832fd1fc395ab1c110ac622b038bdfcb0eee757cdc5b4b97b
|
3 |
+
size 5304
|
checkpoint-270/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.10.0
|
checkpoint-270/adapter_config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 16,
|
14 |
+
"lora_dropout": 0.05,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 8,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": [
|
23 |
+
"v_proj",
|
24 |
+
"q_proj"
|
25 |
+
],
|
26 |
+
"task_type": "CAUSAL_LM",
|
27 |
+
"use_dora": false,
|
28 |
+
"use_rslora": false
|
29 |
+
}
|
checkpoint-270/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:918068effef33fcd83ee39a7b70c44461e0a5f72909fe72fbba0207e41da5527
|
3 |
+
size 13648432
|
checkpoint-270/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44856c4420546cbc9f5a35b808e67c05528c23d760db27d4eb92ce6a79b5f895
|
3 |
+
size 27370618
|
checkpoint-270/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d3b7102895eb0637b0cab516bd672f216b2bf79078a83eb301011a90444f44c
|
3 |
+
size 14244
|
checkpoint-270/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:754fa30b685f93af9e6d375848220222347b5605c21cd93b54aaa798d6ea3598
|
3 |
+
size 1064
|
checkpoint-270/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<unk>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<unk>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
checkpoint-270/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-270/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
3 |
+
size 493443
|
checkpoint-270/tokenizer_config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"additional_special_tokens": [],
|
31 |
+
"bos_token": "<s>",
|
32 |
+
"chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token }}{% endif %}{% endfor %}",
|
33 |
+
"clean_up_tokenization_spaces": false,
|
34 |
+
"eos_token": "</s>",
|
35 |
+
"legacy": true,
|
36 |
+
"max_lenght": 8192,
|
37 |
+
"max_length": 8192,
|
38 |
+
"model_max_length": 1000000000000000019884624838656,
|
39 |
+
"pad_token": "<unk>",
|
40 |
+
"padding": true,
|
41 |
+
"sp_model_kwargs": {},
|
42 |
+
"spaces_between_special_tokens": false,
|
43 |
+
"stride": 0,
|
44 |
+
"tokenizer_class": "LlamaTokenizer",
|
45 |
+
"truncation_side": "right",
|
46 |
+
"truncation_strategy": "longest_first",
|
47 |
+
"unk_token": "<unk>",
|
48 |
+
"use_default_system_prompt": false
|
49 |
+
}
|
checkpoint-270/trainer_state.json
ADDED
@@ -0,0 +1,570 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.43130752444267273,
|
3 |
+
"best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-270",
|
4 |
+
"epoch": 4.390243902439025,
|
5 |
+
"eval_steps": 30,
|
6 |
+
"global_step": 270,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.16,
|
13 |
+
"grad_norm": 8.378021240234375,
|
14 |
+
"learning_rate": 4.0909090909090915e-06,
|
15 |
+
"logits/chosen": -1.830958604812622,
|
16 |
+
"logits/rejected": -1.8507845401763916,
|
17 |
+
"logps/chosen": -28.701984405517578,
|
18 |
+
"logps/rejected": -54.28569793701172,
|
19 |
+
"loss": 0.6924,
|
20 |
+
"rewards/accuracies": 0.20000000298023224,
|
21 |
+
"rewards/chosen": 0.0008967495523393154,
|
22 |
+
"rewards/margins": 0.0014666033675894141,
|
23 |
+
"rewards/rejected": -0.0005698538152500987,
|
24 |
+
"step": 10
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.33,
|
28 |
+
"grad_norm": 5.193418502807617,
|
29 |
+
"learning_rate": 4.887323943661972e-06,
|
30 |
+
"logits/chosen": -1.7550897598266602,
|
31 |
+
"logits/rejected": -1.770708680152893,
|
32 |
+
"logps/chosen": -47.344207763671875,
|
33 |
+
"logps/rejected": -64.0368423461914,
|
34 |
+
"loss": 0.6852,
|
35 |
+
"rewards/accuracies": 0.4000000059604645,
|
36 |
+
"rewards/chosen": 0.017231885343790054,
|
37 |
+
"rewards/margins": 0.01606021076440811,
|
38 |
+
"rewards/rejected": 0.0011716745793819427,
|
39 |
+
"step": 20
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 0.49,
|
43 |
+
"grad_norm": 7.308932304382324,
|
44 |
+
"learning_rate": 4.746478873239437e-06,
|
45 |
+
"logits/chosen": -1.781267762184143,
|
46 |
+
"logits/rejected": -1.8114898204803467,
|
47 |
+
"logps/chosen": -54.274559020996094,
|
48 |
+
"logps/rejected": -95.20500183105469,
|
49 |
+
"loss": 0.6635,
|
50 |
+
"rewards/accuracies": 0.5,
|
51 |
+
"rewards/chosen": 0.0641159638762474,
|
52 |
+
"rewards/margins": 0.061691801995038986,
|
53 |
+
"rewards/rejected": 0.0024241588544100523,
|
54 |
+
"step": 30
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 0.49,
|
58 |
+
"eval_logits/chosen": -1.7831767797470093,
|
59 |
+
"eval_logits/rejected": -1.8043663501739502,
|
60 |
+
"eval_logps/chosen": -55.16960906982422,
|
61 |
+
"eval_logps/rejected": -97.32585144042969,
|
62 |
+
"eval_loss": 0.6523757576942444,
|
63 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
64 |
+
"eval_rewards/chosen": 0.09036973863840103,
|
65 |
+
"eval_rewards/margins": 0.08673857897520065,
|
66 |
+
"eval_rewards/rejected": 0.0036311547737568617,
|
67 |
+
"eval_runtime": 8.141,
|
68 |
+
"eval_samples_per_second": 3.439,
|
69 |
+
"eval_steps_per_second": 1.72,
|
70 |
+
"step": 30
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"epoch": 0.65,
|
74 |
+
"grad_norm": 0.0,
|
75 |
+
"learning_rate": 4.6056338028169015e-06,
|
76 |
+
"logits/chosen": -1.889905333518982,
|
77 |
+
"logits/rejected": -1.9024461507797241,
|
78 |
+
"logps/chosen": -27.918941497802734,
|
79 |
+
"logps/rejected": -42.093284606933594,
|
80 |
+
"loss": 0.668,
|
81 |
+
"rewards/accuracies": 0.25,
|
82 |
+
"rewards/chosen": 0.054457180202007294,
|
83 |
+
"rewards/margins": 0.0539846234023571,
|
84 |
+
"rewards/rejected": 0.0004725646285805851,
|
85 |
+
"step": 40
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 0.81,
|
89 |
+
"grad_norm": 8.53225326538086,
|
90 |
+
"learning_rate": 4.464788732394367e-06,
|
91 |
+
"logits/chosen": -1.8278567790985107,
|
92 |
+
"logits/rejected": -1.849957823753357,
|
93 |
+
"logps/chosen": -43.8238639831543,
|
94 |
+
"logps/rejected": -68.02179718017578,
|
95 |
+
"loss": 0.6358,
|
96 |
+
"rewards/accuracies": 0.3499999940395355,
|
97 |
+
"rewards/chosen": 0.13941256701946259,
|
98 |
+
"rewards/margins": 0.13133978843688965,
|
99 |
+
"rewards/rejected": 0.008072790689766407,
|
100 |
+
"step": 50
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.98,
|
104 |
+
"grad_norm": 9.436968803405762,
|
105 |
+
"learning_rate": 4.3239436619718315e-06,
|
106 |
+
"logits/chosen": -1.805991768836975,
|
107 |
+
"logits/rejected": -1.8437427282333374,
|
108 |
+
"logps/chosen": -43.8873291015625,
|
109 |
+
"logps/rejected": -95.2943115234375,
|
110 |
+
"loss": 0.6026,
|
111 |
+
"rewards/accuracies": 0.44999998807907104,
|
112 |
+
"rewards/chosen": 0.18793432414531708,
|
113 |
+
"rewards/margins": 0.21308371424674988,
|
114 |
+
"rewards/rejected": -0.025149401277303696,
|
115 |
+
"step": 60
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"epoch": 0.98,
|
119 |
+
"eval_logits/chosen": -1.7877694368362427,
|
120 |
+
"eval_logits/rejected": -1.8098936080932617,
|
121 |
+
"eval_logps/chosen": -53.567203521728516,
|
122 |
+
"eval_logps/rejected": -97.33795928955078,
|
123 |
+
"eval_loss": 0.5890871286392212,
|
124 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
125 |
+
"eval_rewards/chosen": 0.25061002373695374,
|
126 |
+
"eval_rewards/margins": 0.2481890469789505,
|
127 |
+
"eval_rewards/rejected": 0.002420984674245119,
|
128 |
+
"eval_runtime": 8.1404,
|
129 |
+
"eval_samples_per_second": 3.44,
|
130 |
+
"eval_steps_per_second": 1.72,
|
131 |
+
"step": 60
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"epoch": 1.14,
|
135 |
+
"grad_norm": 0.0,
|
136 |
+
"learning_rate": 4.183098591549296e-06,
|
137 |
+
"logits/chosen": -1.8344879150390625,
|
138 |
+
"logits/rejected": -1.8489716053009033,
|
139 |
+
"logps/chosen": -40.38930892944336,
|
140 |
+
"logps/rejected": -60.9084358215332,
|
141 |
+
"loss": 0.6031,
|
142 |
+
"rewards/accuracies": 0.375,
|
143 |
+
"rewards/chosen": 0.19739331305027008,
|
144 |
+
"rewards/margins": 0.22638121247291565,
|
145 |
+
"rewards/rejected": -0.028987903147935867,
|
146 |
+
"step": 70
|
147 |
+
},
|
148 |
+
{
|
149 |
+
"epoch": 1.3,
|
150 |
+
"grad_norm": 5.49536657333374,
|
151 |
+
"learning_rate": 4.042253521126761e-06,
|
152 |
+
"logits/chosen": -1.7903095483779907,
|
153 |
+
"logits/rejected": -1.8362411260604858,
|
154 |
+
"logps/chosen": -44.288116455078125,
|
155 |
+
"logps/rejected": -90.21073913574219,
|
156 |
+
"loss": 0.5357,
|
157 |
+
"rewards/accuracies": 0.4749999940395355,
|
158 |
+
"rewards/chosen": 0.34061312675476074,
|
159 |
+
"rewards/margins": 0.40679749846458435,
|
160 |
+
"rewards/rejected": -0.06618441641330719,
|
161 |
+
"step": 80
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 1.46,
|
165 |
+
"grad_norm": 13.401692390441895,
|
166 |
+
"learning_rate": 3.901408450704225e-06,
|
167 |
+
"logits/chosen": -1.8004281520843506,
|
168 |
+
"logits/rejected": -1.8247934579849243,
|
169 |
+
"logps/chosen": -42.32465362548828,
|
170 |
+
"logps/rejected": -70.9749984741211,
|
171 |
+
"loss": 0.5387,
|
172 |
+
"rewards/accuracies": 0.4749999940395355,
|
173 |
+
"rewards/chosen": 0.3678433299064636,
|
174 |
+
"rewards/margins": 0.4186524450778961,
|
175 |
+
"rewards/rejected": -0.05080908536911011,
|
176 |
+
"step": 90
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"epoch": 1.46,
|
180 |
+
"eval_logits/chosen": -1.7943389415740967,
|
181 |
+
"eval_logits/rejected": -1.8181126117706299,
|
182 |
+
"eval_logps/chosen": -51.677486419677734,
|
183 |
+
"eval_logps/rejected": -97.63689422607422,
|
184 |
+
"eval_loss": 0.529485821723938,
|
185 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
186 |
+
"eval_rewards/chosen": 0.4395819306373596,
|
187 |
+
"eval_rewards/margins": 0.4670555889606476,
|
188 |
+
"eval_rewards/rejected": -0.027473628520965576,
|
189 |
+
"eval_runtime": 8.1412,
|
190 |
+
"eval_samples_per_second": 3.439,
|
191 |
+
"eval_steps_per_second": 1.72,
|
192 |
+
"step": 90
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"epoch": 1.63,
|
196 |
+
"grad_norm": 5.040858745574951,
|
197 |
+
"learning_rate": 3.7605633802816903e-06,
|
198 |
+
"logits/chosen": -1.8601042032241821,
|
199 |
+
"logits/rejected": -1.8790462017059326,
|
200 |
+
"logps/chosen": -43.77570343017578,
|
201 |
+
"logps/rejected": -70.64997863769531,
|
202 |
+
"loss": 0.5466,
|
203 |
+
"rewards/accuracies": 0.4000000059604645,
|
204 |
+
"rewards/chosen": 0.36673134565353394,
|
205 |
+
"rewards/margins": 0.42903366684913635,
|
206 |
+
"rewards/rejected": -0.06230226159095764,
|
207 |
+
"step": 100
|
208 |
+
},
|
209 |
+
{
|
210 |
+
"epoch": 1.79,
|
211 |
+
"grad_norm": 11.182683944702148,
|
212 |
+
"learning_rate": 3.6197183098591553e-06,
|
213 |
+
"logits/chosen": -1.8602203130722046,
|
214 |
+
"logits/rejected": -1.8786903619766235,
|
215 |
+
"logps/chosen": -29.601736068725586,
|
216 |
+
"logps/rejected": -66.1338882446289,
|
217 |
+
"loss": 0.6003,
|
218 |
+
"rewards/accuracies": 0.2750000059604645,
|
219 |
+
"rewards/chosen": 0.3122637867927551,
|
220 |
+
"rewards/margins": 0.2756831645965576,
|
221 |
+
"rewards/rejected": 0.03658062964677811,
|
222 |
+
"step": 110
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"epoch": 1.95,
|
226 |
+
"grad_norm": 3.9169583320617676,
|
227 |
+
"learning_rate": 3.47887323943662e-06,
|
228 |
+
"logits/chosen": -1.8304624557495117,
|
229 |
+
"logits/rejected": -1.8451646566390991,
|
230 |
+
"logps/chosen": -31.413599014282227,
|
231 |
+
"logps/rejected": -56.841880798339844,
|
232 |
+
"loss": 0.6033,
|
233 |
+
"rewards/accuracies": 0.2750000059604645,
|
234 |
+
"rewards/chosen": 0.21899382770061493,
|
235 |
+
"rewards/margins": 0.2744571566581726,
|
236 |
+
"rewards/rejected": -0.05546332150697708,
|
237 |
+
"step": 120
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"epoch": 1.95,
|
241 |
+
"eval_logits/chosen": -1.80086350440979,
|
242 |
+
"eval_logits/rejected": -1.8260576725006104,
|
243 |
+
"eval_logps/chosen": -50.32191848754883,
|
244 |
+
"eval_logps/rejected": -98.02101135253906,
|
245 |
+
"eval_loss": 0.49604225158691406,
|
246 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
247 |
+
"eval_rewards/chosen": 0.5751391053199768,
|
248 |
+
"eval_rewards/margins": 0.6410244107246399,
|
249 |
+
"eval_rewards/rejected": -0.0658852607011795,
|
250 |
+
"eval_runtime": 8.1445,
|
251 |
+
"eval_samples_per_second": 3.438,
|
252 |
+
"eval_steps_per_second": 1.719,
|
253 |
+
"step": 120
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"epoch": 2.11,
|
257 |
+
"grad_norm": 1.4047716856002808,
|
258 |
+
"learning_rate": 3.338028169014085e-06,
|
259 |
+
"logits/chosen": -1.8776130676269531,
|
260 |
+
"logits/rejected": -1.8995519876480103,
|
261 |
+
"logps/chosen": -22.69371795654297,
|
262 |
+
"logps/rejected": -53.5282096862793,
|
263 |
+
"loss": 0.5611,
|
264 |
+
"rewards/accuracies": 0.25,
|
265 |
+
"rewards/chosen": 0.35938918590545654,
|
266 |
+
"rewards/margins": 0.5045264959335327,
|
267 |
+
"rewards/rejected": -0.14513733983039856,
|
268 |
+
"step": 130
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 2.28,
|
272 |
+
"grad_norm": 0.7528722882270813,
|
273 |
+
"learning_rate": 3.1971830985915496e-06,
|
274 |
+
"logits/chosen": -1.8126357793807983,
|
275 |
+
"logits/rejected": -1.832371711730957,
|
276 |
+
"logps/chosen": -38.33379364013672,
|
277 |
+
"logps/rejected": -67.96979522705078,
|
278 |
+
"loss": 0.5142,
|
279 |
+
"rewards/accuracies": 0.375,
|
280 |
+
"rewards/chosen": 0.5568062663078308,
|
281 |
+
"rewards/margins": 0.6818712949752808,
|
282 |
+
"rewards/rejected": -0.12506499886512756,
|
283 |
+
"step": 140
|
284 |
+
},
|
285 |
+
{
|
286 |
+
"epoch": 2.44,
|
287 |
+
"grad_norm": 3.405579090118408,
|
288 |
+
"learning_rate": 3.056338028169014e-06,
|
289 |
+
"logits/chosen": -1.8196109533309937,
|
290 |
+
"logits/rejected": -1.8556429147720337,
|
291 |
+
"logps/chosen": -36.78864669799805,
|
292 |
+
"logps/rejected": -83.05890655517578,
|
293 |
+
"loss": 0.5042,
|
294 |
+
"rewards/accuracies": 0.4000000059604645,
|
295 |
+
"rewards/chosen": 0.542107105255127,
|
296 |
+
"rewards/margins": 0.6411095857620239,
|
297 |
+
"rewards/rejected": -0.09900249540805817,
|
298 |
+
"step": 150
|
299 |
+
},
|
300 |
+
{
|
301 |
+
"epoch": 2.44,
|
302 |
+
"eval_logits/chosen": -1.805869698524475,
|
303 |
+
"eval_logits/rejected": -1.8330577611923218,
|
304 |
+
"eval_logps/chosen": -49.10601043701172,
|
305 |
+
"eval_logps/rejected": -98.84068298339844,
|
306 |
+
"eval_loss": 0.4709201455116272,
|
307 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
308 |
+
"eval_rewards/chosen": 0.6967297196388245,
|
309 |
+
"eval_rewards/margins": 0.8445812463760376,
|
310 |
+
"eval_rewards/rejected": -0.1478516012430191,
|
311 |
+
"eval_runtime": 8.1382,
|
312 |
+
"eval_samples_per_second": 3.441,
|
313 |
+
"eval_steps_per_second": 1.72,
|
314 |
+
"step": 150
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"epoch": 2.6,
|
318 |
+
"grad_norm": 7.778740882873535,
|
319 |
+
"learning_rate": 2.915492957746479e-06,
|
320 |
+
"logits/chosen": -1.848589301109314,
|
321 |
+
"logits/rejected": -1.8790754079818726,
|
322 |
+
"logps/chosen": -36.49171447753906,
|
323 |
+
"logps/rejected": -72.55968475341797,
|
324 |
+
"loss": 0.4927,
|
325 |
+
"rewards/accuracies": 0.44999998807907104,
|
326 |
+
"rewards/chosen": 0.49555450677871704,
|
327 |
+
"rewards/margins": 0.6891830563545227,
|
328 |
+
"rewards/rejected": -0.1936284601688385,
|
329 |
+
"step": 160
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"epoch": 2.76,
|
333 |
+
"grad_norm": 4.058627605438232,
|
334 |
+
"learning_rate": 2.774647887323944e-06,
|
335 |
+
"logits/chosen": -1.812421441078186,
|
336 |
+
"logits/rejected": -1.8415311574935913,
|
337 |
+
"logps/chosen": -45.62999725341797,
|
338 |
+
"logps/rejected": -87.85527038574219,
|
339 |
+
"loss": 0.4541,
|
340 |
+
"rewards/accuracies": 0.4749999940395355,
|
341 |
+
"rewards/chosen": 0.7084562182426453,
|
342 |
+
"rewards/margins": 0.9553689956665039,
|
343 |
+
"rewards/rejected": -0.24691279232501984,
|
344 |
+
"step": 170
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"epoch": 2.93,
|
348 |
+
"grad_norm": 0.0,
|
349 |
+
"learning_rate": 2.6338028169014084e-06,
|
350 |
+
"logits/chosen": -1.8475942611694336,
|
351 |
+
"logits/rejected": -1.8678725957870483,
|
352 |
+
"logps/chosen": -40.53328323364258,
|
353 |
+
"logps/rejected": -64.86616516113281,
|
354 |
+
"loss": 0.5087,
|
355 |
+
"rewards/accuracies": 0.375,
|
356 |
+
"rewards/chosen": 0.5022943019866943,
|
357 |
+
"rewards/margins": 0.7252141833305359,
|
358 |
+
"rewards/rejected": -0.22291991114616394,
|
359 |
+
"step": 180
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 2.93,
|
363 |
+
"eval_logits/chosen": -1.8136398792266846,
|
364 |
+
"eval_logits/rejected": -1.8424787521362305,
|
365 |
+
"eval_logps/chosen": -48.19547653198242,
|
366 |
+
"eval_logps/rejected": -99.7900161743164,
|
367 |
+
"eval_loss": 0.4541548192501068,
|
368 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
369 |
+
"eval_rewards/chosen": 0.7877826690673828,
|
370 |
+
"eval_rewards/margins": 1.0305674076080322,
|
371 |
+
"eval_rewards/rejected": -0.24278469383716583,
|
372 |
+
"eval_runtime": 8.1397,
|
373 |
+
"eval_samples_per_second": 3.44,
|
374 |
+
"eval_steps_per_second": 1.72,
|
375 |
+
"step": 180
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"epoch": 3.09,
|
379 |
+
"grad_norm": 2.1618106365203857,
|
380 |
+
"learning_rate": 2.4929577464788734e-06,
|
381 |
+
"logits/chosen": -1.876151442527771,
|
382 |
+
"logits/rejected": -1.9132931232452393,
|
383 |
+
"logps/chosen": -38.02617645263672,
|
384 |
+
"logps/rejected": -84.4028549194336,
|
385 |
+
"loss": 0.4372,
|
386 |
+
"rewards/accuracies": 0.44999998807907104,
|
387 |
+
"rewards/chosen": 0.723468005657196,
|
388 |
+
"rewards/margins": 1.1590527296066284,
|
389 |
+
"rewards/rejected": -0.43558478355407715,
|
390 |
+
"step": 190
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"epoch": 3.25,
|
394 |
+
"grad_norm": 0.0,
|
395 |
+
"learning_rate": 2.352112676056338e-06,
|
396 |
+
"logits/chosen": -1.8977773189544678,
|
397 |
+
"logits/rejected": -1.9120800495147705,
|
398 |
+
"logps/chosen": -38.097923278808594,
|
399 |
+
"logps/rejected": -55.17757034301758,
|
400 |
+
"loss": 0.4778,
|
401 |
+
"rewards/accuracies": 0.375,
|
402 |
+
"rewards/chosen": 0.5927585959434509,
|
403 |
+
"rewards/margins": 0.8746024370193481,
|
404 |
+
"rewards/rejected": -0.2818438410758972,
|
405 |
+
"step": 200
|
406 |
+
},
|
407 |
+
{
|
408 |
+
"epoch": 3.41,
|
409 |
+
"grad_norm": 7.095726013183594,
|
410 |
+
"learning_rate": 2.211267605633803e-06,
|
411 |
+
"logits/chosen": -1.8508259057998657,
|
412 |
+
"logits/rejected": -1.8876402378082275,
|
413 |
+
"logps/chosen": -33.23273468017578,
|
414 |
+
"logps/rejected": -79.0272445678711,
|
415 |
+
"loss": 0.4874,
|
416 |
+
"rewards/accuracies": 0.375,
|
417 |
+
"rewards/chosen": 0.5798195600509644,
|
418 |
+
"rewards/margins": 0.9200228452682495,
|
419 |
+
"rewards/rejected": -0.34020328521728516,
|
420 |
+
"step": 210
|
421 |
+
},
|
422 |
+
{
|
423 |
+
"epoch": 3.41,
|
424 |
+
"eval_logits/chosen": -1.821912407875061,
|
425 |
+
"eval_logits/rejected": -1.8520457744598389,
|
426 |
+
"eval_logps/chosen": -47.6314697265625,
|
427 |
+
"eval_logps/rejected": -100.92195129394531,
|
428 |
+
"eval_loss": 0.4427572786808014,
|
429 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
430 |
+
"eval_rewards/chosen": 0.8441829681396484,
|
431 |
+
"eval_rewards/margins": 1.2001608610153198,
|
432 |
+
"eval_rewards/rejected": -0.35597795248031616,
|
433 |
+
"eval_runtime": 8.1451,
|
434 |
+
"eval_samples_per_second": 3.438,
|
435 |
+
"eval_steps_per_second": 1.719,
|
436 |
+
"step": 210
|
437 |
+
},
|
438 |
+
{
|
439 |
+
"epoch": 3.58,
|
440 |
+
"grad_norm": 4.825575351715088,
|
441 |
+
"learning_rate": 2.0704225352112676e-06,
|
442 |
+
"logits/chosen": -1.889478325843811,
|
443 |
+
"logits/rejected": -1.9106714725494385,
|
444 |
+
"logps/chosen": -30.769512176513672,
|
445 |
+
"logps/rejected": -68.92756652832031,
|
446 |
+
"loss": 0.5277,
|
447 |
+
"rewards/accuracies": 0.32499998807907104,
|
448 |
+
"rewards/chosen": 0.379320353269577,
|
449 |
+
"rewards/margins": 0.6602964401245117,
|
450 |
+
"rewards/rejected": -0.28097596764564514,
|
451 |
+
"step": 220
|
452 |
+
},
|
453 |
+
{
|
454 |
+
"epoch": 3.74,
|
455 |
+
"grad_norm": 5.236915588378906,
|
456 |
+
"learning_rate": 1.9295774647887326e-06,
|
457 |
+
"logits/chosen": -1.8926284313201904,
|
458 |
+
"logits/rejected": -1.9087079763412476,
|
459 |
+
"logps/chosen": -36.48774719238281,
|
460 |
+
"logps/rejected": -59.29833221435547,
|
461 |
+
"loss": 0.5176,
|
462 |
+
"rewards/accuracies": 0.32499998807907104,
|
463 |
+
"rewards/chosen": 0.6325365304946899,
|
464 |
+
"rewards/margins": 0.867927074432373,
|
465 |
+
"rewards/rejected": -0.2353905737400055,
|
466 |
+
"step": 230
|
467 |
+
},
|
468 |
+
{
|
469 |
+
"epoch": 3.9,
|
470 |
+
"grad_norm": 1.3737443685531616,
|
471 |
+
"learning_rate": 1.7887323943661974e-06,
|
472 |
+
"logits/chosen": -1.7782018184661865,
|
473 |
+
"logits/rejected": -1.8105701208114624,
|
474 |
+
"logps/chosen": -41.42538833618164,
|
475 |
+
"logps/rejected": -93.73129272460938,
|
476 |
+
"loss": 0.4229,
|
477 |
+
"rewards/accuracies": 0.4749999940395355,
|
478 |
+
"rewards/chosen": 0.8450711369514465,
|
479 |
+
"rewards/margins": 1.3813583850860596,
|
480 |
+
"rewards/rejected": -0.5362871885299683,
|
481 |
+
"step": 240
|
482 |
+
},
|
483 |
+
{
|
484 |
+
"epoch": 3.9,
|
485 |
+
"eval_logits/chosen": -1.826602816581726,
|
486 |
+
"eval_logits/rejected": -1.8575078248977661,
|
487 |
+
"eval_logps/chosen": -47.322914123535156,
|
488 |
+
"eval_logps/rejected": -101.7520980834961,
|
489 |
+
"eval_loss": 0.4358247220516205,
|
490 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
491 |
+
"eval_rewards/chosen": 0.8750395178794861,
|
492 |
+
"eval_rewards/margins": 1.3140336275100708,
|
493 |
+
"eval_rewards/rejected": -0.4389941692352295,
|
494 |
+
"eval_runtime": 8.1403,
|
495 |
+
"eval_samples_per_second": 3.44,
|
496 |
+
"eval_steps_per_second": 1.72,
|
497 |
+
"step": 240
|
498 |
+
},
|
499 |
+
{
|
500 |
+
"epoch": 4.07,
|
501 |
+
"grad_norm": 1.977386713027954,
|
502 |
+
"learning_rate": 1.647887323943662e-06,
|
503 |
+
"logits/chosen": -1.875792145729065,
|
504 |
+
"logits/rejected": -1.8937476873397827,
|
505 |
+
"logps/chosen": -25.06104278564453,
|
506 |
+
"logps/rejected": -46.700584411621094,
|
507 |
+
"loss": 0.5274,
|
508 |
+
"rewards/accuracies": 0.30000001192092896,
|
509 |
+
"rewards/chosen": 0.42553478479385376,
|
510 |
+
"rewards/margins": 0.7891250252723694,
|
511 |
+
"rewards/rejected": -0.3635903000831604,
|
512 |
+
"step": 250
|
513 |
+
},
|
514 |
+
{
|
515 |
+
"epoch": 4.23,
|
516 |
+
"grad_norm": 3.320791244506836,
|
517 |
+
"learning_rate": 1.5070422535211269e-06,
|
518 |
+
"logits/chosen": -1.7908179759979248,
|
519 |
+
"logits/rejected": -1.8309694528579712,
|
520 |
+
"logps/chosen": -54.056663513183594,
|
521 |
+
"logps/rejected": -108.03240966796875,
|
522 |
+
"loss": 0.3569,
|
523 |
+
"rewards/accuracies": 0.574999988079071,
|
524 |
+
"rewards/chosen": 1.07839035987854,
|
525 |
+
"rewards/margins": 1.643531084060669,
|
526 |
+
"rewards/rejected": -0.5651407837867737,
|
527 |
+
"step": 260
|
528 |
+
},
|
529 |
+
{
|
530 |
+
"epoch": 4.39,
|
531 |
+
"grad_norm": 4.999856948852539,
|
532 |
+
"learning_rate": 1.3661971830985919e-06,
|
533 |
+
"logits/chosen": -1.9177863597869873,
|
534 |
+
"logits/rejected": -1.9476194381713867,
|
535 |
+
"logps/chosen": -22.58294105529785,
|
536 |
+
"logps/rejected": -61.99756622314453,
|
537 |
+
"loss": 0.5295,
|
538 |
+
"rewards/accuracies": 0.2750000059604645,
|
539 |
+
"rewards/chosen": 0.5058903694152832,
|
540 |
+
"rewards/margins": 0.8186748623847961,
|
541 |
+
"rewards/rejected": -0.31278449296951294,
|
542 |
+
"step": 270
|
543 |
+
},
|
544 |
+
{
|
545 |
+
"epoch": 4.39,
|
546 |
+
"eval_logits/chosen": -1.8289211988449097,
|
547 |
+
"eval_logits/rejected": -1.860676646232605,
|
548 |
+
"eval_logps/chosen": -47.04714584350586,
|
549 |
+
"eval_logps/rejected": -102.3218994140625,
|
550 |
+
"eval_loss": 0.43130752444267273,
|
551 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
552 |
+
"eval_rewards/chosen": 0.9026166200637817,
|
553 |
+
"eval_rewards/margins": 1.3985893726348877,
|
554 |
+
"eval_rewards/rejected": -0.4959728717803955,
|
555 |
+
"eval_runtime": 8.1397,
|
556 |
+
"eval_samples_per_second": 3.44,
|
557 |
+
"eval_steps_per_second": 1.72,
|
558 |
+
"step": 270
|
559 |
+
}
|
560 |
+
],
|
561 |
+
"logging_steps": 10,
|
562 |
+
"max_steps": 366,
|
563 |
+
"num_input_tokens_seen": 0,
|
564 |
+
"num_train_epochs": 6,
|
565 |
+
"save_steps": 90,
|
566 |
+
"total_flos": 0.0,
|
567 |
+
"train_batch_size": 2,
|
568 |
+
"trial_name": null,
|
569 |
+
"trial_params": null
|
570 |
+
}
|
checkpoint-270/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de569ad7b35da22832fd1fc395ab1c110ac622b038bdfcb0eee757cdc5b4b97b
|
3 |
+
size 5304
|
checkpoint-360/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.10.0
|
checkpoint-360/adapter_config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 16,
|
14 |
+
"lora_dropout": 0.05,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 8,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": [
|
23 |
+
"v_proj",
|
24 |
+
"q_proj"
|
25 |
+
],
|
26 |
+
"task_type": "CAUSAL_LM",
|
27 |
+
"use_dora": false,
|
28 |
+
"use_rslora": false
|
29 |
+
}
|
checkpoint-360/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0c709e037401ede6602043ef6a5abb1f9b33746d7060acc95355c55c0660071
|
3 |
+
size 13648432
|
checkpoint-360/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30955b65c45b0993e250de74aafe58045cbb1b0ea1ca012a6e570504f8394557
|
3 |
+
size 27370618
|
checkpoint-360/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adbac4581908b419c3039e502a4aacf8ebe77d7c5097a659c9e661f2ab321b78
|
3 |
+
size 14244
|
checkpoint-360/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c2a24ffbdf666fc29aa43a0bae8368ec77e666548541499714a3f8dfdd7c88b
|
3 |
+
size 1064
|
checkpoint-360/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<unk>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<unk>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
checkpoint-360/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-360/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
3 |
+
size 493443
|
checkpoint-360/tokenizer_config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"additional_special_tokens": [],
|
31 |
+
"bos_token": "<s>",
|
32 |
+
"chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token }}{% endif %}{% endfor %}",
|
33 |
+
"clean_up_tokenization_spaces": false,
|
34 |
+
"eos_token": "</s>",
|
35 |
+
"legacy": true,
|
36 |
+
"max_lenght": 8192,
|
37 |
+
"max_length": 8192,
|
38 |
+
"model_max_length": 1000000000000000019884624838656,
|
39 |
+
"pad_token": "<unk>",
|
40 |
+
"padding": true,
|
41 |
+
"sp_model_kwargs": {},
|
42 |
+
"spaces_between_special_tokens": false,
|
43 |
+
"stride": 0,
|
44 |
+
"tokenizer_class": "LlamaTokenizer",
|
45 |
+
"truncation_side": "right",
|
46 |
+
"truncation_strategy": "longest_first",
|
47 |
+
"unk_token": "<unk>",
|
48 |
+
"use_default_system_prompt": false
|
49 |
+
}
|
checkpoint-360/trainer_state.json
ADDED
@@ -0,0 +1,753 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.42603132128715515,
|
3 |
+
"best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-360",
|
4 |
+
"epoch": 5.853658536585366,
|
5 |
+
"eval_steps": 30,
|
6 |
+
"global_step": 360,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.16,
|
13 |
+
"grad_norm": 8.378021240234375,
|
14 |
+
"learning_rate": 4.0909090909090915e-06,
|
15 |
+
"logits/chosen": -1.830958604812622,
|
16 |
+
"logits/rejected": -1.8507845401763916,
|
17 |
+
"logps/chosen": -28.701984405517578,
|
18 |
+
"logps/rejected": -54.28569793701172,
|
19 |
+
"loss": 0.6924,
|
20 |
+
"rewards/accuracies": 0.20000000298023224,
|
21 |
+
"rewards/chosen": 0.0008967495523393154,
|
22 |
+
"rewards/margins": 0.0014666033675894141,
|
23 |
+
"rewards/rejected": -0.0005698538152500987,
|
24 |
+
"step": 10
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.33,
|
28 |
+
"grad_norm": 5.193418502807617,
|
29 |
+
"learning_rate": 4.887323943661972e-06,
|
30 |
+
"logits/chosen": -1.7550897598266602,
|
31 |
+
"logits/rejected": -1.770708680152893,
|
32 |
+
"logps/chosen": -47.344207763671875,
|
33 |
+
"logps/rejected": -64.0368423461914,
|
34 |
+
"loss": 0.6852,
|
35 |
+
"rewards/accuracies": 0.4000000059604645,
|
36 |
+
"rewards/chosen": 0.017231885343790054,
|
37 |
+
"rewards/margins": 0.01606021076440811,
|
38 |
+
"rewards/rejected": 0.0011716745793819427,
|
39 |
+
"step": 20
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 0.49,
|
43 |
+
"grad_norm": 7.308932304382324,
|
44 |
+
"learning_rate": 4.746478873239437e-06,
|
45 |
+
"logits/chosen": -1.781267762184143,
|
46 |
+
"logits/rejected": -1.8114898204803467,
|
47 |
+
"logps/chosen": -54.274559020996094,
|
48 |
+
"logps/rejected": -95.20500183105469,
|
49 |
+
"loss": 0.6635,
|
50 |
+
"rewards/accuracies": 0.5,
|
51 |
+
"rewards/chosen": 0.0641159638762474,
|
52 |
+
"rewards/margins": 0.061691801995038986,
|
53 |
+
"rewards/rejected": 0.0024241588544100523,
|
54 |
+
"step": 30
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 0.49,
|
58 |
+
"eval_logits/chosen": -1.7831767797470093,
|
59 |
+
"eval_logits/rejected": -1.8043663501739502,
|
60 |
+
"eval_logps/chosen": -55.16960906982422,
|
61 |
+
"eval_logps/rejected": -97.32585144042969,
|
62 |
+
"eval_loss": 0.6523757576942444,
|
63 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
64 |
+
"eval_rewards/chosen": 0.09036973863840103,
|
65 |
+
"eval_rewards/margins": 0.08673857897520065,
|
66 |
+
"eval_rewards/rejected": 0.0036311547737568617,
|
67 |
+
"eval_runtime": 8.141,
|
68 |
+
"eval_samples_per_second": 3.439,
|
69 |
+
"eval_steps_per_second": 1.72,
|
70 |
+
"step": 30
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"epoch": 0.65,
|
74 |
+
"grad_norm": 0.0,
|
75 |
+
"learning_rate": 4.6056338028169015e-06,
|
76 |
+
"logits/chosen": -1.889905333518982,
|
77 |
+
"logits/rejected": -1.9024461507797241,
|
78 |
+
"logps/chosen": -27.918941497802734,
|
79 |
+
"logps/rejected": -42.093284606933594,
|
80 |
+
"loss": 0.668,
|
81 |
+
"rewards/accuracies": 0.25,
|
82 |
+
"rewards/chosen": 0.054457180202007294,
|
83 |
+
"rewards/margins": 0.0539846234023571,
|
84 |
+
"rewards/rejected": 0.0004725646285805851,
|
85 |
+
"step": 40
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 0.81,
|
89 |
+
"grad_norm": 8.53225326538086,
|
90 |
+
"learning_rate": 4.464788732394367e-06,
|
91 |
+
"logits/chosen": -1.8278567790985107,
|
92 |
+
"logits/rejected": -1.849957823753357,
|
93 |
+
"logps/chosen": -43.8238639831543,
|
94 |
+
"logps/rejected": -68.02179718017578,
|
95 |
+
"loss": 0.6358,
|
96 |
+
"rewards/accuracies": 0.3499999940395355,
|
97 |
+
"rewards/chosen": 0.13941256701946259,
|
98 |
+
"rewards/margins": 0.13133978843688965,
|
99 |
+
"rewards/rejected": 0.008072790689766407,
|
100 |
+
"step": 50
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.98,
|
104 |
+
"grad_norm": 9.436968803405762,
|
105 |
+
"learning_rate": 4.3239436619718315e-06,
|
106 |
+
"logits/chosen": -1.805991768836975,
|
107 |
+
"logits/rejected": -1.8437427282333374,
|
108 |
+
"logps/chosen": -43.8873291015625,
|
109 |
+
"logps/rejected": -95.2943115234375,
|
110 |
+
"loss": 0.6026,
|
111 |
+
"rewards/accuracies": 0.44999998807907104,
|
112 |
+
"rewards/chosen": 0.18793432414531708,
|
113 |
+
"rewards/margins": 0.21308371424674988,
|
114 |
+
"rewards/rejected": -0.025149401277303696,
|
115 |
+
"step": 60
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"epoch": 0.98,
|
119 |
+
"eval_logits/chosen": -1.7877694368362427,
|
120 |
+
"eval_logits/rejected": -1.8098936080932617,
|
121 |
+
"eval_logps/chosen": -53.567203521728516,
|
122 |
+
"eval_logps/rejected": -97.33795928955078,
|
123 |
+
"eval_loss": 0.5890871286392212,
|
124 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
125 |
+
"eval_rewards/chosen": 0.25061002373695374,
|
126 |
+
"eval_rewards/margins": 0.2481890469789505,
|
127 |
+
"eval_rewards/rejected": 0.002420984674245119,
|
128 |
+
"eval_runtime": 8.1404,
|
129 |
+
"eval_samples_per_second": 3.44,
|
130 |
+
"eval_steps_per_second": 1.72,
|
131 |
+
"step": 60
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"epoch": 1.14,
|
135 |
+
"grad_norm": 0.0,
|
136 |
+
"learning_rate": 4.183098591549296e-06,
|
137 |
+
"logits/chosen": -1.8344879150390625,
|
138 |
+
"logits/rejected": -1.8489716053009033,
|
139 |
+
"logps/chosen": -40.38930892944336,
|
140 |
+
"logps/rejected": -60.9084358215332,
|
141 |
+
"loss": 0.6031,
|
142 |
+
"rewards/accuracies": 0.375,
|
143 |
+
"rewards/chosen": 0.19739331305027008,
|
144 |
+
"rewards/margins": 0.22638121247291565,
|
145 |
+
"rewards/rejected": -0.028987903147935867,
|
146 |
+
"step": 70
|
147 |
+
},
|
148 |
+
{
|
149 |
+
"epoch": 1.3,
|
150 |
+
"grad_norm": 5.49536657333374,
|
151 |
+
"learning_rate": 4.042253521126761e-06,
|
152 |
+
"logits/chosen": -1.7903095483779907,
|
153 |
+
"logits/rejected": -1.8362411260604858,
|
154 |
+
"logps/chosen": -44.288116455078125,
|
155 |
+
"logps/rejected": -90.21073913574219,
|
156 |
+
"loss": 0.5357,
|
157 |
+
"rewards/accuracies": 0.4749999940395355,
|
158 |
+
"rewards/chosen": 0.34061312675476074,
|
159 |
+
"rewards/margins": 0.40679749846458435,
|
160 |
+
"rewards/rejected": -0.06618441641330719,
|
161 |
+
"step": 80
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 1.46,
|
165 |
+
"grad_norm": 13.401692390441895,
|
166 |
+
"learning_rate": 3.901408450704225e-06,
|
167 |
+
"logits/chosen": -1.8004281520843506,
|
168 |
+
"logits/rejected": -1.8247934579849243,
|
169 |
+
"logps/chosen": -42.32465362548828,
|
170 |
+
"logps/rejected": -70.9749984741211,
|
171 |
+
"loss": 0.5387,
|
172 |
+
"rewards/accuracies": 0.4749999940395355,
|
173 |
+
"rewards/chosen": 0.3678433299064636,
|
174 |
+
"rewards/margins": 0.4186524450778961,
|
175 |
+
"rewards/rejected": -0.05080908536911011,
|
176 |
+
"step": 90
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"epoch": 1.46,
|
180 |
+
"eval_logits/chosen": -1.7943389415740967,
|
181 |
+
"eval_logits/rejected": -1.8181126117706299,
|
182 |
+
"eval_logps/chosen": -51.677486419677734,
|
183 |
+
"eval_logps/rejected": -97.63689422607422,
|
184 |
+
"eval_loss": 0.529485821723938,
|
185 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
186 |
+
"eval_rewards/chosen": 0.4395819306373596,
|
187 |
+
"eval_rewards/margins": 0.4670555889606476,
|
188 |
+
"eval_rewards/rejected": -0.027473628520965576,
|
189 |
+
"eval_runtime": 8.1412,
|
190 |
+
"eval_samples_per_second": 3.439,
|
191 |
+
"eval_steps_per_second": 1.72,
|
192 |
+
"step": 90
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"epoch": 1.63,
|
196 |
+
"grad_norm": 5.040858745574951,
|
197 |
+
"learning_rate": 3.7605633802816903e-06,
|
198 |
+
"logits/chosen": -1.8601042032241821,
|
199 |
+
"logits/rejected": -1.8790462017059326,
|
200 |
+
"logps/chosen": -43.77570343017578,
|
201 |
+
"logps/rejected": -70.64997863769531,
|
202 |
+
"loss": 0.5466,
|
203 |
+
"rewards/accuracies": 0.4000000059604645,
|
204 |
+
"rewards/chosen": 0.36673134565353394,
|
205 |
+
"rewards/margins": 0.42903366684913635,
|
206 |
+
"rewards/rejected": -0.06230226159095764,
|
207 |
+
"step": 100
|
208 |
+
},
|
209 |
+
{
|
210 |
+
"epoch": 1.79,
|
211 |
+
"grad_norm": 11.182683944702148,
|
212 |
+
"learning_rate": 3.6197183098591553e-06,
|
213 |
+
"logits/chosen": -1.8602203130722046,
|
214 |
+
"logits/rejected": -1.8786903619766235,
|
215 |
+
"logps/chosen": -29.601736068725586,
|
216 |
+
"logps/rejected": -66.1338882446289,
|
217 |
+
"loss": 0.6003,
|
218 |
+
"rewards/accuracies": 0.2750000059604645,
|
219 |
+
"rewards/chosen": 0.3122637867927551,
|
220 |
+
"rewards/margins": 0.2756831645965576,
|
221 |
+
"rewards/rejected": 0.03658062964677811,
|
222 |
+
"step": 110
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"epoch": 1.95,
|
226 |
+
"grad_norm": 3.9169583320617676,
|
227 |
+
"learning_rate": 3.47887323943662e-06,
|
228 |
+
"logits/chosen": -1.8304624557495117,
|
229 |
+
"logits/rejected": -1.8451646566390991,
|
230 |
+
"logps/chosen": -31.413599014282227,
|
231 |
+
"logps/rejected": -56.841880798339844,
|
232 |
+
"loss": 0.6033,
|
233 |
+
"rewards/accuracies": 0.2750000059604645,
|
234 |
+
"rewards/chosen": 0.21899382770061493,
|
235 |
+
"rewards/margins": 0.2744571566581726,
|
236 |
+
"rewards/rejected": -0.05546332150697708,
|
237 |
+
"step": 120
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"epoch": 1.95,
|
241 |
+
"eval_logits/chosen": -1.80086350440979,
|
242 |
+
"eval_logits/rejected": -1.8260576725006104,
|
243 |
+
"eval_logps/chosen": -50.32191848754883,
|
244 |
+
"eval_logps/rejected": -98.02101135253906,
|
245 |
+
"eval_loss": 0.49604225158691406,
|
246 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
247 |
+
"eval_rewards/chosen": 0.5751391053199768,
|
248 |
+
"eval_rewards/margins": 0.6410244107246399,
|
249 |
+
"eval_rewards/rejected": -0.0658852607011795,
|
250 |
+
"eval_runtime": 8.1445,
|
251 |
+
"eval_samples_per_second": 3.438,
|
252 |
+
"eval_steps_per_second": 1.719,
|
253 |
+
"step": 120
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"epoch": 2.11,
|
257 |
+
"grad_norm": 1.4047716856002808,
|
258 |
+
"learning_rate": 3.338028169014085e-06,
|
259 |
+
"logits/chosen": -1.8776130676269531,
|
260 |
+
"logits/rejected": -1.8995519876480103,
|
261 |
+
"logps/chosen": -22.69371795654297,
|
262 |
+
"logps/rejected": -53.5282096862793,
|
263 |
+
"loss": 0.5611,
|
264 |
+
"rewards/accuracies": 0.25,
|
265 |
+
"rewards/chosen": 0.35938918590545654,
|
266 |
+
"rewards/margins": 0.5045264959335327,
|
267 |
+
"rewards/rejected": -0.14513733983039856,
|
268 |
+
"step": 130
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 2.28,
|
272 |
+
"grad_norm": 0.7528722882270813,
|
273 |
+
"learning_rate": 3.1971830985915496e-06,
|
274 |
+
"logits/chosen": -1.8126357793807983,
|
275 |
+
"logits/rejected": -1.832371711730957,
|
276 |
+
"logps/chosen": -38.33379364013672,
|
277 |
+
"logps/rejected": -67.96979522705078,
|
278 |
+
"loss": 0.5142,
|
279 |
+
"rewards/accuracies": 0.375,
|
280 |
+
"rewards/chosen": 0.5568062663078308,
|
281 |
+
"rewards/margins": 0.6818712949752808,
|
282 |
+
"rewards/rejected": -0.12506499886512756,
|
283 |
+
"step": 140
|
284 |
+
},
|
285 |
+
{
|
286 |
+
"epoch": 2.44,
|
287 |
+
"grad_norm": 3.405579090118408,
|
288 |
+
"learning_rate": 3.056338028169014e-06,
|
289 |
+
"logits/chosen": -1.8196109533309937,
|
290 |
+
"logits/rejected": -1.8556429147720337,
|
291 |
+
"logps/chosen": -36.78864669799805,
|
292 |
+
"logps/rejected": -83.05890655517578,
|
293 |
+
"loss": 0.5042,
|
294 |
+
"rewards/accuracies": 0.4000000059604645,
|
295 |
+
"rewards/chosen": 0.542107105255127,
|
296 |
+
"rewards/margins": 0.6411095857620239,
|
297 |
+
"rewards/rejected": -0.09900249540805817,
|
298 |
+
"step": 150
|
299 |
+
},
|
300 |
+
{
|
301 |
+
"epoch": 2.44,
|
302 |
+
"eval_logits/chosen": -1.805869698524475,
|
303 |
+
"eval_logits/rejected": -1.8330577611923218,
|
304 |
+
"eval_logps/chosen": -49.10601043701172,
|
305 |
+
"eval_logps/rejected": -98.84068298339844,
|
306 |
+
"eval_loss": 0.4709201455116272,
|
307 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
308 |
+
"eval_rewards/chosen": 0.6967297196388245,
|
309 |
+
"eval_rewards/margins": 0.8445812463760376,
|
310 |
+
"eval_rewards/rejected": -0.1478516012430191,
|
311 |
+
"eval_runtime": 8.1382,
|
312 |
+
"eval_samples_per_second": 3.441,
|
313 |
+
"eval_steps_per_second": 1.72,
|
314 |
+
"step": 150
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"epoch": 2.6,
|
318 |
+
"grad_norm": 7.778740882873535,
|
319 |
+
"learning_rate": 2.915492957746479e-06,
|
320 |
+
"logits/chosen": -1.848589301109314,
|
321 |
+
"logits/rejected": -1.8790754079818726,
|
322 |
+
"logps/chosen": -36.49171447753906,
|
323 |
+
"logps/rejected": -72.55968475341797,
|
324 |
+
"loss": 0.4927,
|
325 |
+
"rewards/accuracies": 0.44999998807907104,
|
326 |
+
"rewards/chosen": 0.49555450677871704,
|
327 |
+
"rewards/margins": 0.6891830563545227,
|
328 |
+
"rewards/rejected": -0.1936284601688385,
|
329 |
+
"step": 160
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"epoch": 2.76,
|
333 |
+
"grad_norm": 4.058627605438232,
|
334 |
+
"learning_rate": 2.774647887323944e-06,
|
335 |
+
"logits/chosen": -1.812421441078186,
|
336 |
+
"logits/rejected": -1.8415311574935913,
|
337 |
+
"logps/chosen": -45.62999725341797,
|
338 |
+
"logps/rejected": -87.85527038574219,
|
339 |
+
"loss": 0.4541,
|
340 |
+
"rewards/accuracies": 0.4749999940395355,
|
341 |
+
"rewards/chosen": 0.7084562182426453,
|
342 |
+
"rewards/margins": 0.9553689956665039,
|
343 |
+
"rewards/rejected": -0.24691279232501984,
|
344 |
+
"step": 170
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"epoch": 2.93,
|
348 |
+
"grad_norm": 0.0,
|
349 |
+
"learning_rate": 2.6338028169014084e-06,
|
350 |
+
"logits/chosen": -1.8475942611694336,
|
351 |
+
"logits/rejected": -1.8678725957870483,
|
352 |
+
"logps/chosen": -40.53328323364258,
|
353 |
+
"logps/rejected": -64.86616516113281,
|
354 |
+
"loss": 0.5087,
|
355 |
+
"rewards/accuracies": 0.375,
|
356 |
+
"rewards/chosen": 0.5022943019866943,
|
357 |
+
"rewards/margins": 0.7252141833305359,
|
358 |
+
"rewards/rejected": -0.22291991114616394,
|
359 |
+
"step": 180
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 2.93,
|
363 |
+
"eval_logits/chosen": -1.8136398792266846,
|
364 |
+
"eval_logits/rejected": -1.8424787521362305,
|
365 |
+
"eval_logps/chosen": -48.19547653198242,
|
366 |
+
"eval_logps/rejected": -99.7900161743164,
|
367 |
+
"eval_loss": 0.4541548192501068,
|
368 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
369 |
+
"eval_rewards/chosen": 0.7877826690673828,
|
370 |
+
"eval_rewards/margins": 1.0305674076080322,
|
371 |
+
"eval_rewards/rejected": -0.24278469383716583,
|
372 |
+
"eval_runtime": 8.1397,
|
373 |
+
"eval_samples_per_second": 3.44,
|
374 |
+
"eval_steps_per_second": 1.72,
|
375 |
+
"step": 180
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"epoch": 3.09,
|
379 |
+
"grad_norm": 2.1618106365203857,
|
380 |
+
"learning_rate": 2.4929577464788734e-06,
|
381 |
+
"logits/chosen": -1.876151442527771,
|
382 |
+
"logits/rejected": -1.9132931232452393,
|
383 |
+
"logps/chosen": -38.02617645263672,
|
384 |
+
"logps/rejected": -84.4028549194336,
|
385 |
+
"loss": 0.4372,
|
386 |
+
"rewards/accuracies": 0.44999998807907104,
|
387 |
+
"rewards/chosen": 0.723468005657196,
|
388 |
+
"rewards/margins": 1.1590527296066284,
|
389 |
+
"rewards/rejected": -0.43558478355407715,
|
390 |
+
"step": 190
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"epoch": 3.25,
|
394 |
+
"grad_norm": 0.0,
|
395 |
+
"learning_rate": 2.352112676056338e-06,
|
396 |
+
"logits/chosen": -1.8977773189544678,
|
397 |
+
"logits/rejected": -1.9120800495147705,
|
398 |
+
"logps/chosen": -38.097923278808594,
|
399 |
+
"logps/rejected": -55.17757034301758,
|
400 |
+
"loss": 0.4778,
|
401 |
+
"rewards/accuracies": 0.375,
|
402 |
+
"rewards/chosen": 0.5927585959434509,
|
403 |
+
"rewards/margins": 0.8746024370193481,
|
404 |
+
"rewards/rejected": -0.2818438410758972,
|
405 |
+
"step": 200
|
406 |
+
},
|
407 |
+
{
|
408 |
+
"epoch": 3.41,
|
409 |
+
"grad_norm": 7.095726013183594,
|
410 |
+
"learning_rate": 2.211267605633803e-06,
|
411 |
+
"logits/chosen": -1.8508259057998657,
|
412 |
+
"logits/rejected": -1.8876402378082275,
|
413 |
+
"logps/chosen": -33.23273468017578,
|
414 |
+
"logps/rejected": -79.0272445678711,
|
415 |
+
"loss": 0.4874,
|
416 |
+
"rewards/accuracies": 0.375,
|
417 |
+
"rewards/chosen": 0.5798195600509644,
|
418 |
+
"rewards/margins": 0.9200228452682495,
|
419 |
+
"rewards/rejected": -0.34020328521728516,
|
420 |
+
"step": 210
|
421 |
+
},
|
422 |
+
{
|
423 |
+
"epoch": 3.41,
|
424 |
+
"eval_logits/chosen": -1.821912407875061,
|
425 |
+
"eval_logits/rejected": -1.8520457744598389,
|
426 |
+
"eval_logps/chosen": -47.6314697265625,
|
427 |
+
"eval_logps/rejected": -100.92195129394531,
|
428 |
+
"eval_loss": 0.4427572786808014,
|
429 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
430 |
+
"eval_rewards/chosen": 0.8441829681396484,
|
431 |
+
"eval_rewards/margins": 1.2001608610153198,
|
432 |
+
"eval_rewards/rejected": -0.35597795248031616,
|
433 |
+
"eval_runtime": 8.1451,
|
434 |
+
"eval_samples_per_second": 3.438,
|
435 |
+
"eval_steps_per_second": 1.719,
|
436 |
+
"step": 210
|
437 |
+
},
|
438 |
+
{
|
439 |
+
"epoch": 3.58,
|
440 |
+
"grad_norm": 4.825575351715088,
|
441 |
+
"learning_rate": 2.0704225352112676e-06,
|
442 |
+
"logits/chosen": -1.889478325843811,
|
443 |
+
"logits/rejected": -1.9106714725494385,
|
444 |
+
"logps/chosen": -30.769512176513672,
|
445 |
+
"logps/rejected": -68.92756652832031,
|
446 |
+
"loss": 0.5277,
|
447 |
+
"rewards/accuracies": 0.32499998807907104,
|
448 |
+
"rewards/chosen": 0.379320353269577,
|
449 |
+
"rewards/margins": 0.6602964401245117,
|
450 |
+
"rewards/rejected": -0.28097596764564514,
|
451 |
+
"step": 220
|
452 |
+
},
|
453 |
+
{
|
454 |
+
"epoch": 3.74,
|
455 |
+
"grad_norm": 5.236915588378906,
|
456 |
+
"learning_rate": 1.9295774647887326e-06,
|
457 |
+
"logits/chosen": -1.8926284313201904,
|
458 |
+
"logits/rejected": -1.9087079763412476,
|
459 |
+
"logps/chosen": -36.48774719238281,
|
460 |
+
"logps/rejected": -59.29833221435547,
|
461 |
+
"loss": 0.5176,
|
462 |
+
"rewards/accuracies": 0.32499998807907104,
|
463 |
+
"rewards/chosen": 0.6325365304946899,
|
464 |
+
"rewards/margins": 0.867927074432373,
|
465 |
+
"rewards/rejected": -0.2353905737400055,
|
466 |
+
"step": 230
|
467 |
+
},
|
468 |
+
{
|
469 |
+
"epoch": 3.9,
|
470 |
+
"grad_norm": 1.3737443685531616,
|
471 |
+
"learning_rate": 1.7887323943661974e-06,
|
472 |
+
"logits/chosen": -1.7782018184661865,
|
473 |
+
"logits/rejected": -1.8105701208114624,
|
474 |
+
"logps/chosen": -41.42538833618164,
|
475 |
+
"logps/rejected": -93.73129272460938,
|
476 |
+
"loss": 0.4229,
|
477 |
+
"rewards/accuracies": 0.4749999940395355,
|
478 |
+
"rewards/chosen": 0.8450711369514465,
|
479 |
+
"rewards/margins": 1.3813583850860596,
|
480 |
+
"rewards/rejected": -0.5362871885299683,
|
481 |
+
"step": 240
|
482 |
+
},
|
483 |
+
{
|
484 |
+
"epoch": 3.9,
|
485 |
+
"eval_logits/chosen": -1.826602816581726,
|
486 |
+
"eval_logits/rejected": -1.8575078248977661,
|
487 |
+
"eval_logps/chosen": -47.322914123535156,
|
488 |
+
"eval_logps/rejected": -101.7520980834961,
|
489 |
+
"eval_loss": 0.4358247220516205,
|
490 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
491 |
+
"eval_rewards/chosen": 0.8750395178794861,
|
492 |
+
"eval_rewards/margins": 1.3140336275100708,
|
493 |
+
"eval_rewards/rejected": -0.4389941692352295,
|
494 |
+
"eval_runtime": 8.1403,
|
495 |
+
"eval_samples_per_second": 3.44,
|
496 |
+
"eval_steps_per_second": 1.72,
|
497 |
+
"step": 240
|
498 |
+
},
|
499 |
+
{
|
500 |
+
"epoch": 4.07,
|
501 |
+
"grad_norm": 1.977386713027954,
|
502 |
+
"learning_rate": 1.647887323943662e-06,
|
503 |
+
"logits/chosen": -1.875792145729065,
|
504 |
+
"logits/rejected": -1.8937476873397827,
|
505 |
+
"logps/chosen": -25.06104278564453,
|
506 |
+
"logps/rejected": -46.700584411621094,
|
507 |
+
"loss": 0.5274,
|
508 |
+
"rewards/accuracies": 0.30000001192092896,
|
509 |
+
"rewards/chosen": 0.42553478479385376,
|
510 |
+
"rewards/margins": 0.7891250252723694,
|
511 |
+
"rewards/rejected": -0.3635903000831604,
|
512 |
+
"step": 250
|
513 |
+
},
|
514 |
+
{
|
515 |
+
"epoch": 4.23,
|
516 |
+
"grad_norm": 3.320791244506836,
|
517 |
+
"learning_rate": 1.5070422535211269e-06,
|
518 |
+
"logits/chosen": -1.7908179759979248,
|
519 |
+
"logits/rejected": -1.8309694528579712,
|
520 |
+
"logps/chosen": -54.056663513183594,
|
521 |
+
"logps/rejected": -108.03240966796875,
|
522 |
+
"loss": 0.3569,
|
523 |
+
"rewards/accuracies": 0.574999988079071,
|
524 |
+
"rewards/chosen": 1.07839035987854,
|
525 |
+
"rewards/margins": 1.643531084060669,
|
526 |
+
"rewards/rejected": -0.5651407837867737,
|
527 |
+
"step": 260
|
528 |
+
},
|
529 |
+
{
|
530 |
+
"epoch": 4.39,
|
531 |
+
"grad_norm": 4.999856948852539,
|
532 |
+
"learning_rate": 1.3661971830985919e-06,
|
533 |
+
"logits/chosen": -1.9177863597869873,
|
534 |
+
"logits/rejected": -1.9476194381713867,
|
535 |
+
"logps/chosen": -22.58294105529785,
|
536 |
+
"logps/rejected": -61.99756622314453,
|
537 |
+
"loss": 0.5295,
|
538 |
+
"rewards/accuracies": 0.2750000059604645,
|
539 |
+
"rewards/chosen": 0.5058903694152832,
|
540 |
+
"rewards/margins": 0.8186748623847961,
|
541 |
+
"rewards/rejected": -0.31278449296951294,
|
542 |
+
"step": 270
|
543 |
+
},
|
544 |
+
{
|
545 |
+
"epoch": 4.39,
|
546 |
+
"eval_logits/chosen": -1.8289211988449097,
|
547 |
+
"eval_logits/rejected": -1.860676646232605,
|
548 |
+
"eval_logps/chosen": -47.04714584350586,
|
549 |
+
"eval_logps/rejected": -102.3218994140625,
|
550 |
+
"eval_loss": 0.43130752444267273,
|
551 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
552 |
+
"eval_rewards/chosen": 0.9026166200637817,
|
553 |
+
"eval_rewards/margins": 1.3985893726348877,
|
554 |
+
"eval_rewards/rejected": -0.4959728717803955,
|
555 |
+
"eval_runtime": 8.1397,
|
556 |
+
"eval_samples_per_second": 3.44,
|
557 |
+
"eval_steps_per_second": 1.72,
|
558 |
+
"step": 270
|
559 |
+
},
|
560 |
+
{
|
561 |
+
"epoch": 4.55,
|
562 |
+
"grad_norm": 0.36910170316696167,
|
563 |
+
"learning_rate": 1.2253521126760565e-06,
|
564 |
+
"logits/chosen": -1.881696105003357,
|
565 |
+
"logits/rejected": -1.9073266983032227,
|
566 |
+
"logps/chosen": -33.28648376464844,
|
567 |
+
"logps/rejected": -68.246337890625,
|
568 |
+
"loss": 0.4962,
|
569 |
+
"rewards/accuracies": 0.32499998807907104,
|
570 |
+
"rewards/chosen": 0.5577932000160217,
|
571 |
+
"rewards/margins": 1.0508588552474976,
|
572 |
+
"rewards/rejected": -0.4930656850337982,
|
573 |
+
"step": 280
|
574 |
+
},
|
575 |
+
{
|
576 |
+
"epoch": 4.72,
|
577 |
+
"grad_norm": 11.332355499267578,
|
578 |
+
"learning_rate": 1.084507042253521e-06,
|
579 |
+
"logits/chosen": -1.8866857290267944,
|
580 |
+
"logits/rejected": -1.900857925415039,
|
581 |
+
"logps/chosen": -40.804874420166016,
|
582 |
+
"logps/rejected": -71.67508697509766,
|
583 |
+
"loss": 0.4851,
|
584 |
+
"rewards/accuracies": 0.4000000059604645,
|
585 |
+
"rewards/chosen": 0.7005030512809753,
|
586 |
+
"rewards/margins": 1.2071340084075928,
|
587 |
+
"rewards/rejected": -0.5066308379173279,
|
588 |
+
"step": 290
|
589 |
+
},
|
590 |
+
{
|
591 |
+
"epoch": 4.88,
|
592 |
+
"grad_norm": 3.657494306564331,
|
593 |
+
"learning_rate": 9.43661971830986e-07,
|
594 |
+
"logits/chosen": -1.9023106098175049,
|
595 |
+
"logits/rejected": -1.9253908395767212,
|
596 |
+
"logps/chosen": -18.57657814025879,
|
597 |
+
"logps/rejected": -53.88740158081055,
|
598 |
+
"loss": 0.5466,
|
599 |
+
"rewards/accuracies": 0.25,
|
600 |
+
"rewards/chosen": 0.3226935565471649,
|
601 |
+
"rewards/margins": 0.6567031145095825,
|
602 |
+
"rewards/rejected": -0.33400958776474,
|
603 |
+
"step": 300
|
604 |
+
},
|
605 |
+
{
|
606 |
+
"epoch": 4.88,
|
607 |
+
"eval_logits/chosen": -1.8308794498443604,
|
608 |
+
"eval_logits/rejected": -1.8629435300827026,
|
609 |
+
"eval_logps/chosen": -46.95443344116211,
|
610 |
+
"eval_logps/rejected": -102.74605560302734,
|
611 |
+
"eval_loss": 0.4291366934776306,
|
612 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
613 |
+
"eval_rewards/chosen": 0.9118875861167908,
|
614 |
+
"eval_rewards/margins": 1.4502772092819214,
|
615 |
+
"eval_rewards/rejected": -0.5383896827697754,
|
616 |
+
"eval_runtime": 8.1441,
|
617 |
+
"eval_samples_per_second": 3.438,
|
618 |
+
"eval_steps_per_second": 1.719,
|
619 |
+
"step": 300
|
620 |
+
},
|
621 |
+
{
|
622 |
+
"epoch": 5.04,
|
623 |
+
"grad_norm": 4.444954872131348,
|
624 |
+
"learning_rate": 8.028169014084508e-07,
|
625 |
+
"logits/chosen": -1.835021734237671,
|
626 |
+
"logits/rejected": -1.858599066734314,
|
627 |
+
"logps/chosen": -42.14970016479492,
|
628 |
+
"logps/rejected": -86.938720703125,
|
629 |
+
"loss": 0.4128,
|
630 |
+
"rewards/accuracies": 0.4749999940395355,
|
631 |
+
"rewards/chosen": 0.8941621780395508,
|
632 |
+
"rewards/margins": 1.572546362876892,
|
633 |
+
"rewards/rejected": -0.6783844232559204,
|
634 |
+
"step": 310
|
635 |
+
},
|
636 |
+
{
|
637 |
+
"epoch": 5.2,
|
638 |
+
"grad_norm": 0.4182775616645813,
|
639 |
+
"learning_rate": 6.619718309859155e-07,
|
640 |
+
"logits/chosen": -1.8859401941299438,
|
641 |
+
"logits/rejected": -1.910548448562622,
|
642 |
+
"logps/chosen": -34.28424835205078,
|
643 |
+
"logps/rejected": -77.3191146850586,
|
644 |
+
"loss": 0.4465,
|
645 |
+
"rewards/accuracies": 0.4000000059604645,
|
646 |
+
"rewards/chosen": 0.739007294178009,
|
647 |
+
"rewards/margins": 1.3678598403930664,
|
648 |
+
"rewards/rejected": -0.6288524866104126,
|
649 |
+
"step": 320
|
650 |
+
},
|
651 |
+
{
|
652 |
+
"epoch": 5.37,
|
653 |
+
"grad_norm": 2.8709957859973656e-06,
|
654 |
+
"learning_rate": 5.211267605633803e-07,
|
655 |
+
"logits/chosen": -1.7752397060394287,
|
656 |
+
"logits/rejected": -1.8195409774780273,
|
657 |
+
"logps/chosen": -42.48664855957031,
|
658 |
+
"logps/rejected": -97.59371185302734,
|
659 |
+
"loss": 0.4339,
|
660 |
+
"rewards/accuracies": 0.5,
|
661 |
+
"rewards/chosen": 0.8835798501968384,
|
662 |
+
"rewards/margins": 1.3967663049697876,
|
663 |
+
"rewards/rejected": -0.513186514377594,
|
664 |
+
"step": 330
|
665 |
+
},
|
666 |
+
{
|
667 |
+
"epoch": 5.37,
|
668 |
+
"eval_logits/chosen": -1.8319826126098633,
|
669 |
+
"eval_logits/rejected": -1.864353895187378,
|
670 |
+
"eval_logps/chosen": -46.921607971191406,
|
671 |
+
"eval_logps/rejected": -103.26231384277344,
|
672 |
+
"eval_loss": 0.42683711647987366,
|
673 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
674 |
+
"eval_rewards/chosen": 0.9151698350906372,
|
675 |
+
"eval_rewards/margins": 1.505185842514038,
|
676 |
+
"eval_rewards/rejected": -0.5900159478187561,
|
677 |
+
"eval_runtime": 8.1406,
|
678 |
+
"eval_samples_per_second": 3.44,
|
679 |
+
"eval_steps_per_second": 1.72,
|
680 |
+
"step": 330
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 5.53,
|
684 |
+
"grad_norm": 0.23427560925483704,
|
685 |
+
"learning_rate": 3.8028169014084507e-07,
|
686 |
+
"logits/chosen": -1.8918192386627197,
|
687 |
+
"logits/rejected": -1.9169318675994873,
|
688 |
+
"logps/chosen": -35.16785430908203,
|
689 |
+
"logps/rejected": -71.60049438476562,
|
690 |
+
"loss": 0.4617,
|
691 |
+
"rewards/accuracies": 0.375,
|
692 |
+
"rewards/chosen": 0.5324742197990417,
|
693 |
+
"rewards/margins": 1.1221383810043335,
|
694 |
+
"rewards/rejected": -0.5896641612052917,
|
695 |
+
"step": 340
|
696 |
+
},
|
697 |
+
{
|
698 |
+
"epoch": 5.69,
|
699 |
+
"grad_norm": 0.0,
|
700 |
+
"learning_rate": 2.394366197183099e-07,
|
701 |
+
"logits/chosen": -1.857642412185669,
|
702 |
+
"logits/rejected": -1.888279676437378,
|
703 |
+
"logps/chosen": -37.31398010253906,
|
704 |
+
"logps/rejected": -90.64387512207031,
|
705 |
+
"loss": 0.4569,
|
706 |
+
"rewards/accuracies": 0.4000000059604645,
|
707 |
+
"rewards/chosen": 0.7042752504348755,
|
708 |
+
"rewards/margins": 1.4055907726287842,
|
709 |
+
"rewards/rejected": -0.7013154625892639,
|
710 |
+
"step": 350
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 5.85,
|
714 |
+
"grad_norm": 11.415884017944336,
|
715 |
+
"learning_rate": 9.859154929577466e-08,
|
716 |
+
"logits/chosen": -1.9216959476470947,
|
717 |
+
"logits/rejected": -1.9308369159698486,
|
718 |
+
"logps/chosen": -25.689884185791016,
|
719 |
+
"logps/rejected": -36.790706634521484,
|
720 |
+
"loss": 0.5438,
|
721 |
+
"rewards/accuracies": 0.25,
|
722 |
+
"rewards/chosen": 0.4890199303627014,
|
723 |
+
"rewards/margins": 0.789040207862854,
|
724 |
+
"rewards/rejected": -0.3000202775001526,
|
725 |
+
"step": 360
|
726 |
+
},
|
727 |
+
{
|
728 |
+
"epoch": 5.85,
|
729 |
+
"eval_logits/chosen": -1.832722544670105,
|
730 |
+
"eval_logits/rejected": -1.8652076721191406,
|
731 |
+
"eval_logps/chosen": -46.90084457397461,
|
732 |
+
"eval_logps/rejected": -103.44039154052734,
|
733 |
+
"eval_loss": 0.42603132128715515,
|
734 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
735 |
+
"eval_rewards/chosen": 0.9172464609146118,
|
736 |
+
"eval_rewards/margins": 1.5250685214996338,
|
737 |
+
"eval_rewards/rejected": -0.6078222990036011,
|
738 |
+
"eval_runtime": 8.1439,
|
739 |
+
"eval_samples_per_second": 3.438,
|
740 |
+
"eval_steps_per_second": 1.719,
|
741 |
+
"step": 360
|
742 |
+
}
|
743 |
+
],
|
744 |
+
"logging_steps": 10,
|
745 |
+
"max_steps": 366,
|
746 |
+
"num_input_tokens_seen": 0,
|
747 |
+
"num_train_epochs": 6,
|
748 |
+
"save_steps": 90,
|
749 |
+
"total_flos": 0.0,
|
750 |
+
"train_batch_size": 2,
|
751 |
+
"trial_name": null,
|
752 |
+
"trial_params": null
|
753 |
+
}
|
checkpoint-360/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de569ad7b35da22832fd1fc395ab1c110ac622b038bdfcb0eee757cdc5b4b97b
|
3 |
+
size 5304
|
checkpoint-90/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.10.0
|
checkpoint-90/adapter_config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "Weni/WeniGPT-Agents-Mistral-1.0.6-SFT-merged",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 16,
|
14 |
+
"lora_dropout": 0.05,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 8,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": [
|
23 |
+
"v_proj",
|
24 |
+
"q_proj"
|
25 |
+
],
|
26 |
+
"task_type": "CAUSAL_LM",
|
27 |
+
"use_dora": false,
|
28 |
+
"use_rslora": false
|
29 |
+
}
|
checkpoint-90/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc1fb6f11ff592b65215364276acd01c77abf99204619c630325cab8c9b35c14
|
3 |
+
size 13648432
|
checkpoint-90/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b2425ba8d1c50c4c0d9e8e6feff1c707824fe534b70de713352a9d5a204adff
|
3 |
+
size 27370618
|
checkpoint-90/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3
|
3 |
+
size 14244
|
checkpoint-90/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d85fbb3ecde61d79df3a528f5e5b18350d9bf186a9590415f6ca273953853e9d
|
3 |
+
size 1064
|
checkpoint-90/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<unk>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<unk>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
checkpoint-90/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-90/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
3 |
+
size 493443
|
checkpoint-90/tokenizer_config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"additional_special_tokens": [],
|
31 |
+
"bos_token": "<s>",
|
32 |
+
"chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token }}{% endif %}{% endfor %}",
|
33 |
+
"clean_up_tokenization_spaces": false,
|
34 |
+
"eos_token": "</s>",
|
35 |
+
"legacy": true,
|
36 |
+
"max_lenght": 8192,
|
37 |
+
"max_length": 8192,
|
38 |
+
"model_max_length": 1000000000000000019884624838656,
|
39 |
+
"pad_token": "<unk>",
|
40 |
+
"padding": true,
|
41 |
+
"sp_model_kwargs": {},
|
42 |
+
"spaces_between_special_tokens": false,
|
43 |
+
"stride": 0,
|
44 |
+
"tokenizer_class": "LlamaTokenizer",
|
45 |
+
"truncation_side": "right",
|
46 |
+
"truncation_strategy": "longest_first",
|
47 |
+
"unk_token": "<unk>",
|
48 |
+
"use_default_system_prompt": false
|
49 |
+
}
|
checkpoint-90/trainer_state.json
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.529485821723938,
|
3 |
+
"best_model_checkpoint": "./mistral/20-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.5-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-20_ppid_9/checkpoint-90",
|
4 |
+
"epoch": 1.4634146341463414,
|
5 |
+
"eval_steps": 30,
|
6 |
+
"global_step": 90,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.16,
|
13 |
+
"grad_norm": 8.378021240234375,
|
14 |
+
"learning_rate": 4.0909090909090915e-06,
|
15 |
+
"logits/chosen": -1.830958604812622,
|
16 |
+
"logits/rejected": -1.8507845401763916,
|
17 |
+
"logps/chosen": -28.701984405517578,
|
18 |
+
"logps/rejected": -54.28569793701172,
|
19 |
+
"loss": 0.6924,
|
20 |
+
"rewards/accuracies": 0.20000000298023224,
|
21 |
+
"rewards/chosen": 0.0008967495523393154,
|
22 |
+
"rewards/margins": 0.0014666033675894141,
|
23 |
+
"rewards/rejected": -0.0005698538152500987,
|
24 |
+
"step": 10
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.33,
|
28 |
+
"grad_norm": 5.193418502807617,
|
29 |
+
"learning_rate": 4.887323943661972e-06,
|
30 |
+
"logits/chosen": -1.7550897598266602,
|
31 |
+
"logits/rejected": -1.770708680152893,
|
32 |
+
"logps/chosen": -47.344207763671875,
|
33 |
+
"logps/rejected": -64.0368423461914,
|
34 |
+
"loss": 0.6852,
|
35 |
+
"rewards/accuracies": 0.4000000059604645,
|
36 |
+
"rewards/chosen": 0.017231885343790054,
|
37 |
+
"rewards/margins": 0.01606021076440811,
|
38 |
+
"rewards/rejected": 0.0011716745793819427,
|
39 |
+
"step": 20
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 0.49,
|
43 |
+
"grad_norm": 7.308932304382324,
|
44 |
+
"learning_rate": 4.746478873239437e-06,
|
45 |
+
"logits/chosen": -1.781267762184143,
|
46 |
+
"logits/rejected": -1.8114898204803467,
|
47 |
+
"logps/chosen": -54.274559020996094,
|
48 |
+
"logps/rejected": -95.20500183105469,
|
49 |
+
"loss": 0.6635,
|
50 |
+
"rewards/accuracies": 0.5,
|
51 |
+
"rewards/chosen": 0.0641159638762474,
|
52 |
+
"rewards/margins": 0.061691801995038986,
|
53 |
+
"rewards/rejected": 0.0024241588544100523,
|
54 |
+
"step": 30
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 0.49,
|
58 |
+
"eval_logits/chosen": -1.7831767797470093,
|
59 |
+
"eval_logits/rejected": -1.8043663501739502,
|
60 |
+
"eval_logps/chosen": -55.16960906982422,
|
61 |
+
"eval_logps/rejected": -97.32585144042969,
|
62 |
+
"eval_loss": 0.6523757576942444,
|
63 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
64 |
+
"eval_rewards/chosen": 0.09036973863840103,
|
65 |
+
"eval_rewards/margins": 0.08673857897520065,
|
66 |
+
"eval_rewards/rejected": 0.0036311547737568617,
|
67 |
+
"eval_runtime": 8.141,
|
68 |
+
"eval_samples_per_second": 3.439,
|
69 |
+
"eval_steps_per_second": 1.72,
|
70 |
+
"step": 30
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"epoch": 0.65,
|
74 |
+
"grad_norm": 0.0,
|
75 |
+
"learning_rate": 4.6056338028169015e-06,
|
76 |
+
"logits/chosen": -1.889905333518982,
|
77 |
+
"logits/rejected": -1.9024461507797241,
|
78 |
+
"logps/chosen": -27.918941497802734,
|
79 |
+
"logps/rejected": -42.093284606933594,
|
80 |
+
"loss": 0.668,
|
81 |
+
"rewards/accuracies": 0.25,
|
82 |
+
"rewards/chosen": 0.054457180202007294,
|
83 |
+
"rewards/margins": 0.0539846234023571,
|
84 |
+
"rewards/rejected": 0.0004725646285805851,
|
85 |
+
"step": 40
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 0.81,
|
89 |
+
"grad_norm": 8.53225326538086,
|
90 |
+
"learning_rate": 4.464788732394367e-06,
|
91 |
+
"logits/chosen": -1.8278567790985107,
|
92 |
+
"logits/rejected": -1.849957823753357,
|
93 |
+
"logps/chosen": -43.8238639831543,
|
94 |
+
"logps/rejected": -68.02179718017578,
|
95 |
+
"loss": 0.6358,
|
96 |
+
"rewards/accuracies": 0.3499999940395355,
|
97 |
+
"rewards/chosen": 0.13941256701946259,
|
98 |
+
"rewards/margins": 0.13133978843688965,
|
99 |
+
"rewards/rejected": 0.008072790689766407,
|
100 |
+
"step": 50
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.98,
|
104 |
+
"grad_norm": 9.436968803405762,
|
105 |
+
"learning_rate": 4.3239436619718315e-06,
|
106 |
+
"logits/chosen": -1.805991768836975,
|
107 |
+
"logits/rejected": -1.8437427282333374,
|
108 |
+
"logps/chosen": -43.8873291015625,
|
109 |
+
"logps/rejected": -95.2943115234375,
|
110 |
+
"loss": 0.6026,
|
111 |
+
"rewards/accuracies": 0.44999998807907104,
|
112 |
+
"rewards/chosen": 0.18793432414531708,
|
113 |
+
"rewards/margins": 0.21308371424674988,
|
114 |
+
"rewards/rejected": -0.025149401277303696,
|
115 |
+
"step": 60
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"epoch": 0.98,
|
119 |
+
"eval_logits/chosen": -1.7877694368362427,
|
120 |
+
"eval_logits/rejected": -1.8098936080932617,
|
121 |
+
"eval_logps/chosen": -53.567203521728516,
|
122 |
+
"eval_logps/rejected": -97.33795928955078,
|
123 |
+
"eval_loss": 0.5890871286392212,
|
124 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
125 |
+
"eval_rewards/chosen": 0.25061002373695374,
|
126 |
+
"eval_rewards/margins": 0.2481890469789505,
|
127 |
+
"eval_rewards/rejected": 0.002420984674245119,
|
128 |
+
"eval_runtime": 8.1404,
|
129 |
+
"eval_samples_per_second": 3.44,
|
130 |
+
"eval_steps_per_second": 1.72,
|
131 |
+
"step": 60
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"epoch": 1.14,
|
135 |
+
"grad_norm": 0.0,
|
136 |
+
"learning_rate": 4.183098591549296e-06,
|
137 |
+
"logits/chosen": -1.8344879150390625,
|
138 |
+
"logits/rejected": -1.8489716053009033,
|
139 |
+
"logps/chosen": -40.38930892944336,
|
140 |
+
"logps/rejected": -60.9084358215332,
|
141 |
+
"loss": 0.6031,
|
142 |
+
"rewards/accuracies": 0.375,
|
143 |
+
"rewards/chosen": 0.19739331305027008,
|
144 |
+
"rewards/margins": 0.22638121247291565,
|
145 |
+
"rewards/rejected": -0.028987903147935867,
|
146 |
+
"step": 70
|
147 |
+
},
|
148 |
+
{
|
149 |
+
"epoch": 1.3,
|
150 |
+
"grad_norm": 5.49536657333374,
|
151 |
+
"learning_rate": 4.042253521126761e-06,
|
152 |
+
"logits/chosen": -1.7903095483779907,
|
153 |
+
"logits/rejected": -1.8362411260604858,
|
154 |
+
"logps/chosen": -44.288116455078125,
|
155 |
+
"logps/rejected": -90.21073913574219,
|
156 |
+
"loss": 0.5357,
|
157 |
+
"rewards/accuracies": 0.4749999940395355,
|
158 |
+
"rewards/chosen": 0.34061312675476074,
|
159 |
+
"rewards/margins": 0.40679749846458435,
|
160 |
+
"rewards/rejected": -0.06618441641330719,
|
161 |
+
"step": 80
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 1.46,
|
165 |
+
"grad_norm": 13.401692390441895,
|
166 |
+
"learning_rate": 3.901408450704225e-06,
|
167 |
+
"logits/chosen": -1.8004281520843506,
|
168 |
+
"logits/rejected": -1.8247934579849243,
|
169 |
+
"logps/chosen": -42.32465362548828,
|
170 |
+
"logps/rejected": -70.9749984741211,
|
171 |
+
"loss": 0.5387,
|
172 |
+
"rewards/accuracies": 0.4749999940395355,
|
173 |
+
"rewards/chosen": 0.3678433299064636,
|
174 |
+
"rewards/margins": 0.4186524450778961,
|
175 |
+
"rewards/rejected": -0.05080908536911011,
|
176 |
+
"step": 90
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"epoch": 1.46,
|
180 |
+
"eval_logits/chosen": -1.7943389415740967,
|
181 |
+
"eval_logits/rejected": -1.8181126117706299,
|
182 |
+
"eval_logps/chosen": -51.677486419677734,
|
183 |
+
"eval_logps/rejected": -97.63689422607422,
|
184 |
+
"eval_loss": 0.529485821723938,
|
185 |
+
"eval_rewards/accuracies": 0.4642857015132904,
|
186 |
+
"eval_rewards/chosen": 0.4395819306373596,
|
187 |
+
"eval_rewards/margins": 0.4670555889606476,
|
188 |
+
"eval_rewards/rejected": -0.027473628520965576,
|
189 |
+
"eval_runtime": 8.1412,
|
190 |
+
"eval_samples_per_second": 3.439,
|
191 |
+
"eval_steps_per_second": 1.72,
|
192 |
+
"step": 90
|
193 |
+
}
|
194 |
+
],
|
195 |
+
"logging_steps": 10,
|
196 |
+
"max_steps": 366,
|
197 |
+
"num_input_tokens_seen": 0,
|
198 |
+
"num_train_epochs": 6,
|
199 |
+
"save_steps": 90,
|
200 |
+
"total_flos": 0.0,
|
201 |
+
"train_batch_size": 2,
|
202 |
+
"trial_name": null,
|
203 |
+
"trial_params": null
|
204 |
+
}
|
checkpoint-90/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de569ad7b35da22832fd1fc395ab1c110ac622b038bdfcb0eee757cdc5b4b97b
|
3 |
+
size 5304
|