robin
committed on
Commit
•
1bcb1c9
1
Parent(s):
deb892f
v1.0.1
Browse files
- all_results.json +1 -0
- checkpoints/checkpoint_125000/model.safetensors +3 -0
- checkpoints/checkpoint_125000/optimizer.bin +3 -0
- checkpoints/checkpoint_125000/random_states_0.pkl +3 -0
- checkpoints/checkpoint_125000/scaler.pt +3 -0
- checkpoints/checkpoint_125000/scheduler.bin +3 -0
- config.json +28 -0
- generation_config.json +7 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer_config.json +40 -0
- vocab.json +0 -0
all_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"perplexity": 15.868202658528542, "model.decoder.final_layer_norm": 101.18981813023835, "model.decoder.layers.0": 1.020689142126247, "model.decoder.layers.1": 1.1320660508801887, "model.decoder.layers.2": 1.3580988537032161, "model.decoder.layers.3": 2.18800213048295, "model.decoder.layers.4": 2.8558699888903187, "model.decoder.layers.5": 3.4455860593342904, "model.decoder.layers.6": 3.7855113014076314, "model.decoder.layers.7": 3.920995691600322, "model.decoder.layers.8": 4.2376058533420515, "model.decoder.layers.9": 7.535203097723956, "model.decoder.layers.10": 10.981355136470572, "model.decoder.layers.11": 13.302202515874237, "model.decoder.layers.0.fc2": 0.14244042032539614, "model.decoder.layers.1.fc2": 0.10659956112600641, "model.decoder.layers.2.fc2": 0.09491643980483679, "model.decoder.layers.3.fc2": 0.12168890244089521, "model.decoder.layers.4.fc2": 0.1355718453686525, "model.decoder.layers.5.fc2": 0.14029372756486583, "model.decoder.layers.6.fc2": 0.18352150649856655, "model.decoder.layers.7.fc2": 0.30289249055464446, "model.decoder.layers.8.fc2": 0.43588046496673627, "model.decoder.layers.9.fc2": 0.7294037110919928, "model.decoder.layers.10.fc2": 0.8463573636711803, "model.decoder.layers.11.fc2": 0.46096582412139914, "model.decoder.layers.0.final_layer_norm": 1.2995478952655417, "model.decoder.layers.1.final_layer_norm": 0.6194378119464241, "model.decoder.layers.2.final_layer_norm": 0.8568585898286082, "model.decoder.layers.3.final_layer_norm": 0.7807713759038737, "model.decoder.layers.4.final_layer_norm": 0.7916218009980499, "model.decoder.layers.5.final_layer_norm": 0.8246828628029137, "model.decoder.layers.6.final_layer_norm": 0.9123977971781304, "model.decoder.layers.7.final_layer_norm": 1.0360167974151915, "model.decoder.layers.8.final_layer_norm": 1.2770969496272249, "model.decoder.layers.9.final_layer_norm": 1.4938973791931043, "model.decoder.layers.10.final_layer_norm": 2.146777304293299, "model.decoder.layers.11.final_layer_norm": 
1.8395570718623886, "model.decoder.layers.0.self_attn.out_proj": 0.8137831632265321, "model.decoder.layers.1.self_attn.out_proj": 0.545522271736432, "model.decoder.layers.2.self_attn.out_proj": 0.15213572283199325, "model.decoder.layers.3.self_attn.out_proj": 0.21391866859604258, "model.decoder.layers.4.self_attn.out_proj": 0.2305717418784452, "model.decoder.layers.5.self_attn.out_proj": 0.2579686205674702, "model.decoder.layers.6.self_attn.out_proj": 0.3247806831092686, "model.decoder.layers.7.self_attn.out_proj": 0.4884541829427083, "model.decoder.layers.8.self_attn.out_proj": 0.9126374161815519, "model.decoder.layers.9.self_attn.out_proj": 1.6771561638699741, "model.decoder.layers.10.self_attn.out_proj": 3.3217248582654424, "model.decoder.layers.11.self_attn.out_proj": 3.274507000597844, "model.decoder.layers.0.self_attn_layer_norm": 3.439960506678246, "model.decoder.layers.1.self_attn_layer_norm": 4.605313908903051, "model.decoder.layers.2.self_attn_layer_norm": 10.448016800343913, "model.decoder.layers.3.self_attn_layer_norm": 14.245046605550517, "model.decoder.layers.4.self_attn_layer_norm": 13.998977129793353, "model.decoder.layers.5.self_attn_layer_norm": 10.991126992436543, "model.decoder.layers.6.self_attn_layer_norm": 10.88440672939235, "model.decoder.layers.7.self_attn_layer_norm": 6.706205302252689, "model.decoder.layers.8.self_attn_layer_norm": 7.820628746474299, "model.decoder.layers.9.self_attn_layer_norm": 6.4533082376271675, "model.decoder.layers.10.self_attn_layer_norm": 6.441283310916947, "model.decoder.layers.11.self_attn_layer_norm": 7.703703072555025, "max_inf_norm": 101.18981813023835, "max_ffn_inf_norm": 0.8463573636711803, "max_layer_inf_norm": 13.302202515874237, "avg_kurtosis": 21.36568820086953, "max_kurtosis": 219.27405422052365, "max_kurtosis_layers": 34.90877774052428}
|
checkpoints/checkpoint_125000/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cba809044d66f64db984cc6184355629d72966a32d58ab32b6e778b35a5f313
|
3 |
+
size 496261008
|
checkpoints/checkpoint_125000/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ccbe63f9eab192530b2cbe117fd775967dec9d0b6ede0d8172d478513f30515
|
3 |
+
size 992641146
|
checkpoints/checkpoint_125000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:943bfb62ea47096a7c942b209c569319742615e25971f7a41046306d04528c12
|
3 |
+
size 14540
|
checkpoints/checkpoint_125000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0f99a672d2f6f56225523889be48d691b530390b1a2af964af62ff509b55776
|
3 |
+
size 988
|
checkpoints/checkpoint_125000/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14152c5e3fc91645bb41d77afd9462d75210d61661f9d0b4aed209e31f5d7f4c
|
3 |
+
size 1064
|
config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_remove_final_layer_norm": false,
|
3 |
+
"activation_function": "relu",
|
4 |
+
"architectures": [
|
5 |
+
"OPTForCausalLM"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 2,
|
9 |
+
"do_layer_norm_before": true,
|
10 |
+
"dropout": 0.1,
|
11 |
+
"enable_bias": true,
|
12 |
+
"eos_token_id": 2,
|
13 |
+
"ffn_dim": 3072,
|
14 |
+
"hidden_size": 768,
|
15 |
+
"init_std": 0.006,
|
16 |
+
"layer_norm_elementwise_affine": true,
|
17 |
+
"layerdrop": 0.0,
|
18 |
+
"max_position_embeddings": 512,
|
19 |
+
"model_type": "opt",
|
20 |
+
"num_attention_heads": 12,
|
21 |
+
"num_hidden_layers": 12,
|
22 |
+
"pad_token_id": 1,
|
23 |
+
"torch_dtype": "float32",
|
24 |
+
"transformers_version": "4.31.0",
|
25 |
+
"use_cache": true,
|
26 |
+
"vocab_size": 50272,
|
27 |
+
"word_embed_proj_dim": 768
|
28 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 2,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"pad_token_id": 1,
|
6 |
+
"transformers_version": "4.31.0"
|
7 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84647fb90fb604f25454b17b57d3132a4cef1ea242e3d45e818da6a2ceaf57fc
|
3 |
+
size 496305242
|
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "</s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": true,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "<pad>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": true,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"unk_token": {
|
24 |
+
"content": "</s>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": true,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
}
|
30 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"bos_token": {
|
5 |
+
"__type": "AddedToken",
|
6 |
+
"content": "</s>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": true,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"clean_up_tokenization_spaces": true,
|
13 |
+
"eos_token": {
|
14 |
+
"__type": "AddedToken",
|
15 |
+
"content": "</s>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": true,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false
|
20 |
+
},
|
21 |
+
"errors": "replace",
|
22 |
+
"model_max_length": 1000000000000000019884624838656,
|
23 |
+
"pad_token": {
|
24 |
+
"__type": "AddedToken",
|
25 |
+
"content": "<pad>",
|
26 |
+
"lstrip": false,
|
27 |
+
"normalized": true,
|
28 |
+
"rstrip": false,
|
29 |
+
"single_word": false
|
30 |
+
},
|
31 |
+
"tokenizer_class": "GPT2Tokenizer",
|
32 |
+
"unk_token": {
|
33 |
+
"__type": "AddedToken",
|
34 |
+
"content": "</s>",
|
35 |
+
"lstrip": false,
|
36 |
+
"normalized": true,
|
37 |
+
"rstrip": false,
|
38 |
+
"single_word": false
|
39 |
+
}
|
40 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|