Upload 6 files
Browse files- added_tokens.json +42 -0
- config.json +33 -0
- configuration_phi.py +62 -0
- mergekit_config.yml +17 -0
- merges.txt +0 -0
- model.safetensors.index.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"\t\t": 50294,
|
3 |
+
"\t\t\t": 50293,
|
4 |
+
"\t\t\t\t": 50292,
|
5 |
+
"\t\t\t\t\t": 50291,
|
6 |
+
"\t\t\t\t\t\t": 50290,
|
7 |
+
"\t\t\t\t\t\t\t": 50289,
|
8 |
+
"\t\t\t\t\t\t\t\t": 50288,
|
9 |
+
"\t\t\t\t\t\t\t\t\t": 50287,
|
10 |
+
" ": 50286,
|
11 |
+
" ": 50285,
|
12 |
+
" ": 50284,
|
13 |
+
" ": 50283,
|
14 |
+
" ": 50282,
|
15 |
+
" ": 50281,
|
16 |
+
" ": 50280,
|
17 |
+
" ": 50279,
|
18 |
+
" ": 50278,
|
19 |
+
" ": 50277,
|
20 |
+
" ": 50276,
|
21 |
+
" ": 50275,
|
22 |
+
" ": 50274,
|
23 |
+
" ": 50273,
|
24 |
+
" ": 50272,
|
25 |
+
" ": 50271,
|
26 |
+
" ": 50270,
|
27 |
+
" ": 50269,
|
28 |
+
" ": 50268,
|
29 |
+
" ": 50267,
|
30 |
+
" ": 50266,
|
31 |
+
" ": 50265,
|
32 |
+
" ": 50264,
|
33 |
+
" ": 50263,
|
34 |
+
" ": 50262,
|
35 |
+
" ": 50261,
|
36 |
+
" ": 50260,
|
37 |
+
" ": 50259,
|
38 |
+
" ": 50258,
|
39 |
+
" ": 50257,
|
40 |
+
"<|im_end|>": 50295,
|
41 |
+
"<|im_start|>": 50296
|
42 |
+
}
|
config.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "vince62s/phi-2-psy",
|
3 |
+
"activation_function": "gelu_new",
|
4 |
+
"architectures": [
|
5 |
+
"PhiForCausalLM"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.0,
|
8 |
+
"auto_map": {
|
9 |
+
"AutoConfig": "vince62s/phi-2-psy--configuration_phi.PhiConfig",
|
10 |
+
"AutoModelForCausalLM": "vince62s/phi-2-psy--modeling_phi.PhiForCausalLM"
|
11 |
+
},
|
12 |
+
"embd_pdrop": 0.0,
|
13 |
+
"flash_attn": false,
|
14 |
+
"flash_rotary": false,
|
15 |
+
"fused_dense": false,
|
16 |
+
"img_processor": null,
|
17 |
+
"initializer_range": 0.02,
|
18 |
+
"layer_norm_epsilon": 1e-05,
|
19 |
+
"model_type": "phi-msft",
|
20 |
+
"n_embd": 2560,
|
21 |
+
"n_head": 32,
|
22 |
+
"n_head_kv": null,
|
23 |
+
"n_inner": null,
|
24 |
+
"n_layer": 32,
|
25 |
+
"n_positions": 2048,
|
26 |
+
"resid_pdrop": 0.1,
|
27 |
+
"rotary_dim": 32,
|
28 |
+
"tie_word_embeddings": false,
|
29 |
+
"torch_dtype": "bfloat16",
|
30 |
+
"transformers_version": "4.35.2",
|
31 |
+
"use_cache": false,
|
32 |
+
"vocab_size": 51200
|
33 |
+
}
|
configuration_phi.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Microsoft Corporation.
|
2 |
+
# Licensed under the MIT license.
|
3 |
+
|
4 |
+
import math
|
5 |
+
from typing import Optional
|
6 |
+
|
7 |
+
from transformers import PretrainedConfig
|
8 |
+
|
9 |
+
|
10 |
+
class PhiConfig(PretrainedConfig):
    """Configuration class for the Phi ("phi-msft") architecture.

    Stores the hyperparameters used to instantiate a Phi model. Attribute
    names follow the original Microsoft Phi naming (``n_embd``, ``n_head``,
    ...); ``attribute_map`` exposes them under the standard
    ``transformers`` names (``hidden_size``, ``num_attention_heads``, ...).

    Args:
        vocab_size: Raw vocabulary size; padded up to the next multiple of
            ``pad_vocab_size_multiple`` (see below).
        n_positions: Maximum sequence length (``max_position_embeddings``).
        n_embd: Hidden size of the transformer (``hidden_size``).
        n_layer: Number of transformer blocks (``num_hidden_layers``).
        n_inner: Inner (MLP) dimension; ``None`` lets the model pick its
            default expansion.
        n_head: Number of attention heads (``num_attention_heads``).
        n_head_kv: Number of key/value heads for grouped-query attention;
            ``None`` means full multi-head attention.
        rotary_dim: Number of dimensions rotary embeddings are applied to;
            clamped to the per-head dimension. ``None`` disables clamping
            and is passed through unchanged.
        activation_function: Name of the MLP activation.
        flash_attn: Whether to use a flash-attention kernel.
        flash_rotary: Whether to use a fused rotary-embedding kernel.
        fused_dense: Whether to use fused dense kernels.
        attn_pdrop: Attention dropout probability.
        embd_pdrop: Embedding dropout probability.
        resid_pdrop: Residual dropout probability.
        layer_norm_epsilon: Epsilon used by layer normalization.
        initializer_range: Std-dev of the weight initializer.
        tie_word_embeddings: Whether input/output embeddings share weights.
        pad_vocab_size_multiple: Vocabulary is padded to a multiple of this
            value (GEMM kernels prefer aligned sizes).
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "phi-msft"
    attribute_map = {
        "max_position_embeddings": "n_positions",
        "hidden_size": "n_embd",
        "num_attention_heads": "n_head",
        "num_hidden_layers": "n_layer",
    }

    def __init__(
        self,
        vocab_size: int = 50304,
        n_positions: int = 2048,
        n_embd: int = 1024,
        n_layer: int = 20,
        n_inner: Optional[int] = None,
        n_head: int = 16,
        n_head_kv: Optional[int] = None,
        rotary_dim: Optional[int] = 32,
        activation_function: Optional[str] = "gelu_new",
        flash_attn: bool = False,
        flash_rotary: bool = False,
        fused_dense: bool = False,
        attn_pdrop: float = 0.0,
        embd_pdrop: float = 0.0,
        resid_pdrop: float = 0.0,
        layer_norm_epsilon: float = 1e-5,
        initializer_range: float = 0.02,
        tie_word_embeddings: bool = False,
        pad_vocab_size_multiple: int = 64,
        **kwargs
    ) -> None:
        # Pad the vocabulary up to the next multiple of
        # `pad_vocab_size_multiple` so embedding/projection GEMMs are aligned.
        self.vocab_size = int(math.ceil(vocab_size / pad_vocab_size_multiple) * pad_vocab_size_multiple)
        self.n_positions = n_positions
        self.n_embd = n_embd
        self.n_layer = n_layer
        self.n_inner = n_inner
        self.n_head = n_head
        self.n_head_kv = n_head_kv
        # Fix: `rotary_dim` is Optional[int]; the original unconditionally
        # did `min(rotary_dim, ...)`, which raises TypeError for None.
        # Clamp to the per-head dimension only when a value is given.
        self.rotary_dim = min(rotary_dim, n_embd // n_head) if rotary_dim is not None else None
        self.activation_function = activation_function
        self.flash_attn = flash_attn
        self.flash_rotary = flash_rotary
        self.fused_dense = fused_dense
        self.attn_pdrop = attn_pdrop
        self.embd_pdrop = embd_pdrop
        self.resid_pdrop = resid_pdrop
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range

        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
mergekit_config.yml
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
slices:
|
3 |
+
- sources:
|
4 |
+
- model: rhysjones/phi-2-orange
|
5 |
+
layer_range: [0, 32]
|
6 |
+
- model: cognitivecomputations/dolphin-2_6-phi-2
|
7 |
+
layer_range: [0, 32]
|
8 |
+
merge_method: slerp
|
9 |
+
base_model: rhysjones/phi-2-orange
|
10 |
+
parameters:
|
11 |
+
t:
|
12 |
+
- filter: self_attn
|
13 |
+
value: [0, 0.5, 0.3, 0.7, 1]
|
14 |
+
- filter: mlp
|
15 |
+
value: [1, 0.5, 0.7, 0.3, 0]
|
16 |
+
- value: 0.5
|
17 |
+
dtype: bfloat16
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors.index.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metadata": {"mergekit_version": "0.0.3.2"}, "weight_map": {"transformer.embd.wte.weight": "model-00001-of-00003.safetensors", "transformer.h.0.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.0.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.1.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.1.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.10.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.10.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.fc1.bias": 
"model-00001-of-00003.safetensors", "transformer.h.10.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.11.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.11.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.12.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.12.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.13.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.13.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mixer.out_proj.bias": 
"model-00001-of-00003.safetensors", "transformer.h.13.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.14.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.14.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.14.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.14.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.15.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.15.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.16.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.16.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mixer.Wqkv.bias": 
"model-00001-of-00003.safetensors", "transformer.h.16.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.17.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.17.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.18.ln.bias": "model-00001-of-00003.safetensors", "transformer.h.18.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.18.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.18.mixer.Wqkv.weight": "model-00001-of-00003.safetensors", "transformer.h.18.mixer.out_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.18.mixer.out_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.18.mlp.fc1.bias": "model-00001-of-00003.safetensors", "transformer.h.18.mlp.fc1.weight": "model-00001-of-00003.safetensors", "transformer.h.18.mlp.fc2.bias": "model-00001-of-00003.safetensors", "transformer.h.18.mlp.fc2.weight": "model-00001-of-00003.safetensors", "transformer.h.19.ln.bias": 
"model-00001-of-00003.safetensors", "transformer.h.19.ln.weight": "model-00001-of-00003.safetensors", "transformer.h.19.mixer.Wqkv.bias": "model-00001-of-00003.safetensors", "transformer.h.19.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.2.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.2.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.20.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.20.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.20.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.20.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.fc2.bias": 
"model-00002-of-00003.safetensors", "transformer.h.20.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.21.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.21.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.22.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.22.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.22.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.22.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.22.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.22.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.23.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.23.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.23.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.23.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.23.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.23.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.fc1.bias": 
"model-00002-of-00003.safetensors", "transformer.h.23.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.24.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.24.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.24.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.24.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.24.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.24.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.24.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.24.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.24.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.24.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.25.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.25.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.25.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.25.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.25.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.25.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.25.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.25.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.25.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.25.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.26.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.26.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.26.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.26.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.26.mixer.out_proj.bias": 
"model-00002-of-00003.safetensors", "transformer.h.26.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.26.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.26.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.26.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.26.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.27.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.27.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.27.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.27.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.27.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.27.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.27.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.27.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.27.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.27.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.28.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.28.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.28.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.28.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.28.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.28.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.28.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.28.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.28.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.28.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.29.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.29.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.29.mixer.Wqkv.bias": 
"model-00002-of-00003.safetensors", "transformer.h.29.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.29.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.29.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.29.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.29.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.29.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.29.mlp.fc2.weight": "model-00002-of-00003.safetensors", "transformer.h.3.ln.bias": "model-00002-of-00003.safetensors", "transformer.h.3.ln.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mixer.Wqkv.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mixer.Wqkv.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mixer.out_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mixer.out_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.fc1.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.fc1.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.fc2.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.fc2.weight": "model-00003-of-00003.safetensors", "transformer.h.30.ln.bias": "model-00003-of-00003.safetensors", "transformer.h.30.ln.weight": "model-00003-of-00003.safetensors", "transformer.h.4.ln.bias": "model-00003-of-00003.safetensors", "transformer.h.4.ln.weight": "model-00003-of-00003.safetensors", "transformer.h.4.mixer.Wqkv.bias": "model-00003-of-00003.safetensors", "transformer.h.4.mixer.Wqkv.weight": "model-00003-of-00003.safetensors", "transformer.h.4.mixer.out_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.4.mixer.out_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.4.mlp.fc1.bias": "model-00003-of-00003.safetensors", "transformer.h.4.mlp.fc1.weight": "model-00003-of-00003.safetensors", "transformer.h.4.mlp.fc2.bias": 
"model-00003-of-00003.safetensors", "transformer.h.4.mlp.fc2.weight": "model-00003-of-00003.safetensors", "transformer.h.5.ln.bias": "model-00003-of-00003.safetensors", "transformer.h.5.ln.weight": "model-00003-of-00003.safetensors", "transformer.h.5.mixer.Wqkv.bias": "model-00003-of-00003.safetensors", "transformer.h.5.mixer.Wqkv.weight": "model-00003-of-00003.safetensors", "transformer.h.5.mixer.out_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.5.mixer.out_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.5.mlp.fc1.bias": "model-00003-of-00003.safetensors", "transformer.h.5.mlp.fc1.weight": "model-00003-of-00003.safetensors", "transformer.h.5.mlp.fc2.bias": "model-00003-of-00003.safetensors", "transformer.h.5.mlp.fc2.weight": "model-00003-of-00003.safetensors", "transformer.h.6.ln.bias": "model-00003-of-00003.safetensors", "transformer.h.6.ln.weight": "model-00003-of-00003.safetensors", "transformer.h.6.mixer.Wqkv.bias": "model-00003-of-00003.safetensors", "transformer.h.6.mixer.Wqkv.weight": "model-00003-of-00003.safetensors", "transformer.h.6.mixer.out_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.6.mixer.out_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.fc1.bias": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.fc1.weight": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.fc2.bias": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.fc2.weight": "model-00003-of-00003.safetensors", "transformer.h.7.ln.bias": "model-00003-of-00003.safetensors", "transformer.h.7.ln.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mixer.Wqkv.bias": "model-00003-of-00003.safetensors", "transformer.h.7.mixer.Wqkv.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mixer.out_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.7.mixer.out_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.fc1.bias": "model-00003-of-00003.safetensors", 
"transformer.h.7.mlp.fc1.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.fc2.bias": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.fc2.weight": "model-00003-of-00003.safetensors", "transformer.h.8.ln.bias": "model-00003-of-00003.safetensors", "transformer.h.8.ln.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mixer.Wqkv.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mixer.Wqkv.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mixer.out_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mixer.out_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.fc1.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.fc1.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.fc2.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.fc2.weight": "model-00003-of-00003.safetensors", "transformer.h.9.ln.bias": "model-00003-of-00003.safetensors", "transformer.h.9.ln.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mixer.Wqkv.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mixer.Wqkv.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mixer.out_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mixer.out_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.fc1.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.fc1.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.fc2.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.fc2.weight": "model-00003-of-00003.safetensors", "lm_head.linear.bias": "model-00003-of-00003.safetensors", "lm_head.linear.weight": "model-00003-of-00003.safetensors", "lm_head.ln.bias": "model-00003-of-00003.safetensors", "lm_head.ln.weight": "model-00003-of-00003.safetensors", "transformer.h.30.mixer.Wqkv.bias": "model-00003-of-00003.safetensors", "transformer.h.30.mixer.Wqkv.weight": "model-00003-of-00003.safetensors", 
"transformer.h.30.mixer.out_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.30.mixer.out_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.30.mlp.fc1.bias": "model-00003-of-00003.safetensors", "transformer.h.30.mlp.fc1.weight": "model-00003-of-00003.safetensors", "transformer.h.30.mlp.fc2.bias": "model-00003-of-00003.safetensors", "transformer.h.30.mlp.fc2.weight": "model-00003-of-00003.safetensors", "transformer.h.31.ln.bias": "model-00003-of-00003.safetensors", "transformer.h.31.ln.weight": "model-00003-of-00003.safetensors", "transformer.h.31.mixer.Wqkv.bias": "model-00003-of-00003.safetensors", "transformer.h.31.mixer.Wqkv.weight": "model-00003-of-00003.safetensors", "transformer.h.31.mixer.out_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.31.mixer.out_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.31.mlp.fc1.bias": "model-00003-of-00003.safetensors", "transformer.h.31.mlp.fc1.weight": "model-00003-of-00003.safetensors", "transformer.h.31.mlp.fc2.bias": "model-00003-of-00003.safetensors", "transformer.h.31.mlp.fc2.weight": "model-00003-of-00003.safetensors"}}
|