zolicsaki committed
Commit 5e157d9
1 Parent(s): f8450f1

Upload folder using huggingface_hub (#1)


- dc9f0f3514da3338eae72c3543cd5243eca2baae1dd245d7337ac09b0e36e37a (aa0328cef4da2f124ac680e93d93492ce63c6254)
- bfa65d7e1ebdc228ea7dfcc0e70e6a43ef6c786bdfa24caec58638a321d795b0 (bf4d949939f4f14d980d69043a63a3da5b14c09a)
- ee32642ad5e1e724dcb850e4dd236571e5bfdee345e4d6d516393b06ff289195 (dd075c87c14e232ab6b74eac3748ca54dc8fbd47)
- 712cc26933c2eb0348769bca0833fd1c5047bdbf5ccd15160feda31b9aae0940 (e1ee6beffefca8cc5967019db1949ab2076c5a21)
- e02e163837707049807e0567f4254fe32833288d2e998230a06032602f250aa3 (df103ae2420608cfdb5d637fee448d6747422648)
- b379062007ccde53c1d927d726ef17fad56e32c4772a456d20f8a94abd0b8f2e (cf84a26b39cb93069fa01b5c7ec42a85df40c9cf)
- e381c5de43dfe70af8a94d06ad850593d104726f7c8302f9d055fef0435d1067 (f9d8ac0cc601713499f86f242bbedef87ada27ac)
- e6d59f64bd0d7b20629657bd6ef0d610610170f6beb75fa2cd54d686519ad161 (e8b8037eb3d2927c28e1501f7957c724a57c04a6)
- 066ee771dd06e1ae2295e962b4e960c32f5f7af70e8958fae4a705a81f385cae (c7aaf94d27c1cacd423d8483e6a142e6b403756b)
- bc48d934be5044a9080a2478d9304bdc9359fa8f3dc857a952b396ff2e3eb594 (3a2e28c45106b53f7eeb836077a042eeed2c8fdb)
- ebcb72d500d664cff41290b7fda85d1b74f425117788abd7e6f6022b405b98b6 (dd5f99b0e68c4b81fa544e7bccb9fa74bd508b82)
- cf6a526f6cac07379fb9e146b34dd31b6fbc61cc5a1a21454a7191f5ff3865f0 (bbe8fbfd1563f9ed6a37d5aabec69ea4487d59da)
- 15e98049b45261075bc76692195cff0163b434e4f3af9e3d8f7337d97a5f8f67 (dedc5cd6b3555641c9eeb764fbde3e0011b7bdab)
- 52ef638f3fd81388d56563661a9d91fc42d9f19f180b8845b896ecc4921bc98f (a17ff4deb7779c5f6c2f0ac4cef876c98aebe434)
- d1d04f6d0a91fb8b693918371d487310ca08f76da5aa5dfc6232efe28cef3496 (e5495e43833eb8af9321ac55fe235c1946e74564)
- 2c8974c272103d3cba88e62e6c35e38b67e538a32f58910f0336308135600144 (91b48a4f94bd0d8b62d489b40ac71a6cafa33e95)
- e4ed96029157bce084b174bddd6f2de90397ed91c3fb8d5eb33270926410bb30 (e81575fc9586219fcc0e06f3efe95326dd0c50af)
- 55c373a375707180a9e1fc0d6ab18b4bdad33dff271180fefe350c919dff3d7d (370a0c4b0a311a9ce6d24c23555be3c960407948)
- 2832d90d1bfe9f6ba3464d46a75d6304ed347f123b453313cb2acc86773a6aa0 (e3ed19f998a431be88ab945a9b753136a8de4c4a)
- ae4cae948eb06b55e2e2640b00e62e82e9b42eda3fb1b2f433e6b75b7afe4dcf (8c657143b4f4c95597210a4732a1fdda01fb973d)
- 1ef8958481a2383997d46d93135ffce8d992cfe9a50944cad98aa814ceb15ae6 (13676b5bfad7f3d70e9262342966a6ab99f2605b)
- e533eea1de0eaeb76a23978303c542285c4aba4a298b93731bc4368b17bc8270 (074ad3a03d674237640d7d585f1a4f4b104de664)
- ad86337c61a8dd9ae4740c1b7ee7f23709b7a358d9ae3bc9681602c98227da19 (4ce3da4aa162ea41950f7c8ed39dc7fdeb451e16)
- 35fc0bea45e2fa1a4dc7b362c8cc20e3898ece618bfc5ff7b85e8b0b1ec72d99 (261234d80f0e23aa7876a8816c479b100dc0b76f)
- 2ec2e1d04b4ad9131e7d2b810848d47943235a02622b81a8871c9e8f6c6ed65c (0546130970c66390b09e81a11c497e7a243ba70d)
- 632d189b973b51c83b3de82e1f16b69fbf06d45afbbf07f4613f1b433b394ac0 (0aaafb72eec6eb2c7c6ff7950765ed04621c46db)
- 62c6955b64a812c816320a5b14e619c96c644e293132d6097d177b32ec28fde9 (5de8f84169edb74858062371d189f607a9cc89cc)
- 305f5355213cbf31ccf4999db5bba32ada26885d671fc5cf9da4e7b609cc930c (c88a9eba0edc687c61ae0f59b59dd4bdeb821ed0)
- a2d69831b8c9f9d34bb09da564d44b9eba20db5aee2415111f7b18fe708fe122 (fcb3c70eaf8d39e952944daa6a4eb1be8c5db83e)
- 89e3a4c53ac3bfb457f92ee1fedc0deaa1eba53eb3534beeb6e65633e0638980 (eb121e4bb660e3b5a298b58dea608d8a0d108adc)
- bdf321893c6183081d696c716ea705760a1d7f5cf8311d130524ba47e4121654 (405eea5b51c48b56daec4af4dbb5f5835cff5992)
- fc21e479409ac7bb263b3aa85a0bdc5097682225010bef34aa7e0717ce861de1 (68c7d0b5dcaba5cfd2005375f123f8ad134a1ede)
- 07c5724ff35224c03bc5bdb33cbdfcbd8211e08d4b50299652b2415132971c97 (d0ed0357fe92f9671a407b573af5f32d20616860)
- c400a3c868e24b63b0711624722d5f7cc4a114c177f96b327913086eecf02343 (e3d0139ff3787ac4fccad972f3b978a1d640e353)
- 499b035220ab8e91e9b6c011ec459ab31302e8cd8c92a13b9302b432c31780a9 (85352ef16d1fdf5faad1ca1d36a879415982842d)
- 6ef5af1f55e5f19cac26670f039102aa2183d5f19851c3d1fb8bde5885ad9bc1 (9c2338d21d9376026e1b242695c7add1583276a6)

added_tokens.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "</s>": 2,
+   "<s>": 1,
+   "<unk>": 0
+ }
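
These are the standard Llama special-token ids. A minimal sketch of how added_tokens.json is consumed once the tokenizer loads (the repo id below is a placeholder, not this model's actual name):

from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual model repository.
tokenizer = AutoTokenizer.from_pretrained("your-org/your-model")

# The ids come straight from added_tokens.json above.
assert tokenizer.convert_tokens_to_ids("<unk>") == 0
assert tokenizer.convert_tokens_to_ids("<s>") == 1
assert tokenizer.convert_tokens_to_ids("</s>") == 2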
config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "/import/ml-sc-nlpcheckpoints-scratch3/zoltanc/70b_international_llamas/ar_5500_ckpt_bin/config.json",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 8192,
+   "initializer_range": 0.02,
+   "intermediate_size": 28672,
+   "max_position_embeddings": 4096,
+   "model_name": "",
+   "model_type": "llama",
+   "num_attention_heads": 64,
+   "num_hidden_layers": 80,
+   "num_key_value_heads": 8,
+   "pad_token_id": 0,
+   "pretraining_tp": 1,
+   "return_dict": false,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.31.0",
+   "use_cache": false,
+   "vocab_size": 57344
+ }
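
The config describes a 70B-class Llama variant: 80 layers, hidden size 8192, 64 query heads sharing 8 key/value heads (grouped-query attention), and a 57344-token vocabulary. A minimal sketch, assuming the JSON above is saved locally as config.json, of the shapes these fields imply:

import json

with open("config.json") as f:
    cfg = json.load(f)

# Per-head width: 8192 hidden dims across 64 query heads.
head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]                 # 128
# Grouped-query attention: 64 query heads share 8 key/value heads.
queries_per_kv = cfg["num_attention_heads"] // cfg["num_key_value_heads"]   # 8
# Embedding table alone: 57344 * 8192 = 469,762,048 parameters.
embed_params = cfg["vocab_size"] * cfg["hidden_size"]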
pytorch_model-01-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:59084fde05cbe9f77dd48175aad832b66132b7bcc560e32185b3eff4ece9a39a
+ size 3892352979
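
Each .bin entry in this diff is a Git LFS pointer, not the weights themselves: three lines giving the LFS spec version, the sha256 of the real object, and its size in bytes. A minimal sketch (file paths are hypothetical) that parses such a pointer and verifies a downloaded shard against it:

import hashlib

def parse_lfs_pointer(path):
    # Each pointer line is "key value", e.g. "oid sha256:59084f...".
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_blob(blob_path, pointer):
    # Stream the blob so multi-GB shards don't need to fit in memory.
    h = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == pointer["oid"].split(":", 1)[1]

ptr = parse_lfs_pointer("pytorch_model-01-of-35.bin")   # the pointer file above
print(verify_blob("downloads/pytorch_model-01-of-35.bin", ptr))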
pytorch_model-02-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f428a983dbc123a5cd6dc571f002dea7385d10e4c1fa520d6792fe98a53663ab
+ size 4194411495
pytorch_model-03-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aade9b30a80559d8d0973e43aabe46dd4f1090e1170e17c79c9020e6499b1aaa
+ size 3892386649
pytorch_model-04-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24e426af0ec73f4639f88ff28f06f61876ff7913a6fd554a3ca403acadf11926
+ size 3892386649
pytorch_model-05-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:72cd0e8695aea58936d639386bf55d3df4a23c647df9c0cf897850b6dbeba167
+ size 4194411495
pytorch_model-06-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b756814b5cc6f161d7ddb526c0b6af31d2ee60bc497d3f3ed0fdba0174a9e10
+ size 3892386649
pytorch_model-07-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e26463f477d2fa517e991d80d592437d0155d2a3660ddaea59c28392f8201c69
+ size 3892386649
pytorch_model-08-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05fa5bed02099cadf24f0d525d513d5867721ec9deb7a6161384ee6d6bde9a0f
+ size 4194411495
pytorch_model-09-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f0165f237fa6fa7b6e522fce467da644135996fe1c2fae48484672f35dd59a75
+ size 3892386649
pytorch_model-10-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f825fa09056dfff72520eea2d85cc7c11e8aba77595e7407d8ade0eb17e63c01
+ size 3892386649
pytorch_model-11-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d599289ef08e5a8260bab91cc6107645a0540d1ada2e68eb43d7cda4776b277
+ size 4194411589
pytorch_model-12-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c73bf896250162024b369fa20580034406d871c05abff69f80a8f4e85605ada
+ size 3892386672
pytorch_model-13-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c658434afbba99a84f5d584510d54c64fb7aff91f7016c6e0344b0b21b328dce
+ size 3892386672
pytorch_model-14-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:37427e84ea3d87269e553014f1ad370d9c0f235c0da992f08533e0a79bde70fb
+ size 4194411589
pytorch_model-15-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8095b0a5940a4502614de9a91afa19de059d613be7f9f346d8a87b1fac790a76
+ size 3892386672
pytorch_model-16-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b1c23606534f0d17fce3e9aea1107855c5a1f593c6299b5cf3f151d01670dbeb
+ size 3892386672
pytorch_model-17-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9275a219ee9be604adca174a1bcfd86ae73c3b99f80e8c9ffb176e3b85cab558
+ size 4194411589
pytorch_model-18-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c881fbb65240777441c72a7511b64723c8542d1ae5452fe582015ed25d398dcf
+ size 3892386672
pytorch_model-19-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb698f35caf5e226fd491e846a4761cb917caa1b9372165d68ae28dec5f46a4b
+ size 3892386672
pytorch_model-20-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e942a9580fe0b86782e57cd7e7f0d49d81652df7f35d5b78cfd9cfb03a0c6b91
+ size 4194411589
pytorch_model-21-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b3fd00154d0a5958db867c4af943eaea15f342b3030e5db6f1c366ffa7073ab
+ size 3892386672
pytorch_model-22-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9358009f48c7d7681f72eba5bccfcd81cb625cc1d34e2b54b5d166968ebfcba7
+ size 3892386672
pytorch_model-23-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c75bace91d84b284b5718cda7ba674c3aabaea0fa743f92c0c5e65f2d6488f0a
+ size 4194411589
pytorch_model-24-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:56c30fa6df5fac68bc8783b486569567fed55ab75f858f7775313896b5e4166a
+ size 3892386672
pytorch_model-25-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb6acd5abc5c6a599d974236ad57bd3d1fd658b42261d55f8ac5264edc461018
+ size 3892386672
pytorch_model-26-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0685ea81053ddfb64bc3f31a160cc1b3c83c4dba4809675ad7ca6251a7a73454
+ size 4194411589
pytorch_model-27-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:408f7c03e46f7794f27b9d7121d60bf078818a46e9ef0b9f674f423f5d3f5b73
+ size 3892386672
pytorch_model-28-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43034214395db2e4978752dbc91bb7e0eb2944a22d1e98e2dc2dec5ff4278bfc
+ size 3892386672
pytorch_model-29-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3374934c295b6991f0e019c7db587574581166eb9dce540461dac3bf37f5cf6c
+ size 4194411589
pytorch_model-30-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97ebc9a94c08bf5d12ad8b706b2857a5d3aa1deb0ba01d2bac8d5c6392aef68c
+ size 3892386672
pytorch_model-31-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c29300531ccacb080c42c56f0a7e81680fcd4cd6abb436650c84c1a073d3ccf3
+ size 3892386672
pytorch_model-32-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6dda29de72d165e0373162c331e8a018f1f108e0690e18b6a5febad075bd792
+ size 4194411589
pytorch_model-33-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9b3a3f102a25670fe062d561e1384d2752fabd824d0f88739c92c6ac5772a27
+ size 3892386672
pytorch_model-34-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58f962cd00882a927dd59f90793de1e7a6c64a98f3f6de0665656795e89e8dd5
+ size 3892386672
pytorch_model-35-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7845a25f57f889f01836fbad38e21ce83c0b97b8452459d2f9b279019a491f74
+ size 3120649182
pytorch_model.bin.index.json ADDED
@@ -0,0 +1 @@
+ {"metadata": {}, "weight_map": {"model.embed_tokens.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.q_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.k_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.v_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.o_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-01-of-35.bin", "model.layers.0.mlp.gate_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.mlp.up_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.mlp.down_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.input_layernorm.weight": "pytorch_model-01-of-35.bin", "model.layers.0.post_attention_layernorm.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.q_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.k_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.v_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.o_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-01-of-35.bin", "model.layers.1.mlp.gate_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.mlp.up_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.mlp.down_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.1.input_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.1.post_attention_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.q_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.k_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.v_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.o_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-02-of-35.bin", "model.layers.2.mlp.gate_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.mlp.up_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.mlp.down_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.input_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.2.post_attention_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.q_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.k_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.v_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.o_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-02-of-35.bin", "model.layers.3.mlp.gate_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.mlp.up_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.mlp.down_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.input_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.3.post_attention_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.q_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.k_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.v_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.o_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-02-of-35.bin", "model.layers.4.mlp.gate_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.4.mlp.up_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.4.mlp.down_proj.weight": "pytorch_model-03-of-35.bin", 
"model.layers.4.input_layernorm.weight": "pytorch_model-03-of-35.bin", "model.layers.4.post_attention_layernorm.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.q_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.k_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.v_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.o_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-03-of-35.bin", "model.layers.5.mlp.gate_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.mlp.up_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.mlp.down_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.input_layernorm.weight": "pytorch_model-03-of-35.bin", "model.layers.5.post_attention_layernorm.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.q_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.k_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.v_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.o_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-03-of-35.bin", "model.layers.6.mlp.gate_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.mlp.up_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.6.mlp.down_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.6.input_layernorm.weight": "pytorch_model-04-of-35.bin", "model.layers.6.post_attention_layernorm.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.q_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.k_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.v_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.o_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-04-of-35.bin", "model.layers.7.mlp.gate_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.mlp.up_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.mlp.down_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.input_layernorm.weight": "pytorch_model-04-of-35.bin", "model.layers.7.post_attention_layernorm.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.q_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.k_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.v_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.o_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-04-of-35.bin", "model.layers.8.mlp.gate_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.mlp.up_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.mlp.down_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.8.input_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.8.post_attention_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.q_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.k_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.v_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.o_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-05-of-35.bin", "model.layers.9.mlp.gate_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.mlp.up_proj.weight": "pytorch_model-05-of-35.bin", 
"model.layers.9.mlp.down_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.input_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.9.post_attention_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.q_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.k_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.v_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.o_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-05-of-35.bin", "model.layers.10.mlp.gate_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.mlp.up_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.mlp.down_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.input_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.10.post_attention_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.q_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.k_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.v_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.o_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-05-of-35.bin", "model.layers.11.mlp.gate_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.11.mlp.up_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.11.mlp.down_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.11.input_layernorm.weight": "pytorch_model-06-of-35.bin", "model.layers.11.post_attention_layernorm.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.q_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.k_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.v_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.o_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-06-of-35.bin", "model.layers.12.mlp.gate_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.mlp.up_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.mlp.down_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.input_layernorm.weight": "pytorch_model-06-of-35.bin", "model.layers.12.post_attention_layernorm.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.q_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.k_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.v_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.o_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-06-of-35.bin", "model.layers.13.mlp.gate_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.mlp.up_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.13.mlp.down_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.13.input_layernorm.weight": "pytorch_model-07-of-35.bin", "model.layers.13.post_attention_layernorm.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.q_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.k_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.v_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.o_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-07-of-35.bin", 
"model.layers.14.mlp.gate_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.mlp.up_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.mlp.down_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.input_layernorm.weight": "pytorch_model-07-of-35.bin", "model.layers.14.post_attention_layernorm.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.q_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.k_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.v_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.o_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-07-of-35.bin", "model.layers.15.mlp.gate_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.mlp.up_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.mlp.down_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.15.input_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.15.post_attention_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.q_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.k_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.v_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.o_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-08-of-35.bin", "model.layers.16.mlp.gate_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.mlp.up_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.mlp.down_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.input_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.16.post_attention_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.q_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.k_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.v_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.o_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-08-of-35.bin", "model.layers.17.mlp.gate_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.mlp.up_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.mlp.down_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.input_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.17.post_attention_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.q_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.k_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.v_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.o_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-08-of-35.bin", "model.layers.18.mlp.gate_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.18.mlp.up_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.18.mlp.down_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.18.input_layernorm.weight": "pytorch_model-09-of-35.bin", "model.layers.18.post_attention_layernorm.weight": "pytorch_model-09-of-35.bin", "model.layers.19.self_attn.q_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.self_attn.k_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.self_attn.v_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.self_attn.o_proj.weight": 
"pytorch_model-09-of-35.bin", "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-09-of-35.bin", "model.layers.19.mlp.gate_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.mlp.up_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.mlp.down_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.input_layernorm.weight": "pytorch_model-09-of-35.bin", "model.layers.19.post_attention_layernorm.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.q_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.k_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.v_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.o_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-09-of-35.bin", "model.layers.20.mlp.gate_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.mlp.up_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.20.mlp.down_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.20.input_layernorm.weight": "pytorch_model-10-of-35.bin", "model.layers.20.post_attention_layernorm.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.q_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.k_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.v_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.o_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-10-of-35.bin", "model.layers.21.mlp.gate_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.mlp.up_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.mlp.down_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.input_layernorm.weight": "pytorch_model-10-of-35.bin", "model.layers.21.post_attention_layernorm.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.q_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.k_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.v_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.o_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-10-of-35.bin", "model.layers.22.mlp.gate_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.mlp.up_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.mlp.down_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.22.input_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.22.post_attention_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.q_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.k_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.v_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.o_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-11-of-35.bin", "model.layers.23.mlp.gate_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.mlp.up_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.mlp.down_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.input_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.23.post_attention_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.24.self_attn.q_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.self_attn.k_proj.weight": "pytorch_model-11-of-35.bin", 
"model.layers.24.self_attn.v_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.self_attn.o_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-11-of-35.bin", "model.layers.24.mlp.gate_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.mlp.up_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.mlp.down_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.input_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.24.post_attention_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.q_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.k_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.v_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.o_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-11-of-35.bin", "model.layers.25.mlp.gate_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.25.mlp.up_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.25.mlp.down_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.25.input_layernorm.weight": "pytorch_model-12-of-35.bin", "model.layers.25.post_attention_layernorm.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.q_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.k_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.v_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.o_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-12-of-35.bin", "model.layers.26.mlp.gate_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.mlp.up_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.mlp.down_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.input_layernorm.weight": "pytorch_model-12-of-35.bin", "model.layers.26.post_attention_layernorm.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.q_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.k_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.v_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.o_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-12-of-35.bin", "model.layers.27.mlp.gate_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.mlp.up_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.27.mlp.down_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.27.input_layernorm.weight": "pytorch_model-13-of-35.bin", "model.layers.27.post_attention_layernorm.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.q_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.k_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.v_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.o_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-13-of-35.bin", "model.layers.28.mlp.gate_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.mlp.up_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.mlp.down_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.input_layernorm.weight": "pytorch_model-13-of-35.bin", "model.layers.28.post_attention_layernorm.weight": "pytorch_model-13-of-35.bin", 
"model.layers.29.self_attn.q_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.self_attn.k_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.self_attn.v_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.self_attn.o_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-13-of-35.bin", "model.layers.29.mlp.gate_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.mlp.up_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.mlp.down_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.29.input_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.29.post_attention_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.q_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.k_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.v_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.o_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-14-of-35.bin", "model.layers.30.mlp.gate_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.mlp.up_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.mlp.down_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.input_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.30.post_attention_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.q_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.k_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.v_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.o_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-14-of-35.bin", "model.layers.31.mlp.gate_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.mlp.up_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.mlp.down_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.input_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.31.post_attention_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.q_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.k_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.v_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.o_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.rotary_emb.inv_freq": "pytorch_model-14-of-35.bin", "model.layers.32.mlp.gate_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.32.mlp.up_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.32.mlp.down_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.32.input_layernorm.weight": "pytorch_model-15-of-35.bin", "model.layers.32.post_attention_layernorm.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.q_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.k_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.v_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.o_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.rotary_emb.inv_freq": "pytorch_model-15-of-35.bin", "model.layers.33.mlp.gate_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.mlp.up_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.mlp.down_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.input_layernorm.weight": 
"pytorch_model-15-of-35.bin", "model.layers.33.post_attention_layernorm.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.q_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.k_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.v_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.o_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.rotary_emb.inv_freq": "pytorch_model-15-of-35.bin", "model.layers.34.mlp.gate_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.mlp.up_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.34.mlp.down_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.34.input_layernorm.weight": "pytorch_model-16-of-35.bin", "model.layers.34.post_attention_layernorm.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.q_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.k_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.v_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.o_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.rotary_emb.inv_freq": "pytorch_model-16-of-35.bin", "model.layers.35.mlp.gate_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.mlp.up_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.mlp.down_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.input_layernorm.weight": "pytorch_model-16-of-35.bin", "model.layers.35.post_attention_layernorm.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.q_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.k_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.v_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.o_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.rotary_emb.inv_freq": "pytorch_model-16-of-35.bin", "model.layers.36.mlp.gate_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.mlp.up_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.mlp.down_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.36.input_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.36.post_attention_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.q_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.k_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.v_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.o_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.rotary_emb.inv_freq": "pytorch_model-17-of-35.bin", "model.layers.37.mlp.gate_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.mlp.up_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.mlp.down_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.input_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.37.post_attention_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.q_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.k_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.v_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.o_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.rotary_emb.inv_freq": "pytorch_model-17-of-35.bin", "model.layers.38.mlp.gate_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.mlp.up_proj.weight": "pytorch_model-17-of-35.bin", 
"model.layers.38.mlp.down_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.input_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.38.post_attention_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.q_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.k_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.v_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.o_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.rotary_emb.inv_freq": "pytorch_model-17-of-35.bin", "model.layers.39.mlp.gate_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.39.mlp.up_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.39.mlp.down_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.39.input_layernorm.weight": "pytorch_model-18-of-35.bin", "model.layers.39.post_attention_layernorm.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.q_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.k_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.v_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.o_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.rotary_emb.inv_freq": "pytorch_model-18-of-35.bin", "model.layers.40.mlp.gate_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.mlp.up_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.mlp.down_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.input_layernorm.weight": "pytorch_model-18-of-35.bin", "model.layers.40.post_attention_layernorm.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.q_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.k_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.v_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.o_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.rotary_emb.inv_freq": "pytorch_model-18-of-35.bin", "model.layers.41.mlp.gate_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.mlp.up_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.41.mlp.down_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.41.input_layernorm.weight": "pytorch_model-19-of-35.bin", "model.layers.41.post_attention_layernorm.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.q_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.k_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.v_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.o_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.rotary_emb.inv_freq": "pytorch_model-19-of-35.bin", "model.layers.42.mlp.gate_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.mlp.up_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.mlp.down_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.input_layernorm.weight": "pytorch_model-19-of-35.bin", "model.layers.42.post_attention_layernorm.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.q_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.k_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.v_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.o_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.rotary_emb.inv_freq": "pytorch_model-19-of-35.bin", 
"model.layers.43.mlp.gate_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.mlp.up_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.mlp.down_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.43.input_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.43.post_attention_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.q_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.k_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.v_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.o_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.rotary_emb.inv_freq": "pytorch_model-20-of-35.bin", "model.layers.44.mlp.gate_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.mlp.up_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.mlp.down_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.input_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.44.post_attention_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.q_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.k_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.v_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.o_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.rotary_emb.inv_freq": "pytorch_model-20-of-35.bin", "model.layers.45.mlp.gate_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.mlp.up_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.mlp.down_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.input_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.45.post_attention_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.q_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.k_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.v_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.o_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.rotary_emb.inv_freq": "pytorch_model-20-of-35.bin", "model.layers.46.mlp.gate_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.46.mlp.up_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.46.mlp.down_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.46.input_layernorm.weight": "pytorch_model-21-of-35.bin", "model.layers.46.post_attention_layernorm.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.q_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.k_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.v_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.o_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.rotary_emb.inv_freq": "pytorch_model-21-of-35.bin", "model.layers.47.mlp.gate_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.mlp.up_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.mlp.down_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.input_layernorm.weight": "pytorch_model-21-of-35.bin", "model.layers.47.post_attention_layernorm.weight": "pytorch_model-21-of-35.bin", "model.layers.48.self_attn.q_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.48.self_attn.k_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.48.self_attn.v_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.48.self_attn.o_proj.weight": 
"pytorch_model-21-of-35.bin", "model.layers.48.self_attn.rotary_emb.inv_freq": "pytorch_model-21-of-35.bin", "model.layers.48.mlp.gate_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.48.mlp.up_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.48.mlp.down_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.48.input_layernorm.weight": "pytorch_model-22-of-35.bin", "model.layers.48.post_attention_layernorm.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.q_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.k_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.v_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.o_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.rotary_emb.inv_freq": "pytorch_model-22-of-35.bin", "model.layers.49.mlp.gate_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.mlp.up_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.mlp.down_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.input_layernorm.weight": "pytorch_model-22-of-35.bin", "model.layers.49.post_attention_layernorm.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.q_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.k_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.v_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.o_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.rotary_emb.inv_freq": "pytorch_model-22-of-35.bin", "model.layers.50.mlp.gate_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.mlp.up_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.mlp.down_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.50.input_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.50.post_attention_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.q_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.k_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.v_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.o_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.rotary_emb.inv_freq": "pytorch_model-23-of-35.bin", "model.layers.51.mlp.gate_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.mlp.up_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.mlp.down_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.input_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.51.post_attention_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.q_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.k_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.v_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.o_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.rotary_emb.inv_freq": "pytorch_model-23-of-35.bin", "model.layers.52.mlp.gate_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.mlp.up_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.mlp.down_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.input_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.52.post_attention_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.53.self_attn.q_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.53.self_attn.k_proj.weight": "pytorch_model-23-of-35.bin", 
"model.layers.53.self_attn.v_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.53.self_attn.o_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.53.self_attn.rotary_emb.inv_freq": "pytorch_model-23-of-35.bin", "model.layers.53.mlp.gate_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.53.mlp.up_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.53.mlp.down_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.53.input_layernorm.weight": "pytorch_model-24-of-35.bin", "model.layers.53.post_attention_layernorm.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.q_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.k_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.v_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.o_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.rotary_emb.inv_freq": "pytorch_model-24-of-35.bin", "model.layers.54.mlp.gate_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.mlp.up_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.mlp.down_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.input_layernorm.weight": "pytorch_model-24-of-35.bin", "model.layers.54.post_attention_layernorm.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.q_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.k_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.v_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.o_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.rotary_emb.inv_freq": "pytorch_model-24-of-35.bin", "model.layers.55.mlp.gate_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.mlp.up_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.55.mlp.down_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.55.input_layernorm.weight": "pytorch_model-25-of-35.bin", "model.layers.55.post_attention_layernorm.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.q_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.k_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.v_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.o_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.rotary_emb.inv_freq": "pytorch_model-25-of-35.bin", "model.layers.56.mlp.gate_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.mlp.up_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.mlp.down_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.input_layernorm.weight": "pytorch_model-25-of-35.bin", "model.layers.56.post_attention_layernorm.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.q_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.k_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.v_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.o_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.rotary_emb.inv_freq": "pytorch_model-25-of-35.bin", "model.layers.57.mlp.gate_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.mlp.up_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.mlp.down_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.57.input_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.57.post_attention_layernorm.weight": "pytorch_model-26-of-35.bin", 
"model.layers.58.self_attn.q_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.self_attn.k_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.self_attn.v_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.self_attn.o_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.self_attn.rotary_emb.inv_freq": "pytorch_model-26-of-35.bin", "model.layers.58.mlp.gate_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.mlp.up_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.mlp.down_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.input_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.58.post_attention_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.q_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.k_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.v_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.o_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.rotary_emb.inv_freq": "pytorch_model-26-of-35.bin", "model.layers.59.mlp.gate_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.mlp.up_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.mlp.down_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.input_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.59.post_attention_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.q_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.k_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.v_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.o_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.rotary_emb.inv_freq": "pytorch_model-26-of-35.bin", "model.layers.60.mlp.gate_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.60.mlp.up_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.60.mlp.down_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.60.input_layernorm.weight": "pytorch_model-27-of-35.bin", "model.layers.60.post_attention_layernorm.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.q_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.k_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.v_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.o_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.rotary_emb.inv_freq": "pytorch_model-27-of-35.bin", "model.layers.61.mlp.gate_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.mlp.up_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.mlp.down_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.input_layernorm.weight": "pytorch_model-27-of-35.bin", "model.layers.61.post_attention_layernorm.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.q_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.k_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.v_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.o_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.rotary_emb.inv_freq": "pytorch_model-27-of-35.bin", "model.layers.62.mlp.gate_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.mlp.up_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.62.mlp.down_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.62.input_layernorm.weight": 
"pytorch_model-28-of-35.bin", "model.layers.62.post_attention_layernorm.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.q_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.k_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.v_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.o_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.rotary_emb.inv_freq": "pytorch_model-28-of-35.bin", "model.layers.63.mlp.gate_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.mlp.up_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.mlp.down_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.input_layernorm.weight": "pytorch_model-28-of-35.bin", "model.layers.63.post_attention_layernorm.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.q_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.k_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.v_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.o_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.rotary_emb.inv_freq": "pytorch_model-28-of-35.bin", "model.layers.64.mlp.gate_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.mlp.up_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.mlp.down_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.64.input_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.64.post_attention_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.q_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.k_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.v_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.o_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.rotary_emb.inv_freq": "pytorch_model-29-of-35.bin", "model.layers.65.mlp.gate_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.mlp.up_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.mlp.down_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.input_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.65.post_attention_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.q_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.k_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.v_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.o_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.rotary_emb.inv_freq": "pytorch_model-29-of-35.bin", "model.layers.66.mlp.gate_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.mlp.up_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.mlp.down_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.input_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.66.post_attention_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.q_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.k_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.v_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.o_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.rotary_emb.inv_freq": "pytorch_model-29-of-35.bin", "model.layers.67.mlp.gate_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.67.mlp.up_proj.weight": "pytorch_model-30-of-35.bin", 
"model.layers.67.mlp.down_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.67.input_layernorm.weight": "pytorch_model-30-of-35.bin", "model.layers.67.post_attention_layernorm.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.q_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.k_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.v_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.o_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.rotary_emb.inv_freq": "pytorch_model-30-of-35.bin", "model.layers.68.mlp.gate_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.mlp.up_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.mlp.down_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.input_layernorm.weight": "pytorch_model-30-of-35.bin", "model.layers.68.post_attention_layernorm.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.q_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.k_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.v_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.o_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.rotary_emb.inv_freq": "pytorch_model-30-of-35.bin", "model.layers.69.mlp.gate_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.mlp.up_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.69.mlp.down_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.69.input_layernorm.weight": "pytorch_model-31-of-35.bin", "model.layers.69.post_attention_layernorm.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.q_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.k_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.v_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.o_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.rotary_emb.inv_freq": "pytorch_model-31-of-35.bin", "model.layers.70.mlp.gate_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.mlp.up_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.mlp.down_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.input_layernorm.weight": "pytorch_model-31-of-35.bin", "model.layers.70.post_attention_layernorm.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.q_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.k_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.v_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.o_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.rotary_emb.inv_freq": "pytorch_model-31-of-35.bin", "model.layers.71.mlp.gate_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.mlp.up_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.mlp.down_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.71.input_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.71.post_attention_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.q_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.k_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.v_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.o_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.rotary_emb.inv_freq": "pytorch_model-32-of-35.bin", 
"model.layers.72.mlp.gate_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.mlp.up_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.mlp.down_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.input_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.72.post_attention_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.q_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.k_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.v_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.o_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.rotary_emb.inv_freq": "pytorch_model-32-of-35.bin", "model.layers.73.mlp.gate_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.mlp.up_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.mlp.down_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.input_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.73.post_attention_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.q_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.k_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.v_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.o_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.rotary_emb.inv_freq": "pytorch_model-32-of-35.bin", "model.layers.74.mlp.gate_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.74.mlp.up_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.74.mlp.down_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.74.input_layernorm.weight": "pytorch_model-33-of-35.bin", "model.layers.74.post_attention_layernorm.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.q_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.k_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.v_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.o_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.rotary_emb.inv_freq": "pytorch_model-33-of-35.bin", "model.layers.75.mlp.gate_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.mlp.up_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.mlp.down_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.input_layernorm.weight": "pytorch_model-33-of-35.bin", "model.layers.75.post_attention_layernorm.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.q_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.k_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.v_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.o_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.rotary_emb.inv_freq": "pytorch_model-33-of-35.bin", "model.layers.76.mlp.gate_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.mlp.up_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.76.mlp.down_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.76.input_layernorm.weight": "pytorch_model-34-of-35.bin", "model.layers.76.post_attention_layernorm.weight": "pytorch_model-34-of-35.bin", "model.layers.77.self_attn.q_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.self_attn.k_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.self_attn.v_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.self_attn.o_proj.weight": 
"pytorch_model-34-of-35.bin", "model.layers.77.self_attn.rotary_emb.inv_freq": "pytorch_model-34-of-35.bin", "model.layers.77.mlp.gate_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.mlp.up_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.mlp.down_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.input_layernorm.weight": "pytorch_model-34-of-35.bin", "model.layers.77.post_attention_layernorm.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.q_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.k_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.v_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.o_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.rotary_emb.inv_freq": "pytorch_model-34-of-35.bin", "model.layers.78.mlp.gate_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.mlp.up_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.mlp.down_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.78.input_layernorm.weight": "pytorch_model-35-of-35.bin", "model.layers.78.post_attention_layernorm.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.q_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.k_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.v_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.o_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.rotary_emb.inv_freq": "pytorch_model-35-of-35.bin", "model.layers.79.mlp.gate_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.mlp.up_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.mlp.down_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.input_layernorm.weight": "pytorch_model-35-of-35.bin", "model.layers.79.post_attention_layernorm.weight": "pytorch_model-35-of-35.bin", "model.norm.weight": "pytorch_model-35-of-35.bin", "lm_head.weight": "pytorch_model-35-of-35.bin"}}
pytorch_model.bin.sambatensor_index.json ADDED
@@ -0,0 +1 @@
+ {"model.embed_tokens.weight": "llamaforcausallm__model__embed_tokens__weight", "model.layers.0.self_attn.q_proj.weight": "llamaforcausallm__model__layers__0__self_attn__q_proj__weight", "model.layers.0.self_attn.k_proj.weight": "llamaforcausallm__model__layers__0__self_attn__k_proj__weight", "model.layers.0.self_attn.v_proj.weight": "llamaforcausallm__model__layers__0__self_attn__v_proj__weight", "model.layers.0.self_attn.o_proj.weight": "llamaforcausallm__model__layers__0__self_attn__o_proj__weight", "model.layers.0.mlp.gate_proj.weight": "llamaforcausallm__model__layers__0__mlp__gate_proj__weight", "model.layers.0.mlp.up_proj.weight": "llamaforcausallm__model__layers__0__mlp__up_proj__weight", "model.layers.0.mlp.down_proj.weight": "llamaforcausallm__model__layers__0__mlp__down_proj__weight", "model.layers.0.input_layernorm.weight": "llamaforcausallm__model__layers__0__input_layernorm__weight", "model.layers.0.post_attention_layernorm.weight": "llamaforcausallm__model__layers__0__post_attention_layernorm__weight", "model.layers.1.self_attn.q_proj.weight": "llamaforcausallm__model__layers__1__self_attn__q_proj__weight", "model.layers.1.self_attn.k_proj.weight": "llamaforcausallm__model__layers__1__self_attn__k_proj__weight", "model.layers.1.self_attn.v_proj.weight": "llamaforcausallm__model__layers__1__self_attn__v_proj__weight", "model.layers.1.self_attn.o_proj.weight": "llamaforcausallm__model__layers__1__self_attn__o_proj__weight", "model.layers.1.mlp.gate_proj.weight": "llamaforcausallm__model__layers__1__mlp__gate_proj__weight", "model.layers.1.mlp.up_proj.weight": "llamaforcausallm__model__layers__1__mlp__up_proj__weight", "model.layers.1.mlp.down_proj.weight": "llamaforcausallm__model__layers__1__mlp__down_proj__weight", "model.layers.1.input_layernorm.weight": "llamaforcausallm__model__layers__1__input_layernorm__weight", "model.layers.1.post_attention_layernorm.weight": "llamaforcausallm__model__layers__1__post_attention_layernorm__weight", "model.layers.2.self_attn.q_proj.weight": "llamaforcausallm__model__layers__2__self_attn__q_proj__weight", "model.layers.2.self_attn.k_proj.weight": "llamaforcausallm__model__layers__2__self_attn__k_proj__weight", "model.layers.2.self_attn.v_proj.weight": "llamaforcausallm__model__layers__2__self_attn__v_proj__weight", "model.layers.2.self_attn.o_proj.weight": "llamaforcausallm__model__layers__2__self_attn__o_proj__weight", "model.layers.2.mlp.gate_proj.weight": "llamaforcausallm__model__layers__2__mlp__gate_proj__weight", "model.layers.2.mlp.up_proj.weight": "llamaforcausallm__model__layers__2__mlp__up_proj__weight", "model.layers.2.mlp.down_proj.weight": "llamaforcausallm__model__layers__2__mlp__down_proj__weight", "model.layers.2.input_layernorm.weight": "llamaforcausallm__model__layers__2__input_layernorm__weight", "model.layers.2.post_attention_layernorm.weight": "llamaforcausallm__model__layers__2__post_attention_layernorm__weight", "model.layers.3.self_attn.q_proj.weight": "llamaforcausallm__model__layers__3__self_attn__q_proj__weight", "model.layers.3.self_attn.k_proj.weight": "llamaforcausallm__model__layers__3__self_attn__k_proj__weight", "model.layers.3.self_attn.v_proj.weight": "llamaforcausallm__model__layers__3__self_attn__v_proj__weight", "model.layers.3.self_attn.o_proj.weight": "llamaforcausallm__model__layers__3__self_attn__o_proj__weight", "model.layers.3.mlp.gate_proj.weight": "llamaforcausallm__model__layers__3__mlp__gate_proj__weight", "model.layers.3.mlp.up_proj.weight": 
"llamaforcausallm__model__layers__3__mlp__up_proj__weight", "model.layers.3.mlp.down_proj.weight": "llamaforcausallm__model__layers__3__mlp__down_proj__weight", "model.layers.3.input_layernorm.weight": "llamaforcausallm__model__layers__3__input_layernorm__weight", "model.layers.3.post_attention_layernorm.weight": "llamaforcausallm__model__layers__3__post_attention_layernorm__weight", "model.layers.4.self_attn.q_proj.weight": "llamaforcausallm__model__layers__4__self_attn__q_proj__weight", "model.layers.4.self_attn.k_proj.weight": "llamaforcausallm__model__layers__4__self_attn__k_proj__weight", "model.layers.4.self_attn.v_proj.weight": "llamaforcausallm__model__layers__4__self_attn__v_proj__weight", "model.layers.4.self_attn.o_proj.weight": "llamaforcausallm__model__layers__4__self_attn__o_proj__weight", "model.layers.4.mlp.gate_proj.weight": "llamaforcausallm__model__layers__4__mlp__gate_proj__weight", "model.layers.4.mlp.up_proj.weight": "llamaforcausallm__model__layers__4__mlp__up_proj__weight", "model.layers.4.mlp.down_proj.weight": "llamaforcausallm__model__layers__4__mlp__down_proj__weight", "model.layers.4.input_layernorm.weight": "llamaforcausallm__model__layers__4__input_layernorm__weight", "model.layers.4.post_attention_layernorm.weight": "llamaforcausallm__model__layers__4__post_attention_layernorm__weight", "model.layers.5.self_attn.q_proj.weight": "llamaforcausallm__model__layers__5__self_attn__q_proj__weight", "model.layers.5.self_attn.k_proj.weight": "llamaforcausallm__model__layers__5__self_attn__k_proj__weight", "model.layers.5.self_attn.v_proj.weight": "llamaforcausallm__model__layers__5__self_attn__v_proj__weight", "model.layers.5.self_attn.o_proj.weight": "llamaforcausallm__model__layers__5__self_attn__o_proj__weight", "model.layers.5.mlp.gate_proj.weight": "llamaforcausallm__model__layers__5__mlp__gate_proj__weight", "model.layers.5.mlp.up_proj.weight": "llamaforcausallm__model__layers__5__mlp__up_proj__weight", "model.layers.5.mlp.down_proj.weight": "llamaforcausallm__model__layers__5__mlp__down_proj__weight", "model.layers.5.input_layernorm.weight": "llamaforcausallm__model__layers__5__input_layernorm__weight", "model.layers.5.post_attention_layernorm.weight": "llamaforcausallm__model__layers__5__post_attention_layernorm__weight", "model.layers.6.self_attn.q_proj.weight": "llamaforcausallm__model__layers__6__self_attn__q_proj__weight", "model.layers.6.self_attn.k_proj.weight": "llamaforcausallm__model__layers__6__self_attn__k_proj__weight", "model.layers.6.self_attn.v_proj.weight": "llamaforcausallm__model__layers__6__self_attn__v_proj__weight", "model.layers.6.self_attn.o_proj.weight": "llamaforcausallm__model__layers__6__self_attn__o_proj__weight", "model.layers.6.mlp.gate_proj.weight": "llamaforcausallm__model__layers__6__mlp__gate_proj__weight", "model.layers.6.mlp.up_proj.weight": "llamaforcausallm__model__layers__6__mlp__up_proj__weight", "model.layers.6.mlp.down_proj.weight": "llamaforcausallm__model__layers__6__mlp__down_proj__weight", "model.layers.6.input_layernorm.weight": "llamaforcausallm__model__layers__6__input_layernorm__weight", "model.layers.6.post_attention_layernorm.weight": "llamaforcausallm__model__layers__6__post_attention_layernorm__weight", "model.layers.7.self_attn.q_proj.weight": "llamaforcausallm__model__layers__7__self_attn__q_proj__weight", "model.layers.7.self_attn.k_proj.weight": "llamaforcausallm__model__layers__7__self_attn__k_proj__weight", "model.layers.7.self_attn.v_proj.weight": 
"llamaforcausallm__model__layers__7__self_attn__v_proj__weight", "model.layers.7.self_attn.o_proj.weight": "llamaforcausallm__model__layers__7__self_attn__o_proj__weight", "model.layers.7.mlp.gate_proj.weight": "llamaforcausallm__model__layers__7__mlp__gate_proj__weight", "model.layers.7.mlp.up_proj.weight": "llamaforcausallm__model__layers__7__mlp__up_proj__weight", "model.layers.7.mlp.down_proj.weight": "llamaforcausallm__model__layers__7__mlp__down_proj__weight", "model.layers.7.input_layernorm.weight": "llamaforcausallm__model__layers__7__input_layernorm__weight", "model.layers.7.post_attention_layernorm.weight": "llamaforcausallm__model__layers__7__post_attention_layernorm__weight", "model.layers.8.self_attn.q_proj.weight": "llamaforcausallm__model__layers__8__self_attn__q_proj__weight", "model.layers.8.self_attn.k_proj.weight": "llamaforcausallm__model__layers__8__self_attn__k_proj__weight", "model.layers.8.self_attn.v_proj.weight": "llamaforcausallm__model__layers__8__self_attn__v_proj__weight", "model.layers.8.self_attn.o_proj.weight": "llamaforcausallm__model__layers__8__self_attn__o_proj__weight", "model.layers.8.mlp.gate_proj.weight": "llamaforcausallm__model__layers__8__mlp__gate_proj__weight", "model.layers.8.mlp.up_proj.weight": "llamaforcausallm__model__layers__8__mlp__up_proj__weight", "model.layers.8.mlp.down_proj.weight": "llamaforcausallm__model__layers__8__mlp__down_proj__weight", "model.layers.8.input_layernorm.weight": "llamaforcausallm__model__layers__8__input_layernorm__weight", "model.layers.8.post_attention_layernorm.weight": "llamaforcausallm__model__layers__8__post_attention_layernorm__weight", "model.layers.9.self_attn.q_proj.weight": "llamaforcausallm__model__layers__9__self_attn__q_proj__weight", "model.layers.9.self_attn.k_proj.weight": "llamaforcausallm__model__layers__9__self_attn__k_proj__weight", "model.layers.9.self_attn.v_proj.weight": "llamaforcausallm__model__layers__9__self_attn__v_proj__weight", "model.layers.9.self_attn.o_proj.weight": "llamaforcausallm__model__layers__9__self_attn__o_proj__weight", "model.layers.9.mlp.gate_proj.weight": "llamaforcausallm__model__layers__9__mlp__gate_proj__weight", "model.layers.9.mlp.up_proj.weight": "llamaforcausallm__model__layers__9__mlp__up_proj__weight", "model.layers.9.mlp.down_proj.weight": "llamaforcausallm__model__layers__9__mlp__down_proj__weight", "model.layers.9.input_layernorm.weight": "llamaforcausallm__model__layers__9__input_layernorm__weight", "model.layers.9.post_attention_layernorm.weight": "llamaforcausallm__model__layers__9__post_attention_layernorm__weight", "model.layers.10.self_attn.q_proj.weight": "llamaforcausallm__model__layers__10__self_attn__q_proj__weight", "model.layers.10.self_attn.k_proj.weight": "llamaforcausallm__model__layers__10__self_attn__k_proj__weight", "model.layers.10.self_attn.v_proj.weight": "llamaforcausallm__model__layers__10__self_attn__v_proj__weight", "model.layers.10.self_attn.o_proj.weight": "llamaforcausallm__model__layers__10__self_attn__o_proj__weight", "model.layers.10.mlp.gate_proj.weight": "llamaforcausallm__model__layers__10__mlp__gate_proj__weight", "model.layers.10.mlp.up_proj.weight": "llamaforcausallm__model__layers__10__mlp__up_proj__weight", "model.layers.10.mlp.down_proj.weight": "llamaforcausallm__model__layers__10__mlp__down_proj__weight", "model.layers.10.input_layernorm.weight": "llamaforcausallm__model__layers__10__input_layernorm__weight", "model.layers.10.post_attention_layernorm.weight": 
"llamaforcausallm__model__layers__10__post_attention_layernorm__weight", "model.layers.11.self_attn.q_proj.weight": "llamaforcausallm__model__layers__11__self_attn__q_proj__weight", "model.layers.11.self_attn.k_proj.weight": "llamaforcausallm__model__layers__11__self_attn__k_proj__weight", "model.layers.11.self_attn.v_proj.weight": "llamaforcausallm__model__layers__11__self_attn__v_proj__weight", "model.layers.11.self_attn.o_proj.weight": "llamaforcausallm__model__layers__11__self_attn__o_proj__weight", "model.layers.11.mlp.gate_proj.weight": "llamaforcausallm__model__layers__11__mlp__gate_proj__weight", "model.layers.11.mlp.up_proj.weight": "llamaforcausallm__model__layers__11__mlp__up_proj__weight", "model.layers.11.mlp.down_proj.weight": "llamaforcausallm__model__layers__11__mlp__down_proj__weight", "model.layers.11.input_layernorm.weight": "llamaforcausallm__model__layers__11__input_layernorm__weight", "model.layers.11.post_attention_layernorm.weight": "llamaforcausallm__model__layers__11__post_attention_layernorm__weight", "model.layers.12.self_attn.q_proj.weight": "llamaforcausallm__model__layers__12__self_attn__q_proj__weight", "model.layers.12.self_attn.k_proj.weight": "llamaforcausallm__model__layers__12__self_attn__k_proj__weight", "model.layers.12.self_attn.v_proj.weight": "llamaforcausallm__model__layers__12__self_attn__v_proj__weight", "model.layers.12.self_attn.o_proj.weight": "llamaforcausallm__model__layers__12__self_attn__o_proj__weight", "model.layers.12.mlp.gate_proj.weight": "llamaforcausallm__model__layers__12__mlp__gate_proj__weight", "model.layers.12.mlp.up_proj.weight": "llamaforcausallm__model__layers__12__mlp__up_proj__weight", "model.layers.12.mlp.down_proj.weight": "llamaforcausallm__model__layers__12__mlp__down_proj__weight", "model.layers.12.input_layernorm.weight": "llamaforcausallm__model__layers__12__input_layernorm__weight", "model.layers.12.post_attention_layernorm.weight": "llamaforcausallm__model__layers__12__post_attention_layernorm__weight", "model.layers.13.self_attn.q_proj.weight": "llamaforcausallm__model__layers__13__self_attn__q_proj__weight", "model.layers.13.self_attn.k_proj.weight": "llamaforcausallm__model__layers__13__self_attn__k_proj__weight", "model.layers.13.self_attn.v_proj.weight": "llamaforcausallm__model__layers__13__self_attn__v_proj__weight", "model.layers.13.self_attn.o_proj.weight": "llamaforcausallm__model__layers__13__self_attn__o_proj__weight", "model.layers.13.mlp.gate_proj.weight": "llamaforcausallm__model__layers__13__mlp__gate_proj__weight", "model.layers.13.mlp.up_proj.weight": "llamaforcausallm__model__layers__13__mlp__up_proj__weight", "model.layers.13.mlp.down_proj.weight": "llamaforcausallm__model__layers__13__mlp__down_proj__weight", "model.layers.13.input_layernorm.weight": "llamaforcausallm__model__layers__13__input_layernorm__weight", "model.layers.13.post_attention_layernorm.weight": "llamaforcausallm__model__layers__13__post_attention_layernorm__weight", "model.layers.14.self_attn.q_proj.weight": "llamaforcausallm__model__layers__14__self_attn__q_proj__weight", "model.layers.14.self_attn.k_proj.weight": "llamaforcausallm__model__layers__14__self_attn__k_proj__weight", "model.layers.14.self_attn.v_proj.weight": "llamaforcausallm__model__layers__14__self_attn__v_proj__weight", "model.layers.14.self_attn.o_proj.weight": "llamaforcausallm__model__layers__14__self_attn__o_proj__weight", "model.layers.14.mlp.gate_proj.weight": "llamaforcausallm__model__layers__14__mlp__gate_proj__weight", 
"model.layers.14.mlp.up_proj.weight": "llamaforcausallm__model__layers__14__mlp__up_proj__weight", "model.layers.14.mlp.down_proj.weight": "llamaforcausallm__model__layers__14__mlp__down_proj__weight", "model.layers.14.input_layernorm.weight": "llamaforcausallm__model__layers__14__input_layernorm__weight", "model.layers.14.post_attention_layernorm.weight": "llamaforcausallm__model__layers__14__post_attention_layernorm__weight", "model.layers.15.self_attn.q_proj.weight": "llamaforcausallm__model__layers__15__self_attn__q_proj__weight", "model.layers.15.self_attn.k_proj.weight": "llamaforcausallm__model__layers__15__self_attn__k_proj__weight", "model.layers.15.self_attn.v_proj.weight": "llamaforcausallm__model__layers__15__self_attn__v_proj__weight", "model.layers.15.self_attn.o_proj.weight": "llamaforcausallm__model__layers__15__self_attn__o_proj__weight", "model.layers.15.mlp.gate_proj.weight": "llamaforcausallm__model__layers__15__mlp__gate_proj__weight", "model.layers.15.mlp.up_proj.weight": "llamaforcausallm__model__layers__15__mlp__up_proj__weight", "model.layers.15.mlp.down_proj.weight": "llamaforcausallm__model__layers__15__mlp__down_proj__weight", "model.layers.15.input_layernorm.weight": "llamaforcausallm__model__layers__15__input_layernorm__weight", "model.layers.15.post_attention_layernorm.weight": "llamaforcausallm__model__layers__15__post_attention_layernorm__weight", "model.layers.16.self_attn.q_proj.weight": "llamaforcausallm__model__layers__16__self_attn__q_proj__weight", "model.layers.16.self_attn.k_proj.weight": "llamaforcausallm__model__layers__16__self_attn__k_proj__weight", "model.layers.16.self_attn.v_proj.weight": "llamaforcausallm__model__layers__16__self_attn__v_proj__weight", "model.layers.16.self_attn.o_proj.weight": "llamaforcausallm__model__layers__16__self_attn__o_proj__weight", "model.layers.16.mlp.gate_proj.weight": "llamaforcausallm__model__layers__16__mlp__gate_proj__weight", "model.layers.16.mlp.up_proj.weight": "llamaforcausallm__model__layers__16__mlp__up_proj__weight", "model.layers.16.mlp.down_proj.weight": "llamaforcausallm__model__layers__16__mlp__down_proj__weight", "model.layers.16.input_layernorm.weight": "llamaforcausallm__model__layers__16__input_layernorm__weight", "model.layers.16.post_attention_layernorm.weight": "llamaforcausallm__model__layers__16__post_attention_layernorm__weight", "model.layers.17.self_attn.q_proj.weight": "llamaforcausallm__model__layers__17__self_attn__q_proj__weight", "model.layers.17.self_attn.k_proj.weight": "llamaforcausallm__model__layers__17__self_attn__k_proj__weight", "model.layers.17.self_attn.v_proj.weight": "llamaforcausallm__model__layers__17__self_attn__v_proj__weight", "model.layers.17.self_attn.o_proj.weight": "llamaforcausallm__model__layers__17__self_attn__o_proj__weight", "model.layers.17.mlp.gate_proj.weight": "llamaforcausallm__model__layers__17__mlp__gate_proj__weight", "model.layers.17.mlp.up_proj.weight": "llamaforcausallm__model__layers__17__mlp__up_proj__weight", "model.layers.17.mlp.down_proj.weight": "llamaforcausallm__model__layers__17__mlp__down_proj__weight", "model.layers.17.input_layernorm.weight": "llamaforcausallm__model__layers__17__input_layernorm__weight", "model.layers.17.post_attention_layernorm.weight": "llamaforcausallm__model__layers__17__post_attention_layernorm__weight", "model.layers.18.self_attn.q_proj.weight": "llamaforcausallm__model__layers__18__self_attn__q_proj__weight", "model.layers.18.self_attn.k_proj.weight": 
"llamaforcausallm__model__layers__18__self_attn__k_proj__weight", "model.layers.18.self_attn.v_proj.weight": "llamaforcausallm__model__layers__18__self_attn__v_proj__weight", "model.layers.18.self_attn.o_proj.weight": "llamaforcausallm__model__layers__18__self_attn__o_proj__weight", "model.layers.18.mlp.gate_proj.weight": "llamaforcausallm__model__layers__18__mlp__gate_proj__weight", "model.layers.18.mlp.up_proj.weight": "llamaforcausallm__model__layers__18__mlp__up_proj__weight", "model.layers.18.mlp.down_proj.weight": "llamaforcausallm__model__layers__18__mlp__down_proj__weight", "model.layers.18.input_layernorm.weight": "llamaforcausallm__model__layers__18__input_layernorm__weight", "model.layers.18.post_attention_layernorm.weight": "llamaforcausallm__model__layers__18__post_attention_layernorm__weight", "model.layers.19.self_attn.q_proj.weight": "llamaforcausallm__model__layers__19__self_attn__q_proj__weight", "model.layers.19.self_attn.k_proj.weight": "llamaforcausallm__model__layers__19__self_attn__k_proj__weight", "model.layers.19.self_attn.v_proj.weight": "llamaforcausallm__model__layers__19__self_attn__v_proj__weight", "model.layers.19.self_attn.o_proj.weight": "llamaforcausallm__model__layers__19__self_attn__o_proj__weight", "model.layers.19.mlp.gate_proj.weight": "llamaforcausallm__model__layers__19__mlp__gate_proj__weight", "model.layers.19.mlp.up_proj.weight": "llamaforcausallm__model__layers__19__mlp__up_proj__weight", "model.layers.19.mlp.down_proj.weight": "llamaforcausallm__model__layers__19__mlp__down_proj__weight", "model.layers.19.input_layernorm.weight": "llamaforcausallm__model__layers__19__input_layernorm__weight", "model.layers.19.post_attention_layernorm.weight": "llamaforcausallm__model__layers__19__post_attention_layernorm__weight", "model.layers.20.self_attn.q_proj.weight": "llamaforcausallm__model__layers__20__self_attn__q_proj__weight", "model.layers.20.self_attn.k_proj.weight": "llamaforcausallm__model__layers__20__self_attn__k_proj__weight", "model.layers.20.self_attn.v_proj.weight": "llamaforcausallm__model__layers__20__self_attn__v_proj__weight", "model.layers.20.self_attn.o_proj.weight": "llamaforcausallm__model__layers__20__self_attn__o_proj__weight", "model.layers.20.mlp.gate_proj.weight": "llamaforcausallm__model__layers__20__mlp__gate_proj__weight", "model.layers.20.mlp.up_proj.weight": "llamaforcausallm__model__layers__20__mlp__up_proj__weight", "model.layers.20.mlp.down_proj.weight": "llamaforcausallm__model__layers__20__mlp__down_proj__weight", "model.layers.20.input_layernorm.weight": "llamaforcausallm__model__layers__20__input_layernorm__weight", "model.layers.20.post_attention_layernorm.weight": "llamaforcausallm__model__layers__20__post_attention_layernorm__weight", "model.layers.21.self_attn.q_proj.weight": "llamaforcausallm__model__layers__21__self_attn__q_proj__weight", "model.layers.21.self_attn.k_proj.weight": "llamaforcausallm__model__layers__21__self_attn__k_proj__weight", "model.layers.21.self_attn.v_proj.weight": "llamaforcausallm__model__layers__21__self_attn__v_proj__weight", "model.layers.21.self_attn.o_proj.weight": "llamaforcausallm__model__layers__21__self_attn__o_proj__weight", "model.layers.21.mlp.gate_proj.weight": "llamaforcausallm__model__layers__21__mlp__gate_proj__weight", "model.layers.21.mlp.up_proj.weight": "llamaforcausallm__model__layers__21__mlp__up_proj__weight", "model.layers.21.mlp.down_proj.weight": "llamaforcausallm__model__layers__21__mlp__down_proj__weight", "model.layers.21.input_layernorm.weight": 
"llamaforcausallm__model__layers__21__input_layernorm__weight", "model.layers.21.post_attention_layernorm.weight": "llamaforcausallm__model__layers__21__post_attention_layernorm__weight", "model.layers.22.self_attn.q_proj.weight": "llamaforcausallm__model__layers__22__self_attn__q_proj__weight", "model.layers.22.self_attn.k_proj.weight": "llamaforcausallm__model__layers__22__self_attn__k_proj__weight", "model.layers.22.self_attn.v_proj.weight": "llamaforcausallm__model__layers__22__self_attn__v_proj__weight", "model.layers.22.self_attn.o_proj.weight": "llamaforcausallm__model__layers__22__self_attn__o_proj__weight", "model.layers.22.mlp.gate_proj.weight": "llamaforcausallm__model__layers__22__mlp__gate_proj__weight", "model.layers.22.mlp.up_proj.weight": "llamaforcausallm__model__layers__22__mlp__up_proj__weight", "model.layers.22.mlp.down_proj.weight": "llamaforcausallm__model__layers__22__mlp__down_proj__weight", "model.layers.22.input_layernorm.weight": "llamaforcausallm__model__layers__22__input_layernorm__weight", "model.layers.22.post_attention_layernorm.weight": "llamaforcausallm__model__layers__22__post_attention_layernorm__weight", "model.layers.23.self_attn.q_proj.weight": "llamaforcausallm__model__layers__23__self_attn__q_proj__weight", "model.layers.23.self_attn.k_proj.weight": "llamaforcausallm__model__layers__23__self_attn__k_proj__weight", "model.layers.23.self_attn.v_proj.weight": "llamaforcausallm__model__layers__23__self_attn__v_proj__weight", "model.layers.23.self_attn.o_proj.weight": "llamaforcausallm__model__layers__23__self_attn__o_proj__weight", "model.layers.23.mlp.gate_proj.weight": "llamaforcausallm__model__layers__23__mlp__gate_proj__weight", "model.layers.23.mlp.up_proj.weight": "llamaforcausallm__model__layers__23__mlp__up_proj__weight", "model.layers.23.mlp.down_proj.weight": "llamaforcausallm__model__layers__23__mlp__down_proj__weight", "model.layers.23.input_layernorm.weight": "llamaforcausallm__model__layers__23__input_layernorm__weight", "model.layers.23.post_attention_layernorm.weight": "llamaforcausallm__model__layers__23__post_attention_layernorm__weight", "model.layers.24.self_attn.q_proj.weight": "llamaforcausallm__model__layers__24__self_attn__q_proj__weight", "model.layers.24.self_attn.k_proj.weight": "llamaforcausallm__model__layers__24__self_attn__k_proj__weight", "model.layers.24.self_attn.v_proj.weight": "llamaforcausallm__model__layers__24__self_attn__v_proj__weight", "model.layers.24.self_attn.o_proj.weight": "llamaforcausallm__model__layers__24__self_attn__o_proj__weight", "model.layers.24.mlp.gate_proj.weight": "llamaforcausallm__model__layers__24__mlp__gate_proj__weight", "model.layers.24.mlp.up_proj.weight": "llamaforcausallm__model__layers__24__mlp__up_proj__weight", "model.layers.24.mlp.down_proj.weight": "llamaforcausallm__model__layers__24__mlp__down_proj__weight", "model.layers.24.input_layernorm.weight": "llamaforcausallm__model__layers__24__input_layernorm__weight", "model.layers.24.post_attention_layernorm.weight": "llamaforcausallm__model__layers__24__post_attention_layernorm__weight", "model.layers.25.self_attn.q_proj.weight": "llamaforcausallm__model__layers__25__self_attn__q_proj__weight", "model.layers.25.self_attn.k_proj.weight": "llamaforcausallm__model__layers__25__self_attn__k_proj__weight", "model.layers.25.self_attn.v_proj.weight": "llamaforcausallm__model__layers__25__self_attn__v_proj__weight", "model.layers.25.self_attn.o_proj.weight": "llamaforcausallm__model__layers__25__self_attn__o_proj__weight", 
"model.layers.25.mlp.gate_proj.weight": "llamaforcausallm__model__layers__25__mlp__gate_proj__weight", "model.layers.25.mlp.up_proj.weight": "llamaforcausallm__model__layers__25__mlp__up_proj__weight", "model.layers.25.mlp.down_proj.weight": "llamaforcausallm__model__layers__25__mlp__down_proj__weight", "model.layers.25.input_layernorm.weight": "llamaforcausallm__model__layers__25__input_layernorm__weight", "model.layers.25.post_attention_layernorm.weight": "llamaforcausallm__model__layers__25__post_attention_layernorm__weight", "model.layers.26.self_attn.q_proj.weight": "llamaforcausallm__model__layers__26__self_attn__q_proj__weight", "model.layers.26.self_attn.k_proj.weight": "llamaforcausallm__model__layers__26__self_attn__k_proj__weight", "model.layers.26.self_attn.v_proj.weight": "llamaforcausallm__model__layers__26__self_attn__v_proj__weight", "model.layers.26.self_attn.o_proj.weight": "llamaforcausallm__model__layers__26__self_attn__o_proj__weight", "model.layers.26.mlp.gate_proj.weight": "llamaforcausallm__model__layers__26__mlp__gate_proj__weight", "model.layers.26.mlp.up_proj.weight": "llamaforcausallm__model__layers__26__mlp__up_proj__weight", "model.layers.26.mlp.down_proj.weight": "llamaforcausallm__model__layers__26__mlp__down_proj__weight", "model.layers.26.input_layernorm.weight": "llamaforcausallm__model__layers__26__input_layernorm__weight", "model.layers.26.post_attention_layernorm.weight": "llamaforcausallm__model__layers__26__post_attention_layernorm__weight", "model.layers.27.self_attn.q_proj.weight": "llamaforcausallm__model__layers__27__self_attn__q_proj__weight", "model.layers.27.self_attn.k_proj.weight": "llamaforcausallm__model__layers__27__self_attn__k_proj__weight", "model.layers.27.self_attn.v_proj.weight": "llamaforcausallm__model__layers__27__self_attn__v_proj__weight", "model.layers.27.self_attn.o_proj.weight": "llamaforcausallm__model__layers__27__self_attn__o_proj__weight", "model.layers.27.mlp.gate_proj.weight": "llamaforcausallm__model__layers__27__mlp__gate_proj__weight", "model.layers.27.mlp.up_proj.weight": "llamaforcausallm__model__layers__27__mlp__up_proj__weight", "model.layers.27.mlp.down_proj.weight": "llamaforcausallm__model__layers__27__mlp__down_proj__weight", "model.layers.27.input_layernorm.weight": "llamaforcausallm__model__layers__27__input_layernorm__weight", "model.layers.27.post_attention_layernorm.weight": "llamaforcausallm__model__layers__27__post_attention_layernorm__weight", "model.layers.28.self_attn.q_proj.weight": "llamaforcausallm__model__layers__28__self_attn__q_proj__weight", "model.layers.28.self_attn.k_proj.weight": "llamaforcausallm__model__layers__28__self_attn__k_proj__weight", "model.layers.28.self_attn.v_proj.weight": "llamaforcausallm__model__layers__28__self_attn__v_proj__weight", "model.layers.28.self_attn.o_proj.weight": "llamaforcausallm__model__layers__28__self_attn__o_proj__weight", "model.layers.28.mlp.gate_proj.weight": "llamaforcausallm__model__layers__28__mlp__gate_proj__weight", "model.layers.28.mlp.up_proj.weight": "llamaforcausallm__model__layers__28__mlp__up_proj__weight", "model.layers.28.mlp.down_proj.weight": "llamaforcausallm__model__layers__28__mlp__down_proj__weight", "model.layers.28.input_layernorm.weight": "llamaforcausallm__model__layers__28__input_layernorm__weight", "model.layers.28.post_attention_layernorm.weight": "llamaforcausallm__model__layers__28__post_attention_layernorm__weight", "model.layers.29.self_attn.q_proj.weight": 
"llamaforcausallm__model__layers__29__self_attn__q_proj__weight", "model.layers.29.self_attn.k_proj.weight": "llamaforcausallm__model__layers__29__self_attn__k_proj__weight", "model.layers.29.self_attn.v_proj.weight": "llamaforcausallm__model__layers__29__self_attn__v_proj__weight", "model.layers.29.self_attn.o_proj.weight": "llamaforcausallm__model__layers__29__self_attn__o_proj__weight", "model.layers.29.mlp.gate_proj.weight": "llamaforcausallm__model__layers__29__mlp__gate_proj__weight", "model.layers.29.mlp.up_proj.weight": "llamaforcausallm__model__layers__29__mlp__up_proj__weight", "model.layers.29.mlp.down_proj.weight": "llamaforcausallm__model__layers__29__mlp__down_proj__weight", "model.layers.29.input_layernorm.weight": "llamaforcausallm__model__layers__29__input_layernorm__weight", "model.layers.29.post_attention_layernorm.weight": "llamaforcausallm__model__layers__29__post_attention_layernorm__weight", "model.layers.30.self_attn.q_proj.weight": "llamaforcausallm__model__layers__30__self_attn__q_proj__weight", "model.layers.30.self_attn.k_proj.weight": "llamaforcausallm__model__layers__30__self_attn__k_proj__weight", "model.layers.30.self_attn.v_proj.weight": "llamaforcausallm__model__layers__30__self_attn__v_proj__weight", "model.layers.30.self_attn.o_proj.weight": "llamaforcausallm__model__layers__30__self_attn__o_proj__weight", "model.layers.30.mlp.gate_proj.weight": "llamaforcausallm__model__layers__30__mlp__gate_proj__weight", "model.layers.30.mlp.up_proj.weight": "llamaforcausallm__model__layers__30__mlp__up_proj__weight", "model.layers.30.mlp.down_proj.weight": "llamaforcausallm__model__layers__30__mlp__down_proj__weight", "model.layers.30.input_layernorm.weight": "llamaforcausallm__model__layers__30__input_layernorm__weight", "model.layers.30.post_attention_layernorm.weight": "llamaforcausallm__model__layers__30__post_attention_layernorm__weight", "model.layers.31.self_attn.q_proj.weight": "llamaforcausallm__model__layers__31__self_attn__q_proj__weight", "model.layers.31.self_attn.k_proj.weight": "llamaforcausallm__model__layers__31__self_attn__k_proj__weight", "model.layers.31.self_attn.v_proj.weight": "llamaforcausallm__model__layers__31__self_attn__v_proj__weight", "model.layers.31.self_attn.o_proj.weight": "llamaforcausallm__model__layers__31__self_attn__o_proj__weight", "model.layers.31.mlp.gate_proj.weight": "llamaforcausallm__model__layers__31__mlp__gate_proj__weight", "model.layers.31.mlp.up_proj.weight": "llamaforcausallm__model__layers__31__mlp__up_proj__weight", "model.layers.31.mlp.down_proj.weight": "llamaforcausallm__model__layers__31__mlp__down_proj__weight", "model.layers.31.input_layernorm.weight": "llamaforcausallm__model__layers__31__input_layernorm__weight", "model.layers.31.post_attention_layernorm.weight": "llamaforcausallm__model__layers__31__post_attention_layernorm__weight", "model.layers.32.self_attn.q_proj.weight": "llamaforcausallm__model__layers__32__self_attn__q_proj__weight", "model.layers.32.self_attn.k_proj.weight": "llamaforcausallm__model__layers__32__self_attn__k_proj__weight", "model.layers.32.self_attn.v_proj.weight": "llamaforcausallm__model__layers__32__self_attn__v_proj__weight", "model.layers.32.self_attn.o_proj.weight": "llamaforcausallm__model__layers__32__self_attn__o_proj__weight", "model.layers.32.mlp.gate_proj.weight": "llamaforcausallm__model__layers__32__mlp__gate_proj__weight", "model.layers.32.mlp.up_proj.weight": "llamaforcausallm__model__layers__32__mlp__up_proj__weight", "model.layers.32.mlp.down_proj.weight": 
"llamaforcausallm__model__layers__32__mlp__down_proj__weight", "model.layers.32.input_layernorm.weight": "llamaforcausallm__model__layers__32__input_layernorm__weight", "model.layers.32.post_attention_layernorm.weight": "llamaforcausallm__model__layers__32__post_attention_layernorm__weight", "model.layers.33.self_attn.q_proj.weight": "llamaforcausallm__model__layers__33__self_attn__q_proj__weight", "model.layers.33.self_attn.k_proj.weight": "llamaforcausallm__model__layers__33__self_attn__k_proj__weight", "model.layers.33.self_attn.v_proj.weight": "llamaforcausallm__model__layers__33__self_attn__v_proj__weight", "model.layers.33.self_attn.o_proj.weight": "llamaforcausallm__model__layers__33__self_attn__o_proj__weight", "model.layers.33.mlp.gate_proj.weight": "llamaforcausallm__model__layers__33__mlp__gate_proj__weight", "model.layers.33.mlp.up_proj.weight": "llamaforcausallm__model__layers__33__mlp__up_proj__weight", "model.layers.33.mlp.down_proj.weight": "llamaforcausallm__model__layers__33__mlp__down_proj__weight", "model.layers.33.input_layernorm.weight": "llamaforcausallm__model__layers__33__input_layernorm__weight", "model.layers.33.post_attention_layernorm.weight": "llamaforcausallm__model__layers__33__post_attention_layernorm__weight", "model.layers.34.self_attn.q_proj.weight": "llamaforcausallm__model__layers__34__self_attn__q_proj__weight", "model.layers.34.self_attn.k_proj.weight": "llamaforcausallm__model__layers__34__self_attn__k_proj__weight", "model.layers.34.self_attn.v_proj.weight": "llamaforcausallm__model__layers__34__self_attn__v_proj__weight", "model.layers.34.self_attn.o_proj.weight": "llamaforcausallm__model__layers__34__self_attn__o_proj__weight", "model.layers.34.mlp.gate_proj.weight": "llamaforcausallm__model__layers__34__mlp__gate_proj__weight", "model.layers.34.mlp.up_proj.weight": "llamaforcausallm__model__layers__34__mlp__up_proj__weight", "model.layers.34.mlp.down_proj.weight": "llamaforcausallm__model__layers__34__mlp__down_proj__weight", "model.layers.34.input_layernorm.weight": "llamaforcausallm__model__layers__34__input_layernorm__weight", "model.layers.34.post_attention_layernorm.weight": "llamaforcausallm__model__layers__34__post_attention_layernorm__weight", "model.layers.35.self_attn.q_proj.weight": "llamaforcausallm__model__layers__35__self_attn__q_proj__weight", "model.layers.35.self_attn.k_proj.weight": "llamaforcausallm__model__layers__35__self_attn__k_proj__weight", "model.layers.35.self_attn.v_proj.weight": "llamaforcausallm__model__layers__35__self_attn__v_proj__weight", "model.layers.35.self_attn.o_proj.weight": "llamaforcausallm__model__layers__35__self_attn__o_proj__weight", "model.layers.35.mlp.gate_proj.weight": "llamaforcausallm__model__layers__35__mlp__gate_proj__weight", "model.layers.35.mlp.up_proj.weight": "llamaforcausallm__model__layers__35__mlp__up_proj__weight", "model.layers.35.mlp.down_proj.weight": "llamaforcausallm__model__layers__35__mlp__down_proj__weight", "model.layers.35.input_layernorm.weight": "llamaforcausallm__model__layers__35__input_layernorm__weight", "model.layers.35.post_attention_layernorm.weight": "llamaforcausallm__model__layers__35__post_attention_layernorm__weight", "model.layers.36.self_attn.q_proj.weight": "llamaforcausallm__model__layers__36__self_attn__q_proj__weight", "model.layers.36.self_attn.k_proj.weight": "llamaforcausallm__model__layers__36__self_attn__k_proj__weight", "model.layers.36.self_attn.v_proj.weight": "llamaforcausallm__model__layers__36__self_attn__v_proj__weight", 
"model.layers.36.self_attn.o_proj.weight": "llamaforcausallm__model__layers__36__self_attn__o_proj__weight", "model.layers.36.mlp.gate_proj.weight": "llamaforcausallm__model__layers__36__mlp__gate_proj__weight", "model.layers.36.mlp.up_proj.weight": "llamaforcausallm__model__layers__36__mlp__up_proj__weight", "model.layers.36.mlp.down_proj.weight": "llamaforcausallm__model__layers__36__mlp__down_proj__weight", "model.layers.36.input_layernorm.weight": "llamaforcausallm__model__layers__36__input_layernorm__weight", "model.layers.36.post_attention_layernorm.weight": "llamaforcausallm__model__layers__36__post_attention_layernorm__weight", "model.layers.37.self_attn.q_proj.weight": "llamaforcausallm__model__layers__37__self_attn__q_proj__weight", "model.layers.37.self_attn.k_proj.weight": "llamaforcausallm__model__layers__37__self_attn__k_proj__weight", "model.layers.37.self_attn.v_proj.weight": "llamaforcausallm__model__layers__37__self_attn__v_proj__weight", "model.layers.37.self_attn.o_proj.weight": "llamaforcausallm__model__layers__37__self_attn__o_proj__weight", "model.layers.37.mlp.gate_proj.weight": "llamaforcausallm__model__layers__37__mlp__gate_proj__weight", "model.layers.37.mlp.up_proj.weight": "llamaforcausallm__model__layers__37__mlp__up_proj__weight", "model.layers.37.mlp.down_proj.weight": "llamaforcausallm__model__layers__37__mlp__down_proj__weight", "model.layers.37.input_layernorm.weight": "llamaforcausallm__model__layers__37__input_layernorm__weight", "model.layers.37.post_attention_layernorm.weight": "llamaforcausallm__model__layers__37__post_attention_layernorm__weight", "model.layers.38.self_attn.q_proj.weight": "llamaforcausallm__model__layers__38__self_attn__q_proj__weight", "model.layers.38.self_attn.k_proj.weight": "llamaforcausallm__model__layers__38__self_attn__k_proj__weight", "model.layers.38.self_attn.v_proj.weight": "llamaforcausallm__model__layers__38__self_attn__v_proj__weight", "model.layers.38.self_attn.o_proj.weight": "llamaforcausallm__model__layers__38__self_attn__o_proj__weight", "model.layers.38.mlp.gate_proj.weight": "llamaforcausallm__model__layers__38__mlp__gate_proj__weight", "model.layers.38.mlp.up_proj.weight": "llamaforcausallm__model__layers__38__mlp__up_proj__weight", "model.layers.38.mlp.down_proj.weight": "llamaforcausallm__model__layers__38__mlp__down_proj__weight", "model.layers.38.input_layernorm.weight": "llamaforcausallm__model__layers__38__input_layernorm__weight", "model.layers.38.post_attention_layernorm.weight": "llamaforcausallm__model__layers__38__post_attention_layernorm__weight", "model.layers.39.self_attn.q_proj.weight": "llamaforcausallm__model__layers__39__self_attn__q_proj__weight", "model.layers.39.self_attn.k_proj.weight": "llamaforcausallm__model__layers__39__self_attn__k_proj__weight", "model.layers.39.self_attn.v_proj.weight": "llamaforcausallm__model__layers__39__self_attn__v_proj__weight", "model.layers.39.self_attn.o_proj.weight": "llamaforcausallm__model__layers__39__self_attn__o_proj__weight", "model.layers.39.mlp.gate_proj.weight": "llamaforcausallm__model__layers__39__mlp__gate_proj__weight", "model.layers.39.mlp.up_proj.weight": "llamaforcausallm__model__layers__39__mlp__up_proj__weight", "model.layers.39.mlp.down_proj.weight": "llamaforcausallm__model__layers__39__mlp__down_proj__weight", "model.layers.39.input_layernorm.weight": "llamaforcausallm__model__layers__39__input_layernorm__weight", "model.layers.39.post_attention_layernorm.weight": 
"llamaforcausallm__model__layers__39__post_attention_layernorm__weight", "model.layers.40.self_attn.q_proj.weight": "llamaforcausallm__model__layers__40__self_attn__q_proj__weight", "model.layers.40.self_attn.k_proj.weight": "llamaforcausallm__model__layers__40__self_attn__k_proj__weight", "model.layers.40.self_attn.v_proj.weight": "llamaforcausallm__model__layers__40__self_attn__v_proj__weight", "model.layers.40.self_attn.o_proj.weight": "llamaforcausallm__model__layers__40__self_attn__o_proj__weight", "model.layers.40.mlp.gate_proj.weight": "llamaforcausallm__model__layers__40__mlp__gate_proj__weight", "model.layers.40.mlp.up_proj.weight": "llamaforcausallm__model__layers__40__mlp__up_proj__weight", "model.layers.40.mlp.down_proj.weight": "llamaforcausallm__model__layers__40__mlp__down_proj__weight", "model.layers.40.input_layernorm.weight": "llamaforcausallm__model__layers__40__input_layernorm__weight", "model.layers.40.post_attention_layernorm.weight": "llamaforcausallm__model__layers__40__post_attention_layernorm__weight", "model.layers.41.self_attn.q_proj.weight": "llamaforcausallm__model__layers__41__self_attn__q_proj__weight", "model.layers.41.self_attn.k_proj.weight": "llamaforcausallm__model__layers__41__self_attn__k_proj__weight", "model.layers.41.self_attn.v_proj.weight": "llamaforcausallm__model__layers__41__self_attn__v_proj__weight", "model.layers.41.self_attn.o_proj.weight": "llamaforcausallm__model__layers__41__self_attn__o_proj__weight", "model.layers.41.mlp.gate_proj.weight": "llamaforcausallm__model__layers__41__mlp__gate_proj__weight", "model.layers.41.mlp.up_proj.weight": "llamaforcausallm__model__layers__41__mlp__up_proj__weight", "model.layers.41.mlp.down_proj.weight": "llamaforcausallm__model__layers__41__mlp__down_proj__weight", "model.layers.41.input_layernorm.weight": "llamaforcausallm__model__layers__41__input_layernorm__weight", "model.layers.41.post_attention_layernorm.weight": "llamaforcausallm__model__layers__41__post_attention_layernorm__weight", "model.layers.42.self_attn.q_proj.weight": "llamaforcausallm__model__layers__42__self_attn__q_proj__weight", "model.layers.42.self_attn.k_proj.weight": "llamaforcausallm__model__layers__42__self_attn__k_proj__weight", "model.layers.42.self_attn.v_proj.weight": "llamaforcausallm__model__layers__42__self_attn__v_proj__weight", "model.layers.42.self_attn.o_proj.weight": "llamaforcausallm__model__layers__42__self_attn__o_proj__weight", "model.layers.42.mlp.gate_proj.weight": "llamaforcausallm__model__layers__42__mlp__gate_proj__weight", "model.layers.42.mlp.up_proj.weight": "llamaforcausallm__model__layers__42__mlp__up_proj__weight", "model.layers.42.mlp.down_proj.weight": "llamaforcausallm__model__layers__42__mlp__down_proj__weight", "model.layers.42.input_layernorm.weight": "llamaforcausallm__model__layers__42__input_layernorm__weight", "model.layers.42.post_attention_layernorm.weight": "llamaforcausallm__model__layers__42__post_attention_layernorm__weight", "model.layers.43.self_attn.q_proj.weight": "llamaforcausallm__model__layers__43__self_attn__q_proj__weight", "model.layers.43.self_attn.k_proj.weight": "llamaforcausallm__model__layers__43__self_attn__k_proj__weight", "model.layers.43.self_attn.v_proj.weight": "llamaforcausallm__model__layers__43__self_attn__v_proj__weight", "model.layers.43.self_attn.o_proj.weight": "llamaforcausallm__model__layers__43__self_attn__o_proj__weight", "model.layers.43.mlp.gate_proj.weight": "llamaforcausallm__model__layers__43__mlp__gate_proj__weight", 
"model.layers.43.mlp.up_proj.weight": "llamaforcausallm__model__layers__43__mlp__up_proj__weight", "model.layers.43.mlp.down_proj.weight": "llamaforcausallm__model__layers__43__mlp__down_proj__weight", "model.layers.43.input_layernorm.weight": "llamaforcausallm__model__layers__43__input_layernorm__weight", "model.layers.43.post_attention_layernorm.weight": "llamaforcausallm__model__layers__43__post_attention_layernorm__weight", "model.layers.44.self_attn.q_proj.weight": "llamaforcausallm__model__layers__44__self_attn__q_proj__weight", "model.layers.44.self_attn.k_proj.weight": "llamaforcausallm__model__layers__44__self_attn__k_proj__weight", "model.layers.44.self_attn.v_proj.weight": "llamaforcausallm__model__layers__44__self_attn__v_proj__weight", "model.layers.44.self_attn.o_proj.weight": "llamaforcausallm__model__layers__44__self_attn__o_proj__weight", "model.layers.44.mlp.gate_proj.weight": "llamaforcausallm__model__layers__44__mlp__gate_proj__weight", "model.layers.44.mlp.up_proj.weight": "llamaforcausallm__model__layers__44__mlp__up_proj__weight", "model.layers.44.mlp.down_proj.weight": "llamaforcausallm__model__layers__44__mlp__down_proj__weight", "model.layers.44.input_layernorm.weight": "llamaforcausallm__model__layers__44__input_layernorm__weight", "model.layers.44.post_attention_layernorm.weight": "llamaforcausallm__model__layers__44__post_attention_layernorm__weight", "model.layers.45.self_attn.q_proj.weight": "llamaforcausallm__model__layers__45__self_attn__q_proj__weight", "model.layers.45.self_attn.k_proj.weight": "llamaforcausallm__model__layers__45__self_attn__k_proj__weight", "model.layers.45.self_attn.v_proj.weight": "llamaforcausallm__model__layers__45__self_attn__v_proj__weight", "model.layers.45.self_attn.o_proj.weight": "llamaforcausallm__model__layers__45__self_attn__o_proj__weight", "model.layers.45.mlp.gate_proj.weight": "llamaforcausallm__model__layers__45__mlp__gate_proj__weight", "model.layers.45.mlp.up_proj.weight": "llamaforcausallm__model__layers__45__mlp__up_proj__weight", "model.layers.45.mlp.down_proj.weight": "llamaforcausallm__model__layers__45__mlp__down_proj__weight", "model.layers.45.input_layernorm.weight": "llamaforcausallm__model__layers__45__input_layernorm__weight", "model.layers.45.post_attention_layernorm.weight": "llamaforcausallm__model__layers__45__post_attention_layernorm__weight", "model.layers.46.self_attn.q_proj.weight": "llamaforcausallm__model__layers__46__self_attn__q_proj__weight", "model.layers.46.self_attn.k_proj.weight": "llamaforcausallm__model__layers__46__self_attn__k_proj__weight", "model.layers.46.self_attn.v_proj.weight": "llamaforcausallm__model__layers__46__self_attn__v_proj__weight", "model.layers.46.self_attn.o_proj.weight": "llamaforcausallm__model__layers__46__self_attn__o_proj__weight", "model.layers.46.mlp.gate_proj.weight": "llamaforcausallm__model__layers__46__mlp__gate_proj__weight", "model.layers.46.mlp.up_proj.weight": "llamaforcausallm__model__layers__46__mlp__up_proj__weight", "model.layers.46.mlp.down_proj.weight": "llamaforcausallm__model__layers__46__mlp__down_proj__weight", "model.layers.46.input_layernorm.weight": "llamaforcausallm__model__layers__46__input_layernorm__weight", "model.layers.46.post_attention_layernorm.weight": "llamaforcausallm__model__layers__46__post_attention_layernorm__weight", "model.layers.47.self_attn.q_proj.weight": "llamaforcausallm__model__layers__47__self_attn__q_proj__weight", "model.layers.47.self_attn.k_proj.weight": 
"llamaforcausallm__model__layers__47__self_attn__k_proj__weight", "model.layers.47.self_attn.v_proj.weight": "llamaforcausallm__model__layers__47__self_attn__v_proj__weight", "model.layers.47.self_attn.o_proj.weight": "llamaforcausallm__model__layers__47__self_attn__o_proj__weight", "model.layers.47.mlp.gate_proj.weight": "llamaforcausallm__model__layers__47__mlp__gate_proj__weight", "model.layers.47.mlp.up_proj.weight": "llamaforcausallm__model__layers__47__mlp__up_proj__weight", "model.layers.47.mlp.down_proj.weight": "llamaforcausallm__model__layers__47__mlp__down_proj__weight", "model.layers.47.input_layernorm.weight": "llamaforcausallm__model__layers__47__input_layernorm__weight", "model.layers.47.post_attention_layernorm.weight": "llamaforcausallm__model__layers__47__post_attention_layernorm__weight", "model.layers.48.self_attn.q_proj.weight": "llamaforcausallm__model__layers__48__self_attn__q_proj__weight", "model.layers.48.self_attn.k_proj.weight": "llamaforcausallm__model__layers__48__self_attn__k_proj__weight", "model.layers.48.self_attn.v_proj.weight": "llamaforcausallm__model__layers__48__self_attn__v_proj__weight", "model.layers.48.self_attn.o_proj.weight": "llamaforcausallm__model__layers__48__self_attn__o_proj__weight", "model.layers.48.mlp.gate_proj.weight": "llamaforcausallm__model__layers__48__mlp__gate_proj__weight", "model.layers.48.mlp.up_proj.weight": "llamaforcausallm__model__layers__48__mlp__up_proj__weight", "model.layers.48.mlp.down_proj.weight": "llamaforcausallm__model__layers__48__mlp__down_proj__weight", "model.layers.48.input_layernorm.weight": "llamaforcausallm__model__layers__48__input_layernorm__weight", "model.layers.48.post_attention_layernorm.weight": "llamaforcausallm__model__layers__48__post_attention_layernorm__weight", "model.layers.49.self_attn.q_proj.weight": "llamaforcausallm__model__layers__49__self_attn__q_proj__weight", "model.layers.49.self_attn.k_proj.weight": "llamaforcausallm__model__layers__49__self_attn__k_proj__weight", "model.layers.49.self_attn.v_proj.weight": "llamaforcausallm__model__layers__49__self_attn__v_proj__weight", "model.layers.49.self_attn.o_proj.weight": "llamaforcausallm__model__layers__49__self_attn__o_proj__weight", "model.layers.49.mlp.gate_proj.weight": "llamaforcausallm__model__layers__49__mlp__gate_proj__weight", "model.layers.49.mlp.up_proj.weight": "llamaforcausallm__model__layers__49__mlp__up_proj__weight", "model.layers.49.mlp.down_proj.weight": "llamaforcausallm__model__layers__49__mlp__down_proj__weight", "model.layers.49.input_layernorm.weight": "llamaforcausallm__model__layers__49__input_layernorm__weight", "model.layers.49.post_attention_layernorm.weight": "llamaforcausallm__model__layers__49__post_attention_layernorm__weight", "model.layers.50.self_attn.q_proj.weight": "llamaforcausallm__model__layers__50__self_attn__q_proj__weight", "model.layers.50.self_attn.k_proj.weight": "llamaforcausallm__model__layers__50__self_attn__k_proj__weight", "model.layers.50.self_attn.v_proj.weight": "llamaforcausallm__model__layers__50__self_attn__v_proj__weight", "model.layers.50.self_attn.o_proj.weight": "llamaforcausallm__model__layers__50__self_attn__o_proj__weight", "model.layers.50.mlp.gate_proj.weight": "llamaforcausallm__model__layers__50__mlp__gate_proj__weight", "model.layers.50.mlp.up_proj.weight": "llamaforcausallm__model__layers__50__mlp__up_proj__weight", "model.layers.50.mlp.down_proj.weight": "llamaforcausallm__model__layers__50__mlp__down_proj__weight", "model.layers.50.input_layernorm.weight": 
"llamaforcausallm__model__layers__50__input_layernorm__weight", "model.layers.50.post_attention_layernorm.weight": "llamaforcausallm__model__layers__50__post_attention_layernorm__weight", "model.layers.51.self_attn.q_proj.weight": "llamaforcausallm__model__layers__51__self_attn__q_proj__weight", "model.layers.51.self_attn.k_proj.weight": "llamaforcausallm__model__layers__51__self_attn__k_proj__weight", "model.layers.51.self_attn.v_proj.weight": "llamaforcausallm__model__layers__51__self_attn__v_proj__weight", "model.layers.51.self_attn.o_proj.weight": "llamaforcausallm__model__layers__51__self_attn__o_proj__weight", "model.layers.51.mlp.gate_proj.weight": "llamaforcausallm__model__layers__51__mlp__gate_proj__weight", "model.layers.51.mlp.up_proj.weight": "llamaforcausallm__model__layers__51__mlp__up_proj__weight", "model.layers.51.mlp.down_proj.weight": "llamaforcausallm__model__layers__51__mlp__down_proj__weight", "model.layers.51.input_layernorm.weight": "llamaforcausallm__model__layers__51__input_layernorm__weight", "model.layers.51.post_attention_layernorm.weight": "llamaforcausallm__model__layers__51__post_attention_layernorm__weight", "model.layers.52.self_attn.q_proj.weight": "llamaforcausallm__model__layers__52__self_attn__q_proj__weight", "model.layers.52.self_attn.k_proj.weight": "llamaforcausallm__model__layers__52__self_attn__k_proj__weight", "model.layers.52.self_attn.v_proj.weight": "llamaforcausallm__model__layers__52__self_attn__v_proj__weight", "model.layers.52.self_attn.o_proj.weight": "llamaforcausallm__model__layers__52__self_attn__o_proj__weight", "model.layers.52.mlp.gate_proj.weight": "llamaforcausallm__model__layers__52__mlp__gate_proj__weight", "model.layers.52.mlp.up_proj.weight": "llamaforcausallm__model__layers__52__mlp__up_proj__weight", "model.layers.52.mlp.down_proj.weight": "llamaforcausallm__model__layers__52__mlp__down_proj__weight", "model.layers.52.input_layernorm.weight": "llamaforcausallm__model__layers__52__input_layernorm__weight", "model.layers.52.post_attention_layernorm.weight": "llamaforcausallm__model__layers__52__post_attention_layernorm__weight", "model.layers.53.self_attn.q_proj.weight": "llamaforcausallm__model__layers__53__self_attn__q_proj__weight", "model.layers.53.self_attn.k_proj.weight": "llamaforcausallm__model__layers__53__self_attn__k_proj__weight", "model.layers.53.self_attn.v_proj.weight": "llamaforcausallm__model__layers__53__self_attn__v_proj__weight", "model.layers.53.self_attn.o_proj.weight": "llamaforcausallm__model__layers__53__self_attn__o_proj__weight", "model.layers.53.mlp.gate_proj.weight": "llamaforcausallm__model__layers__53__mlp__gate_proj__weight", "model.layers.53.mlp.up_proj.weight": "llamaforcausallm__model__layers__53__mlp__up_proj__weight", "model.layers.53.mlp.down_proj.weight": "llamaforcausallm__model__layers__53__mlp__down_proj__weight", "model.layers.53.input_layernorm.weight": "llamaforcausallm__model__layers__53__input_layernorm__weight", "model.layers.53.post_attention_layernorm.weight": "llamaforcausallm__model__layers__53__post_attention_layernorm__weight", "model.layers.54.self_attn.q_proj.weight": "llamaforcausallm__model__layers__54__self_attn__q_proj__weight", "model.layers.54.self_attn.k_proj.weight": "llamaforcausallm__model__layers__54__self_attn__k_proj__weight", "model.layers.54.self_attn.v_proj.weight": "llamaforcausallm__model__layers__54__self_attn__v_proj__weight", "model.layers.54.self_attn.o_proj.weight": "llamaforcausallm__model__layers__54__self_attn__o_proj__weight", 
"model.layers.54.mlp.gate_proj.weight": "llamaforcausallm__model__layers__54__mlp__gate_proj__weight", "model.layers.54.mlp.up_proj.weight": "llamaforcausallm__model__layers__54__mlp__up_proj__weight", "model.layers.54.mlp.down_proj.weight": "llamaforcausallm__model__layers__54__mlp__down_proj__weight", "model.layers.54.input_layernorm.weight": "llamaforcausallm__model__layers__54__input_layernorm__weight", "model.layers.54.post_attention_layernorm.weight": "llamaforcausallm__model__layers__54__post_attention_layernorm__weight", "model.layers.55.self_attn.q_proj.weight": "llamaforcausallm__model__layers__55__self_attn__q_proj__weight", "model.layers.55.self_attn.k_proj.weight": "llamaforcausallm__model__layers__55__self_attn__k_proj__weight", "model.layers.55.self_attn.v_proj.weight": "llamaforcausallm__model__layers__55__self_attn__v_proj__weight", "model.layers.55.self_attn.o_proj.weight": "llamaforcausallm__model__layers__55__self_attn__o_proj__weight", "model.layers.55.mlp.gate_proj.weight": "llamaforcausallm__model__layers__55__mlp__gate_proj__weight", "model.layers.55.mlp.up_proj.weight": "llamaforcausallm__model__layers__55__mlp__up_proj__weight", "model.layers.55.mlp.down_proj.weight": "llamaforcausallm__model__layers__55__mlp__down_proj__weight", "model.layers.55.input_layernorm.weight": "llamaforcausallm__model__layers__55__input_layernorm__weight", "model.layers.55.post_attention_layernorm.weight": "llamaforcausallm__model__layers__55__post_attention_layernorm__weight", "model.layers.56.self_attn.q_proj.weight": "llamaforcausallm__model__layers__56__self_attn__q_proj__weight", "model.layers.56.self_attn.k_proj.weight": "llamaforcausallm__model__layers__56__self_attn__k_proj__weight", "model.layers.56.self_attn.v_proj.weight": "llamaforcausallm__model__layers__56__self_attn__v_proj__weight", "model.layers.56.self_attn.o_proj.weight": "llamaforcausallm__model__layers__56__self_attn__o_proj__weight", "model.layers.56.mlp.gate_proj.weight": "llamaforcausallm__model__layers__56__mlp__gate_proj__weight", "model.layers.56.mlp.up_proj.weight": "llamaforcausallm__model__layers__56__mlp__up_proj__weight", "model.layers.56.mlp.down_proj.weight": "llamaforcausallm__model__layers__56__mlp__down_proj__weight", "model.layers.56.input_layernorm.weight": "llamaforcausallm__model__layers__56__input_layernorm__weight", "model.layers.56.post_attention_layernorm.weight": "llamaforcausallm__model__layers__56__post_attention_layernorm__weight", "model.layers.57.self_attn.q_proj.weight": "llamaforcausallm__model__layers__57__self_attn__q_proj__weight", "model.layers.57.self_attn.k_proj.weight": "llamaforcausallm__model__layers__57__self_attn__k_proj__weight", "model.layers.57.self_attn.v_proj.weight": "llamaforcausallm__model__layers__57__self_attn__v_proj__weight", "model.layers.57.self_attn.o_proj.weight": "llamaforcausallm__model__layers__57__self_attn__o_proj__weight", "model.layers.57.mlp.gate_proj.weight": "llamaforcausallm__model__layers__57__mlp__gate_proj__weight", "model.layers.57.mlp.up_proj.weight": "llamaforcausallm__model__layers__57__mlp__up_proj__weight", "model.layers.57.mlp.down_proj.weight": "llamaforcausallm__model__layers__57__mlp__down_proj__weight", "model.layers.57.input_layernorm.weight": "llamaforcausallm__model__layers__57__input_layernorm__weight", "model.layers.57.post_attention_layernorm.weight": "llamaforcausallm__model__layers__57__post_attention_layernorm__weight", "model.layers.58.self_attn.q_proj.weight": 
"llamaforcausallm__model__layers__58__self_attn__q_proj__weight", "model.layers.58.self_attn.k_proj.weight": "llamaforcausallm__model__layers__58__self_attn__k_proj__weight", "model.layers.58.self_attn.v_proj.weight": "llamaforcausallm__model__layers__58__self_attn__v_proj__weight", "model.layers.58.self_attn.o_proj.weight": "llamaforcausallm__model__layers__58__self_attn__o_proj__weight", "model.layers.58.mlp.gate_proj.weight": "llamaforcausallm__model__layers__58__mlp__gate_proj__weight", "model.layers.58.mlp.up_proj.weight": "llamaforcausallm__model__layers__58__mlp__up_proj__weight", "model.layers.58.mlp.down_proj.weight": "llamaforcausallm__model__layers__58__mlp__down_proj__weight", "model.layers.58.input_layernorm.weight": "llamaforcausallm__model__layers__58__input_layernorm__weight", "model.layers.58.post_attention_layernorm.weight": "llamaforcausallm__model__layers__58__post_attention_layernorm__weight", "model.layers.59.self_attn.q_proj.weight": "llamaforcausallm__model__layers__59__self_attn__q_proj__weight", "model.layers.59.self_attn.k_proj.weight": "llamaforcausallm__model__layers__59__self_attn__k_proj__weight", "model.layers.59.self_attn.v_proj.weight": "llamaforcausallm__model__layers__59__self_attn__v_proj__weight", "model.layers.59.self_attn.o_proj.weight": "llamaforcausallm__model__layers__59__self_attn__o_proj__weight", "model.layers.59.mlp.gate_proj.weight": "llamaforcausallm__model__layers__59__mlp__gate_proj__weight", "model.layers.59.mlp.up_proj.weight": "llamaforcausallm__model__layers__59__mlp__up_proj__weight", "model.layers.59.mlp.down_proj.weight": "llamaforcausallm__model__layers__59__mlp__down_proj__weight", "model.layers.59.input_layernorm.weight": "llamaforcausallm__model__layers__59__input_layernorm__weight", "model.layers.59.post_attention_layernorm.weight": "llamaforcausallm__model__layers__59__post_attention_layernorm__weight", "model.layers.60.self_attn.q_proj.weight": "llamaforcausallm__model__layers__60__self_attn__q_proj__weight", "model.layers.60.self_attn.k_proj.weight": "llamaforcausallm__model__layers__60__self_attn__k_proj__weight", "model.layers.60.self_attn.v_proj.weight": "llamaforcausallm__model__layers__60__self_attn__v_proj__weight", "model.layers.60.self_attn.o_proj.weight": "llamaforcausallm__model__layers__60__self_attn__o_proj__weight", "model.layers.60.mlp.gate_proj.weight": "llamaforcausallm__model__layers__60__mlp__gate_proj__weight", "model.layers.60.mlp.up_proj.weight": "llamaforcausallm__model__layers__60__mlp__up_proj__weight", "model.layers.60.mlp.down_proj.weight": "llamaforcausallm__model__layers__60__mlp__down_proj__weight", "model.layers.60.input_layernorm.weight": "llamaforcausallm__model__layers__60__input_layernorm__weight", "model.layers.60.post_attention_layernorm.weight": "llamaforcausallm__model__layers__60__post_attention_layernorm__weight", "model.layers.61.self_attn.q_proj.weight": "llamaforcausallm__model__layers__61__self_attn__q_proj__weight", "model.layers.61.self_attn.k_proj.weight": "llamaforcausallm__model__layers__61__self_attn__k_proj__weight", "model.layers.61.self_attn.v_proj.weight": "llamaforcausallm__model__layers__61__self_attn__v_proj__weight", "model.layers.61.self_attn.o_proj.weight": "llamaforcausallm__model__layers__61__self_attn__o_proj__weight", "model.layers.61.mlp.gate_proj.weight": "llamaforcausallm__model__layers__61__mlp__gate_proj__weight", "model.layers.61.mlp.up_proj.weight": "llamaforcausallm__model__layers__61__mlp__up_proj__weight", "model.layers.61.mlp.down_proj.weight": 
"llamaforcausallm__model__layers__61__mlp__down_proj__weight", "model.layers.61.input_layernorm.weight": "llamaforcausallm__model__layers__61__input_layernorm__weight", "model.layers.61.post_attention_layernorm.weight": "llamaforcausallm__model__layers__61__post_attention_layernorm__weight", "model.layers.62.self_attn.q_proj.weight": "llamaforcausallm__model__layers__62__self_attn__q_proj__weight", "model.layers.62.self_attn.k_proj.weight": "llamaforcausallm__model__layers__62__self_attn__k_proj__weight", "model.layers.62.self_attn.v_proj.weight": "llamaforcausallm__model__layers__62__self_attn__v_proj__weight", "model.layers.62.self_attn.o_proj.weight": "llamaforcausallm__model__layers__62__self_attn__o_proj__weight", "model.layers.62.mlp.gate_proj.weight": "llamaforcausallm__model__layers__62__mlp__gate_proj__weight", "model.layers.62.mlp.up_proj.weight": "llamaforcausallm__model__layers__62__mlp__up_proj__weight", "model.layers.62.mlp.down_proj.weight": "llamaforcausallm__model__layers__62__mlp__down_proj__weight", "model.layers.62.input_layernorm.weight": "llamaforcausallm__model__layers__62__input_layernorm__weight", "model.layers.62.post_attention_layernorm.weight": "llamaforcausallm__model__layers__62__post_attention_layernorm__weight", "model.layers.63.self_attn.q_proj.weight": "llamaforcausallm__model__layers__63__self_attn__q_proj__weight", "model.layers.63.self_attn.k_proj.weight": "llamaforcausallm__model__layers__63__self_attn__k_proj__weight", "model.layers.63.self_attn.v_proj.weight": "llamaforcausallm__model__layers__63__self_attn__v_proj__weight", "model.layers.63.self_attn.o_proj.weight": "llamaforcausallm__model__layers__63__self_attn__o_proj__weight", "model.layers.63.mlp.gate_proj.weight": "llamaforcausallm__model__layers__63__mlp__gate_proj__weight", "model.layers.63.mlp.up_proj.weight": "llamaforcausallm__model__layers__63__mlp__up_proj__weight", "model.layers.63.mlp.down_proj.weight": "llamaforcausallm__model__layers__63__mlp__down_proj__weight", "model.layers.63.input_layernorm.weight": "llamaforcausallm__model__layers__63__input_layernorm__weight", "model.layers.63.post_attention_layernorm.weight": "llamaforcausallm__model__layers__63__post_attention_layernorm__weight", "model.layers.64.self_attn.q_proj.weight": "llamaforcausallm__model__layers__64__self_attn__q_proj__weight", "model.layers.64.self_attn.k_proj.weight": "llamaforcausallm__model__layers__64__self_attn__k_proj__weight", "model.layers.64.self_attn.v_proj.weight": "llamaforcausallm__model__layers__64__self_attn__v_proj__weight", "model.layers.64.self_attn.o_proj.weight": "llamaforcausallm__model__layers__64__self_attn__o_proj__weight", "model.layers.64.mlp.gate_proj.weight": "llamaforcausallm__model__layers__64__mlp__gate_proj__weight", "model.layers.64.mlp.up_proj.weight": "llamaforcausallm__model__layers__64__mlp__up_proj__weight", "model.layers.64.mlp.down_proj.weight": "llamaforcausallm__model__layers__64__mlp__down_proj__weight", "model.layers.64.input_layernorm.weight": "llamaforcausallm__model__layers__64__input_layernorm__weight", "model.layers.64.post_attention_layernorm.weight": "llamaforcausallm__model__layers__64__post_attention_layernorm__weight", "model.layers.65.self_attn.q_proj.weight": "llamaforcausallm__model__layers__65__self_attn__q_proj__weight", "model.layers.65.self_attn.k_proj.weight": "llamaforcausallm__model__layers__65__self_attn__k_proj__weight", "model.layers.65.self_attn.v_proj.weight": "llamaforcausallm__model__layers__65__self_attn__v_proj__weight", 
"model.layers.65.self_attn.o_proj.weight": "llamaforcausallm__model__layers__65__self_attn__o_proj__weight", "model.layers.65.mlp.gate_proj.weight": "llamaforcausallm__model__layers__65__mlp__gate_proj__weight", "model.layers.65.mlp.up_proj.weight": "llamaforcausallm__model__layers__65__mlp__up_proj__weight", "model.layers.65.mlp.down_proj.weight": "llamaforcausallm__model__layers__65__mlp__down_proj__weight", "model.layers.65.input_layernorm.weight": "llamaforcausallm__model__layers__65__input_layernorm__weight", "model.layers.65.post_attention_layernorm.weight": "llamaforcausallm__model__layers__65__post_attention_layernorm__weight", "model.layers.66.self_attn.q_proj.weight": "llamaforcausallm__model__layers__66__self_attn__q_proj__weight", "model.layers.66.self_attn.k_proj.weight": "llamaforcausallm__model__layers__66__self_attn__k_proj__weight", "model.layers.66.self_attn.v_proj.weight": "llamaforcausallm__model__layers__66__self_attn__v_proj__weight", "model.layers.66.self_attn.o_proj.weight": "llamaforcausallm__model__layers__66__self_attn__o_proj__weight", "model.layers.66.mlp.gate_proj.weight": "llamaforcausallm__model__layers__66__mlp__gate_proj__weight", "model.layers.66.mlp.up_proj.weight": "llamaforcausallm__model__layers__66__mlp__up_proj__weight", "model.layers.66.mlp.down_proj.weight": "llamaforcausallm__model__layers__66__mlp__down_proj__weight", "model.layers.66.input_layernorm.weight": "llamaforcausallm__model__layers__66__input_layernorm__weight", "model.layers.66.post_attention_layernorm.weight": "llamaforcausallm__model__layers__66__post_attention_layernorm__weight", "model.layers.67.self_attn.q_proj.weight": "llamaforcausallm__model__layers__67__self_attn__q_proj__weight", "model.layers.67.self_attn.k_proj.weight": "llamaforcausallm__model__layers__67__self_attn__k_proj__weight", "model.layers.67.self_attn.v_proj.weight": "llamaforcausallm__model__layers__67__self_attn__v_proj__weight", "model.layers.67.self_attn.o_proj.weight": "llamaforcausallm__model__layers__67__self_attn__o_proj__weight", "model.layers.67.mlp.gate_proj.weight": "llamaforcausallm__model__layers__67__mlp__gate_proj__weight", "model.layers.67.mlp.up_proj.weight": "llamaforcausallm__model__layers__67__mlp__up_proj__weight", "model.layers.67.mlp.down_proj.weight": "llamaforcausallm__model__layers__67__mlp__down_proj__weight", "model.layers.67.input_layernorm.weight": "llamaforcausallm__model__layers__67__input_layernorm__weight", "model.layers.67.post_attention_layernorm.weight": "llamaforcausallm__model__layers__67__post_attention_layernorm__weight", "model.layers.68.self_attn.q_proj.weight": "llamaforcausallm__model__layers__68__self_attn__q_proj__weight", "model.layers.68.self_attn.k_proj.weight": "llamaforcausallm__model__layers__68__self_attn__k_proj__weight", "model.layers.68.self_attn.v_proj.weight": "llamaforcausallm__model__layers__68__self_attn__v_proj__weight", "model.layers.68.self_attn.o_proj.weight": "llamaforcausallm__model__layers__68__self_attn__o_proj__weight", "model.layers.68.mlp.gate_proj.weight": "llamaforcausallm__model__layers__68__mlp__gate_proj__weight", "model.layers.68.mlp.up_proj.weight": "llamaforcausallm__model__layers__68__mlp__up_proj__weight", "model.layers.68.mlp.down_proj.weight": "llamaforcausallm__model__layers__68__mlp__down_proj__weight", "model.layers.68.input_layernorm.weight": "llamaforcausallm__model__layers__68__input_layernorm__weight", "model.layers.68.post_attention_layernorm.weight": 
"llamaforcausallm__model__layers__68__post_attention_layernorm__weight", "model.layers.69.self_attn.q_proj.weight": "llamaforcausallm__model__layers__69__self_attn__q_proj__weight", "model.layers.69.self_attn.k_proj.weight": "llamaforcausallm__model__layers__69__self_attn__k_proj__weight", "model.layers.69.self_attn.v_proj.weight": "llamaforcausallm__model__layers__69__self_attn__v_proj__weight", "model.layers.69.self_attn.o_proj.weight": "llamaforcausallm__model__layers__69__self_attn__o_proj__weight", "model.layers.69.mlp.gate_proj.weight": "llamaforcausallm__model__layers__69__mlp__gate_proj__weight", "model.layers.69.mlp.up_proj.weight": "llamaforcausallm__model__layers__69__mlp__up_proj__weight", "model.layers.69.mlp.down_proj.weight": "llamaforcausallm__model__layers__69__mlp__down_proj__weight", "model.layers.69.input_layernorm.weight": "llamaforcausallm__model__layers__69__input_layernorm__weight", "model.layers.69.post_attention_layernorm.weight": "llamaforcausallm__model__layers__69__post_attention_layernorm__weight", "model.layers.70.self_attn.q_proj.weight": "llamaforcausallm__model__layers__70__self_attn__q_proj__weight", "model.layers.70.self_attn.k_proj.weight": "llamaforcausallm__model__layers__70__self_attn__k_proj__weight", "model.layers.70.self_attn.v_proj.weight": "llamaforcausallm__model__layers__70__self_attn__v_proj__weight", "model.layers.70.self_attn.o_proj.weight": "llamaforcausallm__model__layers__70__self_attn__o_proj__weight", "model.layers.70.mlp.gate_proj.weight": "llamaforcausallm__model__layers__70__mlp__gate_proj__weight", "model.layers.70.mlp.up_proj.weight": "llamaforcausallm__model__layers__70__mlp__up_proj__weight", "model.layers.70.mlp.down_proj.weight": "llamaforcausallm__model__layers__70__mlp__down_proj__weight", "model.layers.70.input_layernorm.weight": "llamaforcausallm__model__layers__70__input_layernorm__weight", "model.layers.70.post_attention_layernorm.weight": "llamaforcausallm__model__layers__70__post_attention_layernorm__weight", "model.layers.71.self_attn.q_proj.weight": "llamaforcausallm__model__layers__71__self_attn__q_proj__weight", "model.layers.71.self_attn.k_proj.weight": "llamaforcausallm__model__layers__71__self_attn__k_proj__weight", "model.layers.71.self_attn.v_proj.weight": "llamaforcausallm__model__layers__71__self_attn__v_proj__weight", "model.layers.71.self_attn.o_proj.weight": "llamaforcausallm__model__layers__71__self_attn__o_proj__weight", "model.layers.71.mlp.gate_proj.weight": "llamaforcausallm__model__layers__71__mlp__gate_proj__weight", "model.layers.71.mlp.up_proj.weight": "llamaforcausallm__model__layers__71__mlp__up_proj__weight", "model.layers.71.mlp.down_proj.weight": "llamaforcausallm__model__layers__71__mlp__down_proj__weight", "model.layers.71.input_layernorm.weight": "llamaforcausallm__model__layers__71__input_layernorm__weight", "model.layers.71.post_attention_layernorm.weight": "llamaforcausallm__model__layers__71__post_attention_layernorm__weight", "model.layers.72.self_attn.q_proj.weight": "llamaforcausallm__model__layers__72__self_attn__q_proj__weight", "model.layers.72.self_attn.k_proj.weight": "llamaforcausallm__model__layers__72__self_attn__k_proj__weight", "model.layers.72.self_attn.v_proj.weight": "llamaforcausallm__model__layers__72__self_attn__v_proj__weight", "model.layers.72.self_attn.o_proj.weight": "llamaforcausallm__model__layers__72__self_attn__o_proj__weight", "model.layers.72.mlp.gate_proj.weight": "llamaforcausallm__model__layers__72__mlp__gate_proj__weight", 
"model.layers.72.mlp.up_proj.weight": "llamaforcausallm__model__layers__72__mlp__up_proj__weight", "model.layers.72.mlp.down_proj.weight": "llamaforcausallm__model__layers__72__mlp__down_proj__weight", "model.layers.72.input_layernorm.weight": "llamaforcausallm__model__layers__72__input_layernorm__weight", "model.layers.72.post_attention_layernorm.weight": "llamaforcausallm__model__layers__72__post_attention_layernorm__weight", "model.layers.73.self_attn.q_proj.weight": "llamaforcausallm__model__layers__73__self_attn__q_proj__weight", "model.layers.73.self_attn.k_proj.weight": "llamaforcausallm__model__layers__73__self_attn__k_proj__weight", "model.layers.73.self_attn.v_proj.weight": "llamaforcausallm__model__layers__73__self_attn__v_proj__weight", "model.layers.73.self_attn.o_proj.weight": "llamaforcausallm__model__layers__73__self_attn__o_proj__weight", "model.layers.73.mlp.gate_proj.weight": "llamaforcausallm__model__layers__73__mlp__gate_proj__weight", "model.layers.73.mlp.up_proj.weight": "llamaforcausallm__model__layers__73__mlp__up_proj__weight", "model.layers.73.mlp.down_proj.weight": "llamaforcausallm__model__layers__73__mlp__down_proj__weight", "model.layers.73.input_layernorm.weight": "llamaforcausallm__model__layers__73__input_layernorm__weight", "model.layers.73.post_attention_layernorm.weight": "llamaforcausallm__model__layers__73__post_attention_layernorm__weight", "model.layers.74.self_attn.q_proj.weight": "llamaforcausallm__model__layers__74__self_attn__q_proj__weight", "model.layers.74.self_attn.k_proj.weight": "llamaforcausallm__model__layers__74__self_attn__k_proj__weight", "model.layers.74.self_attn.v_proj.weight": "llamaforcausallm__model__layers__74__self_attn__v_proj__weight", "model.layers.74.self_attn.o_proj.weight": "llamaforcausallm__model__layers__74__self_attn__o_proj__weight", "model.layers.74.mlp.gate_proj.weight": "llamaforcausallm__model__layers__74__mlp__gate_proj__weight", "model.layers.74.mlp.up_proj.weight": "llamaforcausallm__model__layers__74__mlp__up_proj__weight", "model.layers.74.mlp.down_proj.weight": "llamaforcausallm__model__layers__74__mlp__down_proj__weight", "model.layers.74.input_layernorm.weight": "llamaforcausallm__model__layers__74__input_layernorm__weight", "model.layers.74.post_attention_layernorm.weight": "llamaforcausallm__model__layers__74__post_attention_layernorm__weight", "model.layers.75.self_attn.q_proj.weight": "llamaforcausallm__model__layers__75__self_attn__q_proj__weight", "model.layers.75.self_attn.k_proj.weight": "llamaforcausallm__model__layers__75__self_attn__k_proj__weight", "model.layers.75.self_attn.v_proj.weight": "llamaforcausallm__model__layers__75__self_attn__v_proj__weight", "model.layers.75.self_attn.o_proj.weight": "llamaforcausallm__model__layers__75__self_attn__o_proj__weight", "model.layers.75.mlp.gate_proj.weight": "llamaforcausallm__model__layers__75__mlp__gate_proj__weight", "model.layers.75.mlp.up_proj.weight": "llamaforcausallm__model__layers__75__mlp__up_proj__weight", "model.layers.75.mlp.down_proj.weight": "llamaforcausallm__model__layers__75__mlp__down_proj__weight", "model.layers.75.input_layernorm.weight": "llamaforcausallm__model__layers__75__input_layernorm__weight", "model.layers.75.post_attention_layernorm.weight": "llamaforcausallm__model__layers__75__post_attention_layernorm__weight", "model.layers.76.self_attn.q_proj.weight": "llamaforcausallm__model__layers__76__self_attn__q_proj__weight", "model.layers.76.self_attn.k_proj.weight": 
"llamaforcausallm__model__layers__76__self_attn__k_proj__weight", "model.layers.76.self_attn.v_proj.weight": "llamaforcausallm__model__layers__76__self_attn__v_proj__weight", "model.layers.76.self_attn.o_proj.weight": "llamaforcausallm__model__layers__76__self_attn__o_proj__weight", "model.layers.76.mlp.gate_proj.weight": "llamaforcausallm__model__layers__76__mlp__gate_proj__weight", "model.layers.76.mlp.up_proj.weight": "llamaforcausallm__model__layers__76__mlp__up_proj__weight", "model.layers.76.mlp.down_proj.weight": "llamaforcausallm__model__layers__76__mlp__down_proj__weight", "model.layers.76.input_layernorm.weight": "llamaforcausallm__model__layers__76__input_layernorm__weight", "model.layers.76.post_attention_layernorm.weight": "llamaforcausallm__model__layers__76__post_attention_layernorm__weight", "model.layers.77.self_attn.q_proj.weight": "llamaforcausallm__model__layers__77__self_attn__q_proj__weight", "model.layers.77.self_attn.k_proj.weight": "llamaforcausallm__model__layers__77__self_attn__k_proj__weight", "model.layers.77.self_attn.v_proj.weight": "llamaforcausallm__model__layers__77__self_attn__v_proj__weight", "model.layers.77.self_attn.o_proj.weight": "llamaforcausallm__model__layers__77__self_attn__o_proj__weight", "model.layers.77.mlp.gate_proj.weight": "llamaforcausallm__model__layers__77__mlp__gate_proj__weight", "model.layers.77.mlp.up_proj.weight": "llamaforcausallm__model__layers__77__mlp__up_proj__weight", "model.layers.77.mlp.down_proj.weight": "llamaforcausallm__model__layers__77__mlp__down_proj__weight", "model.layers.77.input_layernorm.weight": "llamaforcausallm__model__layers__77__input_layernorm__weight", "model.layers.77.post_attention_layernorm.weight": "llamaforcausallm__model__layers__77__post_attention_layernorm__weight", "model.layers.78.self_attn.q_proj.weight": "llamaforcausallm__model__layers__78__self_attn__q_proj__weight", "model.layers.78.self_attn.k_proj.weight": "llamaforcausallm__model__layers__78__self_attn__k_proj__weight", "model.layers.78.self_attn.v_proj.weight": "llamaforcausallm__model__layers__78__self_attn__v_proj__weight", "model.layers.78.self_attn.o_proj.weight": "llamaforcausallm__model__layers__78__self_attn__o_proj__weight", "model.layers.78.mlp.gate_proj.weight": "llamaforcausallm__model__layers__78__mlp__gate_proj__weight", "model.layers.78.mlp.up_proj.weight": "llamaforcausallm__model__layers__78__mlp__up_proj__weight", "model.layers.78.mlp.down_proj.weight": "llamaforcausallm__model__layers__78__mlp__down_proj__weight", "model.layers.78.input_layernorm.weight": "llamaforcausallm__model__layers__78__input_layernorm__weight", "model.layers.78.post_attention_layernorm.weight": "llamaforcausallm__model__layers__78__post_attention_layernorm__weight", "model.layers.79.self_attn.q_proj.weight": "llamaforcausallm__model__layers__79__self_attn__q_proj__weight", "model.layers.79.self_attn.k_proj.weight": "llamaforcausallm__model__layers__79__self_attn__k_proj__weight", "model.layers.79.self_attn.v_proj.weight": "llamaforcausallm__model__layers__79__self_attn__v_proj__weight", "model.layers.79.self_attn.o_proj.weight": "llamaforcausallm__model__layers__79__self_attn__o_proj__weight", "model.layers.79.mlp.gate_proj.weight": "llamaforcausallm__model__layers__79__mlp__gate_proj__weight", "model.layers.79.mlp.up_proj.weight": "llamaforcausallm__model__layers__79__mlp__up_proj__weight", "model.layers.79.mlp.down_proj.weight": "llamaforcausallm__model__layers__79__mlp__down_proj__weight", "model.layers.79.input_layernorm.weight": 
"llamaforcausallm__model__layers__79__input_layernorm__weight", "model.layers.79.post_attention_layernorm.weight": "llamaforcausallm__model__layers__79__post_attention_layernorm__weight", "model.norm.weight": "llamaforcausallm__model__norm__weight", "lm_head.weight": "llamaforcausallm__lm_head__weight"}
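The flattened checkpoint keys in the weight map above appear to follow a simple convention: prefix the lower-cased model class and replace every "." in the Hugging Face parameter name with "__". A minimal Python sketch of that transform (the function name to_flat_key is illustrative, not from this repo):

def to_flat_key(hf_name: str, prefix: str = "llamaforcausallm") -> str:
    # Reproduce the key scheme seen in the weight map above.
    return f"{prefix}__{hf_name.replace('.', '__')}"

assert to_flat_key("model.layers.79.post_attention_layernorm.weight") == \
    "llamaforcausallm__model__layers__79__post_attention_layernorm__weight"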
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+ {
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "unk_token": "<unk>"
+ }
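For reference, a minimal sketch of how these special tokens surface when the repo is loaded with the transformers library (the local path is a placeholder, not part of this commit):

from transformers import AutoTokenizer

# Placeholder path; point this at a local clone of the repo.
tok = AutoTokenizer.from_pretrained("path/to/this/repo")
print(tok.bos_token, tok.eos_token, tok.unk_token)  # expected: <s> </s> <unk>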
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e358066070e28277acbdf4ff778b22f285ba9b61a92668e7f54307882967ec1
+ size 986150
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
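The config above declares "add_bos_token": true and "add_eos_token": false for the LlamaTokenizer class. A hedged sketch of what that implies when encoding text (the path is a placeholder for a local clone of the repo):

from transformers import LlamaTokenizer  # tokenizer_class declared above

tok = LlamaTokenizer.from_pretrained("path/to/this/repo")  # placeholder path
ids = tok("hello")["input_ids"]
# add_bos_token: true  -> sequence starts with <s> (token id 1 per added_tokens_decoder)
assert ids[0] == tok.bos_token_id
# add_eos_token: false -> no trailing </s> is appended
assert ids[-1] != tok.eos_token_id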