feihu.hf committed on
Commit
80ba958
1 Parent(s): 3971207

update weights

Browse files
config.json CHANGED
@@ -8,7 +8,7 @@
8
  "hidden_act": "silu",
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
- "intermediate_size": 13696,
12
  "max_position_embeddings": 32768,
13
  "max_window_layers": 35,
14
  "model_type": "qwen2",
 
8
  "hidden_act": "silu",
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
+ "intermediate_size": 14336,
12
  "max_position_embeddings": 32768,
13
  "max_window_layers": 35,
14
  "model_type": "qwen2",
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a8a5d3acefb3f5b44d0cb74e83f10e45f40ad932dd1dcd63f5e9dc184d267ab
3
- size 3996880360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1feec0077fd4055e9fb6a5db24240a81ac41ff117cda1a67670a6149c3791f6
3
+ size 3995509184
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74b932e8d51140fa28a5105ecc01f0cf23aa3838ce87ff4d9dc6b0f5cd0eb364
3
- size 3946910664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4479a9d40667ba0fbcd6edba3a18dc552e5550b59fe438721b260ece78abfae1
3
+ size 3995886232
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c18a21157df97466eb43e2d68e63ddf785a0ce9acc4c5c082830da298cb26679
3
- size 4000006064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c4b900993032a3f94adc2f20a593bf3884bfc7b089a7061137e52f20cf120d
3
+ size 3995886344
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbd61adb26287b891a6247d921b62f1e59ca9405372c6b6a965b947f28331a58
3
- size 2530332544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d6701d5280d5764aa071fae569cc7535b563c7b5efa6cdd81964a90abf78008
3
+ size 2889484712
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 16031098880
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00005-of-00005.safetensors",
@@ -413,16 +413,16 @@
413
  "model.layers.18.self_attn.v_proj.qzeros": "model-00002-of-00005.safetensors",
414
  "model.layers.18.self_attn.v_proj.scales": "model-00002-of-00005.safetensors",
415
  "model.layers.19.input_layernorm.weight": "model-00003-of-00005.safetensors",
416
- "model.layers.19.mlp.down_proj.bias": "model-00002-of-00005.safetensors",
417
- "model.layers.19.mlp.down_proj.g_idx": "model-00002-of-00005.safetensors",
418
- "model.layers.19.mlp.down_proj.qweight": "model-00002-of-00005.safetensors",
419
- "model.layers.19.mlp.down_proj.qzeros": "model-00002-of-00005.safetensors",
420
- "model.layers.19.mlp.down_proj.scales": "model-00002-of-00005.safetensors",
421
- "model.layers.19.mlp.gate_proj.bias": "model-00002-of-00005.safetensors",
422
- "model.layers.19.mlp.gate_proj.g_idx": "model-00002-of-00005.safetensors",
423
- "model.layers.19.mlp.gate_proj.qweight": "model-00002-of-00005.safetensors",
424
- "model.layers.19.mlp.gate_proj.qzeros": "model-00002-of-00005.safetensors",
425
- "model.layers.19.mlp.gate_proj.scales": "model-00002-of-00005.safetensors",
426
  "model.layers.19.mlp.up_proj.bias": "model-00003-of-00005.safetensors",
427
  "model.layers.19.mlp.up_proj.g_idx": "model-00003-of-00005.safetensors",
428
  "model.layers.19.mlp.up_proj.qweight": "model-00003-of-00005.safetensors",
@@ -930,23 +930,23 @@
930
  "model.layers.30.self_attn.v_proj.qweight": "model-00003-of-00005.safetensors",
931
  "model.layers.30.self_attn.v_proj.qzeros": "model-00003-of-00005.safetensors",
932
  "model.layers.30.self_attn.v_proj.scales": "model-00003-of-00005.safetensors",
933
- "model.layers.31.input_layernorm.weight": "model-00003-of-00005.safetensors",
934
- "model.layers.31.mlp.down_proj.bias": "model-00003-of-00005.safetensors",
935
- "model.layers.31.mlp.down_proj.g_idx": "model-00003-of-00005.safetensors",
936
- "model.layers.31.mlp.down_proj.qweight": "model-00003-of-00005.safetensors",
937
- "model.layers.31.mlp.down_proj.qzeros": "model-00003-of-00005.safetensors",
938
- "model.layers.31.mlp.down_proj.scales": "model-00003-of-00005.safetensors",
939
- "model.layers.31.mlp.gate_proj.bias": "model-00003-of-00005.safetensors",
940
- "model.layers.31.mlp.gate_proj.g_idx": "model-00003-of-00005.safetensors",
941
- "model.layers.31.mlp.gate_proj.qweight": "model-00003-of-00005.safetensors",
942
- "model.layers.31.mlp.gate_proj.qzeros": "model-00003-of-00005.safetensors",
943
- "model.layers.31.mlp.gate_proj.scales": "model-00003-of-00005.safetensors",
944
- "model.layers.31.mlp.up_proj.bias": "model-00003-of-00005.safetensors",
945
- "model.layers.31.mlp.up_proj.g_idx": "model-00003-of-00005.safetensors",
946
- "model.layers.31.mlp.up_proj.qweight": "model-00003-of-00005.safetensors",
947
- "model.layers.31.mlp.up_proj.qzeros": "model-00003-of-00005.safetensors",
948
- "model.layers.31.mlp.up_proj.scales": "model-00003-of-00005.safetensors",
949
- "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
950
  "model.layers.31.self_attn.k_proj.bias": "model-00003-of-00005.safetensors",
951
  "model.layers.31.self_attn.k_proj.g_idx": "model-00003-of-00005.safetensors",
952
  "model.layers.31.self_attn.k_proj.qweight": "model-00003-of-00005.safetensors",
@@ -984,14 +984,14 @@
984
  "model.layers.32.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
985
  "model.layers.32.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
986
  "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
987
- "model.layers.32.self_attn.k_proj.bias": "model-00003-of-00005.safetensors",
988
- "model.layers.32.self_attn.k_proj.g_idx": "model-00003-of-00005.safetensors",
989
- "model.layers.32.self_attn.k_proj.qweight": "model-00003-of-00005.safetensors",
990
- "model.layers.32.self_attn.k_proj.qzeros": "model-00003-of-00005.safetensors",
991
- "model.layers.32.self_attn.k_proj.scales": "model-00003-of-00005.safetensors",
992
  "model.layers.32.self_attn.o_proj.bias": "model-00004-of-00005.safetensors",
993
  "model.layers.32.self_attn.o_proj.g_idx": "model-00004-of-00005.safetensors",
994
- "model.layers.32.self_attn.o_proj.qweight": "model-00003-of-00005.safetensors",
995
  "model.layers.32.self_attn.o_proj.qzeros": "model-00004-of-00005.safetensors",
996
  "model.layers.32.self_attn.o_proj.scales": "model-00004-of-00005.safetensors",
997
  "model.layers.32.self_attn.q_proj.bias": "model-00004-of-00005.safetensors",
@@ -1375,11 +1375,11 @@
1375
  "model.layers.6.self_attn.v_proj.qzeros": "model-00001-of-00005.safetensors",
1376
  "model.layers.6.self_attn.v_proj.scales": "model-00001-of-00005.safetensors",
1377
  "model.layers.7.input_layernorm.weight": "model-00002-of-00005.safetensors",
1378
- "model.layers.7.mlp.down_proj.bias": "model-00001-of-00005.safetensors",
1379
- "model.layers.7.mlp.down_proj.g_idx": "model-00001-of-00005.safetensors",
1380
- "model.layers.7.mlp.down_proj.qweight": "model-00001-of-00005.safetensors",
1381
- "model.layers.7.mlp.down_proj.qzeros": "model-00001-of-00005.safetensors",
1382
- "model.layers.7.mlp.down_proj.scales": "model-00001-of-00005.safetensors",
1383
  "model.layers.7.mlp.gate_proj.bias": "model-00002-of-00005.safetensors",
1384
  "model.layers.7.mlp.gate_proj.g_idx": "model-00002-of-00005.safetensors",
1385
  "model.layers.7.mlp.gate_proj.qweight": "model-00002-of-00005.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 16433735680
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00005-of-00005.safetensors",
 
413
  "model.layers.18.self_attn.v_proj.qzeros": "model-00002-of-00005.safetensors",
414
  "model.layers.18.self_attn.v_proj.scales": "model-00002-of-00005.safetensors",
415
  "model.layers.19.input_layernorm.weight": "model-00003-of-00005.safetensors",
416
+ "model.layers.19.mlp.down_proj.bias": "model-00003-of-00005.safetensors",
417
+ "model.layers.19.mlp.down_proj.g_idx": "model-00003-of-00005.safetensors",
418
+ "model.layers.19.mlp.down_proj.qweight": "model-00003-of-00005.safetensors",
419
+ "model.layers.19.mlp.down_proj.qzeros": "model-00003-of-00005.safetensors",
420
+ "model.layers.19.mlp.down_proj.scales": "model-00003-of-00005.safetensors",
421
+ "model.layers.19.mlp.gate_proj.bias": "model-00003-of-00005.safetensors",
422
+ "model.layers.19.mlp.gate_proj.g_idx": "model-00003-of-00005.safetensors",
423
+ "model.layers.19.mlp.gate_proj.qweight": "model-00003-of-00005.safetensors",
424
+ "model.layers.19.mlp.gate_proj.qzeros": "model-00003-of-00005.safetensors",
425
+ "model.layers.19.mlp.gate_proj.scales": "model-00003-of-00005.safetensors",
426
  "model.layers.19.mlp.up_proj.bias": "model-00003-of-00005.safetensors",
427
  "model.layers.19.mlp.up_proj.g_idx": "model-00003-of-00005.safetensors",
428
  "model.layers.19.mlp.up_proj.qweight": "model-00003-of-00005.safetensors",
 
930
  "model.layers.30.self_attn.v_proj.qweight": "model-00003-of-00005.safetensors",
931
  "model.layers.30.self_attn.v_proj.qzeros": "model-00003-of-00005.safetensors",
932
  "model.layers.30.self_attn.v_proj.scales": "model-00003-of-00005.safetensors",
933
+ "model.layers.31.input_layernorm.weight": "model-00004-of-00005.safetensors",
934
+ "model.layers.31.mlp.down_proj.bias": "model-00004-of-00005.safetensors",
935
+ "model.layers.31.mlp.down_proj.g_idx": "model-00004-of-00005.safetensors",
936
+ "model.layers.31.mlp.down_proj.qweight": "model-00004-of-00005.safetensors",
937
+ "model.layers.31.mlp.down_proj.qzeros": "model-00004-of-00005.safetensors",
938
+ "model.layers.31.mlp.down_proj.scales": "model-00004-of-00005.safetensors",
939
+ "model.layers.31.mlp.gate_proj.bias": "model-00004-of-00005.safetensors",
940
+ "model.layers.31.mlp.gate_proj.g_idx": "model-00004-of-00005.safetensors",
941
+ "model.layers.31.mlp.gate_proj.qweight": "model-00004-of-00005.safetensors",
942
+ "model.layers.31.mlp.gate_proj.qzeros": "model-00004-of-00005.safetensors",
943
+ "model.layers.31.mlp.gate_proj.scales": "model-00004-of-00005.safetensors",
944
+ "model.layers.31.mlp.up_proj.bias": "model-00004-of-00005.safetensors",
945
+ "model.layers.31.mlp.up_proj.g_idx": "model-00004-of-00005.safetensors",
946
+ "model.layers.31.mlp.up_proj.qweight": "model-00004-of-00005.safetensors",
947
+ "model.layers.31.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
948
+ "model.layers.31.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
949
+ "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
950
  "model.layers.31.self_attn.k_proj.bias": "model-00003-of-00005.safetensors",
951
  "model.layers.31.self_attn.k_proj.g_idx": "model-00003-of-00005.safetensors",
952
  "model.layers.31.self_attn.k_proj.qweight": "model-00003-of-00005.safetensors",
 
984
  "model.layers.32.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
985
  "model.layers.32.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
986
  "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
987
+ "model.layers.32.self_attn.k_proj.bias": "model-00004-of-00005.safetensors",
988
+ "model.layers.32.self_attn.k_proj.g_idx": "model-00004-of-00005.safetensors",
989
+ "model.layers.32.self_attn.k_proj.qweight": "model-00004-of-00005.safetensors",
990
+ "model.layers.32.self_attn.k_proj.qzeros": "model-00004-of-00005.safetensors",
991
+ "model.layers.32.self_attn.k_proj.scales": "model-00004-of-00005.safetensors",
992
  "model.layers.32.self_attn.o_proj.bias": "model-00004-of-00005.safetensors",
993
  "model.layers.32.self_attn.o_proj.g_idx": "model-00004-of-00005.safetensors",
994
+ "model.layers.32.self_attn.o_proj.qweight": "model-00004-of-00005.safetensors",
995
  "model.layers.32.self_attn.o_proj.qzeros": "model-00004-of-00005.safetensors",
996
  "model.layers.32.self_attn.o_proj.scales": "model-00004-of-00005.safetensors",
997
  "model.layers.32.self_attn.q_proj.bias": "model-00004-of-00005.safetensors",
 
1375
  "model.layers.6.self_attn.v_proj.qzeros": "model-00001-of-00005.safetensors",
1376
  "model.layers.6.self_attn.v_proj.scales": "model-00001-of-00005.safetensors",
1377
  "model.layers.7.input_layernorm.weight": "model-00002-of-00005.safetensors",
1378
+ "model.layers.7.mlp.down_proj.bias": "model-00002-of-00005.safetensors",
1379
+ "model.layers.7.mlp.down_proj.g_idx": "model-00002-of-00005.safetensors",
1380
+ "model.layers.7.mlp.down_proj.qweight": "model-00002-of-00005.safetensors",
1381
+ "model.layers.7.mlp.down_proj.qzeros": "model-00002-of-00005.safetensors",
1382
+ "model.layers.7.mlp.down_proj.scales": "model-00002-of-00005.safetensors",
1383
  "model.layers.7.mlp.gate_proj.bias": "model-00002-of-00005.safetensors",
1384
  "model.layers.7.mlp.gate_proj.g_idx": "model-00002-of-00005.safetensors",
1385
  "model.layers.7.mlp.gate_proj.qweight": "model-00002-of-00005.safetensors",