stan-hua committed
Commit: 8818d39 (1 parent: bff4757)

Push folder to HuggingFace Hub

Files changed (4)
  1. config.json +32 -1
  2. recipe.yaml +7 -0
  3. special_tokens_map.json +2 -1
  4. tokenizer_config.json +1 -0
config.json CHANGED
@@ -23,6 +23,37 @@
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": null,
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": 128,
+          "num_bits": 4,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "group",
+          "symmetric": true,
+          "type": "int"
+        }
+      }
+    },
+    "format": "pack-quantized",
+    "global_compression_ratio": 1.8917232374233346,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed"
+  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "factor": 8.0,
@@ -37,4 +68,4 @@
   "transformers_version": "4.45.2",
   "use_cache": true,
   "vocab_size": 128256
-}
+}
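
The new "quantization_config" block records a compressed-tensors W4A16 scheme: 4-bit symmetric integer weights, group size 128, applied to every Linear layer except lm_head and stored in the "pack-quantized" format. A minimal sketch of consuming such a checkpoint with vLLM, which reads this block and unpacks the packed weights at load time; the repo id is a placeholder, since this commit does not name the destination repo:

# Sketch: serve the W4A16 compressed-tensors checkpoint with vLLM.
# "stan-hua/<quantized-model-repo>" is a placeholder for the actual Hub repo.
from vllm import LLM, SamplingParams

llm = LLM(model="stan-hua/<quantized-model-repo>")
params = SamplingParams(temperature=0.0, max_tokens=64)
out = llm.generate(["Summarize this commit in one sentence."], params)
print(out[0].outputs[0].text)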
recipe.yaml ADDED
@@ -0,0 +1,7 @@
+DEFAULT_stage:
+  DEFAULT_modifiers:
+    SmoothQuantModifier: {smoothing_strength: 0.8}
+    QuantizationModifier:
+      ignore: [lm_head]
+      targets: Linear
+      scheme: W4A16
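
This is an llm-compressor one-shot recipe: SmoothQuant at strength 0.8, followed by W4A16 quantization of all Linear layers with lm_head ignored, which is exactly what the quantization_config added to config.json reflects. A minimal sketch of how such a recipe is typically applied, assuming llm-compressor's oneshot entrypoint; the base model and calibration dataset are assumptions (the config values are consistent with Llama 3.1 8B), and exact import paths and arguments vary across llm-compressor versions:

# Sketch: apply recipe.yaml in one shot with llm-compressor (assumed API).
from llmcompressor.transformers import oneshot

oneshot(
    model="meta-llama/Llama-3.1-8B-Instruct",  # assumed base model
    dataset="open_platypus",                   # assumed calibration dataset
    recipe="recipe.yaml",                      # the recipe added in this commit
    output_dir="llama-3.1-8b-instruct-w4a16",  # local folder later pushed to the Hub
    max_seq_length=2048,
    num_calibration_samples=512,
)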
special_tokens_map.json CHANGED
@@ -12,5 +12,6 @@
     "normalized": false,
     "rstrip": false,
     "single_word": false
-  }
+  },
+  "pad_token": "<|eot_id|>"
 }
tokenizer_config.json CHANGED
@@ -2058,5 +2058,6 @@
     "attention_mask"
   ],
   "model_max_length": 131072,
+  "pad_token": "<|eot_id|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
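
Both tokenizer files now declare an explicit pad token, reusing "<|eot_id|>", so padded batches work without assigning tokenizer.pad_token by hand. A quick check, again with a placeholder repo id:

# Sketch: confirm the pad token ships with the tokenizer (placeholder repo id).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("stan-hua/<quantized-model-repo>")
print(tok.pad_token)     # <|eot_id|>
print(tok.pad_token_id)  # 128009 in Llama 3 tokenizers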