mgoin committed on
Commit
8670dfa
1 Parent(s): c1fdfad

Updated compression_config to quantization_config

Browse files
Files changed (1) hide show
  1. config.json +37 -37
config.json CHANGED
@@ -10,42 +10,6 @@
10
  "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
11
  },
12
  "bos_token_id": 1,
13
- "compression_config": {
14
- "config_groups": {
15
- "group_0": {
16
- "input_activations": null,
17
- "output_activations": null,
18
- "targets": [
19
- "Linear"
20
- ],
21
- "weights": {
22
- "block_structure": null,
23
- "dynamic": false,
24
- "group_size": 128,
25
- "num_bits": 4,
26
- "observer": "minmax",
27
- "observer_kwargs": {},
28
- "strategy": "group",
29
- "symmetric": true,
30
- "type": "int"
31
- }
32
- }
33
- },
34
- "format": "pack-quantized",
35
- "global_compression_ratio": 1.4217800438908097,
36
- "ignore": [
37
- "lm_head"
38
- ],
39
- "kv_cache_scheme": null,
40
- "quant_method": "compressed-tensors",
41
- "quantization_status": "frozen",
42
- "sparsity_config": {
43
- "format": "dense",
44
- "global_sparsity": 14.910563402082037,
45
- "registry_requires_subclass": false,
46
- "sparsity_structure": "unstructured"
47
- }
48
- },
49
  "embd_pdrop": 0.0,
50
  "eos_token_id": 32000,
51
  "hidden_act": "silu",
@@ -202,5 +166,41 @@
202
  "torch_dtype": "bfloat16",
203
  "transformers_version": "4.42.4",
204
  "use_cache": true,
205
- "vocab_size": 32064
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  }
 
10
  "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
11
  },
12
  "bos_token_id": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "embd_pdrop": 0.0,
14
  "eos_token_id": 32000,
15
  "hidden_act": "silu",
 
166
  "torch_dtype": "bfloat16",
167
  "transformers_version": "4.42.4",
168
  "use_cache": true,
169
+ "vocab_size": 32064,
170
+ "quantization_config": {
171
+ "config_groups": {
172
+ "group_0": {
173
+ "input_activations": null,
174
+ "output_activations": null,
175
+ "targets": [
176
+ "Linear"
177
+ ],
178
+ "weights": {
179
+ "block_structure": null,
180
+ "dynamic": false,
181
+ "group_size": 128,
182
+ "num_bits": 4,
183
+ "observer": "minmax",
184
+ "observer_kwargs": {},
185
+ "strategy": "group",
186
+ "symmetric": true,
187
+ "type": "int"
188
+ }
189
+ }
190
+ },
191
+ "format": "pack-quantized",
192
+ "global_compression_ratio": 1.4217800438908097,
193
+ "ignore": [
194
+ "lm_head"
195
+ ],
196
+ "kv_cache_scheme": null,
197
+ "quant_method": "compressed-tensors",
198
+ "quantization_status": "frozen",
199
+ "sparsity_config": {
200
+ "format": "dense",
201
+ "global_sparsity": 14.910563402082037,
202
+ "registry_requires_subclass": false,
203
+ "sparsity_structure": "unstructured"
204
+ }
205
+ }
206
  }