mgoin committed
Commit 0ab444b
1 Parent(s): 791a361

Updated compression_config to quantization_config

Files changed (1)
  1. config.json +47 -47
config.json CHANGED
@@ -10,52 +10,6 @@
     "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
   },
   "bos_token_id": 1,
-  "compression_config": {
-    "config_groups": {
-      "group_0": {
-        "input_activations": {
-          "block_structure": null,
-          "dynamic": true,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "memoryless",
-          "observer_kwargs": {},
-          "strategy": "token",
-          "symmetric": true,
-          "type": "int"
-        },
-        "output_activations": null,
-        "targets": [
-          "Linear"
-        ],
-        "weights": {
-          "block_structure": null,
-          "dynamic": false,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "minmax",
-          "observer_kwargs": {},
-          "strategy": "channel",
-          "symmetric": true,
-          "type": "int"
-        }
-      }
-    },
-    "format": "int-quantized",
-    "global_compression_ratio": 1.1649229226070288,
-    "ignore": [
-      "lm_head"
-    ],
-    "kv_cache_scheme": null,
-    "quant_method": "compressed-tensors",
-    "quantization_status": "frozen",
-    "sparsity_config": {
-      "format": "dense",
-      "global_sparsity": 1.2496731708976467,
-      "registry_requires_subclass": false,
-      "sparsity_structure": "unstructured"
-    }
-  },
   "embd_pdrop": 0.0,
   "eos_token_id": 32000,
   "hidden_act": "silu",
@@ -180,5 +134,51 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.42.3",
   "use_cache": true,
-  "vocab_size": 32064
+  "vocab_size": 32064,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "block_structure": null,
+          "dynamic": true,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "memoryless",
+          "observer_kwargs": {},
+          "strategy": "token",
+          "symmetric": true,
+          "type": "int"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "channel",
+          "symmetric": true,
+          "type": "int"
+        }
+      }
+    },
+    "format": "int-quantized",
+    "global_compression_ratio": 1.1649229226070288,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "frozen",
+    "sparsity_config": {
+      "format": "dense",
+      "global_sparsity": 1.2496731708976467,
+      "registry_requires_subclass": false,
+      "sparsity_structure": "unstructured"
+    }
+  }
 }
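
The payload under the renamed key is unchanged: it still describes W8A8 compressed-tensors quantization, with symmetric per-channel int8 weights (minmax observer) and dynamic per-token int8 activations, applied to every Linear layer except the ignored lm_head. Below is a hand-rolled sketch of what those scheme fields mean in practice. It is an illustration only, not the compressed-tensors implementation, and the local config.json path is an assumption:

```python
import json

import torch

# Load the scheme this commit renamed; "config.json" is assumed to be the file
# from this repo, checked out locally.
with open("config.json") as f:
    scheme = json.load(f)["quantization_config"]["config_groups"]["group_0"]
assert scheme["weights"]["num_bits"] == 8 and scheme["weights"]["symmetric"]

def quantize_weight_per_channel(w: torch.Tensor):
    # "strategy": "channel" with a "minmax" observer: one scale per output
    # channel (row), taken from that channel's max |w|; symmetric, so no zero point.
    scale = w.abs().amax(dim=1, keepdim=True).clamp(min=1e-8) / 127.0
    return torch.round(w / scale).clamp(-128, 127).to(torch.int8), scale

def quantize_activation_per_token(x: torch.Tensor):
    # "strategy": "token" with "dynamic": true: one scale per token, computed
    # at runtime ("memoryless" observer), so nothing is stored in the checkpoint.
    scale = x.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8) / 127.0
    return torch.round(x / scale).clamp(-128, 127).to(torch.int8), scale

w = torch.randn(4096, 4096)  # weight of one of the targeted Linear layers
x = torch.randn(8, 4096)     # activations for 8 tokens
qw, sw = quantize_weight_per_channel(w)
qx, sx = quantize_activation_per_token(x)
# Dequantize-and-matmul approximates the original bf16 computation.
y = (qx.float() * sx) @ (qw.float() * sw).t()
```

The rename itself is the compatibility fix: loaders such as Transformers and vLLM discover the scheme via the standard top-level quantization_config key, whereas compression_config was the older compressed-tensors spelling of the same block.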