mgoin committed
Commit 6e5b068
1 Parent(s): 150ad2b

Updated compression_config to quantization_config
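For context (not part of the original commit message): checkpoints produced by the compressed-tensors tooling originally stored their quantization parameters under a "compression_config" key, while transformers-style loaders read them from "quantization_config"; this commit renames the key accordingly. A minimal sketch of the lookup the rename satisfies, assuming a local copy of the config.json changed below:

import json

# Sketch only: "config.json" is assumed to be a local copy of the
# file changed in this commit.
with open("config.json") as f:
    config = json.load(f)

# Loaders that understand compressed-tensors checkpoints look here;
# before this commit the same block sat under "compression_config".
qcfg = config.get("quantization_config")
assert qcfg is not None and qcfg["quant_method"] == "compressed-tensors"
print(qcfg["config_groups"]["group_0"]["weights"])  # int8, channel-wise, symmetric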

Files changed (1): config.json (+31 -31)
config.json CHANGED
@@ -6,36 +6,6 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "compression_config": {
-    "config_groups": {
-      "group_0": {
-        "input_activations": null,
-        "output_activations": null,
-        "targets": [
-          "Linear"
-        ],
-        "weights": {
-          "block_structure": null,
-          "dynamic": false,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "minmax",
-          "observer_kwargs": {},
-          "strategy": "channel",
-          "symmetric": true,
-          "type": "int"
-        }
-      }
-    },
-    "format": "pack-quantized",
-    "global_compression_ratio": 1.464683023832259,
-    "ignore": [
-      "lm_head"
-    ],
-    "kv_cache_scheme": null,
-    "quant_method": "compressed-tensors",
-    "quantization_status": "frozen"
-  },
   "eos_token_id": [
     128001,
     128008,
@@ -65,5 +35,35 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.0",
   "use_cache": true,
-  "vocab_size": 128256
+  "vocab_size": 128256,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": null,
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "channel",
+          "symmetric": true,
+          "type": "int"
+        }
+      }
+    },
+    "format": "pack-quantized",
+    "global_compression_ratio": 1.464683023832259,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "frozen"
+  }
 }
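The "weights" group in the diff above describes per-output-channel symmetric int8 weight quantization with a minmax observer. A minimal NumPy sketch of that scheme, as an illustration of what the config keys mean rather than the compressed-tensors implementation:

import numpy as np

def quantize_channelwise_symmetric_int8(w: np.ndarray):
    """Per-output-channel symmetric int8 quantization, matching the
    "weights" group: num_bits=8, symmetric=true, strategy="channel",
    observer="minmax" (scale taken from the channel's max magnitude)."""
    qmax = 2 ** (8 - 1) - 1                       # 127 for int8
    # One scale per output channel (row of a Linear weight matrix).
    scale = np.abs(w).max(axis=1, keepdims=True) / qmax
    q = np.clip(np.round(w / scale), -qmax, qmax).astype(np.int8)
    return q, scale

w = np.random.randn(4, 8).astype(np.float32)
q, scale = quantize_channelwise_symmetric_int8(w)
w_hat = q.astype(np.float32) * scale              # dequantize
print(np.abs(w - w_hat).max())                    # small quantization error

With "dynamic": false and "quantization_status": "frozen", these scales are computed once at compression time and stored in the checkpoint; "ignore": ["lm_head"] leaves the output projection in the original precision.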