stan-hua committed on
Commit
c97a362
1 Parent(s): 44b4b40

Push folder to HuggingFace Hub

Browse files
Files changed (2) hide show
  1. config.json +40 -1
  2. recipe.yaml +8 -0
config.json CHANGED
@@ -23,6 +23,45 @@
23
  "num_hidden_layers": 32,
24
  "num_key_value_heads": 8,
25
  "pretraining_tp": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "rms_norm_eps": 1e-05,
27
  "rope_scaling": {
28
  "factor": 8.0,
@@ -37,4 +76,4 @@
37
  "transformers_version": "4.45.2",
38
  "use_cache": true,
39
  "vocab_size": 128256
40
- }
 
23
  "num_hidden_layers": 32,
24
  "num_key_value_heads": 8,
25
  "pretraining_tp": 1,
26
+ "quantization_config": {
27
+ "config_groups": {
28
+ "group_0": {
29
+ "input_activations": null,
30
+ "output_activations": null,
31
+ "targets": [
32
+ "Linear"
33
+ ],
34
+ "weights": {
35
+ "actorder": null,
36
+ "block_structure": null,
37
+ "dynamic": false,
38
+ "group_size": 128,
39
+ "num_bits": 4,
40
+ "observer": "minmax",
41
+ "observer_kwargs": {},
42
+ "strategy": "group",
43
+ "symmetric": true,
44
+ "type": "int"
45
+ }
46
+ }
47
+ },
48
+ "format": "pack-quantized",
49
+ "global_compression_ratio": 1.8917232374233346,
50
+ "ignore": [
51
+ "lm_head"
52
+ ],
53
+ "kv_cache_scheme": null,
54
+ "quant_method": "compressed-tensors",
55
+ "quantization_status": "compressed",
56
+ "sparsity_config": {
57
+ "format": "dense",
58
+ "global_sparsity": 0.14809091252120618,
59
+ "ignore": null,
60
+ "registry_requires_subclass": false,
61
+ "sparsity_structure": "unstructured",
62
+ "targets": null
63
+ }
64
+ },
65
  "rms_norm_eps": 1e-05,
66
  "rope_scaling": {
67
  "factor": 8.0,
 
76
  "transformers_version": "4.45.2",
77
  "use_cache": true,
78
  "vocab_size": 128256
79
+ }
recipe.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ DEFAULT_stage:
2
+ DEFAULT_modifiers:
3
+ SmoothQuantModifier: {smoothing_strength: 0.8}
4
+ GPTQModifier:
5
+ targets: Linear
6
+ dampening_frac: 0.01
7
+ ignore: [lm_head]
8
+ scheme: W4A16