|
{ |
|
"module": "keras_nlp.src.models.llama3.llama3_backbone", |
|
"class_name": "Llama3Backbone", |
|
"config": { |
|
"name": "llama_backbone", |
|
"trainable": true, |
|
"dtype": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "DTypePolicyMap", |
|
"config": { |
|
"default_policy": null, |
|
"policy_map": { |
|
"token_embedding": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_0/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_0/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_0/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_0/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_0/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_0/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_0/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_1/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_1/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_1/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_1/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_1/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_1/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_1/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_2/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_2/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_2/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_2/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_2/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_2/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_2/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_3/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_3/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_3/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_3/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_3/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_3/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_3/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_4/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_4/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_4/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_4/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_4/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_4/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_4/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_5/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_5/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_5/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_5/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_5/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_5/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_5/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_6/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_6/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_6/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_6/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_6/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_6/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_6/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_7/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_7/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_7/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_7/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_7/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_7/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_7/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_8/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_8/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_8/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_8/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_8/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_8/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_8/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_9/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_9/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_9/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_9/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_9/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_9/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_9/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_10/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_10/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_10/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_10/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_10/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_10/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_10/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_11/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_11/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_11/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_11/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_11/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_11/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_11/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_12/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_12/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_12/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_12/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_12/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_12/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_12/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_13/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_13/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_13/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_13/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_13/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_13/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_13/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_14/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_14/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_14/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_14/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_14/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_14/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_14/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_15/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_15/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_15/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_15/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_15/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_15/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_15/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_16/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_16/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_16/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_16/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_16/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_16/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_16/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_17/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_17/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_17/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_17/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_17/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_17/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_17/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_18/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_18/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_18/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_18/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_18/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_18/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_18/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_19/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_19/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_19/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_19/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_19/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_19/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_19/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_20/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_20/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_20/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_20/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_20/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_20/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_20/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_21/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_21/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_21/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_21/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_21/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_21/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_21/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_22/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_22/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_22/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_22/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_22/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_22/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_22/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_23/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_23/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_23/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_23/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_23/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_23/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_23/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_24/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_24/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_24/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_24/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_24/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_24/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_24/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_25/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_25/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_25/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_25/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_25/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_25/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_25/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_26/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_26/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_26/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_26/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_26/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_26/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_26/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_27/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_27/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_27/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_27/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_27/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_27/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_27/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_28/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_28/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_28/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_28/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_28/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_28/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_28/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_29/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_29/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_29/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_29/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_29/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_29/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_29/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_30/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_30/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_30/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_30/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_30/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_30/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_30/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_31/feedforward_output_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_31/feedforward_gate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_31/feedforward_intermediate_dense": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_31/self_attention/attention_output": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_31/self_attention/value": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_31/self_attention/key": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
}, |
|
"transformer_layer_31/self_attention/query": { |
|
"module": "keras.dtype_policies", |
|
"class_name": "QuantizedDTypePolicy", |
|
"config": { |
|
"mode": "int8", |
|
"source_name": null |
|
}, |
|
"registered_name": null |
|
} |
|
} |
|
}, |
|
"registered_name": null |
|
}, |
|
"vocabulary_size": 128256, |
|
"num_layers": 32, |
|
"num_query_heads": 32, |
|
"hidden_dim": 4096, |
|
"intermediate_dim": 14336, |
|
"rope_max_wavelength": 500000.0, |
|
"rope_scaling_factor": 1.0, |
|
"num_key_value_heads": 8, |
|
"layer_norm_epsilon": 1e-05, |
|
"dropout": 0 |
|
}, |
|
"registered_name": "keras_nlp>Llama3Backbone" |
|
} |