diff --git "a/TextEncoderT5.mlmodelc/model.mil" "b/TextEncoderT5.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/TextEncoderT5.mlmodelc/model.mil" @@ -0,0 +1,2219 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})] +{ + func main(tensor attention_mask, tensor input_ids) { + tensor x_1_axis_0 = const()[name = tensor("x_1_axis_0"), val = tensor(0)]; + tensor x_1_batch_dims_0 = const()[name = tensor("x_1_batch_dims_0"), val = tensor(0)]; + tensor embed_tokens_weight_to_fp16 = const()[name = tensor("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_1_cast_fp16 = gather(axis = x_1_axis_0, batch_dims = x_1_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor("x_1_cast_fp16")]; + tensor var_60_perm_0 = const()[name = tensor("op_60_perm_0"), val = tensor([0, 2, 1])]; + tensor inputs_1_axes_0 = const()[name = tensor("inputs_1_axes_0"), val = tensor([2])]; + tensor transpose_0 = transpose(perm = var_60_perm_0, x = x_1_cast_fp16)[name = tensor("transpose_0")]; + tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = transpose_0)[name = tensor("inputs_1_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor(true)]; + tensor var_67 = const()[name = tensor("op_67"), val = tensor(1)]; + tensor var_71 = const()[name = tensor("op_71"), val = tensor(3)]; + tensor var_69_to_fp16 = const()[name = tensor("op_69_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_68_to_fp16 = const()[name = tensor("op_68_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_3_cast_fp16 = clip(alpha = var_69_to_fp16, beta = var_68_to_fp16, x = inputs_1_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor inputs_sq_1_cast_fp16 = mul(x = inputs_3_cast_fp16, y = inputs_3_cast_fp16)[name = tensor("inputs_sq_1_cast_fp16")]; + tensor var_88 = const()[name = tensor("op_88"), val = tensor([1])]; + tensor variance_1_cast_fp16 = reduce_mean(axes = var_88, keep_dims = var_66, x = inputs_sq_1_cast_fp16)[name = tensor("variance_1_cast_fp16")]; + tensor var_90_to_fp16 = const()[name = tensor("op_90_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_91_cast_fp16 = add(x = variance_1_cast_fp16, y = var_90_to_fp16)[name = tensor("op_91_cast_fp16")]; + tensor var_92_epsilon_0_to_fp16 = const()[name = tensor("op_92_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_92_cast_fp16 = rsqrt(epsilon = var_92_epsilon_0_to_fp16, x = var_91_cast_fp16)[name = tensor("op_92_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = mul(x = inputs_3_cast_fp16, y = var_92_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor w_1_to_fp16 = const()[name = tensor("w_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263192704)))]; + tensor obj_1_cast_fp16 = mul(x = w_1_to_fp16, y = hidden_states_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_106 = const()[name = tensor("op_106"), val = tensor([1, 1])]; + tensor var_108 = const()[name = tensor("op_108"), val = tensor([1, 1])]; + tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("custom")]; + tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_0_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_0_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263200960)))]; + tensor block_0_layer_0_SelfAttention_q_proj_bias_to_fp16 = const()[name = tensor("block_0_layer_0_SelfAttention_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296755456)))]; + tensor query_1_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_108, groups = var_67, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_106, weight = block_0_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_112 = const()[name = tensor("op_112"), val = tensor([1, 1])]; + tensor var_114 = const()[name = tensor("op_114"), val = tensor([1, 1])]; + tensor key_1_pad_type_0 = const()[name = tensor("key_1_pad_type_0"), val = tensor("custom")]; + tensor key_1_pad_0 = const()[name = tensor("key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_0_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_0_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296763712)))]; + tensor key_1_cast_fp16 = conv(dilations = var_114, groups = var_67, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = var_112, weight = block_0_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121 = const()[name = tensor("op_121"), val = tensor([1, 1])]; + tensor value_1_pad_type_0 = const()[name = tensor("value_1_pad_type_0"), val = tensor("custom")]; + tensor value_1_pad_0 = const()[name = tensor("value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_0_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_0_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330318208)))]; + tensor value_1_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_121, groups = var_67, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = var_119, weight = block_0_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_125 = const()[name = tensor("op_125"), val = tensor([1, 64, 64, -1])]; + tensor var_126_cast_fp16 = reshape(shape = var_125, x = query_1_cast_fp16)[name = tensor("op_126_cast_fp16")]; + tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 64, 64, -1])]; + tensor var_128_cast_fp16 = reshape(shape = var_127, x = key_1_cast_fp16)[name = tensor("op_128_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_126_cast_fp16, y = var_128_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_132_axes_0 = const()[name = tensor("op_132_axes_0"), val = tensor([1])]; + tensor var_132_cast_fp16 = expand_dims(axes = var_132_axes_0, x = attention_mask)[name = tensor("op_132_cast_fp16")]; + tensor var_133_axes_0 = const()[name = tensor("op_133_axes_0"), val = tensor([2])]; + tensor var_133_cast_fp16 = expand_dims(axes = var_133_axes_0, x = var_132_cast_fp16)[name = tensor("op_133_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor relative_attention_bias_to_fp16 = const()[name = tensor("relative_attention_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363872704)))]; + tensor mh_w_5_cast_fp16 = add(x = mh_w_3_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor var_137_cast_fp16 = softmax(axis = var_71, x = mh_w_5_cast_fp16)[name = tensor("op_137_cast_fp16")]; + tensor var_138 = const()[name = tensor("op_138"), val = tensor([1, 64, 64, -1])]; + tensor var_139_cast_fp16 = reshape(shape = var_138, x = value_1_cast_fp16)[name = tensor("op_139_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_139_cast_fp16, y = var_137_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_142 = const()[name = tensor("op_142"), val = tensor([1, 4096, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_142, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_146 = const()[name = tensor("op_146"), val = tensor([1, 1])]; + tensor var_148 = const()[name = tensor("op_148"), val = tensor([1, 1])]; + tensor obj_5_pad_type_0 = const()[name = tensor("obj_5_pad_type_0"), val = tensor("custom")]; + tensor obj_5_pad_0 = const()[name = tensor("obj_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_0_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_0_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364631680)))]; + tensor obj_5_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_148, groups = var_67, pad = obj_5_pad_0, pad_type = obj_5_pad_type_0, strides = var_146, weight = block_0_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_5_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor inputs_7_cast_fp16 = clip(alpha = var_69_to_fp16, beta = var_68_to_fp16, x = inputs_5_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor inputs_sq_3_cast_fp16 = mul(x = inputs_7_cast_fp16, y = inputs_7_cast_fp16)[name = tensor("inputs_sq_3_cast_fp16")]; + tensor var_157 = const()[name = tensor("op_157"), val = tensor([1])]; + tensor variance_3_cast_fp16 = reduce_mean(axes = var_157, keep_dims = var_66, x = inputs_sq_3_cast_fp16)[name = tensor("variance_3_cast_fp16")]; + tensor var_159_to_fp16 = const()[name = tensor("op_159_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_160_cast_fp16 = add(x = variance_3_cast_fp16, y = var_159_to_fp16)[name = tensor("op_160_cast_fp16")]; + tensor var_161_epsilon_0_to_fp16 = const()[name = tensor("op_161_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_161_cast_fp16 = rsqrt(epsilon = var_161_epsilon_0_to_fp16, x = var_160_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = mul(x = inputs_7_cast_fp16, y = var_161_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor w_3_to_fp16 = const()[name = tensor("w_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398186176)))]; + tensor input_3_cast_fp16 = mul(x = w_3_to_fp16, y = hidden_states_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor([1, 1])]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, 1])]; + tensor x_3_pad_type_0 = const()[name = tensor("x_3_pad_type_0"), val = tensor("custom")]; + tensor x_3_pad_0 = const()[name = tensor("x_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_0_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_0_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398194432)))]; + tensor block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16 = const()[name = tensor("block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482080576)))]; + tensor x_3_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_176, groups = var_67, pad = x_3_pad_0, pad_type = x_3_pad_type_0, strides = var_174, weight = block_0_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("x_3_cast_fp16")]; + tensor var_190_mode_0 = const()[name = tensor("op_190_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_190_cast_fp16 = gelu(mode = var_190_mode_0, x = x_3_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor var_193 = const()[name = tensor("op_193"), val = tensor([1, 1])]; + tensor var_195 = const()[name = tensor("op_195"), val = tensor([1, 1])]; + tensor var_197_pad_type_0 = const()[name = tensor("op_197_pad_type_0"), val = tensor("custom")]; + tensor var_197_pad_0 = const()[name = tensor("op_197_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_0_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_0_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482101120)))]; + tensor var_197_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_195, groups = var_67, pad = var_197_pad_0, pad_type = var_197_pad_type_0, strides = var_193, weight = block_0_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("op_197_cast_fp16")]; + tensor input_5_cast_fp16 = mul(x = var_190_cast_fp16, y = var_197_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_201 = const()[name = tensor("op_201"), val = tensor([1, 1])]; + tensor var_203 = const()[name = tensor("op_203"), val = tensor([1, 1])]; + tensor var_205_pad_type_0 = const()[name = tensor("op_205_pad_type_0"), val = tensor("custom")]; + tensor var_205_pad_0 = const()[name = tensor("op_205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_0_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_0_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565987264)))]; + tensor var_205_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_203, groups = var_67, pad = var_205_pad_0, pad_type = var_205_pad_type_0, strides = var_201, weight = block_0_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("op_205_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_205_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor(true)]; + tensor var_211 = const()[name = tensor("op_211"), val = tensor(1)]; + tensor var_215 = const()[name = tensor("op_215"), val = tensor(3)]; + tensor var_213_to_fp16 = const()[name = tensor("op_213_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_212_to_fp16 = const()[name = tensor("op_212_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_11_cast_fp16 = clip(alpha = var_213_to_fp16, beta = var_212_to_fp16, x = inputs_9_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor inputs_sq_5_cast_fp16 = mul(x = inputs_11_cast_fp16, y = inputs_11_cast_fp16)[name = tensor("inputs_sq_5_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor([1])]; + tensor variance_5_cast_fp16 = reduce_mean(axes = var_232, keep_dims = var_210, x = inputs_sq_5_cast_fp16)[name = tensor("variance_5_cast_fp16")]; + tensor var_234_to_fp16 = const()[name = tensor("op_234_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_235_cast_fp16 = add(x = variance_5_cast_fp16, y = var_234_to_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_236_epsilon_0_to_fp16 = const()[name = tensor("op_236_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_236_cast_fp16 = rsqrt(epsilon = var_236_epsilon_0_to_fp16, x = var_235_cast_fp16)[name = tensor("op_236_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = mul(x = inputs_11_cast_fp16, y = var_236_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor w_5_to_fp16 = const()[name = tensor("w_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(649873408)))]; + tensor obj_7_cast_fp16 = mul(x = w_5_to_fp16, y = hidden_states_5_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor var_250 = const()[name = tensor("op_250"), val = tensor([1, 1])]; + tensor var_252 = const()[name = tensor("op_252"), val = tensor([1, 1])]; + tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = tensor("custom")]; + tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_1_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_1_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(649881664)))]; + tensor query_3_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_252, groups = var_211, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_250, weight = block_1_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_7_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_256 = const()[name = tensor("op_256"), val = tensor([1, 1])]; + tensor var_258 = const()[name = tensor("op_258"), val = tensor([1, 1])]; + tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("custom")]; + tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_1_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_1_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(683436160)))]; + tensor key_3_cast_fp16 = conv(dilations = var_258, groups = var_211, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_256, weight = block_1_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_7_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_263 = const()[name = tensor("op_263"), val = tensor([1, 1])]; + tensor var_265 = const()[name = tensor("op_265"), val = tensor([1, 1])]; + tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("custom")]; + tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_1_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_1_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(716990656)))]; + tensor value_3_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_265, groups = var_211, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = var_263, weight = block_1_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_7_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_269 = const()[name = tensor("op_269"), val = tensor([1, 64, 64, -1])]; + tensor var_270_cast_fp16 = reshape(shape = var_269, x = query_3_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_271 = const()[name = tensor("op_271"), val = tensor([1, 64, 64, -1])]; + tensor var_272_cast_fp16 = reshape(shape = var_271, x = key_3_cast_fp16)[name = tensor("op_272_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_270_cast_fp16, y = var_272_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor var_281_cast_fp16 = softmax(axis = var_215, x = mh_w_11_cast_fp16)[name = tensor("op_281_cast_fp16")]; + tensor var_282 = const()[name = tensor("op_282"), val = tensor([1, 64, 64, -1])]; + tensor var_283_cast_fp16 = reshape(shape = var_282, x = value_3_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_283_cast_fp16, y = var_281_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_286 = const()[name = tensor("op_286"), val = tensor([1, 4096, 1, -1])]; + tensor input_7_cast_fp16 = reshape(shape = var_286, x = attn_3_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_290 = const()[name = tensor("op_290"), val = tensor([1, 1])]; + tensor var_292 = const()[name = tensor("op_292"), val = tensor([1, 1])]; + tensor obj_9_pad_type_0 = const()[name = tensor("obj_9_pad_type_0"), val = tensor("custom")]; + tensor obj_9_pad_0 = const()[name = tensor("obj_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_1_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_1_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(750545152)))]; + tensor obj_9_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_292, groups = var_211, pad = obj_9_pad_0, pad_type = obj_9_pad_type_0, strides = var_290, weight = block_1_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = obj_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor inputs_15_cast_fp16 = clip(alpha = var_213_to_fp16, beta = var_212_to_fp16, x = inputs_13_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor inputs_sq_7_cast_fp16 = mul(x = inputs_15_cast_fp16, y = inputs_15_cast_fp16)[name = tensor("inputs_sq_7_cast_fp16")]; + tensor var_301 = const()[name = tensor("op_301"), val = tensor([1])]; + tensor variance_7_cast_fp16 = reduce_mean(axes = var_301, keep_dims = var_210, x = inputs_sq_7_cast_fp16)[name = tensor("variance_7_cast_fp16")]; + tensor var_303_to_fp16 = const()[name = tensor("op_303_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_304_cast_fp16 = add(x = variance_7_cast_fp16, y = var_303_to_fp16)[name = tensor("op_304_cast_fp16")]; + tensor var_305_epsilon_0_to_fp16 = const()[name = tensor("op_305_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_305_cast_fp16 = rsqrt(epsilon = var_305_epsilon_0_to_fp16, x = var_304_cast_fp16)[name = tensor("op_305_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = mul(x = inputs_15_cast_fp16, y = var_305_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor w_7_to_fp16 = const()[name = tensor("w_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(784099648)))]; + tensor input_9_cast_fp16 = mul(x = w_7_to_fp16, y = hidden_states_7_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_318 = const()[name = tensor("op_318"), val = tensor([1, 1])]; + tensor var_320 = const()[name = tensor("op_320"), val = tensor([1, 1])]; + tensor x_5_pad_type_0 = const()[name = tensor("x_5_pad_type_0"), val = tensor("custom")]; + tensor x_5_pad_0 = const()[name = tensor("x_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_1_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_1_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(784107904)))]; + tensor x_5_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_320, groups = var_211, pad = x_5_pad_0, pad_type = x_5_pad_type_0, strides = var_318, weight = block_1_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_334_mode_0 = const()[name = tensor("op_334_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_334_cast_fp16 = gelu(mode = var_334_mode_0, x = x_5_cast_fp16)[name = tensor("op_334_cast_fp16")]; + tensor var_337 = const()[name = tensor("op_337"), val = tensor([1, 1])]; + tensor var_339 = const()[name = tensor("op_339"), val = tensor([1, 1])]; + tensor var_341_pad_type_0 = const()[name = tensor("op_341_pad_type_0"), val = tensor("custom")]; + tensor var_341_pad_0 = const()[name = tensor("op_341_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_1_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_1_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(867994048)))]; + tensor var_341_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_339, groups = var_211, pad = var_341_pad_0, pad_type = var_341_pad_type_0, strides = var_337, weight = block_1_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_341_cast_fp16")]; + tensor input_11_cast_fp16 = mul(x = var_334_cast_fp16, y = var_341_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_345 = const()[name = tensor("op_345"), val = tensor([1, 1])]; + tensor var_347 = const()[name = tensor("op_347"), val = tensor([1, 1])]; + tensor var_349_pad_type_0 = const()[name = tensor("op_349_pad_type_0"), val = tensor("custom")]; + tensor var_349_pad_0 = const()[name = tensor("op_349_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_1_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_1_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(951880192)))]; + tensor var_349_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_347, groups = var_211, pad = var_349_pad_0, pad_type = var_349_pad_type_0, strides = var_345, weight = block_1_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_349_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_354 = const()[name = tensor("op_354"), val = tensor(true)]; + tensor var_355 = const()[name = tensor("op_355"), val = tensor(1)]; + tensor var_359 = const()[name = tensor("op_359"), val = tensor(3)]; + tensor var_357_to_fp16 = const()[name = tensor("op_357_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_356_to_fp16 = const()[name = tensor("op_356_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_19_cast_fp16 = clip(alpha = var_357_to_fp16, beta = var_356_to_fp16, x = inputs_17_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor inputs_sq_9_cast_fp16 = mul(x = inputs_19_cast_fp16, y = inputs_19_cast_fp16)[name = tensor("inputs_sq_9_cast_fp16")]; + tensor var_376 = const()[name = tensor("op_376"), val = tensor([1])]; + tensor variance_9_cast_fp16 = reduce_mean(axes = var_376, keep_dims = var_354, x = inputs_sq_9_cast_fp16)[name = tensor("variance_9_cast_fp16")]; + tensor var_378_to_fp16 = const()[name = tensor("op_378_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_379_cast_fp16 = add(x = variance_9_cast_fp16, y = var_378_to_fp16)[name = tensor("op_379_cast_fp16")]; + tensor var_380_epsilon_0_to_fp16 = const()[name = tensor("op_380_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_380_cast_fp16 = rsqrt(epsilon = var_380_epsilon_0_to_fp16, x = var_379_cast_fp16)[name = tensor("op_380_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = mul(x = inputs_19_cast_fp16, y = var_380_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor w_9_to_fp16 = const()[name = tensor("w_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1035766336)))]; + tensor obj_11_cast_fp16 = mul(x = w_9_to_fp16, y = hidden_states_9_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor var_394 = const()[name = tensor("op_394"), val = tensor([1, 1])]; + tensor var_396 = const()[name = tensor("op_396"), val = tensor([1, 1])]; + tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("custom")]; + tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_2_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_2_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1035774592)))]; + tensor query_5_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_396, groups = var_355, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_394, weight = block_2_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_11_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_400 = const()[name = tensor("op_400"), val = tensor([1, 1])]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor([1, 1])]; + tensor key_5_pad_type_0 = const()[name = tensor("key_5_pad_type_0"), val = tensor("custom")]; + tensor key_5_pad_0 = const()[name = tensor("key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_2_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_2_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1069329088)))]; + tensor key_5_cast_fp16 = conv(dilations = var_402, groups = var_355, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = var_400, weight = block_2_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_11_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_407 = const()[name = tensor("op_407"), val = tensor([1, 1])]; + tensor var_409 = const()[name = tensor("op_409"), val = tensor([1, 1])]; + tensor value_5_pad_type_0 = const()[name = tensor("value_5_pad_type_0"), val = tensor("custom")]; + tensor value_5_pad_0 = const()[name = tensor("value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_2_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_2_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1102883584)))]; + tensor value_5_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_409, groups = var_355, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = var_407, weight = block_2_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_11_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_413 = const()[name = tensor("op_413"), val = tensor([1, 64, 64, -1])]; + tensor var_414_cast_fp16 = reshape(shape = var_413, x = query_5_cast_fp16)[name = tensor("op_414_cast_fp16")]; + tensor var_415 = const()[name = tensor("op_415"), val = tensor([1, 64, 64, -1])]; + tensor var_416_cast_fp16 = reshape(shape = var_415, x = key_5_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_414_cast_fp16, y = var_416_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor mh_w_17_cast_fp16 = add(x = mh_w_15_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor var_425_cast_fp16 = softmax(axis = var_359, x = mh_w_17_cast_fp16)[name = tensor("op_425_cast_fp16")]; + tensor var_426 = const()[name = tensor("op_426"), val = tensor([1, 64, 64, -1])]; + tensor var_427_cast_fp16 = reshape(shape = var_426, x = value_5_cast_fp16)[name = tensor("op_427_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_427_cast_fp16, y = var_425_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_430 = const()[name = tensor("op_430"), val = tensor([1, 4096, 1, -1])]; + tensor input_13_cast_fp16 = reshape(shape = var_430, x = attn_5_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_434 = const()[name = tensor("op_434"), val = tensor([1, 1])]; + tensor var_436 = const()[name = tensor("op_436"), val = tensor([1, 1])]; + tensor obj_13_pad_type_0 = const()[name = tensor("obj_13_pad_type_0"), val = tensor("custom")]; + tensor obj_13_pad_0 = const()[name = tensor("obj_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_2_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_2_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1136438080)))]; + tensor obj_13_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_436, groups = var_355, pad = obj_13_pad_0, pad_type = obj_13_pad_type_0, strides = var_434, weight = block_2_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor inputs_23_cast_fp16 = clip(alpha = var_357_to_fp16, beta = var_356_to_fp16, x = inputs_21_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor inputs_sq_11_cast_fp16 = mul(x = inputs_23_cast_fp16, y = inputs_23_cast_fp16)[name = tensor("inputs_sq_11_cast_fp16")]; + tensor var_445 = const()[name = tensor("op_445"), val = tensor([1])]; + tensor variance_11_cast_fp16 = reduce_mean(axes = var_445, keep_dims = var_354, x = inputs_sq_11_cast_fp16)[name = tensor("variance_11_cast_fp16")]; + tensor var_447_to_fp16 = const()[name = tensor("op_447_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_448_cast_fp16 = add(x = variance_11_cast_fp16, y = var_447_to_fp16)[name = tensor("op_448_cast_fp16")]; + tensor var_449_epsilon_0_to_fp16 = const()[name = tensor("op_449_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_449_cast_fp16 = rsqrt(epsilon = var_449_epsilon_0_to_fp16, x = var_448_cast_fp16)[name = tensor("op_449_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = mul(x = inputs_23_cast_fp16, y = var_449_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor w_11_to_fp16 = const()[name = tensor("w_11_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1169992576)))]; + tensor input_15_cast_fp16 = mul(x = w_11_to_fp16, y = hidden_states_11_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor([1, 1])]; + tensor var_464 = const()[name = tensor("op_464"), val = tensor([1, 1])]; + tensor x_7_pad_type_0 = const()[name = tensor("x_7_pad_type_0"), val = tensor("custom")]; + tensor x_7_pad_0 = const()[name = tensor("x_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_2_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_2_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1170000832)))]; + tensor x_7_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_464, groups = var_355, pad = x_7_pad_0, pad_type = x_7_pad_type_0, strides = var_462, weight = block_2_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_7_cast_fp16")]; + tensor var_478_mode_0 = const()[name = tensor("op_478_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_478_cast_fp16 = gelu(mode = var_478_mode_0, x = x_7_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor var_481 = const()[name = tensor("op_481"), val = tensor([1, 1])]; + tensor var_483 = const()[name = tensor("op_483"), val = tensor([1, 1])]; + tensor var_485_pad_type_0 = const()[name = tensor("op_485_pad_type_0"), val = tensor("custom")]; + tensor var_485_pad_0 = const()[name = tensor("op_485_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_2_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_2_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1253886976)))]; + tensor var_485_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_483, groups = var_355, pad = var_485_pad_0, pad_type = var_485_pad_type_0, strides = var_481, weight = block_2_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("op_485_cast_fp16")]; + tensor input_17_cast_fp16 = mul(x = var_478_cast_fp16, y = var_485_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_489 = const()[name = tensor("op_489"), val = tensor([1, 1])]; + tensor var_491 = const()[name = tensor("op_491"), val = tensor([1, 1])]; + tensor var_493_pad_type_0 = const()[name = tensor("op_493_pad_type_0"), val = tensor("custom")]; + tensor var_493_pad_0 = const()[name = tensor("op_493_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_2_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_2_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1337773120)))]; + tensor var_493_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_491, groups = var_355, pad = var_493_pad_0, pad_type = var_493_pad_type_0, strides = var_489, weight = block_2_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("op_493_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_493_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_498 = const()[name = tensor("op_498"), val = tensor(true)]; + tensor var_499 = const()[name = tensor("op_499"), val = tensor(1)]; + tensor var_503 = const()[name = tensor("op_503"), val = tensor(3)]; + tensor var_501_to_fp16 = const()[name = tensor("op_501_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_27_cast_fp16 = clip(alpha = var_501_to_fp16, beta = var_500_to_fp16, x = inputs_25_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor inputs_sq_13_cast_fp16 = mul(x = inputs_27_cast_fp16, y = inputs_27_cast_fp16)[name = tensor("inputs_sq_13_cast_fp16")]; + tensor var_520 = const()[name = tensor("op_520"), val = tensor([1])]; + tensor variance_13_cast_fp16 = reduce_mean(axes = var_520, keep_dims = var_498, x = inputs_sq_13_cast_fp16)[name = tensor("variance_13_cast_fp16")]; + tensor var_522_to_fp16 = const()[name = tensor("op_522_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_523_cast_fp16 = add(x = variance_13_cast_fp16, y = var_522_to_fp16)[name = tensor("op_523_cast_fp16")]; + tensor var_524_epsilon_0_to_fp16 = const()[name = tensor("op_524_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_524_cast_fp16 = rsqrt(epsilon = var_524_epsilon_0_to_fp16, x = var_523_cast_fp16)[name = tensor("op_524_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = mul(x = inputs_27_cast_fp16, y = var_524_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor w_13_to_fp16 = const()[name = tensor("w_13_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1421659264)))]; + tensor obj_15_cast_fp16 = mul(x = w_13_to_fp16, y = hidden_states_13_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor var_538 = const()[name = tensor("op_538"), val = tensor([1, 1])]; + tensor var_540 = const()[name = tensor("op_540"), val = tensor([1, 1])]; + tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("custom")]; + tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_3_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_3_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1421667520)))]; + tensor query_7_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_540, groups = var_499, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = var_538, weight = block_3_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_544 = const()[name = tensor("op_544"), val = tensor([1, 1])]; + tensor var_546 = const()[name = tensor("op_546"), val = tensor([1, 1])]; + tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("custom")]; + tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_3_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_3_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1455222016)))]; + tensor key_7_cast_fp16 = conv(dilations = var_546, groups = var_499, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = var_544, weight = block_3_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_551 = const()[name = tensor("op_551"), val = tensor([1, 1])]; + tensor var_553 = const()[name = tensor("op_553"), val = tensor([1, 1])]; + tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), val = tensor("custom")]; + tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_3_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_3_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1488776512)))]; + tensor value_7_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_553, groups = var_499, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = var_551, weight = block_3_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_557 = const()[name = tensor("op_557"), val = tensor([1, 64, 64, -1])]; + tensor var_558_cast_fp16 = reshape(shape = var_557, x = query_7_cast_fp16)[name = tensor("op_558_cast_fp16")]; + tensor var_559 = const()[name = tensor("op_559"), val = tensor([1, 64, 64, -1])]; + tensor var_560_cast_fp16 = reshape(shape = var_559, x = key_7_cast_fp16)[name = tensor("op_560_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_558_cast_fp16, y = var_560_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor mh_w_23_cast_fp16 = add(x = mh_w_21_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_23_cast_fp16")]; + tensor var_569_cast_fp16 = softmax(axis = var_503, x = mh_w_23_cast_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_570 = const()[name = tensor("op_570"), val = tensor([1, 64, 64, -1])]; + tensor var_571_cast_fp16 = reshape(shape = var_570, x = value_7_cast_fp16)[name = tensor("op_571_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_571_cast_fp16, y = var_569_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_574 = const()[name = tensor("op_574"), val = tensor([1, 4096, 1, -1])]; + tensor input_19_cast_fp16 = reshape(shape = var_574, x = attn_7_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_578 = const()[name = tensor("op_578"), val = tensor([1, 1])]; + tensor var_580 = const()[name = tensor("op_580"), val = tensor([1, 1])]; + tensor obj_17_pad_type_0 = const()[name = tensor("obj_17_pad_type_0"), val = tensor("custom")]; + tensor obj_17_pad_0 = const()[name = tensor("obj_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_3_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_3_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1522331008)))]; + tensor obj_17_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_580, groups = var_499, pad = obj_17_pad_0, pad_type = obj_17_pad_type_0, strides = var_578, weight = block_3_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("obj_17_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_17_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor inputs_31_cast_fp16 = clip(alpha = var_501_to_fp16, beta = var_500_to_fp16, x = inputs_29_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor inputs_sq_15_cast_fp16 = mul(x = inputs_31_cast_fp16, y = inputs_31_cast_fp16)[name = tensor("inputs_sq_15_cast_fp16")]; + tensor var_589 = const()[name = tensor("op_589"), val = tensor([1])]; + tensor variance_15_cast_fp16 = reduce_mean(axes = var_589, keep_dims = var_498, x = inputs_sq_15_cast_fp16)[name = tensor("variance_15_cast_fp16")]; + tensor var_591_to_fp16 = const()[name = tensor("op_591_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_592_cast_fp16 = add(x = variance_15_cast_fp16, y = var_591_to_fp16)[name = tensor("op_592_cast_fp16")]; + tensor var_593_epsilon_0_to_fp16 = const()[name = tensor("op_593_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_593_cast_fp16 = rsqrt(epsilon = var_593_epsilon_0_to_fp16, x = var_592_cast_fp16)[name = tensor("op_593_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = mul(x = inputs_31_cast_fp16, y = var_593_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor w_15_to_fp16 = const()[name = tensor("w_15_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1555885504)))]; + tensor input_21_cast_fp16 = mul(x = w_15_to_fp16, y = hidden_states_15_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608 = const()[name = tensor("op_608"), val = tensor([1, 1])]; + tensor x_9_pad_type_0 = const()[name = tensor("x_9_pad_type_0"), val = tensor("custom")]; + tensor x_9_pad_0 = const()[name = tensor("x_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_3_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_3_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1555893760)))]; + tensor x_9_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_608, groups = var_499, pad = x_9_pad_0, pad_type = x_9_pad_type_0, strides = var_606, weight = block_3_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("x_9_cast_fp16")]; + tensor var_622_mode_0 = const()[name = tensor("op_622_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_622_cast_fp16 = gelu(mode = var_622_mode_0, x = x_9_cast_fp16)[name = tensor("op_622_cast_fp16")]; + tensor var_625 = const()[name = tensor("op_625"), val = tensor([1, 1])]; + tensor var_627 = const()[name = tensor("op_627"), val = tensor([1, 1])]; + tensor var_629_pad_type_0 = const()[name = tensor("op_629_pad_type_0"), val = tensor("custom")]; + tensor var_629_pad_0 = const()[name = tensor("op_629_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_3_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_3_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1639779904)))]; + tensor var_629_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_627, groups = var_499, pad = var_629_pad_0, pad_type = var_629_pad_type_0, strides = var_625, weight = block_3_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("op_629_cast_fp16")]; + tensor input_23_cast_fp16 = mul(x = var_622_cast_fp16, y = var_629_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_633 = const()[name = tensor("op_633"), val = tensor([1, 1])]; + tensor var_635 = const()[name = tensor("op_635"), val = tensor([1, 1])]; + tensor var_637_pad_type_0 = const()[name = tensor("op_637_pad_type_0"), val = tensor("custom")]; + tensor var_637_pad_0 = const()[name = tensor("op_637_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_3_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_3_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1723666048)))]; + tensor var_637_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_635, groups = var_499, pad = var_637_pad_0, pad_type = var_637_pad_type_0, strides = var_633, weight = block_3_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("op_637_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = var_637_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor var_642 = const()[name = tensor("op_642"), val = tensor(true)]; + tensor var_643 = const()[name = tensor("op_643"), val = tensor(1)]; + tensor var_647 = const()[name = tensor("op_647"), val = tensor(3)]; + tensor var_645_to_fp16 = const()[name = tensor("op_645_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_644_to_fp16 = const()[name = tensor("op_644_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_35_cast_fp16 = clip(alpha = var_645_to_fp16, beta = var_644_to_fp16, x = inputs_33_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor inputs_sq_17_cast_fp16 = mul(x = inputs_35_cast_fp16, y = inputs_35_cast_fp16)[name = tensor("inputs_sq_17_cast_fp16")]; + tensor var_664 = const()[name = tensor("op_664"), val = tensor([1])]; + tensor variance_17_cast_fp16 = reduce_mean(axes = var_664, keep_dims = var_642, x = inputs_sq_17_cast_fp16)[name = tensor("variance_17_cast_fp16")]; + tensor var_666_to_fp16 = const()[name = tensor("op_666_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_667_cast_fp16 = add(x = variance_17_cast_fp16, y = var_666_to_fp16)[name = tensor("op_667_cast_fp16")]; + tensor var_668_epsilon_0_to_fp16 = const()[name = tensor("op_668_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_668_cast_fp16 = rsqrt(epsilon = var_668_epsilon_0_to_fp16, x = var_667_cast_fp16)[name = tensor("op_668_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = mul(x = inputs_35_cast_fp16, y = var_668_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor w_17_to_fp16 = const()[name = tensor("w_17_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1807552192)))]; + tensor obj_19_cast_fp16 = mul(x = w_17_to_fp16, y = hidden_states_17_cast_fp16)[name = tensor("obj_19_cast_fp16")]; + tensor var_682 = const()[name = tensor("op_682"), val = tensor([1, 1])]; + tensor var_684 = const()[name = tensor("op_684"), val = tensor([1, 1])]; + tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("custom")]; + tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_4_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_4_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1807560448)))]; + tensor query_9_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_684, groups = var_643, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = var_682, weight = block_4_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_19_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_688 = const()[name = tensor("op_688"), val = tensor([1, 1])]; + tensor var_690 = const()[name = tensor("op_690"), val = tensor([1, 1])]; + tensor key_9_pad_type_0 = const()[name = tensor("key_9_pad_type_0"), val = tensor("custom")]; + tensor key_9_pad_0 = const()[name = tensor("key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_4_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_4_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1841114944)))]; + tensor key_9_cast_fp16 = conv(dilations = var_690, groups = var_643, pad = key_9_pad_0, pad_type = key_9_pad_type_0, strides = var_688, weight = block_4_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_19_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_695 = const()[name = tensor("op_695"), val = tensor([1, 1])]; + tensor var_697 = const()[name = tensor("op_697"), val = tensor([1, 1])]; + tensor value_9_pad_type_0 = const()[name = tensor("value_9_pad_type_0"), val = tensor("custom")]; + tensor value_9_pad_0 = const()[name = tensor("value_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_4_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_4_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1874669440)))]; + tensor value_9_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_697, groups = var_643, pad = value_9_pad_0, pad_type = value_9_pad_type_0, strides = var_695, weight = block_4_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_19_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_701 = const()[name = tensor("op_701"), val = tensor([1, 64, 64, -1])]; + tensor var_702_cast_fp16 = reshape(shape = var_701, x = query_9_cast_fp16)[name = tensor("op_702_cast_fp16")]; + tensor var_703 = const()[name = tensor("op_703"), val = tensor([1, 64, 64, -1])]; + tensor var_704_cast_fp16 = reshape(shape = var_703, x = key_9_cast_fp16)[name = tensor("op_704_cast_fp16")]; + tensor mh_w_25_transpose_x_0 = const()[name = tensor("mh_w_25_transpose_x_0"), val = tensor(true)]; + tensor mh_w_25_transpose_y_0 = const()[name = tensor("mh_w_25_transpose_y_0"), val = tensor(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_702_cast_fp16, y = var_704_cast_fp16)[name = tensor("mh_w_25_cast_fp16")]; + tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_27_cast_fp16")]; + tensor mh_w_29_cast_fp16 = add(x = mh_w_27_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_29_cast_fp16")]; + tensor var_713_cast_fp16 = softmax(axis = var_647, x = mh_w_29_cast_fp16)[name = tensor("op_713_cast_fp16")]; + tensor var_714 = const()[name = tensor("op_714"), val = tensor([1, 64, 64, -1])]; + tensor var_715_cast_fp16 = reshape(shape = var_714, x = value_9_cast_fp16)[name = tensor("op_715_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_715_cast_fp16, y = var_713_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 4096, 1, -1])]; + tensor input_25_cast_fp16 = reshape(shape = var_718, x = attn_9_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor obj_21_pad_type_0 = const()[name = tensor("obj_21_pad_type_0"), val = tensor("custom")]; + tensor obj_21_pad_0 = const()[name = tensor("obj_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_4_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_4_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1908223936)))]; + tensor obj_21_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_724, groups = var_643, pad = obj_21_pad_0, pad_type = obj_21_pad_type_0, strides = var_722, weight = block_4_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = obj_21_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor inputs_39_cast_fp16 = clip(alpha = var_645_to_fp16, beta = var_644_to_fp16, x = inputs_37_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor inputs_sq_19_cast_fp16 = mul(x = inputs_39_cast_fp16, y = inputs_39_cast_fp16)[name = tensor("inputs_sq_19_cast_fp16")]; + tensor var_733 = const()[name = tensor("op_733"), val = tensor([1])]; + tensor variance_19_cast_fp16 = reduce_mean(axes = var_733, keep_dims = var_642, x = inputs_sq_19_cast_fp16)[name = tensor("variance_19_cast_fp16")]; + tensor var_735_to_fp16 = const()[name = tensor("op_735_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_736_cast_fp16 = add(x = variance_19_cast_fp16, y = var_735_to_fp16)[name = tensor("op_736_cast_fp16")]; + tensor var_737_epsilon_0_to_fp16 = const()[name = tensor("op_737_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_737_cast_fp16 = rsqrt(epsilon = var_737_epsilon_0_to_fp16, x = var_736_cast_fp16)[name = tensor("op_737_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = mul(x = inputs_39_cast_fp16, y = var_737_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor w_19_to_fp16 = const()[name = tensor("w_19_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1941778432)))]; + tensor input_27_cast_fp16 = mul(x = w_19_to_fp16, y = hidden_states_19_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_750 = const()[name = tensor("op_750"), val = tensor([1, 1])]; + tensor var_752 = const()[name = tensor("op_752"), val = tensor([1, 1])]; + tensor x_11_pad_type_0 = const()[name = tensor("x_11_pad_type_0"), val = tensor("custom")]; + tensor x_11_pad_0 = const()[name = tensor("x_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_4_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_4_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1941786688)))]; + tensor x_11_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_752, groups = var_643, pad = x_11_pad_0, pad_type = x_11_pad_type_0, strides = var_750, weight = block_4_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("x_11_cast_fp16")]; + tensor var_766_mode_0 = const()[name = tensor("op_766_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_766_cast_fp16 = gelu(mode = var_766_mode_0, x = x_11_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_769 = const()[name = tensor("op_769"), val = tensor([1, 1])]; + tensor var_771 = const()[name = tensor("op_771"), val = tensor([1, 1])]; + tensor var_773_pad_type_0 = const()[name = tensor("op_773_pad_type_0"), val = tensor("custom")]; + tensor var_773_pad_0 = const()[name = tensor("op_773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_4_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_4_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2025672832)))]; + tensor var_773_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_771, groups = var_643, pad = var_773_pad_0, pad_type = var_773_pad_type_0, strides = var_769, weight = block_4_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor input_29_cast_fp16 = mul(x = var_766_cast_fp16, y = var_773_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor var_777 = const()[name = tensor("op_777"), val = tensor([1, 1])]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor([1, 1])]; + tensor var_781_pad_type_0 = const()[name = tensor("op_781_pad_type_0"), val = tensor("custom")]; + tensor var_781_pad_0 = const()[name = tensor("op_781_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_4_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_4_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2109558976)))]; + tensor var_781_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_779, groups = var_643, pad = var_781_pad_0, pad_type = var_781_pad_type_0, strides = var_777, weight = block_4_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("op_781_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = var_781_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor var_786 = const()[name = tensor("op_786"), val = tensor(true)]; + tensor var_787 = const()[name = tensor("op_787"), val = tensor(1)]; + tensor var_791 = const()[name = tensor("op_791"), val = tensor(3)]; + tensor var_789_to_fp16 = const()[name = tensor("op_789_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_788_to_fp16 = const()[name = tensor("op_788_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_43_cast_fp16 = clip(alpha = var_789_to_fp16, beta = var_788_to_fp16, x = inputs_41_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor inputs_sq_21_cast_fp16 = mul(x = inputs_43_cast_fp16, y = inputs_43_cast_fp16)[name = tensor("inputs_sq_21_cast_fp16")]; + tensor var_808 = const()[name = tensor("op_808"), val = tensor([1])]; + tensor variance_21_cast_fp16 = reduce_mean(axes = var_808, keep_dims = var_786, x = inputs_sq_21_cast_fp16)[name = tensor("variance_21_cast_fp16")]; + tensor var_810_to_fp16 = const()[name = tensor("op_810_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_811_cast_fp16 = add(x = variance_21_cast_fp16, y = var_810_to_fp16)[name = tensor("op_811_cast_fp16")]; + tensor var_812_epsilon_0_to_fp16 = const()[name = tensor("op_812_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_812_cast_fp16 = rsqrt(epsilon = var_812_epsilon_0_to_fp16, x = var_811_cast_fp16)[name = tensor("op_812_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = mul(x = inputs_43_cast_fp16, y = var_812_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor w_21_to_fp16 = const()[name = tensor("w_21_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2193445120)))]; + tensor obj_23_cast_fp16 = mul(x = w_21_to_fp16, y = hidden_states_21_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor var_826 = const()[name = tensor("op_826"), val = tensor([1, 1])]; + tensor var_828 = const()[name = tensor("op_828"), val = tensor([1, 1])]; + tensor query_11_pad_type_0 = const()[name = tensor("query_11_pad_type_0"), val = tensor("custom")]; + tensor query_11_pad_0 = const()[name = tensor("query_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_5_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_5_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2193453376)))]; + tensor query_11_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_828, groups = var_787, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = var_826, weight = block_5_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor key_11_pad_type_0 = const()[name = tensor("key_11_pad_type_0"), val = tensor("custom")]; + tensor key_11_pad_0 = const()[name = tensor("key_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_5_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_5_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2227007872)))]; + tensor key_11_cast_fp16 = conv(dilations = var_834, groups = var_787, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = var_832, weight = block_5_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor var_839 = const()[name = tensor("op_839"), val = tensor([1, 1])]; + tensor var_841 = const()[name = tensor("op_841"), val = tensor([1, 1])]; + tensor value_11_pad_type_0 = const()[name = tensor("value_11_pad_type_0"), val = tensor("custom")]; + tensor value_11_pad_0 = const()[name = tensor("value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_5_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_5_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2260562368)))]; + tensor value_11_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_841, groups = var_787, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = var_839, weight = block_5_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_845 = const()[name = tensor("op_845"), val = tensor([1, 64, 64, -1])]; + tensor var_846_cast_fp16 = reshape(shape = var_845, x = query_11_cast_fp16)[name = tensor("op_846_cast_fp16")]; + tensor var_847 = const()[name = tensor("op_847"), val = tensor([1, 64, 64, -1])]; + tensor var_848_cast_fp16 = reshape(shape = var_847, x = key_11_cast_fp16)[name = tensor("op_848_cast_fp16")]; + tensor mh_w_31_transpose_x_0 = const()[name = tensor("mh_w_31_transpose_x_0"), val = tensor(true)]; + tensor mh_w_31_transpose_y_0 = const()[name = tensor("mh_w_31_transpose_y_0"), val = tensor(false)]; + tensor mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_846_cast_fp16, y = var_848_cast_fp16)[name = tensor("mh_w_31_cast_fp16")]; + tensor mh_w_33_cast_fp16 = add(x = mh_w_31_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_33_cast_fp16")]; + tensor mh_w_35_cast_fp16 = add(x = mh_w_33_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_35_cast_fp16")]; + tensor var_857_cast_fp16 = softmax(axis = var_791, x = mh_w_35_cast_fp16)[name = tensor("op_857_cast_fp16")]; + tensor var_858 = const()[name = tensor("op_858"), val = tensor([1, 64, 64, -1])]; + tensor var_859_cast_fp16 = reshape(shape = var_858, x = value_11_cast_fp16)[name = tensor("op_859_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_859_cast_fp16, y = var_857_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_862 = const()[name = tensor("op_862"), val = tensor([1, 4096, 1, -1])]; + tensor input_31_cast_fp16 = reshape(shape = var_862, x = attn_11_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_866 = const()[name = tensor("op_866"), val = tensor([1, 1])]; + tensor var_868 = const()[name = tensor("op_868"), val = tensor([1, 1])]; + tensor obj_25_pad_type_0 = const()[name = tensor("obj_25_pad_type_0"), val = tensor("custom")]; + tensor obj_25_pad_0 = const()[name = tensor("obj_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_5_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_5_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2294116864)))]; + tensor obj_25_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_868, groups = var_787, pad = obj_25_pad_0, pad_type = obj_25_pad_type_0, strides = var_866, weight = block_5_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_25_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor inputs_47_cast_fp16 = clip(alpha = var_789_to_fp16, beta = var_788_to_fp16, x = inputs_45_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor inputs_sq_23_cast_fp16 = mul(x = inputs_47_cast_fp16, y = inputs_47_cast_fp16)[name = tensor("inputs_sq_23_cast_fp16")]; + tensor var_877 = const()[name = tensor("op_877"), val = tensor([1])]; + tensor variance_23_cast_fp16 = reduce_mean(axes = var_877, keep_dims = var_786, x = inputs_sq_23_cast_fp16)[name = tensor("variance_23_cast_fp16")]; + tensor var_879_to_fp16 = const()[name = tensor("op_879_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_880_cast_fp16 = add(x = variance_23_cast_fp16, y = var_879_to_fp16)[name = tensor("op_880_cast_fp16")]; + tensor var_881_epsilon_0_to_fp16 = const()[name = tensor("op_881_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_881_cast_fp16 = rsqrt(epsilon = var_881_epsilon_0_to_fp16, x = var_880_cast_fp16)[name = tensor("op_881_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = mul(x = inputs_47_cast_fp16, y = var_881_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor w_23_to_fp16 = const()[name = tensor("w_23_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2327671360)))]; + tensor input_33_cast_fp16 = mul(x = w_23_to_fp16, y = hidden_states_23_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_894 = const()[name = tensor("op_894"), val = tensor([1, 1])]; + tensor var_896 = const()[name = tensor("op_896"), val = tensor([1, 1])]; + tensor x_13_pad_type_0 = const()[name = tensor("x_13_pad_type_0"), val = tensor("custom")]; + tensor x_13_pad_0 = const()[name = tensor("x_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_5_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_5_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2327679616)))]; + tensor x_13_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_896, groups = var_787, pad = x_13_pad_0, pad_type = x_13_pad_type_0, strides = var_894, weight = block_5_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("x_13_cast_fp16")]; + tensor var_910_mode_0 = const()[name = tensor("op_910_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_910_cast_fp16 = gelu(mode = var_910_mode_0, x = x_13_cast_fp16)[name = tensor("op_910_cast_fp16")]; + tensor var_913 = const()[name = tensor("op_913"), val = tensor([1, 1])]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 1])]; + tensor var_917_pad_type_0 = const()[name = tensor("op_917_pad_type_0"), val = tensor("custom")]; + tensor var_917_pad_0 = const()[name = tensor("op_917_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_5_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_5_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2411565760)))]; + tensor var_917_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_915, groups = var_787, pad = var_917_pad_0, pad_type = var_917_pad_type_0, strides = var_913, weight = block_5_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("op_917_cast_fp16")]; + tensor input_35_cast_fp16 = mul(x = var_910_cast_fp16, y = var_917_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_921 = const()[name = tensor("op_921"), val = tensor([1, 1])]; + tensor var_923 = const()[name = tensor("op_923"), val = tensor([1, 1])]; + tensor var_925_pad_type_0 = const()[name = tensor("op_925_pad_type_0"), val = tensor("custom")]; + tensor var_925_pad_0 = const()[name = tensor("op_925_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_5_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_5_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2495451904)))]; + tensor var_925_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_923, groups = var_787, pad = var_925_pad_0, pad_type = var_925_pad_type_0, strides = var_921, weight = block_5_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("op_925_cast_fp16")]; + tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = var_925_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; + tensor var_930 = const()[name = tensor("op_930"), val = tensor(true)]; + tensor var_931 = const()[name = tensor("op_931"), val = tensor(1)]; + tensor var_935 = const()[name = tensor("op_935"), val = tensor(3)]; + tensor var_933_to_fp16 = const()[name = tensor("op_933_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_932_to_fp16 = const()[name = tensor("op_932_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_51_cast_fp16 = clip(alpha = var_933_to_fp16, beta = var_932_to_fp16, x = inputs_49_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; + tensor inputs_sq_25_cast_fp16 = mul(x = inputs_51_cast_fp16, y = inputs_51_cast_fp16)[name = tensor("inputs_sq_25_cast_fp16")]; + tensor var_952 = const()[name = tensor("op_952"), val = tensor([1])]; + tensor variance_25_cast_fp16 = reduce_mean(axes = var_952, keep_dims = var_930, x = inputs_sq_25_cast_fp16)[name = tensor("variance_25_cast_fp16")]; + tensor var_954_to_fp16 = const()[name = tensor("op_954_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_955_cast_fp16 = add(x = variance_25_cast_fp16, y = var_954_to_fp16)[name = tensor("op_955_cast_fp16")]; + tensor var_956_epsilon_0_to_fp16 = const()[name = tensor("op_956_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_956_cast_fp16 = rsqrt(epsilon = var_956_epsilon_0_to_fp16, x = var_955_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = mul(x = inputs_51_cast_fp16, y = var_956_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor w_25_to_fp16 = const()[name = tensor("w_25_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2579338048)))]; + tensor obj_27_cast_fp16 = mul(x = w_25_to_fp16, y = hidden_states_25_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor var_970 = const()[name = tensor("op_970"), val = tensor([1, 1])]; + tensor var_972 = const()[name = tensor("op_972"), val = tensor([1, 1])]; + tensor query_13_pad_type_0 = const()[name = tensor("query_13_pad_type_0"), val = tensor("custom")]; + tensor query_13_pad_0 = const()[name = tensor("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_6_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_6_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2579346304)))]; + tensor query_13_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_972, groups = var_931, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = var_970, weight = block_6_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_27_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_976 = const()[name = tensor("op_976"), val = tensor([1, 1])]; + tensor var_978 = const()[name = tensor("op_978"), val = tensor([1, 1])]; + tensor key_13_pad_type_0 = const()[name = tensor("key_13_pad_type_0"), val = tensor("custom")]; + tensor key_13_pad_0 = const()[name = tensor("key_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_6_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_6_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2612900800)))]; + tensor key_13_cast_fp16 = conv(dilations = var_978, groups = var_931, pad = key_13_pad_0, pad_type = key_13_pad_type_0, strides = var_976, weight = block_6_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_27_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_983 = const()[name = tensor("op_983"), val = tensor([1, 1])]; + tensor var_985 = const()[name = tensor("op_985"), val = tensor([1, 1])]; + tensor value_13_pad_type_0 = const()[name = tensor("value_13_pad_type_0"), val = tensor("custom")]; + tensor value_13_pad_0 = const()[name = tensor("value_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_6_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_6_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2646455296)))]; + tensor value_13_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_985, groups = var_931, pad = value_13_pad_0, pad_type = value_13_pad_type_0, strides = var_983, weight = block_6_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_27_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_989 = const()[name = tensor("op_989"), val = tensor([1, 64, 64, -1])]; + tensor var_990_cast_fp16 = reshape(shape = var_989, x = query_13_cast_fp16)[name = tensor("op_990_cast_fp16")]; + tensor var_991 = const()[name = tensor("op_991"), val = tensor([1, 64, 64, -1])]; + tensor var_992_cast_fp16 = reshape(shape = var_991, x = key_13_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor mh_w_37_transpose_x_0 = const()[name = tensor("mh_w_37_transpose_x_0"), val = tensor(true)]; + tensor mh_w_37_transpose_y_0 = const()[name = tensor("mh_w_37_transpose_y_0"), val = tensor(false)]; + tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_990_cast_fp16, y = var_992_cast_fp16)[name = tensor("mh_w_37_cast_fp16")]; + tensor mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_39_cast_fp16")]; + tensor mh_w_41_cast_fp16 = add(x = mh_w_39_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_41_cast_fp16")]; + tensor var_1001_cast_fp16 = softmax(axis = var_935, x = mh_w_41_cast_fp16)[name = tensor("op_1001_cast_fp16")]; + tensor var_1002 = const()[name = tensor("op_1002"), val = tensor([1, 64, 64, -1])]; + tensor var_1003_cast_fp16 = reshape(shape = var_1002, x = value_13_cast_fp16)[name = tensor("op_1003_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1003_cast_fp16, y = var_1001_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_1006 = const()[name = tensor("op_1006"), val = tensor([1, 4096, 1, -1])]; + tensor input_37_cast_fp16 = reshape(shape = var_1006, x = attn_13_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor var_1010 = const()[name = tensor("op_1010"), val = tensor([1, 1])]; + tensor var_1012 = const()[name = tensor("op_1012"), val = tensor([1, 1])]; + tensor obj_29_pad_type_0 = const()[name = tensor("obj_29_pad_type_0"), val = tensor("custom")]; + tensor obj_29_pad_0 = const()[name = tensor("obj_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_6_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_6_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2680009792)))]; + tensor obj_29_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1012, groups = var_931, pad = obj_29_pad_0, pad_type = obj_29_pad_type_0, strides = var_1010, weight = block_6_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_37_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_29_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; + tensor inputs_55_cast_fp16 = clip(alpha = var_933_to_fp16, beta = var_932_to_fp16, x = inputs_53_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; + tensor inputs_sq_27_cast_fp16 = mul(x = inputs_55_cast_fp16, y = inputs_55_cast_fp16)[name = tensor("inputs_sq_27_cast_fp16")]; + tensor var_1021 = const()[name = tensor("op_1021"), val = tensor([1])]; + tensor variance_27_cast_fp16 = reduce_mean(axes = var_1021, keep_dims = var_930, x = inputs_sq_27_cast_fp16)[name = tensor("variance_27_cast_fp16")]; + tensor var_1023_to_fp16 = const()[name = tensor("op_1023_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1024_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1023_to_fp16)[name = tensor("op_1024_cast_fp16")]; + tensor var_1025_epsilon_0_to_fp16 = const()[name = tensor("op_1025_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1025_cast_fp16 = rsqrt(epsilon = var_1025_epsilon_0_to_fp16, x = var_1024_cast_fp16)[name = tensor("op_1025_cast_fp16")]; + tensor hidden_states_27_cast_fp16 = mul(x = inputs_55_cast_fp16, y = var_1025_cast_fp16)[name = tensor("hidden_states_27_cast_fp16")]; + tensor w_27_to_fp16 = const()[name = tensor("w_27_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2713564288)))]; + tensor input_39_cast_fp16 = mul(x = w_27_to_fp16, y = hidden_states_27_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_1038 = const()[name = tensor("op_1038"), val = tensor([1, 1])]; + tensor var_1040 = const()[name = tensor("op_1040"), val = tensor([1, 1])]; + tensor x_15_pad_type_0 = const()[name = tensor("x_15_pad_type_0"), val = tensor("custom")]; + tensor x_15_pad_0 = const()[name = tensor("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_6_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_6_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2713572544)))]; + tensor x_15_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1040, groups = var_931, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = var_1038, weight = block_6_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor var_1054_mode_0 = const()[name = tensor("op_1054_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_1054_cast_fp16 = gelu(mode = var_1054_mode_0, x = x_15_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1057 = const()[name = tensor("op_1057"), val = tensor([1, 1])]; + tensor var_1059 = const()[name = tensor("op_1059"), val = tensor([1, 1])]; + tensor var_1061_pad_type_0 = const()[name = tensor("op_1061_pad_type_0"), val = tensor("custom")]; + tensor var_1061_pad_0 = const()[name = tensor("op_1061_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_6_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_6_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2797458688)))]; + tensor var_1061_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1059, groups = var_931, pad = var_1061_pad_0, pad_type = var_1061_pad_type_0, strides = var_1057, weight = block_6_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("op_1061_cast_fp16")]; + tensor input_41_cast_fp16 = mul(x = var_1054_cast_fp16, y = var_1061_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor var_1065 = const()[name = tensor("op_1065"), val = tensor([1, 1])]; + tensor var_1067 = const()[name = tensor("op_1067"), val = tensor([1, 1])]; + tensor var_1069_pad_type_0 = const()[name = tensor("op_1069_pad_type_0"), val = tensor("custom")]; + tensor var_1069_pad_0 = const()[name = tensor("op_1069_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_6_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_6_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2881344832)))]; + tensor var_1069_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1067, groups = var_931, pad = var_1069_pad_0, pad_type = var_1069_pad_type_0, strides = var_1065, weight = block_6_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("op_1069_cast_fp16")]; + tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = var_1069_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; + tensor var_1074 = const()[name = tensor("op_1074"), val = tensor(true)]; + tensor var_1075 = const()[name = tensor("op_1075"), val = tensor(1)]; + tensor var_1079 = const()[name = tensor("op_1079"), val = tensor(3)]; + tensor var_1077_to_fp16 = const()[name = tensor("op_1077_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_1076_to_fp16 = const()[name = tensor("op_1076_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_59_cast_fp16 = clip(alpha = var_1077_to_fp16, beta = var_1076_to_fp16, x = inputs_57_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; + tensor inputs_sq_29_cast_fp16 = mul(x = inputs_59_cast_fp16, y = inputs_59_cast_fp16)[name = tensor("inputs_sq_29_cast_fp16")]; + tensor var_1096 = const()[name = tensor("op_1096"), val = tensor([1])]; + tensor variance_29_cast_fp16 = reduce_mean(axes = var_1096, keep_dims = var_1074, x = inputs_sq_29_cast_fp16)[name = tensor("variance_29_cast_fp16")]; + tensor var_1098_to_fp16 = const()[name = tensor("op_1098_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1099_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1098_to_fp16)[name = tensor("op_1099_cast_fp16")]; + tensor var_1100_epsilon_0_to_fp16 = const()[name = tensor("op_1100_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1100_cast_fp16 = rsqrt(epsilon = var_1100_epsilon_0_to_fp16, x = var_1099_cast_fp16)[name = tensor("op_1100_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = mul(x = inputs_59_cast_fp16, y = var_1100_cast_fp16)[name = tensor("hidden_states_29_cast_fp16")]; + tensor w_29_to_fp16 = const()[name = tensor("w_29_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2965230976)))]; + tensor obj_31_cast_fp16 = mul(x = w_29_to_fp16, y = hidden_states_29_cast_fp16)[name = tensor("obj_31_cast_fp16")]; + tensor var_1114 = const()[name = tensor("op_1114"), val = tensor([1, 1])]; + tensor var_1116 = const()[name = tensor("op_1116"), val = tensor([1, 1])]; + tensor query_15_pad_type_0 = const()[name = tensor("query_15_pad_type_0"), val = tensor("custom")]; + tensor query_15_pad_0 = const()[name = tensor("query_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_7_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_7_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2965239232)))]; + tensor query_15_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1116, groups = var_1075, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = var_1114, weight = block_7_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_31_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor var_1120 = const()[name = tensor("op_1120"), val = tensor([1, 1])]; + tensor var_1122 = const()[name = tensor("op_1122"), val = tensor([1, 1])]; + tensor key_15_pad_type_0 = const()[name = tensor("key_15_pad_type_0"), val = tensor("custom")]; + tensor key_15_pad_0 = const()[name = tensor("key_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_7_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_7_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2998793728)))]; + tensor key_15_cast_fp16 = conv(dilations = var_1122, groups = var_1075, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = var_1120, weight = block_7_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_31_cast_fp16)[name = tensor("key_15_cast_fp16")]; + tensor var_1127 = const()[name = tensor("op_1127"), val = tensor([1, 1])]; + tensor var_1129 = const()[name = tensor("op_1129"), val = tensor([1, 1])]; + tensor value_15_pad_type_0 = const()[name = tensor("value_15_pad_type_0"), val = tensor("custom")]; + tensor value_15_pad_0 = const()[name = tensor("value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_7_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_7_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3032348224)))]; + tensor value_15_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1129, groups = var_1075, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = var_1127, weight = block_7_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_31_cast_fp16)[name = tensor("value_15_cast_fp16")]; + tensor var_1133 = const()[name = tensor("op_1133"), val = tensor([1, 64, 64, -1])]; + tensor var_1134_cast_fp16 = reshape(shape = var_1133, x = query_15_cast_fp16)[name = tensor("op_1134_cast_fp16")]; + tensor var_1135 = const()[name = tensor("op_1135"), val = tensor([1, 64, 64, -1])]; + tensor var_1136_cast_fp16 = reshape(shape = var_1135, x = key_15_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor mh_w_43_transpose_x_0 = const()[name = tensor("mh_w_43_transpose_x_0"), val = tensor(true)]; + tensor mh_w_43_transpose_y_0 = const()[name = tensor("mh_w_43_transpose_y_0"), val = tensor(false)]; + tensor mh_w_43_cast_fp16 = matmul(transpose_x = mh_w_43_transpose_x_0, transpose_y = mh_w_43_transpose_y_0, x = var_1134_cast_fp16, y = var_1136_cast_fp16)[name = tensor("mh_w_43_cast_fp16")]; + tensor mh_w_45_cast_fp16 = add(x = mh_w_43_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_45_cast_fp16")]; + tensor mh_w_47_cast_fp16 = add(x = mh_w_45_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_47_cast_fp16")]; + tensor var_1145_cast_fp16 = softmax(axis = var_1079, x = mh_w_47_cast_fp16)[name = tensor("op_1145_cast_fp16")]; + tensor var_1146 = const()[name = tensor("op_1146"), val = tensor([1, 64, 64, -1])]; + tensor var_1147_cast_fp16 = reshape(shape = var_1146, x = value_15_cast_fp16)[name = tensor("op_1147_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1147_cast_fp16, y = var_1145_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_1150 = const()[name = tensor("op_1150"), val = tensor([1, 4096, 1, -1])]; + tensor input_43_cast_fp16 = reshape(shape = var_1150, x = attn_15_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_1154 = const()[name = tensor("op_1154"), val = tensor([1, 1])]; + tensor var_1156 = const()[name = tensor("op_1156"), val = tensor([1, 1])]; + tensor obj_33_pad_type_0 = const()[name = tensor("obj_33_pad_type_0"), val = tensor("custom")]; + tensor obj_33_pad_0 = const()[name = tensor("obj_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_7_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_7_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3065902720)))]; + tensor obj_33_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1156, groups = var_1075, pad = obj_33_pad_0, pad_type = obj_33_pad_type_0, strides = var_1154, weight = block_7_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("obj_33_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = obj_33_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; + tensor inputs_63_cast_fp16 = clip(alpha = var_1077_to_fp16, beta = var_1076_to_fp16, x = inputs_61_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; + tensor inputs_sq_31_cast_fp16 = mul(x = inputs_63_cast_fp16, y = inputs_63_cast_fp16)[name = tensor("inputs_sq_31_cast_fp16")]; + tensor var_1165 = const()[name = tensor("op_1165"), val = tensor([1])]; + tensor variance_31_cast_fp16 = reduce_mean(axes = var_1165, keep_dims = var_1074, x = inputs_sq_31_cast_fp16)[name = tensor("variance_31_cast_fp16")]; + tensor var_1167_to_fp16 = const()[name = tensor("op_1167_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1168_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1167_to_fp16)[name = tensor("op_1168_cast_fp16")]; + tensor var_1169_epsilon_0_to_fp16 = const()[name = tensor("op_1169_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1169_cast_fp16 = rsqrt(epsilon = var_1169_epsilon_0_to_fp16, x = var_1168_cast_fp16)[name = tensor("op_1169_cast_fp16")]; + tensor hidden_states_31_cast_fp16 = mul(x = inputs_63_cast_fp16, y = var_1169_cast_fp16)[name = tensor("hidden_states_31_cast_fp16")]; + tensor w_31_to_fp16 = const()[name = tensor("w_31_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3099457216)))]; + tensor input_45_cast_fp16 = mul(x = w_31_to_fp16, y = hidden_states_31_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor var_1182 = const()[name = tensor("op_1182"), val = tensor([1, 1])]; + tensor var_1184 = const()[name = tensor("op_1184"), val = tensor([1, 1])]; + tensor x_17_pad_type_0 = const()[name = tensor("x_17_pad_type_0"), val = tensor("custom")]; + tensor x_17_pad_0 = const()[name = tensor("x_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_7_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_7_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3099465472)))]; + tensor x_17_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1184, groups = var_1075, pad = x_17_pad_0, pad_type = x_17_pad_type_0, strides = var_1182, weight = block_7_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_45_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_1198_mode_0 = const()[name = tensor("op_1198_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_1198_cast_fp16 = gelu(mode = var_1198_mode_0, x = x_17_cast_fp16)[name = tensor("op_1198_cast_fp16")]; + tensor var_1201 = const()[name = tensor("op_1201"), val = tensor([1, 1])]; + tensor var_1203 = const()[name = tensor("op_1203"), val = tensor([1, 1])]; + tensor var_1205_pad_type_0 = const()[name = tensor("op_1205_pad_type_0"), val = tensor("custom")]; + tensor var_1205_pad_0 = const()[name = tensor("op_1205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_7_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_7_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3183351616)))]; + tensor var_1205_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1203, groups = var_1075, pad = var_1205_pad_0, pad_type = var_1205_pad_type_0, strides = var_1201, weight = block_7_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_45_cast_fp16)[name = tensor("op_1205_cast_fp16")]; + tensor input_47_cast_fp16 = mul(x = var_1198_cast_fp16, y = var_1205_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor var_1209 = const()[name = tensor("op_1209"), val = tensor([1, 1])]; + tensor var_1211 = const()[name = tensor("op_1211"), val = tensor([1, 1])]; + tensor var_1213_pad_type_0 = const()[name = tensor("op_1213_pad_type_0"), val = tensor("custom")]; + tensor var_1213_pad_0 = const()[name = tensor("op_1213_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_7_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_7_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3267237760)))]; + tensor var_1213_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1211, groups = var_1075, pad = var_1213_pad_0, pad_type = var_1213_pad_type_0, strides = var_1209, weight = block_7_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("op_1213_cast_fp16")]; + tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = var_1213_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; + tensor var_1218 = const()[name = tensor("op_1218"), val = tensor(true)]; + tensor var_1219 = const()[name = tensor("op_1219"), val = tensor(1)]; + tensor var_1223 = const()[name = tensor("op_1223"), val = tensor(3)]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_1220_to_fp16 = const()[name = tensor("op_1220_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_67_cast_fp16 = clip(alpha = var_1221_to_fp16, beta = var_1220_to_fp16, x = inputs_65_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; + tensor inputs_sq_33_cast_fp16 = mul(x = inputs_67_cast_fp16, y = inputs_67_cast_fp16)[name = tensor("inputs_sq_33_cast_fp16")]; + tensor var_1240 = const()[name = tensor("op_1240"), val = tensor([1])]; + tensor variance_33_cast_fp16 = reduce_mean(axes = var_1240, keep_dims = var_1218, x = inputs_sq_33_cast_fp16)[name = tensor("variance_33_cast_fp16")]; + tensor var_1242_to_fp16 = const()[name = tensor("op_1242_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1243_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1242_to_fp16)[name = tensor("op_1243_cast_fp16")]; + tensor var_1244_epsilon_0_to_fp16 = const()[name = tensor("op_1244_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1244_cast_fp16 = rsqrt(epsilon = var_1244_epsilon_0_to_fp16, x = var_1243_cast_fp16)[name = tensor("op_1244_cast_fp16")]; + tensor hidden_states_33_cast_fp16 = mul(x = inputs_67_cast_fp16, y = var_1244_cast_fp16)[name = tensor("hidden_states_33_cast_fp16")]; + tensor w_33_to_fp16 = const()[name = tensor("w_33_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351123904)))]; + tensor obj_35_cast_fp16 = mul(x = w_33_to_fp16, y = hidden_states_33_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor var_1258 = const()[name = tensor("op_1258"), val = tensor([1, 1])]; + tensor var_1260 = const()[name = tensor("op_1260"), val = tensor([1, 1])]; + tensor query_17_pad_type_0 = const()[name = tensor("query_17_pad_type_0"), val = tensor("custom")]; + tensor query_17_pad_0 = const()[name = tensor("query_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_8_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_8_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351132160)))]; + tensor query_17_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1260, groups = var_1219, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = var_1258, weight = block_8_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_35_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_1264 = const()[name = tensor("op_1264"), val = tensor([1, 1])]; + tensor var_1266 = const()[name = tensor("op_1266"), val = tensor([1, 1])]; + tensor key_17_pad_type_0 = const()[name = tensor("key_17_pad_type_0"), val = tensor("custom")]; + tensor key_17_pad_0 = const()[name = tensor("key_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_8_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_8_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3384686656)))]; + tensor key_17_cast_fp16 = conv(dilations = var_1266, groups = var_1219, pad = key_17_pad_0, pad_type = key_17_pad_type_0, strides = var_1264, weight = block_8_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_35_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_1271 = const()[name = tensor("op_1271"), val = tensor([1, 1])]; + tensor var_1273 = const()[name = tensor("op_1273"), val = tensor([1, 1])]; + tensor value_17_pad_type_0 = const()[name = tensor("value_17_pad_type_0"), val = tensor("custom")]; + tensor value_17_pad_0 = const()[name = tensor("value_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_8_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_8_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3418241152)))]; + tensor value_17_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1273, groups = var_1219, pad = value_17_pad_0, pad_type = value_17_pad_type_0, strides = var_1271, weight = block_8_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_35_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1277 = const()[name = tensor("op_1277"), val = tensor([1, 64, 64, -1])]; + tensor var_1278_cast_fp16 = reshape(shape = var_1277, x = query_17_cast_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1279 = const()[name = tensor("op_1279"), val = tensor([1, 64, 64, -1])]; + tensor var_1280_cast_fp16 = reshape(shape = var_1279, x = key_17_cast_fp16)[name = tensor("op_1280_cast_fp16")]; + tensor mh_w_49_transpose_x_0 = const()[name = tensor("mh_w_49_transpose_x_0"), val = tensor(true)]; + tensor mh_w_49_transpose_y_0 = const()[name = tensor("mh_w_49_transpose_y_0"), val = tensor(false)]; + tensor mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_1278_cast_fp16, y = var_1280_cast_fp16)[name = tensor("mh_w_49_cast_fp16")]; + tensor mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_51_cast_fp16")]; + tensor mh_w_53_cast_fp16 = add(x = mh_w_51_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_53_cast_fp16")]; + tensor var_1289_cast_fp16 = softmax(axis = var_1223, x = mh_w_53_cast_fp16)[name = tensor("op_1289_cast_fp16")]; + tensor var_1290 = const()[name = tensor("op_1290"), val = tensor([1, 64, 64, -1])]; + tensor var_1291_cast_fp16 = reshape(shape = var_1290, x = value_17_cast_fp16)[name = tensor("op_1291_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1291_cast_fp16, y = var_1289_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1294 = const()[name = tensor("op_1294"), val = tensor([1, 4096, 1, -1])]; + tensor input_49_cast_fp16 = reshape(shape = var_1294, x = attn_17_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_1298 = const()[name = tensor("op_1298"), val = tensor([1, 1])]; + tensor var_1300 = const()[name = tensor("op_1300"), val = tensor([1, 1])]; + tensor obj_37_pad_type_0 = const()[name = tensor("obj_37_pad_type_0"), val = tensor("custom")]; + tensor obj_37_pad_0 = const()[name = tensor("obj_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_8_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_8_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3451795648)))]; + tensor obj_37_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1300, groups = var_1219, pad = obj_37_pad_0, pad_type = obj_37_pad_type_0, strides = var_1298, weight = block_8_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_37_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; + tensor inputs_71_cast_fp16 = clip(alpha = var_1221_to_fp16, beta = var_1220_to_fp16, x = inputs_69_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; + tensor inputs_sq_35_cast_fp16 = mul(x = inputs_71_cast_fp16, y = inputs_71_cast_fp16)[name = tensor("inputs_sq_35_cast_fp16")]; + tensor var_1309 = const()[name = tensor("op_1309"), val = tensor([1])]; + tensor variance_35_cast_fp16 = reduce_mean(axes = var_1309, keep_dims = var_1218, x = inputs_sq_35_cast_fp16)[name = tensor("variance_35_cast_fp16")]; + tensor var_1311_to_fp16 = const()[name = tensor("op_1311_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1312_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1311_to_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1313_epsilon_0_to_fp16 = const()[name = tensor("op_1313_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1313_cast_fp16 = rsqrt(epsilon = var_1313_epsilon_0_to_fp16, x = var_1312_cast_fp16)[name = tensor("op_1313_cast_fp16")]; + tensor hidden_states_35_cast_fp16 = mul(x = inputs_71_cast_fp16, y = var_1313_cast_fp16)[name = tensor("hidden_states_35_cast_fp16")]; + tensor w_35_to_fp16 = const()[name = tensor("w_35_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3485350144)))]; + tensor input_51_cast_fp16 = mul(x = w_35_to_fp16, y = hidden_states_35_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_1326 = const()[name = tensor("op_1326"), val = tensor([1, 1])]; + tensor var_1328 = const()[name = tensor("op_1328"), val = tensor([1, 1])]; + tensor x_19_pad_type_0 = const()[name = tensor("x_19_pad_type_0"), val = tensor("custom")]; + tensor x_19_pad_0 = const()[name = tensor("x_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_8_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_8_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3485358400)))]; + tensor x_19_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1328, groups = var_1219, pad = x_19_pad_0, pad_type = x_19_pad_type_0, strides = var_1326, weight = block_8_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_1342_mode_0 = const()[name = tensor("op_1342_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_1342_cast_fp16 = gelu(mode = var_1342_mode_0, x = x_19_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1345 = const()[name = tensor("op_1345"), val = tensor([1, 1])]; + tensor var_1347 = const()[name = tensor("op_1347"), val = tensor([1, 1])]; + tensor var_1349_pad_type_0 = const()[name = tensor("op_1349_pad_type_0"), val = tensor("custom")]; + tensor var_1349_pad_0 = const()[name = tensor("op_1349_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_8_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_8_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3569244544)))]; + tensor var_1349_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1347, groups = var_1219, pad = var_1349_pad_0, pad_type = var_1349_pad_type_0, strides = var_1345, weight = block_8_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor input_53_cast_fp16 = mul(x = var_1342_cast_fp16, y = var_1349_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor var_1353 = const()[name = tensor("op_1353"), val = tensor([1, 1])]; + tensor var_1355 = const()[name = tensor("op_1355"), val = tensor([1, 1])]; + tensor var_1357_pad_type_0 = const()[name = tensor("op_1357_pad_type_0"), val = tensor("custom")]; + tensor var_1357_pad_0 = const()[name = tensor("op_1357_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_8_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_8_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3653130688)))]; + tensor var_1357_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1355, groups = var_1219, pad = var_1357_pad_0, pad_type = var_1357_pad_type_0, strides = var_1353, weight = block_8_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_53_cast_fp16)[name = tensor("op_1357_cast_fp16")]; + tensor inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = var_1357_cast_fp16)[name = tensor("inputs_73_cast_fp16")]; + tensor var_1362 = const()[name = tensor("op_1362"), val = tensor(true)]; + tensor var_1363 = const()[name = tensor("op_1363"), val = tensor(1)]; + tensor var_1367 = const()[name = tensor("op_1367"), val = tensor(3)]; + tensor var_1365_to_fp16 = const()[name = tensor("op_1365_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_1364_to_fp16 = const()[name = tensor("op_1364_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_75_cast_fp16 = clip(alpha = var_1365_to_fp16, beta = var_1364_to_fp16, x = inputs_73_cast_fp16)[name = tensor("inputs_75_cast_fp16")]; + tensor inputs_sq_37_cast_fp16 = mul(x = inputs_75_cast_fp16, y = inputs_75_cast_fp16)[name = tensor("inputs_sq_37_cast_fp16")]; + tensor var_1384 = const()[name = tensor("op_1384"), val = tensor([1])]; + tensor variance_37_cast_fp16 = reduce_mean(axes = var_1384, keep_dims = var_1362, x = inputs_sq_37_cast_fp16)[name = tensor("variance_37_cast_fp16")]; + tensor var_1386_to_fp16 = const()[name = tensor("op_1386_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1387_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1386_to_fp16)[name = tensor("op_1387_cast_fp16")]; + tensor var_1388_epsilon_0_to_fp16 = const()[name = tensor("op_1388_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1388_cast_fp16 = rsqrt(epsilon = var_1388_epsilon_0_to_fp16, x = var_1387_cast_fp16)[name = tensor("op_1388_cast_fp16")]; + tensor hidden_states_37_cast_fp16 = mul(x = inputs_75_cast_fp16, y = var_1388_cast_fp16)[name = tensor("hidden_states_37_cast_fp16")]; + tensor w_37_to_fp16 = const()[name = tensor("w_37_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3737016832)))]; + tensor obj_39_cast_fp16 = mul(x = w_37_to_fp16, y = hidden_states_37_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor var_1402 = const()[name = tensor("op_1402"), val = tensor([1, 1])]; + tensor var_1404 = const()[name = tensor("op_1404"), val = tensor([1, 1])]; + tensor query_19_pad_type_0 = const()[name = tensor("query_19_pad_type_0"), val = tensor("custom")]; + tensor query_19_pad_0 = const()[name = tensor("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_9_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_9_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3737025088)))]; + tensor query_19_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1404, groups = var_1363, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = var_1402, weight = block_9_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_39_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor var_1410 = const()[name = tensor("op_1410"), val = tensor([1, 1])]; + tensor key_19_pad_type_0 = const()[name = tensor("key_19_pad_type_0"), val = tensor("custom")]; + tensor key_19_pad_0 = const()[name = tensor("key_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_9_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_9_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3770579584)))]; + tensor key_19_cast_fp16 = conv(dilations = var_1410, groups = var_1363, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = var_1408, weight = block_9_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_39_cast_fp16)[name = tensor("key_19_cast_fp16")]; + tensor var_1415 = const()[name = tensor("op_1415"), val = tensor([1, 1])]; + tensor var_1417 = const()[name = tensor("op_1417"), val = tensor([1, 1])]; + tensor value_19_pad_type_0 = const()[name = tensor("value_19_pad_type_0"), val = tensor("custom")]; + tensor value_19_pad_0 = const()[name = tensor("value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_9_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_9_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3804134080)))]; + tensor value_19_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1417, groups = var_1363, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = var_1415, weight = block_9_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_39_cast_fp16)[name = tensor("value_19_cast_fp16")]; + tensor var_1421 = const()[name = tensor("op_1421"), val = tensor([1, 64, 64, -1])]; + tensor var_1422_cast_fp16 = reshape(shape = var_1421, x = query_19_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1423 = const()[name = tensor("op_1423"), val = tensor([1, 64, 64, -1])]; + tensor var_1424_cast_fp16 = reshape(shape = var_1423, x = key_19_cast_fp16)[name = tensor("op_1424_cast_fp16")]; + tensor mh_w_55_transpose_x_0 = const()[name = tensor("mh_w_55_transpose_x_0"), val = tensor(true)]; + tensor mh_w_55_transpose_y_0 = const()[name = tensor("mh_w_55_transpose_y_0"), val = tensor(false)]; + tensor mh_w_55_cast_fp16 = matmul(transpose_x = mh_w_55_transpose_x_0, transpose_y = mh_w_55_transpose_y_0, x = var_1422_cast_fp16, y = var_1424_cast_fp16)[name = tensor("mh_w_55_cast_fp16")]; + tensor mh_w_57_cast_fp16 = add(x = mh_w_55_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_57_cast_fp16")]; + tensor mh_w_59_cast_fp16 = add(x = mh_w_57_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_59_cast_fp16")]; + tensor var_1433_cast_fp16 = softmax(axis = var_1367, x = mh_w_59_cast_fp16)[name = tensor("op_1433_cast_fp16")]; + tensor var_1434 = const()[name = tensor("op_1434"), val = tensor([1, 64, 64, -1])]; + tensor var_1435_cast_fp16 = reshape(shape = var_1434, x = value_19_cast_fp16)[name = tensor("op_1435_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1435_cast_fp16, y = var_1433_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor var_1438 = const()[name = tensor("op_1438"), val = tensor([1, 4096, 1, -1])]; + tensor input_55_cast_fp16 = reshape(shape = var_1438, x = attn_19_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_1442 = const()[name = tensor("op_1442"), val = tensor([1, 1])]; + tensor var_1444 = const()[name = tensor("op_1444"), val = tensor([1, 1])]; + tensor obj_41_pad_type_0 = const()[name = tensor("obj_41_pad_type_0"), val = tensor("custom")]; + tensor obj_41_pad_0 = const()[name = tensor("obj_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_9_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_9_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3837688576)))]; + tensor obj_41_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1444, groups = var_1363, pad = obj_41_pad_0, pad_type = obj_41_pad_type_0, strides = var_1442, weight = block_9_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = obj_41_cast_fp16)[name = tensor("inputs_77_cast_fp16")]; + tensor inputs_79_cast_fp16 = clip(alpha = var_1365_to_fp16, beta = var_1364_to_fp16, x = inputs_77_cast_fp16)[name = tensor("inputs_79_cast_fp16")]; + tensor inputs_sq_39_cast_fp16 = mul(x = inputs_79_cast_fp16, y = inputs_79_cast_fp16)[name = tensor("inputs_sq_39_cast_fp16")]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1])]; + tensor variance_39_cast_fp16 = reduce_mean(axes = var_1453, keep_dims = var_1362, x = inputs_sq_39_cast_fp16)[name = tensor("variance_39_cast_fp16")]; + tensor var_1455_to_fp16 = const()[name = tensor("op_1455_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1456_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1455_to_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor var_1457_epsilon_0_to_fp16 = const()[name = tensor("op_1457_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1457_cast_fp16 = rsqrt(epsilon = var_1457_epsilon_0_to_fp16, x = var_1456_cast_fp16)[name = tensor("op_1457_cast_fp16")]; + tensor hidden_states_39_cast_fp16 = mul(x = inputs_79_cast_fp16, y = var_1457_cast_fp16)[name = tensor("hidden_states_39_cast_fp16")]; + tensor w_39_to_fp16 = const()[name = tensor("w_39_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3871243072)))]; + tensor input_57_cast_fp16 = mul(x = w_39_to_fp16, y = hidden_states_39_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor var_1470 = const()[name = tensor("op_1470"), val = tensor([1, 1])]; + tensor var_1472 = const()[name = tensor("op_1472"), val = tensor([1, 1])]; + tensor x_21_pad_type_0 = const()[name = tensor("x_21_pad_type_0"), val = tensor("custom")]; + tensor x_21_pad_0 = const()[name = tensor("x_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_9_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_9_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3871251328)))]; + tensor x_21_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1472, groups = var_1363, pad = x_21_pad_0, pad_type = x_21_pad_type_0, strides = var_1470, weight = block_9_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_1486_mode_0 = const()[name = tensor("op_1486_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_1486_cast_fp16 = gelu(mode = var_1486_mode_0, x = x_21_cast_fp16)[name = tensor("op_1486_cast_fp16")]; + tensor var_1489 = const()[name = tensor("op_1489"), val = tensor([1, 1])]; + tensor var_1491 = const()[name = tensor("op_1491"), val = tensor([1, 1])]; + tensor var_1493_pad_type_0 = const()[name = tensor("op_1493_pad_type_0"), val = tensor("custom")]; + tensor var_1493_pad_0 = const()[name = tensor("op_1493_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_9_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_9_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3955137472)))]; + tensor var_1493_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1491, groups = var_1363, pad = var_1493_pad_0, pad_type = var_1493_pad_type_0, strides = var_1489, weight = block_9_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("op_1493_cast_fp16")]; + tensor input_59_cast_fp16 = mul(x = var_1486_cast_fp16, y = var_1493_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_1497 = const()[name = tensor("op_1497"), val = tensor([1, 1])]; + tensor var_1499 = const()[name = tensor("op_1499"), val = tensor([1, 1])]; + tensor var_1501_pad_type_0 = const()[name = tensor("op_1501_pad_type_0"), val = tensor("custom")]; + tensor var_1501_pad_0 = const()[name = tensor("op_1501_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_9_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_9_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4039023616)))]; + tensor var_1501_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1499, groups = var_1363, pad = var_1501_pad_0, pad_type = var_1501_pad_type_0, strides = var_1497, weight = block_9_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("op_1501_cast_fp16")]; + tensor inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = var_1501_cast_fp16)[name = tensor("inputs_81_cast_fp16")]; + tensor var_1506 = const()[name = tensor("op_1506"), val = tensor(true)]; + tensor var_1507 = const()[name = tensor("op_1507"), val = tensor(1)]; + tensor var_1511 = const()[name = tensor("op_1511"), val = tensor(3)]; + tensor var_1509_to_fp16 = const()[name = tensor("op_1509_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_1508_to_fp16 = const()[name = tensor("op_1508_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_83_cast_fp16 = clip(alpha = var_1509_to_fp16, beta = var_1508_to_fp16, x = inputs_81_cast_fp16)[name = tensor("inputs_83_cast_fp16")]; + tensor inputs_sq_41_cast_fp16 = mul(x = inputs_83_cast_fp16, y = inputs_83_cast_fp16)[name = tensor("inputs_sq_41_cast_fp16")]; + tensor var_1528 = const()[name = tensor("op_1528"), val = tensor([1])]; + tensor variance_41_cast_fp16 = reduce_mean(axes = var_1528, keep_dims = var_1506, x = inputs_sq_41_cast_fp16)[name = tensor("variance_41_cast_fp16")]; + tensor var_1530_to_fp16 = const()[name = tensor("op_1530_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1531_cast_fp16 = add(x = variance_41_cast_fp16, y = var_1530_to_fp16)[name = tensor("op_1531_cast_fp16")]; + tensor var_1532_epsilon_0_to_fp16 = const()[name = tensor("op_1532_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1532_cast_fp16 = rsqrt(epsilon = var_1532_epsilon_0_to_fp16, x = var_1531_cast_fp16)[name = tensor("op_1532_cast_fp16")]; + tensor hidden_states_41_cast_fp16 = mul(x = inputs_83_cast_fp16, y = var_1532_cast_fp16)[name = tensor("hidden_states_41_cast_fp16")]; + tensor w_41_to_fp16 = const()[name = tensor("w_41_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4122909760)))]; + tensor obj_43_cast_fp16 = mul(x = w_41_to_fp16, y = hidden_states_41_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor var_1546 = const()[name = tensor("op_1546"), val = tensor([1, 1])]; + tensor var_1548 = const()[name = tensor("op_1548"), val = tensor([1, 1])]; + tensor query_21_pad_type_0 = const()[name = tensor("query_21_pad_type_0"), val = tensor("custom")]; + tensor query_21_pad_0 = const()[name = tensor("query_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_10_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_10_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4122918016)))]; + tensor query_21_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1548, groups = var_1507, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = var_1546, weight = block_10_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_1552 = const()[name = tensor("op_1552"), val = tensor([1, 1])]; + tensor var_1554 = const()[name = tensor("op_1554"), val = tensor([1, 1])]; + tensor key_21_pad_type_0 = const()[name = tensor("key_21_pad_type_0"), val = tensor("custom")]; + tensor key_21_pad_0 = const()[name = tensor("key_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_10_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_10_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4156472512)))]; + tensor key_21_cast_fp16 = conv(dilations = var_1554, groups = var_1507, pad = key_21_pad_0, pad_type = key_21_pad_type_0, strides = var_1552, weight = block_10_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_1559 = const()[name = tensor("op_1559"), val = tensor([1, 1])]; + tensor var_1561 = const()[name = tensor("op_1561"), val = tensor([1, 1])]; + tensor value_21_pad_type_0 = const()[name = tensor("value_21_pad_type_0"), val = tensor("custom")]; + tensor value_21_pad_0 = const()[name = tensor("value_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_10_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_10_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4190027008)))]; + tensor value_21_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1561, groups = var_1507, pad = value_21_pad_0, pad_type = value_21_pad_type_0, strides = var_1559, weight = block_10_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_43_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_1565 = const()[name = tensor("op_1565"), val = tensor([1, 64, 64, -1])]; + tensor var_1566_cast_fp16 = reshape(shape = var_1565, x = query_21_cast_fp16)[name = tensor("op_1566_cast_fp16")]; + tensor var_1567 = const()[name = tensor("op_1567"), val = tensor([1, 64, 64, -1])]; + tensor var_1568_cast_fp16 = reshape(shape = var_1567, x = key_21_cast_fp16)[name = tensor("op_1568_cast_fp16")]; + tensor mh_w_61_transpose_x_0 = const()[name = tensor("mh_w_61_transpose_x_0"), val = tensor(true)]; + tensor mh_w_61_transpose_y_0 = const()[name = tensor("mh_w_61_transpose_y_0"), val = tensor(false)]; + tensor mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_1566_cast_fp16, y = var_1568_cast_fp16)[name = tensor("mh_w_61_cast_fp16")]; + tensor mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_63_cast_fp16")]; + tensor mh_w_65_cast_fp16 = add(x = mh_w_63_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_65_cast_fp16")]; + tensor var_1577_cast_fp16 = softmax(axis = var_1511, x = mh_w_65_cast_fp16)[name = tensor("op_1577_cast_fp16")]; + tensor var_1578 = const()[name = tensor("op_1578"), val = tensor([1, 64, 64, -1])]; + tensor var_1579_cast_fp16 = reshape(shape = var_1578, x = value_21_cast_fp16)[name = tensor("op_1579_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1579_cast_fp16, y = var_1577_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_1582 = const()[name = tensor("op_1582"), val = tensor([1, 4096, 1, -1])]; + tensor input_61_cast_fp16 = reshape(shape = var_1582, x = attn_21_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor var_1586 = const()[name = tensor("op_1586"), val = tensor([1, 1])]; + tensor var_1588 = const()[name = tensor("op_1588"), val = tensor([1, 1])]; + tensor obj_45_pad_type_0 = const()[name = tensor("obj_45_pad_type_0"), val = tensor("custom")]; + tensor obj_45_pad_0 = const()[name = tensor("obj_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_10_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_10_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4223581504)))]; + tensor obj_45_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1588, groups = var_1507, pad = obj_45_pad_0, pad_type = obj_45_pad_type_0, strides = var_1586, weight = block_10_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_61_cast_fp16)[name = tensor("obj_45_cast_fp16")]; + tensor inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = obj_45_cast_fp16)[name = tensor("inputs_85_cast_fp16")]; + tensor inputs_87_cast_fp16 = clip(alpha = var_1509_to_fp16, beta = var_1508_to_fp16, x = inputs_85_cast_fp16)[name = tensor("inputs_87_cast_fp16")]; + tensor inputs_sq_43_cast_fp16 = mul(x = inputs_87_cast_fp16, y = inputs_87_cast_fp16)[name = tensor("inputs_sq_43_cast_fp16")]; + tensor var_1597 = const()[name = tensor("op_1597"), val = tensor([1])]; + tensor variance_43_cast_fp16 = reduce_mean(axes = var_1597, keep_dims = var_1506, x = inputs_sq_43_cast_fp16)[name = tensor("variance_43_cast_fp16")]; + tensor var_1599_to_fp16 = const()[name = tensor("op_1599_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1600_cast_fp16 = add(x = variance_43_cast_fp16, y = var_1599_to_fp16)[name = tensor("op_1600_cast_fp16")]; + tensor var_1601_epsilon_0_to_fp16 = const()[name = tensor("op_1601_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1601_cast_fp16 = rsqrt(epsilon = var_1601_epsilon_0_to_fp16, x = var_1600_cast_fp16)[name = tensor("op_1601_cast_fp16")]; + tensor hidden_states_43_cast_fp16 = mul(x = inputs_87_cast_fp16, y = var_1601_cast_fp16)[name = tensor("hidden_states_43_cast_fp16")]; + tensor w_43_to_fp16 = const()[name = tensor("w_43_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4257136000)))]; + tensor input_63_cast_fp16 = mul(x = w_43_to_fp16, y = hidden_states_43_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_1614 = const()[name = tensor("op_1614"), val = tensor([1, 1])]; + tensor var_1616 = const()[name = tensor("op_1616"), val = tensor([1, 1])]; + tensor x_23_pad_type_0 = const()[name = tensor("x_23_pad_type_0"), val = tensor("custom")]; + tensor x_23_pad_0 = const()[name = tensor("x_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_10_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_10_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4257144256)))]; + tensor x_23_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1616, groups = var_1507, pad = x_23_pad_0, pad_type = x_23_pad_type_0, strides = var_1614, weight = block_10_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("x_23_cast_fp16")]; + tensor var_1630_mode_0 = const()[name = tensor("op_1630_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_1630_cast_fp16 = gelu(mode = var_1630_mode_0, x = x_23_cast_fp16)[name = tensor("op_1630_cast_fp16")]; + tensor var_1633 = const()[name = tensor("op_1633"), val = tensor([1, 1])]; + tensor var_1635 = const()[name = tensor("op_1635"), val = tensor([1, 1])]; + tensor var_1637_pad_type_0 = const()[name = tensor("op_1637_pad_type_0"), val = tensor("custom")]; + tensor var_1637_pad_0 = const()[name = tensor("op_1637_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_10_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_10_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4341030400)))]; + tensor var_1637_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1635, groups = var_1507, pad = var_1637_pad_0, pad_type = var_1637_pad_type_0, strides = var_1633, weight = block_10_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("op_1637_cast_fp16")]; + tensor input_65_cast_fp16 = mul(x = var_1630_cast_fp16, y = var_1637_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor var_1641 = const()[name = tensor("op_1641"), val = tensor([1, 1])]; + tensor var_1643 = const()[name = tensor("op_1643"), val = tensor([1, 1])]; + tensor var_1645_pad_type_0 = const()[name = tensor("op_1645_pad_type_0"), val = tensor("custom")]; + tensor var_1645_pad_0 = const()[name = tensor("op_1645_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_10_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_10_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4424916544)))]; + tensor var_1645_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1643, groups = var_1507, pad = var_1645_pad_0, pad_type = var_1645_pad_type_0, strides = var_1641, weight = block_10_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("op_1645_cast_fp16")]; + tensor inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = var_1645_cast_fp16)[name = tensor("inputs_89_cast_fp16")]; + tensor var_1650 = const()[name = tensor("op_1650"), val = tensor(true)]; + tensor var_1651 = const()[name = tensor("op_1651"), val = tensor(1)]; + tensor var_1655 = const()[name = tensor("op_1655"), val = tensor(3)]; + tensor var_1653_to_fp16 = const()[name = tensor("op_1653_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_1652_to_fp16 = const()[name = tensor("op_1652_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_91_cast_fp16 = clip(alpha = var_1653_to_fp16, beta = var_1652_to_fp16, x = inputs_89_cast_fp16)[name = tensor("inputs_91_cast_fp16")]; + tensor inputs_sq_45_cast_fp16 = mul(x = inputs_91_cast_fp16, y = inputs_91_cast_fp16)[name = tensor("inputs_sq_45_cast_fp16")]; + tensor var_1672 = const()[name = tensor("op_1672"), val = tensor([1])]; + tensor variance_45_cast_fp16 = reduce_mean(axes = var_1672, keep_dims = var_1650, x = inputs_sq_45_cast_fp16)[name = tensor("variance_45_cast_fp16")]; + tensor var_1674_to_fp16 = const()[name = tensor("op_1674_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1675_cast_fp16 = add(x = variance_45_cast_fp16, y = var_1674_to_fp16)[name = tensor("op_1675_cast_fp16")]; + tensor var_1676_epsilon_0_to_fp16 = const()[name = tensor("op_1676_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1676_cast_fp16 = rsqrt(epsilon = var_1676_epsilon_0_to_fp16, x = var_1675_cast_fp16)[name = tensor("op_1676_cast_fp16")]; + tensor hidden_states_45_cast_fp16 = mul(x = inputs_91_cast_fp16, y = var_1676_cast_fp16)[name = tensor("hidden_states_45_cast_fp16")]; + tensor w_45_to_fp16 = const()[name = tensor("w_45_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4508802688)))]; + tensor obj_47_cast_fp16 = mul(x = w_45_to_fp16, y = hidden_states_45_cast_fp16)[name = tensor("obj_47_cast_fp16")]; + tensor var_1690 = const()[name = tensor("op_1690"), val = tensor([1, 1])]; + tensor var_1692 = const()[name = tensor("op_1692"), val = tensor([1, 1])]; + tensor query_23_pad_type_0 = const()[name = tensor("query_23_pad_type_0"), val = tensor("custom")]; + tensor query_23_pad_0 = const()[name = tensor("query_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_11_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_11_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4508810944)))]; + tensor query_23_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1692, groups = var_1651, pad = query_23_pad_0, pad_type = query_23_pad_type_0, strides = var_1690, weight = block_11_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_47_cast_fp16)[name = tensor("query_23_cast_fp16")]; + tensor var_1696 = const()[name = tensor("op_1696"), val = tensor([1, 1])]; + tensor var_1698 = const()[name = tensor("op_1698"), val = tensor([1, 1])]; + tensor key_23_pad_type_0 = const()[name = tensor("key_23_pad_type_0"), val = tensor("custom")]; + tensor key_23_pad_0 = const()[name = tensor("key_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_11_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_11_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4542365440)))]; + tensor key_23_cast_fp16 = conv(dilations = var_1698, groups = var_1651, pad = key_23_pad_0, pad_type = key_23_pad_type_0, strides = var_1696, weight = block_11_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_47_cast_fp16)[name = tensor("key_23_cast_fp16")]; + tensor var_1703 = const()[name = tensor("op_1703"), val = tensor([1, 1])]; + tensor var_1705 = const()[name = tensor("op_1705"), val = tensor([1, 1])]; + tensor value_23_pad_type_0 = const()[name = tensor("value_23_pad_type_0"), val = tensor("custom")]; + tensor value_23_pad_0 = const()[name = tensor("value_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_11_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_11_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4575919936)))]; + tensor value_23_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1705, groups = var_1651, pad = value_23_pad_0, pad_type = value_23_pad_type_0, strides = var_1703, weight = block_11_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_47_cast_fp16)[name = tensor("value_23_cast_fp16")]; + tensor var_1709 = const()[name = tensor("op_1709"), val = tensor([1, 64, 64, -1])]; + tensor var_1710_cast_fp16 = reshape(shape = var_1709, x = query_23_cast_fp16)[name = tensor("op_1710_cast_fp16")]; + tensor var_1711 = const()[name = tensor("op_1711"), val = tensor([1, 64, 64, -1])]; + tensor var_1712_cast_fp16 = reshape(shape = var_1711, x = key_23_cast_fp16)[name = tensor("op_1712_cast_fp16")]; + tensor mh_w_67_transpose_x_0 = const()[name = tensor("mh_w_67_transpose_x_0"), val = tensor(true)]; + tensor mh_w_67_transpose_y_0 = const()[name = tensor("mh_w_67_transpose_y_0"), val = tensor(false)]; + tensor mh_w_67_cast_fp16 = matmul(transpose_x = mh_w_67_transpose_x_0, transpose_y = mh_w_67_transpose_y_0, x = var_1710_cast_fp16, y = var_1712_cast_fp16)[name = tensor("mh_w_67_cast_fp16")]; + tensor mh_w_69_cast_fp16 = add(x = mh_w_67_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_69_cast_fp16")]; + tensor mh_w_71_cast_fp16 = add(x = mh_w_69_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_71_cast_fp16")]; + tensor var_1721_cast_fp16 = softmax(axis = var_1655, x = mh_w_71_cast_fp16)[name = tensor("op_1721_cast_fp16")]; + tensor var_1722 = const()[name = tensor("op_1722"), val = tensor([1, 64, 64, -1])]; + tensor var_1723_cast_fp16 = reshape(shape = var_1722, x = value_23_cast_fp16)[name = tensor("op_1723_cast_fp16")]; + tensor attn_23_transpose_x_0 = const()[name = tensor("attn_23_transpose_x_0"), val = tensor(false)]; + tensor attn_23_transpose_y_0 = const()[name = tensor("attn_23_transpose_y_0"), val = tensor(true)]; + tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1723_cast_fp16, y = var_1721_cast_fp16)[name = tensor("attn_23_cast_fp16")]; + tensor var_1726 = const()[name = tensor("op_1726"), val = tensor([1, 4096, 1, -1])]; + tensor input_67_cast_fp16 = reshape(shape = var_1726, x = attn_23_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor var_1730 = const()[name = tensor("op_1730"), val = tensor([1, 1])]; + tensor var_1732 = const()[name = tensor("op_1732"), val = tensor([1, 1])]; + tensor obj_49_pad_type_0 = const()[name = tensor("obj_49_pad_type_0"), val = tensor("custom")]; + tensor obj_49_pad_0 = const()[name = tensor("obj_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_11_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_11_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4609474432)))]; + tensor obj_49_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1732, groups = var_1651, pad = obj_49_pad_0, pad_type = obj_49_pad_type_0, strides = var_1730, weight = block_11_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_67_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = obj_49_cast_fp16)[name = tensor("inputs_93_cast_fp16")]; + tensor inputs_95_cast_fp16 = clip(alpha = var_1653_to_fp16, beta = var_1652_to_fp16, x = inputs_93_cast_fp16)[name = tensor("inputs_95_cast_fp16")]; + tensor inputs_sq_47_cast_fp16 = mul(x = inputs_95_cast_fp16, y = inputs_95_cast_fp16)[name = tensor("inputs_sq_47_cast_fp16")]; + tensor var_1741 = const()[name = tensor("op_1741"), val = tensor([1])]; + tensor variance_47_cast_fp16 = reduce_mean(axes = var_1741, keep_dims = var_1650, x = inputs_sq_47_cast_fp16)[name = tensor("variance_47_cast_fp16")]; + tensor var_1743_to_fp16 = const()[name = tensor("op_1743_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1744_cast_fp16 = add(x = variance_47_cast_fp16, y = var_1743_to_fp16)[name = tensor("op_1744_cast_fp16")]; + tensor var_1745_epsilon_0_to_fp16 = const()[name = tensor("op_1745_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1745_cast_fp16 = rsqrt(epsilon = var_1745_epsilon_0_to_fp16, x = var_1744_cast_fp16)[name = tensor("op_1745_cast_fp16")]; + tensor hidden_states_47_cast_fp16 = mul(x = inputs_95_cast_fp16, y = var_1745_cast_fp16)[name = tensor("hidden_states_47_cast_fp16")]; + tensor w_47_to_fp16 = const()[name = tensor("w_47_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4643028928)))]; + tensor input_69_cast_fp16 = mul(x = w_47_to_fp16, y = hidden_states_47_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor var_1758 = const()[name = tensor("op_1758"), val = tensor([1, 1])]; + tensor var_1760 = const()[name = tensor("op_1760"), val = tensor([1, 1])]; + tensor x_25_pad_type_0 = const()[name = tensor("x_25_pad_type_0"), val = tensor("custom")]; + tensor x_25_pad_0 = const()[name = tensor("x_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_11_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_11_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4643037184)))]; + tensor x_25_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1760, groups = var_1651, pad = x_25_pad_0, pad_type = x_25_pad_type_0, strides = var_1758, weight = block_11_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_69_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_1774_mode_0 = const()[name = tensor("op_1774_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_1774_cast_fp16 = gelu(mode = var_1774_mode_0, x = x_25_cast_fp16)[name = tensor("op_1774_cast_fp16")]; + tensor var_1777 = const()[name = tensor("op_1777"), val = tensor([1, 1])]; + tensor var_1779 = const()[name = tensor("op_1779"), val = tensor([1, 1])]; + tensor var_1781_pad_type_0 = const()[name = tensor("op_1781_pad_type_0"), val = tensor("custom")]; + tensor var_1781_pad_0 = const()[name = tensor("op_1781_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_11_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_11_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4726923328)))]; + tensor var_1781_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1779, groups = var_1651, pad = var_1781_pad_0, pad_type = var_1781_pad_type_0, strides = var_1777, weight = block_11_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_69_cast_fp16)[name = tensor("op_1781_cast_fp16")]; + tensor input_71_cast_fp16 = mul(x = var_1774_cast_fp16, y = var_1781_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_1785 = const()[name = tensor("op_1785"), val = tensor([1, 1])]; + tensor var_1787 = const()[name = tensor("op_1787"), val = tensor([1, 1])]; + tensor var_1789_pad_type_0 = const()[name = tensor("op_1789_pad_type_0"), val = tensor("custom")]; + tensor var_1789_pad_0 = const()[name = tensor("op_1789_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_11_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_11_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4810809472)))]; + tensor var_1789_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1787, groups = var_1651, pad = var_1789_pad_0, pad_type = var_1789_pad_type_0, strides = var_1785, weight = block_11_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("op_1789_cast_fp16")]; + tensor inputs_97_cast_fp16 = add(x = inputs_95_cast_fp16, y = var_1789_cast_fp16)[name = tensor("inputs_97_cast_fp16")]; + tensor var_1794 = const()[name = tensor("op_1794"), val = tensor(true)]; + tensor var_1795 = const()[name = tensor("op_1795"), val = tensor(1)]; + tensor var_1799 = const()[name = tensor("op_1799"), val = tensor(3)]; + tensor var_1797_to_fp16 = const()[name = tensor("op_1797_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_1796_to_fp16 = const()[name = tensor("op_1796_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_99_cast_fp16 = clip(alpha = var_1797_to_fp16, beta = var_1796_to_fp16, x = inputs_97_cast_fp16)[name = tensor("inputs_99_cast_fp16")]; + tensor inputs_sq_49_cast_fp16 = mul(x = inputs_99_cast_fp16, y = inputs_99_cast_fp16)[name = tensor("inputs_sq_49_cast_fp16")]; + tensor var_1816 = const()[name = tensor("op_1816"), val = tensor([1])]; + tensor variance_49_cast_fp16 = reduce_mean(axes = var_1816, keep_dims = var_1794, x = inputs_sq_49_cast_fp16)[name = tensor("variance_49_cast_fp16")]; + tensor var_1818_to_fp16 = const()[name = tensor("op_1818_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1819_cast_fp16 = add(x = variance_49_cast_fp16, y = var_1818_to_fp16)[name = tensor("op_1819_cast_fp16")]; + tensor var_1820_epsilon_0_to_fp16 = const()[name = tensor("op_1820_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1820_cast_fp16 = rsqrt(epsilon = var_1820_epsilon_0_to_fp16, x = var_1819_cast_fp16)[name = tensor("op_1820_cast_fp16")]; + tensor hidden_states_49_cast_fp16 = mul(x = inputs_99_cast_fp16, y = var_1820_cast_fp16)[name = tensor("hidden_states_49_cast_fp16")]; + tensor w_49_to_fp16 = const()[name = tensor("w_49_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4894695616)))]; + tensor obj_51_cast_fp16 = mul(x = w_49_to_fp16, y = hidden_states_49_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor var_1834 = const()[name = tensor("op_1834"), val = tensor([1, 1])]; + tensor var_1836 = const()[name = tensor("op_1836"), val = tensor([1, 1])]; + tensor query_25_pad_type_0 = const()[name = tensor("query_25_pad_type_0"), val = tensor("custom")]; + tensor query_25_pad_0 = const()[name = tensor("query_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_12_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_12_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4894703872)))]; + tensor query_25_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1836, groups = var_1795, pad = query_25_pad_0, pad_type = query_25_pad_type_0, strides = var_1834, weight = block_12_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_51_cast_fp16)[name = tensor("query_25_cast_fp16")]; + tensor var_1840 = const()[name = tensor("op_1840"), val = tensor([1, 1])]; + tensor var_1842 = const()[name = tensor("op_1842"), val = tensor([1, 1])]; + tensor key_25_pad_type_0 = const()[name = tensor("key_25_pad_type_0"), val = tensor("custom")]; + tensor key_25_pad_0 = const()[name = tensor("key_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_12_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_12_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4928258368)))]; + tensor key_25_cast_fp16 = conv(dilations = var_1842, groups = var_1795, pad = key_25_pad_0, pad_type = key_25_pad_type_0, strides = var_1840, weight = block_12_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_51_cast_fp16)[name = tensor("key_25_cast_fp16")]; + tensor var_1847 = const()[name = tensor("op_1847"), val = tensor([1, 1])]; + tensor var_1849 = const()[name = tensor("op_1849"), val = tensor([1, 1])]; + tensor value_25_pad_type_0 = const()[name = tensor("value_25_pad_type_0"), val = tensor("custom")]; + tensor value_25_pad_0 = const()[name = tensor("value_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_12_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_12_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4961812864)))]; + tensor value_25_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1849, groups = var_1795, pad = value_25_pad_0, pad_type = value_25_pad_type_0, strides = var_1847, weight = block_12_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_51_cast_fp16)[name = tensor("value_25_cast_fp16")]; + tensor var_1853 = const()[name = tensor("op_1853"), val = tensor([1, 64, 64, -1])]; + tensor var_1854_cast_fp16 = reshape(shape = var_1853, x = query_25_cast_fp16)[name = tensor("op_1854_cast_fp16")]; + tensor var_1855 = const()[name = tensor("op_1855"), val = tensor([1, 64, 64, -1])]; + tensor var_1856_cast_fp16 = reshape(shape = var_1855, x = key_25_cast_fp16)[name = tensor("op_1856_cast_fp16")]; + tensor mh_w_73_transpose_x_0 = const()[name = tensor("mh_w_73_transpose_x_0"), val = tensor(true)]; + tensor mh_w_73_transpose_y_0 = const()[name = tensor("mh_w_73_transpose_y_0"), val = tensor(false)]; + tensor mh_w_73_cast_fp16 = matmul(transpose_x = mh_w_73_transpose_x_0, transpose_y = mh_w_73_transpose_y_0, x = var_1854_cast_fp16, y = var_1856_cast_fp16)[name = tensor("mh_w_73_cast_fp16")]; + tensor mh_w_75_cast_fp16 = add(x = mh_w_73_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_75_cast_fp16")]; + tensor mh_w_77_cast_fp16 = add(x = mh_w_75_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_77_cast_fp16")]; + tensor var_1865_cast_fp16 = softmax(axis = var_1799, x = mh_w_77_cast_fp16)[name = tensor("op_1865_cast_fp16")]; + tensor var_1866 = const()[name = tensor("op_1866"), val = tensor([1, 64, 64, -1])]; + tensor var_1867_cast_fp16 = reshape(shape = var_1866, x = value_25_cast_fp16)[name = tensor("op_1867_cast_fp16")]; + tensor attn_25_transpose_x_0 = const()[name = tensor("attn_25_transpose_x_0"), val = tensor(false)]; + tensor attn_25_transpose_y_0 = const()[name = tensor("attn_25_transpose_y_0"), val = tensor(true)]; + tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_1867_cast_fp16, y = var_1865_cast_fp16)[name = tensor("attn_25_cast_fp16")]; + tensor var_1870 = const()[name = tensor("op_1870"), val = tensor([1, 4096, 1, -1])]; + tensor input_73_cast_fp16 = reshape(shape = var_1870, x = attn_25_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor var_1874 = const()[name = tensor("op_1874"), val = tensor([1, 1])]; + tensor var_1876 = const()[name = tensor("op_1876"), val = tensor([1, 1])]; + tensor obj_53_pad_type_0 = const()[name = tensor("obj_53_pad_type_0"), val = tensor("custom")]; + tensor obj_53_pad_0 = const()[name = tensor("obj_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_12_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_12_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4995367360)))]; + tensor obj_53_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1876, groups = var_1795, pad = obj_53_pad_0, pad_type = obj_53_pad_type_0, strides = var_1874, weight = block_12_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor inputs_101_cast_fp16 = add(x = inputs_99_cast_fp16, y = obj_53_cast_fp16)[name = tensor("inputs_101_cast_fp16")]; + tensor inputs_103_cast_fp16 = clip(alpha = var_1797_to_fp16, beta = var_1796_to_fp16, x = inputs_101_cast_fp16)[name = tensor("inputs_103_cast_fp16")]; + tensor inputs_sq_51_cast_fp16 = mul(x = inputs_103_cast_fp16, y = inputs_103_cast_fp16)[name = tensor("inputs_sq_51_cast_fp16")]; + tensor var_1885 = const()[name = tensor("op_1885"), val = tensor([1])]; + tensor variance_51_cast_fp16 = reduce_mean(axes = var_1885, keep_dims = var_1794, x = inputs_sq_51_cast_fp16)[name = tensor("variance_51_cast_fp16")]; + tensor var_1887_to_fp16 = const()[name = tensor("op_1887_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1888_cast_fp16 = add(x = variance_51_cast_fp16, y = var_1887_to_fp16)[name = tensor("op_1888_cast_fp16")]; + tensor var_1889_epsilon_0_to_fp16 = const()[name = tensor("op_1889_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1889_cast_fp16 = rsqrt(epsilon = var_1889_epsilon_0_to_fp16, x = var_1888_cast_fp16)[name = tensor("op_1889_cast_fp16")]; + tensor hidden_states_51_cast_fp16 = mul(x = inputs_103_cast_fp16, y = var_1889_cast_fp16)[name = tensor("hidden_states_51_cast_fp16")]; + tensor w_51_to_fp16 = const()[name = tensor("w_51_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5028921856)))]; + tensor input_75_cast_fp16 = mul(x = w_51_to_fp16, y = hidden_states_51_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_1902 = const()[name = tensor("op_1902"), val = tensor([1, 1])]; + tensor var_1904 = const()[name = tensor("op_1904"), val = tensor([1, 1])]; + tensor x_27_pad_type_0 = const()[name = tensor("x_27_pad_type_0"), val = tensor("custom")]; + tensor x_27_pad_0 = const()[name = tensor("x_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_12_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_12_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5028930112)))]; + tensor x_27_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1904, groups = var_1795, pad = x_27_pad_0, pad_type = x_27_pad_type_0, strides = var_1902, weight = block_12_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("x_27_cast_fp16")]; + tensor var_1918_mode_0 = const()[name = tensor("op_1918_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_1918_cast_fp16 = gelu(mode = var_1918_mode_0, x = x_27_cast_fp16)[name = tensor("op_1918_cast_fp16")]; + tensor var_1921 = const()[name = tensor("op_1921"), val = tensor([1, 1])]; + tensor var_1923 = const()[name = tensor("op_1923"), val = tensor([1, 1])]; + tensor var_1925_pad_type_0 = const()[name = tensor("op_1925_pad_type_0"), val = tensor("custom")]; + tensor var_1925_pad_0 = const()[name = tensor("op_1925_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_12_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_12_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5112816256)))]; + tensor var_1925_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_1923, groups = var_1795, pad = var_1925_pad_0, pad_type = var_1925_pad_type_0, strides = var_1921, weight = block_12_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("op_1925_cast_fp16")]; + tensor input_77_cast_fp16 = mul(x = var_1918_cast_fp16, y = var_1925_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor var_1929 = const()[name = tensor("op_1929"), val = tensor([1, 1])]; + tensor var_1931 = const()[name = tensor("op_1931"), val = tensor([1, 1])]; + tensor var_1933_pad_type_0 = const()[name = tensor("op_1933_pad_type_0"), val = tensor("custom")]; + tensor var_1933_pad_0 = const()[name = tensor("op_1933_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_12_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_12_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5196702400)))]; + tensor var_1933_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1931, groups = var_1795, pad = var_1933_pad_0, pad_type = var_1933_pad_type_0, strides = var_1929, weight = block_12_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_77_cast_fp16)[name = tensor("op_1933_cast_fp16")]; + tensor inputs_105_cast_fp16 = add(x = inputs_103_cast_fp16, y = var_1933_cast_fp16)[name = tensor("inputs_105_cast_fp16")]; + tensor var_1938 = const()[name = tensor("op_1938"), val = tensor(true)]; + tensor var_1939 = const()[name = tensor("op_1939"), val = tensor(1)]; + tensor var_1943 = const()[name = tensor("op_1943"), val = tensor(3)]; + tensor var_1941_to_fp16 = const()[name = tensor("op_1941_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_1940_to_fp16 = const()[name = tensor("op_1940_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_107_cast_fp16 = clip(alpha = var_1941_to_fp16, beta = var_1940_to_fp16, x = inputs_105_cast_fp16)[name = tensor("inputs_107_cast_fp16")]; + tensor inputs_sq_53_cast_fp16 = mul(x = inputs_107_cast_fp16, y = inputs_107_cast_fp16)[name = tensor("inputs_sq_53_cast_fp16")]; + tensor var_1960 = const()[name = tensor("op_1960"), val = tensor([1])]; + tensor variance_53_cast_fp16 = reduce_mean(axes = var_1960, keep_dims = var_1938, x = inputs_sq_53_cast_fp16)[name = tensor("variance_53_cast_fp16")]; + tensor var_1962_to_fp16 = const()[name = tensor("op_1962_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_1963_cast_fp16 = add(x = variance_53_cast_fp16, y = var_1962_to_fp16)[name = tensor("op_1963_cast_fp16")]; + tensor var_1964_epsilon_0_to_fp16 = const()[name = tensor("op_1964_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1964_cast_fp16 = rsqrt(epsilon = var_1964_epsilon_0_to_fp16, x = var_1963_cast_fp16)[name = tensor("op_1964_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = mul(x = inputs_107_cast_fp16, y = var_1964_cast_fp16)[name = tensor("hidden_states_53_cast_fp16")]; + tensor w_53_to_fp16 = const()[name = tensor("w_53_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5280588544)))]; + tensor obj_55_cast_fp16 = mul(x = w_53_to_fp16, y = hidden_states_53_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor var_1978 = const()[name = tensor("op_1978"), val = tensor([1, 1])]; + tensor var_1980 = const()[name = tensor("op_1980"), val = tensor([1, 1])]; + tensor query_27_pad_type_0 = const()[name = tensor("query_27_pad_type_0"), val = tensor("custom")]; + tensor query_27_pad_0 = const()[name = tensor("query_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_13_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_13_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5280596800)))]; + tensor query_27_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1980, groups = var_1939, pad = query_27_pad_0, pad_type = query_27_pad_type_0, strides = var_1978, weight = block_13_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_55_cast_fp16)[name = tensor("query_27_cast_fp16")]; + tensor var_1984 = const()[name = tensor("op_1984"), val = tensor([1, 1])]; + tensor var_1986 = const()[name = tensor("op_1986"), val = tensor([1, 1])]; + tensor key_27_pad_type_0 = const()[name = tensor("key_27_pad_type_0"), val = tensor("custom")]; + tensor key_27_pad_0 = const()[name = tensor("key_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_13_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_13_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5314151296)))]; + tensor key_27_cast_fp16 = conv(dilations = var_1986, groups = var_1939, pad = key_27_pad_0, pad_type = key_27_pad_type_0, strides = var_1984, weight = block_13_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_55_cast_fp16)[name = tensor("key_27_cast_fp16")]; + tensor var_1991 = const()[name = tensor("op_1991"), val = tensor([1, 1])]; + tensor var_1993 = const()[name = tensor("op_1993"), val = tensor([1, 1])]; + tensor value_27_pad_type_0 = const()[name = tensor("value_27_pad_type_0"), val = tensor("custom")]; + tensor value_27_pad_0 = const()[name = tensor("value_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_13_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_13_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5347705792)))]; + tensor value_27_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_1993, groups = var_1939, pad = value_27_pad_0, pad_type = value_27_pad_type_0, strides = var_1991, weight = block_13_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_55_cast_fp16)[name = tensor("value_27_cast_fp16")]; + tensor var_1997 = const()[name = tensor("op_1997"), val = tensor([1, 64, 64, -1])]; + tensor var_1998_cast_fp16 = reshape(shape = var_1997, x = query_27_cast_fp16)[name = tensor("op_1998_cast_fp16")]; + tensor var_1999 = const()[name = tensor("op_1999"), val = tensor([1, 64, 64, -1])]; + tensor var_2000_cast_fp16 = reshape(shape = var_1999, x = key_27_cast_fp16)[name = tensor("op_2000_cast_fp16")]; + tensor mh_w_79_transpose_x_0 = const()[name = tensor("mh_w_79_transpose_x_0"), val = tensor(true)]; + tensor mh_w_79_transpose_y_0 = const()[name = tensor("mh_w_79_transpose_y_0"), val = tensor(false)]; + tensor mh_w_79_cast_fp16 = matmul(transpose_x = mh_w_79_transpose_x_0, transpose_y = mh_w_79_transpose_y_0, x = var_1998_cast_fp16, y = var_2000_cast_fp16)[name = tensor("mh_w_79_cast_fp16")]; + tensor mh_w_81_cast_fp16 = add(x = mh_w_79_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_81_cast_fp16")]; + tensor mh_w_83_cast_fp16 = add(x = mh_w_81_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_83_cast_fp16")]; + tensor var_2009_cast_fp16 = softmax(axis = var_1943, x = mh_w_83_cast_fp16)[name = tensor("op_2009_cast_fp16")]; + tensor var_2010 = const()[name = tensor("op_2010"), val = tensor([1, 64, 64, -1])]; + tensor var_2011_cast_fp16 = reshape(shape = var_2010, x = value_27_cast_fp16)[name = tensor("op_2011_cast_fp16")]; + tensor attn_27_transpose_x_0 = const()[name = tensor("attn_27_transpose_x_0"), val = tensor(false)]; + tensor attn_27_transpose_y_0 = const()[name = tensor("attn_27_transpose_y_0"), val = tensor(true)]; + tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_2011_cast_fp16, y = var_2009_cast_fp16)[name = tensor("attn_27_cast_fp16")]; + tensor var_2014 = const()[name = tensor("op_2014"), val = tensor([1, 4096, 1, -1])]; + tensor input_79_cast_fp16 = reshape(shape = var_2014, x = attn_27_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_2018 = const()[name = tensor("op_2018"), val = tensor([1, 1])]; + tensor var_2020 = const()[name = tensor("op_2020"), val = tensor([1, 1])]; + tensor obj_57_pad_type_0 = const()[name = tensor("obj_57_pad_type_0"), val = tensor("custom")]; + tensor obj_57_pad_0 = const()[name = tensor("obj_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_13_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_13_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5381260288)))]; + tensor obj_57_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2020, groups = var_1939, pad = obj_57_pad_0, pad_type = obj_57_pad_type_0, strides = var_2018, weight = block_13_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("obj_57_cast_fp16")]; + tensor inputs_109_cast_fp16 = add(x = inputs_107_cast_fp16, y = obj_57_cast_fp16)[name = tensor("inputs_109_cast_fp16")]; + tensor inputs_111_cast_fp16 = clip(alpha = var_1941_to_fp16, beta = var_1940_to_fp16, x = inputs_109_cast_fp16)[name = tensor("inputs_111_cast_fp16")]; + tensor inputs_sq_55_cast_fp16 = mul(x = inputs_111_cast_fp16, y = inputs_111_cast_fp16)[name = tensor("inputs_sq_55_cast_fp16")]; + tensor var_2029 = const()[name = tensor("op_2029"), val = tensor([1])]; + tensor variance_55_cast_fp16 = reduce_mean(axes = var_2029, keep_dims = var_1938, x = inputs_sq_55_cast_fp16)[name = tensor("variance_55_cast_fp16")]; + tensor var_2031_to_fp16 = const()[name = tensor("op_2031_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2032_cast_fp16 = add(x = variance_55_cast_fp16, y = var_2031_to_fp16)[name = tensor("op_2032_cast_fp16")]; + tensor var_2033_epsilon_0_to_fp16 = const()[name = tensor("op_2033_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2033_cast_fp16 = rsqrt(epsilon = var_2033_epsilon_0_to_fp16, x = var_2032_cast_fp16)[name = tensor("op_2033_cast_fp16")]; + tensor hidden_states_55_cast_fp16 = mul(x = inputs_111_cast_fp16, y = var_2033_cast_fp16)[name = tensor("hidden_states_55_cast_fp16")]; + tensor w_55_to_fp16 = const()[name = tensor("w_55_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5414814784)))]; + tensor input_81_cast_fp16 = mul(x = w_55_to_fp16, y = hidden_states_55_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor var_2046 = const()[name = tensor("op_2046"), val = tensor([1, 1])]; + tensor var_2048 = const()[name = tensor("op_2048"), val = tensor([1, 1])]; + tensor x_29_pad_type_0 = const()[name = tensor("x_29_pad_type_0"), val = tensor("custom")]; + tensor x_29_pad_0 = const()[name = tensor("x_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_13_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_13_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5414823040)))]; + tensor x_29_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2048, groups = var_1939, pad = x_29_pad_0, pad_type = x_29_pad_type_0, strides = var_2046, weight = block_13_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_2062_mode_0 = const()[name = tensor("op_2062_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_2062_cast_fp16 = gelu(mode = var_2062_mode_0, x = x_29_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2065 = const()[name = tensor("op_2065"), val = tensor([1, 1])]; + tensor var_2067 = const()[name = tensor("op_2067"), val = tensor([1, 1])]; + tensor var_2069_pad_type_0 = const()[name = tensor("op_2069_pad_type_0"), val = tensor("custom")]; + tensor var_2069_pad_0 = const()[name = tensor("op_2069_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_13_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_13_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5498709184)))]; + tensor var_2069_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2067, groups = var_1939, pad = var_2069_pad_0, pad_type = var_2069_pad_type_0, strides = var_2065, weight = block_13_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("op_2069_cast_fp16")]; + tensor input_83_cast_fp16 = mul(x = var_2062_cast_fp16, y = var_2069_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_2073 = const()[name = tensor("op_2073"), val = tensor([1, 1])]; + tensor var_2075 = const()[name = tensor("op_2075"), val = tensor([1, 1])]; + tensor var_2077_pad_type_0 = const()[name = tensor("op_2077_pad_type_0"), val = tensor("custom")]; + tensor var_2077_pad_0 = const()[name = tensor("op_2077_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_13_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_13_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5582595328)))]; + tensor var_2077_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2075, groups = var_1939, pad = var_2077_pad_0, pad_type = var_2077_pad_type_0, strides = var_2073, weight = block_13_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("op_2077_cast_fp16")]; + tensor inputs_113_cast_fp16 = add(x = inputs_111_cast_fp16, y = var_2077_cast_fp16)[name = tensor("inputs_113_cast_fp16")]; + tensor var_2082 = const()[name = tensor("op_2082"), val = tensor(true)]; + tensor var_2083 = const()[name = tensor("op_2083"), val = tensor(1)]; + tensor var_2087 = const()[name = tensor("op_2087"), val = tensor(3)]; + tensor var_2085_to_fp16 = const()[name = tensor("op_2085_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_2084_to_fp16 = const()[name = tensor("op_2084_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_115_cast_fp16 = clip(alpha = var_2085_to_fp16, beta = var_2084_to_fp16, x = inputs_113_cast_fp16)[name = tensor("inputs_115_cast_fp16")]; + tensor inputs_sq_57_cast_fp16 = mul(x = inputs_115_cast_fp16, y = inputs_115_cast_fp16)[name = tensor("inputs_sq_57_cast_fp16")]; + tensor var_2104 = const()[name = tensor("op_2104"), val = tensor([1])]; + tensor variance_57_cast_fp16 = reduce_mean(axes = var_2104, keep_dims = var_2082, x = inputs_sq_57_cast_fp16)[name = tensor("variance_57_cast_fp16")]; + tensor var_2106_to_fp16 = const()[name = tensor("op_2106_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2107_cast_fp16 = add(x = variance_57_cast_fp16, y = var_2106_to_fp16)[name = tensor("op_2107_cast_fp16")]; + tensor var_2108_epsilon_0_to_fp16 = const()[name = tensor("op_2108_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2108_cast_fp16 = rsqrt(epsilon = var_2108_epsilon_0_to_fp16, x = var_2107_cast_fp16)[name = tensor("op_2108_cast_fp16")]; + tensor hidden_states_57_cast_fp16 = mul(x = inputs_115_cast_fp16, y = var_2108_cast_fp16)[name = tensor("hidden_states_57_cast_fp16")]; + tensor w_57_to_fp16 = const()[name = tensor("w_57_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5666481472)))]; + tensor obj_59_cast_fp16 = mul(x = w_57_to_fp16, y = hidden_states_57_cast_fp16)[name = tensor("obj_59_cast_fp16")]; + tensor var_2122 = const()[name = tensor("op_2122"), val = tensor([1, 1])]; + tensor var_2124 = const()[name = tensor("op_2124"), val = tensor([1, 1])]; + tensor query_29_pad_type_0 = const()[name = tensor("query_29_pad_type_0"), val = tensor("custom")]; + tensor query_29_pad_0 = const()[name = tensor("query_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_14_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_14_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5666489728)))]; + tensor query_29_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2124, groups = var_2083, pad = query_29_pad_0, pad_type = query_29_pad_type_0, strides = var_2122, weight = block_14_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_59_cast_fp16)[name = tensor("query_29_cast_fp16")]; + tensor var_2128 = const()[name = tensor("op_2128"), val = tensor([1, 1])]; + tensor var_2130 = const()[name = tensor("op_2130"), val = tensor([1, 1])]; + tensor key_29_pad_type_0 = const()[name = tensor("key_29_pad_type_0"), val = tensor("custom")]; + tensor key_29_pad_0 = const()[name = tensor("key_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_14_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_14_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5700044224)))]; + tensor key_29_cast_fp16 = conv(dilations = var_2130, groups = var_2083, pad = key_29_pad_0, pad_type = key_29_pad_type_0, strides = var_2128, weight = block_14_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_59_cast_fp16)[name = tensor("key_29_cast_fp16")]; + tensor var_2135 = const()[name = tensor("op_2135"), val = tensor([1, 1])]; + tensor var_2137 = const()[name = tensor("op_2137"), val = tensor([1, 1])]; + tensor value_29_pad_type_0 = const()[name = tensor("value_29_pad_type_0"), val = tensor("custom")]; + tensor value_29_pad_0 = const()[name = tensor("value_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_14_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_14_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5733598720)))]; + tensor value_29_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2137, groups = var_2083, pad = value_29_pad_0, pad_type = value_29_pad_type_0, strides = var_2135, weight = block_14_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_59_cast_fp16)[name = tensor("value_29_cast_fp16")]; + tensor var_2141 = const()[name = tensor("op_2141"), val = tensor([1, 64, 64, -1])]; + tensor var_2142_cast_fp16 = reshape(shape = var_2141, x = query_29_cast_fp16)[name = tensor("op_2142_cast_fp16")]; + tensor var_2143 = const()[name = tensor("op_2143"), val = tensor([1, 64, 64, -1])]; + tensor var_2144_cast_fp16 = reshape(shape = var_2143, x = key_29_cast_fp16)[name = tensor("op_2144_cast_fp16")]; + tensor mh_w_85_transpose_x_0 = const()[name = tensor("mh_w_85_transpose_x_0"), val = tensor(true)]; + tensor mh_w_85_transpose_y_0 = const()[name = tensor("mh_w_85_transpose_y_0"), val = tensor(false)]; + tensor mh_w_85_cast_fp16 = matmul(transpose_x = mh_w_85_transpose_x_0, transpose_y = mh_w_85_transpose_y_0, x = var_2142_cast_fp16, y = var_2144_cast_fp16)[name = tensor("mh_w_85_cast_fp16")]; + tensor mh_w_87_cast_fp16 = add(x = mh_w_85_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_87_cast_fp16")]; + tensor mh_w_89_cast_fp16 = add(x = mh_w_87_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_89_cast_fp16")]; + tensor var_2153_cast_fp16 = softmax(axis = var_2087, x = mh_w_89_cast_fp16)[name = tensor("op_2153_cast_fp16")]; + tensor var_2154 = const()[name = tensor("op_2154"), val = tensor([1, 64, 64, -1])]; + tensor var_2155_cast_fp16 = reshape(shape = var_2154, x = value_29_cast_fp16)[name = tensor("op_2155_cast_fp16")]; + tensor attn_29_transpose_x_0 = const()[name = tensor("attn_29_transpose_x_0"), val = tensor(false)]; + tensor attn_29_transpose_y_0 = const()[name = tensor("attn_29_transpose_y_0"), val = tensor(true)]; + tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_2155_cast_fp16, y = var_2153_cast_fp16)[name = tensor("attn_29_cast_fp16")]; + tensor var_2158 = const()[name = tensor("op_2158"), val = tensor([1, 4096, 1, -1])]; + tensor input_85_cast_fp16 = reshape(shape = var_2158, x = attn_29_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor var_2162 = const()[name = tensor("op_2162"), val = tensor([1, 1])]; + tensor var_2164 = const()[name = tensor("op_2164"), val = tensor([1, 1])]; + tensor obj_61_pad_type_0 = const()[name = tensor("obj_61_pad_type_0"), val = tensor("custom")]; + tensor obj_61_pad_0 = const()[name = tensor("obj_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_14_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_14_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5767153216)))]; + tensor obj_61_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2164, groups = var_2083, pad = obj_61_pad_0, pad_type = obj_61_pad_type_0, strides = var_2162, weight = block_14_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_85_cast_fp16)[name = tensor("obj_61_cast_fp16")]; + tensor inputs_117_cast_fp16 = add(x = inputs_115_cast_fp16, y = obj_61_cast_fp16)[name = tensor("inputs_117_cast_fp16")]; + tensor inputs_119_cast_fp16 = clip(alpha = var_2085_to_fp16, beta = var_2084_to_fp16, x = inputs_117_cast_fp16)[name = tensor("inputs_119_cast_fp16")]; + tensor inputs_sq_59_cast_fp16 = mul(x = inputs_119_cast_fp16, y = inputs_119_cast_fp16)[name = tensor("inputs_sq_59_cast_fp16")]; + tensor var_2173 = const()[name = tensor("op_2173"), val = tensor([1])]; + tensor variance_59_cast_fp16 = reduce_mean(axes = var_2173, keep_dims = var_2082, x = inputs_sq_59_cast_fp16)[name = tensor("variance_59_cast_fp16")]; + tensor var_2175_to_fp16 = const()[name = tensor("op_2175_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2176_cast_fp16 = add(x = variance_59_cast_fp16, y = var_2175_to_fp16)[name = tensor("op_2176_cast_fp16")]; + tensor var_2177_epsilon_0_to_fp16 = const()[name = tensor("op_2177_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2177_cast_fp16 = rsqrt(epsilon = var_2177_epsilon_0_to_fp16, x = var_2176_cast_fp16)[name = tensor("op_2177_cast_fp16")]; + tensor hidden_states_59_cast_fp16 = mul(x = inputs_119_cast_fp16, y = var_2177_cast_fp16)[name = tensor("hidden_states_59_cast_fp16")]; + tensor w_59_to_fp16 = const()[name = tensor("w_59_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5800707712)))]; + tensor input_87_cast_fp16 = mul(x = w_59_to_fp16, y = hidden_states_59_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor var_2190 = const()[name = tensor("op_2190"), val = tensor([1, 1])]; + tensor var_2192 = const()[name = tensor("op_2192"), val = tensor([1, 1])]; + tensor x_31_pad_type_0 = const()[name = tensor("x_31_pad_type_0"), val = tensor("custom")]; + tensor x_31_pad_0 = const()[name = tensor("x_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_14_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_14_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5800715968)))]; + tensor x_31_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2192, groups = var_2083, pad = x_31_pad_0, pad_type = x_31_pad_type_0, strides = var_2190, weight = block_14_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("x_31_cast_fp16")]; + tensor var_2206_mode_0 = const()[name = tensor("op_2206_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_2206_cast_fp16 = gelu(mode = var_2206_mode_0, x = x_31_cast_fp16)[name = tensor("op_2206_cast_fp16")]; + tensor var_2209 = const()[name = tensor("op_2209"), val = tensor([1, 1])]; + tensor var_2211 = const()[name = tensor("op_2211"), val = tensor([1, 1])]; + tensor var_2213_pad_type_0 = const()[name = tensor("op_2213_pad_type_0"), val = tensor("custom")]; + tensor var_2213_pad_0 = const()[name = tensor("op_2213_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_14_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_14_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5884602112)))]; + tensor var_2213_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2211, groups = var_2083, pad = var_2213_pad_0, pad_type = var_2213_pad_type_0, strides = var_2209, weight = block_14_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("op_2213_cast_fp16")]; + tensor input_89_cast_fp16 = mul(x = var_2206_cast_fp16, y = var_2213_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor var_2217 = const()[name = tensor("op_2217"), val = tensor([1, 1])]; + tensor var_2219 = const()[name = tensor("op_2219"), val = tensor([1, 1])]; + tensor var_2221_pad_type_0 = const()[name = tensor("op_2221_pad_type_0"), val = tensor("custom")]; + tensor var_2221_pad_0 = const()[name = tensor("op_2221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_14_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_14_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5968488256)))]; + tensor var_2221_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2219, groups = var_2083, pad = var_2221_pad_0, pad_type = var_2221_pad_type_0, strides = var_2217, weight = block_14_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("op_2221_cast_fp16")]; + tensor inputs_121_cast_fp16 = add(x = inputs_119_cast_fp16, y = var_2221_cast_fp16)[name = tensor("inputs_121_cast_fp16")]; + tensor var_2226 = const()[name = tensor("op_2226"), val = tensor(true)]; + tensor var_2227 = const()[name = tensor("op_2227"), val = tensor(1)]; + tensor var_2231 = const()[name = tensor("op_2231"), val = tensor(3)]; + tensor var_2229_to_fp16 = const()[name = tensor("op_2229_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_2228_to_fp16 = const()[name = tensor("op_2228_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_123_cast_fp16 = clip(alpha = var_2229_to_fp16, beta = var_2228_to_fp16, x = inputs_121_cast_fp16)[name = tensor("inputs_123_cast_fp16")]; + tensor inputs_sq_61_cast_fp16 = mul(x = inputs_123_cast_fp16, y = inputs_123_cast_fp16)[name = tensor("inputs_sq_61_cast_fp16")]; + tensor var_2248 = const()[name = tensor("op_2248"), val = tensor([1])]; + tensor variance_61_cast_fp16 = reduce_mean(axes = var_2248, keep_dims = var_2226, x = inputs_sq_61_cast_fp16)[name = tensor("variance_61_cast_fp16")]; + tensor var_2250_to_fp16 = const()[name = tensor("op_2250_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2251_cast_fp16 = add(x = variance_61_cast_fp16, y = var_2250_to_fp16)[name = tensor("op_2251_cast_fp16")]; + tensor var_2252_epsilon_0_to_fp16 = const()[name = tensor("op_2252_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2252_cast_fp16 = rsqrt(epsilon = var_2252_epsilon_0_to_fp16, x = var_2251_cast_fp16)[name = tensor("op_2252_cast_fp16")]; + tensor hidden_states_61_cast_fp16 = mul(x = inputs_123_cast_fp16, y = var_2252_cast_fp16)[name = tensor("hidden_states_61_cast_fp16")]; + tensor w_61_to_fp16 = const()[name = tensor("w_61_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6052374400)))]; + tensor obj_63_cast_fp16 = mul(x = w_61_to_fp16, y = hidden_states_61_cast_fp16)[name = tensor("obj_63_cast_fp16")]; + tensor var_2266 = const()[name = tensor("op_2266"), val = tensor([1, 1])]; + tensor var_2268 = const()[name = tensor("op_2268"), val = tensor([1, 1])]; + tensor query_31_pad_type_0 = const()[name = tensor("query_31_pad_type_0"), val = tensor("custom")]; + tensor query_31_pad_0 = const()[name = tensor("query_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_15_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_15_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6052382656)))]; + tensor query_31_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2268, groups = var_2227, pad = query_31_pad_0, pad_type = query_31_pad_type_0, strides = var_2266, weight = block_15_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_63_cast_fp16)[name = tensor("query_31_cast_fp16")]; + tensor var_2272 = const()[name = tensor("op_2272"), val = tensor([1, 1])]; + tensor var_2274 = const()[name = tensor("op_2274"), val = tensor([1, 1])]; + tensor key_31_pad_type_0 = const()[name = tensor("key_31_pad_type_0"), val = tensor("custom")]; + tensor key_31_pad_0 = const()[name = tensor("key_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_15_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_15_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6085937152)))]; + tensor key_31_cast_fp16 = conv(dilations = var_2274, groups = var_2227, pad = key_31_pad_0, pad_type = key_31_pad_type_0, strides = var_2272, weight = block_15_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_63_cast_fp16)[name = tensor("key_31_cast_fp16")]; + tensor var_2279 = const()[name = tensor("op_2279"), val = tensor([1, 1])]; + tensor var_2281 = const()[name = tensor("op_2281"), val = tensor([1, 1])]; + tensor value_31_pad_type_0 = const()[name = tensor("value_31_pad_type_0"), val = tensor("custom")]; + tensor value_31_pad_0 = const()[name = tensor("value_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_15_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_15_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6119491648)))]; + tensor value_31_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2281, groups = var_2227, pad = value_31_pad_0, pad_type = value_31_pad_type_0, strides = var_2279, weight = block_15_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_63_cast_fp16)[name = tensor("value_31_cast_fp16")]; + tensor var_2285 = const()[name = tensor("op_2285"), val = tensor([1, 64, 64, -1])]; + tensor var_2286_cast_fp16 = reshape(shape = var_2285, x = query_31_cast_fp16)[name = tensor("op_2286_cast_fp16")]; + tensor var_2287 = const()[name = tensor("op_2287"), val = tensor([1, 64, 64, -1])]; + tensor var_2288_cast_fp16 = reshape(shape = var_2287, x = key_31_cast_fp16)[name = tensor("op_2288_cast_fp16")]; + tensor mh_w_91_transpose_x_0 = const()[name = tensor("mh_w_91_transpose_x_0"), val = tensor(true)]; + tensor mh_w_91_transpose_y_0 = const()[name = tensor("mh_w_91_transpose_y_0"), val = tensor(false)]; + tensor mh_w_91_cast_fp16 = matmul(transpose_x = mh_w_91_transpose_x_0, transpose_y = mh_w_91_transpose_y_0, x = var_2286_cast_fp16, y = var_2288_cast_fp16)[name = tensor("mh_w_91_cast_fp16")]; + tensor mh_w_93_cast_fp16 = add(x = mh_w_91_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_93_cast_fp16")]; + tensor mh_w_95_cast_fp16 = add(x = mh_w_93_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_95_cast_fp16")]; + tensor var_2297_cast_fp16 = softmax(axis = var_2231, x = mh_w_95_cast_fp16)[name = tensor("op_2297_cast_fp16")]; + tensor var_2298 = const()[name = tensor("op_2298"), val = tensor([1, 64, 64, -1])]; + tensor var_2299_cast_fp16 = reshape(shape = var_2298, x = value_31_cast_fp16)[name = tensor("op_2299_cast_fp16")]; + tensor attn_31_transpose_x_0 = const()[name = tensor("attn_31_transpose_x_0"), val = tensor(false)]; + tensor attn_31_transpose_y_0 = const()[name = tensor("attn_31_transpose_y_0"), val = tensor(true)]; + tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2299_cast_fp16, y = var_2297_cast_fp16)[name = tensor("attn_31_cast_fp16")]; + tensor var_2302 = const()[name = tensor("op_2302"), val = tensor([1, 4096, 1, -1])]; + tensor input_91_cast_fp16 = reshape(shape = var_2302, x = attn_31_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_2306 = const()[name = tensor("op_2306"), val = tensor([1, 1])]; + tensor var_2308 = const()[name = tensor("op_2308"), val = tensor([1, 1])]; + tensor obj_65_pad_type_0 = const()[name = tensor("obj_65_pad_type_0"), val = tensor("custom")]; + tensor obj_65_pad_0 = const()[name = tensor("obj_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_15_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_15_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6153046144)))]; + tensor obj_65_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2308, groups = var_2227, pad = obj_65_pad_0, pad_type = obj_65_pad_type_0, strides = var_2306, weight = block_15_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("obj_65_cast_fp16")]; + tensor inputs_125_cast_fp16 = add(x = inputs_123_cast_fp16, y = obj_65_cast_fp16)[name = tensor("inputs_125_cast_fp16")]; + tensor inputs_127_cast_fp16 = clip(alpha = var_2229_to_fp16, beta = var_2228_to_fp16, x = inputs_125_cast_fp16)[name = tensor("inputs_127_cast_fp16")]; + tensor inputs_sq_63_cast_fp16 = mul(x = inputs_127_cast_fp16, y = inputs_127_cast_fp16)[name = tensor("inputs_sq_63_cast_fp16")]; + tensor var_2317 = const()[name = tensor("op_2317"), val = tensor([1])]; + tensor variance_63_cast_fp16 = reduce_mean(axes = var_2317, keep_dims = var_2226, x = inputs_sq_63_cast_fp16)[name = tensor("variance_63_cast_fp16")]; + tensor var_2319_to_fp16 = const()[name = tensor("op_2319_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2320_cast_fp16 = add(x = variance_63_cast_fp16, y = var_2319_to_fp16)[name = tensor("op_2320_cast_fp16")]; + tensor var_2321_epsilon_0_to_fp16 = const()[name = tensor("op_2321_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2321_cast_fp16 = rsqrt(epsilon = var_2321_epsilon_0_to_fp16, x = var_2320_cast_fp16)[name = tensor("op_2321_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = mul(x = inputs_127_cast_fp16, y = var_2321_cast_fp16)[name = tensor("hidden_states_63_cast_fp16")]; + tensor w_63_to_fp16 = const()[name = tensor("w_63_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6186600640)))]; + tensor input_93_cast_fp16 = mul(x = w_63_to_fp16, y = hidden_states_63_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor var_2334 = const()[name = tensor("op_2334"), val = tensor([1, 1])]; + tensor var_2336 = const()[name = tensor("op_2336"), val = tensor([1, 1])]; + tensor x_33_pad_type_0 = const()[name = tensor("x_33_pad_type_0"), val = tensor("custom")]; + tensor x_33_pad_0 = const()[name = tensor("x_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_15_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_15_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6186608896)))]; + tensor x_33_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2336, groups = var_2227, pad = x_33_pad_0, pad_type = x_33_pad_type_0, strides = var_2334, weight = block_15_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_93_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor var_2350_mode_0 = const()[name = tensor("op_2350_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_2350_cast_fp16 = gelu(mode = var_2350_mode_0, x = x_33_cast_fp16)[name = tensor("op_2350_cast_fp16")]; + tensor var_2353 = const()[name = tensor("op_2353"), val = tensor([1, 1])]; + tensor var_2355 = const()[name = tensor("op_2355"), val = tensor([1, 1])]; + tensor var_2357_pad_type_0 = const()[name = tensor("op_2357_pad_type_0"), val = tensor("custom")]; + tensor var_2357_pad_0 = const()[name = tensor("op_2357_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_15_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_15_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6270495040)))]; + tensor var_2357_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2355, groups = var_2227, pad = var_2357_pad_0, pad_type = var_2357_pad_type_0, strides = var_2353, weight = block_15_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_93_cast_fp16)[name = tensor("op_2357_cast_fp16")]; + tensor input_95_cast_fp16 = mul(x = var_2350_cast_fp16, y = var_2357_cast_fp16)[name = tensor("input_95_cast_fp16")]; + tensor var_2361 = const()[name = tensor("op_2361"), val = tensor([1, 1])]; + tensor var_2363 = const()[name = tensor("op_2363"), val = tensor([1, 1])]; + tensor var_2365_pad_type_0 = const()[name = tensor("op_2365_pad_type_0"), val = tensor("custom")]; + tensor var_2365_pad_0 = const()[name = tensor("op_2365_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_15_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_15_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6354381184)))]; + tensor var_2365_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2363, groups = var_2227, pad = var_2365_pad_0, pad_type = var_2365_pad_type_0, strides = var_2361, weight = block_15_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_95_cast_fp16)[name = tensor("op_2365_cast_fp16")]; + tensor inputs_129_cast_fp16 = add(x = inputs_127_cast_fp16, y = var_2365_cast_fp16)[name = tensor("inputs_129_cast_fp16")]; + tensor var_2370 = const()[name = tensor("op_2370"), val = tensor(true)]; + tensor var_2371 = const()[name = tensor("op_2371"), val = tensor(1)]; + tensor var_2375 = const()[name = tensor("op_2375"), val = tensor(3)]; + tensor var_2373_to_fp16 = const()[name = tensor("op_2373_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_2372_to_fp16 = const()[name = tensor("op_2372_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_131_cast_fp16 = clip(alpha = var_2373_to_fp16, beta = var_2372_to_fp16, x = inputs_129_cast_fp16)[name = tensor("inputs_131_cast_fp16")]; + tensor inputs_sq_65_cast_fp16 = mul(x = inputs_131_cast_fp16, y = inputs_131_cast_fp16)[name = tensor("inputs_sq_65_cast_fp16")]; + tensor var_2392 = const()[name = tensor("op_2392"), val = tensor([1])]; + tensor variance_65_cast_fp16 = reduce_mean(axes = var_2392, keep_dims = var_2370, x = inputs_sq_65_cast_fp16)[name = tensor("variance_65_cast_fp16")]; + tensor var_2394_to_fp16 = const()[name = tensor("op_2394_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2395_cast_fp16 = add(x = variance_65_cast_fp16, y = var_2394_to_fp16)[name = tensor("op_2395_cast_fp16")]; + tensor var_2396_epsilon_0_to_fp16 = const()[name = tensor("op_2396_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2396_cast_fp16 = rsqrt(epsilon = var_2396_epsilon_0_to_fp16, x = var_2395_cast_fp16)[name = tensor("op_2396_cast_fp16")]; + tensor hidden_states_65_cast_fp16 = mul(x = inputs_131_cast_fp16, y = var_2396_cast_fp16)[name = tensor("hidden_states_65_cast_fp16")]; + tensor w_65_to_fp16 = const()[name = tensor("w_65_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6438267328)))]; + tensor obj_67_cast_fp16 = mul(x = w_65_to_fp16, y = hidden_states_65_cast_fp16)[name = tensor("obj_67_cast_fp16")]; + tensor var_2410 = const()[name = tensor("op_2410"), val = tensor([1, 1])]; + tensor var_2412 = const()[name = tensor("op_2412"), val = tensor([1, 1])]; + tensor query_33_pad_type_0 = const()[name = tensor("query_33_pad_type_0"), val = tensor("custom")]; + tensor query_33_pad_0 = const()[name = tensor("query_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_16_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_16_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6438275584)))]; + tensor query_33_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2412, groups = var_2371, pad = query_33_pad_0, pad_type = query_33_pad_type_0, strides = var_2410, weight = block_16_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_67_cast_fp16)[name = tensor("query_33_cast_fp16")]; + tensor var_2416 = const()[name = tensor("op_2416"), val = tensor([1, 1])]; + tensor var_2418 = const()[name = tensor("op_2418"), val = tensor([1, 1])]; + tensor key_33_pad_type_0 = const()[name = tensor("key_33_pad_type_0"), val = tensor("custom")]; + tensor key_33_pad_0 = const()[name = tensor("key_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_16_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_16_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6471830080)))]; + tensor key_33_cast_fp16 = conv(dilations = var_2418, groups = var_2371, pad = key_33_pad_0, pad_type = key_33_pad_type_0, strides = var_2416, weight = block_16_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_67_cast_fp16)[name = tensor("key_33_cast_fp16")]; + tensor var_2423 = const()[name = tensor("op_2423"), val = tensor([1, 1])]; + tensor var_2425 = const()[name = tensor("op_2425"), val = tensor([1, 1])]; + tensor value_33_pad_type_0 = const()[name = tensor("value_33_pad_type_0"), val = tensor("custom")]; + tensor value_33_pad_0 = const()[name = tensor("value_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_16_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_16_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6505384576)))]; + tensor value_33_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2425, groups = var_2371, pad = value_33_pad_0, pad_type = value_33_pad_type_0, strides = var_2423, weight = block_16_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_67_cast_fp16)[name = tensor("value_33_cast_fp16")]; + tensor var_2429 = const()[name = tensor("op_2429"), val = tensor([1, 64, 64, -1])]; + tensor var_2430_cast_fp16 = reshape(shape = var_2429, x = query_33_cast_fp16)[name = tensor("op_2430_cast_fp16")]; + tensor var_2431 = const()[name = tensor("op_2431"), val = tensor([1, 64, 64, -1])]; + tensor var_2432_cast_fp16 = reshape(shape = var_2431, x = key_33_cast_fp16)[name = tensor("op_2432_cast_fp16")]; + tensor mh_w_97_transpose_x_0 = const()[name = tensor("mh_w_97_transpose_x_0"), val = tensor(true)]; + tensor mh_w_97_transpose_y_0 = const()[name = tensor("mh_w_97_transpose_y_0"), val = tensor(false)]; + tensor mh_w_97_cast_fp16 = matmul(transpose_x = mh_w_97_transpose_x_0, transpose_y = mh_w_97_transpose_y_0, x = var_2430_cast_fp16, y = var_2432_cast_fp16)[name = tensor("mh_w_97_cast_fp16")]; + tensor mh_w_99_cast_fp16 = add(x = mh_w_97_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_99_cast_fp16")]; + tensor mh_w_101_cast_fp16 = add(x = mh_w_99_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_101_cast_fp16")]; + tensor var_2441_cast_fp16 = softmax(axis = var_2375, x = mh_w_101_cast_fp16)[name = tensor("op_2441_cast_fp16")]; + tensor var_2442 = const()[name = tensor("op_2442"), val = tensor([1, 64, 64, -1])]; + tensor var_2443_cast_fp16 = reshape(shape = var_2442, x = value_33_cast_fp16)[name = tensor("op_2443_cast_fp16")]; + tensor attn_33_transpose_x_0 = const()[name = tensor("attn_33_transpose_x_0"), val = tensor(false)]; + tensor attn_33_transpose_y_0 = const()[name = tensor("attn_33_transpose_y_0"), val = tensor(true)]; + tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_2443_cast_fp16, y = var_2441_cast_fp16)[name = tensor("attn_33_cast_fp16")]; + tensor var_2446 = const()[name = tensor("op_2446"), val = tensor([1, 4096, 1, -1])]; + tensor input_97_cast_fp16 = reshape(shape = var_2446, x = attn_33_cast_fp16)[name = tensor("input_97_cast_fp16")]; + tensor var_2450 = const()[name = tensor("op_2450"), val = tensor([1, 1])]; + tensor var_2452 = const()[name = tensor("op_2452"), val = tensor([1, 1])]; + tensor obj_69_pad_type_0 = const()[name = tensor("obj_69_pad_type_0"), val = tensor("custom")]; + tensor obj_69_pad_0 = const()[name = tensor("obj_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_16_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_16_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6538939072)))]; + tensor obj_69_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2452, groups = var_2371, pad = obj_69_pad_0, pad_type = obj_69_pad_type_0, strides = var_2450, weight = block_16_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_97_cast_fp16)[name = tensor("obj_69_cast_fp16")]; + tensor inputs_133_cast_fp16 = add(x = inputs_131_cast_fp16, y = obj_69_cast_fp16)[name = tensor("inputs_133_cast_fp16")]; + tensor inputs_135_cast_fp16 = clip(alpha = var_2373_to_fp16, beta = var_2372_to_fp16, x = inputs_133_cast_fp16)[name = tensor("inputs_135_cast_fp16")]; + tensor inputs_sq_67_cast_fp16 = mul(x = inputs_135_cast_fp16, y = inputs_135_cast_fp16)[name = tensor("inputs_sq_67_cast_fp16")]; + tensor var_2461 = const()[name = tensor("op_2461"), val = tensor([1])]; + tensor variance_67_cast_fp16 = reduce_mean(axes = var_2461, keep_dims = var_2370, x = inputs_sq_67_cast_fp16)[name = tensor("variance_67_cast_fp16")]; + tensor var_2463_to_fp16 = const()[name = tensor("op_2463_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2464_cast_fp16 = add(x = variance_67_cast_fp16, y = var_2463_to_fp16)[name = tensor("op_2464_cast_fp16")]; + tensor var_2465_epsilon_0_to_fp16 = const()[name = tensor("op_2465_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2465_cast_fp16 = rsqrt(epsilon = var_2465_epsilon_0_to_fp16, x = var_2464_cast_fp16)[name = tensor("op_2465_cast_fp16")]; + tensor hidden_states_67_cast_fp16 = mul(x = inputs_135_cast_fp16, y = var_2465_cast_fp16)[name = tensor("hidden_states_67_cast_fp16")]; + tensor w_67_to_fp16 = const()[name = tensor("w_67_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6572493568)))]; + tensor input_99_cast_fp16 = mul(x = w_67_to_fp16, y = hidden_states_67_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor var_2478 = const()[name = tensor("op_2478"), val = tensor([1, 1])]; + tensor var_2480 = const()[name = tensor("op_2480"), val = tensor([1, 1])]; + tensor x_35_pad_type_0 = const()[name = tensor("x_35_pad_type_0"), val = tensor("custom")]; + tensor x_35_pad_0 = const()[name = tensor("x_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_16_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_16_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6572501824)))]; + tensor x_35_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2480, groups = var_2371, pad = x_35_pad_0, pad_type = x_35_pad_type_0, strides = var_2478, weight = block_16_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("x_35_cast_fp16")]; + tensor var_2494_mode_0 = const()[name = tensor("op_2494_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_2494_cast_fp16 = gelu(mode = var_2494_mode_0, x = x_35_cast_fp16)[name = tensor("op_2494_cast_fp16")]; + tensor var_2497 = const()[name = tensor("op_2497"), val = tensor([1, 1])]; + tensor var_2499 = const()[name = tensor("op_2499"), val = tensor([1, 1])]; + tensor var_2501_pad_type_0 = const()[name = tensor("op_2501_pad_type_0"), val = tensor("custom")]; + tensor var_2501_pad_0 = const()[name = tensor("op_2501_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_16_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_16_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6656387968)))]; + tensor var_2501_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2499, groups = var_2371, pad = var_2501_pad_0, pad_type = var_2501_pad_type_0, strides = var_2497, weight = block_16_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("op_2501_cast_fp16")]; + tensor input_101_cast_fp16 = mul(x = var_2494_cast_fp16, y = var_2501_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor var_2505 = const()[name = tensor("op_2505"), val = tensor([1, 1])]; + tensor var_2507 = const()[name = tensor("op_2507"), val = tensor([1, 1])]; + tensor var_2509_pad_type_0 = const()[name = tensor("op_2509_pad_type_0"), val = tensor("custom")]; + tensor var_2509_pad_0 = const()[name = tensor("op_2509_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_16_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_16_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6740274112)))]; + tensor var_2509_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2507, groups = var_2371, pad = var_2509_pad_0, pad_type = var_2509_pad_type_0, strides = var_2505, weight = block_16_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_101_cast_fp16)[name = tensor("op_2509_cast_fp16")]; + tensor inputs_137_cast_fp16 = add(x = inputs_135_cast_fp16, y = var_2509_cast_fp16)[name = tensor("inputs_137_cast_fp16")]; + tensor var_2514 = const()[name = tensor("op_2514"), val = tensor(true)]; + tensor var_2515 = const()[name = tensor("op_2515"), val = tensor(1)]; + tensor var_2519 = const()[name = tensor("op_2519"), val = tensor(3)]; + tensor var_2517_to_fp16 = const()[name = tensor("op_2517_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_2516_to_fp16 = const()[name = tensor("op_2516_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_139_cast_fp16 = clip(alpha = var_2517_to_fp16, beta = var_2516_to_fp16, x = inputs_137_cast_fp16)[name = tensor("inputs_139_cast_fp16")]; + tensor inputs_sq_69_cast_fp16 = mul(x = inputs_139_cast_fp16, y = inputs_139_cast_fp16)[name = tensor("inputs_sq_69_cast_fp16")]; + tensor var_2536 = const()[name = tensor("op_2536"), val = tensor([1])]; + tensor variance_69_cast_fp16 = reduce_mean(axes = var_2536, keep_dims = var_2514, x = inputs_sq_69_cast_fp16)[name = tensor("variance_69_cast_fp16")]; + tensor var_2538_to_fp16 = const()[name = tensor("op_2538_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2539_cast_fp16 = add(x = variance_69_cast_fp16, y = var_2538_to_fp16)[name = tensor("op_2539_cast_fp16")]; + tensor var_2540_epsilon_0_to_fp16 = const()[name = tensor("op_2540_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2540_cast_fp16 = rsqrt(epsilon = var_2540_epsilon_0_to_fp16, x = var_2539_cast_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor hidden_states_69_cast_fp16 = mul(x = inputs_139_cast_fp16, y = var_2540_cast_fp16)[name = tensor("hidden_states_69_cast_fp16")]; + tensor w_69_to_fp16 = const()[name = tensor("w_69_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6824160256)))]; + tensor obj_71_cast_fp16 = mul(x = w_69_to_fp16, y = hidden_states_69_cast_fp16)[name = tensor("obj_71_cast_fp16")]; + tensor var_2554 = const()[name = tensor("op_2554"), val = tensor([1, 1])]; + tensor var_2556 = const()[name = tensor("op_2556"), val = tensor([1, 1])]; + tensor query_35_pad_type_0 = const()[name = tensor("query_35_pad_type_0"), val = tensor("custom")]; + tensor query_35_pad_0 = const()[name = tensor("query_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_17_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_17_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6824168512)))]; + tensor query_35_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2556, groups = var_2515, pad = query_35_pad_0, pad_type = query_35_pad_type_0, strides = var_2554, weight = block_17_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("query_35_cast_fp16")]; + tensor var_2560 = const()[name = tensor("op_2560"), val = tensor([1, 1])]; + tensor var_2562 = const()[name = tensor("op_2562"), val = tensor([1, 1])]; + tensor key_35_pad_type_0 = const()[name = tensor("key_35_pad_type_0"), val = tensor("custom")]; + tensor key_35_pad_0 = const()[name = tensor("key_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_17_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_17_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6857723008)))]; + tensor key_35_cast_fp16 = conv(dilations = var_2562, groups = var_2515, pad = key_35_pad_0, pad_type = key_35_pad_type_0, strides = var_2560, weight = block_17_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("key_35_cast_fp16")]; + tensor var_2567 = const()[name = tensor("op_2567"), val = tensor([1, 1])]; + tensor var_2569 = const()[name = tensor("op_2569"), val = tensor([1, 1])]; + tensor value_35_pad_type_0 = const()[name = tensor("value_35_pad_type_0"), val = tensor("custom")]; + tensor value_35_pad_0 = const()[name = tensor("value_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_17_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_17_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6891277504)))]; + tensor value_35_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2569, groups = var_2515, pad = value_35_pad_0, pad_type = value_35_pad_type_0, strides = var_2567, weight = block_17_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_71_cast_fp16)[name = tensor("value_35_cast_fp16")]; + tensor var_2573 = const()[name = tensor("op_2573"), val = tensor([1, 64, 64, -1])]; + tensor var_2574_cast_fp16 = reshape(shape = var_2573, x = query_35_cast_fp16)[name = tensor("op_2574_cast_fp16")]; + tensor var_2575 = const()[name = tensor("op_2575"), val = tensor([1, 64, 64, -1])]; + tensor var_2576_cast_fp16 = reshape(shape = var_2575, x = key_35_cast_fp16)[name = tensor("op_2576_cast_fp16")]; + tensor mh_w_103_transpose_x_0 = const()[name = tensor("mh_w_103_transpose_x_0"), val = tensor(true)]; + tensor mh_w_103_transpose_y_0 = const()[name = tensor("mh_w_103_transpose_y_0"), val = tensor(false)]; + tensor mh_w_103_cast_fp16 = matmul(transpose_x = mh_w_103_transpose_x_0, transpose_y = mh_w_103_transpose_y_0, x = var_2574_cast_fp16, y = var_2576_cast_fp16)[name = tensor("mh_w_103_cast_fp16")]; + tensor mh_w_105_cast_fp16 = add(x = mh_w_103_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_105_cast_fp16")]; + tensor mh_w_107_cast_fp16 = add(x = mh_w_105_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_107_cast_fp16")]; + tensor var_2585_cast_fp16 = softmax(axis = var_2519, x = mh_w_107_cast_fp16)[name = tensor("op_2585_cast_fp16")]; + tensor var_2586 = const()[name = tensor("op_2586"), val = tensor([1, 64, 64, -1])]; + tensor var_2587_cast_fp16 = reshape(shape = var_2586, x = value_35_cast_fp16)[name = tensor("op_2587_cast_fp16")]; + tensor attn_35_transpose_x_0 = const()[name = tensor("attn_35_transpose_x_0"), val = tensor(false)]; + tensor attn_35_transpose_y_0 = const()[name = tensor("attn_35_transpose_y_0"), val = tensor(true)]; + tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_2587_cast_fp16, y = var_2585_cast_fp16)[name = tensor("attn_35_cast_fp16")]; + tensor var_2590 = const()[name = tensor("op_2590"), val = tensor([1, 4096, 1, -1])]; + tensor input_103_cast_fp16 = reshape(shape = var_2590, x = attn_35_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor var_2594 = const()[name = tensor("op_2594"), val = tensor([1, 1])]; + tensor var_2596 = const()[name = tensor("op_2596"), val = tensor([1, 1])]; + tensor obj_73_pad_type_0 = const()[name = tensor("obj_73_pad_type_0"), val = tensor("custom")]; + tensor obj_73_pad_0 = const()[name = tensor("obj_73_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_17_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_17_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6924832000)))]; + tensor obj_73_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2596, groups = var_2515, pad = obj_73_pad_0, pad_type = obj_73_pad_type_0, strides = var_2594, weight = block_17_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_103_cast_fp16)[name = tensor("obj_73_cast_fp16")]; + tensor inputs_141_cast_fp16 = add(x = inputs_139_cast_fp16, y = obj_73_cast_fp16)[name = tensor("inputs_141_cast_fp16")]; + tensor inputs_143_cast_fp16 = clip(alpha = var_2517_to_fp16, beta = var_2516_to_fp16, x = inputs_141_cast_fp16)[name = tensor("inputs_143_cast_fp16")]; + tensor inputs_sq_71_cast_fp16 = mul(x = inputs_143_cast_fp16, y = inputs_143_cast_fp16)[name = tensor("inputs_sq_71_cast_fp16")]; + tensor var_2605 = const()[name = tensor("op_2605"), val = tensor([1])]; + tensor variance_71_cast_fp16 = reduce_mean(axes = var_2605, keep_dims = var_2514, x = inputs_sq_71_cast_fp16)[name = tensor("variance_71_cast_fp16")]; + tensor var_2607_to_fp16 = const()[name = tensor("op_2607_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2608_cast_fp16 = add(x = variance_71_cast_fp16, y = var_2607_to_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2609_epsilon_0_to_fp16 = const()[name = tensor("op_2609_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2609_cast_fp16 = rsqrt(epsilon = var_2609_epsilon_0_to_fp16, x = var_2608_cast_fp16)[name = tensor("op_2609_cast_fp16")]; + tensor hidden_states_71_cast_fp16 = mul(x = inputs_143_cast_fp16, y = var_2609_cast_fp16)[name = tensor("hidden_states_71_cast_fp16")]; + tensor w_71_to_fp16 = const()[name = tensor("w_71_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6958386496)))]; + tensor input_105_cast_fp16 = mul(x = w_71_to_fp16, y = hidden_states_71_cast_fp16)[name = tensor("input_105_cast_fp16")]; + tensor var_2622 = const()[name = tensor("op_2622"), val = tensor([1, 1])]; + tensor var_2624 = const()[name = tensor("op_2624"), val = tensor([1, 1])]; + tensor x_37_pad_type_0 = const()[name = tensor("x_37_pad_type_0"), val = tensor("custom")]; + tensor x_37_pad_0 = const()[name = tensor("x_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_17_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_17_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6958394752)))]; + tensor x_37_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2624, groups = var_2515, pad = x_37_pad_0, pad_type = x_37_pad_type_0, strides = var_2622, weight = block_17_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_105_cast_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_2638_mode_0 = const()[name = tensor("op_2638_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_2638_cast_fp16 = gelu(mode = var_2638_mode_0, x = x_37_cast_fp16)[name = tensor("op_2638_cast_fp16")]; + tensor var_2641 = const()[name = tensor("op_2641"), val = tensor([1, 1])]; + tensor var_2643 = const()[name = tensor("op_2643"), val = tensor([1, 1])]; + tensor var_2645_pad_type_0 = const()[name = tensor("op_2645_pad_type_0"), val = tensor("custom")]; + tensor var_2645_pad_0 = const()[name = tensor("op_2645_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_17_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_17_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7042280896)))]; + tensor var_2645_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2643, groups = var_2515, pad = var_2645_pad_0, pad_type = var_2645_pad_type_0, strides = var_2641, weight = block_17_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_105_cast_fp16)[name = tensor("op_2645_cast_fp16")]; + tensor input_107_cast_fp16 = mul(x = var_2638_cast_fp16, y = var_2645_cast_fp16)[name = tensor("input_107_cast_fp16")]; + tensor var_2649 = const()[name = tensor("op_2649"), val = tensor([1, 1])]; + tensor var_2651 = const()[name = tensor("op_2651"), val = tensor([1, 1])]; + tensor var_2653_pad_type_0 = const()[name = tensor("op_2653_pad_type_0"), val = tensor("custom")]; + tensor var_2653_pad_0 = const()[name = tensor("op_2653_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_17_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_17_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7126167040)))]; + tensor var_2653_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2651, groups = var_2515, pad = var_2653_pad_0, pad_type = var_2653_pad_type_0, strides = var_2649, weight = block_17_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_107_cast_fp16)[name = tensor("op_2653_cast_fp16")]; + tensor inputs_145_cast_fp16 = add(x = inputs_143_cast_fp16, y = var_2653_cast_fp16)[name = tensor("inputs_145_cast_fp16")]; + tensor var_2658 = const()[name = tensor("op_2658"), val = tensor(true)]; + tensor var_2659 = const()[name = tensor("op_2659"), val = tensor(1)]; + tensor var_2663 = const()[name = tensor("op_2663"), val = tensor(3)]; + tensor var_2661_to_fp16 = const()[name = tensor("op_2661_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_2660_to_fp16 = const()[name = tensor("op_2660_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_147_cast_fp16 = clip(alpha = var_2661_to_fp16, beta = var_2660_to_fp16, x = inputs_145_cast_fp16)[name = tensor("inputs_147_cast_fp16")]; + tensor inputs_sq_73_cast_fp16 = mul(x = inputs_147_cast_fp16, y = inputs_147_cast_fp16)[name = tensor("inputs_sq_73_cast_fp16")]; + tensor var_2680 = const()[name = tensor("op_2680"), val = tensor([1])]; + tensor variance_73_cast_fp16 = reduce_mean(axes = var_2680, keep_dims = var_2658, x = inputs_sq_73_cast_fp16)[name = tensor("variance_73_cast_fp16")]; + tensor var_2682_to_fp16 = const()[name = tensor("op_2682_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2683_cast_fp16 = add(x = variance_73_cast_fp16, y = var_2682_to_fp16)[name = tensor("op_2683_cast_fp16")]; + tensor var_2684_epsilon_0_to_fp16 = const()[name = tensor("op_2684_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2684_cast_fp16 = rsqrt(epsilon = var_2684_epsilon_0_to_fp16, x = var_2683_cast_fp16)[name = tensor("op_2684_cast_fp16")]; + tensor hidden_states_73_cast_fp16 = mul(x = inputs_147_cast_fp16, y = var_2684_cast_fp16)[name = tensor("hidden_states_73_cast_fp16")]; + tensor w_73_to_fp16 = const()[name = tensor("w_73_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7210053184)))]; + tensor obj_75_cast_fp16 = mul(x = w_73_to_fp16, y = hidden_states_73_cast_fp16)[name = tensor("obj_75_cast_fp16")]; + tensor var_2698 = const()[name = tensor("op_2698"), val = tensor([1, 1])]; + tensor var_2700 = const()[name = tensor("op_2700"), val = tensor([1, 1])]; + tensor query_37_pad_type_0 = const()[name = tensor("query_37_pad_type_0"), val = tensor("custom")]; + tensor query_37_pad_0 = const()[name = tensor("query_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_18_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_18_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7210061440)))]; + tensor query_37_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2700, groups = var_2659, pad = query_37_pad_0, pad_type = query_37_pad_type_0, strides = var_2698, weight = block_18_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_75_cast_fp16)[name = tensor("query_37_cast_fp16")]; + tensor var_2704 = const()[name = tensor("op_2704"), val = tensor([1, 1])]; + tensor var_2706 = const()[name = tensor("op_2706"), val = tensor([1, 1])]; + tensor key_37_pad_type_0 = const()[name = tensor("key_37_pad_type_0"), val = tensor("custom")]; + tensor key_37_pad_0 = const()[name = tensor("key_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_18_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_18_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7243615936)))]; + tensor key_37_cast_fp16 = conv(dilations = var_2706, groups = var_2659, pad = key_37_pad_0, pad_type = key_37_pad_type_0, strides = var_2704, weight = block_18_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_75_cast_fp16)[name = tensor("key_37_cast_fp16")]; + tensor var_2711 = const()[name = tensor("op_2711"), val = tensor([1, 1])]; + tensor var_2713 = const()[name = tensor("op_2713"), val = tensor([1, 1])]; + tensor value_37_pad_type_0 = const()[name = tensor("value_37_pad_type_0"), val = tensor("custom")]; + tensor value_37_pad_0 = const()[name = tensor("value_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_18_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_18_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7277170432)))]; + tensor value_37_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2713, groups = var_2659, pad = value_37_pad_0, pad_type = value_37_pad_type_0, strides = var_2711, weight = block_18_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_75_cast_fp16)[name = tensor("value_37_cast_fp16")]; + tensor var_2717 = const()[name = tensor("op_2717"), val = tensor([1, 64, 64, -1])]; + tensor var_2718_cast_fp16 = reshape(shape = var_2717, x = query_37_cast_fp16)[name = tensor("op_2718_cast_fp16")]; + tensor var_2719 = const()[name = tensor("op_2719"), val = tensor([1, 64, 64, -1])]; + tensor var_2720_cast_fp16 = reshape(shape = var_2719, x = key_37_cast_fp16)[name = tensor("op_2720_cast_fp16")]; + tensor mh_w_109_transpose_x_0 = const()[name = tensor("mh_w_109_transpose_x_0"), val = tensor(true)]; + tensor mh_w_109_transpose_y_0 = const()[name = tensor("mh_w_109_transpose_y_0"), val = tensor(false)]; + tensor mh_w_109_cast_fp16 = matmul(transpose_x = mh_w_109_transpose_x_0, transpose_y = mh_w_109_transpose_y_0, x = var_2718_cast_fp16, y = var_2720_cast_fp16)[name = tensor("mh_w_109_cast_fp16")]; + tensor mh_w_111_cast_fp16 = add(x = mh_w_109_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_111_cast_fp16")]; + tensor mh_w_113_cast_fp16 = add(x = mh_w_111_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_113_cast_fp16")]; + tensor var_2729_cast_fp16 = softmax(axis = var_2663, x = mh_w_113_cast_fp16)[name = tensor("op_2729_cast_fp16")]; + tensor var_2730 = const()[name = tensor("op_2730"), val = tensor([1, 64, 64, -1])]; + tensor var_2731_cast_fp16 = reshape(shape = var_2730, x = value_37_cast_fp16)[name = tensor("op_2731_cast_fp16")]; + tensor attn_37_transpose_x_0 = const()[name = tensor("attn_37_transpose_x_0"), val = tensor(false)]; + tensor attn_37_transpose_y_0 = const()[name = tensor("attn_37_transpose_y_0"), val = tensor(true)]; + tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_2731_cast_fp16, y = var_2729_cast_fp16)[name = tensor("attn_37_cast_fp16")]; + tensor var_2734 = const()[name = tensor("op_2734"), val = tensor([1, 4096, 1, -1])]; + tensor input_109_cast_fp16 = reshape(shape = var_2734, x = attn_37_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor var_2738 = const()[name = tensor("op_2738"), val = tensor([1, 1])]; + tensor var_2740 = const()[name = tensor("op_2740"), val = tensor([1, 1])]; + tensor obj_77_pad_type_0 = const()[name = tensor("obj_77_pad_type_0"), val = tensor("custom")]; + tensor obj_77_pad_0 = const()[name = tensor("obj_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_18_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_18_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7310724928)))]; + tensor obj_77_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2740, groups = var_2659, pad = obj_77_pad_0, pad_type = obj_77_pad_type_0, strides = var_2738, weight = block_18_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_109_cast_fp16)[name = tensor("obj_77_cast_fp16")]; + tensor inputs_149_cast_fp16 = add(x = inputs_147_cast_fp16, y = obj_77_cast_fp16)[name = tensor("inputs_149_cast_fp16")]; + tensor inputs_151_cast_fp16 = clip(alpha = var_2661_to_fp16, beta = var_2660_to_fp16, x = inputs_149_cast_fp16)[name = tensor("inputs_151_cast_fp16")]; + tensor inputs_sq_75_cast_fp16 = mul(x = inputs_151_cast_fp16, y = inputs_151_cast_fp16)[name = tensor("inputs_sq_75_cast_fp16")]; + tensor var_2749 = const()[name = tensor("op_2749"), val = tensor([1])]; + tensor variance_75_cast_fp16 = reduce_mean(axes = var_2749, keep_dims = var_2658, x = inputs_sq_75_cast_fp16)[name = tensor("variance_75_cast_fp16")]; + tensor var_2751_to_fp16 = const()[name = tensor("op_2751_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2752_cast_fp16 = add(x = variance_75_cast_fp16, y = var_2751_to_fp16)[name = tensor("op_2752_cast_fp16")]; + tensor var_2753_epsilon_0_to_fp16 = const()[name = tensor("op_2753_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2753_cast_fp16 = rsqrt(epsilon = var_2753_epsilon_0_to_fp16, x = var_2752_cast_fp16)[name = tensor("op_2753_cast_fp16")]; + tensor hidden_states_75_cast_fp16 = mul(x = inputs_151_cast_fp16, y = var_2753_cast_fp16)[name = tensor("hidden_states_75_cast_fp16")]; + tensor w_75_to_fp16 = const()[name = tensor("w_75_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7344279424)))]; + tensor input_111_cast_fp16 = mul(x = w_75_to_fp16, y = hidden_states_75_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor var_2766 = const()[name = tensor("op_2766"), val = tensor([1, 1])]; + tensor var_2768 = const()[name = tensor("op_2768"), val = tensor([1, 1])]; + tensor x_39_pad_type_0 = const()[name = tensor("x_39_pad_type_0"), val = tensor("custom")]; + tensor x_39_pad_0 = const()[name = tensor("x_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_18_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_18_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7344287680)))]; + tensor x_39_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2768, groups = var_2659, pad = x_39_pad_0, pad_type = x_39_pad_type_0, strides = var_2766, weight = block_18_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_111_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor var_2782_mode_0 = const()[name = tensor("op_2782_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_2782_cast_fp16 = gelu(mode = var_2782_mode_0, x = x_39_cast_fp16)[name = tensor("op_2782_cast_fp16")]; + tensor var_2785 = const()[name = tensor("op_2785"), val = tensor([1, 1])]; + tensor var_2787 = const()[name = tensor("op_2787"), val = tensor([1, 1])]; + tensor var_2789_pad_type_0 = const()[name = tensor("op_2789_pad_type_0"), val = tensor("custom")]; + tensor var_2789_pad_0 = const()[name = tensor("op_2789_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_18_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_18_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7428173824)))]; + tensor var_2789_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2787, groups = var_2659, pad = var_2789_pad_0, pad_type = var_2789_pad_type_0, strides = var_2785, weight = block_18_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_111_cast_fp16)[name = tensor("op_2789_cast_fp16")]; + tensor input_113_cast_fp16 = mul(x = var_2782_cast_fp16, y = var_2789_cast_fp16)[name = tensor("input_113_cast_fp16")]; + tensor var_2793 = const()[name = tensor("op_2793"), val = tensor([1, 1])]; + tensor var_2795 = const()[name = tensor("op_2795"), val = tensor([1, 1])]; + tensor var_2797_pad_type_0 = const()[name = tensor("op_2797_pad_type_0"), val = tensor("custom")]; + tensor var_2797_pad_0 = const()[name = tensor("op_2797_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_18_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_18_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7512059968)))]; + tensor var_2797_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2795, groups = var_2659, pad = var_2797_pad_0, pad_type = var_2797_pad_type_0, strides = var_2793, weight = block_18_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_113_cast_fp16)[name = tensor("op_2797_cast_fp16")]; + tensor inputs_153_cast_fp16 = add(x = inputs_151_cast_fp16, y = var_2797_cast_fp16)[name = tensor("inputs_153_cast_fp16")]; + tensor var_2802 = const()[name = tensor("op_2802"), val = tensor(true)]; + tensor var_2803 = const()[name = tensor("op_2803"), val = tensor(1)]; + tensor var_2807 = const()[name = tensor("op_2807"), val = tensor(3)]; + tensor var_2805_to_fp16 = const()[name = tensor("op_2805_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_2804_to_fp16 = const()[name = tensor("op_2804_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_155_cast_fp16 = clip(alpha = var_2805_to_fp16, beta = var_2804_to_fp16, x = inputs_153_cast_fp16)[name = tensor("inputs_155_cast_fp16")]; + tensor inputs_sq_77_cast_fp16 = mul(x = inputs_155_cast_fp16, y = inputs_155_cast_fp16)[name = tensor("inputs_sq_77_cast_fp16")]; + tensor var_2824 = const()[name = tensor("op_2824"), val = tensor([1])]; + tensor variance_77_cast_fp16 = reduce_mean(axes = var_2824, keep_dims = var_2802, x = inputs_sq_77_cast_fp16)[name = tensor("variance_77_cast_fp16")]; + tensor var_2826_to_fp16 = const()[name = tensor("op_2826_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2827_cast_fp16 = add(x = variance_77_cast_fp16, y = var_2826_to_fp16)[name = tensor("op_2827_cast_fp16")]; + tensor var_2828_epsilon_0_to_fp16 = const()[name = tensor("op_2828_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2828_cast_fp16 = rsqrt(epsilon = var_2828_epsilon_0_to_fp16, x = var_2827_cast_fp16)[name = tensor("op_2828_cast_fp16")]; + tensor hidden_states_77_cast_fp16 = mul(x = inputs_155_cast_fp16, y = var_2828_cast_fp16)[name = tensor("hidden_states_77_cast_fp16")]; + tensor w_77_to_fp16 = const()[name = tensor("w_77_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7595946112)))]; + tensor obj_79_cast_fp16 = mul(x = w_77_to_fp16, y = hidden_states_77_cast_fp16)[name = tensor("obj_79_cast_fp16")]; + tensor var_2842 = const()[name = tensor("op_2842"), val = tensor([1, 1])]; + tensor var_2844 = const()[name = tensor("op_2844"), val = tensor([1, 1])]; + tensor query_39_pad_type_0 = const()[name = tensor("query_39_pad_type_0"), val = tensor("custom")]; + tensor query_39_pad_0 = const()[name = tensor("query_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_19_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_19_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7595954368)))]; + tensor query_39_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2844, groups = var_2803, pad = query_39_pad_0, pad_type = query_39_pad_type_0, strides = var_2842, weight = block_19_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_79_cast_fp16)[name = tensor("query_39_cast_fp16")]; + tensor var_2848 = const()[name = tensor("op_2848"), val = tensor([1, 1])]; + tensor var_2850 = const()[name = tensor("op_2850"), val = tensor([1, 1])]; + tensor key_39_pad_type_0 = const()[name = tensor("key_39_pad_type_0"), val = tensor("custom")]; + tensor key_39_pad_0 = const()[name = tensor("key_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_19_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_19_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7629508864)))]; + tensor key_39_cast_fp16 = conv(dilations = var_2850, groups = var_2803, pad = key_39_pad_0, pad_type = key_39_pad_type_0, strides = var_2848, weight = block_19_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_79_cast_fp16)[name = tensor("key_39_cast_fp16")]; + tensor var_2855 = const()[name = tensor("op_2855"), val = tensor([1, 1])]; + tensor var_2857 = const()[name = tensor("op_2857"), val = tensor([1, 1])]; + tensor value_39_pad_type_0 = const()[name = tensor("value_39_pad_type_0"), val = tensor("custom")]; + tensor value_39_pad_0 = const()[name = tensor("value_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_19_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_19_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7663063360)))]; + tensor value_39_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2857, groups = var_2803, pad = value_39_pad_0, pad_type = value_39_pad_type_0, strides = var_2855, weight = block_19_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_79_cast_fp16)[name = tensor("value_39_cast_fp16")]; + tensor var_2861 = const()[name = tensor("op_2861"), val = tensor([1, 64, 64, -1])]; + tensor var_2862_cast_fp16 = reshape(shape = var_2861, x = query_39_cast_fp16)[name = tensor("op_2862_cast_fp16")]; + tensor var_2863 = const()[name = tensor("op_2863"), val = tensor([1, 64, 64, -1])]; + tensor var_2864_cast_fp16 = reshape(shape = var_2863, x = key_39_cast_fp16)[name = tensor("op_2864_cast_fp16")]; + tensor mh_w_115_transpose_x_0 = const()[name = tensor("mh_w_115_transpose_x_0"), val = tensor(true)]; + tensor mh_w_115_transpose_y_0 = const()[name = tensor("mh_w_115_transpose_y_0"), val = tensor(false)]; + tensor mh_w_115_cast_fp16 = matmul(transpose_x = mh_w_115_transpose_x_0, transpose_y = mh_w_115_transpose_y_0, x = var_2862_cast_fp16, y = var_2864_cast_fp16)[name = tensor("mh_w_115_cast_fp16")]; + tensor mh_w_117_cast_fp16 = add(x = mh_w_115_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_117_cast_fp16")]; + tensor mh_w_119_cast_fp16 = add(x = mh_w_117_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_119_cast_fp16")]; + tensor var_2873_cast_fp16 = softmax(axis = var_2807, x = mh_w_119_cast_fp16)[name = tensor("op_2873_cast_fp16")]; + tensor var_2874 = const()[name = tensor("op_2874"), val = tensor([1, 64, 64, -1])]; + tensor var_2875_cast_fp16 = reshape(shape = var_2874, x = value_39_cast_fp16)[name = tensor("op_2875_cast_fp16")]; + tensor attn_39_transpose_x_0 = const()[name = tensor("attn_39_transpose_x_0"), val = tensor(false)]; + tensor attn_39_transpose_y_0 = const()[name = tensor("attn_39_transpose_y_0"), val = tensor(true)]; + tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_2875_cast_fp16, y = var_2873_cast_fp16)[name = tensor("attn_39_cast_fp16")]; + tensor var_2878 = const()[name = tensor("op_2878"), val = tensor([1, 4096, 1, -1])]; + tensor input_115_cast_fp16 = reshape(shape = var_2878, x = attn_39_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor var_2882 = const()[name = tensor("op_2882"), val = tensor([1, 1])]; + tensor var_2884 = const()[name = tensor("op_2884"), val = tensor([1, 1])]; + tensor obj_81_pad_type_0 = const()[name = tensor("obj_81_pad_type_0"), val = tensor("custom")]; + tensor obj_81_pad_0 = const()[name = tensor("obj_81_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_19_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_19_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7696617856)))]; + tensor obj_81_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2884, groups = var_2803, pad = obj_81_pad_0, pad_type = obj_81_pad_type_0, strides = var_2882, weight = block_19_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("obj_81_cast_fp16")]; + tensor inputs_157_cast_fp16 = add(x = inputs_155_cast_fp16, y = obj_81_cast_fp16)[name = tensor("inputs_157_cast_fp16")]; + tensor inputs_159_cast_fp16 = clip(alpha = var_2805_to_fp16, beta = var_2804_to_fp16, x = inputs_157_cast_fp16)[name = tensor("inputs_159_cast_fp16")]; + tensor inputs_sq_79_cast_fp16 = mul(x = inputs_159_cast_fp16, y = inputs_159_cast_fp16)[name = tensor("inputs_sq_79_cast_fp16")]; + tensor var_2893 = const()[name = tensor("op_2893"), val = tensor([1])]; + tensor variance_79_cast_fp16 = reduce_mean(axes = var_2893, keep_dims = var_2802, x = inputs_sq_79_cast_fp16)[name = tensor("variance_79_cast_fp16")]; + tensor var_2895_to_fp16 = const()[name = tensor("op_2895_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2896_cast_fp16 = add(x = variance_79_cast_fp16, y = var_2895_to_fp16)[name = tensor("op_2896_cast_fp16")]; + tensor var_2897_epsilon_0_to_fp16 = const()[name = tensor("op_2897_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2897_cast_fp16 = rsqrt(epsilon = var_2897_epsilon_0_to_fp16, x = var_2896_cast_fp16)[name = tensor("op_2897_cast_fp16")]; + tensor hidden_states_79_cast_fp16 = mul(x = inputs_159_cast_fp16, y = var_2897_cast_fp16)[name = tensor("hidden_states_79_cast_fp16")]; + tensor w_79_to_fp16 = const()[name = tensor("w_79_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7730172352)))]; + tensor input_117_cast_fp16 = mul(x = w_79_to_fp16, y = hidden_states_79_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor var_2910 = const()[name = tensor("op_2910"), val = tensor([1, 1])]; + tensor var_2912 = const()[name = tensor("op_2912"), val = tensor([1, 1])]; + tensor x_41_pad_type_0 = const()[name = tensor("x_41_pad_type_0"), val = tensor("custom")]; + tensor x_41_pad_0 = const()[name = tensor("x_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_19_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_19_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7730180608)))]; + tensor x_41_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2912, groups = var_2803, pad = x_41_pad_0, pad_type = x_41_pad_type_0, strides = var_2910, weight = block_19_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_117_cast_fp16)[name = tensor("x_41_cast_fp16")]; + tensor var_2926_mode_0 = const()[name = tensor("op_2926_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_2926_cast_fp16 = gelu(mode = var_2926_mode_0, x = x_41_cast_fp16)[name = tensor("op_2926_cast_fp16")]; + tensor var_2929 = const()[name = tensor("op_2929"), val = tensor([1, 1])]; + tensor var_2931 = const()[name = tensor("op_2931"), val = tensor([1, 1])]; + tensor var_2933_pad_type_0 = const()[name = tensor("op_2933_pad_type_0"), val = tensor("custom")]; + tensor var_2933_pad_0 = const()[name = tensor("op_2933_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_19_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_19_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7814066752)))]; + tensor var_2933_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_2931, groups = var_2803, pad = var_2933_pad_0, pad_type = var_2933_pad_type_0, strides = var_2929, weight = block_19_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_117_cast_fp16)[name = tensor("op_2933_cast_fp16")]; + tensor input_119_cast_fp16 = mul(x = var_2926_cast_fp16, y = var_2933_cast_fp16)[name = tensor("input_119_cast_fp16")]; + tensor var_2937 = const()[name = tensor("op_2937"), val = tensor([1, 1])]; + tensor var_2939 = const()[name = tensor("op_2939"), val = tensor([1, 1])]; + tensor var_2941_pad_type_0 = const()[name = tensor("op_2941_pad_type_0"), val = tensor("custom")]; + tensor var_2941_pad_0 = const()[name = tensor("op_2941_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_19_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_19_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7897952896)))]; + tensor var_2941_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2939, groups = var_2803, pad = var_2941_pad_0, pad_type = var_2941_pad_type_0, strides = var_2937, weight = block_19_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_119_cast_fp16)[name = tensor("op_2941_cast_fp16")]; + tensor inputs_161_cast_fp16 = add(x = inputs_159_cast_fp16, y = var_2941_cast_fp16)[name = tensor("inputs_161_cast_fp16")]; + tensor var_2946 = const()[name = tensor("op_2946"), val = tensor(true)]; + tensor var_2947 = const()[name = tensor("op_2947"), val = tensor(1)]; + tensor var_2951 = const()[name = tensor("op_2951"), val = tensor(3)]; + tensor var_2949_to_fp16 = const()[name = tensor("op_2949_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_2948_to_fp16 = const()[name = tensor("op_2948_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_163_cast_fp16 = clip(alpha = var_2949_to_fp16, beta = var_2948_to_fp16, x = inputs_161_cast_fp16)[name = tensor("inputs_163_cast_fp16")]; + tensor inputs_sq_81_cast_fp16 = mul(x = inputs_163_cast_fp16, y = inputs_163_cast_fp16)[name = tensor("inputs_sq_81_cast_fp16")]; + tensor var_2968 = const()[name = tensor("op_2968"), val = tensor([1])]; + tensor variance_81_cast_fp16 = reduce_mean(axes = var_2968, keep_dims = var_2946, x = inputs_sq_81_cast_fp16)[name = tensor("variance_81_cast_fp16")]; + tensor var_2970_to_fp16 = const()[name = tensor("op_2970_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_2971_cast_fp16 = add(x = variance_81_cast_fp16, y = var_2970_to_fp16)[name = tensor("op_2971_cast_fp16")]; + tensor var_2972_epsilon_0_to_fp16 = const()[name = tensor("op_2972_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2972_cast_fp16 = rsqrt(epsilon = var_2972_epsilon_0_to_fp16, x = var_2971_cast_fp16)[name = tensor("op_2972_cast_fp16")]; + tensor hidden_states_81_cast_fp16 = mul(x = inputs_163_cast_fp16, y = var_2972_cast_fp16)[name = tensor("hidden_states_81_cast_fp16")]; + tensor w_81_to_fp16 = const()[name = tensor("w_81_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7981839040)))]; + tensor obj_83_cast_fp16 = mul(x = w_81_to_fp16, y = hidden_states_81_cast_fp16)[name = tensor("obj_83_cast_fp16")]; + tensor var_2986 = const()[name = tensor("op_2986"), val = tensor([1, 1])]; + tensor var_2988 = const()[name = tensor("op_2988"), val = tensor([1, 1])]; + tensor query_41_pad_type_0 = const()[name = tensor("query_41_pad_type_0"), val = tensor("custom")]; + tensor query_41_pad_0 = const()[name = tensor("query_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_20_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_20_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7981847296)))]; + tensor query_41_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_2988, groups = var_2947, pad = query_41_pad_0, pad_type = query_41_pad_type_0, strides = var_2986, weight = block_20_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_83_cast_fp16)[name = tensor("query_41_cast_fp16")]; + tensor var_2992 = const()[name = tensor("op_2992"), val = tensor([1, 1])]; + tensor var_2994 = const()[name = tensor("op_2994"), val = tensor([1, 1])]; + tensor key_41_pad_type_0 = const()[name = tensor("key_41_pad_type_0"), val = tensor("custom")]; + tensor key_41_pad_0 = const()[name = tensor("key_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_20_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_20_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8015401792)))]; + tensor key_41_cast_fp16 = conv(dilations = var_2994, groups = var_2947, pad = key_41_pad_0, pad_type = key_41_pad_type_0, strides = var_2992, weight = block_20_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_83_cast_fp16)[name = tensor("key_41_cast_fp16")]; + tensor var_2999 = const()[name = tensor("op_2999"), val = tensor([1, 1])]; + tensor var_3001 = const()[name = tensor("op_3001"), val = tensor([1, 1])]; + tensor value_41_pad_type_0 = const()[name = tensor("value_41_pad_type_0"), val = tensor("custom")]; + tensor value_41_pad_0 = const()[name = tensor("value_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_20_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_20_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8048956288)))]; + tensor value_41_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3001, groups = var_2947, pad = value_41_pad_0, pad_type = value_41_pad_type_0, strides = var_2999, weight = block_20_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_83_cast_fp16)[name = tensor("value_41_cast_fp16")]; + tensor var_3005 = const()[name = tensor("op_3005"), val = tensor([1, 64, 64, -1])]; + tensor var_3006_cast_fp16 = reshape(shape = var_3005, x = query_41_cast_fp16)[name = tensor("op_3006_cast_fp16")]; + tensor var_3007 = const()[name = tensor("op_3007"), val = tensor([1, 64, 64, -1])]; + tensor var_3008_cast_fp16 = reshape(shape = var_3007, x = key_41_cast_fp16)[name = tensor("op_3008_cast_fp16")]; + tensor mh_w_121_transpose_x_0 = const()[name = tensor("mh_w_121_transpose_x_0"), val = tensor(true)]; + tensor mh_w_121_transpose_y_0 = const()[name = tensor("mh_w_121_transpose_y_0"), val = tensor(false)]; + tensor mh_w_121_cast_fp16 = matmul(transpose_x = mh_w_121_transpose_x_0, transpose_y = mh_w_121_transpose_y_0, x = var_3006_cast_fp16, y = var_3008_cast_fp16)[name = tensor("mh_w_121_cast_fp16")]; + tensor mh_w_123_cast_fp16 = add(x = mh_w_121_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_123_cast_fp16")]; + tensor mh_w_125_cast_fp16 = add(x = mh_w_123_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_125_cast_fp16")]; + tensor var_3017_cast_fp16 = softmax(axis = var_2951, x = mh_w_125_cast_fp16)[name = tensor("op_3017_cast_fp16")]; + tensor var_3018 = const()[name = tensor("op_3018"), val = tensor([1, 64, 64, -1])]; + tensor var_3019_cast_fp16 = reshape(shape = var_3018, x = value_41_cast_fp16)[name = tensor("op_3019_cast_fp16")]; + tensor attn_41_transpose_x_0 = const()[name = tensor("attn_41_transpose_x_0"), val = tensor(false)]; + tensor attn_41_transpose_y_0 = const()[name = tensor("attn_41_transpose_y_0"), val = tensor(true)]; + tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_3019_cast_fp16, y = var_3017_cast_fp16)[name = tensor("attn_41_cast_fp16")]; + tensor var_3022 = const()[name = tensor("op_3022"), val = tensor([1, 4096, 1, -1])]; + tensor input_121_cast_fp16 = reshape(shape = var_3022, x = attn_41_cast_fp16)[name = tensor("input_121_cast_fp16")]; + tensor var_3026 = const()[name = tensor("op_3026"), val = tensor([1, 1])]; + tensor var_3028 = const()[name = tensor("op_3028"), val = tensor([1, 1])]; + tensor obj_85_pad_type_0 = const()[name = tensor("obj_85_pad_type_0"), val = tensor("custom")]; + tensor obj_85_pad_0 = const()[name = tensor("obj_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_20_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_20_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8082510784)))]; + tensor obj_85_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3028, groups = var_2947, pad = obj_85_pad_0, pad_type = obj_85_pad_type_0, strides = var_3026, weight = block_20_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("obj_85_cast_fp16")]; + tensor inputs_165_cast_fp16 = add(x = inputs_163_cast_fp16, y = obj_85_cast_fp16)[name = tensor("inputs_165_cast_fp16")]; + tensor inputs_167_cast_fp16 = clip(alpha = var_2949_to_fp16, beta = var_2948_to_fp16, x = inputs_165_cast_fp16)[name = tensor("inputs_167_cast_fp16")]; + tensor inputs_sq_83_cast_fp16 = mul(x = inputs_167_cast_fp16, y = inputs_167_cast_fp16)[name = tensor("inputs_sq_83_cast_fp16")]; + tensor var_3037 = const()[name = tensor("op_3037"), val = tensor([1])]; + tensor variance_83_cast_fp16 = reduce_mean(axes = var_3037, keep_dims = var_2946, x = inputs_sq_83_cast_fp16)[name = tensor("variance_83_cast_fp16")]; + tensor var_3039_to_fp16 = const()[name = tensor("op_3039_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_3040_cast_fp16 = add(x = variance_83_cast_fp16, y = var_3039_to_fp16)[name = tensor("op_3040_cast_fp16")]; + tensor var_3041_epsilon_0_to_fp16 = const()[name = tensor("op_3041_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3041_cast_fp16 = rsqrt(epsilon = var_3041_epsilon_0_to_fp16, x = var_3040_cast_fp16)[name = tensor("op_3041_cast_fp16")]; + tensor hidden_states_83_cast_fp16 = mul(x = inputs_167_cast_fp16, y = var_3041_cast_fp16)[name = tensor("hidden_states_83_cast_fp16")]; + tensor w_83_to_fp16 = const()[name = tensor("w_83_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8116065280)))]; + tensor input_123_cast_fp16 = mul(x = w_83_to_fp16, y = hidden_states_83_cast_fp16)[name = tensor("input_123_cast_fp16")]; + tensor var_3054 = const()[name = tensor("op_3054"), val = tensor([1, 1])]; + tensor var_3056 = const()[name = tensor("op_3056"), val = tensor([1, 1])]; + tensor x_43_pad_type_0 = const()[name = tensor("x_43_pad_type_0"), val = tensor("custom")]; + tensor x_43_pad_0 = const()[name = tensor("x_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_20_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_20_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8116073536)))]; + tensor x_43_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_3056, groups = var_2947, pad = x_43_pad_0, pad_type = x_43_pad_type_0, strides = var_3054, weight = block_20_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_123_cast_fp16)[name = tensor("x_43_cast_fp16")]; + tensor var_3070_mode_0 = const()[name = tensor("op_3070_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_3070_cast_fp16 = gelu(mode = var_3070_mode_0, x = x_43_cast_fp16)[name = tensor("op_3070_cast_fp16")]; + tensor var_3073 = const()[name = tensor("op_3073"), val = tensor([1, 1])]; + tensor var_3075 = const()[name = tensor("op_3075"), val = tensor([1, 1])]; + tensor var_3077_pad_type_0 = const()[name = tensor("op_3077_pad_type_0"), val = tensor("custom")]; + tensor var_3077_pad_0 = const()[name = tensor("op_3077_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_20_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_20_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8199959680)))]; + tensor var_3077_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_3075, groups = var_2947, pad = var_3077_pad_0, pad_type = var_3077_pad_type_0, strides = var_3073, weight = block_20_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_123_cast_fp16)[name = tensor("op_3077_cast_fp16")]; + tensor input_125_cast_fp16 = mul(x = var_3070_cast_fp16, y = var_3077_cast_fp16)[name = tensor("input_125_cast_fp16")]; + tensor var_3081 = const()[name = tensor("op_3081"), val = tensor([1, 1])]; + tensor var_3083 = const()[name = tensor("op_3083"), val = tensor([1, 1])]; + tensor var_3085_pad_type_0 = const()[name = tensor("op_3085_pad_type_0"), val = tensor("custom")]; + tensor var_3085_pad_0 = const()[name = tensor("op_3085_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_20_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_20_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8283845824)))]; + tensor var_3085_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3083, groups = var_2947, pad = var_3085_pad_0, pad_type = var_3085_pad_type_0, strides = var_3081, weight = block_20_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_125_cast_fp16)[name = tensor("op_3085_cast_fp16")]; + tensor inputs_169_cast_fp16 = add(x = inputs_167_cast_fp16, y = var_3085_cast_fp16)[name = tensor("inputs_169_cast_fp16")]; + tensor var_3090 = const()[name = tensor("op_3090"), val = tensor(true)]; + tensor var_3091 = const()[name = tensor("op_3091"), val = tensor(1)]; + tensor var_3095 = const()[name = tensor("op_3095"), val = tensor(3)]; + tensor var_3093_to_fp16 = const()[name = tensor("op_3093_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_3092_to_fp16 = const()[name = tensor("op_3092_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_171_cast_fp16 = clip(alpha = var_3093_to_fp16, beta = var_3092_to_fp16, x = inputs_169_cast_fp16)[name = tensor("inputs_171_cast_fp16")]; + tensor inputs_sq_85_cast_fp16 = mul(x = inputs_171_cast_fp16, y = inputs_171_cast_fp16)[name = tensor("inputs_sq_85_cast_fp16")]; + tensor var_3112 = const()[name = tensor("op_3112"), val = tensor([1])]; + tensor variance_85_cast_fp16 = reduce_mean(axes = var_3112, keep_dims = var_3090, x = inputs_sq_85_cast_fp16)[name = tensor("variance_85_cast_fp16")]; + tensor var_3114_to_fp16 = const()[name = tensor("op_3114_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_3115_cast_fp16 = add(x = variance_85_cast_fp16, y = var_3114_to_fp16)[name = tensor("op_3115_cast_fp16")]; + tensor var_3116_epsilon_0_to_fp16 = const()[name = tensor("op_3116_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3116_cast_fp16 = rsqrt(epsilon = var_3116_epsilon_0_to_fp16, x = var_3115_cast_fp16)[name = tensor("op_3116_cast_fp16")]; + tensor hidden_states_85_cast_fp16 = mul(x = inputs_171_cast_fp16, y = var_3116_cast_fp16)[name = tensor("hidden_states_85_cast_fp16")]; + tensor w_85_to_fp16 = const()[name = tensor("w_85_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8367731968)))]; + tensor obj_87_cast_fp16 = mul(x = w_85_to_fp16, y = hidden_states_85_cast_fp16)[name = tensor("obj_87_cast_fp16")]; + tensor var_3130 = const()[name = tensor("op_3130"), val = tensor([1, 1])]; + tensor var_3132 = const()[name = tensor("op_3132"), val = tensor([1, 1])]; + tensor query_43_pad_type_0 = const()[name = tensor("query_43_pad_type_0"), val = tensor("custom")]; + tensor query_43_pad_0 = const()[name = tensor("query_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_21_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_21_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8367740224)))]; + tensor query_43_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3132, groups = var_3091, pad = query_43_pad_0, pad_type = query_43_pad_type_0, strides = var_3130, weight = block_21_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_87_cast_fp16)[name = tensor("query_43_cast_fp16")]; + tensor var_3136 = const()[name = tensor("op_3136"), val = tensor([1, 1])]; + tensor var_3138 = const()[name = tensor("op_3138"), val = tensor([1, 1])]; + tensor key_43_pad_type_0 = const()[name = tensor("key_43_pad_type_0"), val = tensor("custom")]; + tensor key_43_pad_0 = const()[name = tensor("key_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_21_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_21_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8401294720)))]; + tensor key_43_cast_fp16 = conv(dilations = var_3138, groups = var_3091, pad = key_43_pad_0, pad_type = key_43_pad_type_0, strides = var_3136, weight = block_21_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_87_cast_fp16)[name = tensor("key_43_cast_fp16")]; + tensor var_3143 = const()[name = tensor("op_3143"), val = tensor([1, 1])]; + tensor var_3145 = const()[name = tensor("op_3145"), val = tensor([1, 1])]; + tensor value_43_pad_type_0 = const()[name = tensor("value_43_pad_type_0"), val = tensor("custom")]; + tensor value_43_pad_0 = const()[name = tensor("value_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_21_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_21_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8434849216)))]; + tensor value_43_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3145, groups = var_3091, pad = value_43_pad_0, pad_type = value_43_pad_type_0, strides = var_3143, weight = block_21_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_87_cast_fp16)[name = tensor("value_43_cast_fp16")]; + tensor var_3149 = const()[name = tensor("op_3149"), val = tensor([1, 64, 64, -1])]; + tensor var_3150_cast_fp16 = reshape(shape = var_3149, x = query_43_cast_fp16)[name = tensor("op_3150_cast_fp16")]; + tensor var_3151 = const()[name = tensor("op_3151"), val = tensor([1, 64, 64, -1])]; + tensor var_3152_cast_fp16 = reshape(shape = var_3151, x = key_43_cast_fp16)[name = tensor("op_3152_cast_fp16")]; + tensor mh_w_127_transpose_x_0 = const()[name = tensor("mh_w_127_transpose_x_0"), val = tensor(true)]; + tensor mh_w_127_transpose_y_0 = const()[name = tensor("mh_w_127_transpose_y_0"), val = tensor(false)]; + tensor mh_w_127_cast_fp16 = matmul(transpose_x = mh_w_127_transpose_x_0, transpose_y = mh_w_127_transpose_y_0, x = var_3150_cast_fp16, y = var_3152_cast_fp16)[name = tensor("mh_w_127_cast_fp16")]; + tensor mh_w_129_cast_fp16 = add(x = mh_w_127_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_129_cast_fp16")]; + tensor mh_w_131_cast_fp16 = add(x = mh_w_129_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_131_cast_fp16")]; + tensor var_3161_cast_fp16 = softmax(axis = var_3095, x = mh_w_131_cast_fp16)[name = tensor("op_3161_cast_fp16")]; + tensor var_3162 = const()[name = tensor("op_3162"), val = tensor([1, 64, 64, -1])]; + tensor var_3163_cast_fp16 = reshape(shape = var_3162, x = value_43_cast_fp16)[name = tensor("op_3163_cast_fp16")]; + tensor attn_43_transpose_x_0 = const()[name = tensor("attn_43_transpose_x_0"), val = tensor(false)]; + tensor attn_43_transpose_y_0 = const()[name = tensor("attn_43_transpose_y_0"), val = tensor(true)]; + tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_3163_cast_fp16, y = var_3161_cast_fp16)[name = tensor("attn_43_cast_fp16")]; + tensor var_3166 = const()[name = tensor("op_3166"), val = tensor([1, 4096, 1, -1])]; + tensor input_127_cast_fp16 = reshape(shape = var_3166, x = attn_43_cast_fp16)[name = tensor("input_127_cast_fp16")]; + tensor var_3170 = const()[name = tensor("op_3170"), val = tensor([1, 1])]; + tensor var_3172 = const()[name = tensor("op_3172"), val = tensor([1, 1])]; + tensor obj_89_pad_type_0 = const()[name = tensor("obj_89_pad_type_0"), val = tensor("custom")]; + tensor obj_89_pad_0 = const()[name = tensor("obj_89_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_21_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_21_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8468403712)))]; + tensor obj_89_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3172, groups = var_3091, pad = obj_89_pad_0, pad_type = obj_89_pad_type_0, strides = var_3170, weight = block_21_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_127_cast_fp16)[name = tensor("obj_89_cast_fp16")]; + tensor inputs_173_cast_fp16 = add(x = inputs_171_cast_fp16, y = obj_89_cast_fp16)[name = tensor("inputs_173_cast_fp16")]; + tensor inputs_175_cast_fp16 = clip(alpha = var_3093_to_fp16, beta = var_3092_to_fp16, x = inputs_173_cast_fp16)[name = tensor("inputs_175_cast_fp16")]; + tensor inputs_sq_87_cast_fp16 = mul(x = inputs_175_cast_fp16, y = inputs_175_cast_fp16)[name = tensor("inputs_sq_87_cast_fp16")]; + tensor var_3181 = const()[name = tensor("op_3181"), val = tensor([1])]; + tensor variance_87_cast_fp16 = reduce_mean(axes = var_3181, keep_dims = var_3090, x = inputs_sq_87_cast_fp16)[name = tensor("variance_87_cast_fp16")]; + tensor var_3183_to_fp16 = const()[name = tensor("op_3183_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_3184_cast_fp16 = add(x = variance_87_cast_fp16, y = var_3183_to_fp16)[name = tensor("op_3184_cast_fp16")]; + tensor var_3185_epsilon_0_to_fp16 = const()[name = tensor("op_3185_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3185_cast_fp16 = rsqrt(epsilon = var_3185_epsilon_0_to_fp16, x = var_3184_cast_fp16)[name = tensor("op_3185_cast_fp16")]; + tensor hidden_states_87_cast_fp16 = mul(x = inputs_175_cast_fp16, y = var_3185_cast_fp16)[name = tensor("hidden_states_87_cast_fp16")]; + tensor w_87_to_fp16 = const()[name = tensor("w_87_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8501958208)))]; + tensor input_129_cast_fp16 = mul(x = w_87_to_fp16, y = hidden_states_87_cast_fp16)[name = tensor("input_129_cast_fp16")]; + tensor var_3198 = const()[name = tensor("op_3198"), val = tensor([1, 1])]; + tensor var_3200 = const()[name = tensor("op_3200"), val = tensor([1, 1])]; + tensor x_45_pad_type_0 = const()[name = tensor("x_45_pad_type_0"), val = tensor("custom")]; + tensor x_45_pad_0 = const()[name = tensor("x_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_21_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_21_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8501966464)))]; + tensor x_45_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_3200, groups = var_3091, pad = x_45_pad_0, pad_type = x_45_pad_type_0, strides = var_3198, weight = block_21_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor var_3214_mode_0 = const()[name = tensor("op_3214_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_3214_cast_fp16 = gelu(mode = var_3214_mode_0, x = x_45_cast_fp16)[name = tensor("op_3214_cast_fp16")]; + tensor var_3217 = const()[name = tensor("op_3217"), val = tensor([1, 1])]; + tensor var_3219 = const()[name = tensor("op_3219"), val = tensor([1, 1])]; + tensor var_3221_pad_type_0 = const()[name = tensor("op_3221_pad_type_0"), val = tensor("custom")]; + tensor var_3221_pad_0 = const()[name = tensor("op_3221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_21_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_21_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8585852608)))]; + tensor var_3221_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_3219, groups = var_3091, pad = var_3221_pad_0, pad_type = var_3221_pad_type_0, strides = var_3217, weight = block_21_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("op_3221_cast_fp16")]; + tensor input_131_cast_fp16 = mul(x = var_3214_cast_fp16, y = var_3221_cast_fp16)[name = tensor("input_131_cast_fp16")]; + tensor var_3225 = const()[name = tensor("op_3225"), val = tensor([1, 1])]; + tensor var_3227 = const()[name = tensor("op_3227"), val = tensor([1, 1])]; + tensor var_3229_pad_type_0 = const()[name = tensor("op_3229_pad_type_0"), val = tensor("custom")]; + tensor var_3229_pad_0 = const()[name = tensor("op_3229_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_21_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_21_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8669738752)))]; + tensor var_3229_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3227, groups = var_3091, pad = var_3229_pad_0, pad_type = var_3229_pad_type_0, strides = var_3225, weight = block_21_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_131_cast_fp16)[name = tensor("op_3229_cast_fp16")]; + tensor inputs_177_cast_fp16 = add(x = inputs_175_cast_fp16, y = var_3229_cast_fp16)[name = tensor("inputs_177_cast_fp16")]; + tensor var_3234 = const()[name = tensor("op_3234"), val = tensor(true)]; + tensor var_3235 = const()[name = tensor("op_3235"), val = tensor(1)]; + tensor var_3239 = const()[name = tensor("op_3239"), val = tensor(3)]; + tensor var_3237_to_fp16 = const()[name = tensor("op_3237_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_3236_to_fp16 = const()[name = tensor("op_3236_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_179_cast_fp16 = clip(alpha = var_3237_to_fp16, beta = var_3236_to_fp16, x = inputs_177_cast_fp16)[name = tensor("inputs_179_cast_fp16")]; + tensor inputs_sq_89_cast_fp16 = mul(x = inputs_179_cast_fp16, y = inputs_179_cast_fp16)[name = tensor("inputs_sq_89_cast_fp16")]; + tensor var_3256 = const()[name = tensor("op_3256"), val = tensor([1])]; + tensor variance_89_cast_fp16 = reduce_mean(axes = var_3256, keep_dims = var_3234, x = inputs_sq_89_cast_fp16)[name = tensor("variance_89_cast_fp16")]; + tensor var_3258_to_fp16 = const()[name = tensor("op_3258_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_3259_cast_fp16 = add(x = variance_89_cast_fp16, y = var_3258_to_fp16)[name = tensor("op_3259_cast_fp16")]; + tensor var_3260_epsilon_0_to_fp16 = const()[name = tensor("op_3260_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3260_cast_fp16 = rsqrt(epsilon = var_3260_epsilon_0_to_fp16, x = var_3259_cast_fp16)[name = tensor("op_3260_cast_fp16")]; + tensor hidden_states_89_cast_fp16 = mul(x = inputs_179_cast_fp16, y = var_3260_cast_fp16)[name = tensor("hidden_states_89_cast_fp16")]; + tensor w_89_to_fp16 = const()[name = tensor("w_89_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8753624896)))]; + tensor obj_91_cast_fp16 = mul(x = w_89_to_fp16, y = hidden_states_89_cast_fp16)[name = tensor("obj_91_cast_fp16")]; + tensor var_3274 = const()[name = tensor("op_3274"), val = tensor([1, 1])]; + tensor var_3276 = const()[name = tensor("op_3276"), val = tensor([1, 1])]; + tensor query_45_pad_type_0 = const()[name = tensor("query_45_pad_type_0"), val = tensor("custom")]; + tensor query_45_pad_0 = const()[name = tensor("query_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_22_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_22_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8753633152)))]; + tensor query_45_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3276, groups = var_3235, pad = query_45_pad_0, pad_type = query_45_pad_type_0, strides = var_3274, weight = block_22_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_91_cast_fp16)[name = tensor("query_45_cast_fp16")]; + tensor var_3280 = const()[name = tensor("op_3280"), val = tensor([1, 1])]; + tensor var_3282 = const()[name = tensor("op_3282"), val = tensor([1, 1])]; + tensor key_45_pad_type_0 = const()[name = tensor("key_45_pad_type_0"), val = tensor("custom")]; + tensor key_45_pad_0 = const()[name = tensor("key_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_22_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_22_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8787187648)))]; + tensor key_45_cast_fp16 = conv(dilations = var_3282, groups = var_3235, pad = key_45_pad_0, pad_type = key_45_pad_type_0, strides = var_3280, weight = block_22_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_91_cast_fp16)[name = tensor("key_45_cast_fp16")]; + tensor var_3287 = const()[name = tensor("op_3287"), val = tensor([1, 1])]; + tensor var_3289 = const()[name = tensor("op_3289"), val = tensor([1, 1])]; + tensor value_45_pad_type_0 = const()[name = tensor("value_45_pad_type_0"), val = tensor("custom")]; + tensor value_45_pad_0 = const()[name = tensor("value_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_22_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_22_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8820742144)))]; + tensor value_45_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3289, groups = var_3235, pad = value_45_pad_0, pad_type = value_45_pad_type_0, strides = var_3287, weight = block_22_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_91_cast_fp16)[name = tensor("value_45_cast_fp16")]; + tensor var_3293 = const()[name = tensor("op_3293"), val = tensor([1, 64, 64, -1])]; + tensor var_3294_cast_fp16 = reshape(shape = var_3293, x = query_45_cast_fp16)[name = tensor("op_3294_cast_fp16")]; + tensor var_3295 = const()[name = tensor("op_3295"), val = tensor([1, 64, 64, -1])]; + tensor var_3296_cast_fp16 = reshape(shape = var_3295, x = key_45_cast_fp16)[name = tensor("op_3296_cast_fp16")]; + tensor mh_w_133_transpose_x_0 = const()[name = tensor("mh_w_133_transpose_x_0"), val = tensor(true)]; + tensor mh_w_133_transpose_y_0 = const()[name = tensor("mh_w_133_transpose_y_0"), val = tensor(false)]; + tensor mh_w_133_cast_fp16 = matmul(transpose_x = mh_w_133_transpose_x_0, transpose_y = mh_w_133_transpose_y_0, x = var_3294_cast_fp16, y = var_3296_cast_fp16)[name = tensor("mh_w_133_cast_fp16")]; + tensor mh_w_135_cast_fp16 = add(x = mh_w_133_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_135_cast_fp16")]; + tensor mh_w_137_cast_fp16 = add(x = mh_w_135_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_137_cast_fp16")]; + tensor var_3305_cast_fp16 = softmax(axis = var_3239, x = mh_w_137_cast_fp16)[name = tensor("op_3305_cast_fp16")]; + tensor var_3306 = const()[name = tensor("op_3306"), val = tensor([1, 64, 64, -1])]; + tensor var_3307_cast_fp16 = reshape(shape = var_3306, x = value_45_cast_fp16)[name = tensor("op_3307_cast_fp16")]; + tensor attn_45_transpose_x_0 = const()[name = tensor("attn_45_transpose_x_0"), val = tensor(false)]; + tensor attn_45_transpose_y_0 = const()[name = tensor("attn_45_transpose_y_0"), val = tensor(true)]; + tensor attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_3307_cast_fp16, y = var_3305_cast_fp16)[name = tensor("attn_45_cast_fp16")]; + tensor var_3310 = const()[name = tensor("op_3310"), val = tensor([1, 4096, 1, -1])]; + tensor input_133_cast_fp16 = reshape(shape = var_3310, x = attn_45_cast_fp16)[name = tensor("input_133_cast_fp16")]; + tensor var_3314 = const()[name = tensor("op_3314"), val = tensor([1, 1])]; + tensor var_3316 = const()[name = tensor("op_3316"), val = tensor([1, 1])]; + tensor obj_93_pad_type_0 = const()[name = tensor("obj_93_pad_type_0"), val = tensor("custom")]; + tensor obj_93_pad_0 = const()[name = tensor("obj_93_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_22_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_22_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8854296640)))]; + tensor obj_93_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3316, groups = var_3235, pad = obj_93_pad_0, pad_type = obj_93_pad_type_0, strides = var_3314, weight = block_22_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_133_cast_fp16)[name = tensor("obj_93_cast_fp16")]; + tensor inputs_181_cast_fp16 = add(x = inputs_179_cast_fp16, y = obj_93_cast_fp16)[name = tensor("inputs_181_cast_fp16")]; + tensor inputs_183_cast_fp16 = clip(alpha = var_3237_to_fp16, beta = var_3236_to_fp16, x = inputs_181_cast_fp16)[name = tensor("inputs_183_cast_fp16")]; + tensor inputs_sq_91_cast_fp16 = mul(x = inputs_183_cast_fp16, y = inputs_183_cast_fp16)[name = tensor("inputs_sq_91_cast_fp16")]; + tensor var_3325 = const()[name = tensor("op_3325"), val = tensor([1])]; + tensor variance_91_cast_fp16 = reduce_mean(axes = var_3325, keep_dims = var_3234, x = inputs_sq_91_cast_fp16)[name = tensor("variance_91_cast_fp16")]; + tensor var_3327_to_fp16 = const()[name = tensor("op_3327_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_3328_cast_fp16 = add(x = variance_91_cast_fp16, y = var_3327_to_fp16)[name = tensor("op_3328_cast_fp16")]; + tensor var_3329_epsilon_0_to_fp16 = const()[name = tensor("op_3329_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3329_cast_fp16 = rsqrt(epsilon = var_3329_epsilon_0_to_fp16, x = var_3328_cast_fp16)[name = tensor("op_3329_cast_fp16")]; + tensor hidden_states_91_cast_fp16 = mul(x = inputs_183_cast_fp16, y = var_3329_cast_fp16)[name = tensor("hidden_states_91_cast_fp16")]; + tensor w_91_to_fp16 = const()[name = tensor("w_91_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8887851136)))]; + tensor input_135_cast_fp16 = mul(x = w_91_to_fp16, y = hidden_states_91_cast_fp16)[name = tensor("input_135_cast_fp16")]; + tensor var_3342 = const()[name = tensor("op_3342"), val = tensor([1, 1])]; + tensor var_3344 = const()[name = tensor("op_3344"), val = tensor([1, 1])]; + tensor x_47_pad_type_0 = const()[name = tensor("x_47_pad_type_0"), val = tensor("custom")]; + tensor x_47_pad_0 = const()[name = tensor("x_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_22_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_22_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8887859392)))]; + tensor x_47_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_3344, groups = var_3235, pad = x_47_pad_0, pad_type = x_47_pad_type_0, strides = var_3342, weight = block_22_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("x_47_cast_fp16")]; + tensor var_3358_mode_0 = const()[name = tensor("op_3358_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_3358_cast_fp16 = gelu(mode = var_3358_mode_0, x = x_47_cast_fp16)[name = tensor("op_3358_cast_fp16")]; + tensor var_3361 = const()[name = tensor("op_3361"), val = tensor([1, 1])]; + tensor var_3363 = const()[name = tensor("op_3363"), val = tensor([1, 1])]; + tensor var_3365_pad_type_0 = const()[name = tensor("op_3365_pad_type_0"), val = tensor("custom")]; + tensor var_3365_pad_0 = const()[name = tensor("op_3365_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_22_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_22_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8971745536)))]; + tensor var_3365_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_3363, groups = var_3235, pad = var_3365_pad_0, pad_type = var_3365_pad_type_0, strides = var_3361, weight = block_22_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("op_3365_cast_fp16")]; + tensor input_137_cast_fp16 = mul(x = var_3358_cast_fp16, y = var_3365_cast_fp16)[name = tensor("input_137_cast_fp16")]; + tensor var_3369 = const()[name = tensor("op_3369"), val = tensor([1, 1])]; + tensor var_3371 = const()[name = tensor("op_3371"), val = tensor([1, 1])]; + tensor var_3373_pad_type_0 = const()[name = tensor("op_3373_pad_type_0"), val = tensor("custom")]; + tensor var_3373_pad_0 = const()[name = tensor("op_3373_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_22_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_22_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9055631680)))]; + tensor var_3373_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3371, groups = var_3235, pad = var_3373_pad_0, pad_type = var_3373_pad_type_0, strides = var_3369, weight = block_22_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_137_cast_fp16)[name = tensor("op_3373_cast_fp16")]; + tensor inputs_185_cast_fp16 = add(x = inputs_183_cast_fp16, y = var_3373_cast_fp16)[name = tensor("inputs_185_cast_fp16")]; + tensor var_3378 = const()[name = tensor("op_3378"), val = tensor(true)]; + tensor var_3379 = const()[name = tensor("op_3379"), val = tensor(1)]; + tensor var_3383 = const()[name = tensor("op_3383"), val = tensor(3)]; + tensor var_3381_to_fp16 = const()[name = tensor("op_3381_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_3380_to_fp16 = const()[name = tensor("op_3380_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_187_cast_fp16 = clip(alpha = var_3381_to_fp16, beta = var_3380_to_fp16, x = inputs_185_cast_fp16)[name = tensor("inputs_187_cast_fp16")]; + tensor inputs_sq_93_cast_fp16 = mul(x = inputs_187_cast_fp16, y = inputs_187_cast_fp16)[name = tensor("inputs_sq_93_cast_fp16")]; + tensor var_3400 = const()[name = tensor("op_3400"), val = tensor([1])]; + tensor variance_93_cast_fp16 = reduce_mean(axes = var_3400, keep_dims = var_3378, x = inputs_sq_93_cast_fp16)[name = tensor("variance_93_cast_fp16")]; + tensor var_3402_to_fp16 = const()[name = tensor("op_3402_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_3403_cast_fp16 = add(x = variance_93_cast_fp16, y = var_3402_to_fp16)[name = tensor("op_3403_cast_fp16")]; + tensor var_3404_epsilon_0_to_fp16 = const()[name = tensor("op_3404_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3404_cast_fp16 = rsqrt(epsilon = var_3404_epsilon_0_to_fp16, x = var_3403_cast_fp16)[name = tensor("op_3404_cast_fp16")]; + tensor hidden_states_93_cast_fp16 = mul(x = inputs_187_cast_fp16, y = var_3404_cast_fp16)[name = tensor("hidden_states_93_cast_fp16")]; + tensor w_93_to_fp16 = const()[name = tensor("w_93_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9139517824)))]; + tensor obj_95_cast_fp16 = mul(x = w_93_to_fp16, y = hidden_states_93_cast_fp16)[name = tensor("obj_95_cast_fp16")]; + tensor var_3418 = const()[name = tensor("op_3418"), val = tensor([1, 1])]; + tensor var_3420 = const()[name = tensor("op_3420"), val = tensor([1, 1])]; + tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("custom")]; + tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_23_layer_0_SelfAttention_q_proj_weight_to_fp16 = const()[name = tensor("block_23_layer_0_SelfAttention_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9139526080)))]; + tensor query_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3420, groups = var_3379, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_3418, weight = block_23_layer_0_SelfAttention_q_proj_weight_to_fp16, x = obj_95_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_3424 = const()[name = tensor("op_3424"), val = tensor([1, 1])]; + tensor var_3426 = const()[name = tensor("op_3426"), val = tensor([1, 1])]; + tensor key_pad_type_0 = const()[name = tensor("key_pad_type_0"), val = tensor("custom")]; + tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_23_layer_0_SelfAttention_k_proj_weight_to_fp16 = const()[name = tensor("block_23_layer_0_SelfAttention_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9173080576)))]; + tensor key_cast_fp16 = conv(dilations = var_3426, groups = var_3379, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_3424, weight = block_23_layer_0_SelfAttention_k_proj_weight_to_fp16, x = obj_95_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_3431 = const()[name = tensor("op_3431"), val = tensor([1, 1])]; + tensor var_3433 = const()[name = tensor("op_3433"), val = tensor([1, 1])]; + tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("custom")]; + tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_23_layer_0_SelfAttention_v_proj_weight_to_fp16 = const()[name = tensor("block_23_layer_0_SelfAttention_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9206635072)))]; + tensor value_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3433, groups = var_3379, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_3431, weight = block_23_layer_0_SelfAttention_v_proj_weight_to_fp16, x = obj_95_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_3437 = const()[name = tensor("op_3437"), val = tensor([1, 64, 64, -1])]; + tensor var_3438_cast_fp16 = reshape(shape = var_3437, x = query_cast_fp16)[name = tensor("op_3438_cast_fp16")]; + tensor var_3439 = const()[name = tensor("op_3439"), val = tensor([1, 64, 64, -1])]; + tensor var_3440_cast_fp16 = reshape(shape = var_3439, x = key_cast_fp16)[name = tensor("op_3440_cast_fp16")]; + tensor mh_w_139_transpose_x_0 = const()[name = tensor("mh_w_139_transpose_x_0"), val = tensor(true)]; + tensor mh_w_139_transpose_y_0 = const()[name = tensor("mh_w_139_transpose_y_0"), val = tensor(false)]; + tensor mh_w_139_cast_fp16 = matmul(transpose_x = mh_w_139_transpose_x_0, transpose_y = mh_w_139_transpose_y_0, x = var_3438_cast_fp16, y = var_3440_cast_fp16)[name = tensor("mh_w_139_cast_fp16")]; + tensor mh_w_141_cast_fp16 = add(x = mh_w_139_cast_fp16, y = var_133_cast_fp16)[name = tensor("mh_w_141_cast_fp16")]; + tensor mh_w_cast_fp16 = add(x = mh_w_141_cast_fp16, y = relative_attention_bias_to_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor var_3449_cast_fp16 = softmax(axis = var_3383, x = mh_w_cast_fp16)[name = tensor("op_3449_cast_fp16")]; + tensor var_3450 = const()[name = tensor("op_3450"), val = tensor([1, 64, 64, -1])]; + tensor var_3451_cast_fp16 = reshape(shape = var_3450, x = value_cast_fp16)[name = tensor("op_3451_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_3451_cast_fp16, y = var_3449_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_3454 = const()[name = tensor("op_3454"), val = tensor([1, 4096, 1, -1])]; + tensor input_139_cast_fp16 = reshape(shape = var_3454, x = attn_cast_fp16)[name = tensor("input_139_cast_fp16")]; + tensor var_3458 = const()[name = tensor("op_3458"), val = tensor([1, 1])]; + tensor var_3460 = const()[name = tensor("op_3460"), val = tensor([1, 1])]; + tensor obj_pad_type_0 = const()[name = tensor("obj_pad_type_0"), val = tensor("custom")]; + tensor obj_pad_0 = const()[name = tensor("obj_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_23_layer_0_SelfAttention_o_proj_weight_to_fp16 = const()[name = tensor("block_23_layer_0_SelfAttention_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9240189568)))]; + tensor obj_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3460, groups = var_3379, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = var_3458, weight = block_23_layer_0_SelfAttention_o_proj_weight_to_fp16, x = input_139_cast_fp16)[name = tensor("obj_cast_fp16")]; + tensor inputs_189_cast_fp16 = add(x = inputs_187_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_189_cast_fp16")]; + tensor inputs_191_cast_fp16 = clip(alpha = var_3381_to_fp16, beta = var_3380_to_fp16, x = inputs_189_cast_fp16)[name = tensor("inputs_191_cast_fp16")]; + tensor inputs_sq_95_cast_fp16 = mul(x = inputs_191_cast_fp16, y = inputs_191_cast_fp16)[name = tensor("inputs_sq_95_cast_fp16")]; + tensor var_3469 = const()[name = tensor("op_3469"), val = tensor([1])]; + tensor variance_95_cast_fp16 = reduce_mean(axes = var_3469, keep_dims = var_3378, x = inputs_sq_95_cast_fp16)[name = tensor("variance_95_cast_fp16")]; + tensor var_3471_to_fp16 = const()[name = tensor("op_3471_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_3472_cast_fp16 = add(x = variance_95_cast_fp16, y = var_3471_to_fp16)[name = tensor("op_3472_cast_fp16")]; + tensor var_3473_epsilon_0_to_fp16 = const()[name = tensor("op_3473_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3473_cast_fp16 = rsqrt(epsilon = var_3473_epsilon_0_to_fp16, x = var_3472_cast_fp16)[name = tensor("op_3473_cast_fp16")]; + tensor hidden_states_95_cast_fp16 = mul(x = inputs_191_cast_fp16, y = var_3473_cast_fp16)[name = tensor("hidden_states_95_cast_fp16")]; + tensor w_95_to_fp16 = const()[name = tensor("w_95_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9273744064)))]; + tensor input_141_cast_fp16 = mul(x = w_95_to_fp16, y = hidden_states_95_cast_fp16)[name = tensor("input_141_cast_fp16")]; + tensor var_3486 = const()[name = tensor("op_3486"), val = tensor([1, 1])]; + tensor var_3488 = const()[name = tensor("op_3488"), val = tensor([1, 1])]; + tensor x_pad_type_0 = const()[name = tensor("x_pad_type_0"), val = tensor("custom")]; + tensor x_pad_0 = const()[name = tensor("x_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_23_layer_1_DenseReluDense_wi_0_weight_to_fp16 = const()[name = tensor("block_23_layer_1_DenseReluDense_wi_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9273752320)))]; + tensor x_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_3488, groups = var_3379, pad = x_pad_0, pad_type = x_pad_type_0, strides = var_3486, weight = block_23_layer_1_DenseReluDense_wi_0_weight_to_fp16, x = input_141_cast_fp16)[name = tensor("x_cast_fp16")]; + tensor var_3502_mode_0 = const()[name = tensor("op_3502_mode_0"), val = tensor("TANH_APPROXIMATION")]; + tensor var_3502_cast_fp16 = gelu(mode = var_3502_mode_0, x = x_cast_fp16)[name = tensor("op_3502_cast_fp16")]; + tensor var_3505 = const()[name = tensor("op_3505"), val = tensor([1, 1])]; + tensor var_3507 = const()[name = tensor("op_3507"), val = tensor([1, 1])]; + tensor var_3509_pad_type_0 = const()[name = tensor("op_3509_pad_type_0"), val = tensor("custom")]; + tensor var_3509_pad_0 = const()[name = tensor("op_3509_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_23_layer_1_DenseReluDense_wi_1_weight_to_fp16 = const()[name = tensor("block_23_layer_1_DenseReluDense_wi_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9357638464)))]; + tensor var_3509_cast_fp16 = conv(bias = block_0_layer_1_DenseReluDense_wi_0_bias_to_fp16, dilations = var_3507, groups = var_3379, pad = var_3509_pad_0, pad_type = var_3509_pad_type_0, strides = var_3505, weight = block_23_layer_1_DenseReluDense_wi_1_weight_to_fp16, x = input_141_cast_fp16)[name = tensor("op_3509_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_3502_cast_fp16, y = var_3509_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_3513 = const()[name = tensor("op_3513"), val = tensor([1, 1])]; + tensor var_3515 = const()[name = tensor("op_3515"), val = tensor([1, 1])]; + tensor var_3517_pad_type_0 = const()[name = tensor("op_3517_pad_type_0"), val = tensor("custom")]; + tensor var_3517_pad_0 = const()[name = tensor("op_3517_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor block_23_layer_1_DenseReluDense_wo_weight_to_fp16 = const()[name = tensor("block_23_layer_1_DenseReluDense_wo_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9441524608)))]; + tensor var_3517_cast_fp16 = conv(bias = block_0_layer_0_SelfAttention_q_proj_bias_to_fp16, dilations = var_3515, groups = var_3379, pad = var_3517_pad_0, pad_type = var_3517_pad_type_0, strides = var_3513, weight = block_23_layer_1_DenseReluDense_wo_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_3517_cast_fp16")]; + tensor inputs_193_cast_fp16 = add(x = inputs_191_cast_fp16, y = var_3517_cast_fp16)[name = tensor("inputs_193_cast_fp16")]; + tensor var_3521 = const()[name = tensor("op_3521"), val = tensor(true)]; + tensor var_3524_to_fp16 = const()[name = tensor("op_3524_to_fp16"), val = tensor(-0x1.b18p+15)]; + tensor var_3523_to_fp16 = const()[name = tensor("op_3523_to_fp16"), val = tensor(0x1.b18p+15)]; + tensor inputs_cast_fp16 = clip(alpha = var_3524_to_fp16, beta = var_3523_to_fp16, x = inputs_193_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor inputs_sq_cast_fp16 = mul(x = inputs_cast_fp16, y = inputs_cast_fp16)[name = tensor("inputs_sq_cast_fp16")]; + tensor var_3528 = const()[name = tensor("op_3528"), val = tensor([1])]; + tensor variance_cast_fp16 = reduce_mean(axes = var_3528, keep_dims = var_3521, x = inputs_sq_cast_fp16)[name = tensor("variance_cast_fp16")]; + tensor var_3530_to_fp16 = const()[name = tensor("op_3530_to_fp16"), val = tensor(0x1.1p-20)]; + tensor var_3531_cast_fp16 = add(x = variance_cast_fp16, y = var_3530_to_fp16)[name = tensor("op_3531_cast_fp16")]; + tensor var_3532_epsilon_0_to_fp16 = const()[name = tensor("op_3532_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3532_cast_fp16 = rsqrt(epsilon = var_3532_epsilon_0_to_fp16, x = var_3531_cast_fp16)[name = tensor("op_3532_cast_fp16")]; + tensor hidden_states_cast_fp16 = mul(x = inputs_cast_fp16, y = var_3532_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor w_to_fp16 = const()[name = tensor("w_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9525410752)))]; + tensor encoder_hidden_states = mul(x = w_to_fp16, y = hidden_states_cast_fp16)[name = tensor("op_3536_cast_fp16")]; + } -> (encoder_hidden_states); +} \ No newline at end of file