diff --git "a/MelEncoder.mlmodelc/model.mil" "b/MelEncoder.mlmodelc/model.mil" --- "a/MelEncoder.mlmodelc/model.mil" +++ "b/MelEncoder.mlmodelc/model.mil" @@ -13,7 +13,7 @@ program(1.0) tensor var_37_promoted_to_fp16 = const()[name = tensor("op_37_promoted_to_fp16"), val = tensor(0x1p+0)]; tensor floor_div_0_to_fp16 = cast(dtype = var_36_to_fp16_dtype_0, x = floor_div_0)[name = tensor("cast_6")]; tensor seq_len_1_cast_fp16 = add(x = floor_div_0_to_fp16, y = var_37_promoted_to_fp16)[name = tensor("seq_len_1_cast_fp16")]; - tensor cast_2_dtype_0 = const()[name = tensor("cast_2_dtype_0"), val = tensor("int32")]; + tensor seq_len_dtype_0 = const()[name = tensor("seq_len_dtype_0"), val = tensor("int32")]; tensor var_41_begin_0 = const()[name = tensor("op_41_begin_0"), val = tensor([0, 0])]; tensor var_41_end_0 = const()[name = tensor("op_41_end_0"), val = tensor([1, 1])]; tensor var_41_end_mask_0 = const()[name = tensor("op_41_end_mask_0"), val = tensor([true, false])]; @@ -51,14 +51,14 @@ program(1.0) tensor conv_0_pad_0 = const()[name = tensor("conv_0_pad_0"), val = tensor([0, 0])]; tensor conv_0_dilations_0 = const()[name = tensor("conv_0_dilations_0"), val = tensor([1])]; tensor conv_0_groups_0 = const()[name = tensor("conv_0_groups_0"), val = tensor(1)]; - tensor expand_dims_4_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131712))), name = tensor("expand_dims_4_to_fp16_palettized"), shape = tensor([257, 1, 512])]; - tensor conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_6, weight = expand_dims_4_to_fp16_palettized, x = expand_dims_7_cast_fp16)[name = tensor("conv_0_cast_fp16")]; + tensor expand_dims_4_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("expand_dims_4_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131712)))]; + tensor conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_6, weight = expand_dims_4_to_fp16_quantized, x = expand_dims_7_cast_fp16)[name = tensor("conv_0_cast_fp16")]; tensor conv_1_pad_type_0 = const()[name = tensor("conv_1_pad_type_0"), val = tensor("valid")]; tensor conv_1_pad_0 = const()[name = tensor("conv_1_pad_0"), val = tensor([0, 0])]; tensor conv_1_dilations_0 = const()[name = tensor("conv_1_dilations_0"), val = tensor([1])]; tensor conv_1_groups_0 = const()[name = tensor("conv_1_groups_0"), val = tensor(1)]; - tensor expand_dims_5_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132288))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263936))), name = tensor("expand_dims_5_to_fp16_palettized"), shape = tensor([257, 1, 512])]; - tensor conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_6, weight = expand_dims_5_to_fp16_palettized, x = 
expand_dims_7_cast_fp16)[name = tensor("conv_1_cast_fp16")]; + tensor expand_dims_5_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("expand_dims_5_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264384)))]; + tensor conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_6, weight = expand_dims_5_to_fp16_quantized, x = expand_dims_7_cast_fp16)[name = tensor("conv_1_cast_fp16")]; tensor stack_0_axis_0 = const()[name = tensor("stack_0_axis_0"), val = tensor(-1)]; tensor stack_0_cast_fp16 = stack(axis = stack_0_axis_0, values = (conv_0_cast_fp16, conv_1_cast_fp16))[name = tensor("stack_0_cast_fp16")]; tensor var_15_promoted_to_fp16 = const()[name = tensor("op_15_promoted_to_fp16"), val = tensor(0x1p+1)]; @@ -68,30 +68,30 @@ program(1.0) tensor var_67_cast_fp16 = reduce_sum(axes = var_67_axes_0, keep_dims = var_67_keep_dims_0, x = var_65_cast_fp16)[name = tensor("op_67_cast_fp16")]; tensor x_11_transpose_x_0 = const()[name = tensor("x_11_transpose_x_0"), val = tensor(false)]; tensor x_11_transpose_y_0 = const()[name = tensor("x_11_transpose_y_0"), val = tensor(false)]; - tensor const_6_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297472))), name = tensor("const_6_to_fp16_palettized"), shape = tensor([1, 128, 257])]; - tensor x_11_cast_fp16 = matmul(transpose_x = x_11_transpose_x_0, transpose_y = x_11_transpose_y_0, x = const_6_to_fp16_palettized, y = var_67_cast_fp16)[name = tensor("x_11_cast_fp16")]; + tensor const_6_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(1), name = tensor("const_6_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298368)))]; + tensor x_11_cast_fp16 = matmul(transpose_x = x_11_transpose_x_0, transpose_y = x_11_transpose_y_0, x = const_6_to_fp16_quantized, y = var_67_cast_fp16)[name = tensor("x_11_cast_fp16")]; tensor var_74_to_fp16 = const()[name = tensor("op_74_to_fp16"), val = tensor(0x1p-24)]; tensor var_75_cast_fp16 = add(x = x_11_cast_fp16, y = var_74_to_fp16)[name = tensor("op_75_cast_fp16")]; tensor x_13_epsilon_0 = const()[name = tensor("x_13_epsilon_0"), val = tensor(0x1p-149)]; tensor x_13_cast_fp16 = log(epsilon = x_13_epsilon_0, x = var_75_cast_fp16)[name = tensor("x_13_cast_fp16")]; tensor var_80 = const()[name = tensor("op_80"), val = tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 
111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 
822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 
1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500]])]; tensor var_83_axes_0 = const()[name = tensor("op_83_axes_0"), val = tensor([1])]; - tensor seq_len_1_cast_fp16_to_int32 = cast(dtype = cast_2_dtype_0, x = seq_len_1_cast_fp16)[name = tensor("cast_4")]; + tensor seq_len_1_cast_fp16_to_int32 = cast(dtype = seq_len_dtype_0, x = seq_len_1_cast_fp16)[name = tensor("cast_4")]; tensor var_83 = expand_dims(axes = var_83_axes_0, x = seq_len_1_cast_fp16_to_int32)[name = tensor("op_83")]; tensor valid_mask = less(x = var_80, y = var_83)[name = tensor("valid_mask")]; tensor var_85_axes_0 = const()[name = tensor("op_85_axes_0"), val = tensor([1])]; tensor var_85 = expand_dims(axes = var_85_axes_0, x = valid_mask)[name = tensor("op_85")]; tensor var_85_after_broadcast_reps_0 = const()[name = tensor("op_85_after_broadcast_reps_0"), val = tensor([1, 128, 1])]; tensor var_85_after_broadcast = tile(reps = var_85_after_broadcast_reps_0, x = var_85)[name = tensor("op_85_after_broadcast")]; - tensor op_8_after_broadcast_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298048))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(490240))), name = tensor("op_8_after_broadcast_to_fp16_palettized"), shape = tensor([1, 128, 1501])]; - tensor var_86_cast_fp16 = select(a = x_13_cast_fp16, b = op_8_after_broadcast_to_fp16_palettized, cond = var_85_after_broadcast)[name = tensor("op_86_cast_fp16")]; + tensor op_8_after_broadcast_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("op_8_after_broadcast_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298880))), scale = tensor(0x0p+0), zero_point = tensor(0)]; + tensor var_86_cast_fp16 = select(a = x_13_cast_fp16, b = op_8_after_broadcast_to_fp16_quantized, cond = var_85_after_broadcast)[name = tensor("op_86_cast_fp16")]; tensor x_mean_numerator_axes_0 = const()[name = tensor("x_mean_numerator_axes_0"), val = tensor([2])]; tensor x_mean_numerator_keep_dims_0 = const()[name = tensor("x_mean_numerator_keep_dims_0"), val = tensor(false)]; tensor x_mean_numerator_cast_fp16 = reduce_sum(axes = x_mean_numerator_axes_0, keep_dims = x_mean_numerator_keep_dims_0, x = var_86_cast_fp16)[name = tensor("x_mean_numerator_cast_fp16")]; tensor x_mean_denominator_axes_0 = const()[name = tensor("x_mean_denominator_axes_0"), val = tensor([1])]; tensor x_mean_denominator_keep_dims_0 = const()[name = tensor("x_mean_denominator_keep_dims_0"), val = tensor(false)]; - tensor cast_5_to_fp16_dtype_0 = const()[name = tensor("cast_5_to_fp16_dtype_0"), val = tensor("fp16")]; - tensor valid_mask_to_fp16 = cast(dtype = cast_5_to_fp16_dtype_0, x = valid_mask)[name = tensor("cast_3")]; + tensor cast_2_to_fp16_dtype_0 = const()[name = tensor("cast_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor valid_mask_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = valid_mask)[name = tensor("cast_3")]; tensor x_mean_denominator_cast_fp16 = reduce_sum(axes = x_mean_denominator_axes_0, keep_dims = x_mean_denominator_keep_dims_0, x = valid_mask_to_fp16)[name = tensor("x_mean_denominator_cast_fp16")]; 
tensor var_91_axes_0 = const()[name = tensor("op_91_axes_0"), val = tensor([1])]; tensor var_91_cast_fp16 = expand_dims(axes = var_91_axes_0, x = x_mean_denominator_cast_fp16)[name = tensor("op_91_cast_fp16")]; @@ -99,7 +99,7 @@ program(1.0) tensor var_94_axes_0 = const()[name = tensor("op_94_axes_0"), val = tensor([2])]; tensor var_94_cast_fp16 = expand_dims(axes = var_94_axes_0, x = x_mean_cast_fp16)[name = tensor("op_94_cast_fp16")]; tensor var_95_cast_fp16 = sub(x = x_13_cast_fp16, y = var_94_cast_fp16)[name = tensor("op_95_cast_fp16")]; - tensor var_96_cast_fp16 = select(a = var_95_cast_fp16, b = op_8_after_broadcast_to_fp16_palettized, cond = var_85_after_broadcast)[name = tensor("op_96_cast_fp16")]; + tensor var_96_cast_fp16 = select(a = var_95_cast_fp16, b = op_8_after_broadcast_to_fp16_quantized, cond = var_85_after_broadcast)[name = tensor("op_96_cast_fp16")]; tensor var_15_promoted_1_to_fp16 = const()[name = tensor("op_15_promoted_1_to_fp16"), val = tensor(0x1p+1)]; tensor var_97_cast_fp16 = pow(x = var_96_cast_fp16, y = var_15_promoted_1_to_fp16)[name = tensor("op_97_cast_fp16")]; tensor var_99_axes_0 = const()[name = tensor("op_99_axes_0"), val = tensor([2])]; @@ -121,9 +121,9 @@ program(1.0) tensor processed_signal_cast_fp16 = select(a = var_8_to_fp16, b = x_15_cast_fp16, cond = var_117)[name = tensor("processed_signal_cast_fp16")]; tensor var_138 = const()[name = tensor("op_138"), val = tensor(-1)]; tensor x_17_perm_0 = const()[name = tensor("x_17_perm_0"), val = tensor([0, 2, 1])]; - tensor cast_17_to_fp16_dtype_0 = const()[name = tensor("cast_17_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor var_215_to_fp16_dtype_0 = const()[name = tensor("op_215_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_216_promoted_to_fp16 = const()[name = tensor("op_216_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor seq_len_1_cast_fp16_to_int32_to_fp16 = cast(dtype = cast_17_to_fp16_dtype_0, x = seq_len_1_cast_fp16_to_int32)[name = tensor("cast_2")]; + tensor seq_len_1_cast_fp16_to_int32_to_fp16 = cast(dtype = var_215_to_fp16_dtype_0, x = seq_len_1_cast_fp16_to_int32)[name = tensor("cast_2")]; tensor var_217_cast_fp16 = add(x = seq_len_1_cast_fp16_to_int32_to_fp16, y = var_216_promoted_to_fp16)[name = tensor("op_217_cast_fp16")]; tensor _inversed_219_y_0_to_fp16 = const()[name = tensor("_inversed_219_y_0_to_fp16"), val = tensor(0x1p-1)]; tensor _inversed_219_cast_fp16 = mul(x = var_217_cast_fp16, y = _inversed_219_y_0_to_fp16)[name = tensor("_inversed_219_cast_fp16")]; @@ -152,55 +152,55 @@ program(1.0) tensor input_11_strides_0 = const()[name = tensor("input_11_strides_0"), val = tensor([2, 2])]; tensor input_11_dilations_0 = const()[name = tensor("input_11_dilations_0"), val = tensor([1, 1])]; tensor input_11_groups_0 = const()[name = tensor("input_11_groups_0"), val = tensor(1)]; - tensor encoder_module_pre_encode_conv_0_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(490816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493184))), name = tensor("encoder_module_pre_encode_conv_0_weight_to_fp16_palettized"), shape = tensor([256, 1, 3, 3])]; - tensor encoder_module_pre_encode_conv_0_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493760)))]; - tensor input_11_cast_fp16 = conv(bias = 
encoder_module_pre_encode_conv_0_bias_to_fp16, dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = encoder_module_pre_encode_conv_0_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor encoder_module_pre_encode_conv_0_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_0_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(491072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493440)))]; + tensor encoder_module_pre_encode_conv_0_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(494336)))]; + tensor input_11_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_0_bias_to_fp16, dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = encoder_module_pre_encode_conv_0_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor input_13_cast_fp16 = relu(x = input_11_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor input_15_pad_type_0 = const()[name = tensor("input_15_pad_type_0"), val = tensor("custom")]; tensor input_15_pad_0 = const()[name = tensor("input_15_pad_0"), val = tensor([1, 1, 1, 1])]; tensor input_15_strides_0 = const()[name = tensor("input_15_strides_0"), val = tensor([2, 2])]; tensor input_15_groups_0 = const()[name = tensor("input_15_groups_0"), val = tensor(256)]; tensor input_15_dilations_0 = const()[name = tensor("input_15_dilations_0"), val = tensor([1, 1])]; - tensor encoder_module_pre_encode_conv_2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(494336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496704))), name = tensor("encoder_module_pre_encode_conv_2_weight_to_fp16_palettized"), shape = tensor([256, 1, 3, 3])]; - tensor encoder_module_pre_encode_conv_2_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497280)))]; - tensor input_15_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_2_bias_to_fp16, dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = encoder_module_pre_encode_conv_2_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor encoder_module_pre_encode_conv_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(494912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497280)))]; + tensor 
encoder_module_pre_encode_conv_2_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(498176)))]; + tensor input_15_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_2_bias_to_fp16, dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = encoder_module_pre_encode_conv_2_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("valid")]; tensor input_17_strides_0 = const()[name = tensor("input_17_strides_0"), val = tensor([1, 1])]; tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_17_dilations_0 = const()[name = tensor("input_17_dilations_0"), val = tensor([1, 1])]; tensor input_17_groups_0 = const()[name = tensor("input_17_groups_0"), val = tensor(1)]; - tensor encoder_module_pre_encode_conv_3_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(563456))), name = tensor("encoder_module_pre_encode_conv_3_weight_to_fp16_palettized"), shape = tensor([256, 256, 1, 1])]; - tensor encoder_module_pre_encode_conv_3_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_3_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564032)))]; - tensor input_17_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_3_bias_to_fp16, dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = encoder_module_pre_encode_conv_3_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor encoder_module_pre_encode_conv_3_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_3_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(498752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564352)))]; + tensor encoder_module_pre_encode_conv_3_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_3_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565248)))]; + tensor input_17_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_3_bias_to_fp16, dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = encoder_module_pre_encode_conv_3_weight_to_fp16_quantized, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; tensor input_19_cast_fp16 = relu(x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor input_21_pad_type_0 = const()[name = tensor("input_21_pad_type_0"), val = tensor("custom")]; tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([1, 1, 1, 1])]; tensor input_21_strides_0 = const()[name = tensor("input_21_strides_0"), val = tensor([2, 2])]; 
tensor input_21_groups_0 = const()[name = tensor("input_21_groups_0"), val = tensor(256)]; tensor input_21_dilations_0 = const()[name = tensor("input_21_dilations_0"), val = tensor([1, 1])]; - tensor encoder_module_pre_encode_conv_5_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566976))), name = tensor("encoder_module_pre_encode_conv_5_weight_to_fp16_palettized"), shape = tensor([256, 1, 3, 3])]; - tensor encoder_module_pre_encode_conv_5_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_5_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(567552)))]; - tensor input_21_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_5_bias_to_fp16, dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = encoder_module_pre_encode_conv_5_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor encoder_module_pre_encode_conv_5_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_5_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565824))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568192)))]; + tensor encoder_module_pre_encode_conv_5_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_5_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569088)))]; + tensor input_21_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_5_bias_to_fp16, dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = encoder_module_pre_encode_conv_5_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor input_23_pad_type_0 = const()[name = tensor("input_23_pad_type_0"), val = tensor("valid")]; tensor input_23_strides_0 = const()[name = tensor("input_23_strides_0"), val = tensor([1, 1])]; tensor input_23_pad_0 = const()[name = tensor("input_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_23_dilations_0 = const()[name = tensor("input_23_dilations_0"), val = tensor([1, 1])]; tensor input_23_groups_0 = const()[name = tensor("input_23_groups_0"), val = tensor(1)]; - tensor encoder_module_pre_encode_conv_6_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(633728))), name = tensor("encoder_module_pre_encode_conv_6_weight_to_fp16_palettized"), shape = tensor([256, 256, 1, 1])]; - tensor encoder_module_pre_encode_conv_6_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_6_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(634304)))]; - tensor input_23_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_6_bias_to_fp16, dilations = input_23_dilations_0, groups = input_23_groups_0, 
pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = encoder_module_pre_encode_conv_6_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor encoder_module_pre_encode_conv_6_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_6_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(635584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(635264)))]; + tensor encoder_module_pre_encode_conv_6_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_6_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(636160)))]; + tensor input_23_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_6_bias_to_fp16, dilations = input_23_dilations_0, groups = input_23_groups_0, pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = encoder_module_pre_encode_conv_6_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor x_19_cast_fp16 = relu(x = input_23_cast_fp16)[name = tensor("x_19_cast_fp16")]; tensor var_286_perm_0 = const()[name = tensor("op_286_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_287 = const()[name = tensor("op_287"), val = tensor([1, 188, -1])]; tensor var_286_cast_fp16 = transpose(perm = var_286_perm_0, x = x_19_cast_fp16)[name = tensor("transpose_314")]; tensor input_25_cast_fp16 = reshape(shape = var_287, x = var_286_cast_fp16)[name = tensor("input_25_cast_fp16")]; - tensor encoder_module_pre_encode_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(634880))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4829248))), name = tensor("encoder_module_pre_encode_out_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor encoder_module_pre_encode_out_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4829824)))]; - tensor linear_0_cast_fp16 = linear(bias = encoder_module_pre_encode_out_bias_to_fp16, weight = encoder_module_pre_encode_out_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = tensor("linear_0_cast_fp16")]; - tensor cast_28_dtype_0 = const()[name = tensor("cast_28_dtype_0"), val = tensor("int32")]; + tensor encoder_module_pre_encode_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(636736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4832192))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4831104)))]; + tensor encoder_module_pre_encode_out_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4834304)))]; + tensor linear_0_cast_fp16 = linear(bias = encoder_module_pre_encode_out_bias_to_fp16, weight 
= encoder_module_pre_encode_out_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = tensor("linear_0_cast_fp16")]; + tensor padding_length_dtype_0 = const()[name = tensor("padding_length_dtype_0"), val = tensor("int32")]; tensor expand_dims_3 = const()[name = tensor("expand_dims_3"), val = tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187]])]; tensor var_325_axes_0 = const()[name = tensor("op_325_axes_0"), val = tensor([-1])]; - tensor encoder_length = cast(dtype = cast_28_dtype_0, x = lengths_cast_fp16)[name = tensor("cast_1")]; + tensor encoder_length = cast(dtype = padding_length_dtype_0, x = lengths_cast_fp16)[name = tensor("cast_1")]; tensor var_325 = expand_dims(axes = var_325_axes_0, x = encoder_length)[name = tensor("op_325")]; tensor pad_mask_1 = less(x = expand_dims_3, y = var_325)[name = tensor("pad_mask_1")]; tensor var_327_axes_0 = const()[name = tensor("op_327_axes_0"), val = tensor([1])]; @@ -215,47 +215,47 @@ program(1.0) tensor mask_5 = logical_not(x = att_mask)[name = tensor("mask_5")]; tensor pad_mask = logical_not(x = pad_mask_1)[name = tensor("pad_mask")]; tensor input_29_axes_0 = const()[name = tensor("input_29_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4831936)))]; - tensor encoder_module_layers_0_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4834048)))]; + tensor encoder_module_layers_0_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4836416)))]; + tensor encoder_module_layers_0_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4838528)))]; tensor var_156_to_fp16 = const()[name = tensor("op_156_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_29_cast_fp16 = layer_norm(axes = input_29_axes_0, beta = encoder_module_layers_0_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_feed_forward1_weight_to_fp16, x = linear_0_cast_fp16)[name = tensor("input_29_cast_fp16")]; - tensor encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(4836160))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9030528))), name = tensor("encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9031104)))]; - tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = tensor("linear_1_cast_fp16")]; + tensor encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4840640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9039168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9035008)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9047424)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_quantized, x = input_29_cast_fp16)[name = tensor("linear_1_cast_fp16")]; tensor input_33_cast_fp16 = silu(x = linear_1_cast_fp16)[name = tensor("input_33_cast_fp16")]; - tensor encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9039360))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13233728))), name = tensor("encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_2_bias_0_to_fp16 = const()[name = tensor("linear_2_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13234304)))]; - tensor linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = tensor("linear_2_cast_fp16")]; + tensor encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9055680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13251136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13250048)))]; + tensor linear_2_bias_0_to_fp16 = const()[name = tensor("linear_2_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13253248)))]; + tensor linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = tensor("linear_2_cast_fp16")]; tensor var_361_to_fp16 = const()[name = tensor("op_361_to_fp16"), val = tensor(0x1p-1)]; tensor var_362_cast_fp16 = 
mul(x = linear_2_cast_fp16, y = var_361_to_fp16)[name = tensor("op_362_cast_fp16")]; tensor input_39_cast_fp16 = add(x = linear_0_cast_fp16, y = var_362_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor query_1_axes_0 = const()[name = tensor("query_1_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13236416)))]; - tensor encoder_module_layers_0_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13238528)))]; + tensor encoder_module_layers_0_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13255360)))]; + tensor encoder_module_layers_0_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13257472)))]; tensor query_1_cast_fp16 = layer_norm(axes = query_1_axes_0, beta = encoder_module_layers_0_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_self_att_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("query_1_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13240640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14289280))), name = tensor("encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_3_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_palettized, x = query_1_cast_fp16)[name = tensor("linear_3_cast_fp16")]; + tensor encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13259584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14309312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14308224)))]; + tensor linear_3_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_quantized, x = query_1_cast_fp16)[name = tensor("linear_3_cast_fp16")]; tensor var_378 = const()[name = tensor("op_378"), val = tensor([1, -1, 8, 128])]; tensor q_1_cast_fp16 = reshape(shape = var_378, x = linear_3_cast_fp16)[name = tensor("q_1_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14289856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15338496))), name = tensor("encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_4_cast_fp16 = 
linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_palettized, x = query_1_cast_fp16)[name = tensor("linear_4_cast_fp16")]; + tensor encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14311424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15361152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15360064)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_quantized, x = query_1_cast_fp16)[name = tensor("linear_4_cast_fp16")]; tensor var_382 = const()[name = tensor("op_382"), val = tensor([1, -1, 8, 128])]; tensor k_1_cast_fp16 = reshape(shape = var_382, x = linear_4_cast_fp16)[name = tensor("k_1_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15339072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16387712))), name = tensor("encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_5_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_palettized, x = query_1_cast_fp16)[name = tensor("linear_5_cast_fp16")]; + tensor encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15363264))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16412992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16411904)))]; + tensor linear_5_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_quantized, x = query_1_cast_fp16)[name = tensor("linear_5_cast_fp16")]; tensor var_386 = const()[name = tensor("op_386"), val = tensor([1, -1, 8, 128])]; tensor v_1_cast_fp16 = reshape(shape = var_386, x = linear_5_cast_fp16)[name = tensor("v_1_cast_fp16")]; tensor value_5_perm_0 = const()[name = tensor("value_5_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_0_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_0_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16388288)))]; + tensor encoder_module_layers_0_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_0_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16415104)))]; tensor var_398_cast_fp16 = add(x = q_1_cast_fp16, y = encoder_module_layers_0_self_attn_pos_bias_u_to_fp16)[name = tensor("op_398_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_0_self_attn_pos_bias_v_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16390400)))]; + tensor encoder_module_layers_0_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_0_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16417216)))]; tensor var_400_cast_fp16 = add(x = q_1_cast_fp16, y = encoder_module_layers_0_self_attn_pos_bias_v_to_fp16)[name = tensor("op_400_cast_fp16")]; tensor q_with_bias_v_1_perm_0 = const()[name = tensor("q_with_bias_v_1_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_23_transpose_x_0 = const()[name = tensor("x_23_transpose_x_0"), val = tensor(false)]; tensor x_23_transpose_y_0 = const()[name = tensor("x_23_transpose_y_0"), val = tensor(false)]; - tensor op_402_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16392512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16776576))), name = tensor("op_402_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_402_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_402_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16419328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16803840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16803392)))]; tensor q_with_bias_v_1_cast_fp16 = transpose(perm = q_with_bias_v_1_perm_0, x = var_400_cast_fp16)[name = tensor("transpose_312")]; - tensor x_23_cast_fp16 = matmul(transpose_x = x_23_transpose_x_0, transpose_y = x_23_transpose_y_0, x = q_with_bias_v_1_cast_fp16, y = op_402_to_fp16_palettized)[name = tensor("x_23_cast_fp16")]; + tensor x_23_cast_fp16 = matmul(transpose_x = x_23_transpose_x_0, transpose_y = x_23_transpose_y_0, x = q_with_bias_v_1_cast_fp16, y = op_402_to_fp16_quantized)[name = tensor("x_23_cast_fp16")]; tensor x_25_pad_0 = const()[name = tensor("x_25_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_25_mode_0 = const()[name = tensor("x_25_mode_0"), val = tensor("constant")]; tensor const_29_to_fp16 = const()[name = tensor("const_29_to_fp16"), val = tensor(0x0p+0)]; @@ -297,12 +297,12 @@ program(1.0) tensor var_435 = const()[name = tensor("op_435"), val = tensor([1, -1, 1024])]; tensor var_434_cast_fp16 = transpose(perm = var_434_perm_0, x = x_29_cast_fp16)[name = tensor("transpose_308")]; tensor input_43_cast_fp16 = reshape(shape = var_435, x = var_434_cast_fp16)[name = tensor("input_43_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17825792))), name = tensor("encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_7_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = tensor("linear_7_cast_fp16")]; + tensor encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16804672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17854400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17853312)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = tensor("linear_7_cast_fp16")]; tensor input_47_cast_fp16 = add(x = input_39_cast_fp16, y = linear_7_cast_fp16)[name = tensor("input_47_cast_fp16")]; tensor x_33_axes_0 = const()[name = tensor("x_33_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17826368)))]; - tensor encoder_module_layers_0_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17828480)))]; + tensor encoder_module_layers_0_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17856512)))]; + tensor encoder_module_layers_0_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17858624)))]; tensor x_33_cast_fp16 = layer_norm(axes = x_33_axes_0, beta = encoder_module_layers_0_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_conv_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("x_33_cast_fp16")]; tensor input_49_perm_0 = const()[name = tensor("input_49_perm_0"), val = tensor([0, 2, 1])]; tensor input_51_pad_type_0 = const()[name = tensor("input_51_pad_type_0"), val = tensor("valid")]; @@ -310,9 +310,9 @@ program(1.0) tensor input_51_pad_0 = const()[name = tensor("input_51_pad_0"), val = tensor([0, 0])]; tensor input_51_dilations_0 = const()[name = tensor("input_51_dilations_0"), val = tensor([1])]; tensor input_51_groups_0 = const()[name = tensor("input_51_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17830592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19927808))), name = tensor("encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17860736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19960064))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19957952)))]; tensor input_49_cast_fp16 = transpose(perm = input_49_perm_0, x = 
x_33_cast_fp16)[name = tensor("transpose_307")]; - tensor input_51_cast_fp16 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor input_51_cast_fp16 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = tensor("input_51_cast_fp16")]; tensor x_35_split_num_splits_0 = const()[name = tensor("x_35_split_num_splits_0"), val = tensor(2)]; tensor x_35_split_axis_0 = const()[name = tensor("x_35_split_axis_0"), val = tensor(1)]; tensor x_35_split_cast_fp16_0, tensor x_35_split_cast_fp16_1 = split(axis = x_35_split_axis_0, num_splits = x_35_split_num_splits_0, x = input_51_cast_fp16)[name = tensor("x_35_split_cast_fp16")]; @@ -330,75 +330,75 @@ program(1.0) tensor input_57_strides_0 = const()[name = tensor("input_57_strides_0"), val = tensor([1])]; tensor input_57_pad_0 = const()[name = tensor("input_57_pad_0"), val = tensor([0, 0])]; tensor input_57_dilations_0 = const()[name = tensor("input_57_dilations_0"), val = tensor([1])]; - tensor const_263_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19928384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19937664))), name = tensor("const_263_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_264_to_fp16 = const()[name = tensor("const_264_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19938240)))]; - tensor input_59_cast_fp16 = conv(bias = const_264_to_fp16, dilations = input_57_dilations_0, groups = input_57_groups_0, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = input_57_strides_0, weight = const_263_to_fp16_palettized, x = input_55_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor const_263_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_263_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19964224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19974592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19973504)))]; + tensor const_264_to_fp16 = const()[name = tensor("const_264_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19976704)))]; + tensor input_59_cast_fp16 = conv(bias = const_264_to_fp16, dilations = input_57_dilations_0, groups = input_57_groups_0, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = input_57_strides_0, weight = const_263_to_fp16_quantized, x = input_55_cast_fp16)[name = tensor("input_59_cast_fp16")]; tensor input_61_cast_fp16 = silu(x = input_59_cast_fp16)[name = tensor("input_61_cast_fp16")]; tensor x_37_pad_type_0 = const()[name = tensor("x_37_pad_type_0"), val = tensor("valid")]; tensor x_37_strides_0 = const()[name = tensor("x_37_strides_0"), val = tensor([1])]; tensor x_37_pad_0 = const()[name = tensor("x_37_pad_0"), val = tensor([0, 0])]; tensor x_37_dilations_0 = const()[name = 
tensor("x_37_dilations_0"), val = tensor([1])]; tensor x_37_groups_0 = const()[name = tensor("x_37_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19940352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20988992))), name = tensor("encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_37_cast_fp16 = conv(dilations = x_37_dilations_0, groups = x_37_groups_0, pad = x_37_pad_0, pad_type = x_37_pad_type_0, strides = x_37_strides_0, weight = encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = tensor("x_37_cast_fp16")]; + tensor encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19978816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21028544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21027456)))]; + tensor x_37_cast_fp16 = conv(dilations = x_37_dilations_0, groups = x_37_groups_0, pad = x_37_pad_0, pad_type = x_37_pad_type_0, strides = x_37_strides_0, weight = encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_61_cast_fp16)[name = tensor("x_37_cast_fp16")]; tensor input_63_perm_0 = const()[name = tensor("input_63_perm_0"), val = tensor([0, 2, 1])]; tensor input_63_cast_fp16 = transpose(perm = input_63_perm_0, x = x_37_cast_fp16)[name = tensor("transpose_306")]; tensor input_65_cast_fp16 = add(x = input_47_cast_fp16, y = input_63_cast_fp16)[name = tensor("input_65_cast_fp16")]; tensor input_67_axes_0 = const()[name = tensor("input_67_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20989568)))]; - tensor encoder_module_layers_0_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20991680)))]; + tensor encoder_module_layers_0_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21030656)))]; + tensor encoder_module_layers_0_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21032768)))]; tensor input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, beta = encoder_module_layers_0_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_feed_forward2_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("input_67_cast_fp16")]; - tensor encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(20993792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25188160))), name = tensor("encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = tensor("linear_8_cast_fp16")]; + tensor encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21034880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25233408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25229248)))]; + tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = tensor("linear_8_cast_fp16")]; tensor input_71_cast_fp16 = silu(x = linear_8_cast_fp16)[name = tensor("input_71_cast_fp16")]; - tensor encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25188736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29383104))), name = tensor("encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_9_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = tensor("linear_9_cast_fp16")]; + tensor encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25241664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29437120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29436032)))]; + tensor linear_9_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_quantized, x = input_71_cast_fp16)[name = tensor("linear_9_cast_fp16")]; tensor var_495_to_fp16 = const()[name = tensor("op_495_to_fp16"), val = tensor(0x1p-1)]; tensor var_496_cast_fp16 = mul(x = linear_9_cast_fp16, y = var_495_to_fp16)[name = tensor("op_496_cast_fp16")]; tensor input_77_cast_fp16 = add(x = input_65_cast_fp16, y = var_496_cast_fp16)[name = tensor("input_77_cast_fp16")]; tensor input_79_axes_0 = const()[name = tensor("input_79_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29383680)))]; - tensor encoder_module_layers_0_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(29385792)))]; + tensor encoder_module_layers_0_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29439232)))]; + tensor encoder_module_layers_0_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29441344)))]; tensor input_79_cast_fp16 = layer_norm(axes = input_79_axes_0, beta = encoder_module_layers_0_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_out_weight_to_fp16, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; tensor input_81_axes_0 = const()[name = tensor("input_81_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29387904)))]; - tensor encoder_module_layers_1_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29390016)))]; + tensor encoder_module_layers_1_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29443456)))]; + tensor encoder_module_layers_1_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29445568)))]; tensor input_81_cast_fp16 = layer_norm(axes = input_81_axes_0, beta = encoder_module_layers_1_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_1_norm_feed_forward1_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("input_81_cast_fp16")]; - tensor encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29392128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33586496))), name = tensor("encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = tensor("linear_10_cast_fp16")]; + tensor encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29447680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33646208))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33642048)))]; + tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = 
tensor("linear_10_cast_fp16")]; tensor input_85_cast_fp16 = silu(x = linear_10_cast_fp16)[name = tensor("input_85_cast_fp16")]; - tensor encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33587072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37781440))), name = tensor("encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_11_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = tensor("linear_11_cast_fp16")]; + tensor encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33654464))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37849920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37848832)))]; + tensor linear_11_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_quantized, x = input_85_cast_fp16)[name = tensor("linear_11_cast_fp16")]; tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1p-1)]; tensor var_525_cast_fp16 = mul(x = linear_11_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; tensor input_91_cast_fp16 = add(x = input_79_cast_fp16, y = var_525_cast_fp16)[name = tensor("input_91_cast_fp16")]; tensor query_3_axes_0 = const()[name = tensor("query_3_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37782016)))]; - tensor encoder_module_layers_1_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37784128)))]; + tensor encoder_module_layers_1_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37852032)))]; + tensor encoder_module_layers_1_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37854144)))]; tensor query_3_cast_fp16 = layer_norm(axes = query_3_axes_0, beta = encoder_module_layers_1_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_1_norm_self_att_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("query_3_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37786240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38834880))), name = 
tensor("encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_12_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_palettized, x = query_3_cast_fp16)[name = tensor("linear_12_cast_fp16")]; + tensor encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37856256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38905984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38904896)))]; + tensor linear_12_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor("linear_12_cast_fp16")]; tensor var_541 = const()[name = tensor("op_541"), val = tensor([1, -1, 8, 128])]; tensor q_7_cast_fp16 = reshape(shape = var_541, x = linear_12_cast_fp16)[name = tensor("q_7_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38835456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39884096))), name = tensor("encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_13_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_palettized, x = query_3_cast_fp16)[name = tensor("linear_13_cast_fp16")]; + tensor encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38908096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39957824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39956736)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor("linear_13_cast_fp16")]; tensor var_545 = const()[name = tensor("op_545"), val = tensor([1, -1, 8, 128])]; tensor k_5_cast_fp16 = reshape(shape = var_545, x = linear_13_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39884672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40933312))), name = tensor("encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_palettized, x = query_3_cast_fp16)[name = tensor("linear_14_cast_fp16")]; + tensor encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_quantized = 
constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39959936))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41009664))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41008576)))]; + tensor linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor("linear_14_cast_fp16")]; tensor var_549 = const()[name = tensor("op_549"), val = tensor([1, -1, 8, 128])]; tensor v_3_cast_fp16 = reshape(shape = var_549, x = linear_14_cast_fp16)[name = tensor("v_3_cast_fp16")]; tensor value_7_perm_0 = const()[name = tensor("value_7_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_1_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_1_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40933888)))]; + tensor encoder_module_layers_1_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_1_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41011776)))]; tensor var_561_cast_fp16 = add(x = q_7_cast_fp16, y = encoder_module_layers_1_self_attn_pos_bias_u_to_fp16)[name = tensor("op_561_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_1_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40936000)))]; + tensor encoder_module_layers_1_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_1_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41013888)))]; tensor var_563_cast_fp16 = add(x = q_7_cast_fp16, y = encoder_module_layers_1_self_attn_pos_bias_v_to_fp16)[name = tensor("op_563_cast_fp16")]; tensor q_with_bias_v_3_perm_0 = const()[name = tensor("q_with_bias_v_3_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_45_transpose_x_0 = const()[name = tensor("x_45_transpose_x_0"), val = tensor(false)]; tensor x_45_transpose_y_0 = const()[name = tensor("x_45_transpose_y_0"), val = tensor(false)]; - tensor op_565_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40938112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41322176))), name = tensor("op_565_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_565_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_565_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41016000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41400512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41400064)))]; tensor q_with_bias_v_3_cast_fp16 = transpose(perm = q_with_bias_v_3_perm_0, x = var_563_cast_fp16)[name = tensor("transpose_305")]; - tensor x_45_cast_fp16 = matmul(transpose_x = x_45_transpose_x_0, transpose_y = x_45_transpose_y_0, x = 
q_with_bias_v_3_cast_fp16, y = op_565_to_fp16_palettized)[name = tensor("x_45_cast_fp16")]; + tensor x_45_cast_fp16 = matmul(transpose_x = x_45_transpose_x_0, transpose_y = x_45_transpose_y_0, x = q_with_bias_v_3_cast_fp16, y = op_565_to_fp16_quantized)[name = tensor("x_45_cast_fp16")]; tensor x_47_pad_0 = const()[name = tensor("x_47_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_47_mode_0 = const()[name = tensor("x_47_mode_0"), val = tensor("constant")]; tensor const_39_to_fp16 = const()[name = tensor("const_39_to_fp16"), val = tensor(0x0p+0)]; @@ -436,12 +436,12 @@ program(1.0) tensor var_598 = const()[name = tensor("op_598"), val = tensor([1, -1, 1024])]; tensor var_597_cast_fp16 = transpose(perm = var_597_perm_0, x = x_51_cast_fp16)[name = tensor("transpose_301")]; tensor input_95_cast_fp16 = reshape(shape = var_598, x = var_597_cast_fp16)[name = tensor("input_95_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41322752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42371392))), name = tensor("encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_16_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = tensor("linear_16_cast_fp16")]; + tensor encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41401344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42451072))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42449984)))]; + tensor linear_16_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = tensor("linear_16_cast_fp16")]; tensor input_99_cast_fp16 = add(x = input_91_cast_fp16, y = linear_16_cast_fp16)[name = tensor("input_99_cast_fp16")]; tensor x_55_axes_0 = const()[name = tensor("x_55_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42371968)))]; - tensor encoder_module_layers_1_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42374080)))]; + tensor encoder_module_layers_1_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42453184)))]; + tensor encoder_module_layers_1_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42455296)))]; tensor x_55_cast_fp16 = layer_norm(axes = x_55_axes_0, beta = encoder_module_layers_1_norm_conv_bias_to_fp16, epsilon 
= var_156_to_fp16, gamma = encoder_module_layers_1_norm_conv_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("x_55_cast_fp16")]; tensor input_101_perm_0 = const()[name = tensor("input_101_perm_0"), val = tensor([0, 2, 1])]; tensor input_103_pad_type_0 = const()[name = tensor("input_103_pad_type_0"), val = tensor("valid")]; @@ -449,9 +449,9 @@ program(1.0) tensor input_103_pad_0 = const()[name = tensor("input_103_pad_0"), val = tensor([0, 0])]; tensor input_103_dilations_0 = const()[name = tensor("input_103_dilations_0"), val = tensor([1])]; tensor input_103_groups_0 = const()[name = tensor("input_103_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42376192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44473408))), name = tensor("encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42457408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44556736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44554624)))]; tensor input_101_cast_fp16 = transpose(perm = input_101_perm_0, x = x_55_cast_fp16)[name = tensor("transpose_300")]; - tensor input_103_cast_fp16 = conv(dilations = input_103_dilations_0, groups = input_103_groups_0, pad = input_103_pad_0, pad_type = input_103_pad_type_0, strides = input_103_strides_0, weight = encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor input_103_cast_fp16 = conv(dilations = input_103_dilations_0, groups = input_103_groups_0, pad = input_103_pad_0, pad_type = input_103_pad_type_0, strides = input_103_strides_0, weight = encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_101_cast_fp16)[name = tensor("input_103_cast_fp16")]; tensor x_57_split_num_splits_0 = const()[name = tensor("x_57_split_num_splits_0"), val = tensor(2)]; tensor x_57_split_axis_0 = const()[name = tensor("x_57_split_axis_0"), val = tensor(1)]; tensor x_57_split_cast_fp16_0, tensor x_57_split_cast_fp16_1 = split(axis = x_57_split_axis_0, num_splits = x_57_split_num_splits_0, x = input_103_cast_fp16)[name = tensor("x_57_split_cast_fp16")]; @@ -467,75 +467,75 @@ program(1.0) tensor input_109_strides_0 = const()[name = tensor("input_109_strides_0"), val = tensor([1])]; tensor input_109_pad_0 = const()[name = tensor("input_109_pad_0"), val = tensor([0, 0])]; tensor input_109_dilations_0 = const()[name = tensor("input_109_dilations_0"), val = tensor([1])]; - tensor const_265_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44473984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44483264))), name = tensor("const_265_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_266_to_fp16 = const()[name = tensor("const_266_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(44483840)))]; - tensor input_111_cast_fp16 = conv(bias = const_266_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = const_265_to_fp16_palettized, x = input_107_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor const_265_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_265_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44560896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44571264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44570176)))]; + tensor const_266_to_fp16 = const()[name = tensor("const_266_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44573376)))]; + tensor input_111_cast_fp16 = conv(bias = const_266_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = const_265_to_fp16_quantized, x = input_107_cast_fp16)[name = tensor("input_111_cast_fp16")]; tensor input_113_cast_fp16 = silu(x = input_111_cast_fp16)[name = tensor("input_113_cast_fp16")]; tensor x_59_pad_type_0 = const()[name = tensor("x_59_pad_type_0"), val = tensor("valid")]; tensor x_59_strides_0 = const()[name = tensor("x_59_strides_0"), val = tensor([1])]; tensor x_59_pad_0 = const()[name = tensor("x_59_pad_0"), val = tensor([0, 0])]; tensor x_59_dilations_0 = const()[name = tensor("x_59_dilations_0"), val = tensor([1])]; tensor x_59_groups_0 = const()[name = tensor("x_59_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44485952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45534592))), name = tensor("encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_59_cast_fp16 = conv(dilations = x_59_dilations_0, groups = x_59_groups_0, pad = x_59_pad_0, pad_type = x_59_pad_type_0, strides = x_59_strides_0, weight = encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = tensor("x_59_cast_fp16")]; + tensor encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44575488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45625216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45624128)))]; + tensor x_59_cast_fp16 = conv(dilations = x_59_dilations_0, groups = x_59_groups_0, pad = x_59_pad_0, pad_type = x_59_pad_type_0, strides = x_59_strides_0, weight = encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_113_cast_fp16)[name = tensor("x_59_cast_fp16")]; tensor input_115_perm_0 = const()[name = tensor("input_115_perm_0"), val = tensor([0, 2, 1])]; tensor input_115_cast_fp16 = 
transpose(perm = input_115_perm_0, x = x_59_cast_fp16)[name = tensor("transpose_299")]; tensor input_117_cast_fp16 = add(x = input_99_cast_fp16, y = input_115_cast_fp16)[name = tensor("input_117_cast_fp16")]; tensor input_119_axes_0 = const()[name = tensor("input_119_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45535168)))]; - tensor encoder_module_layers_1_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45537280)))]; + tensor encoder_module_layers_1_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45627328)))]; + tensor encoder_module_layers_1_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45629440)))]; tensor input_119_cast_fp16 = layer_norm(axes = input_119_axes_0, beta = encoder_module_layers_1_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_1_norm_feed_forward2_weight_to_fp16, x = input_117_cast_fp16)[name = tensor("input_119_cast_fp16")]; - tensor encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45539392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49733760))), name = tensor("encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = tensor("linear_17_cast_fp16")]; + tensor encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45631552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49830080))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49825920)))]; + tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = tensor("linear_17_cast_fp16")]; tensor input_123_cast_fp16 = silu(x = linear_17_cast_fp16)[name = tensor("input_123_cast_fp16")]; - tensor encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49734336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53928704))), name = tensor("encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor 
linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = tensor("linear_18_cast_fp16")]; + tensor encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49838336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54033792))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54032704)))]; + tensor linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = tensor("linear_18_cast_fp16")]; tensor var_658_to_fp16 = const()[name = tensor("op_658_to_fp16"), val = tensor(0x1p-1)]; tensor var_659_cast_fp16 = mul(x = linear_18_cast_fp16, y = var_658_to_fp16)[name = tensor("op_659_cast_fp16")]; tensor input_129_cast_fp16 = add(x = input_117_cast_fp16, y = var_659_cast_fp16)[name = tensor("input_129_cast_fp16")]; tensor input_131_axes_0 = const()[name = tensor("input_131_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53929280)))]; - tensor encoder_module_layers_1_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53931392)))]; + tensor encoder_module_layers_1_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54035904)))]; + tensor encoder_module_layers_1_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54038016)))]; tensor input_131_cast_fp16 = layer_norm(axes = input_131_axes_0, beta = encoder_module_layers_1_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_1_norm_out_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("input_131_cast_fp16")]; tensor input_133_axes_0 = const()[name = tensor("input_133_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53933504)))]; - tensor encoder_module_layers_2_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53935616)))]; + tensor encoder_module_layers_2_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54040128)))]; + tensor encoder_module_layers_2_norm_feed_forward1_bias_to_fp16 = const()[name = 
tensor("encoder_module_layers_2_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54042240)))]; tensor input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = encoder_module_layers_2_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_feed_forward1_weight_to_fp16, x = input_131_cast_fp16)[name = tensor("input_133_cast_fp16")]; - tensor encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53937728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58132096))), name = tensor("encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_palettized, x = input_133_cast_fp16)[name = tensor("linear_19_cast_fp16")]; + tensor encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54044352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58242880))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58238720)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_quantized, x = input_133_cast_fp16)[name = tensor("linear_19_cast_fp16")]; tensor input_137_cast_fp16 = silu(x = linear_19_cast_fp16)[name = tensor("input_137_cast_fp16")]; - tensor encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58132672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62327040))), name = tensor("encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_20_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_palettized, x = input_137_cast_fp16)[name = tensor("linear_20_cast_fp16")]; + tensor encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58251136))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62446592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62445504)))]; + tensor linear_20_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = tensor("linear_20_cast_fp16")]; tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1p-1)]; tensor var_688_cast_fp16 = mul(x = linear_20_cast_fp16, y = var_687_to_fp16)[name = 
tensor("op_688_cast_fp16")]; tensor input_143_cast_fp16 = add(x = input_131_cast_fp16, y = var_688_cast_fp16)[name = tensor("input_143_cast_fp16")]; tensor query_5_axes_0 = const()[name = tensor("query_5_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62327616)))]; - tensor encoder_module_layers_2_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62329728)))]; + tensor encoder_module_layers_2_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62448704)))]; + tensor encoder_module_layers_2_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62450816)))]; tensor query_5_cast_fp16 = layer_norm(axes = query_5_axes_0, beta = encoder_module_layers_2_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_self_att_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("query_5_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62331840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63380480))), name = tensor("encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_21_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_palettized, x = query_5_cast_fp16)[name = tensor("linear_21_cast_fp16")]; + tensor encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62452928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63502656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63501568)))]; + tensor linear_21_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor("linear_21_cast_fp16")]; tensor var_704 = const()[name = tensor("op_704"), val = tensor([1, -1, 8, 128])]; tensor q_13_cast_fp16 = reshape(shape = var_704, x = linear_21_cast_fp16)[name = tensor("q_13_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63381056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64429696))), name = tensor("encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_palettized, x = query_5_cast_fp16)[name = tensor("linear_22_cast_fp16")]; + tensor encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63504768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64554496))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64553408)))]; + tensor linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor("linear_22_cast_fp16")]; tensor var_708 = const()[name = tensor("op_708"), val = tensor([1, -1, 8, 128])]; tensor k_9_cast_fp16 = reshape(shape = var_708, x = linear_22_cast_fp16)[name = tensor("k_9_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64430272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65478912))), name = tensor("encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_23_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_palettized, x = query_5_cast_fp16)[name = tensor("linear_23_cast_fp16")]; + tensor encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64556608))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65606336))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65605248)))]; + tensor linear_23_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor("linear_23_cast_fp16")]; tensor var_712 = const()[name = tensor("op_712"), val = tensor([1, -1, 8, 128])]; tensor v_5_cast_fp16 = reshape(shape = var_712, x = linear_23_cast_fp16)[name = tensor("v_5_cast_fp16")]; tensor value_9_perm_0 = const()[name = tensor("value_9_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_2_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_2_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65479488)))]; + tensor encoder_module_layers_2_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_2_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65608448)))]; tensor var_724_cast_fp16 = add(x = q_13_cast_fp16, y = encoder_module_layers_2_self_attn_pos_bias_u_to_fp16)[name = tensor("op_724_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_2_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(65481600)))]; + tensor encoder_module_layers_2_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_2_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65610560)))]; tensor var_726_cast_fp16 = add(x = q_13_cast_fp16, y = encoder_module_layers_2_self_attn_pos_bias_v_to_fp16)[name = tensor("op_726_cast_fp16")]; tensor q_with_bias_v_5_perm_0 = const()[name = tensor("q_with_bias_v_5_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_67_transpose_x_0 = const()[name = tensor("x_67_transpose_x_0"), val = tensor(false)]; tensor x_67_transpose_y_0 = const()[name = tensor("x_67_transpose_y_0"), val = tensor(false)]; - tensor op_728_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65483712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65867776))), name = tensor("op_728_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_728_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_728_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65612672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65997184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65996736)))]; tensor q_with_bias_v_5_cast_fp16 = transpose(perm = q_with_bias_v_5_perm_0, x = var_726_cast_fp16)[name = tensor("transpose_298")]; - tensor x_67_cast_fp16 = matmul(transpose_x = x_67_transpose_x_0, transpose_y = x_67_transpose_y_0, x = q_with_bias_v_5_cast_fp16, y = op_728_to_fp16_palettized)[name = tensor("x_67_cast_fp16")]; + tensor x_67_cast_fp16 = matmul(transpose_x = x_67_transpose_x_0, transpose_y = x_67_transpose_y_0, x = q_with_bias_v_5_cast_fp16, y = op_728_to_fp16_quantized)[name = tensor("x_67_cast_fp16")]; tensor x_69_pad_0 = const()[name = tensor("x_69_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_69_mode_0 = const()[name = tensor("x_69_mode_0"), val = tensor("constant")]; tensor const_49_to_fp16 = const()[name = tensor("const_49_to_fp16"), val = tensor(0x0p+0)]; @@ -573,12 +573,12 @@ program(1.0) tensor var_761 = const()[name = tensor("op_761"), val = tensor([1, -1, 1024])]; tensor var_760_cast_fp16 = transpose(perm = var_760_perm_0, x = x_73_cast_fp16)[name = tensor("transpose_294")]; tensor input_147_cast_fp16 = reshape(shape = var_761, x = var_760_cast_fp16)[name = tensor("input_147_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65868352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66916992))), name = tensor("encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_25_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_palettized, x = input_147_cast_fp16)[name = tensor("linear_25_cast_fp16")]; + tensor encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65998016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67047744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67046656)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = tensor("linear_25_cast_fp16")]; tensor input_151_cast_fp16 = add(x = input_143_cast_fp16, y = linear_25_cast_fp16)[name = tensor("input_151_cast_fp16")]; tensor x_77_axes_0 = const()[name = tensor("x_77_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66917568)))]; - tensor encoder_module_layers_2_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66919680)))]; + tensor encoder_module_layers_2_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67049856)))]; + tensor encoder_module_layers_2_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67051968)))]; tensor x_77_cast_fp16 = layer_norm(axes = x_77_axes_0, beta = encoder_module_layers_2_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_conv_weight_to_fp16, x = input_151_cast_fp16)[name = tensor("x_77_cast_fp16")]; tensor input_153_perm_0 = const()[name = tensor("input_153_perm_0"), val = tensor([0, 2, 1])]; tensor input_155_pad_type_0 = const()[name = tensor("input_155_pad_type_0"), val = tensor("valid")]; @@ -586,9 +586,9 @@ program(1.0) tensor input_155_pad_0 = const()[name = tensor("input_155_pad_0"), val = tensor([0, 0])]; tensor input_155_dilations_0 = const()[name = tensor("input_155_dilations_0"), val = tensor([1])]; tensor input_155_groups_0 = const()[name = tensor("input_155_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66921792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69019008))), name = tensor("encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67054080))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69153408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69151296)))]; tensor input_153_cast_fp16 = transpose(perm = input_153_perm_0, x 
= x_77_cast_fp16)[name = tensor("transpose_293")]; - tensor input_155_cast_fp16 = conv(dilations = input_155_dilations_0, groups = input_155_groups_0, pad = input_155_pad_0, pad_type = input_155_pad_type_0, strides = input_155_strides_0, weight = encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_153_cast_fp16)[name = tensor("input_155_cast_fp16")]; + tensor input_155_cast_fp16 = conv(dilations = input_155_dilations_0, groups = input_155_groups_0, pad = input_155_pad_0, pad_type = input_155_pad_type_0, strides = input_155_strides_0, weight = encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = tensor("input_155_cast_fp16")]; tensor x_79_split_num_splits_0 = const()[name = tensor("x_79_split_num_splits_0"), val = tensor(2)]; tensor x_79_split_axis_0 = const()[name = tensor("x_79_split_axis_0"), val = tensor(1)]; tensor x_79_split_cast_fp16_0, tensor x_79_split_cast_fp16_1 = split(axis = x_79_split_axis_0, num_splits = x_79_split_num_splits_0, x = input_155_cast_fp16)[name = tensor("x_79_split_cast_fp16")]; @@ -604,75 +604,75 @@ program(1.0) tensor input_161_strides_0 = const()[name = tensor("input_161_strides_0"), val = tensor([1])]; tensor input_161_pad_0 = const()[name = tensor("input_161_pad_0"), val = tensor([0, 0])]; tensor input_161_dilations_0 = const()[name = tensor("input_161_dilations_0"), val = tensor([1])]; - tensor const_267_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69019584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69028864))), name = tensor("const_267_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_268_to_fp16 = const()[name = tensor("const_268_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69029440)))]; - tensor input_163_cast_fp16 = conv(bias = const_268_to_fp16, dilations = input_161_dilations_0, groups = input_161_groups_0, pad = input_161_pad_0, pad_type = input_161_pad_type_0, strides = input_161_strides_0, weight = const_267_to_fp16_palettized, x = input_159_cast_fp16)[name = tensor("input_163_cast_fp16")]; + tensor const_267_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_267_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69157568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69167936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69166848)))]; + tensor const_268_to_fp16 = const()[name = tensor("const_268_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69170048)))]; + tensor input_163_cast_fp16 = conv(bias = const_268_to_fp16, dilations = input_161_dilations_0, groups = input_161_groups_0, pad = input_161_pad_0, pad_type = input_161_pad_type_0, strides = input_161_strides_0, weight = const_267_to_fp16_quantized, x = input_159_cast_fp16)[name = tensor("input_163_cast_fp16")]; tensor input_165_cast_fp16 = silu(x = input_163_cast_fp16)[name = tensor("input_165_cast_fp16")]; tensor x_81_pad_type_0 = const()[name = tensor("x_81_pad_type_0"), val = tensor("valid")]; tensor x_81_strides_0 = const()[name = tensor("x_81_strides_0"), val = tensor([1])]; tensor x_81_pad_0 = const()[name = tensor("x_81_pad_0"), val = tensor([0, 0])]; 
tensor x_81_dilations_0 = const()[name = tensor("x_81_dilations_0"), val = tensor([1])]; tensor x_81_groups_0 = const()[name = tensor("x_81_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69031552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70080192))), name = tensor("encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_81_cast_fp16 = conv(dilations = x_81_dilations_0, groups = x_81_groups_0, pad = x_81_pad_0, pad_type = x_81_pad_type_0, strides = x_81_strides_0, weight = encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_165_cast_fp16)[name = tensor("x_81_cast_fp16")]; + tensor encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69172160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70221888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70220800)))]; + tensor x_81_cast_fp16 = conv(dilations = x_81_dilations_0, groups = x_81_groups_0, pad = x_81_pad_0, pad_type = x_81_pad_type_0, strides = x_81_strides_0, weight = encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_165_cast_fp16)[name = tensor("x_81_cast_fp16")]; tensor input_167_perm_0 = const()[name = tensor("input_167_perm_0"), val = tensor([0, 2, 1])]; tensor input_167_cast_fp16 = transpose(perm = input_167_perm_0, x = x_81_cast_fp16)[name = tensor("transpose_292")]; tensor input_169_cast_fp16 = add(x = input_151_cast_fp16, y = input_167_cast_fp16)[name = tensor("input_169_cast_fp16")]; tensor input_171_axes_0 = const()[name = tensor("input_171_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70080768)))]; - tensor encoder_module_layers_2_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70082880)))]; + tensor encoder_module_layers_2_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70224000)))]; + tensor encoder_module_layers_2_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70226112)))]; tensor input_171_cast_fp16 = layer_norm(axes = input_171_axes_0, beta = encoder_module_layers_2_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_feed_forward2_weight_to_fp16, x = input_169_cast_fp16)[name = tensor("input_171_cast_fp16")]; - tensor encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_palettized = 
constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70084992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74279360))), name = tensor("encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = tensor("linear_26_cast_fp16")]; + tensor encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70228224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74426752))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74422592)))]; + tensor linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = tensor("linear_26_cast_fp16")]; tensor input_175_cast_fp16 = silu(x = linear_26_cast_fp16)[name = tensor("input_175_cast_fp16")]; - tensor encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74279936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78474304))), name = tensor("encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_27_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = tensor("linear_27_cast_fp16")]; + tensor encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74435008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78630464))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78629376)))]; + tensor linear_27_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_quantized, x = input_175_cast_fp16)[name = tensor("linear_27_cast_fp16")]; tensor var_821_to_fp16 = const()[name = tensor("op_821_to_fp16"), val = tensor(0x1p-1)]; tensor var_822_cast_fp16 = mul(x = linear_27_cast_fp16, y = var_821_to_fp16)[name = tensor("op_822_cast_fp16")]; tensor input_181_cast_fp16 = add(x = input_169_cast_fp16, y = var_822_cast_fp16)[name = tensor("input_181_cast_fp16")]; tensor input_183_axes_0 = const()[name = tensor("input_183_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78474880)))]; - tensor encoder_module_layers_2_norm_out_bias_to_fp16 = const()[name = 
tensor("encoder_module_layers_2_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78476992)))]; + tensor encoder_module_layers_2_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78632576)))]; + tensor encoder_module_layers_2_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78634688)))]; tensor input_183_cast_fp16 = layer_norm(axes = input_183_axes_0, beta = encoder_module_layers_2_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_out_weight_to_fp16, x = input_181_cast_fp16)[name = tensor("input_183_cast_fp16")]; tensor input_185_axes_0 = const()[name = tensor("input_185_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78479104)))]; - tensor encoder_module_layers_3_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78481216)))]; + tensor encoder_module_layers_3_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78636800)))]; + tensor encoder_module_layers_3_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78638912)))]; tensor input_185_cast_fp16 = layer_norm(axes = input_185_axes_0, beta = encoder_module_layers_3_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_feed_forward1_weight_to_fp16, x = input_183_cast_fp16)[name = tensor("input_185_cast_fp16")]; - tensor encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78483328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82677696))), name = tensor("encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_28_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = tensor("linear_28_cast_fp16")]; + tensor encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78641024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82839552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82835392)))]; + tensor linear_28_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = 
encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_quantized, x = input_185_cast_fp16)[name = tensor("linear_28_cast_fp16")]; tensor input_189_cast_fp16 = silu(x = linear_28_cast_fp16)[name = tensor("input_189_cast_fp16")]; - tensor encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82678272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86872640))), name = tensor("encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_29_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_palettized, x = input_189_cast_fp16)[name = tensor("linear_29_cast_fp16")]; + tensor encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82847808))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87043264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87042176)))]; + tensor linear_29_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_quantized, x = input_189_cast_fp16)[name = tensor("linear_29_cast_fp16")]; tensor var_850_to_fp16 = const()[name = tensor("op_850_to_fp16"), val = tensor(0x1p-1)]; tensor var_851_cast_fp16 = mul(x = linear_29_cast_fp16, y = var_850_to_fp16)[name = tensor("op_851_cast_fp16")]; tensor input_195_cast_fp16 = add(x = input_183_cast_fp16, y = var_851_cast_fp16)[name = tensor("input_195_cast_fp16")]; tensor query_7_axes_0 = const()[name = tensor("query_7_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86873216)))]; - tensor encoder_module_layers_3_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86875328)))]; + tensor encoder_module_layers_3_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87045376)))]; + tensor encoder_module_layers_3_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87047488)))]; tensor query_7_cast_fp16 = layer_norm(axes = query_7_axes_0, beta = encoder_module_layers_3_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_self_att_weight_to_fp16, x = input_195_cast_fp16)[name = tensor("query_7_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86877440))), lut = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(87926080))), name = tensor("encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_palettized, x = query_7_cast_fp16)[name = tensor("linear_30_cast_fp16")]; + tensor encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87049600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88099328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88098240)))]; + tensor linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor("linear_30_cast_fp16")]; tensor var_867 = const()[name = tensor("op_867"), val = tensor([1, -1, 8, 128])]; tensor q_19_cast_fp16 = reshape(shape = var_867, x = linear_30_cast_fp16)[name = tensor("q_19_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87926656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88975296))), name = tensor("encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_31_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_palettized, x = query_7_cast_fp16)[name = tensor("linear_31_cast_fp16")]; + tensor encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88101440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89151168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89150080)))]; + tensor linear_31_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor("linear_31_cast_fp16")]; tensor var_871 = const()[name = tensor("op_871"), val = tensor([1, -1, 8, 128])]; tensor k_13_cast_fp16 = reshape(shape = var_871, x = linear_31_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88975872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90024512))), name = tensor("encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_32_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_palettized, x = query_7_cast_fp16)[name = tensor("linear_32_cast_fp16")]; + tensor 
encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89153280))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90203008))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90201920)))]; + tensor linear_32_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor("linear_32_cast_fp16")]; tensor var_875 = const()[name = tensor("op_875"), val = tensor([1, -1, 8, 128])]; tensor v_7_cast_fp16 = reshape(shape = var_875, x = linear_32_cast_fp16)[name = tensor("v_7_cast_fp16")]; tensor value_11_perm_0 = const()[name = tensor("value_11_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_3_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_3_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90025088)))]; + tensor encoder_module_layers_3_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_3_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90205120)))]; tensor var_887_cast_fp16 = add(x = q_19_cast_fp16, y = encoder_module_layers_3_self_attn_pos_bias_u_to_fp16)[name = tensor("op_887_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_3_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90027200)))]; + tensor encoder_module_layers_3_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_3_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90207232)))]; tensor var_889_cast_fp16 = add(x = q_19_cast_fp16, y = encoder_module_layers_3_self_attn_pos_bias_v_to_fp16)[name = tensor("op_889_cast_fp16")]; tensor q_with_bias_v_7_perm_0 = const()[name = tensor("q_with_bias_v_7_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_89_transpose_x_0 = const()[name = tensor("x_89_transpose_x_0"), val = tensor(false)]; tensor x_89_transpose_y_0 = const()[name = tensor("x_89_transpose_y_0"), val = tensor(false)]; - tensor op_891_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90029312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90413376))), name = tensor("op_891_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_891_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_891_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90209344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90593856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90593408)))]; tensor q_with_bias_v_7_cast_fp16 = transpose(perm = q_with_bias_v_7_perm_0, x = var_889_cast_fp16)[name = tensor("transpose_291")]; - tensor x_89_cast_fp16 = 
matmul(transpose_x = x_89_transpose_x_0, transpose_y = x_89_transpose_y_0, x = q_with_bias_v_7_cast_fp16, y = op_891_to_fp16_palettized)[name = tensor("x_89_cast_fp16")]; + tensor x_89_cast_fp16 = matmul(transpose_x = x_89_transpose_x_0, transpose_y = x_89_transpose_y_0, x = q_with_bias_v_7_cast_fp16, y = op_891_to_fp16_quantized)[name = tensor("x_89_cast_fp16")]; tensor x_91_pad_0 = const()[name = tensor("x_91_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_91_mode_0 = const()[name = tensor("x_91_mode_0"), val = tensor("constant")]; tensor const_59_to_fp16 = const()[name = tensor("const_59_to_fp16"), val = tensor(0x0p+0)]; @@ -710,12 +710,12 @@ program(1.0) tensor var_924 = const()[name = tensor("op_924"), val = tensor([1, -1, 1024])]; tensor var_923_cast_fp16 = transpose(perm = var_923_perm_0, x = x_95_cast_fp16)[name = tensor("transpose_287")]; tensor input_199_cast_fp16 = reshape(shape = var_924, x = var_923_cast_fp16)[name = tensor("input_199_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90413952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91462592))), name = tensor("encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_palettized, x = input_199_cast_fp16)[name = tensor("linear_34_cast_fp16")]; + tensor encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90594688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91644416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91643328)))]; + tensor linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_quantized, x = input_199_cast_fp16)[name = tensor("linear_34_cast_fp16")]; tensor input_203_cast_fp16 = add(x = input_195_cast_fp16, y = linear_34_cast_fp16)[name = tensor("input_203_cast_fp16")]; tensor x_99_axes_0 = const()[name = tensor("x_99_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91463168)))]; - tensor encoder_module_layers_3_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91465280)))]; + tensor encoder_module_layers_3_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91646528)))]; + tensor encoder_module_layers_3_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91648640)))]; tensor x_99_cast_fp16 = 
layer_norm(axes = x_99_axes_0, beta = encoder_module_layers_3_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_conv_weight_to_fp16, x = input_203_cast_fp16)[name = tensor("x_99_cast_fp16")]; tensor input_205_perm_0 = const()[name = tensor("input_205_perm_0"), val = tensor([0, 2, 1])]; tensor input_207_pad_type_0 = const()[name = tensor("input_207_pad_type_0"), val = tensor("valid")]; @@ -723,9 +723,9 @@ program(1.0) tensor input_207_pad_0 = const()[name = tensor("input_207_pad_0"), val = tensor([0, 0])]; tensor input_207_dilations_0 = const()[name = tensor("input_207_dilations_0"), val = tensor([1])]; tensor input_207_groups_0 = const()[name = tensor("input_207_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91467392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93564608))), name = tensor("encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91650752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93750080))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93747968)))]; tensor input_205_cast_fp16 = transpose(perm = input_205_perm_0, x = x_99_cast_fp16)[name = tensor("transpose_286")]; - tensor input_207_cast_fp16 = conv(dilations = input_207_dilations_0, groups = input_207_groups_0, pad = input_207_pad_0, pad_type = input_207_pad_type_0, strides = input_207_strides_0, weight = encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_205_cast_fp16)[name = tensor("input_207_cast_fp16")]; + tensor input_207_cast_fp16 = conv(dilations = input_207_dilations_0, groups = input_207_groups_0, pad = input_207_pad_0, pad_type = input_207_pad_type_0, strides = input_207_strides_0, weight = encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_205_cast_fp16)[name = tensor("input_207_cast_fp16")]; tensor x_101_split_num_splits_0 = const()[name = tensor("x_101_split_num_splits_0"), val = tensor(2)]; tensor x_101_split_axis_0 = const()[name = tensor("x_101_split_axis_0"), val = tensor(1)]; tensor x_101_split_cast_fp16_0, tensor x_101_split_cast_fp16_1 = split(axis = x_101_split_axis_0, num_splits = x_101_split_num_splits_0, x = input_207_cast_fp16)[name = tensor("x_101_split_cast_fp16")]; @@ -741,75 +741,75 @@ program(1.0) tensor input_213_strides_0 = const()[name = tensor("input_213_strides_0"), val = tensor([1])]; tensor input_213_pad_0 = const()[name = tensor("input_213_pad_0"), val = tensor([0, 0])]; tensor input_213_dilations_0 = const()[name = tensor("input_213_dilations_0"), val = tensor([1])]; - tensor const_269_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93565184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93574464))), name = tensor("const_269_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; 
- tensor const_270_to_fp16 = const()[name = tensor("const_270_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93575040)))]; - tensor input_215_cast_fp16 = conv(bias = const_270_to_fp16, dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = const_269_to_fp16_palettized, x = input_211_cast_fp16)[name = tensor("input_215_cast_fp16")]; + tensor const_269_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_269_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93754240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93764608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93763520)))]; + tensor const_270_to_fp16 = const()[name = tensor("const_270_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93766720)))]; + tensor input_215_cast_fp16 = conv(bias = const_270_to_fp16, dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = const_269_to_fp16_quantized, x = input_211_cast_fp16)[name = tensor("input_215_cast_fp16")]; tensor input_217_cast_fp16 = silu(x = input_215_cast_fp16)[name = tensor("input_217_cast_fp16")]; tensor x_103_pad_type_0 = const()[name = tensor("x_103_pad_type_0"), val = tensor("valid")]; tensor x_103_strides_0 = const()[name = tensor("x_103_strides_0"), val = tensor([1])]; tensor x_103_pad_0 = const()[name = tensor("x_103_pad_0"), val = tensor([0, 0])]; tensor x_103_dilations_0 = const()[name = tensor("x_103_dilations_0"), val = tensor([1])]; tensor x_103_groups_0 = const()[name = tensor("x_103_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93577152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94625792))), name = tensor("encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_103_cast_fp16 = conv(dilations = x_103_dilations_0, groups = x_103_groups_0, pad = x_103_pad_0, pad_type = x_103_pad_type_0, strides = x_103_strides_0, weight = encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_217_cast_fp16)[name = tensor("x_103_cast_fp16")]; + tensor encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93768832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94818560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94817472)))]; + tensor x_103_cast_fp16 = conv(dilations = x_103_dilations_0, groups = x_103_groups_0, pad = x_103_pad_0, pad_type = x_103_pad_type_0, strides = x_103_strides_0, weight = encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_217_cast_fp16)[name = tensor("x_103_cast_fp16")]; tensor 
input_219_perm_0 = const()[name = tensor("input_219_perm_0"), val = tensor([0, 2, 1])]; tensor input_219_cast_fp16 = transpose(perm = input_219_perm_0, x = x_103_cast_fp16)[name = tensor("transpose_285")]; tensor input_221_cast_fp16 = add(x = input_203_cast_fp16, y = input_219_cast_fp16)[name = tensor("input_221_cast_fp16")]; tensor input_223_axes_0 = const()[name = tensor("input_223_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94626368)))]; - tensor encoder_module_layers_3_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94628480)))]; + tensor encoder_module_layers_3_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94820672)))]; + tensor encoder_module_layers_3_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94822784)))]; tensor input_223_cast_fp16 = layer_norm(axes = input_223_axes_0, beta = encoder_module_layers_3_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_feed_forward2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor("input_223_cast_fp16")]; - tensor encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94630592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98824960))), name = tensor("encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_palettized, x = input_223_cast_fp16)[name = tensor("linear_35_cast_fp16")]; + tensor encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94824896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99023424))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99019264)))]; + tensor linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_quantized, x = input_223_cast_fp16)[name = tensor("linear_35_cast_fp16")]; tensor input_227_cast_fp16 = silu(x = linear_35_cast_fp16)[name = tensor("input_227_cast_fp16")]; - tensor encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98825536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103019904))), name = 
tensor("encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_36_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_palettized, x = input_227_cast_fp16)[name = tensor("linear_36_cast_fp16")]; + tensor encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99031680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103227136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103226048)))]; + tensor linear_36_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_quantized, x = input_227_cast_fp16)[name = tensor("linear_36_cast_fp16")]; tensor var_984_to_fp16 = const()[name = tensor("op_984_to_fp16"), val = tensor(0x1p-1)]; tensor var_985_cast_fp16 = mul(x = linear_36_cast_fp16, y = var_984_to_fp16)[name = tensor("op_985_cast_fp16")]; tensor input_233_cast_fp16 = add(x = input_221_cast_fp16, y = var_985_cast_fp16)[name = tensor("input_233_cast_fp16")]; tensor input_235_axes_0 = const()[name = tensor("input_235_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103020480)))]; - tensor encoder_module_layers_3_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103022592)))]; + tensor encoder_module_layers_3_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103229248)))]; + tensor encoder_module_layers_3_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103231360)))]; tensor input_235_cast_fp16 = layer_norm(axes = input_235_axes_0, beta = encoder_module_layers_3_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_out_weight_to_fp16, x = input_233_cast_fp16)[name = tensor("input_235_cast_fp16")]; tensor input_237_axes_0 = const()[name = tensor("input_237_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103024704)))]; - tensor encoder_module_layers_4_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103026816)))]; + tensor encoder_module_layers_4_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103233472)))]; + 
tensor encoder_module_layers_4_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103235584)))]; tensor input_237_cast_fp16 = layer_norm(axes = input_237_axes_0, beta = encoder_module_layers_4_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_4_norm_feed_forward1_weight_to_fp16, x = input_235_cast_fp16)[name = tensor("input_237_cast_fp16")]; - tensor encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103028928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107223296))), name = tensor("encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_palettized, x = input_237_cast_fp16)[name = tensor("linear_37_cast_fp16")]; + tensor encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103237696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107436224))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107432064)))]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_quantized, x = input_237_cast_fp16)[name = tensor("linear_37_cast_fp16")]; tensor input_241_cast_fp16 = silu(x = linear_37_cast_fp16)[name = tensor("input_241_cast_fp16")]; - tensor encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107223872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111418240))), name = tensor("encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = tensor("linear_38_cast_fp16")]; + tensor encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107444480))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111639936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111638848)))]; + tensor linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_quantized, x = input_241_cast_fp16)[name = tensor("linear_38_cast_fp16")]; tensor var_1013_to_fp16 = const()[name = tensor("op_1013_to_fp16"), val = tensor(0x1p-1)]; tensor 
var_1014_cast_fp16 = mul(x = linear_38_cast_fp16, y = var_1013_to_fp16)[name = tensor("op_1014_cast_fp16")]; tensor input_247_cast_fp16 = add(x = input_235_cast_fp16, y = var_1014_cast_fp16)[name = tensor("input_247_cast_fp16")]; tensor query_9_axes_0 = const()[name = tensor("query_9_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111418816)))]; - tensor encoder_module_layers_4_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111420928)))]; + tensor encoder_module_layers_4_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111642048)))]; + tensor encoder_module_layers_4_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111644160)))]; tensor query_9_cast_fp16 = layer_norm(axes = query_9_axes_0, beta = encoder_module_layers_4_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_4_norm_self_att_weight_to_fp16, x = input_247_cast_fp16)[name = tensor("query_9_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111423040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112471680))), name = tensor("encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_39_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_palettized, x = query_9_cast_fp16)[name = tensor("linear_39_cast_fp16")]; + tensor encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111646272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112696000))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112694912)))]; + tensor linear_39_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor("linear_39_cast_fp16")]; tensor var_1030 = const()[name = tensor("op_1030"), val = tensor([1, -1, 8, 128])]; tensor q_25_cast_fp16 = reshape(shape = var_1030, x = linear_39_cast_fp16)[name = tensor("q_25_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112472256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113520896))), name = tensor("encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_palettized"), shape = 
tensor([1024, 1024])]; - tensor linear_40_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_palettized, x = query_9_cast_fp16)[name = tensor("linear_40_cast_fp16")]; + tensor encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112698112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113747840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113746752)))]; + tensor linear_40_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor("linear_40_cast_fp16")]; tensor var_1034 = const()[name = tensor("op_1034"), val = tensor([1, -1, 8, 128])]; tensor k_17_cast_fp16 = reshape(shape = var_1034, x = linear_40_cast_fp16)[name = tensor("k_17_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113521472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114570112))), name = tensor("encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_41_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_palettized, x = query_9_cast_fp16)[name = tensor("linear_41_cast_fp16")]; + tensor encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113749952))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114799680))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114798592)))]; + tensor linear_41_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor("linear_41_cast_fp16")]; tensor var_1038 = const()[name = tensor("op_1038"), val = tensor([1, -1, 8, 128])]; tensor v_9_cast_fp16 = reshape(shape = var_1038, x = linear_41_cast_fp16)[name = tensor("v_9_cast_fp16")]; tensor value_13_perm_0 = const()[name = tensor("value_13_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_4_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_4_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114570688)))]; + tensor encoder_module_layers_4_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_4_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114801792)))]; tensor var_1050_cast_fp16 = add(x = q_25_cast_fp16, y = encoder_module_layers_4_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1050_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_pos_bias_v_to_fp16 = 
const()[name = tensor("encoder_module_layers_4_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114572800)))]; + tensor encoder_module_layers_4_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_4_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114803904)))]; tensor var_1052_cast_fp16 = add(x = q_25_cast_fp16, y = encoder_module_layers_4_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1052_cast_fp16")]; tensor q_with_bias_v_9_perm_0 = const()[name = tensor("q_with_bias_v_9_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_111_transpose_x_0 = const()[name = tensor("x_111_transpose_x_0"), val = tensor(false)]; tensor x_111_transpose_y_0 = const()[name = tensor("x_111_transpose_y_0"), val = tensor(false)]; - tensor op_1054_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114574912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114958976))), name = tensor("op_1054_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1054_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1054_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114806016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115190528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115190080)))]; tensor q_with_bias_v_9_cast_fp16 = transpose(perm = q_with_bias_v_9_perm_0, x = var_1052_cast_fp16)[name = tensor("transpose_284")]; - tensor x_111_cast_fp16 = matmul(transpose_x = x_111_transpose_x_0, transpose_y = x_111_transpose_y_0, x = q_with_bias_v_9_cast_fp16, y = op_1054_to_fp16_palettized)[name = tensor("x_111_cast_fp16")]; + tensor x_111_cast_fp16 = matmul(transpose_x = x_111_transpose_x_0, transpose_y = x_111_transpose_y_0, x = q_with_bias_v_9_cast_fp16, y = op_1054_to_fp16_quantized)[name = tensor("x_111_cast_fp16")]; tensor x_113_pad_0 = const()[name = tensor("x_113_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_113_mode_0 = const()[name = tensor("x_113_mode_0"), val = tensor("constant")]; tensor const_69_to_fp16 = const()[name = tensor("const_69_to_fp16"), val = tensor(0x0p+0)]; @@ -847,12 +847,12 @@ program(1.0) tensor var_1087 = const()[name = tensor("op_1087"), val = tensor([1, -1, 1024])]; tensor var_1086_cast_fp16 = transpose(perm = var_1086_perm_0, x = x_117_cast_fp16)[name = tensor("transpose_280")]; tensor input_251_cast_fp16 = reshape(shape = var_1087, x = var_1086_cast_fp16)[name = tensor("input_251_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114959552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116008192))), name = tensor("encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_43_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = tensor("linear_43_cast_fp16")]; + tensor 
encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115191360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116241088))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116240000)))]; + tensor linear_43_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_quantized, x = input_251_cast_fp16)[name = tensor("linear_43_cast_fp16")]; tensor input_255_cast_fp16 = add(x = input_247_cast_fp16, y = linear_43_cast_fp16)[name = tensor("input_255_cast_fp16")]; tensor x_121_axes_0 = const()[name = tensor("x_121_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116008768)))]; - tensor encoder_module_layers_4_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116010880)))]; + tensor encoder_module_layers_4_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116243200)))]; + tensor encoder_module_layers_4_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116245312)))]; tensor x_121_cast_fp16 = layer_norm(axes = x_121_axes_0, beta = encoder_module_layers_4_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_4_norm_conv_weight_to_fp16, x = input_255_cast_fp16)[name = tensor("x_121_cast_fp16")]; tensor input_257_perm_0 = const()[name = tensor("input_257_perm_0"), val = tensor([0, 2, 1])]; tensor input_259_pad_type_0 = const()[name = tensor("input_259_pad_type_0"), val = tensor("valid")]; @@ -860,9 +860,9 @@ program(1.0) tensor input_259_pad_0 = const()[name = tensor("input_259_pad_0"), val = tensor([0, 0])]; tensor input_259_dilations_0 = const()[name = tensor("input_259_dilations_0"), val = tensor([1])]; tensor input_259_groups_0 = const()[name = tensor("input_259_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116012992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118110208))), name = tensor("encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116247424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118346752))), zero_point = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118344640)))]; tensor input_257_cast_fp16 = transpose(perm = input_257_perm_0, x = x_121_cast_fp16)[name = tensor("transpose_279")]; - tensor input_259_cast_fp16 = conv(dilations = input_259_dilations_0, groups = input_259_groups_0, pad = input_259_pad_0, pad_type = input_259_pad_type_0, strides = input_259_strides_0, weight = encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_257_cast_fp16)[name = tensor("input_259_cast_fp16")]; + tensor input_259_cast_fp16 = conv(dilations = input_259_dilations_0, groups = input_259_groups_0, pad = input_259_pad_0, pad_type = input_259_pad_type_0, strides = input_259_strides_0, weight = encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_257_cast_fp16)[name = tensor("input_259_cast_fp16")]; tensor x_123_split_num_splits_0 = const()[name = tensor("x_123_split_num_splits_0"), val = tensor(2)]; tensor x_123_split_axis_0 = const()[name = tensor("x_123_split_axis_0"), val = tensor(1)]; tensor x_123_split_cast_fp16_0, tensor x_123_split_cast_fp16_1 = split(axis = x_123_split_axis_0, num_splits = x_123_split_num_splits_0, x = input_259_cast_fp16)[name = tensor("x_123_split_cast_fp16")]; @@ -878,75 +878,75 @@ program(1.0) tensor input_265_strides_0 = const()[name = tensor("input_265_strides_0"), val = tensor([1])]; tensor input_265_pad_0 = const()[name = tensor("input_265_pad_0"), val = tensor([0, 0])]; tensor input_265_dilations_0 = const()[name = tensor("input_265_dilations_0"), val = tensor([1])]; - tensor const_271_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118110784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118120064))), name = tensor("const_271_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_272_to_fp16 = const()[name = tensor("const_272_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118120640)))]; - tensor input_267_cast_fp16 = conv(bias = const_272_to_fp16, dilations = input_265_dilations_0, groups = input_265_groups_0, pad = input_265_pad_0, pad_type = input_265_pad_type_0, strides = input_265_strides_0, weight = const_271_to_fp16_palettized, x = input_263_cast_fp16)[name = tensor("input_267_cast_fp16")]; + tensor const_271_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_271_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118350912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118361280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118360192)))]; + tensor const_272_to_fp16 = const()[name = tensor("const_272_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118363392)))]; + tensor input_267_cast_fp16 = conv(bias = const_272_to_fp16, dilations = input_265_dilations_0, groups = input_265_groups_0, pad = input_265_pad_0, pad_type = input_265_pad_type_0, strides = input_265_strides_0, weight = const_271_to_fp16_quantized, x = input_263_cast_fp16)[name = tensor("input_267_cast_fp16")]; tensor input_269_cast_fp16 = silu(x = input_267_cast_fp16)[name = tensor("input_269_cast_fp16")]; tensor x_125_pad_type_0 = const()[name = tensor("x_125_pad_type_0"), val = 
tensor("valid")]; tensor x_125_strides_0 = const()[name = tensor("x_125_strides_0"), val = tensor([1])]; tensor x_125_pad_0 = const()[name = tensor("x_125_pad_0"), val = tensor([0, 0])]; tensor x_125_dilations_0 = const()[name = tensor("x_125_dilations_0"), val = tensor([1])]; tensor x_125_groups_0 = const()[name = tensor("x_125_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118122752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119171392))), name = tensor("encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_125_cast_fp16 = conv(dilations = x_125_dilations_0, groups = x_125_groups_0, pad = x_125_pad_0, pad_type = x_125_pad_type_0, strides = x_125_strides_0, weight = encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_269_cast_fp16)[name = tensor("x_125_cast_fp16")]; + tensor encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118365504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119415232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119414144)))]; + tensor x_125_cast_fp16 = conv(dilations = x_125_dilations_0, groups = x_125_groups_0, pad = x_125_pad_0, pad_type = x_125_pad_type_0, strides = x_125_strides_0, weight = encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_269_cast_fp16)[name = tensor("x_125_cast_fp16")]; tensor input_271_perm_0 = const()[name = tensor("input_271_perm_0"), val = tensor([0, 2, 1])]; tensor input_271_cast_fp16 = transpose(perm = input_271_perm_0, x = x_125_cast_fp16)[name = tensor("transpose_278")]; tensor input_273_cast_fp16 = add(x = input_255_cast_fp16, y = input_271_cast_fp16)[name = tensor("input_273_cast_fp16")]; tensor input_275_axes_0 = const()[name = tensor("input_275_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119171968)))]; - tensor encoder_module_layers_4_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119174080)))]; + tensor encoder_module_layers_4_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119417344)))]; + tensor encoder_module_layers_4_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119419456)))]; tensor input_275_cast_fp16 = layer_norm(axes = input_275_axes_0, beta = encoder_module_layers_4_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = 
encoder_module_layers_4_norm_feed_forward2_weight_to_fp16, x = input_273_cast_fp16)[name = tensor("input_275_cast_fp16")]; - tensor encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119176192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123370560))), name = tensor("encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_palettized, x = input_275_cast_fp16)[name = tensor("linear_44_cast_fp16")]; + tensor encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119421568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123620096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123615936)))]; + tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_quantized, x = input_275_cast_fp16)[name = tensor("linear_44_cast_fp16")]; tensor input_279_cast_fp16 = silu(x = linear_44_cast_fp16)[name = tensor("input_279_cast_fp16")]; - tensor encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123371136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127565504))), name = tensor("encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_45_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_palettized, x = input_279_cast_fp16)[name = tensor("linear_45_cast_fp16")]; + tensor encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123628352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127823808))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127822720)))]; + tensor linear_45_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_quantized, x = input_279_cast_fp16)[name = tensor("linear_45_cast_fp16")]; tensor var_1147_to_fp16 = const()[name = tensor("op_1147_to_fp16"), val = tensor(0x1p-1)]; tensor var_1148_cast_fp16 = mul(x = linear_45_cast_fp16, y = var_1147_to_fp16)[name = tensor("op_1148_cast_fp16")]; tensor input_285_cast_fp16 = add(x = input_273_cast_fp16, y = var_1148_cast_fp16)[name = tensor("input_285_cast_fp16")]; tensor input_287_axes_0 = const()[name = tensor("input_287_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_out_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_4_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127566080)))]; - tensor encoder_module_layers_4_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127568192)))]; + tensor encoder_module_layers_4_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127825920)))]; + tensor encoder_module_layers_4_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127828032)))]; tensor input_287_cast_fp16 = layer_norm(axes = input_287_axes_0, beta = encoder_module_layers_4_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_4_norm_out_weight_to_fp16, x = input_285_cast_fp16)[name = tensor("input_287_cast_fp16")]; tensor input_289_axes_0 = const()[name = tensor("input_289_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127570304)))]; - tensor encoder_module_layers_5_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127572416)))]; + tensor encoder_module_layers_5_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127830144)))]; + tensor encoder_module_layers_5_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127832256)))]; tensor input_289_cast_fp16 = layer_norm(axes = input_289_axes_0, beta = encoder_module_layers_5_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_feed_forward1_weight_to_fp16, x = input_287_cast_fp16)[name = tensor("input_289_cast_fp16")]; - tensor encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127574528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131768896))), name = tensor("encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_palettized, x = input_289_cast_fp16)[name = tensor("linear_46_cast_fp16")]; + tensor encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127834368))), scale = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(132032896))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132028736)))]; + tensor linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_quantized, x = input_289_cast_fp16)[name = tensor("linear_46_cast_fp16")]; tensor input_293_cast_fp16 = silu(x = linear_46_cast_fp16)[name = tensor("input_293_cast_fp16")]; - tensor encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131769472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135963840))), name = tensor("encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_47_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_palettized, x = input_293_cast_fp16)[name = tensor("linear_47_cast_fp16")]; + tensor encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132041152))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136236608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136235520)))]; + tensor linear_47_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_quantized, x = input_293_cast_fp16)[name = tensor("linear_47_cast_fp16")]; tensor var_1176_to_fp16 = const()[name = tensor("op_1176_to_fp16"), val = tensor(0x1p-1)]; tensor var_1177_cast_fp16 = mul(x = linear_47_cast_fp16, y = var_1176_to_fp16)[name = tensor("op_1177_cast_fp16")]; tensor input_299_cast_fp16 = add(x = input_287_cast_fp16, y = var_1177_cast_fp16)[name = tensor("input_299_cast_fp16")]; tensor query_11_axes_0 = const()[name = tensor("query_11_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135964416)))]; - tensor encoder_module_layers_5_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135966528)))]; + tensor encoder_module_layers_5_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136238720)))]; + tensor encoder_module_layers_5_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136240832)))]; tensor query_11_cast_fp16 = layer_norm(axes = query_11_axes_0, beta = encoder_module_layers_5_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_self_att_weight_to_fp16, x = input_299_cast_fp16)[name = 
tensor("query_11_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135968640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137017280))), name = tensor("encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_48_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_palettized, x = query_11_cast_fp16)[name = tensor("linear_48_cast_fp16")]; + tensor encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136242944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137292672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137291584)))]; + tensor linear_48_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_quantized, x = query_11_cast_fp16)[name = tensor("linear_48_cast_fp16")]; tensor var_1193 = const()[name = tensor("op_1193"), val = tensor([1, -1, 8, 128])]; tensor q_31_cast_fp16 = reshape(shape = var_1193, x = linear_48_cast_fp16)[name = tensor("q_31_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137017856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138066496))), name = tensor("encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_49_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_palettized, x = query_11_cast_fp16)[name = tensor("linear_49_cast_fp16")]; + tensor encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137294784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138344512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138343424)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_quantized, x = query_11_cast_fp16)[name = tensor("linear_49_cast_fp16")]; tensor var_1197 = const()[name = tensor("op_1197"), val = tensor([1, -1, 8, 128])]; tensor k_21_cast_fp16 = reshape(shape = var_1197, x = linear_49_cast_fp16)[name = tensor("k_21_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138067072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139115712))), name = 
tensor("encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_50_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_palettized, x = query_11_cast_fp16)[name = tensor("linear_50_cast_fp16")]; + tensor encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138346624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139396352))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139395264)))]; + tensor linear_50_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_quantized, x = query_11_cast_fp16)[name = tensor("linear_50_cast_fp16")]; tensor var_1201 = const()[name = tensor("op_1201"), val = tensor([1, -1, 8, 128])]; tensor v_11_cast_fp16 = reshape(shape = var_1201, x = linear_50_cast_fp16)[name = tensor("v_11_cast_fp16")]; tensor value_15_perm_0 = const()[name = tensor("value_15_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_5_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_5_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139116288)))]; + tensor encoder_module_layers_5_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_5_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139398464)))]; tensor var_1213_cast_fp16 = add(x = q_31_cast_fp16, y = encoder_module_layers_5_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1213_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_5_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139118400)))]; + tensor encoder_module_layers_5_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_5_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139400576)))]; tensor var_1215_cast_fp16 = add(x = q_31_cast_fp16, y = encoder_module_layers_5_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1215_cast_fp16")]; tensor q_with_bias_v_11_perm_0 = const()[name = tensor("q_with_bias_v_11_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_133_transpose_x_0 = const()[name = tensor("x_133_transpose_x_0"), val = tensor(false)]; tensor x_133_transpose_y_0 = const()[name = tensor("x_133_transpose_y_0"), val = tensor(false)]; - tensor op_1217_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139120512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139504576))), name = tensor("op_1217_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1217_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1217_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139402688))), scale = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139787200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139786752)))]; tensor q_with_bias_v_11_cast_fp16 = transpose(perm = q_with_bias_v_11_perm_0, x = var_1215_cast_fp16)[name = tensor("transpose_277")]; - tensor x_133_cast_fp16 = matmul(transpose_x = x_133_transpose_x_0, transpose_y = x_133_transpose_y_0, x = q_with_bias_v_11_cast_fp16, y = op_1217_to_fp16_palettized)[name = tensor("x_133_cast_fp16")]; + tensor x_133_cast_fp16 = matmul(transpose_x = x_133_transpose_x_0, transpose_y = x_133_transpose_y_0, x = q_with_bias_v_11_cast_fp16, y = op_1217_to_fp16_quantized)[name = tensor("x_133_cast_fp16")]; tensor x_135_pad_0 = const()[name = tensor("x_135_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_135_mode_0 = const()[name = tensor("x_135_mode_0"), val = tensor("constant")]; tensor const_79_to_fp16 = const()[name = tensor("const_79_to_fp16"), val = tensor(0x0p+0)]; @@ -984,12 +984,12 @@ program(1.0) tensor var_1250 = const()[name = tensor("op_1250"), val = tensor([1, -1, 1024])]; tensor var_1249_cast_fp16 = transpose(perm = var_1249_perm_0, x = x_139_cast_fp16)[name = tensor("transpose_273")]; tensor input_303_cast_fp16 = reshape(shape = var_1250, x = var_1249_cast_fp16)[name = tensor("input_303_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139505152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140553792))), name = tensor("encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_52_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_palettized, x = input_303_cast_fp16)[name = tensor("linear_52_cast_fp16")]; + tensor encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139788032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140837760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140836672)))]; + tensor linear_52_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_quantized, x = input_303_cast_fp16)[name = tensor("linear_52_cast_fp16")]; tensor input_307_cast_fp16 = add(x = input_299_cast_fp16, y = linear_52_cast_fp16)[name = tensor("input_307_cast_fp16")]; tensor x_143_axes_0 = const()[name = tensor("x_143_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140554368)))]; - tensor encoder_module_layers_5_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140556480)))]; + tensor encoder_module_layers_5_norm_conv_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_5_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140839872)))]; + tensor encoder_module_layers_5_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140841984)))]; tensor x_143_cast_fp16 = layer_norm(axes = x_143_axes_0, beta = encoder_module_layers_5_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_conv_weight_to_fp16, x = input_307_cast_fp16)[name = tensor("x_143_cast_fp16")]; tensor input_309_perm_0 = const()[name = tensor("input_309_perm_0"), val = tensor([0, 2, 1])]; tensor input_311_pad_type_0 = const()[name = tensor("input_311_pad_type_0"), val = tensor("valid")]; @@ -997,9 +997,9 @@ program(1.0) tensor input_311_pad_0 = const()[name = tensor("input_311_pad_0"), val = tensor([0, 0])]; tensor input_311_dilations_0 = const()[name = tensor("input_311_dilations_0"), val = tensor([1])]; tensor input_311_groups_0 = const()[name = tensor("input_311_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140558592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142655808))), name = tensor("encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140844096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142943424))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142941312)))]; tensor input_309_cast_fp16 = transpose(perm = input_309_perm_0, x = x_143_cast_fp16)[name = tensor("transpose_272")]; - tensor input_311_cast_fp16 = conv(dilations = input_311_dilations_0, groups = input_311_groups_0, pad = input_311_pad_0, pad_type = input_311_pad_type_0, strides = input_311_strides_0, weight = encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_309_cast_fp16)[name = tensor("input_311_cast_fp16")]; + tensor input_311_cast_fp16 = conv(dilations = input_311_dilations_0, groups = input_311_groups_0, pad = input_311_pad_0, pad_type = input_311_pad_type_0, strides = input_311_strides_0, weight = encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_309_cast_fp16)[name = tensor("input_311_cast_fp16")]; tensor x_145_split_num_splits_0 = const()[name = tensor("x_145_split_num_splits_0"), val = tensor(2)]; tensor x_145_split_axis_0 = const()[name = tensor("x_145_split_axis_0"), val = tensor(1)]; tensor x_145_split_cast_fp16_0, tensor x_145_split_cast_fp16_1 = split(axis = x_145_split_axis_0, num_splits = x_145_split_num_splits_0, x = input_311_cast_fp16)[name = tensor("x_145_split_cast_fp16")]; @@ -1015,75 +1015,75 @@ program(1.0) tensor input_317_strides_0 = const()[name = tensor("input_317_strides_0"), val = tensor([1])]; tensor input_317_pad_0 = const()[name = tensor("input_317_pad_0"), val = tensor([0, 0])]; tensor 
input_317_dilations_0 = const()[name = tensor("input_317_dilations_0"), val = tensor([1])]; - tensor const_273_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142656384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142665664))), name = tensor("const_273_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_274_to_fp16 = const()[name = tensor("const_274_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142666240)))]; - tensor input_319_cast_fp16 = conv(bias = const_274_to_fp16, dilations = input_317_dilations_0, groups = input_317_groups_0, pad = input_317_pad_0, pad_type = input_317_pad_type_0, strides = input_317_strides_0, weight = const_273_to_fp16_palettized, x = input_315_cast_fp16)[name = tensor("input_319_cast_fp16")]; + tensor const_273_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_273_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142947584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142957952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142956864)))]; + tensor const_274_to_fp16 = const()[name = tensor("const_274_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142960064)))]; + tensor input_319_cast_fp16 = conv(bias = const_274_to_fp16, dilations = input_317_dilations_0, groups = input_317_groups_0, pad = input_317_pad_0, pad_type = input_317_pad_type_0, strides = input_317_strides_0, weight = const_273_to_fp16_quantized, x = input_315_cast_fp16)[name = tensor("input_319_cast_fp16")]; tensor input_321_cast_fp16 = silu(x = input_319_cast_fp16)[name = tensor("input_321_cast_fp16")]; tensor x_147_pad_type_0 = const()[name = tensor("x_147_pad_type_0"), val = tensor("valid")]; tensor x_147_strides_0 = const()[name = tensor("x_147_strides_0"), val = tensor([1])]; tensor x_147_pad_0 = const()[name = tensor("x_147_pad_0"), val = tensor([0, 0])]; tensor x_147_dilations_0 = const()[name = tensor("x_147_dilations_0"), val = tensor([1])]; tensor x_147_groups_0 = const()[name = tensor("x_147_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142668352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143716992))), name = tensor("encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_147_cast_fp16 = conv(dilations = x_147_dilations_0, groups = x_147_groups_0, pad = x_147_pad_0, pad_type = x_147_pad_type_0, strides = x_147_strides_0, weight = encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_321_cast_fp16)[name = tensor("x_147_cast_fp16")]; + tensor encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142962176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(144011904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144010816)))]; + tensor x_147_cast_fp16 = conv(dilations = x_147_dilations_0, groups = x_147_groups_0, pad = x_147_pad_0, pad_type = x_147_pad_type_0, strides = x_147_strides_0, weight = encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_321_cast_fp16)[name = tensor("x_147_cast_fp16")]; tensor input_323_perm_0 = const()[name = tensor("input_323_perm_0"), val = tensor([0, 2, 1])]; tensor input_323_cast_fp16 = transpose(perm = input_323_perm_0, x = x_147_cast_fp16)[name = tensor("transpose_271")]; tensor input_325_cast_fp16 = add(x = input_307_cast_fp16, y = input_323_cast_fp16)[name = tensor("input_325_cast_fp16")]; tensor input_327_axes_0 = const()[name = tensor("input_327_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143717568)))]; - tensor encoder_module_layers_5_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143719680)))]; + tensor encoder_module_layers_5_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144014016)))]; + tensor encoder_module_layers_5_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144016128)))]; tensor input_327_cast_fp16 = layer_norm(axes = input_327_axes_0, beta = encoder_module_layers_5_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_feed_forward2_weight_to_fp16, x = input_325_cast_fp16)[name = tensor("input_327_cast_fp16")]; - tensor encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143721792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147916160))), name = tensor("encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_53_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_palettized, x = input_327_cast_fp16)[name = tensor("linear_53_cast_fp16")]; + tensor encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144018240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148216768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148212608)))]; + tensor linear_53_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_quantized, x = input_327_cast_fp16)[name = 
tensor("linear_53_cast_fp16")]; tensor input_331_cast_fp16 = silu(x = linear_53_cast_fp16)[name = tensor("input_331_cast_fp16")]; - tensor encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147916736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152111104))), name = tensor("encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_54_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_palettized, x = input_331_cast_fp16)[name = tensor("linear_54_cast_fp16")]; + tensor encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148225024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152420480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152419392)))]; + tensor linear_54_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_quantized, x = input_331_cast_fp16)[name = tensor("linear_54_cast_fp16")]; tensor var_1310_to_fp16 = const()[name = tensor("op_1310_to_fp16"), val = tensor(0x1p-1)]; tensor var_1311_cast_fp16 = mul(x = linear_54_cast_fp16, y = var_1310_to_fp16)[name = tensor("op_1311_cast_fp16")]; tensor input_337_cast_fp16 = add(x = input_325_cast_fp16, y = var_1311_cast_fp16)[name = tensor("input_337_cast_fp16")]; tensor input_339_axes_0 = const()[name = tensor("input_339_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152111680)))]; - tensor encoder_module_layers_5_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152113792)))]; + tensor encoder_module_layers_5_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152422592)))]; + tensor encoder_module_layers_5_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152424704)))]; tensor input_339_cast_fp16 = layer_norm(axes = input_339_axes_0, beta = encoder_module_layers_5_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_out_weight_to_fp16, x = input_337_cast_fp16)[name = tensor("input_339_cast_fp16")]; tensor input_341_axes_0 = const()[name = tensor("input_341_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152115904)))]; - tensor 
encoder_module_layers_6_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152118016)))]; + tensor encoder_module_layers_6_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152426816)))]; + tensor encoder_module_layers_6_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152428928)))]; tensor input_341_cast_fp16 = layer_norm(axes = input_341_axes_0, beta = encoder_module_layers_6_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_feed_forward1_weight_to_fp16, x = input_339_cast_fp16)[name = tensor("input_341_cast_fp16")]; - tensor encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152120128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156314496))), name = tensor("encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_palettized, x = input_341_cast_fp16)[name = tensor("linear_55_cast_fp16")]; + tensor encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152431040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156629568))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156625408)))]; + tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_quantized, x = input_341_cast_fp16)[name = tensor("linear_55_cast_fp16")]; tensor input_345_cast_fp16 = silu(x = linear_55_cast_fp16)[name = tensor("input_345_cast_fp16")]; - tensor encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156315072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160509440))), name = tensor("encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_56_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_palettized, x = input_345_cast_fp16)[name = tensor("linear_56_cast_fp16")]; + tensor encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156637824))), scale = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(160833280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160832192)))]; + tensor linear_56_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_quantized, x = input_345_cast_fp16)[name = tensor("linear_56_cast_fp16")]; tensor var_1339_to_fp16 = const()[name = tensor("op_1339_to_fp16"), val = tensor(0x1p-1)]; tensor var_1340_cast_fp16 = mul(x = linear_56_cast_fp16, y = var_1339_to_fp16)[name = tensor("op_1340_cast_fp16")]; tensor input_351_cast_fp16 = add(x = input_339_cast_fp16, y = var_1340_cast_fp16)[name = tensor("input_351_cast_fp16")]; tensor query_13_axes_0 = const()[name = tensor("query_13_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160510016)))]; - tensor encoder_module_layers_6_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160512128)))]; + tensor encoder_module_layers_6_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160835392)))]; + tensor encoder_module_layers_6_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160837504)))]; tensor query_13_cast_fp16 = layer_norm(axes = query_13_axes_0, beta = encoder_module_layers_6_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_self_att_weight_to_fp16, x = input_351_cast_fp16)[name = tensor("query_13_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160514240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161562880))), name = tensor("encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_57_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_palettized, x = query_13_cast_fp16)[name = tensor("linear_57_cast_fp16")]; + tensor encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160839616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161889344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161888256)))]; + tensor linear_57_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_quantized, x = query_13_cast_fp16)[name = tensor("linear_57_cast_fp16")]; tensor var_1356 = const()[name = tensor("op_1356"), val = tensor([1, -1, 8, 128])]; tensor q_37_cast_fp16 = 
reshape(shape = var_1356, x = linear_57_cast_fp16)[name = tensor("q_37_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161563456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162612096))), name = tensor("encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_58_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_palettized, x = query_13_cast_fp16)[name = tensor("linear_58_cast_fp16")]; + tensor encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161891456))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162941184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162940096)))]; + tensor linear_58_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_quantized, x = query_13_cast_fp16)[name = tensor("linear_58_cast_fp16")]; tensor var_1360 = const()[name = tensor("op_1360"), val = tensor([1, -1, 8, 128])]; tensor k_25_cast_fp16 = reshape(shape = var_1360, x = linear_58_cast_fp16)[name = tensor("k_25_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162612672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163661312))), name = tensor("encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_59_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_palettized, x = query_13_cast_fp16)[name = tensor("linear_59_cast_fp16")]; + tensor encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162943296))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163993024))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163991936)))]; + tensor linear_59_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_quantized, x = query_13_cast_fp16)[name = tensor("linear_59_cast_fp16")]; tensor var_1364 = const()[name = tensor("op_1364"), val = tensor([1, -1, 8, 128])]; tensor v_13_cast_fp16 = reshape(shape = var_1364, x = linear_59_cast_fp16)[name = tensor("v_13_cast_fp16")]; tensor value_17_perm_0 = const()[name = tensor("value_17_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_6_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_6_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(163661888)))]; + tensor encoder_module_layers_6_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_6_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163995136)))]; tensor var_1376_cast_fp16 = add(x = q_37_cast_fp16, y = encoder_module_layers_6_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1376_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_6_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163664000)))]; + tensor encoder_module_layers_6_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_6_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163997248)))]; tensor var_1378_cast_fp16 = add(x = q_37_cast_fp16, y = encoder_module_layers_6_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1378_cast_fp16")]; tensor q_with_bias_v_13_perm_0 = const()[name = tensor("q_with_bias_v_13_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_155_transpose_x_0 = const()[name = tensor("x_155_transpose_x_0"), val = tensor(false)]; tensor x_155_transpose_y_0 = const()[name = tensor("x_155_transpose_y_0"), val = tensor(false)]; - tensor op_1380_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163666112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164050176))), name = tensor("op_1380_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1380_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1380_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163999360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164383872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164383424)))]; tensor q_with_bias_v_13_cast_fp16 = transpose(perm = q_with_bias_v_13_perm_0, x = var_1378_cast_fp16)[name = tensor("transpose_270")]; - tensor x_155_cast_fp16 = matmul(transpose_x = x_155_transpose_x_0, transpose_y = x_155_transpose_y_0, x = q_with_bias_v_13_cast_fp16, y = op_1380_to_fp16_palettized)[name = tensor("x_155_cast_fp16")]; + tensor x_155_cast_fp16 = matmul(transpose_x = x_155_transpose_x_0, transpose_y = x_155_transpose_y_0, x = q_with_bias_v_13_cast_fp16, y = op_1380_to_fp16_quantized)[name = tensor("x_155_cast_fp16")]; tensor x_157_pad_0 = const()[name = tensor("x_157_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_157_mode_0 = const()[name = tensor("x_157_mode_0"), val = tensor("constant")]; tensor const_89_to_fp16 = const()[name = tensor("const_89_to_fp16"), val = tensor(0x0p+0)]; @@ -1121,12 +1121,12 @@ program(1.0) tensor var_1413 = const()[name = tensor("op_1413"), val = tensor([1, -1, 1024])]; tensor var_1412_cast_fp16 = transpose(perm = var_1412_perm_0, x = x_161_cast_fp16)[name = tensor("transpose_266")]; tensor input_355_cast_fp16 = reshape(shape = var_1413, x = var_1412_cast_fp16)[name = tensor("input_355_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(164050752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165099392))), name = tensor("encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_61_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_palettized, x = input_355_cast_fp16)[name = tensor("linear_61_cast_fp16")]; + tensor encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164384704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165434432))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165433344)))]; + tensor linear_61_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_quantized, x = input_355_cast_fp16)[name = tensor("linear_61_cast_fp16")]; tensor input_359_cast_fp16 = add(x = input_351_cast_fp16, y = linear_61_cast_fp16)[name = tensor("input_359_cast_fp16")]; tensor x_165_axes_0 = const()[name = tensor("x_165_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165099968)))]; - tensor encoder_module_layers_6_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165102080)))]; + tensor encoder_module_layers_6_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165436544)))]; + tensor encoder_module_layers_6_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165438656)))]; tensor x_165_cast_fp16 = layer_norm(axes = x_165_axes_0, beta = encoder_module_layers_6_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_conv_weight_to_fp16, x = input_359_cast_fp16)[name = tensor("x_165_cast_fp16")]; tensor input_361_perm_0 = const()[name = tensor("input_361_perm_0"), val = tensor([0, 2, 1])]; tensor input_363_pad_type_0 = const()[name = tensor("input_363_pad_type_0"), val = tensor("valid")]; @@ -1134,9 +1134,9 @@ program(1.0) tensor input_363_pad_0 = const()[name = tensor("input_363_pad_0"), val = tensor([0, 0])]; tensor input_363_dilations_0 = const()[name = tensor("input_363_dilations_0"), val = tensor([1])]; tensor input_363_groups_0 = const()[name = tensor("input_363_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165104192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167201408))), name = 
tensor("encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165440768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167540096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167537984)))]; tensor input_361_cast_fp16 = transpose(perm = input_361_perm_0, x = x_165_cast_fp16)[name = tensor("transpose_265")]; - tensor input_363_cast_fp16 = conv(dilations = input_363_dilations_0, groups = input_363_groups_0, pad = input_363_pad_0, pad_type = input_363_pad_type_0, strides = input_363_strides_0, weight = encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_361_cast_fp16)[name = tensor("input_363_cast_fp16")]; + tensor input_363_cast_fp16 = conv(dilations = input_363_dilations_0, groups = input_363_groups_0, pad = input_363_pad_0, pad_type = input_363_pad_type_0, strides = input_363_strides_0, weight = encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_361_cast_fp16)[name = tensor("input_363_cast_fp16")]; tensor x_167_split_num_splits_0 = const()[name = tensor("x_167_split_num_splits_0"), val = tensor(2)]; tensor x_167_split_axis_0 = const()[name = tensor("x_167_split_axis_0"), val = tensor(1)]; tensor x_167_split_cast_fp16_0, tensor x_167_split_cast_fp16_1 = split(axis = x_167_split_axis_0, num_splits = x_167_split_num_splits_0, x = input_363_cast_fp16)[name = tensor("x_167_split_cast_fp16")]; @@ -1152,75 +1152,75 @@ program(1.0) tensor input_369_strides_0 = const()[name = tensor("input_369_strides_0"), val = tensor([1])]; tensor input_369_pad_0 = const()[name = tensor("input_369_pad_0"), val = tensor([0, 0])]; tensor input_369_dilations_0 = const()[name = tensor("input_369_dilations_0"), val = tensor([1])]; - tensor const_275_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167201984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167211264))), name = tensor("const_275_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_276_to_fp16 = const()[name = tensor("const_276_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167211840)))]; - tensor input_371_cast_fp16 = conv(bias = const_276_to_fp16, dilations = input_369_dilations_0, groups = input_369_groups_0, pad = input_369_pad_0, pad_type = input_369_pad_type_0, strides = input_369_strides_0, weight = const_275_to_fp16_palettized, x = input_367_cast_fp16)[name = tensor("input_371_cast_fp16")]; + tensor const_275_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_275_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167544256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167554624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167553536)))]; + tensor const_276_to_fp16 = const()[name = tensor("const_276_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(167556736)))]; + tensor input_371_cast_fp16 = conv(bias = const_276_to_fp16, dilations = input_369_dilations_0, groups = input_369_groups_0, pad = input_369_pad_0, pad_type = input_369_pad_type_0, strides = input_369_strides_0, weight = const_275_to_fp16_quantized, x = input_367_cast_fp16)[name = tensor("input_371_cast_fp16")]; tensor input_373_cast_fp16 = silu(x = input_371_cast_fp16)[name = tensor("input_373_cast_fp16")]; tensor x_169_pad_type_0 = const()[name = tensor("x_169_pad_type_0"), val = tensor("valid")]; tensor x_169_strides_0 = const()[name = tensor("x_169_strides_0"), val = tensor([1])]; tensor x_169_pad_0 = const()[name = tensor("x_169_pad_0"), val = tensor([0, 0])]; tensor x_169_dilations_0 = const()[name = tensor("x_169_dilations_0"), val = tensor([1])]; tensor x_169_groups_0 = const()[name = tensor("x_169_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167213952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168262592))), name = tensor("encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_169_cast_fp16 = conv(dilations = x_169_dilations_0, groups = x_169_groups_0, pad = x_169_pad_0, pad_type = x_169_pad_type_0, strides = x_169_strides_0, weight = encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_373_cast_fp16)[name = tensor("x_169_cast_fp16")]; + tensor encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167558848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168608576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168607488)))]; + tensor x_169_cast_fp16 = conv(dilations = x_169_dilations_0, groups = x_169_groups_0, pad = x_169_pad_0, pad_type = x_169_pad_type_0, strides = x_169_strides_0, weight = encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_373_cast_fp16)[name = tensor("x_169_cast_fp16")]; tensor input_375_perm_0 = const()[name = tensor("input_375_perm_0"), val = tensor([0, 2, 1])]; tensor input_375_cast_fp16 = transpose(perm = input_375_perm_0, x = x_169_cast_fp16)[name = tensor("transpose_264")]; tensor input_377_cast_fp16 = add(x = input_359_cast_fp16, y = input_375_cast_fp16)[name = tensor("input_377_cast_fp16")]; tensor input_379_axes_0 = const()[name = tensor("input_379_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168263168)))]; - tensor encoder_module_layers_6_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168265280)))]; + tensor encoder_module_layers_6_norm_feed_forward2_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_6_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168610688)))]; + tensor encoder_module_layers_6_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168612800)))]; tensor input_379_cast_fp16 = layer_norm(axes = input_379_axes_0, beta = encoder_module_layers_6_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_feed_forward2_weight_to_fp16, x = input_377_cast_fp16)[name = tensor("input_379_cast_fp16")]; - tensor encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168267392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172461760))), name = tensor("encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_palettized, x = input_379_cast_fp16)[name = tensor("linear_62_cast_fp16")]; + tensor encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168614912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172813440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172809280)))]; + tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_quantized, x = input_379_cast_fp16)[name = tensor("linear_62_cast_fp16")]; tensor input_383_cast_fp16 = silu(x = linear_62_cast_fp16)[name = tensor("input_383_cast_fp16")]; - tensor encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172462336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176656704))), name = tensor("encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_63_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_palettized, x = input_383_cast_fp16)[name = tensor("linear_63_cast_fp16")]; + tensor encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172821696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177017152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177016064)))]; + tensor linear_63_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_quantized, x = input_383_cast_fp16)[name = tensor("linear_63_cast_fp16")]; tensor var_1473_to_fp16 = const()[name = tensor("op_1473_to_fp16"), val = tensor(0x1p-1)]; tensor var_1474_cast_fp16 = mul(x = linear_63_cast_fp16, y = var_1473_to_fp16)[name = tensor("op_1474_cast_fp16")]; tensor input_389_cast_fp16 = add(x = input_377_cast_fp16, y = var_1474_cast_fp16)[name = tensor("input_389_cast_fp16")]; tensor input_391_axes_0 = const()[name = tensor("input_391_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176657280)))]; - tensor encoder_module_layers_6_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176659392)))]; + tensor encoder_module_layers_6_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177019264)))]; + tensor encoder_module_layers_6_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177021376)))]; tensor input_391_cast_fp16 = layer_norm(axes = input_391_axes_0, beta = encoder_module_layers_6_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_out_weight_to_fp16, x = input_389_cast_fp16)[name = tensor("input_391_cast_fp16")]; tensor input_393_axes_0 = const()[name = tensor("input_393_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_7_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176661504)))]; - tensor encoder_module_layers_7_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176663616)))]; + tensor encoder_module_layers_7_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177023488)))]; + tensor encoder_module_layers_7_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177025600)))]; tensor input_393_cast_fp16 = layer_norm(axes = input_393_axes_0, beta = encoder_module_layers_7_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_feed_forward1_weight_to_fp16, x = input_391_cast_fp16)[name = tensor("input_393_cast_fp16")]; - tensor encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176665728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(180860096))), name = tensor("encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_palettized"), shape = 
tensor([4096, 1024])]; - tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_palettized, x = input_393_cast_fp16)[name = tensor("linear_64_cast_fp16")]; + tensor encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177027712))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181226240))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181222080)))]; + tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_quantized, x = input_393_cast_fp16)[name = tensor("linear_64_cast_fp16")]; tensor input_397_cast_fp16 = silu(x = linear_64_cast_fp16)[name = tensor("input_397_cast_fp16")]; - tensor encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(180860672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185055040))), name = tensor("encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_65_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_palettized, x = input_397_cast_fp16)[name = tensor("linear_65_cast_fp16")]; + tensor encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181234496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185429952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185428864)))]; + tensor linear_65_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_quantized, x = input_397_cast_fp16)[name = tensor("linear_65_cast_fp16")]; tensor var_1502_to_fp16 = const()[name = tensor("op_1502_to_fp16"), val = tensor(0x1p-1)]; tensor var_1503_cast_fp16 = mul(x = linear_65_cast_fp16, y = var_1502_to_fp16)[name = tensor("op_1503_cast_fp16")]; tensor input_403_cast_fp16 = add(x = input_391_cast_fp16, y = var_1503_cast_fp16)[name = tensor("input_403_cast_fp16")]; tensor query_15_axes_0 = const()[name = tensor("query_15_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_7_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185055616)))]; - tensor encoder_module_layers_7_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185057728)))]; + tensor encoder_module_layers_7_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_self_att_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185432064)))]; + tensor encoder_module_layers_7_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185434176)))]; tensor query_15_cast_fp16 = layer_norm(axes = query_15_axes_0, beta = encoder_module_layers_7_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_self_att_weight_to_fp16, x = input_403_cast_fp16)[name = tensor("query_15_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185059840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186108480))), name = tensor("encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_66_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_palettized, x = query_15_cast_fp16)[name = tensor("linear_66_cast_fp16")]; + tensor encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185436288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186486016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186484928)))]; + tensor linear_66_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_quantized, x = query_15_cast_fp16)[name = tensor("linear_66_cast_fp16")]; tensor var_1519 = const()[name = tensor("op_1519"), val = tensor([1, -1, 8, 128])]; tensor q_43_cast_fp16 = reshape(shape = var_1519, x = linear_66_cast_fp16)[name = tensor("q_43_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186109056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187157696))), name = tensor("encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_67_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_palettized, x = query_15_cast_fp16)[name = tensor("linear_67_cast_fp16")]; + tensor encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186488128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187537856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187536768)))]; + tensor linear_67_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_quantized, x = query_15_cast_fp16)[name = 
tensor("linear_67_cast_fp16")]; tensor var_1523 = const()[name = tensor("op_1523"), val = tensor([1, -1, 8, 128])]; tensor k_29_cast_fp16 = reshape(shape = var_1523, x = linear_67_cast_fp16)[name = tensor("k_29_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187158272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188206912))), name = tensor("encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_68_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_palettized, x = query_15_cast_fp16)[name = tensor("linear_68_cast_fp16")]; + tensor encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187539968))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188589696))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188588608)))]; + tensor linear_68_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_quantized, x = query_15_cast_fp16)[name = tensor("linear_68_cast_fp16")]; tensor var_1527 = const()[name = tensor("op_1527"), val = tensor([1, -1, 8, 128])]; tensor v_15_cast_fp16 = reshape(shape = var_1527, x = linear_68_cast_fp16)[name = tensor("v_15_cast_fp16")]; tensor value_19_perm_0 = const()[name = tensor("value_19_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_7_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_7_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188207488)))]; + tensor encoder_module_layers_7_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_7_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188591808)))]; tensor var_1539_cast_fp16 = add(x = q_43_cast_fp16, y = encoder_module_layers_7_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1539_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_7_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188209600)))]; + tensor encoder_module_layers_7_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_7_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188593920)))]; tensor var_1541_cast_fp16 = add(x = q_43_cast_fp16, y = encoder_module_layers_7_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1541_cast_fp16")]; tensor q_with_bias_v_15_perm_0 = const()[name = tensor("q_with_bias_v_15_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_177_transpose_x_0 = const()[name = tensor("x_177_transpose_x_0"), val = tensor(false)]; tensor x_177_transpose_y_0 = const()[name = tensor("x_177_transpose_y_0"), val = tensor(false)]; - tensor op_1543_to_fp16_palettized = 
constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188211712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188595776))), name = tensor("op_1543_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1543_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1543_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188596032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188980544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188980096)))]; tensor q_with_bias_v_15_cast_fp16 = transpose(perm = q_with_bias_v_15_perm_0, x = var_1541_cast_fp16)[name = tensor("transpose_263")]; - tensor x_177_cast_fp16 = matmul(transpose_x = x_177_transpose_x_0, transpose_y = x_177_transpose_y_0, x = q_with_bias_v_15_cast_fp16, y = op_1543_to_fp16_palettized)[name = tensor("x_177_cast_fp16")]; + tensor x_177_cast_fp16 = matmul(transpose_x = x_177_transpose_x_0, transpose_y = x_177_transpose_y_0, x = q_with_bias_v_15_cast_fp16, y = op_1543_to_fp16_quantized)[name = tensor("x_177_cast_fp16")]; tensor x_179_pad_0 = const()[name = tensor("x_179_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_179_mode_0 = const()[name = tensor("x_179_mode_0"), val = tensor("constant")]; tensor const_99_to_fp16 = const()[name = tensor("const_99_to_fp16"), val = tensor(0x0p+0)]; @@ -1258,12 +1258,12 @@ program(1.0) tensor var_1576 = const()[name = tensor("op_1576"), val = tensor([1, -1, 1024])]; tensor var_1575_cast_fp16 = transpose(perm = var_1575_perm_0, x = x_183_cast_fp16)[name = tensor("transpose_259")]; tensor input_407_cast_fp16 = reshape(shape = var_1576, x = var_1575_cast_fp16)[name = tensor("input_407_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188596352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189644992))), name = tensor("encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_70_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_palettized, x = input_407_cast_fp16)[name = tensor("linear_70_cast_fp16")]; + tensor encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188981376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190031104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190030016)))]; + tensor linear_70_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_quantized, x = input_407_cast_fp16)[name = tensor("linear_70_cast_fp16")]; tensor input_411_cast_fp16 = add(x = input_403_cast_fp16, y = linear_70_cast_fp16)[name = tensor("input_411_cast_fp16")]; tensor x_187_axes_0 = const()[name = tensor("x_187_axes_0"), val = tensor([-1])]; - tensor 
encoder_module_layers_7_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189645568)))]; - tensor encoder_module_layers_7_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189647680)))]; + tensor encoder_module_layers_7_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190033216)))]; + tensor encoder_module_layers_7_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190035328)))]; tensor x_187_cast_fp16 = layer_norm(axes = x_187_axes_0, beta = encoder_module_layers_7_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_conv_weight_to_fp16, x = input_411_cast_fp16)[name = tensor("x_187_cast_fp16")]; tensor input_413_perm_0 = const()[name = tensor("input_413_perm_0"), val = tensor([0, 2, 1])]; tensor input_415_pad_type_0 = const()[name = tensor("input_415_pad_type_0"), val = tensor("valid")]; @@ -1271,9 +1271,9 @@ program(1.0) tensor input_415_pad_0 = const()[name = tensor("input_415_pad_0"), val = tensor([0, 0])]; tensor input_415_dilations_0 = const()[name = tensor("input_415_dilations_0"), val = tensor([1])]; tensor input_415_groups_0 = const()[name = tensor("input_415_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189649792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191747008))), name = tensor("encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190037440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192136768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192134656)))]; tensor input_413_cast_fp16 = transpose(perm = input_413_perm_0, x = x_187_cast_fp16)[name = tensor("transpose_258")]; - tensor input_415_cast_fp16 = conv(dilations = input_415_dilations_0, groups = input_415_groups_0, pad = input_415_pad_0, pad_type = input_415_pad_type_0, strides = input_415_strides_0, weight = encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_413_cast_fp16)[name = tensor("input_415_cast_fp16")]; + tensor input_415_cast_fp16 = conv(dilations = input_415_dilations_0, groups = input_415_groups_0, pad = input_415_pad_0, pad_type = input_415_pad_type_0, strides = input_415_strides_0, weight = encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_413_cast_fp16)[name = tensor("input_415_cast_fp16")]; tensor x_189_split_num_splits_0 = const()[name = tensor("x_189_split_num_splits_0"), val = 
tensor(2)]; tensor x_189_split_axis_0 = const()[name = tensor("x_189_split_axis_0"), val = tensor(1)]; tensor x_189_split_cast_fp16_0, tensor x_189_split_cast_fp16_1 = split(axis = x_189_split_axis_0, num_splits = x_189_split_num_splits_0, x = input_415_cast_fp16)[name = tensor("x_189_split_cast_fp16")]; @@ -1289,75 +1289,75 @@ program(1.0) tensor input_421_strides_0 = const()[name = tensor("input_421_strides_0"), val = tensor([1])]; tensor input_421_pad_0 = const()[name = tensor("input_421_pad_0"), val = tensor([0, 0])]; tensor input_421_dilations_0 = const()[name = tensor("input_421_dilations_0"), val = tensor([1])]; - tensor const_277_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191747584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191756864))), name = tensor("const_277_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_278_to_fp16 = const()[name = tensor("const_278_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191757440)))]; - tensor input_423_cast_fp16 = conv(bias = const_278_to_fp16, dilations = input_421_dilations_0, groups = input_421_groups_0, pad = input_421_pad_0, pad_type = input_421_pad_type_0, strides = input_421_strides_0, weight = const_277_to_fp16_palettized, x = input_419_cast_fp16)[name = tensor("input_423_cast_fp16")]; + tensor const_277_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_277_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192140928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192151296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192150208)))]; + tensor const_278_to_fp16 = const()[name = tensor("const_278_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192153408)))]; + tensor input_423_cast_fp16 = conv(bias = const_278_to_fp16, dilations = input_421_dilations_0, groups = input_421_groups_0, pad = input_421_pad_0, pad_type = input_421_pad_type_0, strides = input_421_strides_0, weight = const_277_to_fp16_quantized, x = input_419_cast_fp16)[name = tensor("input_423_cast_fp16")]; tensor input_425_cast_fp16 = silu(x = input_423_cast_fp16)[name = tensor("input_425_cast_fp16")]; tensor x_191_pad_type_0 = const()[name = tensor("x_191_pad_type_0"), val = tensor("valid")]; tensor x_191_strides_0 = const()[name = tensor("x_191_strides_0"), val = tensor([1])]; tensor x_191_pad_0 = const()[name = tensor("x_191_pad_0"), val = tensor([0, 0])]; tensor x_191_dilations_0 = const()[name = tensor("x_191_dilations_0"), val = tensor([1])]; tensor x_191_groups_0 = const()[name = tensor("x_191_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191759552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192808192))), name = tensor("encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight 
= encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_425_cast_fp16)[name = tensor("x_191_cast_fp16")]; + tensor encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192155520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193205248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193204160)))]; + tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_425_cast_fp16)[name = tensor("x_191_cast_fp16")]; tensor input_427_perm_0 = const()[name = tensor("input_427_perm_0"), val = tensor([0, 2, 1])]; tensor input_427_cast_fp16 = transpose(perm = input_427_perm_0, x = x_191_cast_fp16)[name = tensor("transpose_257")]; tensor input_429_cast_fp16 = add(x = input_411_cast_fp16, y = input_427_cast_fp16)[name = tensor("input_429_cast_fp16")]; tensor input_431_axes_0 = const()[name = tensor("input_431_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_7_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192808768)))]; - tensor encoder_module_layers_7_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192810880)))]; + tensor encoder_module_layers_7_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193207360)))]; + tensor encoder_module_layers_7_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193209472)))]; tensor input_431_cast_fp16 = layer_norm(axes = input_431_axes_0, beta = encoder_module_layers_7_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_feed_forward2_weight_to_fp16, x = input_429_cast_fp16)[name = tensor("input_431_cast_fp16")]; - tensor encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192812992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197007360))), name = tensor("encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_palettized, x = input_431_cast_fp16)[name = tensor("linear_71_cast_fp16")]; + tensor encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193211584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197410112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197405952)))]; + tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_quantized, x = input_431_cast_fp16)[name = tensor("linear_71_cast_fp16")]; tensor input_435_cast_fp16 = silu(x = linear_71_cast_fp16)[name = tensor("input_435_cast_fp16")]; - tensor encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197007936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201202304))), name = tensor("encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_72_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_palettized, x = input_435_cast_fp16)[name = tensor("linear_72_cast_fp16")]; + tensor encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197418368))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201613824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201612736)))]; + tensor linear_72_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_quantized, x = input_435_cast_fp16)[name = tensor("linear_72_cast_fp16")]; tensor var_1636_to_fp16 = const()[name = tensor("op_1636_to_fp16"), val = tensor(0x1p-1)]; tensor var_1637_cast_fp16 = mul(x = linear_72_cast_fp16, y = var_1636_to_fp16)[name = tensor("op_1637_cast_fp16")]; tensor input_441_cast_fp16 = add(x = input_429_cast_fp16, y = var_1637_cast_fp16)[name = tensor("input_441_cast_fp16")]; tensor input_443_axes_0 = const()[name = tensor("input_443_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_7_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201202880)))]; - tensor encoder_module_layers_7_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201204992)))]; + tensor encoder_module_layers_7_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201615936)))]; + tensor encoder_module_layers_7_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201618048)))]; tensor input_443_cast_fp16 = layer_norm(axes = input_443_axes_0, beta = 
encoder_module_layers_7_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_out_weight_to_fp16, x = input_441_cast_fp16)[name = tensor("input_443_cast_fp16")]; tensor input_445_axes_0 = const()[name = tensor("input_445_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201207104)))]; - tensor encoder_module_layers_8_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201209216)))]; + tensor encoder_module_layers_8_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201620160)))]; + tensor encoder_module_layers_8_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201622272)))]; tensor input_445_cast_fp16 = layer_norm(axes = input_445_axes_0, beta = encoder_module_layers_8_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_feed_forward1_weight_to_fp16, x = input_443_cast_fp16)[name = tensor("input_445_cast_fp16")]; - tensor encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201211328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205405696))), name = tensor("encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_palettized, x = input_445_cast_fp16)[name = tensor("linear_73_cast_fp16")]; + tensor encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201624384))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205822912))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205818752)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_quantized, x = input_445_cast_fp16)[name = tensor("linear_73_cast_fp16")]; tensor input_449_cast_fp16 = silu(x = linear_73_cast_fp16)[name = tensor("input_449_cast_fp16")]; - tensor encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205406272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209600640))), name = tensor("encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_74_cast_fp16 
= linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_palettized, x = input_449_cast_fp16)[name = tensor("linear_74_cast_fp16")]; + tensor encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205831168))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210026624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210025536)))]; + tensor linear_74_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_quantized, x = input_449_cast_fp16)[name = tensor("linear_74_cast_fp16")]; tensor var_1665_to_fp16 = const()[name = tensor("op_1665_to_fp16"), val = tensor(0x1p-1)]; tensor var_1666_cast_fp16 = mul(x = linear_74_cast_fp16, y = var_1665_to_fp16)[name = tensor("op_1666_cast_fp16")]; tensor input_455_cast_fp16 = add(x = input_443_cast_fp16, y = var_1666_cast_fp16)[name = tensor("input_455_cast_fp16")]; tensor query_17_axes_0 = const()[name = tensor("query_17_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209601216)))]; - tensor encoder_module_layers_8_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209603328)))]; + tensor encoder_module_layers_8_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210028736)))]; + tensor encoder_module_layers_8_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210030848)))]; tensor query_17_cast_fp16 = layer_norm(axes = query_17_axes_0, beta = encoder_module_layers_8_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_self_att_weight_to_fp16, x = input_455_cast_fp16)[name = tensor("query_17_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209605440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210654080))), name = tensor("encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_75_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_palettized, x = query_17_cast_fp16)[name = tensor("linear_75_cast_fp16")]; + tensor encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(210032960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211082688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211081600)))]; + tensor linear_75_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_quantized, x = query_17_cast_fp16)[name = tensor("linear_75_cast_fp16")]; tensor var_1682 = const()[name = tensor("op_1682"), val = tensor([1, -1, 8, 128])]; tensor q_49_cast_fp16 = reshape(shape = var_1682, x = linear_75_cast_fp16)[name = tensor("q_49_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210654656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211703296))), name = tensor("encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_76_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_palettized, x = query_17_cast_fp16)[name = tensor("linear_76_cast_fp16")]; + tensor encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211084800))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212134528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212133440)))]; + tensor linear_76_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_quantized, x = query_17_cast_fp16)[name = tensor("linear_76_cast_fp16")]; tensor var_1686 = const()[name = tensor("op_1686"), val = tensor([1, -1, 8, 128])]; tensor k_33_cast_fp16 = reshape(shape = var_1686, x = linear_76_cast_fp16)[name = tensor("k_33_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211703872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212752512))), name = tensor("encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_77_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_palettized, x = query_17_cast_fp16)[name = tensor("linear_77_cast_fp16")]; + tensor encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212136640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213186368))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213185280)))]; + tensor linear_77_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_quantized, x = 
query_17_cast_fp16)[name = tensor("linear_77_cast_fp16")]; tensor var_1690 = const()[name = tensor("op_1690"), val = tensor([1, -1, 8, 128])]; tensor v_17_cast_fp16 = reshape(shape = var_1690, x = linear_77_cast_fp16)[name = tensor("v_17_cast_fp16")]; tensor value_21_perm_0 = const()[name = tensor("value_21_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_8_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_8_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212753088)))]; + tensor encoder_module_layers_8_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_8_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213188480)))]; tensor var_1702_cast_fp16 = add(x = q_49_cast_fp16, y = encoder_module_layers_8_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1702_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_8_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212755200)))]; + tensor encoder_module_layers_8_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_8_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213190592)))]; tensor var_1704_cast_fp16 = add(x = q_49_cast_fp16, y = encoder_module_layers_8_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1704_cast_fp16")]; tensor q_with_bias_v_17_perm_0 = const()[name = tensor("q_with_bias_v_17_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_199_transpose_x_0 = const()[name = tensor("x_199_transpose_x_0"), val = tensor(false)]; tensor x_199_transpose_y_0 = const()[name = tensor("x_199_transpose_y_0"), val = tensor(false)]; - tensor op_1706_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212757312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213141376))), name = tensor("op_1706_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1706_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1706_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213192704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213577216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213576768)))]; tensor q_with_bias_v_17_cast_fp16 = transpose(perm = q_with_bias_v_17_perm_0, x = var_1704_cast_fp16)[name = tensor("transpose_256")]; - tensor x_199_cast_fp16 = matmul(transpose_x = x_199_transpose_x_0, transpose_y = x_199_transpose_y_0, x = q_with_bias_v_17_cast_fp16, y = op_1706_to_fp16_palettized)[name = tensor("x_199_cast_fp16")]; + tensor x_199_cast_fp16 = matmul(transpose_x = x_199_transpose_x_0, transpose_y = x_199_transpose_y_0, x = q_with_bias_v_17_cast_fp16, y = op_1706_to_fp16_quantized)[name = tensor("x_199_cast_fp16")]; tensor x_201_pad_0 = const()[name = tensor("x_201_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_201_mode_0 = const()[name = tensor("x_201_mode_0"), val = tensor("constant")]; tensor const_109_to_fp16 = const()[name = tensor("const_109_to_fp16"), val = 
tensor(0x0p+0)]; @@ -1395,12 +1395,12 @@ program(1.0) tensor var_1739 = const()[name = tensor("op_1739"), val = tensor([1, -1, 1024])]; tensor var_1738_cast_fp16 = transpose(perm = var_1738_perm_0, x = x_205_cast_fp16)[name = tensor("transpose_252")]; tensor input_459_cast_fp16 = reshape(shape = var_1739, x = var_1738_cast_fp16)[name = tensor("input_459_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213141952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214190592))), name = tensor("encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_79_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_palettized, x = input_459_cast_fp16)[name = tensor("linear_79_cast_fp16")]; + tensor encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213578048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214627776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214626688)))]; + tensor linear_79_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_quantized, x = input_459_cast_fp16)[name = tensor("linear_79_cast_fp16")]; tensor input_463_cast_fp16 = add(x = input_455_cast_fp16, y = linear_79_cast_fp16)[name = tensor("input_463_cast_fp16")]; tensor x_209_axes_0 = const()[name = tensor("x_209_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214191168)))]; - tensor encoder_module_layers_8_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214193280)))]; + tensor encoder_module_layers_8_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214629888)))]; + tensor encoder_module_layers_8_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214632000)))]; tensor x_209_cast_fp16 = layer_norm(axes = x_209_axes_0, beta = encoder_module_layers_8_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_conv_weight_to_fp16, x = input_463_cast_fp16)[name = tensor("x_209_cast_fp16")]; tensor input_465_perm_0 = const()[name = tensor("input_465_perm_0"), val = tensor([0, 2, 1])]; tensor input_467_pad_type_0 = const()[name = tensor("input_467_pad_type_0"), val = tensor("valid")]; @@ -1408,9 +1408,9 @@ program(1.0) tensor input_467_pad_0 = const()[name = tensor("input_467_pad_0"), val = tensor([0, 0])]; tensor input_467_dilations_0 = const()[name = 
tensor("input_467_dilations_0"), val = tensor([1])]; tensor input_467_groups_0 = const()[name = tensor("input_467_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214195392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216292608))), name = tensor("encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214634112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216733440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216731328)))]; tensor input_465_cast_fp16 = transpose(perm = input_465_perm_0, x = x_209_cast_fp16)[name = tensor("transpose_251")]; - tensor input_467_cast_fp16 = conv(dilations = input_467_dilations_0, groups = input_467_groups_0, pad = input_467_pad_0, pad_type = input_467_pad_type_0, strides = input_467_strides_0, weight = encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_465_cast_fp16)[name = tensor("input_467_cast_fp16")]; + tensor input_467_cast_fp16 = conv(dilations = input_467_dilations_0, groups = input_467_groups_0, pad = input_467_pad_0, pad_type = input_467_pad_type_0, strides = input_467_strides_0, weight = encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_465_cast_fp16)[name = tensor("input_467_cast_fp16")]; tensor x_211_split_num_splits_0 = const()[name = tensor("x_211_split_num_splits_0"), val = tensor(2)]; tensor x_211_split_axis_0 = const()[name = tensor("x_211_split_axis_0"), val = tensor(1)]; tensor x_211_split_cast_fp16_0, tensor x_211_split_cast_fp16_1 = split(axis = x_211_split_axis_0, num_splits = x_211_split_num_splits_0, x = input_467_cast_fp16)[name = tensor("x_211_split_cast_fp16")]; @@ -1426,75 +1426,75 @@ program(1.0) tensor input_473_strides_0 = const()[name = tensor("input_473_strides_0"), val = tensor([1])]; tensor input_473_pad_0 = const()[name = tensor("input_473_pad_0"), val = tensor([0, 0])]; tensor input_473_dilations_0 = const()[name = tensor("input_473_dilations_0"), val = tensor([1])]; - tensor const_279_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216293184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216302464))), name = tensor("const_279_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_280_to_fp16 = const()[name = tensor("const_280_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216303040)))]; - tensor input_475_cast_fp16 = conv(bias = const_280_to_fp16, dilations = input_473_dilations_0, groups = input_473_groups_0, pad = input_473_pad_0, pad_type = input_473_pad_type_0, strides = input_473_strides_0, weight = const_279_to_fp16_palettized, x = input_471_cast_fp16)[name = tensor("input_475_cast_fp16")]; + tensor const_279_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("const_279_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216737600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216747968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216746880)))]; + tensor const_280_to_fp16 = const()[name = tensor("const_280_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216750080)))]; + tensor input_475_cast_fp16 = conv(bias = const_280_to_fp16, dilations = input_473_dilations_0, groups = input_473_groups_0, pad = input_473_pad_0, pad_type = input_473_pad_type_0, strides = input_473_strides_0, weight = const_279_to_fp16_quantized, x = input_471_cast_fp16)[name = tensor("input_475_cast_fp16")]; tensor input_477_cast_fp16 = silu(x = input_475_cast_fp16)[name = tensor("input_477_cast_fp16")]; tensor x_213_pad_type_0 = const()[name = tensor("x_213_pad_type_0"), val = tensor("valid")]; tensor x_213_strides_0 = const()[name = tensor("x_213_strides_0"), val = tensor([1])]; tensor x_213_pad_0 = const()[name = tensor("x_213_pad_0"), val = tensor([0, 0])]; tensor x_213_dilations_0 = const()[name = tensor("x_213_dilations_0"), val = tensor([1])]; tensor x_213_groups_0 = const()[name = tensor("x_213_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216305152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217353792))), name = tensor("encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_213_cast_fp16 = conv(dilations = x_213_dilations_0, groups = x_213_groups_0, pad = x_213_pad_0, pad_type = x_213_pad_type_0, strides = x_213_strides_0, weight = encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_477_cast_fp16)[name = tensor("x_213_cast_fp16")]; + tensor encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216752192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217801920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217800832)))]; + tensor x_213_cast_fp16 = conv(dilations = x_213_dilations_0, groups = x_213_groups_0, pad = x_213_pad_0, pad_type = x_213_pad_type_0, strides = x_213_strides_0, weight = encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_477_cast_fp16)[name = tensor("x_213_cast_fp16")]; tensor input_479_perm_0 = const()[name = tensor("input_479_perm_0"), val = tensor([0, 2, 1])]; tensor input_479_cast_fp16 = transpose(perm = input_479_perm_0, x = x_213_cast_fp16)[name = tensor("transpose_250")]; tensor input_481_cast_fp16 = add(x = input_463_cast_fp16, y = input_479_cast_fp16)[name = tensor("input_481_cast_fp16")]; tensor input_483_axes_0 = const()[name = tensor("input_483_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward2_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217354368)))]; - tensor encoder_module_layers_8_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217356480)))]; + tensor encoder_module_layers_8_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217804032)))]; + tensor encoder_module_layers_8_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217806144)))]; tensor input_483_cast_fp16 = layer_norm(axes = input_483_axes_0, beta = encoder_module_layers_8_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_feed_forward2_weight_to_fp16, x = input_481_cast_fp16)[name = tensor("input_483_cast_fp16")]; - tensor encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217358592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221552960))), name = tensor("encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_palettized, x = input_483_cast_fp16)[name = tensor("linear_80_cast_fp16")]; + tensor encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217808256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222006784))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222002624)))]; + tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_quantized, x = input_483_cast_fp16)[name = tensor("linear_80_cast_fp16")]; tensor input_487_cast_fp16 = silu(x = linear_80_cast_fp16)[name = tensor("input_487_cast_fp16")]; - tensor encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221553536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225747904))), name = tensor("encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_81_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_palettized, x = input_487_cast_fp16)[name = tensor("linear_81_cast_fp16")]; + tensor encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path 
= tensor("@model_path/weights/weight.bin"), offset = tensor(222015040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226210496))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226209408)))]; + tensor linear_81_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_quantized, x = input_487_cast_fp16)[name = tensor("linear_81_cast_fp16")]; tensor var_1799_to_fp16 = const()[name = tensor("op_1799_to_fp16"), val = tensor(0x1p-1)]; tensor var_1800_cast_fp16 = mul(x = linear_81_cast_fp16, y = var_1799_to_fp16)[name = tensor("op_1800_cast_fp16")]; tensor input_493_cast_fp16 = add(x = input_481_cast_fp16, y = var_1800_cast_fp16)[name = tensor("input_493_cast_fp16")]; tensor input_495_axes_0 = const()[name = tensor("input_495_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225748480)))]; - tensor encoder_module_layers_8_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225750592)))]; + tensor encoder_module_layers_8_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226212608)))]; + tensor encoder_module_layers_8_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226214720)))]; tensor input_495_cast_fp16 = layer_norm(axes = input_495_axes_0, beta = encoder_module_layers_8_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_out_weight_to_fp16, x = input_493_cast_fp16)[name = tensor("input_495_cast_fp16")]; tensor input_497_axes_0 = const()[name = tensor("input_497_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225752704)))]; - tensor encoder_module_layers_9_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225754816)))]; + tensor encoder_module_layers_9_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226216832)))]; + tensor encoder_module_layers_9_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226218944)))]; tensor input_497_cast_fp16 = layer_norm(axes = input_497_axes_0, beta = encoder_module_layers_9_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_feed_forward1_weight_to_fp16, x = input_495_cast_fp16)[name = tensor("input_497_cast_fp16")]; - tensor 
encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225756928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(229951296))), name = tensor("encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_82_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_palettized, x = input_497_cast_fp16)[name = tensor("linear_82_cast_fp16")]; + tensor encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226221056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(230419584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(230415424)))]; + tensor linear_82_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_quantized, x = input_497_cast_fp16)[name = tensor("linear_82_cast_fp16")]; tensor input_501_cast_fp16 = silu(x = linear_82_cast_fp16)[name = tensor("input_501_cast_fp16")]; - tensor encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(229951872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234146240))), name = tensor("encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_83_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_palettized, x = input_501_cast_fp16)[name = tensor("linear_83_cast_fp16")]; + tensor encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(230427840))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234623296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234622208)))]; + tensor linear_83_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_quantized, x = input_501_cast_fp16)[name = tensor("linear_83_cast_fp16")]; tensor var_1828_to_fp16 = const()[name = tensor("op_1828_to_fp16"), val = tensor(0x1p-1)]; tensor var_1829_cast_fp16 = mul(x = linear_83_cast_fp16, y = var_1828_to_fp16)[name = tensor("op_1829_cast_fp16")]; tensor input_507_cast_fp16 = add(x = input_495_cast_fp16, y = var_1829_cast_fp16)[name = tensor("input_507_cast_fp16")]; tensor query_19_axes_0 = const()[name = tensor("query_19_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(234146816)))]; - tensor encoder_module_layers_9_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234148928)))]; + tensor encoder_module_layers_9_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234625408)))]; + tensor encoder_module_layers_9_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234627520)))]; tensor query_19_cast_fp16 = layer_norm(axes = query_19_axes_0, beta = encoder_module_layers_9_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_self_att_weight_to_fp16, x = input_507_cast_fp16)[name = tensor("query_19_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234151040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235199680))), name = tensor("encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_84_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_palettized, x = query_19_cast_fp16)[name = tensor("linear_84_cast_fp16")]; + tensor encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234629632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235679360))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235678272)))]; + tensor linear_84_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_quantized, x = query_19_cast_fp16)[name = tensor("linear_84_cast_fp16")]; tensor var_1845 = const()[name = tensor("op_1845"), val = tensor([1, -1, 8, 128])]; tensor q_55_cast_fp16 = reshape(shape = var_1845, x = linear_84_cast_fp16)[name = tensor("q_55_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235200256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236248896))), name = tensor("encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_85_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_palettized, x = query_19_cast_fp16)[name = tensor("linear_85_cast_fp16")]; + tensor encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(235681472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236731200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236730112)))]; + tensor linear_85_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_quantized, x = query_19_cast_fp16)[name = tensor("linear_85_cast_fp16")]; tensor var_1849 = const()[name = tensor("op_1849"), val = tensor([1, -1, 8, 128])]; tensor k_37_cast_fp16 = reshape(shape = var_1849, x = linear_85_cast_fp16)[name = tensor("k_37_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236249472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237298112))), name = tensor("encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_86_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_palettized, x = query_19_cast_fp16)[name = tensor("linear_86_cast_fp16")]; + tensor encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236733312))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237783040))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237781952)))]; + tensor linear_86_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_quantized, x = query_19_cast_fp16)[name = tensor("linear_86_cast_fp16")]; tensor var_1853 = const()[name = tensor("op_1853"), val = tensor([1, -1, 8, 128])]; tensor v_19_cast_fp16 = reshape(shape = var_1853, x = linear_86_cast_fp16)[name = tensor("v_19_cast_fp16")]; tensor value_23_perm_0 = const()[name = tensor("value_23_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_9_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_9_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237298688)))]; + tensor encoder_module_layers_9_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_9_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237785152)))]; tensor var_1865_cast_fp16 = add(x = q_55_cast_fp16, y = encoder_module_layers_9_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1865_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_9_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237300800)))]; + tensor encoder_module_layers_9_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_9_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237787264)))]; tensor var_1867_cast_fp16 = add(x = q_55_cast_fp16, y = 
encoder_module_layers_9_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1867_cast_fp16")]; tensor q_with_bias_v_19_perm_0 = const()[name = tensor("q_with_bias_v_19_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_221_transpose_x_0 = const()[name = tensor("x_221_transpose_x_0"), val = tensor(false)]; tensor x_221_transpose_y_0 = const()[name = tensor("x_221_transpose_y_0"), val = tensor(false)]; - tensor op_1869_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237302912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237686976))), name = tensor("op_1869_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1869_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1869_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237789376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238173888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238173440)))]; tensor q_with_bias_v_19_cast_fp16 = transpose(perm = q_with_bias_v_19_perm_0, x = var_1867_cast_fp16)[name = tensor("transpose_249")]; - tensor x_221_cast_fp16 = matmul(transpose_x = x_221_transpose_x_0, transpose_y = x_221_transpose_y_0, x = q_with_bias_v_19_cast_fp16, y = op_1869_to_fp16_palettized)[name = tensor("x_221_cast_fp16")]; + tensor x_221_cast_fp16 = matmul(transpose_x = x_221_transpose_x_0, transpose_y = x_221_transpose_y_0, x = q_with_bias_v_19_cast_fp16, y = op_1869_to_fp16_quantized)[name = tensor("x_221_cast_fp16")]; tensor x_223_pad_0 = const()[name = tensor("x_223_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_223_mode_0 = const()[name = tensor("x_223_mode_0"), val = tensor("constant")]; tensor const_119_to_fp16 = const()[name = tensor("const_119_to_fp16"), val = tensor(0x0p+0)]; @@ -1532,12 +1532,12 @@ program(1.0) tensor var_1902 = const()[name = tensor("op_1902"), val = tensor([1, -1, 1024])]; tensor var_1901_cast_fp16 = transpose(perm = var_1901_perm_0, x = x_227_cast_fp16)[name = tensor("transpose_245")]; tensor input_511_cast_fp16 = reshape(shape = var_1902, x = var_1901_cast_fp16)[name = tensor("input_511_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237687552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238736192))), name = tensor("encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_88_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_palettized, x = input_511_cast_fp16)[name = tensor("linear_88_cast_fp16")]; + tensor encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238174720))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239224448))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(239223360)))]; + tensor linear_88_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_quantized, x = input_511_cast_fp16)[name = tensor("linear_88_cast_fp16")]; tensor input_515_cast_fp16 = add(x = input_507_cast_fp16, y = linear_88_cast_fp16)[name = tensor("input_515_cast_fp16")]; tensor x_231_axes_0 = const()[name = tensor("x_231_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238736768)))]; - tensor encoder_module_layers_9_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238738880)))]; + tensor encoder_module_layers_9_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239226560)))]; + tensor encoder_module_layers_9_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239228672)))]; tensor x_231_cast_fp16 = layer_norm(axes = x_231_axes_0, beta = encoder_module_layers_9_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_conv_weight_to_fp16, x = input_515_cast_fp16)[name = tensor("x_231_cast_fp16")]; tensor input_517_perm_0 = const()[name = tensor("input_517_perm_0"), val = tensor([0, 2, 1])]; tensor input_519_pad_type_0 = const()[name = tensor("input_519_pad_type_0"), val = tensor("valid")]; @@ -1545,9 +1545,9 @@ program(1.0) tensor input_519_pad_0 = const()[name = tensor("input_519_pad_0"), val = tensor([0, 0])]; tensor input_519_dilations_0 = const()[name = tensor("input_519_dilations_0"), val = tensor([1])]; tensor input_519_groups_0 = const()[name = tensor("input_519_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238740992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240838208))), name = tensor("encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239230784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241330112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241328000)))]; tensor input_517_cast_fp16 = transpose(perm = input_517_perm_0, x = x_231_cast_fp16)[name = tensor("transpose_244")]; - tensor input_519_cast_fp16 = conv(dilations = input_519_dilations_0, groups = input_519_groups_0, pad = input_519_pad_0, pad_type = input_519_pad_type_0, strides = input_519_strides_0, weight = encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_517_cast_fp16)[name = 
tensor("input_519_cast_fp16")]; + tensor input_519_cast_fp16 = conv(dilations = input_519_dilations_0, groups = input_519_groups_0, pad = input_519_pad_0, pad_type = input_519_pad_type_0, strides = input_519_strides_0, weight = encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_517_cast_fp16)[name = tensor("input_519_cast_fp16")]; tensor x_233_split_num_splits_0 = const()[name = tensor("x_233_split_num_splits_0"), val = tensor(2)]; tensor x_233_split_axis_0 = const()[name = tensor("x_233_split_axis_0"), val = tensor(1)]; tensor x_233_split_cast_fp16_0, tensor x_233_split_cast_fp16_1 = split(axis = x_233_split_axis_0, num_splits = x_233_split_num_splits_0, x = input_519_cast_fp16)[name = tensor("x_233_split_cast_fp16")]; @@ -1563,75 +1563,75 @@ program(1.0) tensor input_525_strides_0 = const()[name = tensor("input_525_strides_0"), val = tensor([1])]; tensor input_525_pad_0 = const()[name = tensor("input_525_pad_0"), val = tensor([0, 0])]; tensor input_525_dilations_0 = const()[name = tensor("input_525_dilations_0"), val = tensor([1])]; - tensor const_281_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240838784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240848064))), name = tensor("const_281_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_282_to_fp16 = const()[name = tensor("const_282_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240848640)))]; - tensor input_527_cast_fp16 = conv(bias = const_282_to_fp16, dilations = input_525_dilations_0, groups = input_525_groups_0, pad = input_525_pad_0, pad_type = input_525_pad_type_0, strides = input_525_strides_0, weight = const_281_to_fp16_palettized, x = input_523_cast_fp16)[name = tensor("input_527_cast_fp16")]; + tensor const_281_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_281_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241334272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241344640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241343552)))]; + tensor const_282_to_fp16 = const()[name = tensor("const_282_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241346752)))]; + tensor input_527_cast_fp16 = conv(bias = const_282_to_fp16, dilations = input_525_dilations_0, groups = input_525_groups_0, pad = input_525_pad_0, pad_type = input_525_pad_type_0, strides = input_525_strides_0, weight = const_281_to_fp16_quantized, x = input_523_cast_fp16)[name = tensor("input_527_cast_fp16")]; tensor input_529_cast_fp16 = silu(x = input_527_cast_fp16)[name = tensor("input_529_cast_fp16")]; tensor x_235_pad_type_0 = const()[name = tensor("x_235_pad_type_0"), val = tensor("valid")]; tensor x_235_strides_0 = const()[name = tensor("x_235_strides_0"), val = tensor([1])]; tensor x_235_pad_0 = const()[name = tensor("x_235_pad_0"), val = tensor([0, 0])]; tensor x_235_dilations_0 = const()[name = tensor("x_235_dilations_0"), val = tensor([1])]; tensor x_235_groups_0 = const()[name = tensor("x_235_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(240850752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241899392))), name = tensor("encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_235_cast_fp16 = conv(dilations = x_235_dilations_0, groups = x_235_groups_0, pad = x_235_pad_0, pad_type = x_235_pad_type_0, strides = x_235_strides_0, weight = encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_529_cast_fp16)[name = tensor("x_235_cast_fp16")]; + tensor encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241348864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242398592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242397504)))]; + tensor x_235_cast_fp16 = conv(dilations = x_235_dilations_0, groups = x_235_groups_0, pad = x_235_pad_0, pad_type = x_235_pad_type_0, strides = x_235_strides_0, weight = encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_529_cast_fp16)[name = tensor("x_235_cast_fp16")]; tensor input_531_perm_0 = const()[name = tensor("input_531_perm_0"), val = tensor([0, 2, 1])]; tensor input_531_cast_fp16 = transpose(perm = input_531_perm_0, x = x_235_cast_fp16)[name = tensor("transpose_243")]; tensor input_533_cast_fp16 = add(x = input_515_cast_fp16, y = input_531_cast_fp16)[name = tensor("input_533_cast_fp16")]; tensor input_535_axes_0 = const()[name = tensor("input_535_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241899968)))]; - tensor encoder_module_layers_9_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241902080)))]; + tensor encoder_module_layers_9_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242400704)))]; + tensor encoder_module_layers_9_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242402816)))]; tensor input_535_cast_fp16 = layer_norm(axes = input_535_axes_0, beta = encoder_module_layers_9_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_feed_forward2_weight_to_fp16, x = input_533_cast_fp16)[name = tensor("input_535_cast_fp16")]; - tensor encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241904192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246098560))), name = 
tensor("encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_palettized, x = input_535_cast_fp16)[name = tensor("linear_89_cast_fp16")]; + tensor encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242404928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246603456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246599296)))]; + tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_quantized, x = input_535_cast_fp16)[name = tensor("linear_89_cast_fp16")]; tensor input_539_cast_fp16 = silu(x = linear_89_cast_fp16)[name = tensor("input_539_cast_fp16")]; - tensor encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246099136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250293504))), name = tensor("encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_90_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_palettized, x = input_539_cast_fp16)[name = tensor("linear_90_cast_fp16")]; + tensor encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246611712))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250807168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250806080)))]; + tensor linear_90_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_quantized, x = input_539_cast_fp16)[name = tensor("linear_90_cast_fp16")]; tensor var_1962_to_fp16 = const()[name = tensor("op_1962_to_fp16"), val = tensor(0x1p-1)]; tensor var_1963_cast_fp16 = mul(x = linear_90_cast_fp16, y = var_1962_to_fp16)[name = tensor("op_1963_cast_fp16")]; tensor input_545_cast_fp16 = add(x = input_533_cast_fp16, y = var_1963_cast_fp16)[name = tensor("input_545_cast_fp16")]; tensor input_547_axes_0 = const()[name = tensor("input_547_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250294080)))]; - tensor encoder_module_layers_9_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250296192)))]; + tensor encoder_module_layers_9_norm_out_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_9_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250809280)))]; + tensor encoder_module_layers_9_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250811392)))]; tensor input_547_cast_fp16 = layer_norm(axes = input_547_axes_0, beta = encoder_module_layers_9_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_out_weight_to_fp16, x = input_545_cast_fp16)[name = tensor("input_547_cast_fp16")]; tensor input_549_axes_0 = const()[name = tensor("input_549_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250298304)))]; - tensor encoder_module_layers_10_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250300416)))]; + tensor encoder_module_layers_10_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250813504)))]; + tensor encoder_module_layers_10_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250815616)))]; tensor input_549_cast_fp16 = layer_norm(axes = input_549_axes_0, beta = encoder_module_layers_10_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_feed_forward1_weight_to_fp16, x = input_547_cast_fp16)[name = tensor("input_549_cast_fp16")]; - tensor encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250302528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254496896))), name = tensor("encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_palettized, x = input_549_cast_fp16)[name = tensor("linear_91_cast_fp16")]; + tensor encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250817728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255016256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255012096)))]; + tensor linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_quantized, x = input_549_cast_fp16)[name = tensor("linear_91_cast_fp16")]; tensor input_553_cast_fp16 = silu(x = linear_91_cast_fp16)[name = 
tensor("input_553_cast_fp16")]; - tensor encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254497472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258691840))), name = tensor("encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_92_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_palettized, x = input_553_cast_fp16)[name = tensor("linear_92_cast_fp16")]; + tensor encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255024512))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259219968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259218880)))]; + tensor linear_92_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_quantized, x = input_553_cast_fp16)[name = tensor("linear_92_cast_fp16")]; tensor var_1991_to_fp16 = const()[name = tensor("op_1991_to_fp16"), val = tensor(0x1p-1)]; tensor var_1992_cast_fp16 = mul(x = linear_92_cast_fp16, y = var_1991_to_fp16)[name = tensor("op_1992_cast_fp16")]; tensor input_559_cast_fp16 = add(x = input_547_cast_fp16, y = var_1992_cast_fp16)[name = tensor("input_559_cast_fp16")]; tensor query_21_axes_0 = const()[name = tensor("query_21_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258692416)))]; - tensor encoder_module_layers_10_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258694528)))]; + tensor encoder_module_layers_10_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259222080)))]; + tensor encoder_module_layers_10_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259224192)))]; tensor query_21_cast_fp16 = layer_norm(axes = query_21_axes_0, beta = encoder_module_layers_10_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_self_att_weight_to_fp16, x = input_559_cast_fp16)[name = tensor("query_21_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258696640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259745280))), name = tensor("encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - 
tensor linear_93_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_palettized, x = query_21_cast_fp16)[name = tensor("linear_93_cast_fp16")]; + tensor encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259226304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260276032))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260274944)))]; + tensor linear_93_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_quantized, x = query_21_cast_fp16)[name = tensor("linear_93_cast_fp16")]; tensor var_2008 = const()[name = tensor("op_2008"), val = tensor([1, -1, 8, 128])]; tensor q_61_cast_fp16 = reshape(shape = var_2008, x = linear_93_cast_fp16)[name = tensor("q_61_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259745856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260794496))), name = tensor("encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_94_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_palettized, x = query_21_cast_fp16)[name = tensor("linear_94_cast_fp16")]; + tensor encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260278144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261327872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261326784)))]; + tensor linear_94_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_quantized, x = query_21_cast_fp16)[name = tensor("linear_94_cast_fp16")]; tensor var_2012 = const()[name = tensor("op_2012"), val = tensor([1, -1, 8, 128])]; tensor k_41_cast_fp16 = reshape(shape = var_2012, x = linear_94_cast_fp16)[name = tensor("k_41_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260795072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261843712))), name = tensor("encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_95_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_palettized, x = query_21_cast_fp16)[name = tensor("linear_95_cast_fp16")]; + tensor encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261329984))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262379712))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262378624)))]; + tensor linear_95_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_quantized, x = query_21_cast_fp16)[name = tensor("linear_95_cast_fp16")]; tensor var_2016 = const()[name = tensor("op_2016"), val = tensor([1, -1, 8, 128])]; tensor v_21_cast_fp16 = reshape(shape = var_2016, x = linear_95_cast_fp16)[name = tensor("v_21_cast_fp16")]; tensor value_25_perm_0 = const()[name = tensor("value_25_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_10_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_10_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261844288)))]; + tensor encoder_module_layers_10_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_10_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262381824)))]; tensor var_2028_cast_fp16 = add(x = q_61_cast_fp16, y = encoder_module_layers_10_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2028_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_10_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261846400)))]; + tensor encoder_module_layers_10_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_10_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262383936)))]; tensor var_2030_cast_fp16 = add(x = q_61_cast_fp16, y = encoder_module_layers_10_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2030_cast_fp16")]; tensor q_with_bias_v_21_perm_0 = const()[name = tensor("q_with_bias_v_21_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_243_transpose_x_0 = const()[name = tensor("x_243_transpose_x_0"), val = tensor(false)]; tensor x_243_transpose_y_0 = const()[name = tensor("x_243_transpose_y_0"), val = tensor(false)]; - tensor op_2032_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261848512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262232576))), name = tensor("op_2032_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2032_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2032_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262386048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262770560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262770112)))]; tensor q_with_bias_v_21_cast_fp16 = transpose(perm = q_with_bias_v_21_perm_0, x = var_2030_cast_fp16)[name = tensor("transpose_242")]; - tensor x_243_cast_fp16 = matmul(transpose_x = x_243_transpose_x_0, transpose_y = x_243_transpose_y_0, x = 
q_with_bias_v_21_cast_fp16, y = op_2032_to_fp16_palettized)[name = tensor("x_243_cast_fp16")]; + tensor x_243_cast_fp16 = matmul(transpose_x = x_243_transpose_x_0, transpose_y = x_243_transpose_y_0, x = q_with_bias_v_21_cast_fp16, y = op_2032_to_fp16_quantized)[name = tensor("x_243_cast_fp16")]; tensor x_245_pad_0 = const()[name = tensor("x_245_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_245_mode_0 = const()[name = tensor("x_245_mode_0"), val = tensor("constant")]; tensor const_129_to_fp16 = const()[name = tensor("const_129_to_fp16"), val = tensor(0x0p+0)]; @@ -1669,12 +1669,12 @@ program(1.0) tensor var_2065 = const()[name = tensor("op_2065"), val = tensor([1, -1, 1024])]; tensor var_2064_cast_fp16 = transpose(perm = var_2064_perm_0, x = x_249_cast_fp16)[name = tensor("transpose_238")]; tensor input_563_cast_fp16 = reshape(shape = var_2065, x = var_2064_cast_fp16)[name = tensor("input_563_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262233152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263281792))), name = tensor("encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_97_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_palettized, x = input_563_cast_fp16)[name = tensor("linear_97_cast_fp16")]; + tensor encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262771392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263821120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263820032)))]; + tensor linear_97_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_quantized, x = input_563_cast_fp16)[name = tensor("linear_97_cast_fp16")]; tensor input_567_cast_fp16 = add(x = input_559_cast_fp16, y = linear_97_cast_fp16)[name = tensor("input_567_cast_fp16")]; tensor x_253_axes_0 = const()[name = tensor("x_253_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263282368)))]; - tensor encoder_module_layers_10_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263284480)))]; + tensor encoder_module_layers_10_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263823232)))]; + tensor encoder_module_layers_10_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263825344)))]; tensor x_253_cast_fp16 = layer_norm(axes = x_253_axes_0, beta 
= encoder_module_layers_10_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_conv_weight_to_fp16, x = input_567_cast_fp16)[name = tensor("x_253_cast_fp16")]; tensor input_569_perm_0 = const()[name = tensor("input_569_perm_0"), val = tensor([0, 2, 1])]; tensor input_571_pad_type_0 = const()[name = tensor("input_571_pad_type_0"), val = tensor("valid")]; @@ -1682,9 +1682,9 @@ program(1.0) tensor input_571_pad_0 = const()[name = tensor("input_571_pad_0"), val = tensor([0, 0])]; tensor input_571_dilations_0 = const()[name = tensor("input_571_dilations_0"), val = tensor([1])]; tensor input_571_groups_0 = const()[name = tensor("input_571_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263286592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265383808))), name = tensor("encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263827456))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265926784))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265924672)))]; tensor input_569_cast_fp16 = transpose(perm = input_569_perm_0, x = x_253_cast_fp16)[name = tensor("transpose_237")]; - tensor input_571_cast_fp16 = conv(dilations = input_571_dilations_0, groups = input_571_groups_0, pad = input_571_pad_0, pad_type = input_571_pad_type_0, strides = input_571_strides_0, weight = encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_569_cast_fp16)[name = tensor("input_571_cast_fp16")]; + tensor input_571_cast_fp16 = conv(dilations = input_571_dilations_0, groups = input_571_groups_0, pad = input_571_pad_0, pad_type = input_571_pad_type_0, strides = input_571_strides_0, weight = encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_569_cast_fp16)[name = tensor("input_571_cast_fp16")]; tensor x_255_split_num_splits_0 = const()[name = tensor("x_255_split_num_splits_0"), val = tensor(2)]; tensor x_255_split_axis_0 = const()[name = tensor("x_255_split_axis_0"), val = tensor(1)]; tensor x_255_split_cast_fp16_0, tensor x_255_split_cast_fp16_1 = split(axis = x_255_split_axis_0, num_splits = x_255_split_num_splits_0, x = input_571_cast_fp16)[name = tensor("x_255_split_cast_fp16")]; @@ -1700,75 +1700,75 @@ program(1.0) tensor input_577_strides_0 = const()[name = tensor("input_577_strides_0"), val = tensor([1])]; tensor input_577_pad_0 = const()[name = tensor("input_577_pad_0"), val = tensor([0, 0])]; tensor input_577_dilations_0 = const()[name = tensor("input_577_dilations_0"), val = tensor([1])]; - tensor const_283_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265384384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265393664))), name = tensor("const_283_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor 
const_284_to_fp16 = const()[name = tensor("const_284_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265394240)))]; - tensor input_579_cast_fp16 = conv(bias = const_284_to_fp16, dilations = input_577_dilations_0, groups = input_577_groups_0, pad = input_577_pad_0, pad_type = input_577_pad_type_0, strides = input_577_strides_0, weight = const_283_to_fp16_palettized, x = input_575_cast_fp16)[name = tensor("input_579_cast_fp16")]; + tensor const_283_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_283_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265930944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265941312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265940224)))]; + tensor const_284_to_fp16 = const()[name = tensor("const_284_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265943424)))]; + tensor input_579_cast_fp16 = conv(bias = const_284_to_fp16, dilations = input_577_dilations_0, groups = input_577_groups_0, pad = input_577_pad_0, pad_type = input_577_pad_type_0, strides = input_577_strides_0, weight = const_283_to_fp16_quantized, x = input_575_cast_fp16)[name = tensor("input_579_cast_fp16")]; tensor input_581_cast_fp16 = silu(x = input_579_cast_fp16)[name = tensor("input_581_cast_fp16")]; tensor x_257_pad_type_0 = const()[name = tensor("x_257_pad_type_0"), val = tensor("valid")]; tensor x_257_strides_0 = const()[name = tensor("x_257_strides_0"), val = tensor([1])]; tensor x_257_pad_0 = const()[name = tensor("x_257_pad_0"), val = tensor([0, 0])]; tensor x_257_dilations_0 = const()[name = tensor("x_257_dilations_0"), val = tensor([1])]; tensor x_257_groups_0 = const()[name = tensor("x_257_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265396352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266444992))), name = tensor("encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_257_cast_fp16 = conv(dilations = x_257_dilations_0, groups = x_257_groups_0, pad = x_257_pad_0, pad_type = x_257_pad_type_0, strides = x_257_strides_0, weight = encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_581_cast_fp16)[name = tensor("x_257_cast_fp16")]; + tensor encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265945536))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266995264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266994176)))]; + tensor x_257_cast_fp16 = conv(dilations = x_257_dilations_0, groups = x_257_groups_0, pad = x_257_pad_0, pad_type = x_257_pad_type_0, strides = x_257_strides_0, weight = encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_581_cast_fp16)[name = tensor("x_257_cast_fp16")]; 
tensor input_583_perm_0 = const()[name = tensor("input_583_perm_0"), val = tensor([0, 2, 1])]; tensor input_583_cast_fp16 = transpose(perm = input_583_perm_0, x = x_257_cast_fp16)[name = tensor("transpose_236")]; tensor input_585_cast_fp16 = add(x = input_567_cast_fp16, y = input_583_cast_fp16)[name = tensor("input_585_cast_fp16")]; tensor input_587_axes_0 = const()[name = tensor("input_587_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266445568)))]; - tensor encoder_module_layers_10_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266447680)))]; + tensor encoder_module_layers_10_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266997376)))]; + tensor encoder_module_layers_10_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266999488)))]; tensor input_587_cast_fp16 = layer_norm(axes = input_587_axes_0, beta = encoder_module_layers_10_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_feed_forward2_weight_to_fp16, x = input_585_cast_fp16)[name = tensor("input_587_cast_fp16")]; - tensor encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266449792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270644160))), name = tensor("encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_98_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_palettized, x = input_587_cast_fp16)[name = tensor("linear_98_cast_fp16")]; + tensor encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(267001600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271200128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271195968)))]; + tensor linear_98_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_quantized, x = input_587_cast_fp16)[name = tensor("linear_98_cast_fp16")]; tensor input_591_cast_fp16 = silu(x = linear_98_cast_fp16)[name = tensor("input_591_cast_fp16")]; - tensor encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270644736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(274839104))), name = tensor("encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_99_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_palettized, x = input_591_cast_fp16)[name = tensor("linear_99_cast_fp16")]; + tensor encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271208384))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275403840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275402752)))]; + tensor linear_99_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_quantized, x = input_591_cast_fp16)[name = tensor("linear_99_cast_fp16")]; tensor var_2125_to_fp16 = const()[name = tensor("op_2125_to_fp16"), val = tensor(0x1p-1)]; tensor var_2126_cast_fp16 = mul(x = linear_99_cast_fp16, y = var_2125_to_fp16)[name = tensor("op_2126_cast_fp16")]; tensor input_597_cast_fp16 = add(x = input_585_cast_fp16, y = var_2126_cast_fp16)[name = tensor("input_597_cast_fp16")]; tensor input_599_axes_0 = const()[name = tensor("input_599_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274839680)))]; - tensor encoder_module_layers_10_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274841792)))]; + tensor encoder_module_layers_10_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275405952)))]; + tensor encoder_module_layers_10_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275408064)))]; tensor input_599_cast_fp16 = layer_norm(axes = input_599_axes_0, beta = encoder_module_layers_10_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_out_weight_to_fp16, x = input_597_cast_fp16)[name = tensor("input_599_cast_fp16")]; tensor input_601_axes_0 = const()[name = tensor("input_601_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274843904)))]; - tensor encoder_module_layers_11_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274846016)))]; + tensor encoder_module_layers_11_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(275410176)))]; + tensor encoder_module_layers_11_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275412288)))]; tensor input_601_cast_fp16 = layer_norm(axes = input_601_axes_0, beta = encoder_module_layers_11_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_feed_forward1_weight_to_fp16, x = input_599_cast_fp16)[name = tensor("input_601_cast_fp16")]; - tensor encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274848128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279042496))), name = tensor("encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_palettized, x = input_601_cast_fp16)[name = tensor("linear_100_cast_fp16")]; + tensor encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275414400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279612928))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279608768)))]; + tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_quantized, x = input_601_cast_fp16)[name = tensor("linear_100_cast_fp16")]; tensor input_605_cast_fp16 = silu(x = linear_100_cast_fp16)[name = tensor("input_605_cast_fp16")]; - tensor encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279043072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283237440))), name = tensor("encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_101_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_palettized, x = input_605_cast_fp16)[name = tensor("linear_101_cast_fp16")]; + tensor encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279621184))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283816640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283815552)))]; + tensor linear_101_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_quantized, x = input_605_cast_fp16)[name = tensor("linear_101_cast_fp16")]; 
tensor var_2154_to_fp16 = const()[name = tensor("op_2154_to_fp16"), val = tensor(0x1p-1)]; tensor var_2155_cast_fp16 = mul(x = linear_101_cast_fp16, y = var_2154_to_fp16)[name = tensor("op_2155_cast_fp16")]; tensor input_611_cast_fp16 = add(x = input_599_cast_fp16, y = var_2155_cast_fp16)[name = tensor("input_611_cast_fp16")]; tensor query_23_axes_0 = const()[name = tensor("query_23_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283238016)))]; - tensor encoder_module_layers_11_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283240128)))]; + tensor encoder_module_layers_11_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283818752)))]; + tensor encoder_module_layers_11_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283820864)))]; tensor query_23_cast_fp16 = layer_norm(axes = query_23_axes_0, beta = encoder_module_layers_11_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_self_att_weight_to_fp16, x = input_611_cast_fp16)[name = tensor("query_23_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283242240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284290880))), name = tensor("encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_102_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_palettized, x = query_23_cast_fp16)[name = tensor("linear_102_cast_fp16")]; + tensor encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283822976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284872704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284871616)))]; + tensor linear_102_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_quantized, x = query_23_cast_fp16)[name = tensor("linear_102_cast_fp16")]; tensor var_2171 = const()[name = tensor("op_2171"), val = tensor([1, -1, 8, 128])]; tensor q_67_cast_fp16 = reshape(shape = var_2171, x = linear_102_cast_fp16)[name = tensor("q_67_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284291456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset 
= tensor(285340096))), name = tensor("encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_103_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_palettized, x = query_23_cast_fp16)[name = tensor("linear_103_cast_fp16")]; + tensor encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284874816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285924544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285923456)))]; + tensor linear_103_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_quantized, x = query_23_cast_fp16)[name = tensor("linear_103_cast_fp16")]; tensor var_2175 = const()[name = tensor("op_2175"), val = tensor([1, -1, 8, 128])]; tensor k_45_cast_fp16 = reshape(shape = var_2175, x = linear_103_cast_fp16)[name = tensor("k_45_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285340672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286389312))), name = tensor("encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_104_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_palettized, x = query_23_cast_fp16)[name = tensor("linear_104_cast_fp16")]; + tensor encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285926656))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286976384))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286975296)))]; + tensor linear_104_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_quantized, x = query_23_cast_fp16)[name = tensor("linear_104_cast_fp16")]; tensor var_2179 = const()[name = tensor("op_2179"), val = tensor([1, -1, 8, 128])]; tensor v_23_cast_fp16 = reshape(shape = var_2179, x = linear_104_cast_fp16)[name = tensor("v_23_cast_fp16")]; tensor value_27_perm_0 = const()[name = tensor("value_27_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_11_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_11_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286389888)))]; + tensor encoder_module_layers_11_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_11_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286978496)))]; tensor var_2191_cast_fp16 = add(x = q_67_cast_fp16, y = 
encoder_module_layers_11_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2191_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_11_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286392000)))]; + tensor encoder_module_layers_11_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_11_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286980608)))]; tensor var_2193_cast_fp16 = add(x = q_67_cast_fp16, y = encoder_module_layers_11_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2193_cast_fp16")]; tensor q_with_bias_v_23_perm_0 = const()[name = tensor("q_with_bias_v_23_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_265_transpose_x_0 = const()[name = tensor("x_265_transpose_x_0"), val = tensor(false)]; tensor x_265_transpose_y_0 = const()[name = tensor("x_265_transpose_y_0"), val = tensor(false)]; - tensor op_2195_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286394112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286778176))), name = tensor("op_2195_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2195_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2195_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286982720))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287367232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287366784)))]; tensor q_with_bias_v_23_cast_fp16 = transpose(perm = q_with_bias_v_23_perm_0, x = var_2193_cast_fp16)[name = tensor("transpose_235")]; - tensor x_265_cast_fp16 = matmul(transpose_x = x_265_transpose_x_0, transpose_y = x_265_transpose_y_0, x = q_with_bias_v_23_cast_fp16, y = op_2195_to_fp16_palettized)[name = tensor("x_265_cast_fp16")]; + tensor x_265_cast_fp16 = matmul(transpose_x = x_265_transpose_x_0, transpose_y = x_265_transpose_y_0, x = q_with_bias_v_23_cast_fp16, y = op_2195_to_fp16_quantized)[name = tensor("x_265_cast_fp16")]; tensor x_267_pad_0 = const()[name = tensor("x_267_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_267_mode_0 = const()[name = tensor("x_267_mode_0"), val = tensor("constant")]; tensor const_139_to_fp16 = const()[name = tensor("const_139_to_fp16"), val = tensor(0x0p+0)]; @@ -1806,12 +1806,12 @@ program(1.0) tensor var_2228 = const()[name = tensor("op_2228"), val = tensor([1, -1, 1024])]; tensor var_2227_cast_fp16 = transpose(perm = var_2227_perm_0, x = x_271_cast_fp16)[name = tensor("transpose_231")]; tensor input_615_cast_fp16 = reshape(shape = var_2228, x = var_2227_cast_fp16)[name = tensor("input_615_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286778752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287827392))), name = tensor("encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_106_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_palettized, x = input_615_cast_fp16)[name = tensor("linear_106_cast_fp16")]; + tensor encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287368064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288417792))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288416704)))]; + tensor linear_106_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_quantized, x = input_615_cast_fp16)[name = tensor("linear_106_cast_fp16")]; tensor input_619_cast_fp16 = add(x = input_611_cast_fp16, y = linear_106_cast_fp16)[name = tensor("input_619_cast_fp16")]; tensor x_275_axes_0 = const()[name = tensor("x_275_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287827968)))]; - tensor encoder_module_layers_11_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287830080)))]; + tensor encoder_module_layers_11_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288419904)))]; + tensor encoder_module_layers_11_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288422016)))]; tensor x_275_cast_fp16 = layer_norm(axes = x_275_axes_0, beta = encoder_module_layers_11_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_conv_weight_to_fp16, x = input_619_cast_fp16)[name = tensor("x_275_cast_fp16")]; tensor input_621_perm_0 = const()[name = tensor("input_621_perm_0"), val = tensor([0, 2, 1])]; tensor input_623_pad_type_0 = const()[name = tensor("input_623_pad_type_0"), val = tensor("valid")]; @@ -1819,9 +1819,9 @@ program(1.0) tensor input_623_pad_0 = const()[name = tensor("input_623_pad_0"), val = tensor([0, 0])]; tensor input_623_dilations_0 = const()[name = tensor("input_623_dilations_0"), val = tensor([1])]; tensor input_623_groups_0 = const()[name = tensor("input_623_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287832192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289929408))), name = tensor("encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(288424128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290523456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290521344)))]; tensor input_621_cast_fp16 = transpose(perm = input_621_perm_0, x = x_275_cast_fp16)[name = tensor("transpose_230")]; - tensor input_623_cast_fp16 = conv(dilations = input_623_dilations_0, groups = input_623_groups_0, pad = input_623_pad_0, pad_type = input_623_pad_type_0, strides = input_623_strides_0, weight = encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_621_cast_fp16)[name = tensor("input_623_cast_fp16")]; + tensor input_623_cast_fp16 = conv(dilations = input_623_dilations_0, groups = input_623_groups_0, pad = input_623_pad_0, pad_type = input_623_pad_type_0, strides = input_623_strides_0, weight = encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_621_cast_fp16)[name = tensor("input_623_cast_fp16")]; tensor x_277_split_num_splits_0 = const()[name = tensor("x_277_split_num_splits_0"), val = tensor(2)]; tensor x_277_split_axis_0 = const()[name = tensor("x_277_split_axis_0"), val = tensor(1)]; tensor x_277_split_cast_fp16_0, tensor x_277_split_cast_fp16_1 = split(axis = x_277_split_axis_0, num_splits = x_277_split_num_splits_0, x = input_623_cast_fp16)[name = tensor("x_277_split_cast_fp16")]; @@ -1837,75 +1837,75 @@ program(1.0) tensor input_629_strides_0 = const()[name = tensor("input_629_strides_0"), val = tensor([1])]; tensor input_629_pad_0 = const()[name = tensor("input_629_pad_0"), val = tensor([0, 0])]; tensor input_629_dilations_0 = const()[name = tensor("input_629_dilations_0"), val = tensor([1])]; - tensor const_285_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289929984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289939264))), name = tensor("const_285_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_286_to_fp16 = const()[name = tensor("const_286_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289939840)))]; - tensor input_631_cast_fp16 = conv(bias = const_286_to_fp16, dilations = input_629_dilations_0, groups = input_629_groups_0, pad = input_629_pad_0, pad_type = input_629_pad_type_0, strides = input_629_strides_0, weight = const_285_to_fp16_palettized, x = input_627_cast_fp16)[name = tensor("input_631_cast_fp16")]; + tensor const_285_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_285_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290527616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290537984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290536896)))]; + tensor const_286_to_fp16 = const()[name = tensor("const_286_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290540096)))]; + tensor input_631_cast_fp16 = conv(bias = const_286_to_fp16, dilations = input_629_dilations_0, groups = input_629_groups_0, pad = input_629_pad_0, pad_type = input_629_pad_type_0, strides = input_629_strides_0, weight = const_285_to_fp16_quantized, x = input_627_cast_fp16)[name = 
tensor("input_631_cast_fp16")]; tensor input_633_cast_fp16 = silu(x = input_631_cast_fp16)[name = tensor("input_633_cast_fp16")]; tensor x_279_pad_type_0 = const()[name = tensor("x_279_pad_type_0"), val = tensor("valid")]; tensor x_279_strides_0 = const()[name = tensor("x_279_strides_0"), val = tensor([1])]; tensor x_279_pad_0 = const()[name = tensor("x_279_pad_0"), val = tensor([0, 0])]; tensor x_279_dilations_0 = const()[name = tensor("x_279_dilations_0"), val = tensor([1])]; tensor x_279_groups_0 = const()[name = tensor("x_279_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289941952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290990592))), name = tensor("encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_279_cast_fp16 = conv(dilations = x_279_dilations_0, groups = x_279_groups_0, pad = x_279_pad_0, pad_type = x_279_pad_type_0, strides = x_279_strides_0, weight = encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_633_cast_fp16)[name = tensor("x_279_cast_fp16")]; + tensor encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290542208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291591936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291590848)))]; + tensor x_279_cast_fp16 = conv(dilations = x_279_dilations_0, groups = x_279_groups_0, pad = x_279_pad_0, pad_type = x_279_pad_type_0, strides = x_279_strides_0, weight = encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_633_cast_fp16)[name = tensor("x_279_cast_fp16")]; tensor input_635_perm_0 = const()[name = tensor("input_635_perm_0"), val = tensor([0, 2, 1])]; tensor input_635_cast_fp16 = transpose(perm = input_635_perm_0, x = x_279_cast_fp16)[name = tensor("transpose_229")]; tensor input_637_cast_fp16 = add(x = input_619_cast_fp16, y = input_635_cast_fp16)[name = tensor("input_637_cast_fp16")]; tensor input_639_axes_0 = const()[name = tensor("input_639_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290991168)))]; - tensor encoder_module_layers_11_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290993280)))]; + tensor encoder_module_layers_11_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291594048)))]; + tensor encoder_module_layers_11_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(291596160)))]; tensor input_639_cast_fp16 = layer_norm(axes = input_639_axes_0, beta = encoder_module_layers_11_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_feed_forward2_weight_to_fp16, x = input_637_cast_fp16)[name = tensor("input_639_cast_fp16")]; - tensor encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290995392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295189760))), name = tensor("encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_palettized, x = input_639_cast_fp16)[name = tensor("linear_107_cast_fp16")]; + tensor encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291598272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295796800))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295792640)))]; + tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_quantized, x = input_639_cast_fp16)[name = tensor("linear_107_cast_fp16")]; tensor input_643_cast_fp16 = silu(x = linear_107_cast_fp16)[name = tensor("input_643_cast_fp16")]; - tensor encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295190336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299384704))), name = tensor("encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_108_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_palettized, x = input_643_cast_fp16)[name = tensor("linear_108_cast_fp16")]; + tensor encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295805056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300000512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299999424)))]; + tensor linear_108_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_quantized, x = input_643_cast_fp16)[name = tensor("linear_108_cast_fp16")]; tensor var_2288_to_fp16 = const()[name = tensor("op_2288_to_fp16"), val = tensor(0x1p-1)]; tensor var_2289_cast_fp16 = mul(x = linear_108_cast_fp16, y = var_2288_to_fp16)[name = tensor("op_2289_cast_fp16")]; tensor input_649_cast_fp16 = add(x = 
input_637_cast_fp16, y = var_2289_cast_fp16)[name = tensor("input_649_cast_fp16")]; tensor input_651_axes_0 = const()[name = tensor("input_651_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299385280)))]; - tensor encoder_module_layers_11_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299387392)))]; + tensor encoder_module_layers_11_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300002624)))]; + tensor encoder_module_layers_11_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300004736)))]; tensor input_651_cast_fp16 = layer_norm(axes = input_651_axes_0, beta = encoder_module_layers_11_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_out_weight_to_fp16, x = input_649_cast_fp16)[name = tensor("input_651_cast_fp16")]; tensor input_653_axes_0 = const()[name = tensor("input_653_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299389504)))]; - tensor encoder_module_layers_12_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299391616)))]; + tensor encoder_module_layers_12_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300006848)))]; + tensor encoder_module_layers_12_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300008960)))]; tensor input_653_cast_fp16 = layer_norm(axes = input_653_axes_0, beta = encoder_module_layers_12_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_feed_forward1_weight_to_fp16, x = input_651_cast_fp16)[name = tensor("input_653_cast_fp16")]; - tensor encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299393728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303588096))), name = tensor("encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_palettized, x = input_653_cast_fp16)[name = tensor("linear_109_cast_fp16")]; + tensor encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_quantized = 
constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300011072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304209600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304205440)))]; + tensor linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_quantized, x = input_653_cast_fp16)[name = tensor("linear_109_cast_fp16")]; tensor input_657_cast_fp16 = silu(x = linear_109_cast_fp16)[name = tensor("input_657_cast_fp16")]; - tensor encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303588672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307783040))), name = tensor("encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_110_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_palettized, x = input_657_cast_fp16)[name = tensor("linear_110_cast_fp16")]; + tensor encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304217856))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308413312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308412224)))]; + tensor linear_110_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_quantized, x = input_657_cast_fp16)[name = tensor("linear_110_cast_fp16")]; tensor var_2317_to_fp16 = const()[name = tensor("op_2317_to_fp16"), val = tensor(0x1p-1)]; tensor var_2318_cast_fp16 = mul(x = linear_110_cast_fp16, y = var_2317_to_fp16)[name = tensor("op_2318_cast_fp16")]; tensor input_663_cast_fp16 = add(x = input_651_cast_fp16, y = var_2318_cast_fp16)[name = tensor("input_663_cast_fp16")]; tensor query_25_axes_0 = const()[name = tensor("query_25_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307783616)))]; - tensor encoder_module_layers_12_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307785728)))]; + tensor encoder_module_layers_12_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308415424)))]; + tensor encoder_module_layers_12_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(308417536)))]; tensor query_25_cast_fp16 = layer_norm(axes = query_25_axes_0, beta = encoder_module_layers_12_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_self_att_weight_to_fp16, x = input_663_cast_fp16)[name = tensor("query_25_cast_fp16")]; - tensor encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307787840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308836480))), name = tensor("encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_111_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_palettized, x = query_25_cast_fp16)[name = tensor("linear_111_cast_fp16")]; + tensor encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308419648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309469376))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309468288)))]; + tensor linear_111_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_quantized, x = query_25_cast_fp16)[name = tensor("linear_111_cast_fp16")]; tensor var_2334 = const()[name = tensor("op_2334"), val = tensor([1, -1, 8, 128])]; tensor q_73_cast_fp16 = reshape(shape = var_2334, x = linear_111_cast_fp16)[name = tensor("q_73_cast_fp16")]; - tensor encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308837056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309885696))), name = tensor("encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_112_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_palettized, x = query_25_cast_fp16)[name = tensor("linear_112_cast_fp16")]; + tensor encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309471488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310521216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310520128)))]; + tensor linear_112_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_quantized, x = query_25_cast_fp16)[name = tensor("linear_112_cast_fp16")]; tensor var_2338 = const()[name = tensor("op_2338"), val = tensor([1, -1, 8, 128])]; tensor k_49_cast_fp16 = reshape(shape = var_2338, x = linear_112_cast_fp16)[name = tensor("k_49_cast_fp16")]; - tensor 
encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309886272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310934912))), name = tensor("encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_113_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_palettized, x = query_25_cast_fp16)[name = tensor("linear_113_cast_fp16")]; + tensor encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310523328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311573056))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311571968)))]; + tensor linear_113_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_quantized, x = query_25_cast_fp16)[name = tensor("linear_113_cast_fp16")]; tensor var_2342 = const()[name = tensor("op_2342"), val = tensor([1, -1, 8, 128])]; tensor v_25_cast_fp16 = reshape(shape = var_2342, x = linear_113_cast_fp16)[name = tensor("v_25_cast_fp16")]; tensor value_29_perm_0 = const()[name = tensor("value_29_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_12_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_12_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310935488)))]; + tensor encoder_module_layers_12_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_12_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311575168)))]; tensor var_2354_cast_fp16 = add(x = q_73_cast_fp16, y = encoder_module_layers_12_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2354_cast_fp16")]; - tensor encoder_module_layers_12_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_12_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310937600)))]; + tensor encoder_module_layers_12_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_12_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311577280)))]; tensor var_2356_cast_fp16 = add(x = q_73_cast_fp16, y = encoder_module_layers_12_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2356_cast_fp16")]; tensor q_with_bias_v_25_perm_0 = const()[name = tensor("q_with_bias_v_25_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_287_transpose_x_0 = const()[name = tensor("x_287_transpose_x_0"), val = tensor(false)]; tensor x_287_transpose_y_0 = const()[name = tensor("x_287_transpose_y_0"), val = tensor(false)]; - tensor op_2358_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310939712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311323776))), name = 
tensor("op_2358_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2358_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2358_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311579392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311963904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311963456)))]; tensor q_with_bias_v_25_cast_fp16 = transpose(perm = q_with_bias_v_25_perm_0, x = var_2356_cast_fp16)[name = tensor("transpose_228")]; - tensor x_287_cast_fp16 = matmul(transpose_x = x_287_transpose_x_0, transpose_y = x_287_transpose_y_0, x = q_with_bias_v_25_cast_fp16, y = op_2358_to_fp16_palettized)[name = tensor("x_287_cast_fp16")]; + tensor x_287_cast_fp16 = matmul(transpose_x = x_287_transpose_x_0, transpose_y = x_287_transpose_y_0, x = q_with_bias_v_25_cast_fp16, y = op_2358_to_fp16_quantized)[name = tensor("x_287_cast_fp16")]; tensor x_289_pad_0 = const()[name = tensor("x_289_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_289_mode_0 = const()[name = tensor("x_289_mode_0"), val = tensor("constant")]; tensor const_149_to_fp16 = const()[name = tensor("const_149_to_fp16"), val = tensor(0x0p+0)]; @@ -1943,12 +1943,12 @@ program(1.0) tensor var_2391 = const()[name = tensor("op_2391"), val = tensor([1, -1, 1024])]; tensor var_2390_cast_fp16 = transpose(perm = var_2390_perm_0, x = x_293_cast_fp16)[name = tensor("transpose_224")]; tensor input_667_cast_fp16 = reshape(shape = var_2391, x = var_2390_cast_fp16)[name = tensor("input_667_cast_fp16")]; - tensor encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311324352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312372992))), name = tensor("encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_115_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_palettized, x = input_667_cast_fp16)[name = tensor("linear_115_cast_fp16")]; + tensor encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311964736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313014464))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313013376)))]; + tensor linear_115_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_quantized, x = input_667_cast_fp16)[name = tensor("linear_115_cast_fp16")]; tensor input_671_cast_fp16 = add(x = input_663_cast_fp16, y = linear_115_cast_fp16)[name = tensor("input_671_cast_fp16")]; tensor x_297_axes_0 = const()[name = tensor("x_297_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(312373568)))]; - tensor encoder_module_layers_12_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312375680)))]; + tensor encoder_module_layers_12_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313016576)))]; + tensor encoder_module_layers_12_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313018688)))]; tensor x_297_cast_fp16 = layer_norm(axes = x_297_axes_0, beta = encoder_module_layers_12_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_conv_weight_to_fp16, x = input_671_cast_fp16)[name = tensor("x_297_cast_fp16")]; tensor input_673_perm_0 = const()[name = tensor("input_673_perm_0"), val = tensor([0, 2, 1])]; tensor input_675_pad_type_0 = const()[name = tensor("input_675_pad_type_0"), val = tensor("valid")]; @@ -1956,9 +1956,9 @@ program(1.0) tensor input_675_pad_0 = const()[name = tensor("input_675_pad_0"), val = tensor([0, 0])]; tensor input_675_dilations_0 = const()[name = tensor("input_675_dilations_0"), val = tensor([1])]; tensor input_675_groups_0 = const()[name = tensor("input_675_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312377792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314475008))), name = tensor("encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313020800))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315120128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315118016)))]; tensor input_673_cast_fp16 = transpose(perm = input_673_perm_0, x = x_297_cast_fp16)[name = tensor("transpose_223")]; - tensor input_675_cast_fp16 = conv(dilations = input_675_dilations_0, groups = input_675_groups_0, pad = input_675_pad_0, pad_type = input_675_pad_type_0, strides = input_675_strides_0, weight = encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_673_cast_fp16)[name = tensor("input_675_cast_fp16")]; + tensor input_675_cast_fp16 = conv(dilations = input_675_dilations_0, groups = input_675_groups_0, pad = input_675_pad_0, pad_type = input_675_pad_type_0, strides = input_675_strides_0, weight = encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_673_cast_fp16)[name = tensor("input_675_cast_fp16")]; tensor x_299_split_num_splits_0 = const()[name = tensor("x_299_split_num_splits_0"), val = tensor(2)]; tensor x_299_split_axis_0 = const()[name = tensor("x_299_split_axis_0"), val = tensor(1)]; tensor x_299_split_cast_fp16_0, tensor x_299_split_cast_fp16_1 = split(axis = 
x_299_split_axis_0, num_splits = x_299_split_num_splits_0, x = input_675_cast_fp16)[name = tensor("x_299_split_cast_fp16")]; @@ -1974,75 +1974,75 @@ program(1.0) tensor input_681_strides_0 = const()[name = tensor("input_681_strides_0"), val = tensor([1])]; tensor input_681_pad_0 = const()[name = tensor("input_681_pad_0"), val = tensor([0, 0])]; tensor input_681_dilations_0 = const()[name = tensor("input_681_dilations_0"), val = tensor([1])]; - tensor const_287_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314475584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314484864))), name = tensor("const_287_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_288_to_fp16 = const()[name = tensor("const_288_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314485440)))]; - tensor input_683_cast_fp16 = conv(bias = const_288_to_fp16, dilations = input_681_dilations_0, groups = input_681_groups_0, pad = input_681_pad_0, pad_type = input_681_pad_type_0, strides = input_681_strides_0, weight = const_287_to_fp16_palettized, x = input_679_cast_fp16)[name = tensor("input_683_cast_fp16")]; + tensor const_287_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_287_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315124288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315134656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315133568)))]; + tensor const_288_to_fp16 = const()[name = tensor("const_288_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315136768)))]; + tensor input_683_cast_fp16 = conv(bias = const_288_to_fp16, dilations = input_681_dilations_0, groups = input_681_groups_0, pad = input_681_pad_0, pad_type = input_681_pad_type_0, strides = input_681_strides_0, weight = const_287_to_fp16_quantized, x = input_679_cast_fp16)[name = tensor("input_683_cast_fp16")]; tensor input_685_cast_fp16 = silu(x = input_683_cast_fp16)[name = tensor("input_685_cast_fp16")]; tensor x_301_pad_type_0 = const()[name = tensor("x_301_pad_type_0"), val = tensor("valid")]; tensor x_301_strides_0 = const()[name = tensor("x_301_strides_0"), val = tensor([1])]; tensor x_301_pad_0 = const()[name = tensor("x_301_pad_0"), val = tensor([0, 0])]; tensor x_301_dilations_0 = const()[name = tensor("x_301_dilations_0"), val = tensor([1])]; tensor x_301_groups_0 = const()[name = tensor("x_301_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314487552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315536192))), name = tensor("encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_301_cast_fp16 = conv(dilations = x_301_dilations_0, groups = x_301_groups_0, pad = x_301_pad_0, pad_type = x_301_pad_type_0, strides = x_301_strides_0, weight = encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_685_cast_fp16)[name = tensor("x_301_cast_fp16")]; + tensor 
encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315138880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316188608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316187520)))]; + tensor x_301_cast_fp16 = conv(dilations = x_301_dilations_0, groups = x_301_groups_0, pad = x_301_pad_0, pad_type = x_301_pad_type_0, strides = x_301_strides_0, weight = encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_685_cast_fp16)[name = tensor("x_301_cast_fp16")]; tensor input_687_perm_0 = const()[name = tensor("input_687_perm_0"), val = tensor([0, 2, 1])]; tensor input_687_cast_fp16 = transpose(perm = input_687_perm_0, x = x_301_cast_fp16)[name = tensor("transpose_222")]; tensor input_689_cast_fp16 = add(x = input_671_cast_fp16, y = input_687_cast_fp16)[name = tensor("input_689_cast_fp16")]; tensor input_691_axes_0 = const()[name = tensor("input_691_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315536768)))]; - tensor encoder_module_layers_12_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315538880)))]; + tensor encoder_module_layers_12_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316190720)))]; + tensor encoder_module_layers_12_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316192832)))]; tensor input_691_cast_fp16 = layer_norm(axes = input_691_axes_0, beta = encoder_module_layers_12_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_feed_forward2_weight_to_fp16, x = input_689_cast_fp16)[name = tensor("input_691_cast_fp16")]; - tensor encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315540992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(319735360))), name = tensor("encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_116_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_palettized, x = input_691_cast_fp16)[name = tensor("linear_116_cast_fp16")]; + tensor encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(316194944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320393472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320389312)))]; + tensor linear_116_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_quantized, x = input_691_cast_fp16)[name = tensor("linear_116_cast_fp16")]; tensor input_695_cast_fp16 = silu(x = linear_116_cast_fp16)[name = tensor("input_695_cast_fp16")]; - tensor encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(319735936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323930304))), name = tensor("encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_117_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_palettized, x = input_695_cast_fp16)[name = tensor("linear_117_cast_fp16")]; + tensor encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320401728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324597184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324596096)))]; + tensor linear_117_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_quantized, x = input_695_cast_fp16)[name = tensor("linear_117_cast_fp16")]; tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1p-1)]; tensor var_2452_cast_fp16 = mul(x = linear_117_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; tensor input_701_cast_fp16 = add(x = input_689_cast_fp16, y = var_2452_cast_fp16)[name = tensor("input_701_cast_fp16")]; tensor input_703_axes_0 = const()[name = tensor("input_703_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323930880)))]; - tensor encoder_module_layers_12_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323932992)))]; + tensor encoder_module_layers_12_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324599296)))]; + tensor encoder_module_layers_12_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324601408)))]; tensor input_703_cast_fp16 = layer_norm(axes = input_703_axes_0, beta = encoder_module_layers_12_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_out_weight_to_fp16, x = 
input_701_cast_fp16)[name = tensor("input_703_cast_fp16")]; tensor input_705_axes_0 = const()[name = tensor("input_705_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323935104)))]; - tensor encoder_module_layers_13_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323937216)))]; + tensor encoder_module_layers_13_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324603520)))]; + tensor encoder_module_layers_13_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324605632)))]; tensor input_705_cast_fp16 = layer_norm(axes = input_705_axes_0, beta = encoder_module_layers_13_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_feed_forward1_weight_to_fp16, x = input_703_cast_fp16)[name = tensor("input_705_cast_fp16")]; - tensor encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323939328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328133696))), name = tensor("encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_palettized, x = input_705_cast_fp16)[name = tensor("linear_118_cast_fp16")]; + tensor encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324607744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328806272))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328802112)))]; + tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_quantized, x = input_705_cast_fp16)[name = tensor("linear_118_cast_fp16")]; tensor input_709_cast_fp16 = silu(x = linear_118_cast_fp16)[name = tensor("input_709_cast_fp16")]; - tensor encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328134272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332328640))), name = tensor("encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_119_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_palettized, x = input_709_cast_fp16)[name = tensor("linear_119_cast_fp16")]; + tensor encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328814528))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333009984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333008896)))]; + tensor linear_119_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_quantized, x = input_709_cast_fp16)[name = tensor("linear_119_cast_fp16")]; tensor var_2480_to_fp16 = const()[name = tensor("op_2480_to_fp16"), val = tensor(0x1p-1)]; tensor var_2481_cast_fp16 = mul(x = linear_119_cast_fp16, y = var_2480_to_fp16)[name = tensor("op_2481_cast_fp16")]; tensor input_715_cast_fp16 = add(x = input_703_cast_fp16, y = var_2481_cast_fp16)[name = tensor("input_715_cast_fp16")]; tensor query_27_axes_0 = const()[name = tensor("query_27_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332329216)))]; - tensor encoder_module_layers_13_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332331328)))]; + tensor encoder_module_layers_13_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333012096)))]; + tensor encoder_module_layers_13_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333014208)))]; tensor query_27_cast_fp16 = layer_norm(axes = query_27_axes_0, beta = encoder_module_layers_13_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_self_att_weight_to_fp16, x = input_715_cast_fp16)[name = tensor("query_27_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332333440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333382080))), name = tensor("encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_120_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_palettized, x = query_27_cast_fp16)[name = tensor("linear_120_cast_fp16")]; + tensor encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333016320))), scale 
= tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334066048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334064960)))]; + tensor linear_120_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_quantized, x = query_27_cast_fp16)[name = tensor("linear_120_cast_fp16")]; tensor var_2497 = const()[name = tensor("op_2497"), val = tensor([1, -1, 8, 128])]; tensor q_79_cast_fp16 = reshape(shape = var_2497, x = linear_120_cast_fp16)[name = tensor("q_79_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333382656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334431296))), name = tensor("encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_121_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_palettized, x = query_27_cast_fp16)[name = tensor("linear_121_cast_fp16")]; + tensor encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334068160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335117888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335116800)))]; + tensor linear_121_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_quantized, x = query_27_cast_fp16)[name = tensor("linear_121_cast_fp16")]; tensor var_2501 = const()[name = tensor("op_2501"), val = tensor([1, -1, 8, 128])]; tensor k_53_cast_fp16 = reshape(shape = var_2501, x = linear_121_cast_fp16)[name = tensor("k_53_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334431872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335480512))), name = tensor("encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_122_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_palettized, x = query_27_cast_fp16)[name = tensor("linear_122_cast_fp16")]; + tensor encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335120000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336169728))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336168640)))]; + tensor linear_122_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_quantized, x = 
query_27_cast_fp16)[name = tensor("linear_122_cast_fp16")]; tensor var_2505 = const()[name = tensor("op_2505"), val = tensor([1, -1, 8, 128])]; tensor v_27_cast_fp16 = reshape(shape = var_2505, x = linear_122_cast_fp16)[name = tensor("v_27_cast_fp16")]; tensor value_31_perm_0 = const()[name = tensor("value_31_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_13_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_13_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335481088)))]; + tensor encoder_module_layers_13_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_13_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336171840)))]; tensor var_2517_cast_fp16 = add(x = q_79_cast_fp16, y = encoder_module_layers_13_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2517_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_13_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335483200)))]; + tensor encoder_module_layers_13_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_13_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336173952)))]; tensor var_2519_cast_fp16 = add(x = q_79_cast_fp16, y = encoder_module_layers_13_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2519_cast_fp16")]; tensor q_with_bias_v_27_perm_0 = const()[name = tensor("q_with_bias_v_27_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_309_transpose_x_0 = const()[name = tensor("x_309_transpose_x_0"), val = tensor(false)]; tensor x_309_transpose_y_0 = const()[name = tensor("x_309_transpose_y_0"), val = tensor(false)]; - tensor op_2521_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335485312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335869376))), name = tensor("op_2521_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2521_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2521_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336176064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336560576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336560128)))]; tensor q_with_bias_v_27_cast_fp16 = transpose(perm = q_with_bias_v_27_perm_0, x = var_2519_cast_fp16)[name = tensor("transpose_221")]; - tensor x_309_cast_fp16 = matmul(transpose_x = x_309_transpose_x_0, transpose_y = x_309_transpose_y_0, x = q_with_bias_v_27_cast_fp16, y = op_2521_to_fp16_palettized)[name = tensor("x_309_cast_fp16")]; + tensor x_309_cast_fp16 = matmul(transpose_x = x_309_transpose_x_0, transpose_y = x_309_transpose_y_0, x = q_with_bias_v_27_cast_fp16, y = op_2521_to_fp16_quantized)[name = tensor("x_309_cast_fp16")]; tensor x_311_pad_0 = const()[name = tensor("x_311_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_311_mode_0 = const()[name = tensor("x_311_mode_0"), val = tensor("constant")]; tensor const_159_to_fp16 = const()[name = tensor("const_159_to_fp16"), val = 
tensor(0x0p+0)]; @@ -2080,12 +2080,12 @@ program(1.0) tensor var_2554 = const()[name = tensor("op_2554"), val = tensor([1, -1, 1024])]; tensor var_2553_cast_fp16 = transpose(perm = var_2553_perm_0, x = x_315_cast_fp16)[name = tensor("transpose_217")]; tensor input_719_cast_fp16 = reshape(shape = var_2554, x = var_2553_cast_fp16)[name = tensor("input_719_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335869952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336918592))), name = tensor("encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_124_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_palettized, x = input_719_cast_fp16)[name = tensor("linear_124_cast_fp16")]; + tensor encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336561408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337611136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337610048)))]; + tensor linear_124_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_quantized, x = input_719_cast_fp16)[name = tensor("linear_124_cast_fp16")]; tensor input_723_cast_fp16 = add(x = input_715_cast_fp16, y = linear_124_cast_fp16)[name = tensor("input_723_cast_fp16")]; tensor x_319_axes_0 = const()[name = tensor("x_319_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336919168)))]; - tensor encoder_module_layers_13_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336921280)))]; + tensor encoder_module_layers_13_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337613248)))]; + tensor encoder_module_layers_13_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337615360)))]; tensor x_319_cast_fp16 = layer_norm(axes = x_319_axes_0, beta = encoder_module_layers_13_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_conv_weight_to_fp16, x = input_723_cast_fp16)[name = tensor("x_319_cast_fp16")]; tensor input_725_perm_0 = const()[name = tensor("input_725_perm_0"), val = tensor([0, 2, 1])]; tensor input_727_pad_type_0 = const()[name = tensor("input_727_pad_type_0"), val = tensor("valid")]; @@ -2093,9 +2093,9 @@ program(1.0) tensor input_727_pad_0 = const()[name = tensor("input_727_pad_0"), val = tensor([0, 0])]; tensor input_727_dilations_0 = 
const()[name = tensor("input_727_dilations_0"), val = tensor([1])]; tensor input_727_groups_0 = const()[name = tensor("input_727_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336923392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339020608))), name = tensor("encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337617472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339716800))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339714688)))]; tensor input_725_cast_fp16 = transpose(perm = input_725_perm_0, x = x_319_cast_fp16)[name = tensor("transpose_216")]; - tensor input_727_cast_fp16 = conv(dilations = input_727_dilations_0, groups = input_727_groups_0, pad = input_727_pad_0, pad_type = input_727_pad_type_0, strides = input_727_strides_0, weight = encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_725_cast_fp16)[name = tensor("input_727_cast_fp16")]; + tensor input_727_cast_fp16 = conv(dilations = input_727_dilations_0, groups = input_727_groups_0, pad = input_727_pad_0, pad_type = input_727_pad_type_0, strides = input_727_strides_0, weight = encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_725_cast_fp16)[name = tensor("input_727_cast_fp16")]; tensor x_321_split_num_splits_0 = const()[name = tensor("x_321_split_num_splits_0"), val = tensor(2)]; tensor x_321_split_axis_0 = const()[name = tensor("x_321_split_axis_0"), val = tensor(1)]; tensor x_321_split_cast_fp16_0, tensor x_321_split_cast_fp16_1 = split(axis = x_321_split_axis_0, num_splits = x_321_split_num_splits_0, x = input_727_cast_fp16)[name = tensor("x_321_split_cast_fp16")]; @@ -2111,75 +2111,75 @@ program(1.0) tensor input_733_strides_0 = const()[name = tensor("input_733_strides_0"), val = tensor([1])]; tensor input_733_pad_0 = const()[name = tensor("input_733_pad_0"), val = tensor([0, 0])]; tensor input_733_dilations_0 = const()[name = tensor("input_733_dilations_0"), val = tensor([1])]; - tensor const_289_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339021184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339030464))), name = tensor("const_289_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_290_to_fp16 = const()[name = tensor("const_290_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339031040)))]; - tensor input_735_cast_fp16 = conv(bias = const_290_to_fp16, dilations = input_733_dilations_0, groups = input_733_groups_0, pad = input_733_pad_0, pad_type = input_733_pad_type_0, strides = input_733_strides_0, weight = const_289_to_fp16_palettized, x = input_731_cast_fp16)[name = tensor("input_735_cast_fp16")]; + tensor const_289_to_fp16_quantized = constexpr_affine_dequantize()[axis 
= tensor(0), name = tensor("const_289_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339720960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339731328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339730240)))]; + tensor const_290_to_fp16 = const()[name = tensor("const_290_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339733440)))]; + tensor input_735_cast_fp16 = conv(bias = const_290_to_fp16, dilations = input_733_dilations_0, groups = input_733_groups_0, pad = input_733_pad_0, pad_type = input_733_pad_type_0, strides = input_733_strides_0, weight = const_289_to_fp16_quantized, x = input_731_cast_fp16)[name = tensor("input_735_cast_fp16")]; tensor input_737_cast_fp16 = silu(x = input_735_cast_fp16)[name = tensor("input_737_cast_fp16")]; tensor x_323_pad_type_0 = const()[name = tensor("x_323_pad_type_0"), val = tensor("valid")]; tensor x_323_strides_0 = const()[name = tensor("x_323_strides_0"), val = tensor([1])]; tensor x_323_pad_0 = const()[name = tensor("x_323_pad_0"), val = tensor([0, 0])]; tensor x_323_dilations_0 = const()[name = tensor("x_323_dilations_0"), val = tensor([1])]; tensor x_323_groups_0 = const()[name = tensor("x_323_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339033152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340081792))), name = tensor("encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_323_cast_fp16 = conv(dilations = x_323_dilations_0, groups = x_323_groups_0, pad = x_323_pad_0, pad_type = x_323_pad_type_0, strides = x_323_strides_0, weight = encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_737_cast_fp16)[name = tensor("x_323_cast_fp16")]; + tensor encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339735552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340785280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340784192)))]; + tensor x_323_cast_fp16 = conv(dilations = x_323_dilations_0, groups = x_323_groups_0, pad = x_323_pad_0, pad_type = x_323_pad_type_0, strides = x_323_strides_0, weight = encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_737_cast_fp16)[name = tensor("x_323_cast_fp16")]; tensor input_739_perm_0 = const()[name = tensor("input_739_perm_0"), val = tensor([0, 2, 1])]; tensor input_739_cast_fp16 = transpose(perm = input_739_perm_0, x = x_323_cast_fp16)[name = tensor("transpose_215")]; tensor input_741_cast_fp16 = add(x = input_723_cast_fp16, y = input_739_cast_fp16)[name = tensor("input_741_cast_fp16")]; tensor input_743_axes_0 = const()[name = tensor("input_743_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_feed_forward2_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_13_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340082368)))]; - tensor encoder_module_layers_13_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340084480)))]; + tensor encoder_module_layers_13_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340787392)))]; + tensor encoder_module_layers_13_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340789504)))]; tensor input_743_cast_fp16 = layer_norm(axes = input_743_axes_0, beta = encoder_module_layers_13_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_feed_forward2_weight_to_fp16, x = input_741_cast_fp16)[name = tensor("input_743_cast_fp16")]; - tensor encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340086592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344280960))), name = tensor("encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_palettized, x = input_743_cast_fp16)[name = tensor("linear_125_cast_fp16")]; + tensor encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340791616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344990144))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344985984)))]; + tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_quantized, x = input_743_cast_fp16)[name = tensor("linear_125_cast_fp16")]; tensor input_747_cast_fp16 = silu(x = linear_125_cast_fp16)[name = tensor("input_747_cast_fp16")]; - tensor encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344281536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348475904))), name = tensor("encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_126_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_palettized, x = input_747_cast_fp16)[name = tensor("linear_126_cast_fp16")]; + tensor encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344998400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349193856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349192768)))]; + tensor linear_126_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_quantized, x = input_747_cast_fp16)[name = tensor("linear_126_cast_fp16")]; tensor var_2614_to_fp16 = const()[name = tensor("op_2614_to_fp16"), val = tensor(0x1p-1)]; tensor var_2615_cast_fp16 = mul(x = linear_126_cast_fp16, y = var_2614_to_fp16)[name = tensor("op_2615_cast_fp16")]; tensor input_753_cast_fp16 = add(x = input_741_cast_fp16, y = var_2615_cast_fp16)[name = tensor("input_753_cast_fp16")]; tensor input_755_axes_0 = const()[name = tensor("input_755_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348476480)))]; - tensor encoder_module_layers_13_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348478592)))]; + tensor encoder_module_layers_13_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349195968)))]; + tensor encoder_module_layers_13_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349198080)))]; tensor input_755_cast_fp16 = layer_norm(axes = input_755_axes_0, beta = encoder_module_layers_13_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_out_weight_to_fp16, x = input_753_cast_fp16)[name = tensor("input_755_cast_fp16")]; tensor input_757_axes_0 = const()[name = tensor("input_757_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348480704)))]; - tensor encoder_module_layers_14_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348482816)))]; + tensor encoder_module_layers_14_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349200192)))]; + tensor encoder_module_layers_14_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349202304)))]; tensor input_757_cast_fp16 = layer_norm(axes = input_757_axes_0, beta = encoder_module_layers_14_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = 
encoder_module_layers_14_norm_feed_forward1_weight_to_fp16, x = input_755_cast_fp16)[name = tensor("input_757_cast_fp16")]; - tensor encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348484928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352679296))), name = tensor("encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_palettized, x = input_757_cast_fp16)[name = tensor("linear_127_cast_fp16")]; + tensor encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349204416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(353402944))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(353398784)))]; + tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_quantized, x = input_757_cast_fp16)[name = tensor("linear_127_cast_fp16")]; tensor input_761_cast_fp16 = silu(x = linear_127_cast_fp16)[name = tensor("input_761_cast_fp16")]; - tensor encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352679872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356874240))), name = tensor("encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_128_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_palettized, x = input_761_cast_fp16)[name = tensor("linear_128_cast_fp16")]; + tensor encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(353411200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357606656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357605568)))]; + tensor linear_128_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_quantized, x = input_761_cast_fp16)[name = tensor("linear_128_cast_fp16")]; tensor var_2643_to_fp16 = const()[name = tensor("op_2643_to_fp16"), val = tensor(0x1p-1)]; tensor var_2644_cast_fp16 = mul(x = linear_128_cast_fp16, y = var_2643_to_fp16)[name = tensor("op_2644_cast_fp16")]; tensor input_767_cast_fp16 = add(x = input_755_cast_fp16, y = var_2644_cast_fp16)[name = tensor("input_767_cast_fp16")]; tensor query_29_axes_0 = const()[name = tensor("query_29_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_self_att_weight_to_fp16 = 
const()[name = tensor("encoder_module_layers_14_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356874816)))]; - tensor encoder_module_layers_14_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356876928)))]; + tensor encoder_module_layers_14_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357608768)))]; + tensor encoder_module_layers_14_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357610880)))]; tensor query_29_cast_fp16 = layer_norm(axes = query_29_axes_0, beta = encoder_module_layers_14_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_14_norm_self_att_weight_to_fp16, x = input_767_cast_fp16)[name = tensor("query_29_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356879040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357927680))), name = tensor("encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_129_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_palettized, x = query_29_cast_fp16)[name = tensor("linear_129_cast_fp16")]; + tensor encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357612992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358662720))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358661632)))]; + tensor linear_129_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_quantized, x = query_29_cast_fp16)[name = tensor("linear_129_cast_fp16")]; tensor var_2660 = const()[name = tensor("op_2660"), val = tensor([1, -1, 8, 128])]; tensor q_85_cast_fp16 = reshape(shape = var_2660, x = linear_129_cast_fp16)[name = tensor("q_85_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357928256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358976896))), name = tensor("encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_130_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_palettized, x = query_29_cast_fp16)[name = tensor("linear_130_cast_fp16")]; + tensor encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = 
tensor(0), name = tensor("encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358664832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359714560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359713472)))]; + tensor linear_130_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_quantized, x = query_29_cast_fp16)[name = tensor("linear_130_cast_fp16")]; tensor var_2664 = const()[name = tensor("op_2664"), val = tensor([1, -1, 8, 128])]; tensor k_57_cast_fp16 = reshape(shape = var_2664, x = linear_130_cast_fp16)[name = tensor("k_57_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358977472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360026112))), name = tensor("encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_131_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_palettized, x = query_29_cast_fp16)[name = tensor("linear_131_cast_fp16")]; + tensor encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359716672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360766400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360765312)))]; + tensor linear_131_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_quantized, x = query_29_cast_fp16)[name = tensor("linear_131_cast_fp16")]; tensor var_2668 = const()[name = tensor("op_2668"), val = tensor([1, -1, 8, 128])]; tensor v_29_cast_fp16 = reshape(shape = var_2668, x = linear_131_cast_fp16)[name = tensor("v_29_cast_fp16")]; tensor value_33_perm_0 = const()[name = tensor("value_33_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_14_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_14_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360026688)))]; + tensor encoder_module_layers_14_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_14_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360768512)))]; tensor var_2680_cast_fp16 = add(x = q_85_cast_fp16, y = encoder_module_layers_14_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2680_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_14_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360028800)))]; + tensor encoder_module_layers_14_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_14_self_attn_pos_bias_v_to_fp16"), 
val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360770624)))]; tensor var_2682_cast_fp16 = add(x = q_85_cast_fp16, y = encoder_module_layers_14_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2682_cast_fp16")]; tensor q_with_bias_v_29_perm_0 = const()[name = tensor("q_with_bias_v_29_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_331_transpose_x_0 = const()[name = tensor("x_331_transpose_x_0"), val = tensor(false)]; tensor x_331_transpose_y_0 = const()[name = tensor("x_331_transpose_y_0"), val = tensor(false)]; - tensor op_2684_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360030912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360414976))), name = tensor("op_2684_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2684_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2684_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360772736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361157248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361156800)))]; tensor q_with_bias_v_29_cast_fp16 = transpose(perm = q_with_bias_v_29_perm_0, x = var_2682_cast_fp16)[name = tensor("transpose_214")]; - tensor x_331_cast_fp16 = matmul(transpose_x = x_331_transpose_x_0, transpose_y = x_331_transpose_y_0, x = q_with_bias_v_29_cast_fp16, y = op_2684_to_fp16_palettized)[name = tensor("x_331_cast_fp16")]; + tensor x_331_cast_fp16 = matmul(transpose_x = x_331_transpose_x_0, transpose_y = x_331_transpose_y_0, x = q_with_bias_v_29_cast_fp16, y = op_2684_to_fp16_quantized)[name = tensor("x_331_cast_fp16")]; tensor x_333_pad_0 = const()[name = tensor("x_333_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_333_mode_0 = const()[name = tensor("x_333_mode_0"), val = tensor("constant")]; tensor const_169_to_fp16 = const()[name = tensor("const_169_to_fp16"), val = tensor(0x0p+0)]; @@ -2217,12 +2217,12 @@ program(1.0) tensor var_2717 = const()[name = tensor("op_2717"), val = tensor([1, -1, 1024])]; tensor var_2716_cast_fp16 = transpose(perm = var_2716_perm_0, x = x_337_cast_fp16)[name = tensor("transpose_210")]; tensor input_771_cast_fp16 = reshape(shape = var_2717, x = var_2716_cast_fp16)[name = tensor("input_771_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360415552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361464192))), name = tensor("encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_133_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_palettized, x = input_771_cast_fp16)[name = tensor("linear_133_cast_fp16")]; + tensor encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361158080))), scale = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(362207808))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362206720)))]; + tensor linear_133_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_quantized, x = input_771_cast_fp16)[name = tensor("linear_133_cast_fp16")]; tensor input_775_cast_fp16 = add(x = input_767_cast_fp16, y = linear_133_cast_fp16)[name = tensor("input_775_cast_fp16")]; tensor x_341_axes_0 = const()[name = tensor("x_341_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361464768)))]; - tensor encoder_module_layers_14_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361466880)))]; + tensor encoder_module_layers_14_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362209920)))]; + tensor encoder_module_layers_14_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362212032)))]; tensor x_341_cast_fp16 = layer_norm(axes = x_341_axes_0, beta = encoder_module_layers_14_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_14_norm_conv_weight_to_fp16, x = input_775_cast_fp16)[name = tensor("x_341_cast_fp16")]; tensor input_777_perm_0 = const()[name = tensor("input_777_perm_0"), val = tensor([0, 2, 1])]; tensor input_779_pad_type_0 = const()[name = tensor("input_779_pad_type_0"), val = tensor("valid")]; @@ -2230,9 +2230,9 @@ program(1.0) tensor input_779_pad_0 = const()[name = tensor("input_779_pad_0"), val = tensor([0, 0])]; tensor input_779_dilations_0 = const()[name = tensor("input_779_dilations_0"), val = tensor([1])]; tensor input_779_groups_0 = const()[name = tensor("input_779_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361468992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363566208))), name = tensor("encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362214144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364313472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364311360)))]; tensor input_777_cast_fp16 = transpose(perm = input_777_perm_0, x = x_341_cast_fp16)[name = tensor("transpose_209")]; - tensor input_779_cast_fp16 = conv(dilations = input_779_dilations_0, groups = input_779_groups_0, pad = input_779_pad_0, pad_type = 
input_779_pad_type_0, strides = input_779_strides_0, weight = encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_777_cast_fp16)[name = tensor("input_779_cast_fp16")]; + tensor input_779_cast_fp16 = conv(dilations = input_779_dilations_0, groups = input_779_groups_0, pad = input_779_pad_0, pad_type = input_779_pad_type_0, strides = input_779_strides_0, weight = encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_777_cast_fp16)[name = tensor("input_779_cast_fp16")]; tensor x_343_split_num_splits_0 = const()[name = tensor("x_343_split_num_splits_0"), val = tensor(2)]; tensor x_343_split_axis_0 = const()[name = tensor("x_343_split_axis_0"), val = tensor(1)]; tensor x_343_split_cast_fp16_0, tensor x_343_split_cast_fp16_1 = split(axis = x_343_split_axis_0, num_splits = x_343_split_num_splits_0, x = input_779_cast_fp16)[name = tensor("x_343_split_cast_fp16")]; @@ -2248,75 +2248,75 @@ program(1.0) tensor input_785_strides_0 = const()[name = tensor("input_785_strides_0"), val = tensor([1])]; tensor input_785_pad_0 = const()[name = tensor("input_785_pad_0"), val = tensor([0, 0])]; tensor input_785_dilations_0 = const()[name = tensor("input_785_dilations_0"), val = tensor([1])]; - tensor const_291_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363566784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363576064))), name = tensor("const_291_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_292_to_fp16 = const()[name = tensor("const_292_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363576640)))]; - tensor input_787_cast_fp16 = conv(bias = const_292_to_fp16, dilations = input_785_dilations_0, groups = input_785_groups_0, pad = input_785_pad_0, pad_type = input_785_pad_type_0, strides = input_785_strides_0, weight = const_291_to_fp16_palettized, x = input_783_cast_fp16)[name = tensor("input_787_cast_fp16")]; + tensor const_291_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_291_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364317632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364328000))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364326912)))]; + tensor const_292_to_fp16 = const()[name = tensor("const_292_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364330112)))]; + tensor input_787_cast_fp16 = conv(bias = const_292_to_fp16, dilations = input_785_dilations_0, groups = input_785_groups_0, pad = input_785_pad_0, pad_type = input_785_pad_type_0, strides = input_785_strides_0, weight = const_291_to_fp16_quantized, x = input_783_cast_fp16)[name = tensor("input_787_cast_fp16")]; tensor input_789_cast_fp16 = silu(x = input_787_cast_fp16)[name = tensor("input_789_cast_fp16")]; tensor x_345_pad_type_0 = const()[name = tensor("x_345_pad_type_0"), val = tensor("valid")]; tensor x_345_strides_0 = const()[name = tensor("x_345_strides_0"), val = tensor([1])]; tensor x_345_pad_0 = const()[name = tensor("x_345_pad_0"), val = tensor([0, 0])]; tensor x_345_dilations_0 = const()[name = tensor("x_345_dilations_0"), val = tensor([1])]; tensor x_345_groups_0 = const()[name = tensor("x_345_groups_0"), 
val = tensor(1)]; - tensor encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363578752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364627392))), name = tensor("encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_345_cast_fp16 = conv(dilations = x_345_dilations_0, groups = x_345_groups_0, pad = x_345_pad_0, pad_type = x_345_pad_type_0, strides = x_345_strides_0, weight = encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_789_cast_fp16)[name = tensor("x_345_cast_fp16")]; + tensor encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364332224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365381952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365380864)))]; + tensor x_345_cast_fp16 = conv(dilations = x_345_dilations_0, groups = x_345_groups_0, pad = x_345_pad_0, pad_type = x_345_pad_type_0, strides = x_345_strides_0, weight = encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_789_cast_fp16)[name = tensor("x_345_cast_fp16")]; tensor input_791_perm_0 = const()[name = tensor("input_791_perm_0"), val = tensor([0, 2, 1])]; tensor input_791_cast_fp16 = transpose(perm = input_791_perm_0, x = x_345_cast_fp16)[name = tensor("transpose_208")]; tensor input_793_cast_fp16 = add(x = input_775_cast_fp16, y = input_791_cast_fp16)[name = tensor("input_793_cast_fp16")]; tensor input_795_axes_0 = const()[name = tensor("input_795_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364627968)))]; - tensor encoder_module_layers_14_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364630080)))]; + tensor encoder_module_layers_14_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365384064)))]; + tensor encoder_module_layers_14_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365386176)))]; tensor input_795_cast_fp16 = layer_norm(axes = input_795_axes_0, beta = encoder_module_layers_14_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_14_norm_feed_forward2_weight_to_fp16, x = input_793_cast_fp16)[name = tensor("input_795_cast_fp16")]; - tensor encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(364632192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368826560))), name = tensor("encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_palettized, x = input_795_cast_fp16)[name = tensor("linear_134_cast_fp16")]; + tensor encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365388288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(369586816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(369582656)))]; + tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_quantized, x = input_795_cast_fp16)[name = tensor("linear_134_cast_fp16")]; tensor input_799_cast_fp16 = silu(x = linear_134_cast_fp16)[name = tensor("input_799_cast_fp16")]; - tensor encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368827136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373021504))), name = tensor("encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_135_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_palettized, x = input_799_cast_fp16)[name = tensor("linear_135_cast_fp16")]; + tensor encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(369595072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373790528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373789440)))]; + tensor linear_135_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_quantized, x = input_799_cast_fp16)[name = tensor("linear_135_cast_fp16")]; tensor var_2777_to_fp16 = const()[name = tensor("op_2777_to_fp16"), val = tensor(0x1p-1)]; tensor var_2778_cast_fp16 = mul(x = linear_135_cast_fp16, y = var_2777_to_fp16)[name = tensor("op_2778_cast_fp16")]; tensor input_805_cast_fp16 = add(x = input_793_cast_fp16, y = var_2778_cast_fp16)[name = tensor("input_805_cast_fp16")]; tensor input_807_axes_0 = const()[name = tensor("input_807_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373022080)))]; - tensor encoder_module_layers_14_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_out_bias_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373024192)))]; + tensor encoder_module_layers_14_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373792640)))]; + tensor encoder_module_layers_14_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373794752)))]; tensor input_807_cast_fp16 = layer_norm(axes = input_807_axes_0, beta = encoder_module_layers_14_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_14_norm_out_weight_to_fp16, x = input_805_cast_fp16)[name = tensor("input_807_cast_fp16")]; tensor input_809_axes_0 = const()[name = tensor("input_809_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373026304)))]; - tensor encoder_module_layers_15_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373028416)))]; + tensor encoder_module_layers_15_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373796864)))]; + tensor encoder_module_layers_15_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373798976)))]; tensor input_809_cast_fp16 = layer_norm(axes = input_809_axes_0, beta = encoder_module_layers_15_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_feed_forward1_weight_to_fp16, x = input_807_cast_fp16)[name = tensor("input_809_cast_fp16")]; - tensor encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373030528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377224896))), name = tensor("encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_palettized, x = input_809_cast_fp16)[name = tensor("linear_136_cast_fp16")]; + tensor encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373801088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377999616))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377995456)))]; + tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = 
encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_quantized, x = input_809_cast_fp16)[name = tensor("linear_136_cast_fp16")]; tensor input_813_cast_fp16 = silu(x = linear_136_cast_fp16)[name = tensor("input_813_cast_fp16")]; - tensor encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377225472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381419840))), name = tensor("encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_137_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_palettized, x = input_813_cast_fp16)[name = tensor("linear_137_cast_fp16")]; + tensor encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(378007872))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382203328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382202240)))]; + tensor linear_137_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_quantized, x = input_813_cast_fp16)[name = tensor("linear_137_cast_fp16")]; tensor var_2806_to_fp16 = const()[name = tensor("op_2806_to_fp16"), val = tensor(0x1p-1)]; tensor var_2807_cast_fp16 = mul(x = linear_137_cast_fp16, y = var_2806_to_fp16)[name = tensor("op_2807_cast_fp16")]; tensor input_819_cast_fp16 = add(x = input_807_cast_fp16, y = var_2807_cast_fp16)[name = tensor("input_819_cast_fp16")]; tensor query_31_axes_0 = const()[name = tensor("query_31_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381420416)))]; - tensor encoder_module_layers_15_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381422528)))]; + tensor encoder_module_layers_15_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382205440)))]; + tensor encoder_module_layers_15_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382207552)))]; tensor query_31_cast_fp16 = layer_norm(axes = query_31_axes_0, beta = encoder_module_layers_15_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_self_att_weight_to_fp16, x = input_819_cast_fp16)[name = tensor("query_31_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381424640))), lut = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382473280))), name = tensor("encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_138_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_palettized, x = query_31_cast_fp16)[name = tensor("linear_138_cast_fp16")]; + tensor encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382209664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383259392))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383258304)))]; + tensor linear_138_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_quantized, x = query_31_cast_fp16)[name = tensor("linear_138_cast_fp16")]; tensor var_2823 = const()[name = tensor("op_2823"), val = tensor([1, -1, 8, 128])]; tensor q_91_cast_fp16 = reshape(shape = var_2823, x = linear_138_cast_fp16)[name = tensor("q_91_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382473856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383522496))), name = tensor("encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_139_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_palettized, x = query_31_cast_fp16)[name = tensor("linear_139_cast_fp16")]; + tensor encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383261504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384311232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384310144)))]; + tensor linear_139_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_quantized, x = query_31_cast_fp16)[name = tensor("linear_139_cast_fp16")]; tensor var_2827 = const()[name = tensor("op_2827"), val = tensor([1, -1, 8, 128])]; tensor k_61_cast_fp16 = reshape(shape = var_2827, x = linear_139_cast_fp16)[name = tensor("k_61_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383523072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384571712))), name = tensor("encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_140_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_palettized, x = 
query_31_cast_fp16)[name = tensor("linear_140_cast_fp16")]; + tensor encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384313344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385363072))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385361984)))]; + tensor linear_140_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_quantized, x = query_31_cast_fp16)[name = tensor("linear_140_cast_fp16")]; tensor var_2831 = const()[name = tensor("op_2831"), val = tensor([1, -1, 8, 128])]; tensor v_31_cast_fp16 = reshape(shape = var_2831, x = linear_140_cast_fp16)[name = tensor("v_31_cast_fp16")]; tensor value_35_perm_0 = const()[name = tensor("value_35_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_15_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_15_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384572288)))]; + tensor encoder_module_layers_15_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_15_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385365184)))]; tensor var_2843_cast_fp16 = add(x = q_91_cast_fp16, y = encoder_module_layers_15_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2843_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_15_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384574400)))]; + tensor encoder_module_layers_15_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_15_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385367296)))]; tensor var_2845_cast_fp16 = add(x = q_91_cast_fp16, y = encoder_module_layers_15_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2845_cast_fp16")]; tensor q_with_bias_v_31_perm_0 = const()[name = tensor("q_with_bias_v_31_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_353_transpose_x_0 = const()[name = tensor("x_353_transpose_x_0"), val = tensor(false)]; tensor x_353_transpose_y_0 = const()[name = tensor("x_353_transpose_y_0"), val = tensor(false)]; - tensor op_2847_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384576512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384960576))), name = tensor("op_2847_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2847_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2847_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385369408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385753920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385753472)))]; tensor q_with_bias_v_31_cast_fp16 = transpose(perm = 
q_with_bias_v_31_perm_0, x = var_2845_cast_fp16)[name = tensor("transpose_207")]; - tensor x_353_cast_fp16 = matmul(transpose_x = x_353_transpose_x_0, transpose_y = x_353_transpose_y_0, x = q_with_bias_v_31_cast_fp16, y = op_2847_to_fp16_palettized)[name = tensor("x_353_cast_fp16")]; + tensor x_353_cast_fp16 = matmul(transpose_x = x_353_transpose_x_0, transpose_y = x_353_transpose_y_0, x = q_with_bias_v_31_cast_fp16, y = op_2847_to_fp16_quantized)[name = tensor("x_353_cast_fp16")]; tensor x_355_pad_0 = const()[name = tensor("x_355_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_355_mode_0 = const()[name = tensor("x_355_mode_0"), val = tensor("constant")]; tensor const_179_to_fp16 = const()[name = tensor("const_179_to_fp16"), val = tensor(0x0p+0)]; @@ -2354,12 +2354,12 @@ program(1.0) tensor var_2880 = const()[name = tensor("op_2880"), val = tensor([1, -1, 1024])]; tensor var_2879_cast_fp16 = transpose(perm = var_2879_perm_0, x = x_359_cast_fp16)[name = tensor("transpose_203")]; tensor input_823_cast_fp16 = reshape(shape = var_2880, x = var_2879_cast_fp16)[name = tensor("input_823_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384961152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386009792))), name = tensor("encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_142_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_palettized, x = input_823_cast_fp16)[name = tensor("linear_142_cast_fp16")]; + tensor encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385754752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386804480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386803392)))]; + tensor linear_142_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_quantized, x = input_823_cast_fp16)[name = tensor("linear_142_cast_fp16")]; tensor input_827_cast_fp16 = add(x = input_819_cast_fp16, y = linear_142_cast_fp16)[name = tensor("input_827_cast_fp16")]; tensor x_363_axes_0 = const()[name = tensor("x_363_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386010368)))]; - tensor encoder_module_layers_15_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386012480)))]; + tensor encoder_module_layers_15_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386806592)))]; + tensor encoder_module_layers_15_norm_conv_bias_to_fp16 = const()[name = 
tensor("encoder_module_layers_15_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386808704)))]; tensor x_363_cast_fp16 = layer_norm(axes = x_363_axes_0, beta = encoder_module_layers_15_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_conv_weight_to_fp16, x = input_827_cast_fp16)[name = tensor("x_363_cast_fp16")]; tensor input_829_perm_0 = const()[name = tensor("input_829_perm_0"), val = tensor([0, 2, 1])]; tensor input_831_pad_type_0 = const()[name = tensor("input_831_pad_type_0"), val = tensor("valid")]; @@ -2367,9 +2367,9 @@ program(1.0) tensor input_831_pad_0 = const()[name = tensor("input_831_pad_0"), val = tensor([0, 0])]; tensor input_831_dilations_0 = const()[name = tensor("input_831_dilations_0"), val = tensor([1])]; tensor input_831_groups_0 = const()[name = tensor("input_831_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386014592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388111808))), name = tensor("encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386810816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388910144))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388908032)))]; tensor input_829_cast_fp16 = transpose(perm = input_829_perm_0, x = x_363_cast_fp16)[name = tensor("transpose_202")]; - tensor input_831_cast_fp16 = conv(dilations = input_831_dilations_0, groups = input_831_groups_0, pad = input_831_pad_0, pad_type = input_831_pad_type_0, strides = input_831_strides_0, weight = encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_829_cast_fp16)[name = tensor("input_831_cast_fp16")]; + tensor input_831_cast_fp16 = conv(dilations = input_831_dilations_0, groups = input_831_groups_0, pad = input_831_pad_0, pad_type = input_831_pad_type_0, strides = input_831_strides_0, weight = encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_829_cast_fp16)[name = tensor("input_831_cast_fp16")]; tensor x_365_split_num_splits_0 = const()[name = tensor("x_365_split_num_splits_0"), val = tensor(2)]; tensor x_365_split_axis_0 = const()[name = tensor("x_365_split_axis_0"), val = tensor(1)]; tensor x_365_split_cast_fp16_0, tensor x_365_split_cast_fp16_1 = split(axis = x_365_split_axis_0, num_splits = x_365_split_num_splits_0, x = input_831_cast_fp16)[name = tensor("x_365_split_cast_fp16")]; @@ -2385,75 +2385,75 @@ program(1.0) tensor input_837_strides_0 = const()[name = tensor("input_837_strides_0"), val = tensor([1])]; tensor input_837_pad_0 = const()[name = tensor("input_837_pad_0"), val = tensor([0, 0])]; tensor input_837_dilations_0 = const()[name = tensor("input_837_dilations_0"), val = tensor([1])]; - tensor const_293_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(388112384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388121664))), name = tensor("const_293_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_294_to_fp16 = const()[name = tensor("const_294_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388122240)))]; - tensor input_839_cast_fp16 = conv(bias = const_294_to_fp16, dilations = input_837_dilations_0, groups = input_837_groups_0, pad = input_837_pad_0, pad_type = input_837_pad_type_0, strides = input_837_strides_0, weight = const_293_to_fp16_palettized, x = input_835_cast_fp16)[name = tensor("input_839_cast_fp16")]; + tensor const_293_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_293_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388914304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388924672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388923584)))]; + tensor const_294_to_fp16 = const()[name = tensor("const_294_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388926784)))]; + tensor input_839_cast_fp16 = conv(bias = const_294_to_fp16, dilations = input_837_dilations_0, groups = input_837_groups_0, pad = input_837_pad_0, pad_type = input_837_pad_type_0, strides = input_837_strides_0, weight = const_293_to_fp16_quantized, x = input_835_cast_fp16)[name = tensor("input_839_cast_fp16")]; tensor input_841_cast_fp16 = silu(x = input_839_cast_fp16)[name = tensor("input_841_cast_fp16")]; tensor x_367_pad_type_0 = const()[name = tensor("x_367_pad_type_0"), val = tensor("valid")]; tensor x_367_strides_0 = const()[name = tensor("x_367_strides_0"), val = tensor([1])]; tensor x_367_pad_0 = const()[name = tensor("x_367_pad_0"), val = tensor([0, 0])]; tensor x_367_dilations_0 = const()[name = tensor("x_367_dilations_0"), val = tensor([1])]; tensor x_367_groups_0 = const()[name = tensor("x_367_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388124352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389172992))), name = tensor("encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_841_cast_fp16)[name = tensor("x_367_cast_fp16")]; + tensor encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388928896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389978624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389977536)))]; + tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, 
pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_841_cast_fp16)[name = tensor("x_367_cast_fp16")]; tensor input_843_perm_0 = const()[name = tensor("input_843_perm_0"), val = tensor([0, 2, 1])]; tensor input_843_cast_fp16 = transpose(perm = input_843_perm_0, x = x_367_cast_fp16)[name = tensor("transpose_201")]; tensor input_845_cast_fp16 = add(x = input_827_cast_fp16, y = input_843_cast_fp16)[name = tensor("input_845_cast_fp16")]; tensor input_847_axes_0 = const()[name = tensor("input_847_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389173568)))]; - tensor encoder_module_layers_15_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389175680)))]; + tensor encoder_module_layers_15_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389980736)))]; + tensor encoder_module_layers_15_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389982848)))]; tensor input_847_cast_fp16 = layer_norm(axes = input_847_axes_0, beta = encoder_module_layers_15_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_feed_forward2_weight_to_fp16, x = input_845_cast_fp16)[name = tensor("input_847_cast_fp16")]; - tensor encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389177792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(393372160))), name = tensor("encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_palettized, x = input_847_cast_fp16)[name = tensor("linear_143_cast_fp16")]; + tensor encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389984960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394183488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394179328)))]; + tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_quantized, x = input_847_cast_fp16)[name = tensor("linear_143_cast_fp16")]; tensor input_851_cast_fp16 = silu(x = linear_143_cast_fp16)[name = tensor("input_851_cast_fp16")]; - tensor encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_palettized = 
constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(393372736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397567104))), name = tensor("encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_144_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_palettized, x = input_851_cast_fp16)[name = tensor("linear_144_cast_fp16")]; + tensor encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394191744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398387200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398386112)))]; + tensor linear_144_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_quantized, x = input_851_cast_fp16)[name = tensor("linear_144_cast_fp16")]; tensor var_2940_to_fp16 = const()[name = tensor("op_2940_to_fp16"), val = tensor(0x1p-1)]; tensor var_2941_cast_fp16 = mul(x = linear_144_cast_fp16, y = var_2940_to_fp16)[name = tensor("op_2941_cast_fp16")]; tensor input_857_cast_fp16 = add(x = input_845_cast_fp16, y = var_2941_cast_fp16)[name = tensor("input_857_cast_fp16")]; tensor input_859_axes_0 = const()[name = tensor("input_859_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397567680)))]; - tensor encoder_module_layers_15_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397569792)))]; + tensor encoder_module_layers_15_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398389312)))]; + tensor encoder_module_layers_15_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398391424)))]; tensor input_859_cast_fp16 = layer_norm(axes = input_859_axes_0, beta = encoder_module_layers_15_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_out_weight_to_fp16, x = input_857_cast_fp16)[name = tensor("input_859_cast_fp16")]; tensor input_861_axes_0 = const()[name = tensor("input_861_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397571904)))]; - tensor encoder_module_layers_16_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(397574016)))]; + tensor encoder_module_layers_16_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398393536)))]; + tensor encoder_module_layers_16_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398395648)))]; tensor input_861_cast_fp16 = layer_norm(axes = input_861_axes_0, beta = encoder_module_layers_16_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_feed_forward1_weight_to_fp16, x = input_859_cast_fp16)[name = tensor("input_861_cast_fp16")]; - tensor encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397576128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401770496))), name = tensor("encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_palettized, x = input_861_cast_fp16)[name = tensor("linear_145_cast_fp16")]; + tensor encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398397760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(402596288))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(402592128)))]; + tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_quantized, x = input_861_cast_fp16)[name = tensor("linear_145_cast_fp16")]; tensor input_865_cast_fp16 = silu(x = linear_145_cast_fp16)[name = tensor("input_865_cast_fp16")]; - tensor encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401771072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405965440))), name = tensor("encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_146_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_palettized, x = input_865_cast_fp16)[name = tensor("linear_146_cast_fp16")]; + tensor encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(402604544))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406800000))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406798912)))]; + tensor 
linear_146_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_quantized, x = input_865_cast_fp16)[name = tensor("linear_146_cast_fp16")]; tensor var_2969_to_fp16 = const()[name = tensor("op_2969_to_fp16"), val = tensor(0x1p-1)]; tensor var_2970_cast_fp16 = mul(x = linear_146_cast_fp16, y = var_2969_to_fp16)[name = tensor("op_2970_cast_fp16")]; tensor input_871_cast_fp16 = add(x = input_859_cast_fp16, y = var_2970_cast_fp16)[name = tensor("input_871_cast_fp16")]; tensor query_33_axes_0 = const()[name = tensor("query_33_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405966016)))]; - tensor encoder_module_layers_16_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405968128)))]; + tensor encoder_module_layers_16_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406802112)))]; + tensor encoder_module_layers_16_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406804224)))]; tensor query_33_cast_fp16 = layer_norm(axes = query_33_axes_0, beta = encoder_module_layers_16_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_self_att_weight_to_fp16, x = input_871_cast_fp16)[name = tensor("query_33_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405970240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407018880))), name = tensor("encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_147_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_palettized, x = query_33_cast_fp16)[name = tensor("linear_147_cast_fp16")]; + tensor encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406806336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407856064))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407854976)))]; + tensor linear_147_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_quantized, x = query_33_cast_fp16)[name = tensor("linear_147_cast_fp16")]; tensor var_2986 = const()[name = tensor("op_2986"), val = tensor([1, -1, 8, 128])]; tensor q_97_cast_fp16 = reshape(shape = var_2986, x = linear_147_cast_fp16)[name = tensor("q_97_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_palettized 
= constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407019456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408068096))), name = tensor("encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_148_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_palettized, x = query_33_cast_fp16)[name = tensor("linear_148_cast_fp16")]; + tensor encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407858176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408907904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408906816)))]; + tensor linear_148_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_quantized, x = query_33_cast_fp16)[name = tensor("linear_148_cast_fp16")]; tensor var_2990 = const()[name = tensor("op_2990"), val = tensor([1, -1, 8, 128])]; tensor k_65_cast_fp16 = reshape(shape = var_2990, x = linear_148_cast_fp16)[name = tensor("k_65_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408068672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409117312))), name = tensor("encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_149_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_palettized, x = query_33_cast_fp16)[name = tensor("linear_149_cast_fp16")]; + tensor encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408910016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409959744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409958656)))]; + tensor linear_149_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_quantized, x = query_33_cast_fp16)[name = tensor("linear_149_cast_fp16")]; tensor var_2994 = const()[name = tensor("op_2994"), val = tensor([1, -1, 8, 128])]; tensor v_33_cast_fp16 = reshape(shape = var_2994, x = linear_149_cast_fp16)[name = tensor("v_33_cast_fp16")]; tensor value_37_perm_0 = const()[name = tensor("value_37_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_16_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_16_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409117888)))]; + tensor encoder_module_layers_16_self_attn_pos_bias_u_to_fp16 = const()[name = 
tensor("encoder_module_layers_16_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409961856)))]; tensor var_3006_cast_fp16 = add(x = q_97_cast_fp16, y = encoder_module_layers_16_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3006_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_16_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409120000)))]; + tensor encoder_module_layers_16_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_16_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409963968)))]; tensor var_3008_cast_fp16 = add(x = q_97_cast_fp16, y = encoder_module_layers_16_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3008_cast_fp16")]; tensor q_with_bias_v_33_perm_0 = const()[name = tensor("q_with_bias_v_33_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_375_transpose_x_0 = const()[name = tensor("x_375_transpose_x_0"), val = tensor(false)]; tensor x_375_transpose_y_0 = const()[name = tensor("x_375_transpose_y_0"), val = tensor(false)]; - tensor op_3010_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409122112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409506176))), name = tensor("op_3010_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3010_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3010_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409966080))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410350592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410350144)))]; tensor q_with_bias_v_33_cast_fp16 = transpose(perm = q_with_bias_v_33_perm_0, x = var_3008_cast_fp16)[name = tensor("transpose_200")]; - tensor x_375_cast_fp16 = matmul(transpose_x = x_375_transpose_x_0, transpose_y = x_375_transpose_y_0, x = q_with_bias_v_33_cast_fp16, y = op_3010_to_fp16_palettized)[name = tensor("x_375_cast_fp16")]; + tensor x_375_cast_fp16 = matmul(transpose_x = x_375_transpose_x_0, transpose_y = x_375_transpose_y_0, x = q_with_bias_v_33_cast_fp16, y = op_3010_to_fp16_quantized)[name = tensor("x_375_cast_fp16")]; tensor x_377_pad_0 = const()[name = tensor("x_377_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_377_mode_0 = const()[name = tensor("x_377_mode_0"), val = tensor("constant")]; tensor const_189_to_fp16 = const()[name = tensor("const_189_to_fp16"), val = tensor(0x0p+0)]; @@ -2491,12 +2491,12 @@ program(1.0) tensor var_3043 = const()[name = tensor("op_3043"), val = tensor([1, -1, 1024])]; tensor var_3042_cast_fp16 = transpose(perm = var_3042_perm_0, x = x_381_cast_fp16)[name = tensor("transpose_196")]; tensor input_875_cast_fp16 = reshape(shape = var_3043, x = var_3042_cast_fp16)[name = tensor("input_875_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409506752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(410555392))), name = tensor("encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_151_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_palettized, x = input_875_cast_fp16)[name = tensor("linear_151_cast_fp16")]; + tensor encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410351424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411401152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411400064)))]; + tensor linear_151_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_quantized, x = input_875_cast_fp16)[name = tensor("linear_151_cast_fp16")]; tensor input_879_cast_fp16 = add(x = input_871_cast_fp16, y = linear_151_cast_fp16)[name = tensor("input_879_cast_fp16")]; tensor x_385_axes_0 = const()[name = tensor("x_385_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410555968)))]; - tensor encoder_module_layers_16_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410558080)))]; + tensor encoder_module_layers_16_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411403264)))]; + tensor encoder_module_layers_16_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411405376)))]; tensor x_385_cast_fp16 = layer_norm(axes = x_385_axes_0, beta = encoder_module_layers_16_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_conv_weight_to_fp16, x = input_879_cast_fp16)[name = tensor("x_385_cast_fp16")]; tensor input_881_perm_0 = const()[name = tensor("input_881_perm_0"), val = tensor([0, 2, 1])]; tensor input_883_pad_type_0 = const()[name = tensor("input_883_pad_type_0"), val = tensor("valid")]; @@ -2504,9 +2504,9 @@ program(1.0) tensor input_883_pad_0 = const()[name = tensor("input_883_pad_0"), val = tensor([0, 0])]; tensor input_883_dilations_0 = const()[name = tensor("input_883_dilations_0"), val = tensor([1])]; tensor input_883_groups_0 = const()[name = tensor("input_883_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410560192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412657408))), name = tensor("encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor 
encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411407488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413506816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413504704)))]; tensor input_881_cast_fp16 = transpose(perm = input_881_perm_0, x = x_385_cast_fp16)[name = tensor("transpose_195")]; - tensor input_883_cast_fp16 = conv(dilations = input_883_dilations_0, groups = input_883_groups_0, pad = input_883_pad_0, pad_type = input_883_pad_type_0, strides = input_883_strides_0, weight = encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_881_cast_fp16)[name = tensor("input_883_cast_fp16")]; + tensor input_883_cast_fp16 = conv(dilations = input_883_dilations_0, groups = input_883_groups_0, pad = input_883_pad_0, pad_type = input_883_pad_type_0, strides = input_883_strides_0, weight = encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_881_cast_fp16)[name = tensor("input_883_cast_fp16")]; tensor x_387_split_num_splits_0 = const()[name = tensor("x_387_split_num_splits_0"), val = tensor(2)]; tensor x_387_split_axis_0 = const()[name = tensor("x_387_split_axis_0"), val = tensor(1)]; tensor x_387_split_cast_fp16_0, tensor x_387_split_cast_fp16_1 = split(axis = x_387_split_axis_0, num_splits = x_387_split_num_splits_0, x = input_883_cast_fp16)[name = tensor("x_387_split_cast_fp16")]; @@ -2522,75 +2522,75 @@ program(1.0) tensor input_889_strides_0 = const()[name = tensor("input_889_strides_0"), val = tensor([1])]; tensor input_889_pad_0 = const()[name = tensor("input_889_pad_0"), val = tensor([0, 0])]; tensor input_889_dilations_0 = const()[name = tensor("input_889_dilations_0"), val = tensor([1])]; - tensor const_295_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412657984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412667264))), name = tensor("const_295_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_296_to_fp16 = const()[name = tensor("const_296_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412667840)))]; - tensor input_891_cast_fp16 = conv(bias = const_296_to_fp16, dilations = input_889_dilations_0, groups = input_889_groups_0, pad = input_889_pad_0, pad_type = input_889_pad_type_0, strides = input_889_strides_0, weight = const_295_to_fp16_palettized, x = input_887_cast_fp16)[name = tensor("input_891_cast_fp16")]; + tensor const_295_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_295_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413510976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413521344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413520256)))]; + tensor const_296_to_fp16 = const()[name = tensor("const_296_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413523456)))]; + tensor input_891_cast_fp16 = conv(bias = 
const_296_to_fp16, dilations = input_889_dilations_0, groups = input_889_groups_0, pad = input_889_pad_0, pad_type = input_889_pad_type_0, strides = input_889_strides_0, weight = const_295_to_fp16_quantized, x = input_887_cast_fp16)[name = tensor("input_891_cast_fp16")]; tensor input_893_cast_fp16 = silu(x = input_891_cast_fp16)[name = tensor("input_893_cast_fp16")]; tensor x_389_pad_type_0 = const()[name = tensor("x_389_pad_type_0"), val = tensor("valid")]; tensor x_389_strides_0 = const()[name = tensor("x_389_strides_0"), val = tensor([1])]; tensor x_389_pad_0 = const()[name = tensor("x_389_pad_0"), val = tensor([0, 0])]; tensor x_389_dilations_0 = const()[name = tensor("x_389_dilations_0"), val = tensor([1])]; tensor x_389_groups_0 = const()[name = tensor("x_389_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412669952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413718592))), name = tensor("encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_389_cast_fp16 = conv(dilations = x_389_dilations_0, groups = x_389_groups_0, pad = x_389_pad_0, pad_type = x_389_pad_type_0, strides = x_389_strides_0, weight = encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_893_cast_fp16)[name = tensor("x_389_cast_fp16")]; + tensor encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413525568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(414575296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(414574208)))]; + tensor x_389_cast_fp16 = conv(dilations = x_389_dilations_0, groups = x_389_groups_0, pad = x_389_pad_0, pad_type = x_389_pad_type_0, strides = x_389_strides_0, weight = encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_893_cast_fp16)[name = tensor("x_389_cast_fp16")]; tensor input_895_perm_0 = const()[name = tensor("input_895_perm_0"), val = tensor([0, 2, 1])]; tensor input_895_cast_fp16 = transpose(perm = input_895_perm_0, x = x_389_cast_fp16)[name = tensor("transpose_194")]; tensor input_897_cast_fp16 = add(x = input_879_cast_fp16, y = input_895_cast_fp16)[name = tensor("input_897_cast_fp16")]; tensor input_899_axes_0 = const()[name = tensor("input_899_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413719168)))]; - tensor encoder_module_layers_16_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413721280)))]; + tensor encoder_module_layers_16_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(414577408)))]; + tensor encoder_module_layers_16_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(414579520)))]; tensor input_899_cast_fp16 = layer_norm(axes = input_899_axes_0, beta = encoder_module_layers_16_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_feed_forward2_weight_to_fp16, x = input_897_cast_fp16)[name = tensor("input_899_cast_fp16")]; - tensor encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413723392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417917760))), name = tensor("encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_152_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_palettized, x = input_899_cast_fp16)[name = tensor("linear_152_cast_fp16")]; + tensor encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(414581632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(418780160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(418776000)))]; + tensor linear_152_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_quantized, x = input_899_cast_fp16)[name = tensor("linear_152_cast_fp16")]; tensor input_903_cast_fp16 = silu(x = linear_152_cast_fp16)[name = tensor("input_903_cast_fp16")]; - tensor encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417918336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422112704))), name = tensor("encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_153_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_palettized, x = input_903_cast_fp16)[name = tensor("linear_153_cast_fp16")]; + tensor encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(418788416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422983872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422982784)))]; + tensor linear_153_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_quantized, x = input_903_cast_fp16)[name = tensor("linear_153_cast_fp16")]; 
tensor var_3103_to_fp16 = const()[name = tensor("op_3103_to_fp16"), val = tensor(0x1p-1)]; tensor var_3104_cast_fp16 = mul(x = linear_153_cast_fp16, y = var_3103_to_fp16)[name = tensor("op_3104_cast_fp16")]; tensor input_909_cast_fp16 = add(x = input_897_cast_fp16, y = var_3104_cast_fp16)[name = tensor("input_909_cast_fp16")]; tensor input_911_axes_0 = const()[name = tensor("input_911_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422113280)))]; - tensor encoder_module_layers_16_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422115392)))]; + tensor encoder_module_layers_16_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422985984)))]; + tensor encoder_module_layers_16_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422988096)))]; tensor input_911_cast_fp16 = layer_norm(axes = input_911_axes_0, beta = encoder_module_layers_16_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_out_weight_to_fp16, x = input_909_cast_fp16)[name = tensor("input_911_cast_fp16")]; tensor input_913_axes_0 = const()[name = tensor("input_913_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_17_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422117504)))]; - tensor encoder_module_layers_17_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422119616)))]; + tensor encoder_module_layers_17_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422990208)))]; + tensor encoder_module_layers_17_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422992320)))]; tensor input_913_cast_fp16 = layer_norm(axes = input_913_axes_0, beta = encoder_module_layers_17_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_feed_forward1_weight_to_fp16, x = input_911_cast_fp16)[name = tensor("input_913_cast_fp16")]; - tensor encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422121728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(426316096))), name = tensor("encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_154_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = 
encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_palettized, x = input_913_cast_fp16)[name = tensor("linear_154_cast_fp16")]; + tensor encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422994432))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(427192960))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(427188800)))]; + tensor linear_154_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_quantized, x = input_913_cast_fp16)[name = tensor("linear_154_cast_fp16")]; tensor input_917_cast_fp16 = silu(x = linear_154_cast_fp16)[name = tensor("input_917_cast_fp16")]; - tensor encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(426316672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430511040))), name = tensor("encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_155_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_palettized, x = input_917_cast_fp16)[name = tensor("linear_155_cast_fp16")]; + tensor encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(427201216))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431396672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431395584)))]; + tensor linear_155_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_quantized, x = input_917_cast_fp16)[name = tensor("linear_155_cast_fp16")]; tensor var_3132_to_fp16 = const()[name = tensor("op_3132_to_fp16"), val = tensor(0x1p-1)]; tensor var_3133_cast_fp16 = mul(x = linear_155_cast_fp16, y = var_3132_to_fp16)[name = tensor("op_3133_cast_fp16")]; tensor input_923_cast_fp16 = add(x = input_911_cast_fp16, y = var_3133_cast_fp16)[name = tensor("input_923_cast_fp16")]; tensor query_35_axes_0 = const()[name = tensor("query_35_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_17_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430511616)))]; - tensor encoder_module_layers_17_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430513728)))]; + tensor encoder_module_layers_17_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(431398784)))]; + tensor encoder_module_layers_17_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431400896)))]; tensor query_35_cast_fp16 = layer_norm(axes = query_35_axes_0, beta = encoder_module_layers_17_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_self_att_weight_to_fp16, x = input_923_cast_fp16)[name = tensor("query_35_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430515840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431564480))), name = tensor("encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_156_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_palettized, x = query_35_cast_fp16)[name = tensor("linear_156_cast_fp16")]; + tensor encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431403008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432452736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432451648)))]; + tensor linear_156_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_quantized, x = query_35_cast_fp16)[name = tensor("linear_156_cast_fp16")]; tensor var_3149 = const()[name = tensor("op_3149"), val = tensor([1, -1, 8, 128])]; tensor q_103_cast_fp16 = reshape(shape = var_3149, x = linear_156_cast_fp16)[name = tensor("q_103_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431565056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432613696))), name = tensor("encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_157_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_palettized, x = query_35_cast_fp16)[name = tensor("linear_157_cast_fp16")]; + tensor encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432454848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433504576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433503488)))]; + tensor linear_157_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_quantized, x = query_35_cast_fp16)[name = tensor("linear_157_cast_fp16")]; tensor var_3153 = 
const()[name = tensor("op_3153"), val = tensor([1, -1, 8, 128])]; tensor k_69_cast_fp16 = reshape(shape = var_3153, x = linear_157_cast_fp16)[name = tensor("k_69_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432614272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433662912))), name = tensor("encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_158_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_palettized, x = query_35_cast_fp16)[name = tensor("linear_158_cast_fp16")]; + tensor encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433506688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434556416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434555328)))]; + tensor linear_158_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_quantized, x = query_35_cast_fp16)[name = tensor("linear_158_cast_fp16")]; tensor var_3157 = const()[name = tensor("op_3157"), val = tensor([1, -1, 8, 128])]; tensor v_35_cast_fp16 = reshape(shape = var_3157, x = linear_158_cast_fp16)[name = tensor("v_35_cast_fp16")]; tensor value_39_perm_0 = const()[name = tensor("value_39_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_17_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_17_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433663488)))]; + tensor encoder_module_layers_17_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_17_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434558528)))]; tensor var_3169_cast_fp16 = add(x = q_103_cast_fp16, y = encoder_module_layers_17_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3169_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_17_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433665600)))]; + tensor encoder_module_layers_17_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_17_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434560640)))]; tensor var_3171_cast_fp16 = add(x = q_103_cast_fp16, y = encoder_module_layers_17_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3171_cast_fp16")]; tensor q_with_bias_v_35_perm_0 = const()[name = tensor("q_with_bias_v_35_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_397_transpose_x_0 = const()[name = tensor("x_397_transpose_x_0"), val = tensor(false)]; tensor x_397_transpose_y_0 = const()[name = tensor("x_397_transpose_y_0"), val = tensor(false)]; - tensor op_3173_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path 
= tensor("@model_path/weights/weight.bin"), offset = tensor(433667712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434051776))), name = tensor("op_3173_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3173_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3173_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434562752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434947264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434946816)))]; tensor q_with_bias_v_35_cast_fp16 = transpose(perm = q_with_bias_v_35_perm_0, x = var_3171_cast_fp16)[name = tensor("transpose_193")]; - tensor x_397_cast_fp16 = matmul(transpose_x = x_397_transpose_x_0, transpose_y = x_397_transpose_y_0, x = q_with_bias_v_35_cast_fp16, y = op_3173_to_fp16_palettized)[name = tensor("x_397_cast_fp16")]; + tensor x_397_cast_fp16 = matmul(transpose_x = x_397_transpose_x_0, transpose_y = x_397_transpose_y_0, x = q_with_bias_v_35_cast_fp16, y = op_3173_to_fp16_quantized)[name = tensor("x_397_cast_fp16")]; tensor x_399_pad_0 = const()[name = tensor("x_399_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_399_mode_0 = const()[name = tensor("x_399_mode_0"), val = tensor("constant")]; tensor const_199_to_fp16 = const()[name = tensor("const_199_to_fp16"), val = tensor(0x0p+0)]; @@ -2628,12 +2628,12 @@ program(1.0) tensor var_3206 = const()[name = tensor("op_3206"), val = tensor([1, -1, 1024])]; tensor var_3205_cast_fp16 = transpose(perm = var_3205_perm_0, x = x_403_cast_fp16)[name = tensor("transpose_189")]; tensor input_927_cast_fp16 = reshape(shape = var_3206, x = var_3205_cast_fp16)[name = tensor("input_927_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434052352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435100992))), name = tensor("encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_160_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_palettized, x = input_927_cast_fp16)[name = tensor("linear_160_cast_fp16")]; + tensor encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434948096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435997824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435996736)))]; + tensor linear_160_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_quantized, x = input_927_cast_fp16)[name = tensor("linear_160_cast_fp16")]; tensor input_931_cast_fp16 = add(x = input_923_cast_fp16, y = linear_160_cast_fp16)[name = tensor("input_931_cast_fp16")]; tensor x_407_axes_0 = const()[name = tensor("x_407_axes_0"), val = tensor([-1])]; - tensor 
encoder_module_layers_17_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435101568)))]; - tensor encoder_module_layers_17_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435103680)))]; + tensor encoder_module_layers_17_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435999936)))]; + tensor encoder_module_layers_17_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436002048)))]; tensor x_407_cast_fp16 = layer_norm(axes = x_407_axes_0, beta = encoder_module_layers_17_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_conv_weight_to_fp16, x = input_931_cast_fp16)[name = tensor("x_407_cast_fp16")]; tensor input_933_perm_0 = const()[name = tensor("input_933_perm_0"), val = tensor([0, 2, 1])]; tensor input_935_pad_type_0 = const()[name = tensor("input_935_pad_type_0"), val = tensor("valid")]; @@ -2641,9 +2641,9 @@ program(1.0) tensor input_935_pad_0 = const()[name = tensor("input_935_pad_0"), val = tensor([0, 0])]; tensor input_935_dilations_0 = const()[name = tensor("input_935_dilations_0"), val = tensor([1])]; tensor input_935_groups_0 = const()[name = tensor("input_935_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435105792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437203008))), name = tensor("encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436004160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438103488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438101376)))]; tensor input_933_cast_fp16 = transpose(perm = input_933_perm_0, x = x_407_cast_fp16)[name = tensor("transpose_188")]; - tensor input_935_cast_fp16 = conv(dilations = input_935_dilations_0, groups = input_935_groups_0, pad = input_935_pad_0, pad_type = input_935_pad_type_0, strides = input_935_strides_0, weight = encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_933_cast_fp16)[name = tensor("input_935_cast_fp16")]; + tensor input_935_cast_fp16 = conv(dilations = input_935_dilations_0, groups = input_935_groups_0, pad = input_935_pad_0, pad_type = input_935_pad_type_0, strides = input_935_strides_0, weight = encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_933_cast_fp16)[name = tensor("input_935_cast_fp16")]; tensor x_409_split_num_splits_0 = const()[name = 
tensor("x_409_split_num_splits_0"), val = tensor(2)]; tensor x_409_split_axis_0 = const()[name = tensor("x_409_split_axis_0"), val = tensor(1)]; tensor x_409_split_cast_fp16_0, tensor x_409_split_cast_fp16_1 = split(axis = x_409_split_axis_0, num_splits = x_409_split_num_splits_0, x = input_935_cast_fp16)[name = tensor("x_409_split_cast_fp16")]; @@ -2659,75 +2659,75 @@ program(1.0) tensor input_941_strides_0 = const()[name = tensor("input_941_strides_0"), val = tensor([1])]; tensor input_941_pad_0 = const()[name = tensor("input_941_pad_0"), val = tensor([0, 0])]; tensor input_941_dilations_0 = const()[name = tensor("input_941_dilations_0"), val = tensor([1])]; - tensor const_297_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437203584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437212864))), name = tensor("const_297_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_298_to_fp16 = const()[name = tensor("const_298_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437213440)))]; - tensor input_943_cast_fp16 = conv(bias = const_298_to_fp16, dilations = input_941_dilations_0, groups = input_941_groups_0, pad = input_941_pad_0, pad_type = input_941_pad_type_0, strides = input_941_strides_0, weight = const_297_to_fp16_palettized, x = input_939_cast_fp16)[name = tensor("input_943_cast_fp16")]; + tensor const_297_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_297_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438107648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438118016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438116928)))]; + tensor const_298_to_fp16 = const()[name = tensor("const_298_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438120128)))]; + tensor input_943_cast_fp16 = conv(bias = const_298_to_fp16, dilations = input_941_dilations_0, groups = input_941_groups_0, pad = input_941_pad_0, pad_type = input_941_pad_type_0, strides = input_941_strides_0, weight = const_297_to_fp16_quantized, x = input_939_cast_fp16)[name = tensor("input_943_cast_fp16")]; tensor input_945_cast_fp16 = silu(x = input_943_cast_fp16)[name = tensor("input_945_cast_fp16")]; tensor x_411_pad_type_0 = const()[name = tensor("x_411_pad_type_0"), val = tensor("valid")]; tensor x_411_strides_0 = const()[name = tensor("x_411_strides_0"), val = tensor([1])]; tensor x_411_pad_0 = const()[name = tensor("x_411_pad_0"), val = tensor([0, 0])]; tensor x_411_dilations_0 = const()[name = tensor("x_411_dilations_0"), val = tensor([1])]; tensor x_411_groups_0 = const()[name = tensor("x_411_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437215552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438264192))), name = tensor("encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_411_cast_fp16 = conv(dilations = x_411_dilations_0, groups = x_411_groups_0, pad = x_411_pad_0, pad_type = 
x_411_pad_type_0, strides = x_411_strides_0, weight = encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_945_cast_fp16)[name = tensor("x_411_cast_fp16")]; + tensor encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438122240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439171968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439170880)))]; + tensor x_411_cast_fp16 = conv(dilations = x_411_dilations_0, groups = x_411_groups_0, pad = x_411_pad_0, pad_type = x_411_pad_type_0, strides = x_411_strides_0, weight = encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_945_cast_fp16)[name = tensor("x_411_cast_fp16")]; tensor input_947_perm_0 = const()[name = tensor("input_947_perm_0"), val = tensor([0, 2, 1])]; tensor input_947_cast_fp16 = transpose(perm = input_947_perm_0, x = x_411_cast_fp16)[name = tensor("transpose_187")]; tensor input_949_cast_fp16 = add(x = input_931_cast_fp16, y = input_947_cast_fp16)[name = tensor("input_949_cast_fp16")]; tensor input_951_axes_0 = const()[name = tensor("input_951_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_17_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438264768)))]; - tensor encoder_module_layers_17_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438266880)))]; + tensor encoder_module_layers_17_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439174080)))]; + tensor encoder_module_layers_17_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439176192)))]; tensor input_951_cast_fp16 = layer_norm(axes = input_951_axes_0, beta = encoder_module_layers_17_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_feed_forward2_weight_to_fp16, x = input_949_cast_fp16)[name = tensor("input_951_cast_fp16")]; - tensor encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438268992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(442463360))), name = tensor("encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_palettized, x = input_951_cast_fp16)[name = tensor("linear_161_cast_fp16")]; + tensor encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), 
name = tensor("encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439178304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443376832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443372672)))]; + tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_quantized, x = input_951_cast_fp16)[name = tensor("linear_161_cast_fp16")]; tensor input_955_cast_fp16 = silu(x = linear_161_cast_fp16)[name = tensor("input_955_cast_fp16")]; - tensor encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(442463936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446658304))), name = tensor("encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_162_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_palettized, x = input_955_cast_fp16)[name = tensor("linear_162_cast_fp16")]; + tensor encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443385088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447580544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447579456)))]; + tensor linear_162_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_quantized, x = input_955_cast_fp16)[name = tensor("linear_162_cast_fp16")]; tensor var_3266_to_fp16 = const()[name = tensor("op_3266_to_fp16"), val = tensor(0x1p-1)]; tensor var_3267_cast_fp16 = mul(x = linear_162_cast_fp16, y = var_3266_to_fp16)[name = tensor("op_3267_cast_fp16")]; tensor input_961_cast_fp16 = add(x = input_949_cast_fp16, y = var_3267_cast_fp16)[name = tensor("input_961_cast_fp16")]; tensor input_963_axes_0 = const()[name = tensor("input_963_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_17_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446658880)))]; - tensor encoder_module_layers_17_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446660992)))]; + tensor encoder_module_layers_17_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447582656)))]; + tensor encoder_module_layers_17_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447584768)))]; tensor input_963_cast_fp16 = layer_norm(axes 
= input_963_axes_0, beta = encoder_module_layers_17_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_out_weight_to_fp16, x = input_961_cast_fp16)[name = tensor("input_963_cast_fp16")]; tensor input_965_axes_0 = const()[name = tensor("input_965_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_18_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446663104)))]; - tensor encoder_module_layers_18_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446665216)))]; + tensor encoder_module_layers_18_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447586880)))]; + tensor encoder_module_layers_18_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447588992)))]; tensor input_965_cast_fp16 = layer_norm(axes = input_965_axes_0, beta = encoder_module_layers_18_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_feed_forward1_weight_to_fp16, x = input_963_cast_fp16)[name = tensor("input_965_cast_fp16")]; - tensor encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446667328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(450861696))), name = tensor("encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_palettized, x = input_965_cast_fp16)[name = tensor("linear_163_cast_fp16")]; + tensor encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447591104))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451789632))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451785472)))]; + tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_quantized, x = input_965_cast_fp16)[name = tensor("linear_163_cast_fp16")]; tensor input_969_cast_fp16 = silu(x = linear_163_cast_fp16)[name = tensor("input_969_cast_fp16")]; - tensor encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(450862272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455056640))), name = tensor("encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_palettized"), shape = 
tensor([1024, 4096])]; - tensor linear_164_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_palettized, x = input_969_cast_fp16)[name = tensor("linear_164_cast_fp16")]; + tensor encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451797888))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455993344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455992256)))]; + tensor linear_164_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_quantized, x = input_969_cast_fp16)[name = tensor("linear_164_cast_fp16")]; tensor var_3295_to_fp16 = const()[name = tensor("op_3295_to_fp16"), val = tensor(0x1p-1)]; tensor var_3296_cast_fp16 = mul(x = linear_164_cast_fp16, y = var_3295_to_fp16)[name = tensor("op_3296_cast_fp16")]; tensor input_975_cast_fp16 = add(x = input_963_cast_fp16, y = var_3296_cast_fp16)[name = tensor("input_975_cast_fp16")]; tensor query_37_axes_0 = const()[name = tensor("query_37_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_18_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455057216)))]; - tensor encoder_module_layers_18_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455059328)))]; + tensor encoder_module_layers_18_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455995456)))]; + tensor encoder_module_layers_18_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455997568)))]; tensor query_37_cast_fp16 = layer_norm(axes = query_37_axes_0, beta = encoder_module_layers_18_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_self_att_weight_to_fp16, x = input_975_cast_fp16)[name = tensor("query_37_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455061440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456110080))), name = tensor("encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_165_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_palettized, x = query_37_cast_fp16)[name = tensor("linear_165_cast_fp16")]; + tensor encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data 
= tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455999680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457049408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457048320)))]; + tensor linear_165_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_quantized, x = query_37_cast_fp16)[name = tensor("linear_165_cast_fp16")]; tensor var_3312 = const()[name = tensor("op_3312"), val = tensor([1, -1, 8, 128])]; tensor q_109_cast_fp16 = reshape(shape = var_3312, x = linear_165_cast_fp16)[name = tensor("q_109_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456110656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457159296))), name = tensor("encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_166_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_palettized, x = query_37_cast_fp16)[name = tensor("linear_166_cast_fp16")]; + tensor encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457051520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458101248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458100160)))]; + tensor linear_166_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_quantized, x = query_37_cast_fp16)[name = tensor("linear_166_cast_fp16")]; tensor var_3316 = const()[name = tensor("op_3316"), val = tensor([1, -1, 8, 128])]; tensor k_73_cast_fp16 = reshape(shape = var_3316, x = linear_166_cast_fp16)[name = tensor("k_73_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457159872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458208512))), name = tensor("encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_167_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_palettized, x = query_37_cast_fp16)[name = tensor("linear_167_cast_fp16")]; + tensor encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458103360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459153088))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459152000)))]; + tensor linear_167_cast_fp16 = linear(bias = 
linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_quantized, x = query_37_cast_fp16)[name = tensor("linear_167_cast_fp16")]; tensor var_3320 = const()[name = tensor("op_3320"), val = tensor([1, -1, 8, 128])]; tensor v_37_cast_fp16 = reshape(shape = var_3320, x = linear_167_cast_fp16)[name = tensor("v_37_cast_fp16")]; tensor value_41_perm_0 = const()[name = tensor("value_41_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_18_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_18_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458209088)))]; + tensor encoder_module_layers_18_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_18_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459155200)))]; tensor var_3332_cast_fp16 = add(x = q_109_cast_fp16, y = encoder_module_layers_18_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3332_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_18_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458211200)))]; + tensor encoder_module_layers_18_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_18_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459157312)))]; tensor var_3334_cast_fp16 = add(x = q_109_cast_fp16, y = encoder_module_layers_18_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3334_cast_fp16")]; tensor q_with_bias_v_37_perm_0 = const()[name = tensor("q_with_bias_v_37_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_419_transpose_x_0 = const()[name = tensor("x_419_transpose_x_0"), val = tensor(false)]; tensor x_419_transpose_y_0 = const()[name = tensor("x_419_transpose_y_0"), val = tensor(false)]; - tensor op_3336_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458213312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458597376))), name = tensor("op_3336_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3336_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3336_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459159424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459543936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459543488)))]; tensor q_with_bias_v_37_cast_fp16 = transpose(perm = q_with_bias_v_37_perm_0, x = var_3334_cast_fp16)[name = tensor("transpose_186")]; - tensor x_419_cast_fp16 = matmul(transpose_x = x_419_transpose_x_0, transpose_y = x_419_transpose_y_0, x = q_with_bias_v_37_cast_fp16, y = op_3336_to_fp16_palettized)[name = tensor("x_419_cast_fp16")]; + tensor x_419_cast_fp16 = matmul(transpose_x = x_419_transpose_x_0, transpose_y = x_419_transpose_y_0, x = q_with_bias_v_37_cast_fp16, y = op_3336_to_fp16_quantized)[name = tensor("x_419_cast_fp16")]; tensor x_421_pad_0 = const()[name = tensor("x_421_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_421_mode_0 = const()[name = 
tensor("x_421_mode_0"), val = tensor("constant")]; tensor const_209_to_fp16 = const()[name = tensor("const_209_to_fp16"), val = tensor(0x0p+0)]; @@ -2765,12 +2765,12 @@ program(1.0) tensor var_3369 = const()[name = tensor("op_3369"), val = tensor([1, -1, 1024])]; tensor var_3368_cast_fp16 = transpose(perm = var_3368_perm_0, x = x_425_cast_fp16)[name = tensor("transpose_182")]; tensor input_979_cast_fp16 = reshape(shape = var_3369, x = var_3368_cast_fp16)[name = tensor("input_979_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458597952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459646592))), name = tensor("encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_169_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_palettized, x = input_979_cast_fp16)[name = tensor("linear_169_cast_fp16")]; + tensor encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459544768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460594496))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460593408)))]; + tensor linear_169_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_quantized, x = input_979_cast_fp16)[name = tensor("linear_169_cast_fp16")]; tensor input_983_cast_fp16 = add(x = input_975_cast_fp16, y = linear_169_cast_fp16)[name = tensor("input_983_cast_fp16")]; tensor x_429_axes_0 = const()[name = tensor("x_429_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_18_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459647168)))]; - tensor encoder_module_layers_18_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459649280)))]; + tensor encoder_module_layers_18_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460596608)))]; + tensor encoder_module_layers_18_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460598720)))]; tensor x_429_cast_fp16 = layer_norm(axes = x_429_axes_0, beta = encoder_module_layers_18_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_conv_weight_to_fp16, x = input_983_cast_fp16)[name = tensor("x_429_cast_fp16")]; tensor input_985_perm_0 = const()[name = tensor("input_985_perm_0"), val = tensor([0, 2, 1])]; tensor input_987_pad_type_0 = const()[name = tensor("input_987_pad_type_0"), val = tensor("valid")]; @@ -2778,9 +2778,9 @@ program(1.0) 
tensor input_987_pad_0 = const()[name = tensor("input_987_pad_0"), val = tensor([0, 0])]; tensor input_987_dilations_0 = const()[name = tensor("input_987_dilations_0"), val = tensor([1])]; tensor input_987_groups_0 = const()[name = tensor("input_987_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459651392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461748608))), name = tensor("encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460600832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462700160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462698048)))]; tensor input_985_cast_fp16 = transpose(perm = input_985_perm_0, x = x_429_cast_fp16)[name = tensor("transpose_181")]; - tensor input_987_cast_fp16 = conv(dilations = input_987_dilations_0, groups = input_987_groups_0, pad = input_987_pad_0, pad_type = input_987_pad_type_0, strides = input_987_strides_0, weight = encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_985_cast_fp16)[name = tensor("input_987_cast_fp16")]; + tensor input_987_cast_fp16 = conv(dilations = input_987_dilations_0, groups = input_987_groups_0, pad = input_987_pad_0, pad_type = input_987_pad_type_0, strides = input_987_strides_0, weight = encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_985_cast_fp16)[name = tensor("input_987_cast_fp16")]; tensor x_431_split_num_splits_0 = const()[name = tensor("x_431_split_num_splits_0"), val = tensor(2)]; tensor x_431_split_axis_0 = const()[name = tensor("x_431_split_axis_0"), val = tensor(1)]; tensor x_431_split_cast_fp16_0, tensor x_431_split_cast_fp16_1 = split(axis = x_431_split_axis_0, num_splits = x_431_split_num_splits_0, x = input_987_cast_fp16)[name = tensor("x_431_split_cast_fp16")]; @@ -2796,75 +2796,75 @@ program(1.0) tensor input_993_strides_0 = const()[name = tensor("input_993_strides_0"), val = tensor([1])]; tensor input_993_pad_0 = const()[name = tensor("input_993_pad_0"), val = tensor([0, 0])]; tensor input_993_dilations_0 = const()[name = tensor("input_993_dilations_0"), val = tensor([1])]; - tensor const_299_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461749184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461758464))), name = tensor("const_299_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_300_to_fp16 = const()[name = tensor("const_300_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461759040)))]; - tensor input_995_cast_fp16 = conv(bias = const_300_to_fp16, dilations = input_993_dilations_0, groups = input_993_groups_0, pad = input_993_pad_0, pad_type = input_993_pad_type_0, strides = input_993_strides_0, weight = const_299_to_fp16_palettized, x = 
input_991_cast_fp16)[name = tensor("input_995_cast_fp16")]; + tensor const_299_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_299_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462704320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462714688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462713600)))]; + tensor const_300_to_fp16 = const()[name = tensor("const_300_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462716800)))]; + tensor input_995_cast_fp16 = conv(bias = const_300_to_fp16, dilations = input_993_dilations_0, groups = input_993_groups_0, pad = input_993_pad_0, pad_type = input_993_pad_type_0, strides = input_993_strides_0, weight = const_299_to_fp16_quantized, x = input_991_cast_fp16)[name = tensor("input_995_cast_fp16")]; tensor input_997_cast_fp16 = silu(x = input_995_cast_fp16)[name = tensor("input_997_cast_fp16")]; tensor x_433_pad_type_0 = const()[name = tensor("x_433_pad_type_0"), val = tensor("valid")]; tensor x_433_strides_0 = const()[name = tensor("x_433_strides_0"), val = tensor([1])]; tensor x_433_pad_0 = const()[name = tensor("x_433_pad_0"), val = tensor([0, 0])]; tensor x_433_dilations_0 = const()[name = tensor("x_433_dilations_0"), val = tensor([1])]; tensor x_433_groups_0 = const()[name = tensor("x_433_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461761152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462809792))), name = tensor("encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_433_cast_fp16 = conv(dilations = x_433_dilations_0, groups = x_433_groups_0, pad = x_433_pad_0, pad_type = x_433_pad_type_0, strides = x_433_strides_0, weight = encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_997_cast_fp16)[name = tensor("x_433_cast_fp16")]; + tensor encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462718912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463768640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463767552)))]; + tensor x_433_cast_fp16 = conv(dilations = x_433_dilations_0, groups = x_433_groups_0, pad = x_433_pad_0, pad_type = x_433_pad_type_0, strides = x_433_strides_0, weight = encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_997_cast_fp16)[name = tensor("x_433_cast_fp16")]; tensor input_999_perm_0 = const()[name = tensor("input_999_perm_0"), val = tensor([0, 2, 1])]; tensor input_999_cast_fp16 = transpose(perm = input_999_perm_0, x = x_433_cast_fp16)[name = tensor("transpose_180")]; tensor input_1001_cast_fp16 = add(x = input_983_cast_fp16, y = input_999_cast_fp16)[name = tensor("input_1001_cast_fp16")]; tensor input_1003_axes_0 = const()[name = tensor("input_1003_axes_0"), val = 
tensor([-1])]; - tensor encoder_module_layers_18_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462810368)))]; - tensor encoder_module_layers_18_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462812480)))]; + tensor encoder_module_layers_18_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463770752)))]; + tensor encoder_module_layers_18_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463772864)))]; tensor input_1003_cast_fp16 = layer_norm(axes = input_1003_axes_0, beta = encoder_module_layers_18_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_feed_forward2_weight_to_fp16, x = input_1001_cast_fp16)[name = tensor("input_1003_cast_fp16")]; - tensor encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462814592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467008960))), name = tensor("encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1003_cast_fp16)[name = tensor("linear_170_cast_fp16")]; + tensor encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463774976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467973504))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467969344)))]; + tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1003_cast_fp16)[name = tensor("linear_170_cast_fp16")]; tensor input_1007_cast_fp16 = silu(x = linear_170_cast_fp16)[name = tensor("input_1007_cast_fp16")]; - tensor encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467009536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471203904))), name = tensor("encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_171_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1007_cast_fp16)[name = tensor("linear_171_cast_fp16")]; + tensor 
encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467981760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472177216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472176128)))]; + tensor linear_171_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1007_cast_fp16)[name = tensor("linear_171_cast_fp16")]; tensor var_3429_to_fp16 = const()[name = tensor("op_3429_to_fp16"), val = tensor(0x1p-1)]; tensor var_3430_cast_fp16 = mul(x = linear_171_cast_fp16, y = var_3429_to_fp16)[name = tensor("op_3430_cast_fp16")]; tensor input_1013_cast_fp16 = add(x = input_1001_cast_fp16, y = var_3430_cast_fp16)[name = tensor("input_1013_cast_fp16")]; tensor input_1015_axes_0 = const()[name = tensor("input_1015_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_18_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471204480)))]; - tensor encoder_module_layers_18_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471206592)))]; + tensor encoder_module_layers_18_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472179328)))]; + tensor encoder_module_layers_18_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472181440)))]; tensor input_1015_cast_fp16 = layer_norm(axes = input_1015_axes_0, beta = encoder_module_layers_18_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_out_weight_to_fp16, x = input_1013_cast_fp16)[name = tensor("input_1015_cast_fp16")]; tensor input_1017_axes_0 = const()[name = tensor("input_1017_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471208704)))]; - tensor encoder_module_layers_19_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471210816)))]; + tensor encoder_module_layers_19_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472183552)))]; + tensor encoder_module_layers_19_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472185664)))]; tensor input_1017_cast_fp16 = layer_norm(axes = 
input_1017_axes_0, beta = encoder_module_layers_19_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_19_norm_feed_forward1_weight_to_fp16, x = input_1015_cast_fp16)[name = tensor("input_1017_cast_fp16")]; - tensor encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471212928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(475407296))), name = tensor("encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_172_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1017_cast_fp16)[name = tensor("linear_172_cast_fp16")]; + tensor encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472187776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476386304))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476382144)))]; + tensor linear_172_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1017_cast_fp16)[name = tensor("linear_172_cast_fp16")]; tensor input_1021_cast_fp16 = silu(x = linear_172_cast_fp16)[name = tensor("input_1021_cast_fp16")]; - tensor encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(475407872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479602240))), name = tensor("encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_173_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1021_cast_fp16)[name = tensor("linear_173_cast_fp16")]; + tensor encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476394560))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480590016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480588928)))]; + tensor linear_173_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1021_cast_fp16)[name = tensor("linear_173_cast_fp16")]; tensor var_3458_to_fp16 = const()[name = tensor("op_3458_to_fp16"), val = tensor(0x1p-1)]; tensor var_3459_cast_fp16 = mul(x = linear_173_cast_fp16, y = var_3458_to_fp16)[name = tensor("op_3459_cast_fp16")]; tensor input_1027_cast_fp16 = add(x = input_1015_cast_fp16, y = var_3459_cast_fp16)[name = tensor("input_1027_cast_fp16")]; tensor query_39_axes_0 = 
const()[name = tensor("query_39_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479602816)))]; - tensor encoder_module_layers_19_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479604928)))]; + tensor encoder_module_layers_19_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480592128)))]; + tensor encoder_module_layers_19_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480594240)))]; tensor query_39_cast_fp16 = layer_norm(axes = query_39_axes_0, beta = encoder_module_layers_19_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_19_norm_self_att_weight_to_fp16, x = input_1027_cast_fp16)[name = tensor("query_39_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479607040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480655680))), name = tensor("encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_174_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_palettized, x = query_39_cast_fp16)[name = tensor("linear_174_cast_fp16")]; + tensor encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480596352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481646080))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481644992)))]; + tensor linear_174_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_quantized, x = query_39_cast_fp16)[name = tensor("linear_174_cast_fp16")]; tensor var_3475 = const()[name = tensor("op_3475"), val = tensor([1, -1, 8, 128])]; tensor q_115_cast_fp16 = reshape(shape = var_3475, x = linear_174_cast_fp16)[name = tensor("q_115_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480656256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481704896))), name = tensor("encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_175_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_palettized, x = query_39_cast_fp16)[name = 
tensor("linear_175_cast_fp16")]; + tensor encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481648192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482697920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482696832)))]; + tensor linear_175_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_quantized, x = query_39_cast_fp16)[name = tensor("linear_175_cast_fp16")]; tensor var_3479 = const()[name = tensor("op_3479"), val = tensor([1, -1, 8, 128])]; tensor k_77_cast_fp16 = reshape(shape = var_3479, x = linear_175_cast_fp16)[name = tensor("k_77_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481705472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482754112))), name = tensor("encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_176_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_palettized, x = query_39_cast_fp16)[name = tensor("linear_176_cast_fp16")]; + tensor encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482700032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483749760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483748672)))]; + tensor linear_176_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_quantized, x = query_39_cast_fp16)[name = tensor("linear_176_cast_fp16")]; tensor var_3483 = const()[name = tensor("op_3483"), val = tensor([1, -1, 8, 128])]; tensor v_39_cast_fp16 = reshape(shape = var_3483, x = linear_176_cast_fp16)[name = tensor("v_39_cast_fp16")]; tensor value_43_perm_0 = const()[name = tensor("value_43_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_19_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_19_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482754688)))]; + tensor encoder_module_layers_19_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_19_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483751872)))]; tensor var_3495_cast_fp16 = add(x = q_115_cast_fp16, y = encoder_module_layers_19_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3495_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_19_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(482756800)))]; + tensor encoder_module_layers_19_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_19_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483753984)))]; tensor var_3497_cast_fp16 = add(x = q_115_cast_fp16, y = encoder_module_layers_19_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3497_cast_fp16")]; tensor q_with_bias_v_39_perm_0 = const()[name = tensor("q_with_bias_v_39_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_441_transpose_x_0 = const()[name = tensor("x_441_transpose_x_0"), val = tensor(false)]; tensor x_441_transpose_y_0 = const()[name = tensor("x_441_transpose_y_0"), val = tensor(false)]; - tensor op_3499_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482758912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483142976))), name = tensor("op_3499_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3499_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3499_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483756096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484140608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484140160)))]; tensor q_with_bias_v_39_cast_fp16 = transpose(perm = q_with_bias_v_39_perm_0, x = var_3497_cast_fp16)[name = tensor("transpose_179")]; - tensor x_441_cast_fp16 = matmul(transpose_x = x_441_transpose_x_0, transpose_y = x_441_transpose_y_0, x = q_with_bias_v_39_cast_fp16, y = op_3499_to_fp16_palettized)[name = tensor("x_441_cast_fp16")]; + tensor x_441_cast_fp16 = matmul(transpose_x = x_441_transpose_x_0, transpose_y = x_441_transpose_y_0, x = q_with_bias_v_39_cast_fp16, y = op_3499_to_fp16_quantized)[name = tensor("x_441_cast_fp16")]; tensor x_443_pad_0 = const()[name = tensor("x_443_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_443_mode_0 = const()[name = tensor("x_443_mode_0"), val = tensor("constant")]; tensor const_219_to_fp16 = const()[name = tensor("const_219_to_fp16"), val = tensor(0x0p+0)]; @@ -2902,12 +2902,12 @@ program(1.0) tensor var_3532 = const()[name = tensor("op_3532"), val = tensor([1, -1, 1024])]; tensor var_3531_cast_fp16 = transpose(perm = var_3531_perm_0, x = x_447_cast_fp16)[name = tensor("transpose_175")]; tensor input_1031_cast_fp16 = reshape(shape = var_3532, x = var_3531_cast_fp16)[name = tensor("input_1031_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483143552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484192192))), name = tensor("encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_178_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_palettized, x = input_1031_cast_fp16)[name = tensor("linear_178_cast_fp16")]; + tensor encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484141440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485191168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485190080)))]; + tensor linear_178_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_quantized, x = input_1031_cast_fp16)[name = tensor("linear_178_cast_fp16")]; tensor input_1035_cast_fp16 = add(x = input_1027_cast_fp16, y = linear_178_cast_fp16)[name = tensor("input_1035_cast_fp16")]; tensor x_451_axes_0 = const()[name = tensor("x_451_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484192768)))]; - tensor encoder_module_layers_19_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484194880)))]; + tensor encoder_module_layers_19_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485193280)))]; + tensor encoder_module_layers_19_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485195392)))]; tensor x_451_cast_fp16 = layer_norm(axes = x_451_axes_0, beta = encoder_module_layers_19_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_19_norm_conv_weight_to_fp16, x = input_1035_cast_fp16)[name = tensor("x_451_cast_fp16")]; tensor input_1037_perm_0 = const()[name = tensor("input_1037_perm_0"), val = tensor([0, 2, 1])]; tensor input_1039_pad_type_0 = const()[name = tensor("input_1039_pad_type_0"), val = tensor("valid")]; @@ -2915,9 +2915,9 @@ program(1.0) tensor input_1039_pad_0 = const()[name = tensor("input_1039_pad_0"), val = tensor([0, 0])]; tensor input_1039_dilations_0 = const()[name = tensor("input_1039_dilations_0"), val = tensor([1])]; tensor input_1039_groups_0 = const()[name = tensor("input_1039_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484196992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486294208))), name = tensor("encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485197504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487296832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487294720)))]; tensor 
input_1037_cast_fp16 = transpose(perm = input_1037_perm_0, x = x_451_cast_fp16)[name = tensor("transpose_174")]; - tensor input_1039_cast_fp16 = conv(dilations = input_1039_dilations_0, groups = input_1039_groups_0, pad = input_1039_pad_0, pad_type = input_1039_pad_type_0, strides = input_1039_strides_0, weight = encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1037_cast_fp16)[name = tensor("input_1039_cast_fp16")]; + tensor input_1039_cast_fp16 = conv(dilations = input_1039_dilations_0, groups = input_1039_groups_0, pad = input_1039_pad_0, pad_type = input_1039_pad_type_0, strides = input_1039_strides_0, weight = encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1037_cast_fp16)[name = tensor("input_1039_cast_fp16")]; tensor x_453_split_num_splits_0 = const()[name = tensor("x_453_split_num_splits_0"), val = tensor(2)]; tensor x_453_split_axis_0 = const()[name = tensor("x_453_split_axis_0"), val = tensor(1)]; tensor x_453_split_cast_fp16_0, tensor x_453_split_cast_fp16_1 = split(axis = x_453_split_axis_0, num_splits = x_453_split_num_splits_0, x = input_1039_cast_fp16)[name = tensor("x_453_split_cast_fp16")]; @@ -2933,75 +2933,75 @@ program(1.0) tensor input_1045_strides_0 = const()[name = tensor("input_1045_strides_0"), val = tensor([1])]; tensor input_1045_pad_0 = const()[name = tensor("input_1045_pad_0"), val = tensor([0, 0])]; tensor input_1045_dilations_0 = const()[name = tensor("input_1045_dilations_0"), val = tensor([1])]; - tensor const_301_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486294784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486304064))), name = tensor("const_301_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_302_to_fp16 = const()[name = tensor("const_302_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486304640)))]; - tensor input_1047_cast_fp16 = conv(bias = const_302_to_fp16, dilations = input_1045_dilations_0, groups = input_1045_groups_0, pad = input_1045_pad_0, pad_type = input_1045_pad_type_0, strides = input_1045_strides_0, weight = const_301_to_fp16_palettized, x = input_1043_cast_fp16)[name = tensor("input_1047_cast_fp16")]; + tensor const_301_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_301_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487300992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487311360))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487310272)))]; + tensor const_302_to_fp16 = const()[name = tensor("const_302_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487313472)))]; + tensor input_1047_cast_fp16 = conv(bias = const_302_to_fp16, dilations = input_1045_dilations_0, groups = input_1045_groups_0, pad = input_1045_pad_0, pad_type = input_1045_pad_type_0, strides = input_1045_strides_0, weight = const_301_to_fp16_quantized, x = input_1043_cast_fp16)[name = tensor("input_1047_cast_fp16")]; tensor input_1049_cast_fp16 = silu(x = input_1047_cast_fp16)[name = tensor("input_1049_cast_fp16")]; tensor x_455_pad_type_0 = const()[name = tensor("x_455_pad_type_0"), val = tensor("valid")]; tensor x_455_strides_0 = const()[name 
= tensor("x_455_strides_0"), val = tensor([1])]; tensor x_455_pad_0 = const()[name = tensor("x_455_pad_0"), val = tensor([0, 0])]; tensor x_455_dilations_0 = const()[name = tensor("x_455_dilations_0"), val = tensor([1])]; tensor x_455_groups_0 = const()[name = tensor("x_455_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486306752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487355392))), name = tensor("encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_455_cast_fp16 = conv(dilations = x_455_dilations_0, groups = x_455_groups_0, pad = x_455_pad_0, pad_type = x_455_pad_type_0, strides = x_455_strides_0, weight = encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1049_cast_fp16)[name = tensor("x_455_cast_fp16")]; + tensor encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487315584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488365312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488364224)))]; + tensor x_455_cast_fp16 = conv(dilations = x_455_dilations_0, groups = x_455_groups_0, pad = x_455_pad_0, pad_type = x_455_pad_type_0, strides = x_455_strides_0, weight = encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1049_cast_fp16)[name = tensor("x_455_cast_fp16")]; tensor input_1051_perm_0 = const()[name = tensor("input_1051_perm_0"), val = tensor([0, 2, 1])]; tensor input_1051_cast_fp16 = transpose(perm = input_1051_perm_0, x = x_455_cast_fp16)[name = tensor("transpose_173")]; tensor input_1053_cast_fp16 = add(x = input_1035_cast_fp16, y = input_1051_cast_fp16)[name = tensor("input_1053_cast_fp16")]; tensor input_1055_axes_0 = const()[name = tensor("input_1055_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487355968)))]; - tensor encoder_module_layers_19_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487358080)))]; + tensor encoder_module_layers_19_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488367424)))]; + tensor encoder_module_layers_19_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488369536)))]; tensor input_1055_cast_fp16 = layer_norm(axes = input_1055_axes_0, beta = encoder_module_layers_19_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = 
encoder_module_layers_19_norm_feed_forward2_weight_to_fp16, x = input_1053_cast_fp16)[name = tensor("input_1055_cast_fp16")]; - tensor encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487360192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(491554560))), name = tensor("encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_179_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1055_cast_fp16)[name = tensor("linear_179_cast_fp16")]; + tensor encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488371648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(492570176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(492566016)))]; + tensor linear_179_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1055_cast_fp16)[name = tensor("linear_179_cast_fp16")]; tensor input_1059_cast_fp16 = silu(x = linear_179_cast_fp16)[name = tensor("input_1059_cast_fp16")]; - tensor encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(491555136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495749504))), name = tensor("encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_180_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1059_cast_fp16)[name = tensor("linear_180_cast_fp16")]; + tensor encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(492578432))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496773888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496772800)))]; + tensor linear_180_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1059_cast_fp16)[name = tensor("linear_180_cast_fp16")]; tensor var_3592_to_fp16 = const()[name = tensor("op_3592_to_fp16"), val = tensor(0x1p-1)]; tensor var_3593_cast_fp16 = mul(x = linear_180_cast_fp16, y = var_3592_to_fp16)[name = tensor("op_3593_cast_fp16")]; tensor input_1065_cast_fp16 = add(x = input_1053_cast_fp16, y = var_3593_cast_fp16)[name = tensor("input_1065_cast_fp16")]; tensor input_1067_axes_0 = const()[name = tensor("input_1067_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_out_weight_to_fp16 = 
const()[name = tensor("encoder_module_layers_19_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495750080)))]; - tensor encoder_module_layers_19_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495752192)))]; + tensor encoder_module_layers_19_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496776000)))]; + tensor encoder_module_layers_19_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496778112)))]; tensor input_1067_cast_fp16 = layer_norm(axes = input_1067_axes_0, beta = encoder_module_layers_19_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_19_norm_out_weight_to_fp16, x = input_1065_cast_fp16)[name = tensor("input_1067_cast_fp16")]; tensor input_1069_axes_0 = const()[name = tensor("input_1069_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495754304)))]; - tensor encoder_module_layers_20_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495756416)))]; + tensor encoder_module_layers_20_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496780224)))]; + tensor encoder_module_layers_20_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496782336)))]; tensor input_1069_cast_fp16 = layer_norm(axes = input_1069_axes_0, beta = encoder_module_layers_20_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_20_norm_feed_forward1_weight_to_fp16, x = input_1067_cast_fp16)[name = tensor("input_1069_cast_fp16")]; - tensor encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495758528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(499952896))), name = tensor("encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1069_cast_fp16)[name = tensor("linear_181_cast_fp16")]; + tensor encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496784448))), scale 
= tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(500982976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(500978816)))]; + tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1069_cast_fp16)[name = tensor("linear_181_cast_fp16")]; tensor input_1073_cast_fp16 = silu(x = linear_181_cast_fp16)[name = tensor("input_1073_cast_fp16")]; - tensor encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(499953472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504147840))), name = tensor("encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_182_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1073_cast_fp16)[name = tensor("linear_182_cast_fp16")]; + tensor encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(500991232))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505186688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505185600)))]; + tensor linear_182_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1073_cast_fp16)[name = tensor("linear_182_cast_fp16")]; tensor var_3621_to_fp16 = const()[name = tensor("op_3621_to_fp16"), val = tensor(0x1p-1)]; tensor var_3622_cast_fp16 = mul(x = linear_182_cast_fp16, y = var_3621_to_fp16)[name = tensor("op_3622_cast_fp16")]; tensor input_1079_cast_fp16 = add(x = input_1067_cast_fp16, y = var_3622_cast_fp16)[name = tensor("input_1079_cast_fp16")]; tensor query_41_axes_0 = const()[name = tensor("query_41_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504148416)))]; - tensor encoder_module_layers_20_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504150528)))]; + tensor encoder_module_layers_20_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505188800)))]; + tensor encoder_module_layers_20_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505190912)))]; tensor query_41_cast_fp16 = layer_norm(axes = query_41_axes_0, beta = encoder_module_layers_20_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = 
encoder_module_layers_20_norm_self_att_weight_to_fp16, x = input_1079_cast_fp16)[name = tensor("query_41_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504152640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505201280))), name = tensor("encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_183_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_palettized, x = query_41_cast_fp16)[name = tensor("linear_183_cast_fp16")]; + tensor encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505193024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506242752))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506241664)))]; + tensor linear_183_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_quantized, x = query_41_cast_fp16)[name = tensor("linear_183_cast_fp16")]; tensor var_3638 = const()[name = tensor("op_3638"), val = tensor([1, -1, 8, 128])]; tensor q_121_cast_fp16 = reshape(shape = var_3638, x = linear_183_cast_fp16)[name = tensor("q_121_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505201856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506250496))), name = tensor("encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_184_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_palettized, x = query_41_cast_fp16)[name = tensor("linear_184_cast_fp16")]; + tensor encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506244864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507294592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507293504)))]; + tensor linear_184_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_quantized, x = query_41_cast_fp16)[name = tensor("linear_184_cast_fp16")]; tensor var_3642 = const()[name = tensor("op_3642"), val = tensor([1, -1, 8, 128])]; tensor k_81_cast_fp16 = reshape(shape = var_3642, x = linear_184_cast_fp16)[name = tensor("k_81_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506251072))), lut = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(507299712))), name = tensor("encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_185_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_palettized, x = query_41_cast_fp16)[name = tensor("linear_185_cast_fp16")]; + tensor encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507296704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508346432))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508345344)))]; + tensor linear_185_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_quantized, x = query_41_cast_fp16)[name = tensor("linear_185_cast_fp16")]; tensor var_3646 = const()[name = tensor("op_3646"), val = tensor([1, -1, 8, 128])]; tensor v_41_cast_fp16 = reshape(shape = var_3646, x = linear_185_cast_fp16)[name = tensor("v_41_cast_fp16")]; tensor value_45_perm_0 = const()[name = tensor("value_45_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_20_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_20_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507300288)))]; + tensor encoder_module_layers_20_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_20_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508348544)))]; tensor var_3658_cast_fp16 = add(x = q_121_cast_fp16, y = encoder_module_layers_20_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3658_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_20_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507302400)))]; + tensor encoder_module_layers_20_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_20_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508350656)))]; tensor var_3660_cast_fp16 = add(x = q_121_cast_fp16, y = encoder_module_layers_20_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3660_cast_fp16")]; tensor q_with_bias_v_41_perm_0 = const()[name = tensor("q_with_bias_v_41_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_463_transpose_x_0 = const()[name = tensor("x_463_transpose_x_0"), val = tensor(false)]; tensor x_463_transpose_y_0 = const()[name = tensor("x_463_transpose_y_0"), val = tensor(false)]; - tensor op_3662_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507304512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507688576))), name = tensor("op_3662_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3662_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3662_to_fp16_quantized"), quantized_data = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508352768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508737280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508736832)))]; tensor q_with_bias_v_41_cast_fp16 = transpose(perm = q_with_bias_v_41_perm_0, x = var_3660_cast_fp16)[name = tensor("transpose_172")]; - tensor x_463_cast_fp16 = matmul(transpose_x = x_463_transpose_x_0, transpose_y = x_463_transpose_y_0, x = q_with_bias_v_41_cast_fp16, y = op_3662_to_fp16_palettized)[name = tensor("x_463_cast_fp16")]; + tensor x_463_cast_fp16 = matmul(transpose_x = x_463_transpose_x_0, transpose_y = x_463_transpose_y_0, x = q_with_bias_v_41_cast_fp16, y = op_3662_to_fp16_quantized)[name = tensor("x_463_cast_fp16")]; tensor x_465_pad_0 = const()[name = tensor("x_465_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_465_mode_0 = const()[name = tensor("x_465_mode_0"), val = tensor("constant")]; tensor const_229_to_fp16 = const()[name = tensor("const_229_to_fp16"), val = tensor(0x0p+0)]; @@ -3039,12 +3039,12 @@ program(1.0) tensor var_3695 = const()[name = tensor("op_3695"), val = tensor([1, -1, 1024])]; tensor var_3694_cast_fp16 = transpose(perm = var_3694_perm_0, x = x_469_cast_fp16)[name = tensor("transpose_168")]; tensor input_1083_cast_fp16 = reshape(shape = var_3695, x = var_3694_cast_fp16)[name = tensor("input_1083_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507689152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508737792))), name = tensor("encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_187_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_palettized, x = input_1083_cast_fp16)[name = tensor("linear_187_cast_fp16")]; + tensor encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508738112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509787840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509786752)))]; + tensor linear_187_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_quantized, x = input_1083_cast_fp16)[name = tensor("linear_187_cast_fp16")]; tensor input_1087_cast_fp16 = add(x = input_1079_cast_fp16, y = linear_187_cast_fp16)[name = tensor("input_1087_cast_fp16")]; tensor x_473_axes_0 = const()[name = tensor("x_473_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508738368)))]; - tensor encoder_module_layers_20_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(508740480)))]; + tensor encoder_module_layers_20_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509789952)))]; + tensor encoder_module_layers_20_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509792064)))]; tensor x_473_cast_fp16 = layer_norm(axes = x_473_axes_0, beta = encoder_module_layers_20_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_20_norm_conv_weight_to_fp16, x = input_1087_cast_fp16)[name = tensor("x_473_cast_fp16")]; tensor input_1089_perm_0 = const()[name = tensor("input_1089_perm_0"), val = tensor([0, 2, 1])]; tensor input_1091_pad_type_0 = const()[name = tensor("input_1091_pad_type_0"), val = tensor("valid")]; @@ -3052,9 +3052,9 @@ program(1.0) tensor input_1091_pad_0 = const()[name = tensor("input_1091_pad_0"), val = tensor([0, 0])]; tensor input_1091_dilations_0 = const()[name = tensor("input_1091_dilations_0"), val = tensor([1])]; tensor input_1091_groups_0 = const()[name = tensor("input_1091_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508742592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510839808))), name = tensor("encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509794176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511893504))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511891392)))]; tensor input_1089_cast_fp16 = transpose(perm = input_1089_perm_0, x = x_473_cast_fp16)[name = tensor("transpose_167")]; - tensor input_1091_cast_fp16 = conv(dilations = input_1091_dilations_0, groups = input_1091_groups_0, pad = input_1091_pad_0, pad_type = input_1091_pad_type_0, strides = input_1091_strides_0, weight = encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1089_cast_fp16)[name = tensor("input_1091_cast_fp16")]; + tensor input_1091_cast_fp16 = conv(dilations = input_1091_dilations_0, groups = input_1091_groups_0, pad = input_1091_pad_0, pad_type = input_1091_pad_type_0, strides = input_1091_strides_0, weight = encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1089_cast_fp16)[name = tensor("input_1091_cast_fp16")]; tensor x_475_split_num_splits_0 = const()[name = tensor("x_475_split_num_splits_0"), val = tensor(2)]; tensor x_475_split_axis_0 = const()[name = tensor("x_475_split_axis_0"), val = tensor(1)]; tensor x_475_split_cast_fp16_0, tensor x_475_split_cast_fp16_1 = split(axis = x_475_split_axis_0, num_splits = x_475_split_num_splits_0, x = input_1091_cast_fp16)[name = tensor("x_475_split_cast_fp16")]; @@ -3070,75 +3070,75 @@ program(1.0) tensor input_1097_strides_0 = const()[name = 
tensor("input_1097_strides_0"), val = tensor([1])]; tensor input_1097_pad_0 = const()[name = tensor("input_1097_pad_0"), val = tensor([0, 0])]; tensor input_1097_dilations_0 = const()[name = tensor("input_1097_dilations_0"), val = tensor([1])]; - tensor const_303_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510840384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510849664))), name = tensor("const_303_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_304_to_fp16 = const()[name = tensor("const_304_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510850240)))]; - tensor input_1099_cast_fp16 = conv(bias = const_304_to_fp16, dilations = input_1097_dilations_0, groups = input_1097_groups_0, pad = input_1097_pad_0, pad_type = input_1097_pad_type_0, strides = input_1097_strides_0, weight = const_303_to_fp16_palettized, x = input_1095_cast_fp16)[name = tensor("input_1099_cast_fp16")]; + tensor const_303_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_303_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511897664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511908032))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511906944)))]; + tensor const_304_to_fp16 = const()[name = tensor("const_304_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511910144)))]; + tensor input_1099_cast_fp16 = conv(bias = const_304_to_fp16, dilations = input_1097_dilations_0, groups = input_1097_groups_0, pad = input_1097_pad_0, pad_type = input_1097_pad_type_0, strides = input_1097_strides_0, weight = const_303_to_fp16_quantized, x = input_1095_cast_fp16)[name = tensor("input_1099_cast_fp16")]; tensor input_1101_cast_fp16 = silu(x = input_1099_cast_fp16)[name = tensor("input_1101_cast_fp16")]; tensor x_477_pad_type_0 = const()[name = tensor("x_477_pad_type_0"), val = tensor("valid")]; tensor x_477_strides_0 = const()[name = tensor("x_477_strides_0"), val = tensor([1])]; tensor x_477_pad_0 = const()[name = tensor("x_477_pad_0"), val = tensor([0, 0])]; tensor x_477_dilations_0 = const()[name = tensor("x_477_dilations_0"), val = tensor([1])]; tensor x_477_groups_0 = const()[name = tensor("x_477_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510852352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511900992))), name = tensor("encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_477_cast_fp16 = conv(dilations = x_477_dilations_0, groups = x_477_groups_0, pad = x_477_pad_0, pad_type = x_477_pad_type_0, strides = x_477_strides_0, weight = encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1101_cast_fp16)[name = tensor("x_477_cast_fp16")]; + tensor encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized"), 
quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511912256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512961984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512960896)))]; + tensor x_477_cast_fp16 = conv(dilations = x_477_dilations_0, groups = x_477_groups_0, pad = x_477_pad_0, pad_type = x_477_pad_type_0, strides = x_477_strides_0, weight = encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1101_cast_fp16)[name = tensor("x_477_cast_fp16")]; tensor input_1103_perm_0 = const()[name = tensor("input_1103_perm_0"), val = tensor([0, 2, 1])]; tensor input_1103_cast_fp16 = transpose(perm = input_1103_perm_0, x = x_477_cast_fp16)[name = tensor("transpose_166")]; tensor input_1105_cast_fp16 = add(x = input_1087_cast_fp16, y = input_1103_cast_fp16)[name = tensor("input_1105_cast_fp16")]; tensor input_1107_axes_0 = const()[name = tensor("input_1107_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511901568)))]; - tensor encoder_module_layers_20_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511903680)))]; + tensor encoder_module_layers_20_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512964096)))]; + tensor encoder_module_layers_20_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512966208)))]; tensor input_1107_cast_fp16 = layer_norm(axes = input_1107_axes_0, beta = encoder_module_layers_20_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_20_norm_feed_forward2_weight_to_fp16, x = input_1105_cast_fp16)[name = tensor("input_1107_cast_fp16")]; - tensor encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511905792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(516100160))), name = tensor("encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1107_cast_fp16)[name = tensor("linear_188_cast_fp16")]; + tensor encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512968320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517166848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset 
= tensor(517162688)))]; + tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1107_cast_fp16)[name = tensor("linear_188_cast_fp16")]; tensor input_1111_cast_fp16 = silu(x = linear_188_cast_fp16)[name = tensor("input_1111_cast_fp16")]; - tensor encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(516100736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520295104))), name = tensor("encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_189_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1111_cast_fp16)[name = tensor("linear_189_cast_fp16")]; + tensor encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517175104))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521370560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521369472)))]; + tensor linear_189_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1111_cast_fp16)[name = tensor("linear_189_cast_fp16")]; tensor var_3755_to_fp16 = const()[name = tensor("op_3755_to_fp16"), val = tensor(0x1p-1)]; tensor var_3756_cast_fp16 = mul(x = linear_189_cast_fp16, y = var_3755_to_fp16)[name = tensor("op_3756_cast_fp16")]; tensor input_1117_cast_fp16 = add(x = input_1105_cast_fp16, y = var_3756_cast_fp16)[name = tensor("input_1117_cast_fp16")]; tensor input_1119_axes_0 = const()[name = tensor("input_1119_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520295680)))]; - tensor encoder_module_layers_20_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520297792)))]; + tensor encoder_module_layers_20_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521372672)))]; + tensor encoder_module_layers_20_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521374784)))]; tensor input_1119_cast_fp16 = layer_norm(axes = input_1119_axes_0, beta = encoder_module_layers_20_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_20_norm_out_weight_to_fp16, x = input_1117_cast_fp16)[name = tensor("input_1119_cast_fp16")]; tensor input_1121_axes_0 = const()[name = tensor("input_1121_axes_0"), val = tensor([-1])]; - tensor 
encoder_module_layers_21_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520299904)))]; - tensor encoder_module_layers_21_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520302016)))]; + tensor encoder_module_layers_21_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521376896)))]; + tensor encoder_module_layers_21_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521379008)))]; tensor input_1121_cast_fp16 = layer_norm(axes = input_1121_axes_0, beta = encoder_module_layers_21_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_feed_forward1_weight_to_fp16, x = input_1119_cast_fp16)[name = tensor("input_1121_cast_fp16")]; - tensor encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520304128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(524498496))), name = tensor("encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1121_cast_fp16)[name = tensor("linear_190_cast_fp16")]; + tensor encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521381120))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525579648))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525575488)))]; + tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1121_cast_fp16)[name = tensor("linear_190_cast_fp16")]; tensor input_1125_cast_fp16 = silu(x = linear_190_cast_fp16)[name = tensor("input_1125_cast_fp16")]; - tensor encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(524499072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528693440))), name = tensor("encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_191_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1125_cast_fp16)[name = tensor("linear_191_cast_fp16")]; + tensor 
encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525587904))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529783360))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529782272)))]; + tensor linear_191_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1125_cast_fp16)[name = tensor("linear_191_cast_fp16")]; tensor var_3784_to_fp16 = const()[name = tensor("op_3784_to_fp16"), val = tensor(0x1p-1)]; tensor var_3785_cast_fp16 = mul(x = linear_191_cast_fp16, y = var_3784_to_fp16)[name = tensor("op_3785_cast_fp16")]; tensor input_1131_cast_fp16 = add(x = input_1119_cast_fp16, y = var_3785_cast_fp16)[name = tensor("input_1131_cast_fp16")]; tensor query_43_axes_0 = const()[name = tensor("query_43_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_21_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528694016)))]; - tensor encoder_module_layers_21_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528696128)))]; + tensor encoder_module_layers_21_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529785472)))]; + tensor encoder_module_layers_21_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529787584)))]; tensor query_43_cast_fp16 = layer_norm(axes = query_43_axes_0, beta = encoder_module_layers_21_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_self_att_weight_to_fp16, x = input_1131_cast_fp16)[name = tensor("query_43_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528698240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529746880))), name = tensor("encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_192_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_palettized, x = query_43_cast_fp16)[name = tensor("linear_192_cast_fp16")]; + tensor encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529789696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530839424))), zero_point = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(530838336)))]; + tensor linear_192_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_quantized, x = query_43_cast_fp16)[name = tensor("linear_192_cast_fp16")]; tensor var_3801 = const()[name = tensor("op_3801"), val = tensor([1, -1, 8, 128])]; tensor q_127_cast_fp16 = reshape(shape = var_3801, x = linear_192_cast_fp16)[name = tensor("q_127_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529747456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530796096))), name = tensor("encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_193_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_palettized, x = query_43_cast_fp16)[name = tensor("linear_193_cast_fp16")]; + tensor encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530841536))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531891264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531890176)))]; + tensor linear_193_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_quantized, x = query_43_cast_fp16)[name = tensor("linear_193_cast_fp16")]; tensor var_3805 = const()[name = tensor("op_3805"), val = tensor([1, -1, 8, 128])]; tensor k_85_cast_fp16 = reshape(shape = var_3805, x = linear_193_cast_fp16)[name = tensor("k_85_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530796672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531845312))), name = tensor("encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_194_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_palettized, x = query_43_cast_fp16)[name = tensor("linear_194_cast_fp16")]; + tensor encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531893376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532943104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532942016)))]; + tensor linear_194_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_quantized, x = query_43_cast_fp16)[name = tensor("linear_194_cast_fp16")]; tensor var_3809 = const()[name = tensor("op_3809"), val = tensor([1, -1, 8, 
128])]; tensor v_43_cast_fp16 = reshape(shape = var_3809, x = linear_194_cast_fp16)[name = tensor("v_43_cast_fp16")]; tensor value_47_perm_0 = const()[name = tensor("value_47_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_21_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_21_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531845888)))]; + tensor encoder_module_layers_21_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_21_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532945216)))]; tensor var_3821_cast_fp16 = add(x = q_127_cast_fp16, y = encoder_module_layers_21_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3821_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_21_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531848000)))]; + tensor encoder_module_layers_21_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_21_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532947328)))]; tensor var_3823_cast_fp16 = add(x = q_127_cast_fp16, y = encoder_module_layers_21_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3823_cast_fp16")]; tensor q_with_bias_v_43_perm_0 = const()[name = tensor("q_with_bias_v_43_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_485_transpose_x_0 = const()[name = tensor("x_485_transpose_x_0"), val = tensor(false)]; tensor x_485_transpose_y_0 = const()[name = tensor("x_485_transpose_y_0"), val = tensor(false)]; - tensor op_3825_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531850112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532234176))), name = tensor("op_3825_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3825_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3825_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532949440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533333952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533333504)))]; tensor q_with_bias_v_43_cast_fp16 = transpose(perm = q_with_bias_v_43_perm_0, x = var_3823_cast_fp16)[name = tensor("transpose_165")]; - tensor x_485_cast_fp16 = matmul(transpose_x = x_485_transpose_x_0, transpose_y = x_485_transpose_y_0, x = q_with_bias_v_43_cast_fp16, y = op_3825_to_fp16_palettized)[name = tensor("x_485_cast_fp16")]; + tensor x_485_cast_fp16 = matmul(transpose_x = x_485_transpose_x_0, transpose_y = x_485_transpose_y_0, x = q_with_bias_v_43_cast_fp16, y = op_3825_to_fp16_quantized)[name = tensor("x_485_cast_fp16")]; tensor x_487_pad_0 = const()[name = tensor("x_487_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_487_mode_0 = const()[name = tensor("x_487_mode_0"), val = tensor("constant")]; tensor const_239_to_fp16 = const()[name = tensor("const_239_to_fp16"), val = tensor(0x0p+0)]; @@ -3176,12 +3176,12 @@ program(1.0) tensor var_3858 = const()[name = tensor("op_3858"), val = tensor([1, -1, 1024])]; 
tensor var_3857_cast_fp16 = transpose(perm = var_3857_perm_0, x = x_491_cast_fp16)[name = tensor("transpose_161")]; tensor input_1135_cast_fp16 = reshape(shape = var_3858, x = var_3857_cast_fp16)[name = tensor("input_1135_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532234752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533283392))), name = tensor("encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_196_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_palettized, x = input_1135_cast_fp16)[name = tensor("linear_196_cast_fp16")]; + tensor encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533334784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534384512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534383424)))]; + tensor linear_196_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_quantized, x = input_1135_cast_fp16)[name = tensor("linear_196_cast_fp16")]; tensor input_1139_cast_fp16 = add(x = input_1131_cast_fp16, y = linear_196_cast_fp16)[name = tensor("input_1139_cast_fp16")]; tensor x_495_axes_0 = const()[name = tensor("x_495_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_21_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533283968)))]; - tensor encoder_module_layers_21_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533286080)))]; + tensor encoder_module_layers_21_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534386624)))]; + tensor encoder_module_layers_21_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534388736)))]; tensor x_495_cast_fp16 = layer_norm(axes = x_495_axes_0, beta = encoder_module_layers_21_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_conv_weight_to_fp16, x = input_1139_cast_fp16)[name = tensor("x_495_cast_fp16")]; tensor input_1141_perm_0 = const()[name = tensor("input_1141_perm_0"), val = tensor([0, 2, 1])]; tensor input_1143_pad_type_0 = const()[name = tensor("input_1143_pad_type_0"), val = tensor("valid")]; @@ -3189,9 +3189,9 @@ program(1.0) tensor input_1143_pad_0 = const()[name = tensor("input_1143_pad_0"), val = tensor([0, 0])]; tensor input_1143_dilations_0 = const()[name = tensor("input_1143_dilations_0"), val = tensor([1])]; tensor input_1143_groups_0 = const()[name = 
tensor("input_1143_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533288192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535385408))), name = tensor("encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534390848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536490176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536488064)))]; tensor input_1141_cast_fp16 = transpose(perm = input_1141_perm_0, x = x_495_cast_fp16)[name = tensor("transpose_160")]; - tensor input_1143_cast_fp16 = conv(dilations = input_1143_dilations_0, groups = input_1143_groups_0, pad = input_1143_pad_0, pad_type = input_1143_pad_type_0, strides = input_1143_strides_0, weight = encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1141_cast_fp16)[name = tensor("input_1143_cast_fp16")]; + tensor input_1143_cast_fp16 = conv(dilations = input_1143_dilations_0, groups = input_1143_groups_0, pad = input_1143_pad_0, pad_type = input_1143_pad_type_0, strides = input_1143_strides_0, weight = encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1141_cast_fp16)[name = tensor("input_1143_cast_fp16")]; tensor x_497_split_num_splits_0 = const()[name = tensor("x_497_split_num_splits_0"), val = tensor(2)]; tensor x_497_split_axis_0 = const()[name = tensor("x_497_split_axis_0"), val = tensor(1)]; tensor x_497_split_cast_fp16_0, tensor x_497_split_cast_fp16_1 = split(axis = x_497_split_axis_0, num_splits = x_497_split_num_splits_0, x = input_1143_cast_fp16)[name = tensor("x_497_split_cast_fp16")]; @@ -3207,75 +3207,75 @@ program(1.0) tensor input_1149_strides_0 = const()[name = tensor("input_1149_strides_0"), val = tensor([1])]; tensor input_1149_pad_0 = const()[name = tensor("input_1149_pad_0"), val = tensor([0, 0])]; tensor input_1149_dilations_0 = const()[name = tensor("input_1149_dilations_0"), val = tensor([1])]; - tensor const_305_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535385984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535395264))), name = tensor("const_305_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_306_to_fp16 = const()[name = tensor("const_306_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535395840)))]; - tensor input_1151_cast_fp16 = conv(bias = const_306_to_fp16, dilations = input_1149_dilations_0, groups = input_1149_groups_0, pad = input_1149_pad_0, pad_type = input_1149_pad_type_0, strides = input_1149_strides_0, weight = const_305_to_fp16_palettized, x = input_1147_cast_fp16)[name = tensor("input_1151_cast_fp16")]; + tensor const_305_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_305_to_fp16_quantized"), quantized_data = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536494336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536504704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536503616)))]; + tensor const_306_to_fp16 = const()[name = tensor("const_306_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536506816)))]; + tensor input_1151_cast_fp16 = conv(bias = const_306_to_fp16, dilations = input_1149_dilations_0, groups = input_1149_groups_0, pad = input_1149_pad_0, pad_type = input_1149_pad_type_0, strides = input_1149_strides_0, weight = const_305_to_fp16_quantized, x = input_1147_cast_fp16)[name = tensor("input_1151_cast_fp16")]; tensor input_1153_cast_fp16 = silu(x = input_1151_cast_fp16)[name = tensor("input_1153_cast_fp16")]; tensor x_499_pad_type_0 = const()[name = tensor("x_499_pad_type_0"), val = tensor("valid")]; tensor x_499_strides_0 = const()[name = tensor("x_499_strides_0"), val = tensor([1])]; tensor x_499_pad_0 = const()[name = tensor("x_499_pad_0"), val = tensor([0, 0])]; tensor x_499_dilations_0 = const()[name = tensor("x_499_dilations_0"), val = tensor([1])]; tensor x_499_groups_0 = const()[name = tensor("x_499_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535397952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536446592))), name = tensor("encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_499_cast_fp16 = conv(dilations = x_499_dilations_0, groups = x_499_groups_0, pad = x_499_pad_0, pad_type = x_499_pad_type_0, strides = x_499_strides_0, weight = encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1153_cast_fp16)[name = tensor("x_499_cast_fp16")]; + tensor encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536508928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537558656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537557568)))]; + tensor x_499_cast_fp16 = conv(dilations = x_499_dilations_0, groups = x_499_groups_0, pad = x_499_pad_0, pad_type = x_499_pad_type_0, strides = x_499_strides_0, weight = encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1153_cast_fp16)[name = tensor("x_499_cast_fp16")]; tensor input_1155_perm_0 = const()[name = tensor("input_1155_perm_0"), val = tensor([0, 2, 1])]; tensor input_1155_cast_fp16 = transpose(perm = input_1155_perm_0, x = x_499_cast_fp16)[name = tensor("transpose_159")]; tensor input_1157_cast_fp16 = add(x = input_1139_cast_fp16, y = input_1155_cast_fp16)[name = tensor("input_1157_cast_fp16")]; tensor input_1159_axes_0 = const()[name = tensor("input_1159_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_21_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(536447168)))]; - tensor encoder_module_layers_21_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536449280)))]; + tensor encoder_module_layers_21_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537560768)))]; + tensor encoder_module_layers_21_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537562880)))]; tensor input_1159_cast_fp16 = layer_norm(axes = input_1159_axes_0, beta = encoder_module_layers_21_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_feed_forward2_weight_to_fp16, x = input_1157_cast_fp16)[name = tensor("input_1159_cast_fp16")]; - tensor encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536451392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(540645760))), name = tensor("encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_197_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1159_cast_fp16)[name = tensor("linear_197_cast_fp16")]; + tensor encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537564992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(541763520))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(541759360)))]; + tensor linear_197_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1159_cast_fp16)[name = tensor("linear_197_cast_fp16")]; tensor input_1163_cast_fp16 = silu(x = linear_197_cast_fp16)[name = tensor("input_1163_cast_fp16")]; - tensor encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(540646336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544840704))), name = tensor("encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_198_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1163_cast_fp16)[name = tensor("linear_198_cast_fp16")]; + tensor encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(541771776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545967232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545966144)))]; + tensor linear_198_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1163_cast_fp16)[name = tensor("linear_198_cast_fp16")]; tensor var_3918_to_fp16 = const()[name = tensor("op_3918_to_fp16"), val = tensor(0x1p-1)]; tensor var_3919_cast_fp16 = mul(x = linear_198_cast_fp16, y = var_3918_to_fp16)[name = tensor("op_3919_cast_fp16")]; tensor input_1169_cast_fp16 = add(x = input_1157_cast_fp16, y = var_3919_cast_fp16)[name = tensor("input_1169_cast_fp16")]; tensor input_1171_axes_0 = const()[name = tensor("input_1171_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_21_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544841280)))]; - tensor encoder_module_layers_21_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544843392)))]; + tensor encoder_module_layers_21_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545969344)))]; + tensor encoder_module_layers_21_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545971456)))]; tensor input_1171_cast_fp16 = layer_norm(axes = input_1171_axes_0, beta = encoder_module_layers_21_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_out_weight_to_fp16, x = input_1169_cast_fp16)[name = tensor("input_1171_cast_fp16")]; tensor input_1173_axes_0 = const()[name = tensor("input_1173_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544845504)))]; - tensor encoder_module_layers_22_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544847616)))]; + tensor encoder_module_layers_22_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545973568)))]; + tensor encoder_module_layers_22_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545975680)))]; tensor input_1173_cast_fp16 = layer_norm(axes = input_1173_axes_0, beta = encoder_module_layers_22_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_feed_forward1_weight_to_fp16, x = input_1171_cast_fp16)[name = 
tensor("input_1173_cast_fp16")]; - tensor encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544849728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(549044096))), name = tensor("encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_199_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1173_cast_fp16)[name = tensor("linear_199_cast_fp16")]; + tensor encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545977792))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550176320))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550172160)))]; + tensor linear_199_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1173_cast_fp16)[name = tensor("linear_199_cast_fp16")]; tensor input_1177_cast_fp16 = silu(x = linear_199_cast_fp16)[name = tensor("input_1177_cast_fp16")]; - tensor encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(549044672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553239040))), name = tensor("encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_200_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1177_cast_fp16)[name = tensor("linear_200_cast_fp16")]; + tensor encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550184576))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554380032))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554378944)))]; + tensor linear_200_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1177_cast_fp16)[name = tensor("linear_200_cast_fp16")]; tensor var_3947_to_fp16 = const()[name = tensor("op_3947_to_fp16"), val = tensor(0x1p-1)]; tensor var_3948_cast_fp16 = mul(x = linear_200_cast_fp16, y = var_3947_to_fp16)[name = tensor("op_3948_cast_fp16")]; tensor input_1183_cast_fp16 = add(x = input_1171_cast_fp16, y = var_3948_cast_fp16)[name = tensor("input_1183_cast_fp16")]; tensor query_45_axes_0 = const()[name = tensor("query_45_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_self_att_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553239616)))]; - tensor encoder_module_layers_22_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553241728)))]; + tensor encoder_module_layers_22_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554382144)))]; + tensor encoder_module_layers_22_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554384256)))]; tensor query_45_cast_fp16 = layer_norm(axes = query_45_axes_0, beta = encoder_module_layers_22_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_self_att_weight_to_fp16, x = input_1183_cast_fp16)[name = tensor("query_45_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553243840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554292480))), name = tensor("encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_201_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_palettized, x = query_45_cast_fp16)[name = tensor("linear_201_cast_fp16")]; + tensor encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554386368))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555436096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555435008)))]; + tensor linear_201_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_quantized, x = query_45_cast_fp16)[name = tensor("linear_201_cast_fp16")]; tensor var_3964 = const()[name = tensor("op_3964"), val = tensor([1, -1, 8, 128])]; tensor q_133_cast_fp16 = reshape(shape = var_3964, x = linear_201_cast_fp16)[name = tensor("q_133_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554293056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555341696))), name = tensor("encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_202_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_palettized, x = query_45_cast_fp16)[name = tensor("linear_202_cast_fp16")]; + tensor encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555438208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556487936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556486848)))]; + tensor linear_202_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_quantized, x = query_45_cast_fp16)[name = tensor("linear_202_cast_fp16")]; tensor var_3968 = const()[name = tensor("op_3968"), val = tensor([1, -1, 8, 128])]; tensor k_89_cast_fp16 = reshape(shape = var_3968, x = linear_202_cast_fp16)[name = tensor("k_89_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555342272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556390912))), name = tensor("encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_203_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_palettized, x = query_45_cast_fp16)[name = tensor("linear_203_cast_fp16")]; + tensor encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556490048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557539776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557538688)))]; + tensor linear_203_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_quantized, x = query_45_cast_fp16)[name = tensor("linear_203_cast_fp16")]; tensor var_3972 = const()[name = tensor("op_3972"), val = tensor([1, -1, 8, 128])]; tensor v_45_cast_fp16 = reshape(shape = var_3972, x = linear_203_cast_fp16)[name = tensor("v_45_cast_fp16")]; tensor value_49_perm_0 = const()[name = tensor("value_49_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_22_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_22_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556391488)))]; + tensor encoder_module_layers_22_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_22_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557541888)))]; tensor var_3984_cast_fp16 = add(x = q_133_cast_fp16, y = encoder_module_layers_22_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3984_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_22_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556393600)))]; + tensor encoder_module_layers_22_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_22_self_attn_pos_bias_v_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557544000)))]; tensor var_3986_cast_fp16 = add(x = q_133_cast_fp16, y = encoder_module_layers_22_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3986_cast_fp16")]; tensor q_with_bias_v_45_perm_0 = const()[name = tensor("q_with_bias_v_45_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_507_transpose_x_0 = const()[name = tensor("x_507_transpose_x_0"), val = tensor(false)]; tensor x_507_transpose_y_0 = const()[name = tensor("x_507_transpose_y_0"), val = tensor(false)]; - tensor op_3988_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556395712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556779776))), name = tensor("op_3988_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3988_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3988_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557546112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557930624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557930176)))]; tensor q_with_bias_v_45_cast_fp16 = transpose(perm = q_with_bias_v_45_perm_0, x = var_3986_cast_fp16)[name = tensor("transpose_158")]; - tensor x_507_cast_fp16 = matmul(transpose_x = x_507_transpose_x_0, transpose_y = x_507_transpose_y_0, x = q_with_bias_v_45_cast_fp16, y = op_3988_to_fp16_palettized)[name = tensor("x_507_cast_fp16")]; + tensor x_507_cast_fp16 = matmul(transpose_x = x_507_transpose_x_0, transpose_y = x_507_transpose_y_0, x = q_with_bias_v_45_cast_fp16, y = op_3988_to_fp16_quantized)[name = tensor("x_507_cast_fp16")]; tensor x_509_pad_0 = const()[name = tensor("x_509_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_509_mode_0 = const()[name = tensor("x_509_mode_0"), val = tensor("constant")]; tensor const_249_to_fp16 = const()[name = tensor("const_249_to_fp16"), val = tensor(0x0p+0)]; @@ -3313,12 +3313,12 @@ program(1.0) tensor var_4021 = const()[name = tensor("op_4021"), val = tensor([1, -1, 1024])]; tensor var_4020_cast_fp16 = transpose(perm = var_4020_perm_0, x = x_513_cast_fp16)[name = tensor("transpose_154")]; tensor input_1187_cast_fp16 = reshape(shape = var_4021, x = var_4020_cast_fp16)[name = tensor("input_1187_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556780352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557828992))), name = tensor("encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_205_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_palettized, x = input_1187_cast_fp16)[name = tensor("linear_205_cast_fp16")]; + tensor encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557931456))), scale = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(558981184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558980096)))]; + tensor linear_205_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_quantized, x = input_1187_cast_fp16)[name = tensor("linear_205_cast_fp16")]; tensor input_1191_cast_fp16 = add(x = input_1183_cast_fp16, y = linear_205_cast_fp16)[name = tensor("input_1191_cast_fp16")]; tensor x_517_axes_0 = const()[name = tensor("x_517_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557829568)))]; - tensor encoder_module_layers_22_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557831680)))]; + tensor encoder_module_layers_22_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558983296)))]; + tensor encoder_module_layers_22_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558985408)))]; tensor x_517_cast_fp16 = layer_norm(axes = x_517_axes_0, beta = encoder_module_layers_22_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_conv_weight_to_fp16, x = input_1191_cast_fp16)[name = tensor("x_517_cast_fp16")]; tensor input_1193_perm_0 = const()[name = tensor("input_1193_perm_0"), val = tensor([0, 2, 1])]; tensor input_1195_pad_type_0 = const()[name = tensor("input_1195_pad_type_0"), val = tensor("valid")]; @@ -3326,9 +3326,9 @@ program(1.0) tensor input_1195_pad_0 = const()[name = tensor("input_1195_pad_0"), val = tensor([0, 0])]; tensor input_1195_dilations_0 = const()[name = tensor("input_1195_dilations_0"), val = tensor([1])]; tensor input_1195_groups_0 = const()[name = tensor("input_1195_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557833792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559931008))), name = tensor("encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558987520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561086848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561084736)))]; tensor input_1193_cast_fp16 = transpose(perm = input_1193_perm_0, x = x_517_cast_fp16)[name = tensor("transpose_153")]; - tensor input_1195_cast_fp16 = conv(dilations = input_1195_dilations_0, groups = input_1195_groups_0, pad = 
input_1195_pad_0, pad_type = input_1195_pad_type_0, strides = input_1195_strides_0, weight = encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1193_cast_fp16)[name = tensor("input_1195_cast_fp16")]; + tensor input_1195_cast_fp16 = conv(dilations = input_1195_dilations_0, groups = input_1195_groups_0, pad = input_1195_pad_0, pad_type = input_1195_pad_type_0, strides = input_1195_strides_0, weight = encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1193_cast_fp16)[name = tensor("input_1195_cast_fp16")]; tensor x_519_split_num_splits_0 = const()[name = tensor("x_519_split_num_splits_0"), val = tensor(2)]; tensor x_519_split_axis_0 = const()[name = tensor("x_519_split_axis_0"), val = tensor(1)]; tensor x_519_split_cast_fp16_0, tensor x_519_split_cast_fp16_1 = split(axis = x_519_split_axis_0, num_splits = x_519_split_num_splits_0, x = input_1195_cast_fp16)[name = tensor("x_519_split_cast_fp16")]; @@ -3344,75 +3344,75 @@ program(1.0) tensor input_1201_strides_0 = const()[name = tensor("input_1201_strides_0"), val = tensor([1])]; tensor input_1201_pad_0 = const()[name = tensor("input_1201_pad_0"), val = tensor([0, 0])]; tensor input_1201_dilations_0 = const()[name = tensor("input_1201_dilations_0"), val = tensor([1])]; - tensor const_307_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559931584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559940864))), name = tensor("const_307_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_308_to_fp16 = const()[name = tensor("const_308_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559941440)))]; - tensor input_1203_cast_fp16 = conv(bias = const_308_to_fp16, dilations = input_1201_dilations_0, groups = input_1201_groups_0, pad = input_1201_pad_0, pad_type = input_1201_pad_type_0, strides = input_1201_strides_0, weight = const_307_to_fp16_palettized, x = input_1199_cast_fp16)[name = tensor("input_1203_cast_fp16")]; + tensor const_307_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_307_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561091008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561101376))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561100288)))]; + tensor const_308_to_fp16 = const()[name = tensor("const_308_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561103488)))]; + tensor input_1203_cast_fp16 = conv(bias = const_308_to_fp16, dilations = input_1201_dilations_0, groups = input_1201_groups_0, pad = input_1201_pad_0, pad_type = input_1201_pad_type_0, strides = input_1201_strides_0, weight = const_307_to_fp16_quantized, x = input_1199_cast_fp16)[name = tensor("input_1203_cast_fp16")]; tensor input_1205_cast_fp16 = silu(x = input_1203_cast_fp16)[name = tensor("input_1205_cast_fp16")]; tensor x_521_pad_type_0 = const()[name = tensor("x_521_pad_type_0"), val = tensor("valid")]; tensor x_521_strides_0 = const()[name = tensor("x_521_strides_0"), val = tensor([1])]; tensor x_521_pad_0 = const()[name = tensor("x_521_pad_0"), val = tensor([0, 0])]; tensor x_521_dilations_0 = const()[name = tensor("x_521_dilations_0"), val = tensor([1])]; 
tensor x_521_groups_0 = const()[name = tensor("x_521_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559943552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560992192))), name = tensor("encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_521_cast_fp16 = conv(dilations = x_521_dilations_0, groups = x_521_groups_0, pad = x_521_pad_0, pad_type = x_521_pad_type_0, strides = x_521_strides_0, weight = encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1205_cast_fp16)[name = tensor("x_521_cast_fp16")]; + tensor encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561105600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562155328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562154240)))]; + tensor x_521_cast_fp16 = conv(dilations = x_521_dilations_0, groups = x_521_groups_0, pad = x_521_pad_0, pad_type = x_521_pad_type_0, strides = x_521_strides_0, weight = encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1205_cast_fp16)[name = tensor("x_521_cast_fp16")]; tensor input_1207_perm_0 = const()[name = tensor("input_1207_perm_0"), val = tensor([0, 2, 1])]; tensor input_1207_cast_fp16 = transpose(perm = input_1207_perm_0, x = x_521_cast_fp16)[name = tensor("transpose_152")]; tensor input_1209_cast_fp16 = add(x = input_1191_cast_fp16, y = input_1207_cast_fp16)[name = tensor("input_1209_cast_fp16")]; tensor input_1211_axes_0 = const()[name = tensor("input_1211_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560992768)))]; - tensor encoder_module_layers_22_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560994880)))]; + tensor encoder_module_layers_22_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562157440)))]; + tensor encoder_module_layers_22_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562159552)))]; tensor input_1211_cast_fp16 = layer_norm(axes = input_1211_axes_0, beta = encoder_module_layers_22_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_feed_forward2_weight_to_fp16, x = input_1209_cast_fp16)[name = tensor("input_1211_cast_fp16")]; - tensor encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560996992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565191360))), name = tensor("encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_206_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1211_cast_fp16)[name = tensor("linear_206_cast_fp16")]; + tensor encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562161664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566360192))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566356032)))]; + tensor linear_206_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1211_cast_fp16)[name = tensor("linear_206_cast_fp16")]; tensor input_1215_cast_fp16 = silu(x = linear_206_cast_fp16)[name = tensor("input_1215_cast_fp16")]; - tensor encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565191936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569386304))), name = tensor("encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_207_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1215_cast_fp16)[name = tensor("linear_207_cast_fp16")]; + tensor encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566368448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570563904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570562816)))]; + tensor linear_207_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1215_cast_fp16)[name = tensor("linear_207_cast_fp16")]; tensor var_4081_to_fp16 = const()[name = tensor("op_4081_to_fp16"), val = tensor(0x1p-1)]; tensor var_4082_cast_fp16 = mul(x = linear_207_cast_fp16, y = var_4081_to_fp16)[name = tensor("op_4082_cast_fp16")]; tensor input_1221_cast_fp16 = add(x = input_1209_cast_fp16, y = var_4082_cast_fp16)[name = tensor("input_1221_cast_fp16")]; tensor input_1223_axes_0 = const()[name = tensor("input_1223_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569386880)))]; - tensor encoder_module_layers_22_norm_out_bias_to_fp16 = 
const()[name = tensor("encoder_module_layers_22_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569388992)))]; + tensor encoder_module_layers_22_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570566016)))]; + tensor encoder_module_layers_22_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570568128)))]; tensor input_1223_cast_fp16 = layer_norm(axes = input_1223_axes_0, beta = encoder_module_layers_22_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_out_weight_to_fp16, x = input_1221_cast_fp16)[name = tensor("input_1223_cast_fp16")]; tensor input_1225_axes_0 = const()[name = tensor("input_1225_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569391104)))]; - tensor encoder_module_layers_23_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569393216)))]; + tensor encoder_module_layers_23_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570570240)))]; + tensor encoder_module_layers_23_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570572352)))]; tensor input_1225_cast_fp16 = layer_norm(axes = input_1225_axes_0, beta = encoder_module_layers_23_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_feed_forward1_weight_to_fp16, x = input_1223_cast_fp16)[name = tensor("input_1225_cast_fp16")]; - tensor encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569395328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(573589696))), name = tensor("encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_208_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1225_cast_fp16)[name = tensor("linear_208_cast_fp16")]; + tensor encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570574464))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(574772992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(574768832)))]; + tensor linear_208_cast_fp16 
= linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1225_cast_fp16)[name = tensor("linear_208_cast_fp16")]; tensor input_1229_cast_fp16 = silu(x = linear_208_cast_fp16)[name = tensor("input_1229_cast_fp16")]; - tensor encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(573590272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577784640))), name = tensor("encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_209_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1229_cast_fp16)[name = tensor("linear_209_cast_fp16")]; + tensor encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(574781248))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578976704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578975616)))]; + tensor linear_209_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1229_cast_fp16)[name = tensor("linear_209_cast_fp16")]; tensor var_4110_to_fp16 = const()[name = tensor("op_4110_to_fp16"), val = tensor(0x1p-1)]; tensor var_4111_cast_fp16 = mul(x = linear_209_cast_fp16, y = var_4110_to_fp16)[name = tensor("op_4111_cast_fp16")]; tensor input_1235_cast_fp16 = add(x = input_1223_cast_fp16, y = var_4111_cast_fp16)[name = tensor("input_1235_cast_fp16")]; tensor query_axes_0 = const()[name = tensor("query_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577785216)))]; - tensor encoder_module_layers_23_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577787328)))]; + tensor encoder_module_layers_23_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578978816)))]; + tensor encoder_module_layers_23_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578980928)))]; tensor query_cast_fp16 = layer_norm(axes = query_axes_0, beta = encoder_module_layers_23_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_self_att_weight_to_fp16, x = input_1235_cast_fp16)[name = tensor("query_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(577789440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578838080))), name = tensor("encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_210_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_palettized, x = query_cast_fp16)[name = tensor("linear_210_cast_fp16")]; + tensor encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578983040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580032768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580031680)))]; + tensor linear_210_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor("linear_210_cast_fp16")]; tensor var_4127 = const()[name = tensor("op_4127"), val = tensor([1, -1, 8, 128])]; tensor q_139_cast_fp16 = reshape(shape = var_4127, x = linear_210_cast_fp16)[name = tensor("q_139_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578838656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579887296))), name = tensor("encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_211_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_palettized, x = query_cast_fp16)[name = tensor("linear_211_cast_fp16")]; + tensor encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580034880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581084608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581083520)))]; + tensor linear_211_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor("linear_211_cast_fp16")]; tensor var_4131 = const()[name = tensor("op_4131"), val = tensor([1, -1, 8, 128])]; tensor k_93_cast_fp16 = reshape(shape = var_4131, x = linear_211_cast_fp16)[name = tensor("k_93_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579887872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580936512))), name = tensor("encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_212_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_palettized, x = query_cast_fp16)[name = tensor("linear_212_cast_fp16")]; + tensor encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581086720))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582136448))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582135360)))]; + tensor linear_212_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor("linear_212_cast_fp16")]; tensor var_4135 = const()[name = tensor("op_4135"), val = tensor([1, -1, 8, 128])]; tensor v_cast_fp16 = reshape(shape = var_4135, x = linear_212_cast_fp16)[name = tensor("v_cast_fp16")]; tensor value_perm_0 = const()[name = tensor("value_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_23_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_23_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580937088)))]; + tensor encoder_module_layers_23_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_23_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582138560)))]; tensor var_4147_cast_fp16 = add(x = q_139_cast_fp16, y = encoder_module_layers_23_self_attn_pos_bias_u_to_fp16)[name = tensor("op_4147_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_23_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580939200)))]; + tensor encoder_module_layers_23_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_23_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582140672)))]; tensor var_4149_cast_fp16 = add(x = q_139_cast_fp16, y = encoder_module_layers_23_self_attn_pos_bias_v_to_fp16)[name = tensor("op_4149_cast_fp16")]; tensor q_with_bias_v_perm_0 = const()[name = tensor("q_with_bias_v_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_529_transpose_x_0 = const()[name = tensor("x_529_transpose_x_0"), val = tensor(false)]; tensor x_529_transpose_y_0 = const()[name = tensor("x_529_transpose_y_0"), val = tensor(false)]; - tensor op_4151_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580941312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581325376))), name = tensor("op_4151_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_4151_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_4151_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582142784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582527296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582526848)))]; 
tensor q_with_bias_v_cast_fp16 = transpose(perm = q_with_bias_v_perm_0, x = var_4149_cast_fp16)[name = tensor("transpose_151")]; - tensor x_529_cast_fp16 = matmul(transpose_x = x_529_transpose_x_0, transpose_y = x_529_transpose_y_0, x = q_with_bias_v_cast_fp16, y = op_4151_to_fp16_palettized)[name = tensor("x_529_cast_fp16")]; + tensor x_529_cast_fp16 = matmul(transpose_x = x_529_transpose_x_0, transpose_y = x_529_transpose_y_0, x = q_with_bias_v_cast_fp16, y = op_4151_to_fp16_quantized)[name = tensor("x_529_cast_fp16")]; tensor x_531_pad_0 = const()[name = tensor("x_531_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_531_mode_0 = const()[name = tensor("x_531_mode_0"), val = tensor("constant")]; tensor const_259_to_fp16 = const()[name = tensor("const_259_to_fp16"), val = tensor(0x0p+0)]; @@ -3450,12 +3450,12 @@ program(1.0) tensor var_4184 = const()[name = tensor("op_4184"), val = tensor([1, -1, 1024])]; tensor var_4183_cast_fp16 = transpose(perm = var_4183_perm_0, x = x_535_cast_fp16)[name = tensor("transpose_147")]; tensor input_1239_cast_fp16 = reshape(shape = var_4184, x = var_4183_cast_fp16)[name = tensor("input_1239_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581325952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582374592))), name = tensor("encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_214_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_palettized, x = input_1239_cast_fp16)[name = tensor("linear_214_cast_fp16")]; + tensor encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582528128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583577856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583576768)))]; + tensor linear_214_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_quantized, x = input_1239_cast_fp16)[name = tensor("linear_214_cast_fp16")]; tensor input_1243_cast_fp16 = add(x = input_1235_cast_fp16, y = linear_214_cast_fp16)[name = tensor("input_1243_cast_fp16")]; tensor x_539_axes_0 = const()[name = tensor("x_539_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582375168)))]; - tensor encoder_module_layers_23_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582377280)))]; + tensor encoder_module_layers_23_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583579968)))]; + tensor 
encoder_module_layers_23_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583582080)))]; tensor x_539_cast_fp16 = layer_norm(axes = x_539_axes_0, beta = encoder_module_layers_23_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_conv_weight_to_fp16, x = input_1243_cast_fp16)[name = tensor("x_539_cast_fp16")]; tensor input_1245_perm_0 = const()[name = tensor("input_1245_perm_0"), val = tensor([0, 2, 1])]; tensor input_1247_pad_type_0 = const()[name = tensor("input_1247_pad_type_0"), val = tensor("valid")]; @@ -3463,9 +3463,9 @@ program(1.0) tensor input_1247_pad_0 = const()[name = tensor("input_1247_pad_0"), val = tensor([0, 0])]; tensor input_1247_dilations_0 = const()[name = tensor("input_1247_dilations_0"), val = tensor([1])]; tensor input_1247_groups_0 = const()[name = tensor("input_1247_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582379392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584476608))), name = tensor("encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583584192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585683520))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585681408)))]; tensor input_1245_cast_fp16 = transpose(perm = input_1245_perm_0, x = x_539_cast_fp16)[name = tensor("transpose_146")]; - tensor input_1247_cast_fp16 = conv(dilations = input_1247_dilations_0, groups = input_1247_groups_0, pad = input_1247_pad_0, pad_type = input_1247_pad_type_0, strides = input_1247_strides_0, weight = encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1245_cast_fp16)[name = tensor("input_1247_cast_fp16")]; + tensor input_1247_cast_fp16 = conv(dilations = input_1247_dilations_0, groups = input_1247_groups_0, pad = input_1247_pad_0, pad_type = input_1247_pad_type_0, strides = input_1247_strides_0, weight = encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1245_cast_fp16)[name = tensor("input_1247_cast_fp16")]; tensor x_541_split_num_splits_0 = const()[name = tensor("x_541_split_num_splits_0"), val = tensor(2)]; tensor x_541_split_axis_0 = const()[name = tensor("x_541_split_axis_0"), val = tensor(1)]; tensor x_541_split_cast_fp16_0, tensor x_541_split_cast_fp16_1 = split(axis = x_541_split_axis_0, num_splits = x_541_split_num_splits_0, x = input_1247_cast_fp16)[name = tensor("x_541_split_cast_fp16")]; @@ -3481,35 +3481,35 @@ program(1.0) tensor input_1253_strides_0 = const()[name = tensor("input_1253_strides_0"), val = tensor([1])]; tensor input_1253_pad_0 = const()[name = tensor("input_1253_pad_0"), val = tensor([0, 0])]; tensor input_1253_dilations_0 = const()[name = tensor("input_1253_dilations_0"), val = tensor([1])]; - tensor const_309_to_fp16_palettized = 
constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584477184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584486464))), name = tensor("const_309_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_310_to_fp16 = const()[name = tensor("const_310_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584487040)))]; - tensor input_1255_cast_fp16 = conv(bias = const_310_to_fp16, dilations = input_1253_dilations_0, groups = input_1253_groups_0, pad = input_1253_pad_0, pad_type = input_1253_pad_type_0, strides = input_1253_strides_0, weight = const_309_to_fp16_palettized, x = input_1251_cast_fp16)[name = tensor("input_1255_cast_fp16")]; + tensor const_309_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_309_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585687680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585698048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585696960)))]; + tensor const_310_to_fp16 = const()[name = tensor("const_310_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585700160)))]; + tensor input_1255_cast_fp16 = conv(bias = const_310_to_fp16, dilations = input_1253_dilations_0, groups = input_1253_groups_0, pad = input_1253_pad_0, pad_type = input_1253_pad_type_0, strides = input_1253_strides_0, weight = const_309_to_fp16_quantized, x = input_1251_cast_fp16)[name = tensor("input_1255_cast_fp16")]; tensor input_1257_cast_fp16 = silu(x = input_1255_cast_fp16)[name = tensor("input_1257_cast_fp16")]; tensor x_543_pad_type_0 = const()[name = tensor("x_543_pad_type_0"), val = tensor("valid")]; tensor x_543_strides_0 = const()[name = tensor("x_543_strides_0"), val = tensor([1])]; tensor x_543_pad_0 = const()[name = tensor("x_543_pad_0"), val = tensor([0, 0])]; tensor x_543_dilations_0 = const()[name = tensor("x_543_dilations_0"), val = tensor([1])]; tensor x_543_groups_0 = const()[name = tensor("x_543_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584489152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585537792))), name = tensor("encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_543_cast_fp16 = conv(dilations = x_543_dilations_0, groups = x_543_groups_0, pad = x_543_pad_0, pad_type = x_543_pad_type_0, strides = x_543_strides_0, weight = encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1257_cast_fp16)[name = tensor("x_543_cast_fp16")]; + tensor encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585702272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586752000))), zero_point = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(586750912)))]; + tensor x_543_cast_fp16 = conv(dilations = x_543_dilations_0, groups = x_543_groups_0, pad = x_543_pad_0, pad_type = x_543_pad_type_0, strides = x_543_strides_0, weight = encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1257_cast_fp16)[name = tensor("x_543_cast_fp16")]; tensor input_1259_perm_0 = const()[name = tensor("input_1259_perm_0"), val = tensor([0, 2, 1])]; tensor input_1259_cast_fp16 = transpose(perm = input_1259_perm_0, x = x_543_cast_fp16)[name = tensor("transpose_145")]; tensor input_1261_cast_fp16 = add(x = input_1243_cast_fp16, y = input_1259_cast_fp16)[name = tensor("input_1261_cast_fp16")]; tensor input_1263_axes_0 = const()[name = tensor("input_1263_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585538368)))]; - tensor encoder_module_layers_23_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585540480)))]; + tensor encoder_module_layers_23_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586754112)))]; + tensor encoder_module_layers_23_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586756224)))]; tensor input_1263_cast_fp16 = layer_norm(axes = input_1263_axes_0, beta = encoder_module_layers_23_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_feed_forward2_weight_to_fp16, x = input_1261_cast_fp16)[name = tensor("input_1263_cast_fp16")]; - tensor encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585542592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(589736960))), name = tensor("encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_215_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1263_cast_fp16)[name = tensor("linear_215_cast_fp16")]; + tensor encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586758336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(590956864))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(590952704)))]; + tensor linear_215_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1263_cast_fp16)[name = tensor("linear_215_cast_fp16")]; tensor 
input_1267_cast_fp16 = silu(x = linear_215_cast_fp16)[name = tensor("input_1267_cast_fp16")]; - tensor encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(589737536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(593931904))), name = tensor("encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_216_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1267_cast_fp16)[name = tensor("linear_216_cast_fp16")]; + tensor encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(590965120))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(595160576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(595159488)))]; + tensor linear_216_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1267_cast_fp16)[name = tensor("linear_216_cast_fp16")]; tensor var_4244_to_fp16 = const()[name = tensor("op_4244_to_fp16"), val = tensor(0x1p-1)]; tensor var_4245_cast_fp16 = mul(x = linear_216_cast_fp16, y = var_4244_to_fp16)[name = tensor("op_4245_cast_fp16")]; tensor input_cast_fp16 = add(x = input_1261_cast_fp16, y = var_4245_cast_fp16)[name = tensor("input_cast_fp16")]; tensor audio_signal_axes_0 = const()[name = tensor("audio_signal_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(593932480)))]; - tensor encoder_module_layers_23_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(593934592)))]; + tensor encoder_module_layers_23_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(595162688)))]; + tensor encoder_module_layers_23_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(595164800)))]; tensor audio_signal_cast_fp16 = layer_norm(axes = audio_signal_axes_0, beta = encoder_module_layers_23_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_out_weight_to_fp16, x = input_cast_fp16)[name = tensor("audio_signal_cast_fp16")]; tensor obj_3_perm_0 = const()[name = tensor("obj_3_perm_0"), val = tensor([0, 2, 1])]; tensor obj_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("obj_3_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")];
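Note: every hunk in this patch follows the same pattern — a palettized weight (`constexpr_lut_to_dense`, which expands small integer indices through a lookup table) is replaced by an affine-quantized weight (`constexpr_affine_dequantize`, which rescales integer data with a per-axis scale and zero point), and the `weight.bin` blob offsets shift accordingly. As a rough illustration of the numerical difference between the two constexpr schemes, the following is a minimal NumPy sketch; the array sizes and values are placeholders, not the tensors referenced in this diff.

import numpy as np

# constexpr_lut_to_dense (palettization): indices select entries from a small LUT.
lut = np.array([-0.5, -0.1, 0.1, 0.5], dtype=np.float16)    # hypothetical 2-bit palette
indices = np.array([0, 3, 2, 1, 1, 0], dtype=np.uint8)      # per-element LUT indices
dense_from_lut = lut[indices]                                # dense fp16 weights

# constexpr_affine_dequantize: integer data rescaled per axis with scale / zero_point.
quantized_data = np.array([[12, 250, 7], [99, 0, 255]], dtype=np.uint8)  # hypothetical
scale = np.array([0.01, 0.02], dtype=np.float16)             # one scale per row (axis = 0)
zero_point = np.array([128, 128], dtype=np.uint8)            # one zero point per row
dense_from_affine = (
    (quantized_data.astype(np.float32) - zero_point[:, None].astype(np.float32))
    * scale[:, None].astype(np.float32)
).astype(np.float16)

print(dense_from_lut)
print(dense_from_affine)

Both ops expand to dense fp16 constants at load time; the affine form stores one integer per weight element plus per-axis scale and zero-point vectors rather than shared indices and a LUT, which is consistent with the slightly larger, shifted blob regions referenced by the new hunks.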