diff --git "a/MelEncoder.mlmodelc/model.mil" "b/MelEncoder.mlmodelc/model.mil" --- "a/MelEncoder.mlmodelc/model.mil" +++ "b/MelEncoder.mlmodelc/model.mil" @@ -13,7 +13,7 @@ program(1.0) tensor var_37_promoted_to_fp16 = const()[name = tensor("op_37_promoted_to_fp16"), val = tensor(0x1p+0)]; tensor floor_div_0_to_fp16 = cast(dtype = var_36_to_fp16_dtype_0, x = floor_div_0)[name = tensor("cast_6")]; tensor seq_len_1_cast_fp16 = add(x = floor_div_0_to_fp16, y = var_37_promoted_to_fp16)[name = tensor("seq_len_1_cast_fp16")]; - tensor cast_2_dtype_0 = const()[name = tensor("cast_2_dtype_0"), val = tensor("int32")]; + tensor seq_len_dtype_0 = const()[name = tensor("seq_len_dtype_0"), val = tensor("int32")]; tensor var_41_begin_0 = const()[name = tensor("op_41_begin_0"), val = tensor([0, 0])]; tensor var_41_end_0 = const()[name = tensor("op_41_end_0"), val = tensor([1, 1])]; tensor var_41_end_mask_0 = const()[name = tensor("op_41_end_mask_0"), val = tensor([true, false])]; @@ -51,14 +51,14 @@ program(1.0) tensor conv_0_pad_0 = const()[name = tensor("conv_0_pad_0"), val = tensor([0, 0])]; tensor conv_0_dilations_0 = const()[name = tensor("conv_0_dilations_0"), val = tensor([1])]; tensor conv_0_groups_0 = const()[name = tensor("conv_0_groups_0"), val = tensor(1)]; - tensor expand_dims_4_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131712))), name = tensor("expand_dims_4_to_fp16_palettized"), shape = tensor([257, 1, 512])]; - tensor conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_6, weight = expand_dims_4_to_fp16_palettized, x = expand_dims_7_cast_fp16)[name = tensor("conv_0_cast_fp16")]; + tensor expand_dims_4_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("expand_dims_4_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131712)))]; + tensor conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_6, weight = expand_dims_4_to_fp16_quantized, x = expand_dims_7_cast_fp16)[name = tensor("conv_0_cast_fp16")]; tensor conv_1_pad_type_0 = const()[name = tensor("conv_1_pad_type_0"), val = tensor("valid")]; tensor conv_1_pad_0 = const()[name = tensor("conv_1_pad_0"), val = tensor([0, 0])]; tensor conv_1_dilations_0 = const()[name = tensor("conv_1_dilations_0"), val = tensor([1])]; tensor conv_1_groups_0 = const()[name = tensor("conv_1_groups_0"), val = tensor(1)]; - tensor expand_dims_5_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132288))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263936))), name = tensor("expand_dims_5_to_fp16_palettized"), shape = tensor([257, 1, 512])]; - tensor conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_6, weight = expand_dims_5_to_fp16_palettized, x = 
expand_dims_7_cast_fp16)[name = tensor("conv_1_cast_fp16")]; + tensor expand_dims_5_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("expand_dims_5_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264384)))]; + tensor conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_6, weight = expand_dims_5_to_fp16_quantized, x = expand_dims_7_cast_fp16)[name = tensor("conv_1_cast_fp16")]; tensor stack_0_axis_0 = const()[name = tensor("stack_0_axis_0"), val = tensor(-1)]; tensor stack_0_cast_fp16 = stack(axis = stack_0_axis_0, values = (conv_0_cast_fp16, conv_1_cast_fp16))[name = tensor("stack_0_cast_fp16")]; tensor var_15_promoted_to_fp16 = const()[name = tensor("op_15_promoted_to_fp16"), val = tensor(0x1p+1)]; @@ -68,30 +68,30 @@ program(1.0) tensor var_67_cast_fp16 = reduce_sum(axes = var_67_axes_0, keep_dims = var_67_keep_dims_0, x = var_65_cast_fp16)[name = tensor("op_67_cast_fp16")]; tensor x_11_transpose_x_0 = const()[name = tensor("x_11_transpose_x_0"), val = tensor(false)]; tensor x_11_transpose_y_0 = const()[name = tensor("x_11_transpose_y_0"), val = tensor(false)]; - tensor const_6_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297472))), name = tensor("const_6_to_fp16_palettized"), shape = tensor([1, 128, 257])]; - tensor x_11_cast_fp16 = matmul(transpose_x = x_11_transpose_x_0, transpose_y = x_11_transpose_y_0, x = const_6_to_fp16_palettized, y = var_67_cast_fp16)[name = tensor("x_11_cast_fp16")]; + tensor const_6_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(1), name = tensor("const_6_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298368)))]; + tensor x_11_cast_fp16 = matmul(transpose_x = x_11_transpose_x_0, transpose_y = x_11_transpose_y_0, x = const_6_to_fp16_quantized, y = var_67_cast_fp16)[name = tensor("x_11_cast_fp16")]; tensor var_74_to_fp16 = const()[name = tensor("op_74_to_fp16"), val = tensor(0x1p-24)]; tensor var_75_cast_fp16 = add(x = x_11_cast_fp16, y = var_74_to_fp16)[name = tensor("op_75_cast_fp16")]; tensor x_13_epsilon_0 = const()[name = tensor("x_13_epsilon_0"), val = tensor(0x1p-149)]; tensor x_13_cast_fp16 = log(epsilon = x_13_epsilon_0, x = var_75_cast_fp16)[name = tensor("x_13_cast_fp16")]; tensor var_80 = const()[name = tensor("op_80"), val = tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 
111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 
822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 
1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500]])]; tensor var_83_axes_0 = const()[name = tensor("op_83_axes_0"), val = tensor([1])]; - tensor seq_len_1_cast_fp16_to_int32 = cast(dtype = cast_2_dtype_0, x = seq_len_1_cast_fp16)[name = tensor("cast_4")]; + tensor seq_len_1_cast_fp16_to_int32 = cast(dtype = seq_len_dtype_0, x = seq_len_1_cast_fp16)[name = tensor("cast_4")]; tensor var_83 = expand_dims(axes = var_83_axes_0, x = seq_len_1_cast_fp16_to_int32)[name = tensor("op_83")]; tensor valid_mask = less(x = var_80, y = var_83)[name = tensor("valid_mask")]; tensor var_85_axes_0 = const()[name = tensor("op_85_axes_0"), val = tensor([1])]; tensor var_85 = expand_dims(axes = var_85_axes_0, x = valid_mask)[name = tensor("op_85")]; tensor var_85_after_broadcast_reps_0 = const()[name = tensor("op_85_after_broadcast_reps_0"), val = tensor([1, 128, 1])]; tensor var_85_after_broadcast = tile(reps = var_85_after_broadcast_reps_0, x = var_85)[name = tensor("op_85_after_broadcast")]; - tensor op_8_after_broadcast_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298048))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(490240))), name = tensor("op_8_after_broadcast_to_fp16_palettized"), shape = tensor([1, 128, 1501])]; - tensor var_86_cast_fp16 = select(a = x_13_cast_fp16, b = op_8_after_broadcast_to_fp16_palettized, cond = var_85_after_broadcast)[name = tensor("op_86_cast_fp16")]; + tensor op_8_after_broadcast_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("op_8_after_broadcast_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298880))), scale = tensor(0x0p+0), zero_point = tensor(0)]; + tensor var_86_cast_fp16 = select(a = x_13_cast_fp16, b = op_8_after_broadcast_to_fp16_quantized, cond = var_85_after_broadcast)[name = tensor("op_86_cast_fp16")]; tensor x_mean_numerator_axes_0 = const()[name = tensor("x_mean_numerator_axes_0"), val = tensor([2])]; tensor x_mean_numerator_keep_dims_0 = const()[name = tensor("x_mean_numerator_keep_dims_0"), val = tensor(false)]; tensor x_mean_numerator_cast_fp16 = reduce_sum(axes = x_mean_numerator_axes_0, keep_dims = x_mean_numerator_keep_dims_0, x = var_86_cast_fp16)[name = tensor("x_mean_numerator_cast_fp16")]; tensor x_mean_denominator_axes_0 = const()[name = tensor("x_mean_denominator_axes_0"), val = tensor([1])]; tensor x_mean_denominator_keep_dims_0 = const()[name = tensor("x_mean_denominator_keep_dims_0"), val = tensor(false)]; - tensor cast_5_to_fp16_dtype_0 = const()[name = tensor("cast_5_to_fp16_dtype_0"), val = tensor("fp16")]; - tensor valid_mask_to_fp16 = cast(dtype = cast_5_to_fp16_dtype_0, x = valid_mask)[name = tensor("cast_3")]; + tensor cast_2_to_fp16_dtype_0 = const()[name = tensor("cast_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor valid_mask_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = valid_mask)[name = tensor("cast_3")]; tensor x_mean_denominator_cast_fp16 = reduce_sum(axes = x_mean_denominator_axes_0, keep_dims = x_mean_denominator_keep_dims_0, x = valid_mask_to_fp16)[name = tensor("x_mean_denominator_cast_fp16")]; 
tensor var_91_axes_0 = const()[name = tensor("op_91_axes_0"), val = tensor([1])]; tensor var_91_cast_fp16 = expand_dims(axes = var_91_axes_0, x = x_mean_denominator_cast_fp16)[name = tensor("op_91_cast_fp16")]; @@ -99,7 +99,7 @@ program(1.0) tensor var_94_axes_0 = const()[name = tensor("op_94_axes_0"), val = tensor([2])]; tensor var_94_cast_fp16 = expand_dims(axes = var_94_axes_0, x = x_mean_cast_fp16)[name = tensor("op_94_cast_fp16")]; tensor var_95_cast_fp16 = sub(x = x_13_cast_fp16, y = var_94_cast_fp16)[name = tensor("op_95_cast_fp16")]; - tensor var_96_cast_fp16 = select(a = var_95_cast_fp16, b = op_8_after_broadcast_to_fp16_palettized, cond = var_85_after_broadcast)[name = tensor("op_96_cast_fp16")]; + tensor var_96_cast_fp16 = select(a = var_95_cast_fp16, b = op_8_after_broadcast_to_fp16_quantized, cond = var_85_after_broadcast)[name = tensor("op_96_cast_fp16")]; tensor var_15_promoted_1_to_fp16 = const()[name = tensor("op_15_promoted_1_to_fp16"), val = tensor(0x1p+1)]; tensor var_97_cast_fp16 = pow(x = var_96_cast_fp16, y = var_15_promoted_1_to_fp16)[name = tensor("op_97_cast_fp16")]; tensor var_99_axes_0 = const()[name = tensor("op_99_axes_0"), val = tensor([2])]; @@ -121,9 +121,9 @@ program(1.0) tensor processed_signal_cast_fp16 = select(a = var_8_to_fp16, b = x_15_cast_fp16, cond = var_117)[name = tensor("processed_signal_cast_fp16")]; tensor var_138 = const()[name = tensor("op_138"), val = tensor(-1)]; tensor x_17_perm_0 = const()[name = tensor("x_17_perm_0"), val = tensor([0, 2, 1])]; - tensor cast_17_to_fp16_dtype_0 = const()[name = tensor("cast_17_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor var_215_to_fp16_dtype_0 = const()[name = tensor("op_215_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_216_promoted_to_fp16 = const()[name = tensor("op_216_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor seq_len_1_cast_fp16_to_int32_to_fp16 = cast(dtype = cast_17_to_fp16_dtype_0, x = seq_len_1_cast_fp16_to_int32)[name = tensor("cast_2")]; + tensor seq_len_1_cast_fp16_to_int32_to_fp16 = cast(dtype = var_215_to_fp16_dtype_0, x = seq_len_1_cast_fp16_to_int32)[name = tensor("cast_2")]; tensor var_217_cast_fp16 = add(x = seq_len_1_cast_fp16_to_int32_to_fp16, y = var_216_promoted_to_fp16)[name = tensor("op_217_cast_fp16")]; tensor _inversed_219_y_0_to_fp16 = const()[name = tensor("_inversed_219_y_0_to_fp16"), val = tensor(0x1p-1)]; tensor _inversed_219_cast_fp16 = mul(x = var_217_cast_fp16, y = _inversed_219_y_0_to_fp16)[name = tensor("_inversed_219_cast_fp16")]; @@ -152,55 +152,55 @@ program(1.0) tensor input_11_strides_0 = const()[name = tensor("input_11_strides_0"), val = tensor([2, 2])]; tensor input_11_dilations_0 = const()[name = tensor("input_11_dilations_0"), val = tensor([1, 1])]; tensor input_11_groups_0 = const()[name = tensor("input_11_groups_0"), val = tensor(1)]; - tensor encoder_module_pre_encode_conv_0_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(490816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493184))), name = tensor("encoder_module_pre_encode_conv_0_weight_to_fp16_palettized"), shape = tensor([256, 1, 3, 3])]; - tensor encoder_module_pre_encode_conv_0_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493760)))]; - tensor input_11_cast_fp16 = conv(bias = 
encoder_module_pre_encode_conv_0_bias_to_fp16, dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = encoder_module_pre_encode_conv_0_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor encoder_module_pre_encode_conv_0_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_0_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(491072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493440)))]; + tensor encoder_module_pre_encode_conv_0_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(494336)))]; + tensor input_11_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_0_bias_to_fp16, dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = encoder_module_pre_encode_conv_0_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor input_13_cast_fp16 = relu(x = input_11_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor input_15_pad_type_0 = const()[name = tensor("input_15_pad_type_0"), val = tensor("custom")]; tensor input_15_pad_0 = const()[name = tensor("input_15_pad_0"), val = tensor([1, 1, 1, 1])]; tensor input_15_strides_0 = const()[name = tensor("input_15_strides_0"), val = tensor([2, 2])]; tensor input_15_groups_0 = const()[name = tensor("input_15_groups_0"), val = tensor(256)]; tensor input_15_dilations_0 = const()[name = tensor("input_15_dilations_0"), val = tensor([1, 1])]; - tensor encoder_module_pre_encode_conv_2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(494336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496704))), name = tensor("encoder_module_pre_encode_conv_2_weight_to_fp16_palettized"), shape = tensor([256, 1, 3, 3])]; - tensor encoder_module_pre_encode_conv_2_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497280)))]; - tensor input_15_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_2_bias_to_fp16, dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = encoder_module_pre_encode_conv_2_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor encoder_module_pre_encode_conv_2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(494912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497280)))]; + tensor 
encoder_module_pre_encode_conv_2_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(498176)))]; + tensor input_15_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_2_bias_to_fp16, dilations = input_15_dilations_0, groups = input_15_groups_0, pad = input_15_pad_0, pad_type = input_15_pad_type_0, strides = input_15_strides_0, weight = encoder_module_pre_encode_conv_2_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("valid")]; tensor input_17_strides_0 = const()[name = tensor("input_17_strides_0"), val = tensor([1, 1])]; tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_17_dilations_0 = const()[name = tensor("input_17_dilations_0"), val = tensor([1, 1])]; tensor input_17_groups_0 = const()[name = tensor("input_17_groups_0"), val = tensor(1)]; - tensor encoder_module_pre_encode_conv_3_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(563456))), name = tensor("encoder_module_pre_encode_conv_3_weight_to_fp16_palettized"), shape = tensor([256, 256, 1, 1])]; - tensor encoder_module_pre_encode_conv_3_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_3_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564032)))]; - tensor input_17_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_3_bias_to_fp16, dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = encoder_module_pre_encode_conv_3_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor encoder_module_pre_encode_conv_3_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_3_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(498752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564352)))]; + tensor encoder_module_pre_encode_conv_3_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_3_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565248)))]; + tensor input_17_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_3_bias_to_fp16, dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = encoder_module_pre_encode_conv_3_weight_to_fp16_quantized, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; tensor input_19_cast_fp16 = relu(x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor input_21_pad_type_0 = const()[name = tensor("input_21_pad_type_0"), val = tensor("custom")]; tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([1, 1, 1, 1])]; tensor input_21_strides_0 = const()[name = tensor("input_21_strides_0"), val = tensor([2, 2])]; 
tensor input_21_groups_0 = const()[name = tensor("input_21_groups_0"), val = tensor(256)]; tensor input_21_dilations_0 = const()[name = tensor("input_21_dilations_0"), val = tensor([1, 1])]; - tensor encoder_module_pre_encode_conv_5_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(564608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566976))), name = tensor("encoder_module_pre_encode_conv_5_weight_to_fp16_palettized"), shape = tensor([256, 1, 3, 3])]; - tensor encoder_module_pre_encode_conv_5_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_5_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(567552)))]; - tensor input_21_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_5_bias_to_fp16, dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = encoder_module_pre_encode_conv_5_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor encoder_module_pre_encode_conv_5_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_5_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565824))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568192)))]; + tensor encoder_module_pre_encode_conv_5_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_5_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569088)))]; + tensor input_21_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_5_bias_to_fp16, dilations = input_21_dilations_0, groups = input_21_groups_0, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = input_21_strides_0, weight = encoder_module_pre_encode_conv_5_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor input_23_pad_type_0 = const()[name = tensor("input_23_pad_type_0"), val = tensor("valid")]; tensor input_23_strides_0 = const()[name = tensor("input_23_strides_0"), val = tensor([1, 1])]; tensor input_23_pad_0 = const()[name = tensor("input_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor input_23_dilations_0 = const()[name = tensor("input_23_dilations_0"), val = tensor([1, 1])]; tensor input_23_groups_0 = const()[name = tensor("input_23_groups_0"), val = tensor(1)]; - tensor encoder_module_pre_encode_conv_6_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(633728))), name = tensor("encoder_module_pre_encode_conv_6_weight_to_fp16_palettized"), shape = tensor([256, 256, 1, 1])]; - tensor encoder_module_pre_encode_conv_6_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_6_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(634304)))]; - tensor input_23_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_6_bias_to_fp16, dilations = input_23_dilations_0, groups = input_23_groups_0, 
pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = encoder_module_pre_encode_conv_6_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor encoder_module_pre_encode_conv_6_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_conv_6_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(635584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(635264)))]; + tensor encoder_module_pre_encode_conv_6_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_conv_6_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(636160)))]; + tensor input_23_cast_fp16 = conv(bias = encoder_module_pre_encode_conv_6_bias_to_fp16, dilations = input_23_dilations_0, groups = input_23_groups_0, pad = input_23_pad_0, pad_type = input_23_pad_type_0, strides = input_23_strides_0, weight = encoder_module_pre_encode_conv_6_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor x_19_cast_fp16 = relu(x = input_23_cast_fp16)[name = tensor("x_19_cast_fp16")]; tensor var_286_perm_0 = const()[name = tensor("op_286_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_287 = const()[name = tensor("op_287"), val = tensor([1, 188, -1])]; tensor var_286_cast_fp16 = transpose(perm = var_286_perm_0, x = x_19_cast_fp16)[name = tensor("transpose_314")]; tensor input_25_cast_fp16 = reshape(shape = var_287, x = var_286_cast_fp16)[name = tensor("input_25_cast_fp16")]; - tensor encoder_module_pre_encode_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(634880))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4829248))), name = tensor("encoder_module_pre_encode_out_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor encoder_module_pre_encode_out_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4829824)))]; - tensor linear_0_cast_fp16 = linear(bias = encoder_module_pre_encode_out_bias_to_fp16, weight = encoder_module_pre_encode_out_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = tensor("linear_0_cast_fp16")]; - tensor cast_28_dtype_0 = const()[name = tensor("cast_28_dtype_0"), val = tensor("int32")]; + tensor encoder_module_pre_encode_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_pre_encode_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(636736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4832192))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4831104)))]; + tensor encoder_module_pre_encode_out_bias_to_fp16 = const()[name = tensor("encoder_module_pre_encode_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4834304)))]; + tensor linear_0_cast_fp16 = linear(bias = encoder_module_pre_encode_out_bias_to_fp16, weight 
= encoder_module_pre_encode_out_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = tensor("linear_0_cast_fp16")]; + tensor padding_length_dtype_0 = const()[name = tensor("padding_length_dtype_0"), val = tensor("int32")]; tensor expand_dims_3 = const()[name = tensor("expand_dims_3"), val = tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187]])]; tensor var_325_axes_0 = const()[name = tensor("op_325_axes_0"), val = tensor([-1])]; - tensor encoder_length = cast(dtype = cast_28_dtype_0, x = lengths_cast_fp16)[name = tensor("cast_1")]; + tensor encoder_length = cast(dtype = padding_length_dtype_0, x = lengths_cast_fp16)[name = tensor("cast_1")]; tensor var_325 = expand_dims(axes = var_325_axes_0, x = encoder_length)[name = tensor("op_325")]; tensor pad_mask_1 = less(x = expand_dims_3, y = var_325)[name = tensor("pad_mask_1")]; tensor var_327_axes_0 = const()[name = tensor("op_327_axes_0"), val = tensor([1])]; @@ -215,47 +215,47 @@ program(1.0) tensor mask_5 = logical_not(x = att_mask)[name = tensor("mask_5")]; tensor pad_mask = logical_not(x = pad_mask_1)[name = tensor("pad_mask")]; tensor input_29_axes_0 = const()[name = tensor("input_29_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4831936)))]; - tensor encoder_module_layers_0_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4834048)))]; + tensor encoder_module_layers_0_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4836416)))]; + tensor encoder_module_layers_0_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4838528)))]; tensor var_156_to_fp16 = const()[name = tensor("op_156_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_29_cast_fp16 = layer_norm(axes = input_29_axes_0, beta = encoder_module_layers_0_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_feed_forward1_weight_to_fp16, x = linear_0_cast_fp16)[name = tensor("input_29_cast_fp16")]; - tensor encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(4836160))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9030528))), name = tensor("encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9031104)))]; - tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = tensor("linear_1_cast_fp16")]; + tensor encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4840640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9039168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9035008)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9047424)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward1_linear1_weight_to_fp16_quantized, x = input_29_cast_fp16)[name = tensor("linear_1_cast_fp16")]; tensor input_33_cast_fp16 = silu(x = linear_1_cast_fp16)[name = tensor("input_33_cast_fp16")]; - tensor encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9039360))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13233728))), name = tensor("encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_2_bias_0_to_fp16 = const()[name = tensor("linear_2_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13234304)))]; - tensor linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = tensor("linear_2_cast_fp16")]; + tensor encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9055680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13251136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13250048)))]; + tensor linear_2_bias_0_to_fp16 = const()[name = tensor("linear_2_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13253248)))]; + tensor linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward1_linear2_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = tensor("linear_2_cast_fp16")]; tensor var_361_to_fp16 = const()[name = tensor("op_361_to_fp16"), val = tensor(0x1p-1)]; tensor var_362_cast_fp16 = 
mul(x = linear_2_cast_fp16, y = var_361_to_fp16)[name = tensor("op_362_cast_fp16")]; tensor input_39_cast_fp16 = add(x = linear_0_cast_fp16, y = var_362_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor query_1_axes_0 = const()[name = tensor("query_1_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13236416)))]; - tensor encoder_module_layers_0_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13238528)))]; + tensor encoder_module_layers_0_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13255360)))]; + tensor encoder_module_layers_0_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13257472)))]; tensor query_1_cast_fp16 = layer_norm(axes = query_1_axes_0, beta = encoder_module_layers_0_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_self_att_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("query_1_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13240640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14289280))), name = tensor("encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_3_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_palettized, x = query_1_cast_fp16)[name = tensor("linear_3_cast_fp16")]; + tensor encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13259584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14309312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14308224)))]; + tensor linear_3_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_q_weight_to_fp16_quantized, x = query_1_cast_fp16)[name = tensor("linear_3_cast_fp16")]; tensor var_378 = const()[name = tensor("op_378"), val = tensor([1, -1, 8, 128])]; tensor q_1_cast_fp16 = reshape(shape = var_378, x = linear_3_cast_fp16)[name = tensor("q_1_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14289856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15338496))), name = tensor("encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_4_cast_fp16 = 
linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_palettized, x = query_1_cast_fp16)[name = tensor("linear_4_cast_fp16")]; + tensor encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14311424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15361152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15360064)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_k_weight_to_fp16_quantized, x = query_1_cast_fp16)[name = tensor("linear_4_cast_fp16")]; tensor var_382 = const()[name = tensor("op_382"), val = tensor([1, -1, 8, 128])]; tensor k_1_cast_fp16 = reshape(shape = var_382, x = linear_4_cast_fp16)[name = tensor("k_1_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15339072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16387712))), name = tensor("encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_5_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_palettized, x = query_1_cast_fp16)[name = tensor("linear_5_cast_fp16")]; + tensor encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15363264))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16412992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16411904)))]; + tensor linear_5_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_v_weight_to_fp16_quantized, x = query_1_cast_fp16)[name = tensor("linear_5_cast_fp16")]; tensor var_386 = const()[name = tensor("op_386"), val = tensor([1, -1, 8, 128])]; tensor v_1_cast_fp16 = reshape(shape = var_386, x = linear_5_cast_fp16)[name = tensor("v_1_cast_fp16")]; tensor value_5_perm_0 = const()[name = tensor("value_5_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_0_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_0_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16388288)))]; + tensor encoder_module_layers_0_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_0_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16415104)))]; tensor var_398_cast_fp16 = add(x = q_1_cast_fp16, y = encoder_module_layers_0_self_attn_pos_bias_u_to_fp16)[name = tensor("op_398_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_0_self_attn_pos_bias_v_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16390400)))]; + tensor encoder_module_layers_0_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_0_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16417216)))]; tensor var_400_cast_fp16 = add(x = q_1_cast_fp16, y = encoder_module_layers_0_self_attn_pos_bias_v_to_fp16)[name = tensor("op_400_cast_fp16")]; tensor q_with_bias_v_1_perm_0 = const()[name = tensor("q_with_bias_v_1_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_23_transpose_x_0 = const()[name = tensor("x_23_transpose_x_0"), val = tensor(false)]; tensor x_23_transpose_y_0 = const()[name = tensor("x_23_transpose_y_0"), val = tensor(false)]; - tensor op_402_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16392512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16776576))), name = tensor("op_402_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_402_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_402_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16419328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16803840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16803392)))]; tensor q_with_bias_v_1_cast_fp16 = transpose(perm = q_with_bias_v_1_perm_0, x = var_400_cast_fp16)[name = tensor("transpose_312")]; - tensor x_23_cast_fp16 = matmul(transpose_x = x_23_transpose_x_0, transpose_y = x_23_transpose_y_0, x = q_with_bias_v_1_cast_fp16, y = op_402_to_fp16_palettized)[name = tensor("x_23_cast_fp16")]; + tensor x_23_cast_fp16 = matmul(transpose_x = x_23_transpose_x_0, transpose_y = x_23_transpose_y_0, x = q_with_bias_v_1_cast_fp16, y = op_402_to_fp16_quantized)[name = tensor("x_23_cast_fp16")]; tensor x_25_pad_0 = const()[name = tensor("x_25_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_25_mode_0 = const()[name = tensor("x_25_mode_0"), val = tensor("constant")]; tensor const_29_to_fp16 = const()[name = tensor("const_29_to_fp16"), val = tensor(0x0p+0)]; @@ -297,12 +297,12 @@ program(1.0) tensor var_435 = const()[name = tensor("op_435"), val = tensor([1, -1, 1024])]; tensor var_434_cast_fp16 = transpose(perm = var_434_perm_0, x = x_29_cast_fp16)[name = tensor("transpose_308")]; tensor input_43_cast_fp16 = reshape(shape = var_435, x = var_434_cast_fp16)[name = tensor("input_43_cast_fp16")]; - tensor encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16777152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17825792))), name = tensor("encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_7_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = tensor("linear_7_cast_fp16")]; + tensor encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16804672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17854400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17853312)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_self_attn_linear_out_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = tensor("linear_7_cast_fp16")]; tensor input_47_cast_fp16 = add(x = input_39_cast_fp16, y = linear_7_cast_fp16)[name = tensor("input_47_cast_fp16")]; tensor x_33_axes_0 = const()[name = tensor("x_33_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17826368)))]; - tensor encoder_module_layers_0_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17828480)))]; + tensor encoder_module_layers_0_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17856512)))]; + tensor encoder_module_layers_0_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17858624)))]; tensor x_33_cast_fp16 = layer_norm(axes = x_33_axes_0, beta = encoder_module_layers_0_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_conv_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("x_33_cast_fp16")]; tensor input_49_perm_0 = const()[name = tensor("input_49_perm_0"), val = tensor([0, 2, 1])]; tensor input_51_pad_type_0 = const()[name = tensor("input_51_pad_type_0"), val = tensor("valid")]; @@ -310,9 +310,9 @@ program(1.0) tensor input_51_pad_0 = const()[name = tensor("input_51_pad_0"), val = tensor([0, 0])]; tensor input_51_dilations_0 = const()[name = tensor("input_51_dilations_0"), val = tensor([1])]; tensor input_51_groups_0 = const()[name = tensor("input_51_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17830592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19927808))), name = tensor("encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17860736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19960064))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19957952)))]; tensor input_49_cast_fp16 = transpose(perm = input_49_perm_0, x = 
x_33_cast_fp16)[name = tensor("transpose_307")]; - tensor input_51_cast_fp16 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor input_51_cast_fp16 = conv(dilations = input_51_dilations_0, groups = input_51_groups_0, pad = input_51_pad_0, pad_type = input_51_pad_type_0, strides = input_51_strides_0, weight = encoder_module_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = tensor("input_51_cast_fp16")]; tensor x_35_split_num_splits_0 = const()[name = tensor("x_35_split_num_splits_0"), val = tensor(2)]; tensor x_35_split_axis_0 = const()[name = tensor("x_35_split_axis_0"), val = tensor(1)]; tensor x_35_split_cast_fp16_0, tensor x_35_split_cast_fp16_1 = split(axis = x_35_split_axis_0, num_splits = x_35_split_num_splits_0, x = input_51_cast_fp16)[name = tensor("x_35_split_cast_fp16")]; @@ -330,75 +330,75 @@ program(1.0) tensor input_57_strides_0 = const()[name = tensor("input_57_strides_0"), val = tensor([1])]; tensor input_57_pad_0 = const()[name = tensor("input_57_pad_0"), val = tensor([0, 0])]; tensor input_57_dilations_0 = const()[name = tensor("input_57_dilations_0"), val = tensor([1])]; - tensor const_263_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19928384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19937664))), name = tensor("const_263_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_264_to_fp16 = const()[name = tensor("const_264_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19938240)))]; - tensor input_59_cast_fp16 = conv(bias = const_264_to_fp16, dilations = input_57_dilations_0, groups = input_57_groups_0, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = input_57_strides_0, weight = const_263_to_fp16_palettized, x = input_55_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor const_263_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_263_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19964224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19974592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19973504)))]; + tensor const_264_to_fp16 = const()[name = tensor("const_264_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19976704)))]; + tensor input_59_cast_fp16 = conv(bias = const_264_to_fp16, dilations = input_57_dilations_0, groups = input_57_groups_0, pad = input_57_pad_0, pad_type = input_57_pad_type_0, strides = input_57_strides_0, weight = const_263_to_fp16_quantized, x = input_55_cast_fp16)[name = tensor("input_59_cast_fp16")]; tensor input_61_cast_fp16 = silu(x = input_59_cast_fp16)[name = tensor("input_61_cast_fp16")]; tensor x_37_pad_type_0 = const()[name = tensor("x_37_pad_type_0"), val = tensor("valid")]; tensor x_37_strides_0 = const()[name = tensor("x_37_strides_0"), val = tensor([1])]; tensor x_37_pad_0 = const()[name = tensor("x_37_pad_0"), val = tensor([0, 0])]; tensor x_37_dilations_0 = const()[name = 
tensor("x_37_dilations_0"), val = tensor([1])]; tensor x_37_groups_0 = const()[name = tensor("x_37_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19940352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20988992))), name = tensor("encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_37_cast_fp16 = conv(dilations = x_37_dilations_0, groups = x_37_groups_0, pad = x_37_pad_0, pad_type = x_37_pad_type_0, strides = x_37_strides_0, weight = encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = tensor("x_37_cast_fp16")]; + tensor encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19978816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21028544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21027456)))]; + tensor x_37_cast_fp16 = conv(dilations = x_37_dilations_0, groups = x_37_groups_0, pad = x_37_pad_0, pad_type = x_37_pad_type_0, strides = x_37_strides_0, weight = encoder_module_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_61_cast_fp16)[name = tensor("x_37_cast_fp16")]; tensor input_63_perm_0 = const()[name = tensor("input_63_perm_0"), val = tensor([0, 2, 1])]; tensor input_63_cast_fp16 = transpose(perm = input_63_perm_0, x = x_37_cast_fp16)[name = tensor("transpose_306")]; tensor input_65_cast_fp16 = add(x = input_47_cast_fp16, y = input_63_cast_fp16)[name = tensor("input_65_cast_fp16")]; tensor input_67_axes_0 = const()[name = tensor("input_67_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20989568)))]; - tensor encoder_module_layers_0_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20991680)))]; + tensor encoder_module_layers_0_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21030656)))]; + tensor encoder_module_layers_0_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21032768)))]; tensor input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, beta = encoder_module_layers_0_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_feed_forward2_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("input_67_cast_fp16")]; - tensor encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(20993792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25188160))), name = tensor("encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = tensor("linear_8_cast_fp16")]; + tensor encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21034880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25233408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25229248)))]; + tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward2_linear1_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = tensor("linear_8_cast_fp16")]; tensor input_71_cast_fp16 = silu(x = linear_8_cast_fp16)[name = tensor("input_71_cast_fp16")]; - tensor encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25188736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29383104))), name = tensor("encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_9_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = tensor("linear_9_cast_fp16")]; + tensor encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25241664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29437120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29436032)))]; + tensor linear_9_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_0_feed_forward2_linear2_weight_to_fp16_quantized, x = input_71_cast_fp16)[name = tensor("linear_9_cast_fp16")]; tensor var_495_to_fp16 = const()[name = tensor("op_495_to_fp16"), val = tensor(0x1p-1)]; tensor var_496_cast_fp16 = mul(x = linear_9_cast_fp16, y = var_495_to_fp16)[name = tensor("op_496_cast_fp16")]; tensor input_77_cast_fp16 = add(x = input_65_cast_fp16, y = var_496_cast_fp16)[name = tensor("input_77_cast_fp16")]; tensor input_79_axes_0 = const()[name = tensor("input_79_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_0_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29383680)))]; - tensor encoder_module_layers_0_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(29385792)))]; + tensor encoder_module_layers_0_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29439232)))]; + tensor encoder_module_layers_0_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_0_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29441344)))]; tensor input_79_cast_fp16 = layer_norm(axes = input_79_axes_0, beta = encoder_module_layers_0_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_0_norm_out_weight_to_fp16, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; tensor input_81_axes_0 = const()[name = tensor("input_81_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29387904)))]; - tensor encoder_module_layers_1_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29390016)))]; + tensor encoder_module_layers_1_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29443456)))]; + tensor encoder_module_layers_1_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29445568)))]; tensor input_81_cast_fp16 = layer_norm(axes = input_81_axes_0, beta = encoder_module_layers_1_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_1_norm_feed_forward1_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("input_81_cast_fp16")]; - tensor encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29392128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33586496))), name = tensor("encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = tensor("linear_10_cast_fp16")]; + tensor encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29447680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33646208))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33642048)))]; + tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward1_linear1_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = 
tensor("linear_10_cast_fp16")]; tensor input_85_cast_fp16 = silu(x = linear_10_cast_fp16)[name = tensor("input_85_cast_fp16")]; - tensor encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33587072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37781440))), name = tensor("encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_11_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = tensor("linear_11_cast_fp16")]; + tensor encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33654464))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37849920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37848832)))]; + tensor linear_11_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward1_linear2_weight_to_fp16_quantized, x = input_85_cast_fp16)[name = tensor("linear_11_cast_fp16")]; tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1p-1)]; tensor var_525_cast_fp16 = mul(x = linear_11_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; tensor input_91_cast_fp16 = add(x = input_79_cast_fp16, y = var_525_cast_fp16)[name = tensor("input_91_cast_fp16")]; tensor query_3_axes_0 = const()[name = tensor("query_3_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37782016)))]; - tensor encoder_module_layers_1_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37784128)))]; + tensor encoder_module_layers_1_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37852032)))]; + tensor encoder_module_layers_1_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37854144)))]; tensor query_3_cast_fp16 = layer_norm(axes = query_3_axes_0, beta = encoder_module_layers_1_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_1_norm_self_att_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("query_3_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37786240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38834880))), name = 
tensor("encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_12_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_palettized, x = query_3_cast_fp16)[name = tensor("linear_12_cast_fp16")]; + tensor encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37856256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38905984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38904896)))]; + tensor linear_12_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_q_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor("linear_12_cast_fp16")]; tensor var_541 = const()[name = tensor("op_541"), val = tensor([1, -1, 8, 128])]; tensor q_7_cast_fp16 = reshape(shape = var_541, x = linear_12_cast_fp16)[name = tensor("q_7_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38835456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39884096))), name = tensor("encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_13_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_palettized, x = query_3_cast_fp16)[name = tensor("linear_13_cast_fp16")]; + tensor encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38908096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39957824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39956736)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_k_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor("linear_13_cast_fp16")]; tensor var_545 = const()[name = tensor("op_545"), val = tensor([1, -1, 8, 128])]; tensor k_5_cast_fp16 = reshape(shape = var_545, x = linear_13_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39884672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40933312))), name = tensor("encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_palettized, x = query_3_cast_fp16)[name = tensor("linear_14_cast_fp16")]; + tensor encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_quantized = 
constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39959936))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41009664))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41008576)))]; + tensor linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_v_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor("linear_14_cast_fp16")]; tensor var_549 = const()[name = tensor("op_549"), val = tensor([1, -1, 8, 128])]; tensor v_3_cast_fp16 = reshape(shape = var_549, x = linear_14_cast_fp16)[name = tensor("v_3_cast_fp16")]; tensor value_7_perm_0 = const()[name = tensor("value_7_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_1_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_1_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40933888)))]; + tensor encoder_module_layers_1_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_1_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41011776)))]; tensor var_561_cast_fp16 = add(x = q_7_cast_fp16, y = encoder_module_layers_1_self_attn_pos_bias_u_to_fp16)[name = tensor("op_561_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_1_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40936000)))]; + tensor encoder_module_layers_1_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_1_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41013888)))]; tensor var_563_cast_fp16 = add(x = q_7_cast_fp16, y = encoder_module_layers_1_self_attn_pos_bias_v_to_fp16)[name = tensor("op_563_cast_fp16")]; tensor q_with_bias_v_3_perm_0 = const()[name = tensor("q_with_bias_v_3_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_45_transpose_x_0 = const()[name = tensor("x_45_transpose_x_0"), val = tensor(false)]; tensor x_45_transpose_y_0 = const()[name = tensor("x_45_transpose_y_0"), val = tensor(false)]; - tensor op_565_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40938112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41322176))), name = tensor("op_565_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_565_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_565_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41016000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41400512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41400064)))]; tensor q_with_bias_v_3_cast_fp16 = transpose(perm = q_with_bias_v_3_perm_0, x = var_563_cast_fp16)[name = tensor("transpose_305")]; - tensor x_45_cast_fp16 = matmul(transpose_x = x_45_transpose_x_0, transpose_y = x_45_transpose_y_0, x = 
q_with_bias_v_3_cast_fp16, y = op_565_to_fp16_palettized)[name = tensor("x_45_cast_fp16")]; + tensor x_45_cast_fp16 = matmul(transpose_x = x_45_transpose_x_0, transpose_y = x_45_transpose_y_0, x = q_with_bias_v_3_cast_fp16, y = op_565_to_fp16_quantized)[name = tensor("x_45_cast_fp16")]; tensor x_47_pad_0 = const()[name = tensor("x_47_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_47_mode_0 = const()[name = tensor("x_47_mode_0"), val = tensor("constant")]; tensor const_39_to_fp16 = const()[name = tensor("const_39_to_fp16"), val = tensor(0x0p+0)]; @@ -436,12 +436,12 @@ program(1.0) tensor var_598 = const()[name = tensor("op_598"), val = tensor([1, -1, 1024])]; tensor var_597_cast_fp16 = transpose(perm = var_597_perm_0, x = x_51_cast_fp16)[name = tensor("transpose_301")]; tensor input_95_cast_fp16 = reshape(shape = var_598, x = var_597_cast_fp16)[name = tensor("input_95_cast_fp16")]; - tensor encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41322752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42371392))), name = tensor("encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_16_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = tensor("linear_16_cast_fp16")]; + tensor encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41401344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42451072))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42449984)))]; + tensor linear_16_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_self_attn_linear_out_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = tensor("linear_16_cast_fp16")]; tensor input_99_cast_fp16 = add(x = input_91_cast_fp16, y = linear_16_cast_fp16)[name = tensor("input_99_cast_fp16")]; tensor x_55_axes_0 = const()[name = tensor("x_55_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42371968)))]; - tensor encoder_module_layers_1_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42374080)))]; + tensor encoder_module_layers_1_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42453184)))]; + tensor encoder_module_layers_1_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42455296)))]; tensor x_55_cast_fp16 = layer_norm(axes = x_55_axes_0, beta = encoder_module_layers_1_norm_conv_bias_to_fp16, epsilon 
= var_156_to_fp16, gamma = encoder_module_layers_1_norm_conv_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("x_55_cast_fp16")]; tensor input_101_perm_0 = const()[name = tensor("input_101_perm_0"), val = tensor([0, 2, 1])]; tensor input_103_pad_type_0 = const()[name = tensor("input_103_pad_type_0"), val = tensor("valid")]; @@ -449,9 +449,9 @@ program(1.0) tensor input_103_pad_0 = const()[name = tensor("input_103_pad_0"), val = tensor([0, 0])]; tensor input_103_dilations_0 = const()[name = tensor("input_103_dilations_0"), val = tensor([1])]; tensor input_103_groups_0 = const()[name = tensor("input_103_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42376192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44473408))), name = tensor("encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42457408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44556736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44554624)))]; tensor input_101_cast_fp16 = transpose(perm = input_101_perm_0, x = x_55_cast_fp16)[name = tensor("transpose_300")]; - tensor input_103_cast_fp16 = conv(dilations = input_103_dilations_0, groups = input_103_groups_0, pad = input_103_pad_0, pad_type = input_103_pad_type_0, strides = input_103_strides_0, weight = encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor input_103_cast_fp16 = conv(dilations = input_103_dilations_0, groups = input_103_groups_0, pad = input_103_pad_0, pad_type = input_103_pad_type_0, strides = input_103_strides_0, weight = encoder_module_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_101_cast_fp16)[name = tensor("input_103_cast_fp16")]; tensor x_57_split_num_splits_0 = const()[name = tensor("x_57_split_num_splits_0"), val = tensor(2)]; tensor x_57_split_axis_0 = const()[name = tensor("x_57_split_axis_0"), val = tensor(1)]; tensor x_57_split_cast_fp16_0, tensor x_57_split_cast_fp16_1 = split(axis = x_57_split_axis_0, num_splits = x_57_split_num_splits_0, x = input_103_cast_fp16)[name = tensor("x_57_split_cast_fp16")]; @@ -467,75 +467,75 @@ program(1.0) tensor input_109_strides_0 = const()[name = tensor("input_109_strides_0"), val = tensor([1])]; tensor input_109_pad_0 = const()[name = tensor("input_109_pad_0"), val = tensor([0, 0])]; tensor input_109_dilations_0 = const()[name = tensor("input_109_dilations_0"), val = tensor([1])]; - tensor const_265_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44473984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44483264))), name = tensor("const_265_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_266_to_fp16 = const()[name = tensor("const_266_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(44483840)))]; - tensor input_111_cast_fp16 = conv(bias = const_266_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = const_265_to_fp16_palettized, x = input_107_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor const_265_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_265_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44560896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44571264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44570176)))]; + tensor const_266_to_fp16 = const()[name = tensor("const_266_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44573376)))]; + tensor input_111_cast_fp16 = conv(bias = const_266_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = const_265_to_fp16_quantized, x = input_107_cast_fp16)[name = tensor("input_111_cast_fp16")]; tensor input_113_cast_fp16 = silu(x = input_111_cast_fp16)[name = tensor("input_113_cast_fp16")]; tensor x_59_pad_type_0 = const()[name = tensor("x_59_pad_type_0"), val = tensor("valid")]; tensor x_59_strides_0 = const()[name = tensor("x_59_strides_0"), val = tensor([1])]; tensor x_59_pad_0 = const()[name = tensor("x_59_pad_0"), val = tensor([0, 0])]; tensor x_59_dilations_0 = const()[name = tensor("x_59_dilations_0"), val = tensor([1])]; tensor x_59_groups_0 = const()[name = tensor("x_59_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44485952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45534592))), name = tensor("encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_59_cast_fp16 = conv(dilations = x_59_dilations_0, groups = x_59_groups_0, pad = x_59_pad_0, pad_type = x_59_pad_type_0, strides = x_59_strides_0, weight = encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = tensor("x_59_cast_fp16")]; + tensor encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44575488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45625216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45624128)))]; + tensor x_59_cast_fp16 = conv(dilations = x_59_dilations_0, groups = x_59_groups_0, pad = x_59_pad_0, pad_type = x_59_pad_type_0, strides = x_59_strides_0, weight = encoder_module_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_113_cast_fp16)[name = tensor("x_59_cast_fp16")]; tensor input_115_perm_0 = const()[name = tensor("input_115_perm_0"), val = tensor([0, 2, 1])]; tensor input_115_cast_fp16 = 
transpose(perm = input_115_perm_0, x = x_59_cast_fp16)[name = tensor("transpose_299")]; tensor input_117_cast_fp16 = add(x = input_99_cast_fp16, y = input_115_cast_fp16)[name = tensor("input_117_cast_fp16")]; tensor input_119_axes_0 = const()[name = tensor("input_119_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45535168)))]; - tensor encoder_module_layers_1_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45537280)))]; + tensor encoder_module_layers_1_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45627328)))]; + tensor encoder_module_layers_1_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45629440)))]; tensor input_119_cast_fp16 = layer_norm(axes = input_119_axes_0, beta = encoder_module_layers_1_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_1_norm_feed_forward2_weight_to_fp16, x = input_117_cast_fp16)[name = tensor("input_119_cast_fp16")]; - tensor encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45539392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49733760))), name = tensor("encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = tensor("linear_17_cast_fp16")]; + tensor encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45631552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49830080))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49825920)))]; + tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward2_linear1_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = tensor("linear_17_cast_fp16")]; tensor input_123_cast_fp16 = silu(x = linear_17_cast_fp16)[name = tensor("input_123_cast_fp16")]; - tensor encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49734336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53928704))), name = tensor("encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor 
linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = tensor("linear_18_cast_fp16")]; + tensor encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49838336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54033792))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54032704)))]; + tensor linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_1_feed_forward2_linear2_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = tensor("linear_18_cast_fp16")]; tensor var_658_to_fp16 = const()[name = tensor("op_658_to_fp16"), val = tensor(0x1p-1)]; tensor var_659_cast_fp16 = mul(x = linear_18_cast_fp16, y = var_658_to_fp16)[name = tensor("op_659_cast_fp16")]; tensor input_129_cast_fp16 = add(x = input_117_cast_fp16, y = var_659_cast_fp16)[name = tensor("input_129_cast_fp16")]; tensor input_131_axes_0 = const()[name = tensor("input_131_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_1_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53929280)))]; - tensor encoder_module_layers_1_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53931392)))]; + tensor encoder_module_layers_1_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54035904)))]; + tensor encoder_module_layers_1_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_1_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54038016)))]; tensor input_131_cast_fp16 = layer_norm(axes = input_131_axes_0, beta = encoder_module_layers_1_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_1_norm_out_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("input_131_cast_fp16")]; tensor input_133_axes_0 = const()[name = tensor("input_133_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53933504)))]; - tensor encoder_module_layers_2_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53935616)))]; + tensor encoder_module_layers_2_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54040128)))]; + tensor encoder_module_layers_2_norm_feed_forward1_bias_to_fp16 = const()[name = 
tensor("encoder_module_layers_2_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54042240)))]; tensor input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = encoder_module_layers_2_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_feed_forward1_weight_to_fp16, x = input_131_cast_fp16)[name = tensor("input_133_cast_fp16")]; - tensor encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53937728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58132096))), name = tensor("encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_palettized, x = input_133_cast_fp16)[name = tensor("linear_19_cast_fp16")]; + tensor encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54044352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58242880))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58238720)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward1_linear1_weight_to_fp16_quantized, x = input_133_cast_fp16)[name = tensor("linear_19_cast_fp16")]; tensor input_137_cast_fp16 = silu(x = linear_19_cast_fp16)[name = tensor("input_137_cast_fp16")]; - tensor encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58132672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62327040))), name = tensor("encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_20_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_palettized, x = input_137_cast_fp16)[name = tensor("linear_20_cast_fp16")]; + tensor encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58251136))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62446592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62445504)))]; + tensor linear_20_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward1_linear2_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = tensor("linear_20_cast_fp16")]; tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1p-1)]; tensor var_688_cast_fp16 = mul(x = linear_20_cast_fp16, y = var_687_to_fp16)[name = 
tensor("op_688_cast_fp16")]; tensor input_143_cast_fp16 = add(x = input_131_cast_fp16, y = var_688_cast_fp16)[name = tensor("input_143_cast_fp16")]; tensor query_5_axes_0 = const()[name = tensor("query_5_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62327616)))]; - tensor encoder_module_layers_2_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62329728)))]; + tensor encoder_module_layers_2_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62448704)))]; + tensor encoder_module_layers_2_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62450816)))]; tensor query_5_cast_fp16 = layer_norm(axes = query_5_axes_0, beta = encoder_module_layers_2_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_self_att_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("query_5_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62331840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63380480))), name = tensor("encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_21_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_palettized, x = query_5_cast_fp16)[name = tensor("linear_21_cast_fp16")]; + tensor encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62452928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63502656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63501568)))]; + tensor linear_21_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_q_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor("linear_21_cast_fp16")]; tensor var_704 = const()[name = tensor("op_704"), val = tensor([1, -1, 8, 128])]; tensor q_13_cast_fp16 = reshape(shape = var_704, x = linear_21_cast_fp16)[name = tensor("q_13_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63381056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64429696))), name = tensor("encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_palettized, x = query_5_cast_fp16)[name = tensor("linear_22_cast_fp16")]; + tensor encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63504768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64554496))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64553408)))]; + tensor linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_k_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor("linear_22_cast_fp16")]; tensor var_708 = const()[name = tensor("op_708"), val = tensor([1, -1, 8, 128])]; tensor k_9_cast_fp16 = reshape(shape = var_708, x = linear_22_cast_fp16)[name = tensor("k_9_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64430272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65478912))), name = tensor("encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_23_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_palettized, x = query_5_cast_fp16)[name = tensor("linear_23_cast_fp16")]; + tensor encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64556608))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65606336))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65605248)))]; + tensor linear_23_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_v_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor("linear_23_cast_fp16")]; tensor var_712 = const()[name = tensor("op_712"), val = tensor([1, -1, 8, 128])]; tensor v_5_cast_fp16 = reshape(shape = var_712, x = linear_23_cast_fp16)[name = tensor("v_5_cast_fp16")]; tensor value_9_perm_0 = const()[name = tensor("value_9_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_2_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_2_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65479488)))]; + tensor encoder_module_layers_2_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_2_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65608448)))]; tensor var_724_cast_fp16 = add(x = q_13_cast_fp16, y = encoder_module_layers_2_self_attn_pos_bias_u_to_fp16)[name = tensor("op_724_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_2_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(65481600)))]; + tensor encoder_module_layers_2_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_2_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65610560)))]; tensor var_726_cast_fp16 = add(x = q_13_cast_fp16, y = encoder_module_layers_2_self_attn_pos_bias_v_to_fp16)[name = tensor("op_726_cast_fp16")]; tensor q_with_bias_v_5_perm_0 = const()[name = tensor("q_with_bias_v_5_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_67_transpose_x_0 = const()[name = tensor("x_67_transpose_x_0"), val = tensor(false)]; tensor x_67_transpose_y_0 = const()[name = tensor("x_67_transpose_y_0"), val = tensor(false)]; - tensor op_728_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65483712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65867776))), name = tensor("op_728_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_728_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_728_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65612672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65997184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65996736)))]; tensor q_with_bias_v_5_cast_fp16 = transpose(perm = q_with_bias_v_5_perm_0, x = var_726_cast_fp16)[name = tensor("transpose_298")]; - tensor x_67_cast_fp16 = matmul(transpose_x = x_67_transpose_x_0, transpose_y = x_67_transpose_y_0, x = q_with_bias_v_5_cast_fp16, y = op_728_to_fp16_palettized)[name = tensor("x_67_cast_fp16")]; + tensor x_67_cast_fp16 = matmul(transpose_x = x_67_transpose_x_0, transpose_y = x_67_transpose_y_0, x = q_with_bias_v_5_cast_fp16, y = op_728_to_fp16_quantized)[name = tensor("x_67_cast_fp16")]; tensor x_69_pad_0 = const()[name = tensor("x_69_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_69_mode_0 = const()[name = tensor("x_69_mode_0"), val = tensor("constant")]; tensor const_49_to_fp16 = const()[name = tensor("const_49_to_fp16"), val = tensor(0x0p+0)]; @@ -573,12 +573,12 @@ program(1.0) tensor var_761 = const()[name = tensor("op_761"), val = tensor([1, -1, 1024])]; tensor var_760_cast_fp16 = transpose(perm = var_760_perm_0, x = x_73_cast_fp16)[name = tensor("transpose_294")]; tensor input_147_cast_fp16 = reshape(shape = var_761, x = var_760_cast_fp16)[name = tensor("input_147_cast_fp16")]; - tensor encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65868352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66916992))), name = tensor("encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_25_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_palettized, x = input_147_cast_fp16)[name = tensor("linear_25_cast_fp16")]; + tensor encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65998016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67047744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67046656)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_self_attn_linear_out_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = tensor("linear_25_cast_fp16")]; tensor input_151_cast_fp16 = add(x = input_143_cast_fp16, y = linear_25_cast_fp16)[name = tensor("input_151_cast_fp16")]; tensor x_77_axes_0 = const()[name = tensor("x_77_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66917568)))]; - tensor encoder_module_layers_2_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66919680)))]; + tensor encoder_module_layers_2_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67049856)))]; + tensor encoder_module_layers_2_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67051968)))]; tensor x_77_cast_fp16 = layer_norm(axes = x_77_axes_0, beta = encoder_module_layers_2_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_conv_weight_to_fp16, x = input_151_cast_fp16)[name = tensor("x_77_cast_fp16")]; tensor input_153_perm_0 = const()[name = tensor("input_153_perm_0"), val = tensor([0, 2, 1])]; tensor input_155_pad_type_0 = const()[name = tensor("input_155_pad_type_0"), val = tensor("valid")]; @@ -586,9 +586,9 @@ program(1.0) tensor input_155_pad_0 = const()[name = tensor("input_155_pad_0"), val = tensor([0, 0])]; tensor input_155_dilations_0 = const()[name = tensor("input_155_dilations_0"), val = tensor([1])]; tensor input_155_groups_0 = const()[name = tensor("input_155_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66921792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69019008))), name = tensor("encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67054080))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69153408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69151296)))]; tensor input_153_cast_fp16 = transpose(perm = input_153_perm_0, x 
= x_77_cast_fp16)[name = tensor("transpose_293")]; - tensor input_155_cast_fp16 = conv(dilations = input_155_dilations_0, groups = input_155_groups_0, pad = input_155_pad_0, pad_type = input_155_pad_type_0, strides = input_155_strides_0, weight = encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_153_cast_fp16)[name = tensor("input_155_cast_fp16")]; + tensor input_155_cast_fp16 = conv(dilations = input_155_dilations_0, groups = input_155_groups_0, pad = input_155_pad_0, pad_type = input_155_pad_type_0, strides = input_155_strides_0, weight = encoder_module_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = tensor("input_155_cast_fp16")]; tensor x_79_split_num_splits_0 = const()[name = tensor("x_79_split_num_splits_0"), val = tensor(2)]; tensor x_79_split_axis_0 = const()[name = tensor("x_79_split_axis_0"), val = tensor(1)]; tensor x_79_split_cast_fp16_0, tensor x_79_split_cast_fp16_1 = split(axis = x_79_split_axis_0, num_splits = x_79_split_num_splits_0, x = input_155_cast_fp16)[name = tensor("x_79_split_cast_fp16")]; @@ -604,75 +604,75 @@ program(1.0) tensor input_161_strides_0 = const()[name = tensor("input_161_strides_0"), val = tensor([1])]; tensor input_161_pad_0 = const()[name = tensor("input_161_pad_0"), val = tensor([0, 0])]; tensor input_161_dilations_0 = const()[name = tensor("input_161_dilations_0"), val = tensor([1])]; - tensor const_267_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69019584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69028864))), name = tensor("const_267_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_268_to_fp16 = const()[name = tensor("const_268_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69029440)))]; - tensor input_163_cast_fp16 = conv(bias = const_268_to_fp16, dilations = input_161_dilations_0, groups = input_161_groups_0, pad = input_161_pad_0, pad_type = input_161_pad_type_0, strides = input_161_strides_0, weight = const_267_to_fp16_palettized, x = input_159_cast_fp16)[name = tensor("input_163_cast_fp16")]; + tensor const_267_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_267_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69157568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69167936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69166848)))]; + tensor const_268_to_fp16 = const()[name = tensor("const_268_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69170048)))]; + tensor input_163_cast_fp16 = conv(bias = const_268_to_fp16, dilations = input_161_dilations_0, groups = input_161_groups_0, pad = input_161_pad_0, pad_type = input_161_pad_type_0, strides = input_161_strides_0, weight = const_267_to_fp16_quantized, x = input_159_cast_fp16)[name = tensor("input_163_cast_fp16")]; tensor input_165_cast_fp16 = silu(x = input_163_cast_fp16)[name = tensor("input_165_cast_fp16")]; tensor x_81_pad_type_0 = const()[name = tensor("x_81_pad_type_0"), val = tensor("valid")]; tensor x_81_strides_0 = const()[name = tensor("x_81_strides_0"), val = tensor([1])]; tensor x_81_pad_0 = const()[name = tensor("x_81_pad_0"), val = tensor([0, 0])]; 
tensor x_81_dilations_0 = const()[name = tensor("x_81_dilations_0"), val = tensor([1])]; tensor x_81_groups_0 = const()[name = tensor("x_81_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69031552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70080192))), name = tensor("encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_81_cast_fp16 = conv(dilations = x_81_dilations_0, groups = x_81_groups_0, pad = x_81_pad_0, pad_type = x_81_pad_type_0, strides = x_81_strides_0, weight = encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_165_cast_fp16)[name = tensor("x_81_cast_fp16")]; + tensor encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69172160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70221888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70220800)))]; + tensor x_81_cast_fp16 = conv(dilations = x_81_dilations_0, groups = x_81_groups_0, pad = x_81_pad_0, pad_type = x_81_pad_type_0, strides = x_81_strides_0, weight = encoder_module_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_165_cast_fp16)[name = tensor("x_81_cast_fp16")]; tensor input_167_perm_0 = const()[name = tensor("input_167_perm_0"), val = tensor([0, 2, 1])]; tensor input_167_cast_fp16 = transpose(perm = input_167_perm_0, x = x_81_cast_fp16)[name = tensor("transpose_292")]; tensor input_169_cast_fp16 = add(x = input_151_cast_fp16, y = input_167_cast_fp16)[name = tensor("input_169_cast_fp16")]; tensor input_171_axes_0 = const()[name = tensor("input_171_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70080768)))]; - tensor encoder_module_layers_2_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70082880)))]; + tensor encoder_module_layers_2_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70224000)))]; + tensor encoder_module_layers_2_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70226112)))]; tensor input_171_cast_fp16 = layer_norm(axes = input_171_axes_0, beta = encoder_module_layers_2_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_feed_forward2_weight_to_fp16, x = input_169_cast_fp16)[name = tensor("input_171_cast_fp16")]; - tensor encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_palettized = 
constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70084992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74279360))), name = tensor("encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = tensor("linear_26_cast_fp16")]; + tensor encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70228224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74426752))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74422592)))]; + tensor linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward2_linear1_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = tensor("linear_26_cast_fp16")]; tensor input_175_cast_fp16 = silu(x = linear_26_cast_fp16)[name = tensor("input_175_cast_fp16")]; - tensor encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74279936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78474304))), name = tensor("encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_27_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = tensor("linear_27_cast_fp16")]; + tensor encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74435008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78630464))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78629376)))]; + tensor linear_27_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_2_feed_forward2_linear2_weight_to_fp16_quantized, x = input_175_cast_fp16)[name = tensor("linear_27_cast_fp16")]; tensor var_821_to_fp16 = const()[name = tensor("op_821_to_fp16"), val = tensor(0x1p-1)]; tensor var_822_cast_fp16 = mul(x = linear_27_cast_fp16, y = var_821_to_fp16)[name = tensor("op_822_cast_fp16")]; tensor input_181_cast_fp16 = add(x = input_169_cast_fp16, y = var_822_cast_fp16)[name = tensor("input_181_cast_fp16")]; tensor input_183_axes_0 = const()[name = tensor("input_183_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_2_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78474880)))]; - tensor encoder_module_layers_2_norm_out_bias_to_fp16 = const()[name = 
tensor("encoder_module_layers_2_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78476992)))]; + tensor encoder_module_layers_2_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78632576)))]; + tensor encoder_module_layers_2_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_2_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78634688)))]; tensor input_183_cast_fp16 = layer_norm(axes = input_183_axes_0, beta = encoder_module_layers_2_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_2_norm_out_weight_to_fp16, x = input_181_cast_fp16)[name = tensor("input_183_cast_fp16")]; tensor input_185_axes_0 = const()[name = tensor("input_185_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78479104)))]; - tensor encoder_module_layers_3_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78481216)))]; + tensor encoder_module_layers_3_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78636800)))]; + tensor encoder_module_layers_3_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78638912)))]; tensor input_185_cast_fp16 = layer_norm(axes = input_185_axes_0, beta = encoder_module_layers_3_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_feed_forward1_weight_to_fp16, x = input_183_cast_fp16)[name = tensor("input_185_cast_fp16")]; - tensor encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78483328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82677696))), name = tensor("encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_28_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = tensor("linear_28_cast_fp16")]; + tensor encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78641024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82839552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82835392)))]; + tensor linear_28_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = 
encoder_module_layers_3_feed_forward1_linear1_weight_to_fp16_quantized, x = input_185_cast_fp16)[name = tensor("linear_28_cast_fp16")]; tensor input_189_cast_fp16 = silu(x = linear_28_cast_fp16)[name = tensor("input_189_cast_fp16")]; - tensor encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82678272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86872640))), name = tensor("encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_29_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_palettized, x = input_189_cast_fp16)[name = tensor("linear_29_cast_fp16")]; + tensor encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82847808))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87043264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87042176)))]; + tensor linear_29_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward1_linear2_weight_to_fp16_quantized, x = input_189_cast_fp16)[name = tensor("linear_29_cast_fp16")]; tensor var_850_to_fp16 = const()[name = tensor("op_850_to_fp16"), val = tensor(0x1p-1)]; tensor var_851_cast_fp16 = mul(x = linear_29_cast_fp16, y = var_850_to_fp16)[name = tensor("op_851_cast_fp16")]; tensor input_195_cast_fp16 = add(x = input_183_cast_fp16, y = var_851_cast_fp16)[name = tensor("input_195_cast_fp16")]; tensor query_7_axes_0 = const()[name = tensor("query_7_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86873216)))]; - tensor encoder_module_layers_3_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86875328)))]; + tensor encoder_module_layers_3_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87045376)))]; + tensor encoder_module_layers_3_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87047488)))]; tensor query_7_cast_fp16 = layer_norm(axes = query_7_axes_0, beta = encoder_module_layers_3_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_self_att_weight_to_fp16, x = input_195_cast_fp16)[name = tensor("query_7_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86877440))), lut = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(87926080))), name = tensor("encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_palettized, x = query_7_cast_fp16)[name = tensor("linear_30_cast_fp16")]; + tensor encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87049600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88099328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88098240)))]; + tensor linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_q_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor("linear_30_cast_fp16")]; tensor var_867 = const()[name = tensor("op_867"), val = tensor([1, -1, 8, 128])]; tensor q_19_cast_fp16 = reshape(shape = var_867, x = linear_30_cast_fp16)[name = tensor("q_19_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87926656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88975296))), name = tensor("encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_31_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_palettized, x = query_7_cast_fp16)[name = tensor("linear_31_cast_fp16")]; + tensor encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88101440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89151168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89150080)))]; + tensor linear_31_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_k_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor("linear_31_cast_fp16")]; tensor var_871 = const()[name = tensor("op_871"), val = tensor([1, -1, 8, 128])]; tensor k_13_cast_fp16 = reshape(shape = var_871, x = linear_31_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88975872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90024512))), name = tensor("encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_32_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_palettized, x = query_7_cast_fp16)[name = tensor("linear_32_cast_fp16")]; + tensor 
encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89153280))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90203008))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90201920)))]; + tensor linear_32_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_v_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor("linear_32_cast_fp16")]; tensor var_875 = const()[name = tensor("op_875"), val = tensor([1, -1, 8, 128])]; tensor v_7_cast_fp16 = reshape(shape = var_875, x = linear_32_cast_fp16)[name = tensor("v_7_cast_fp16")]; tensor value_11_perm_0 = const()[name = tensor("value_11_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_3_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_3_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90025088)))]; + tensor encoder_module_layers_3_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_3_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90205120)))]; tensor var_887_cast_fp16 = add(x = q_19_cast_fp16, y = encoder_module_layers_3_self_attn_pos_bias_u_to_fp16)[name = tensor("op_887_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_3_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90027200)))]; + tensor encoder_module_layers_3_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_3_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90207232)))]; tensor var_889_cast_fp16 = add(x = q_19_cast_fp16, y = encoder_module_layers_3_self_attn_pos_bias_v_to_fp16)[name = tensor("op_889_cast_fp16")]; tensor q_with_bias_v_7_perm_0 = const()[name = tensor("q_with_bias_v_7_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_89_transpose_x_0 = const()[name = tensor("x_89_transpose_x_0"), val = tensor(false)]; tensor x_89_transpose_y_0 = const()[name = tensor("x_89_transpose_y_0"), val = tensor(false)]; - tensor op_891_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90029312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90413376))), name = tensor("op_891_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_891_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_891_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90209344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90593856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90593408)))]; tensor q_with_bias_v_7_cast_fp16 = transpose(perm = q_with_bias_v_7_perm_0, x = var_889_cast_fp16)[name = tensor("transpose_291")]; - tensor x_89_cast_fp16 = 
matmul(transpose_x = x_89_transpose_x_0, transpose_y = x_89_transpose_y_0, x = q_with_bias_v_7_cast_fp16, y = op_891_to_fp16_palettized)[name = tensor("x_89_cast_fp16")]; + tensor x_89_cast_fp16 = matmul(transpose_x = x_89_transpose_x_0, transpose_y = x_89_transpose_y_0, x = q_with_bias_v_7_cast_fp16, y = op_891_to_fp16_quantized)[name = tensor("x_89_cast_fp16")]; tensor x_91_pad_0 = const()[name = tensor("x_91_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_91_mode_0 = const()[name = tensor("x_91_mode_0"), val = tensor("constant")]; tensor const_59_to_fp16 = const()[name = tensor("const_59_to_fp16"), val = tensor(0x0p+0)]; @@ -710,12 +710,12 @@ program(1.0) tensor var_924 = const()[name = tensor("op_924"), val = tensor([1, -1, 1024])]; tensor var_923_cast_fp16 = transpose(perm = var_923_perm_0, x = x_95_cast_fp16)[name = tensor("transpose_287")]; tensor input_199_cast_fp16 = reshape(shape = var_924, x = var_923_cast_fp16)[name = tensor("input_199_cast_fp16")]; - tensor encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90413952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91462592))), name = tensor("encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_palettized, x = input_199_cast_fp16)[name = tensor("linear_34_cast_fp16")]; + tensor encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90594688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91644416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91643328)))]; + tensor linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_self_attn_linear_out_weight_to_fp16_quantized, x = input_199_cast_fp16)[name = tensor("linear_34_cast_fp16")]; tensor input_203_cast_fp16 = add(x = input_195_cast_fp16, y = linear_34_cast_fp16)[name = tensor("input_203_cast_fp16")]; tensor x_99_axes_0 = const()[name = tensor("x_99_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91463168)))]; - tensor encoder_module_layers_3_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91465280)))]; + tensor encoder_module_layers_3_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91646528)))]; + tensor encoder_module_layers_3_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91648640)))]; tensor x_99_cast_fp16 = 
layer_norm(axes = x_99_axes_0, beta = encoder_module_layers_3_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_conv_weight_to_fp16, x = input_203_cast_fp16)[name = tensor("x_99_cast_fp16")]; tensor input_205_perm_0 = const()[name = tensor("input_205_perm_0"), val = tensor([0, 2, 1])]; tensor input_207_pad_type_0 = const()[name = tensor("input_207_pad_type_0"), val = tensor("valid")]; @@ -723,9 +723,9 @@ program(1.0) tensor input_207_pad_0 = const()[name = tensor("input_207_pad_0"), val = tensor([0, 0])]; tensor input_207_dilations_0 = const()[name = tensor("input_207_dilations_0"), val = tensor([1])]; tensor input_207_groups_0 = const()[name = tensor("input_207_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91467392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93564608))), name = tensor("encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91650752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93750080))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93747968)))]; tensor input_205_cast_fp16 = transpose(perm = input_205_perm_0, x = x_99_cast_fp16)[name = tensor("transpose_286")]; - tensor input_207_cast_fp16 = conv(dilations = input_207_dilations_0, groups = input_207_groups_0, pad = input_207_pad_0, pad_type = input_207_pad_type_0, strides = input_207_strides_0, weight = encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_205_cast_fp16)[name = tensor("input_207_cast_fp16")]; + tensor input_207_cast_fp16 = conv(dilations = input_207_dilations_0, groups = input_207_groups_0, pad = input_207_pad_0, pad_type = input_207_pad_type_0, strides = input_207_strides_0, weight = encoder_module_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_205_cast_fp16)[name = tensor("input_207_cast_fp16")]; tensor x_101_split_num_splits_0 = const()[name = tensor("x_101_split_num_splits_0"), val = tensor(2)]; tensor x_101_split_axis_0 = const()[name = tensor("x_101_split_axis_0"), val = tensor(1)]; tensor x_101_split_cast_fp16_0, tensor x_101_split_cast_fp16_1 = split(axis = x_101_split_axis_0, num_splits = x_101_split_num_splits_0, x = input_207_cast_fp16)[name = tensor("x_101_split_cast_fp16")]; @@ -741,75 +741,75 @@ program(1.0) tensor input_213_strides_0 = const()[name = tensor("input_213_strides_0"), val = tensor([1])]; tensor input_213_pad_0 = const()[name = tensor("input_213_pad_0"), val = tensor([0, 0])]; tensor input_213_dilations_0 = const()[name = tensor("input_213_dilations_0"), val = tensor([1])]; - tensor const_269_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93565184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93574464))), name = tensor("const_269_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; 
- tensor const_270_to_fp16 = const()[name = tensor("const_270_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93575040)))]; - tensor input_215_cast_fp16 = conv(bias = const_270_to_fp16, dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = const_269_to_fp16_palettized, x = input_211_cast_fp16)[name = tensor("input_215_cast_fp16")]; + tensor const_269_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_269_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93754240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93764608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93763520)))]; + tensor const_270_to_fp16 = const()[name = tensor("const_270_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93766720)))]; + tensor input_215_cast_fp16 = conv(bias = const_270_to_fp16, dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = const_269_to_fp16_quantized, x = input_211_cast_fp16)[name = tensor("input_215_cast_fp16")]; tensor input_217_cast_fp16 = silu(x = input_215_cast_fp16)[name = tensor("input_217_cast_fp16")]; tensor x_103_pad_type_0 = const()[name = tensor("x_103_pad_type_0"), val = tensor("valid")]; tensor x_103_strides_0 = const()[name = tensor("x_103_strides_0"), val = tensor([1])]; tensor x_103_pad_0 = const()[name = tensor("x_103_pad_0"), val = tensor([0, 0])]; tensor x_103_dilations_0 = const()[name = tensor("x_103_dilations_0"), val = tensor([1])]; tensor x_103_groups_0 = const()[name = tensor("x_103_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93577152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94625792))), name = tensor("encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_103_cast_fp16 = conv(dilations = x_103_dilations_0, groups = x_103_groups_0, pad = x_103_pad_0, pad_type = x_103_pad_type_0, strides = x_103_strides_0, weight = encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_217_cast_fp16)[name = tensor("x_103_cast_fp16")]; + tensor encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93768832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94818560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94817472)))]; + tensor x_103_cast_fp16 = conv(dilations = x_103_dilations_0, groups = x_103_groups_0, pad = x_103_pad_0, pad_type = x_103_pad_type_0, strides = x_103_strides_0, weight = encoder_module_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_217_cast_fp16)[name = tensor("x_103_cast_fp16")]; tensor 
input_219_perm_0 = const()[name = tensor("input_219_perm_0"), val = tensor([0, 2, 1])]; tensor input_219_cast_fp16 = transpose(perm = input_219_perm_0, x = x_103_cast_fp16)[name = tensor("transpose_285")]; tensor input_221_cast_fp16 = add(x = input_203_cast_fp16, y = input_219_cast_fp16)[name = tensor("input_221_cast_fp16")]; tensor input_223_axes_0 = const()[name = tensor("input_223_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94626368)))]; - tensor encoder_module_layers_3_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94628480)))]; + tensor encoder_module_layers_3_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94820672)))]; + tensor encoder_module_layers_3_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94822784)))]; tensor input_223_cast_fp16 = layer_norm(axes = input_223_axes_0, beta = encoder_module_layers_3_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_feed_forward2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor("input_223_cast_fp16")]; - tensor encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94630592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98824960))), name = tensor("encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_palettized, x = input_223_cast_fp16)[name = tensor("linear_35_cast_fp16")]; + tensor encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94824896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99023424))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99019264)))]; + tensor linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward2_linear1_weight_to_fp16_quantized, x = input_223_cast_fp16)[name = tensor("linear_35_cast_fp16")]; tensor input_227_cast_fp16 = silu(x = linear_35_cast_fp16)[name = tensor("input_227_cast_fp16")]; - tensor encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98825536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103019904))), name = 
tensor("encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_36_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_palettized, x = input_227_cast_fp16)[name = tensor("linear_36_cast_fp16")]; + tensor encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99031680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103227136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103226048)))]; + tensor linear_36_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_3_feed_forward2_linear2_weight_to_fp16_quantized, x = input_227_cast_fp16)[name = tensor("linear_36_cast_fp16")]; tensor var_984_to_fp16 = const()[name = tensor("op_984_to_fp16"), val = tensor(0x1p-1)]; tensor var_985_cast_fp16 = mul(x = linear_36_cast_fp16, y = var_984_to_fp16)[name = tensor("op_985_cast_fp16")]; tensor input_233_cast_fp16 = add(x = input_221_cast_fp16, y = var_985_cast_fp16)[name = tensor("input_233_cast_fp16")]; tensor input_235_axes_0 = const()[name = tensor("input_235_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_3_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103020480)))]; - tensor encoder_module_layers_3_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103022592)))]; + tensor encoder_module_layers_3_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103229248)))]; + tensor encoder_module_layers_3_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_3_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103231360)))]; tensor input_235_cast_fp16 = layer_norm(axes = input_235_axes_0, beta = encoder_module_layers_3_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_3_norm_out_weight_to_fp16, x = input_233_cast_fp16)[name = tensor("input_235_cast_fp16")]; tensor input_237_axes_0 = const()[name = tensor("input_237_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103024704)))]; - tensor encoder_module_layers_4_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103026816)))]; + tensor encoder_module_layers_4_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103233472)))]; + 
tensor encoder_module_layers_4_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103235584)))]; tensor input_237_cast_fp16 = layer_norm(axes = input_237_axes_0, beta = encoder_module_layers_4_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_4_norm_feed_forward1_weight_to_fp16, x = input_235_cast_fp16)[name = tensor("input_237_cast_fp16")]; - tensor encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103028928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107223296))), name = tensor("encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_palettized, x = input_237_cast_fp16)[name = tensor("linear_37_cast_fp16")]; + tensor encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103237696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107436224))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107432064)))]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward1_linear1_weight_to_fp16_quantized, x = input_237_cast_fp16)[name = tensor("linear_37_cast_fp16")]; tensor input_241_cast_fp16 = silu(x = linear_37_cast_fp16)[name = tensor("input_241_cast_fp16")]; - tensor encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107223872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111418240))), name = tensor("encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = tensor("linear_38_cast_fp16")]; + tensor encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107444480))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111639936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111638848)))]; + tensor linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward1_linear2_weight_to_fp16_quantized, x = input_241_cast_fp16)[name = tensor("linear_38_cast_fp16")]; tensor var_1013_to_fp16 = const()[name = tensor("op_1013_to_fp16"), val = tensor(0x1p-1)]; tensor 
var_1014_cast_fp16 = mul(x = linear_38_cast_fp16, y = var_1013_to_fp16)[name = tensor("op_1014_cast_fp16")]; tensor input_247_cast_fp16 = add(x = input_235_cast_fp16, y = var_1014_cast_fp16)[name = tensor("input_247_cast_fp16")]; tensor query_9_axes_0 = const()[name = tensor("query_9_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111418816)))]; - tensor encoder_module_layers_4_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111420928)))]; + tensor encoder_module_layers_4_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111642048)))]; + tensor encoder_module_layers_4_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111644160)))]; tensor query_9_cast_fp16 = layer_norm(axes = query_9_axes_0, beta = encoder_module_layers_4_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_4_norm_self_att_weight_to_fp16, x = input_247_cast_fp16)[name = tensor("query_9_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111423040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112471680))), name = tensor("encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_39_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_palettized, x = query_9_cast_fp16)[name = tensor("linear_39_cast_fp16")]; + tensor encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111646272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112696000))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112694912)))]; + tensor linear_39_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_q_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor("linear_39_cast_fp16")]; tensor var_1030 = const()[name = tensor("op_1030"), val = tensor([1, -1, 8, 128])]; tensor q_25_cast_fp16 = reshape(shape = var_1030, x = linear_39_cast_fp16)[name = tensor("q_25_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112472256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113520896))), name = tensor("encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_palettized"), shape = 
tensor([1024, 1024])]; - tensor linear_40_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_palettized, x = query_9_cast_fp16)[name = tensor("linear_40_cast_fp16")]; + tensor encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112698112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113747840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113746752)))]; + tensor linear_40_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_k_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor("linear_40_cast_fp16")]; tensor var_1034 = const()[name = tensor("op_1034"), val = tensor([1, -1, 8, 128])]; tensor k_17_cast_fp16 = reshape(shape = var_1034, x = linear_40_cast_fp16)[name = tensor("k_17_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113521472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114570112))), name = tensor("encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_41_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_palettized, x = query_9_cast_fp16)[name = tensor("linear_41_cast_fp16")]; + tensor encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113749952))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114799680))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114798592)))]; + tensor linear_41_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_v_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor("linear_41_cast_fp16")]; tensor var_1038 = const()[name = tensor("op_1038"), val = tensor([1, -1, 8, 128])]; tensor v_9_cast_fp16 = reshape(shape = var_1038, x = linear_41_cast_fp16)[name = tensor("v_9_cast_fp16")]; tensor value_13_perm_0 = const()[name = tensor("value_13_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_4_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_4_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114570688)))]; + tensor encoder_module_layers_4_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_4_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114801792)))]; tensor var_1050_cast_fp16 = add(x = q_25_cast_fp16, y = encoder_module_layers_4_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1050_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_pos_bias_v_to_fp16 = 
const()[name = tensor("encoder_module_layers_4_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114572800)))]; + tensor encoder_module_layers_4_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_4_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114803904)))]; tensor var_1052_cast_fp16 = add(x = q_25_cast_fp16, y = encoder_module_layers_4_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1052_cast_fp16")]; tensor q_with_bias_v_9_perm_0 = const()[name = tensor("q_with_bias_v_9_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_111_transpose_x_0 = const()[name = tensor("x_111_transpose_x_0"), val = tensor(false)]; tensor x_111_transpose_y_0 = const()[name = tensor("x_111_transpose_y_0"), val = tensor(false)]; - tensor op_1054_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114574912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114958976))), name = tensor("op_1054_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1054_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1054_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114806016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115190528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115190080)))]; tensor q_with_bias_v_9_cast_fp16 = transpose(perm = q_with_bias_v_9_perm_0, x = var_1052_cast_fp16)[name = tensor("transpose_284")]; - tensor x_111_cast_fp16 = matmul(transpose_x = x_111_transpose_x_0, transpose_y = x_111_transpose_y_0, x = q_with_bias_v_9_cast_fp16, y = op_1054_to_fp16_palettized)[name = tensor("x_111_cast_fp16")]; + tensor x_111_cast_fp16 = matmul(transpose_x = x_111_transpose_x_0, transpose_y = x_111_transpose_y_0, x = q_with_bias_v_9_cast_fp16, y = op_1054_to_fp16_quantized)[name = tensor("x_111_cast_fp16")]; tensor x_113_pad_0 = const()[name = tensor("x_113_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_113_mode_0 = const()[name = tensor("x_113_mode_0"), val = tensor("constant")]; tensor const_69_to_fp16 = const()[name = tensor("const_69_to_fp16"), val = tensor(0x0p+0)]; @@ -847,12 +847,12 @@ program(1.0) tensor var_1087 = const()[name = tensor("op_1087"), val = tensor([1, -1, 1024])]; tensor var_1086_cast_fp16 = transpose(perm = var_1086_perm_0, x = x_117_cast_fp16)[name = tensor("transpose_280")]; tensor input_251_cast_fp16 = reshape(shape = var_1087, x = var_1086_cast_fp16)[name = tensor("input_251_cast_fp16")]; - tensor encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114959552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116008192))), name = tensor("encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_43_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = tensor("linear_43_cast_fp16")]; + tensor 
encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115191360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116241088))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116240000)))]; + tensor linear_43_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_self_attn_linear_out_weight_to_fp16_quantized, x = input_251_cast_fp16)[name = tensor("linear_43_cast_fp16")]; tensor input_255_cast_fp16 = add(x = input_247_cast_fp16, y = linear_43_cast_fp16)[name = tensor("input_255_cast_fp16")]; tensor x_121_axes_0 = const()[name = tensor("x_121_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116008768)))]; - tensor encoder_module_layers_4_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116010880)))]; + tensor encoder_module_layers_4_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116243200)))]; + tensor encoder_module_layers_4_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116245312)))]; tensor x_121_cast_fp16 = layer_norm(axes = x_121_axes_0, beta = encoder_module_layers_4_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_4_norm_conv_weight_to_fp16, x = input_255_cast_fp16)[name = tensor("x_121_cast_fp16")]; tensor input_257_perm_0 = const()[name = tensor("input_257_perm_0"), val = tensor([0, 2, 1])]; tensor input_259_pad_type_0 = const()[name = tensor("input_259_pad_type_0"), val = tensor("valid")]; @@ -860,9 +860,9 @@ program(1.0) tensor input_259_pad_0 = const()[name = tensor("input_259_pad_0"), val = tensor([0, 0])]; tensor input_259_dilations_0 = const()[name = tensor("input_259_dilations_0"), val = tensor([1])]; tensor input_259_groups_0 = const()[name = tensor("input_259_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116012992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118110208))), name = tensor("encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116247424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118346752))), zero_point = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118344640)))]; tensor input_257_cast_fp16 = transpose(perm = input_257_perm_0, x = x_121_cast_fp16)[name = tensor("transpose_279")]; - tensor input_259_cast_fp16 = conv(dilations = input_259_dilations_0, groups = input_259_groups_0, pad = input_259_pad_0, pad_type = input_259_pad_type_0, strides = input_259_strides_0, weight = encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_257_cast_fp16)[name = tensor("input_259_cast_fp16")]; + tensor input_259_cast_fp16 = conv(dilations = input_259_dilations_0, groups = input_259_groups_0, pad = input_259_pad_0, pad_type = input_259_pad_type_0, strides = input_259_strides_0, weight = encoder_module_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_257_cast_fp16)[name = tensor("input_259_cast_fp16")]; tensor x_123_split_num_splits_0 = const()[name = tensor("x_123_split_num_splits_0"), val = tensor(2)]; tensor x_123_split_axis_0 = const()[name = tensor("x_123_split_axis_0"), val = tensor(1)]; tensor x_123_split_cast_fp16_0, tensor x_123_split_cast_fp16_1 = split(axis = x_123_split_axis_0, num_splits = x_123_split_num_splits_0, x = input_259_cast_fp16)[name = tensor("x_123_split_cast_fp16")]; @@ -878,75 +878,75 @@ program(1.0) tensor input_265_strides_0 = const()[name = tensor("input_265_strides_0"), val = tensor([1])]; tensor input_265_pad_0 = const()[name = tensor("input_265_pad_0"), val = tensor([0, 0])]; tensor input_265_dilations_0 = const()[name = tensor("input_265_dilations_0"), val = tensor([1])]; - tensor const_271_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118110784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118120064))), name = tensor("const_271_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_272_to_fp16 = const()[name = tensor("const_272_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118120640)))]; - tensor input_267_cast_fp16 = conv(bias = const_272_to_fp16, dilations = input_265_dilations_0, groups = input_265_groups_0, pad = input_265_pad_0, pad_type = input_265_pad_type_0, strides = input_265_strides_0, weight = const_271_to_fp16_palettized, x = input_263_cast_fp16)[name = tensor("input_267_cast_fp16")]; + tensor const_271_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_271_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118350912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118361280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118360192)))]; + tensor const_272_to_fp16 = const()[name = tensor("const_272_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118363392)))]; + tensor input_267_cast_fp16 = conv(bias = const_272_to_fp16, dilations = input_265_dilations_0, groups = input_265_groups_0, pad = input_265_pad_0, pad_type = input_265_pad_type_0, strides = input_265_strides_0, weight = const_271_to_fp16_quantized, x = input_263_cast_fp16)[name = tensor("input_267_cast_fp16")]; tensor input_269_cast_fp16 = silu(x = input_267_cast_fp16)[name = tensor("input_269_cast_fp16")]; tensor x_125_pad_type_0 = const()[name = tensor("x_125_pad_type_0"), val = 
tensor("valid")]; tensor x_125_strides_0 = const()[name = tensor("x_125_strides_0"), val = tensor([1])]; tensor x_125_pad_0 = const()[name = tensor("x_125_pad_0"), val = tensor([0, 0])]; tensor x_125_dilations_0 = const()[name = tensor("x_125_dilations_0"), val = tensor([1])]; tensor x_125_groups_0 = const()[name = tensor("x_125_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118122752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119171392))), name = tensor("encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_125_cast_fp16 = conv(dilations = x_125_dilations_0, groups = x_125_groups_0, pad = x_125_pad_0, pad_type = x_125_pad_type_0, strides = x_125_strides_0, weight = encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_269_cast_fp16)[name = tensor("x_125_cast_fp16")]; + tensor encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118365504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119415232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119414144)))]; + tensor x_125_cast_fp16 = conv(dilations = x_125_dilations_0, groups = x_125_groups_0, pad = x_125_pad_0, pad_type = x_125_pad_type_0, strides = x_125_strides_0, weight = encoder_module_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_269_cast_fp16)[name = tensor("x_125_cast_fp16")]; tensor input_271_perm_0 = const()[name = tensor("input_271_perm_0"), val = tensor([0, 2, 1])]; tensor input_271_cast_fp16 = transpose(perm = input_271_perm_0, x = x_125_cast_fp16)[name = tensor("transpose_278")]; tensor input_273_cast_fp16 = add(x = input_255_cast_fp16, y = input_271_cast_fp16)[name = tensor("input_273_cast_fp16")]; tensor input_275_axes_0 = const()[name = tensor("input_275_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119171968)))]; - tensor encoder_module_layers_4_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119174080)))]; + tensor encoder_module_layers_4_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119417344)))]; + tensor encoder_module_layers_4_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119419456)))]; tensor input_275_cast_fp16 = layer_norm(axes = input_275_axes_0, beta = encoder_module_layers_4_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = 
encoder_module_layers_4_norm_feed_forward2_weight_to_fp16, x = input_273_cast_fp16)[name = tensor("input_275_cast_fp16")]; - tensor encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119176192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123370560))), name = tensor("encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_palettized, x = input_275_cast_fp16)[name = tensor("linear_44_cast_fp16")]; + tensor encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119421568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123620096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123615936)))]; + tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward2_linear1_weight_to_fp16_quantized, x = input_275_cast_fp16)[name = tensor("linear_44_cast_fp16")]; tensor input_279_cast_fp16 = silu(x = linear_44_cast_fp16)[name = tensor("input_279_cast_fp16")]; - tensor encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123371136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127565504))), name = tensor("encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_45_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_palettized, x = input_279_cast_fp16)[name = tensor("linear_45_cast_fp16")]; + tensor encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123628352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127823808))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127822720)))]; + tensor linear_45_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_4_feed_forward2_linear2_weight_to_fp16_quantized, x = input_279_cast_fp16)[name = tensor("linear_45_cast_fp16")]; tensor var_1147_to_fp16 = const()[name = tensor("op_1147_to_fp16"), val = tensor(0x1p-1)]; tensor var_1148_cast_fp16 = mul(x = linear_45_cast_fp16, y = var_1147_to_fp16)[name = tensor("op_1148_cast_fp16")]; tensor input_285_cast_fp16 = add(x = input_273_cast_fp16, y = var_1148_cast_fp16)[name = tensor("input_285_cast_fp16")]; tensor input_287_axes_0 = const()[name = tensor("input_287_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_4_norm_out_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_4_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127566080)))]; - tensor encoder_module_layers_4_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127568192)))]; + tensor encoder_module_layers_4_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127825920)))]; + tensor encoder_module_layers_4_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_4_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127828032)))]; tensor input_287_cast_fp16 = layer_norm(axes = input_287_axes_0, beta = encoder_module_layers_4_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_4_norm_out_weight_to_fp16, x = input_285_cast_fp16)[name = tensor("input_287_cast_fp16")]; tensor input_289_axes_0 = const()[name = tensor("input_289_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127570304)))]; - tensor encoder_module_layers_5_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127572416)))]; + tensor encoder_module_layers_5_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127830144)))]; + tensor encoder_module_layers_5_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127832256)))]; tensor input_289_cast_fp16 = layer_norm(axes = input_289_axes_0, beta = encoder_module_layers_5_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_feed_forward1_weight_to_fp16, x = input_287_cast_fp16)[name = tensor("input_289_cast_fp16")]; - tensor encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127574528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131768896))), name = tensor("encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_palettized, x = input_289_cast_fp16)[name = tensor("linear_46_cast_fp16")]; + tensor encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127834368))), scale = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(132032896))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132028736)))]; + tensor linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward1_linear1_weight_to_fp16_quantized, x = input_289_cast_fp16)[name = tensor("linear_46_cast_fp16")]; tensor input_293_cast_fp16 = silu(x = linear_46_cast_fp16)[name = tensor("input_293_cast_fp16")]; - tensor encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131769472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135963840))), name = tensor("encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_47_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_palettized, x = input_293_cast_fp16)[name = tensor("linear_47_cast_fp16")]; + tensor encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132041152))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136236608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136235520)))]; + tensor linear_47_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward1_linear2_weight_to_fp16_quantized, x = input_293_cast_fp16)[name = tensor("linear_47_cast_fp16")]; tensor var_1176_to_fp16 = const()[name = tensor("op_1176_to_fp16"), val = tensor(0x1p-1)]; tensor var_1177_cast_fp16 = mul(x = linear_47_cast_fp16, y = var_1176_to_fp16)[name = tensor("op_1177_cast_fp16")]; tensor input_299_cast_fp16 = add(x = input_287_cast_fp16, y = var_1177_cast_fp16)[name = tensor("input_299_cast_fp16")]; tensor query_11_axes_0 = const()[name = tensor("query_11_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135964416)))]; - tensor encoder_module_layers_5_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135966528)))]; + tensor encoder_module_layers_5_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136238720)))]; + tensor encoder_module_layers_5_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136240832)))]; tensor query_11_cast_fp16 = layer_norm(axes = query_11_axes_0, beta = encoder_module_layers_5_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_self_att_weight_to_fp16, x = input_299_cast_fp16)[name = 
tensor("query_11_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135968640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137017280))), name = tensor("encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_48_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_palettized, x = query_11_cast_fp16)[name = tensor("linear_48_cast_fp16")]; + tensor encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136242944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137292672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137291584)))]; + tensor linear_48_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_q_weight_to_fp16_quantized, x = query_11_cast_fp16)[name = tensor("linear_48_cast_fp16")]; tensor var_1193 = const()[name = tensor("op_1193"), val = tensor([1, -1, 8, 128])]; tensor q_31_cast_fp16 = reshape(shape = var_1193, x = linear_48_cast_fp16)[name = tensor("q_31_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137017856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138066496))), name = tensor("encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_49_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_palettized, x = query_11_cast_fp16)[name = tensor("linear_49_cast_fp16")]; + tensor encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137294784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138344512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138343424)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_k_weight_to_fp16_quantized, x = query_11_cast_fp16)[name = tensor("linear_49_cast_fp16")]; tensor var_1197 = const()[name = tensor("op_1197"), val = tensor([1, -1, 8, 128])]; tensor k_21_cast_fp16 = reshape(shape = var_1197, x = linear_49_cast_fp16)[name = tensor("k_21_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138067072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139115712))), name = 
tensor("encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_50_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_palettized, x = query_11_cast_fp16)[name = tensor("linear_50_cast_fp16")]; + tensor encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138346624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139396352))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139395264)))]; + tensor linear_50_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_v_weight_to_fp16_quantized, x = query_11_cast_fp16)[name = tensor("linear_50_cast_fp16")]; tensor var_1201 = const()[name = tensor("op_1201"), val = tensor([1, -1, 8, 128])]; tensor v_11_cast_fp16 = reshape(shape = var_1201, x = linear_50_cast_fp16)[name = tensor("v_11_cast_fp16")]; tensor value_15_perm_0 = const()[name = tensor("value_15_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_5_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_5_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139116288)))]; + tensor encoder_module_layers_5_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_5_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139398464)))]; tensor var_1213_cast_fp16 = add(x = q_31_cast_fp16, y = encoder_module_layers_5_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1213_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_5_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139118400)))]; + tensor encoder_module_layers_5_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_5_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139400576)))]; tensor var_1215_cast_fp16 = add(x = q_31_cast_fp16, y = encoder_module_layers_5_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1215_cast_fp16")]; tensor q_with_bias_v_11_perm_0 = const()[name = tensor("q_with_bias_v_11_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_133_transpose_x_0 = const()[name = tensor("x_133_transpose_x_0"), val = tensor(false)]; tensor x_133_transpose_y_0 = const()[name = tensor("x_133_transpose_y_0"), val = tensor(false)]; - tensor op_1217_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139120512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139504576))), name = tensor("op_1217_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1217_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1217_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139402688))), scale = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139787200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139786752)))]; tensor q_with_bias_v_11_cast_fp16 = transpose(perm = q_with_bias_v_11_perm_0, x = var_1215_cast_fp16)[name = tensor("transpose_277")]; - tensor x_133_cast_fp16 = matmul(transpose_x = x_133_transpose_x_0, transpose_y = x_133_transpose_y_0, x = q_with_bias_v_11_cast_fp16, y = op_1217_to_fp16_palettized)[name = tensor("x_133_cast_fp16")]; + tensor x_133_cast_fp16 = matmul(transpose_x = x_133_transpose_x_0, transpose_y = x_133_transpose_y_0, x = q_with_bias_v_11_cast_fp16, y = op_1217_to_fp16_quantized)[name = tensor("x_133_cast_fp16")]; tensor x_135_pad_0 = const()[name = tensor("x_135_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_135_mode_0 = const()[name = tensor("x_135_mode_0"), val = tensor("constant")]; tensor const_79_to_fp16 = const()[name = tensor("const_79_to_fp16"), val = tensor(0x0p+0)]; @@ -984,12 +984,12 @@ program(1.0) tensor var_1250 = const()[name = tensor("op_1250"), val = tensor([1, -1, 1024])]; tensor var_1249_cast_fp16 = transpose(perm = var_1249_perm_0, x = x_139_cast_fp16)[name = tensor("transpose_273")]; tensor input_303_cast_fp16 = reshape(shape = var_1250, x = var_1249_cast_fp16)[name = tensor("input_303_cast_fp16")]; - tensor encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139505152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140553792))), name = tensor("encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_52_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_palettized, x = input_303_cast_fp16)[name = tensor("linear_52_cast_fp16")]; + tensor encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139788032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140837760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140836672)))]; + tensor linear_52_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_self_attn_linear_out_weight_to_fp16_quantized, x = input_303_cast_fp16)[name = tensor("linear_52_cast_fp16")]; tensor input_307_cast_fp16 = add(x = input_299_cast_fp16, y = linear_52_cast_fp16)[name = tensor("input_307_cast_fp16")]; tensor x_143_axes_0 = const()[name = tensor("x_143_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140554368)))]; - tensor encoder_module_layers_5_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140556480)))]; + tensor encoder_module_layers_5_norm_conv_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_5_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140839872)))]; + tensor encoder_module_layers_5_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140841984)))]; tensor x_143_cast_fp16 = layer_norm(axes = x_143_axes_0, beta = encoder_module_layers_5_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_conv_weight_to_fp16, x = input_307_cast_fp16)[name = tensor("x_143_cast_fp16")]; tensor input_309_perm_0 = const()[name = tensor("input_309_perm_0"), val = tensor([0, 2, 1])]; tensor input_311_pad_type_0 = const()[name = tensor("input_311_pad_type_0"), val = tensor("valid")]; @@ -997,9 +997,9 @@ program(1.0) tensor input_311_pad_0 = const()[name = tensor("input_311_pad_0"), val = tensor([0, 0])]; tensor input_311_dilations_0 = const()[name = tensor("input_311_dilations_0"), val = tensor([1])]; tensor input_311_groups_0 = const()[name = tensor("input_311_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140558592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142655808))), name = tensor("encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140844096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142943424))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142941312)))]; tensor input_309_cast_fp16 = transpose(perm = input_309_perm_0, x = x_143_cast_fp16)[name = tensor("transpose_272")]; - tensor input_311_cast_fp16 = conv(dilations = input_311_dilations_0, groups = input_311_groups_0, pad = input_311_pad_0, pad_type = input_311_pad_type_0, strides = input_311_strides_0, weight = encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_309_cast_fp16)[name = tensor("input_311_cast_fp16")]; + tensor input_311_cast_fp16 = conv(dilations = input_311_dilations_0, groups = input_311_groups_0, pad = input_311_pad_0, pad_type = input_311_pad_type_0, strides = input_311_strides_0, weight = encoder_module_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_309_cast_fp16)[name = tensor("input_311_cast_fp16")]; tensor x_145_split_num_splits_0 = const()[name = tensor("x_145_split_num_splits_0"), val = tensor(2)]; tensor x_145_split_axis_0 = const()[name = tensor("x_145_split_axis_0"), val = tensor(1)]; tensor x_145_split_cast_fp16_0, tensor x_145_split_cast_fp16_1 = split(axis = x_145_split_axis_0, num_splits = x_145_split_num_splits_0, x = input_311_cast_fp16)[name = tensor("x_145_split_cast_fp16")]; @@ -1015,75 +1015,75 @@ program(1.0) tensor input_317_strides_0 = const()[name = tensor("input_317_strides_0"), val = tensor([1])]; tensor input_317_pad_0 = const()[name = tensor("input_317_pad_0"), val = tensor([0, 0])]; tensor 
input_317_dilations_0 = const()[name = tensor("input_317_dilations_0"), val = tensor([1])]; - tensor const_273_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142656384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142665664))), name = tensor("const_273_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_274_to_fp16 = const()[name = tensor("const_274_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142666240)))]; - tensor input_319_cast_fp16 = conv(bias = const_274_to_fp16, dilations = input_317_dilations_0, groups = input_317_groups_0, pad = input_317_pad_0, pad_type = input_317_pad_type_0, strides = input_317_strides_0, weight = const_273_to_fp16_palettized, x = input_315_cast_fp16)[name = tensor("input_319_cast_fp16")]; + tensor const_273_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_273_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142947584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142957952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142956864)))]; + tensor const_274_to_fp16 = const()[name = tensor("const_274_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142960064)))]; + tensor input_319_cast_fp16 = conv(bias = const_274_to_fp16, dilations = input_317_dilations_0, groups = input_317_groups_0, pad = input_317_pad_0, pad_type = input_317_pad_type_0, strides = input_317_strides_0, weight = const_273_to_fp16_quantized, x = input_315_cast_fp16)[name = tensor("input_319_cast_fp16")]; tensor input_321_cast_fp16 = silu(x = input_319_cast_fp16)[name = tensor("input_321_cast_fp16")]; tensor x_147_pad_type_0 = const()[name = tensor("x_147_pad_type_0"), val = tensor("valid")]; tensor x_147_strides_0 = const()[name = tensor("x_147_strides_0"), val = tensor([1])]; tensor x_147_pad_0 = const()[name = tensor("x_147_pad_0"), val = tensor([0, 0])]; tensor x_147_dilations_0 = const()[name = tensor("x_147_dilations_0"), val = tensor([1])]; tensor x_147_groups_0 = const()[name = tensor("x_147_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142668352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143716992))), name = tensor("encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_147_cast_fp16 = conv(dilations = x_147_dilations_0, groups = x_147_groups_0, pad = x_147_pad_0, pad_type = x_147_pad_type_0, strides = x_147_strides_0, weight = encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_321_cast_fp16)[name = tensor("x_147_cast_fp16")]; + tensor encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142962176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(144011904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144010816)))]; + tensor x_147_cast_fp16 = conv(dilations = x_147_dilations_0, groups = x_147_groups_0, pad = x_147_pad_0, pad_type = x_147_pad_type_0, strides = x_147_strides_0, weight = encoder_module_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_321_cast_fp16)[name = tensor("x_147_cast_fp16")]; tensor input_323_perm_0 = const()[name = tensor("input_323_perm_0"), val = tensor([0, 2, 1])]; tensor input_323_cast_fp16 = transpose(perm = input_323_perm_0, x = x_147_cast_fp16)[name = tensor("transpose_271")]; tensor input_325_cast_fp16 = add(x = input_307_cast_fp16, y = input_323_cast_fp16)[name = tensor("input_325_cast_fp16")]; tensor input_327_axes_0 = const()[name = tensor("input_327_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143717568)))]; - tensor encoder_module_layers_5_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143719680)))]; + tensor encoder_module_layers_5_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144014016)))]; + tensor encoder_module_layers_5_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144016128)))]; tensor input_327_cast_fp16 = layer_norm(axes = input_327_axes_0, beta = encoder_module_layers_5_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_feed_forward2_weight_to_fp16, x = input_325_cast_fp16)[name = tensor("input_327_cast_fp16")]; - tensor encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143721792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147916160))), name = tensor("encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_53_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_palettized, x = input_327_cast_fp16)[name = tensor("linear_53_cast_fp16")]; + tensor encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144018240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148216768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148212608)))]; + tensor linear_53_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward2_linear1_weight_to_fp16_quantized, x = input_327_cast_fp16)[name = 
tensor("linear_53_cast_fp16")]; tensor input_331_cast_fp16 = silu(x = linear_53_cast_fp16)[name = tensor("input_331_cast_fp16")]; - tensor encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147916736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152111104))), name = tensor("encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_54_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_palettized, x = input_331_cast_fp16)[name = tensor("linear_54_cast_fp16")]; + tensor encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148225024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152420480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152419392)))]; + tensor linear_54_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_5_feed_forward2_linear2_weight_to_fp16_quantized, x = input_331_cast_fp16)[name = tensor("linear_54_cast_fp16")]; tensor var_1310_to_fp16 = const()[name = tensor("op_1310_to_fp16"), val = tensor(0x1p-1)]; tensor var_1311_cast_fp16 = mul(x = linear_54_cast_fp16, y = var_1310_to_fp16)[name = tensor("op_1311_cast_fp16")]; tensor input_337_cast_fp16 = add(x = input_325_cast_fp16, y = var_1311_cast_fp16)[name = tensor("input_337_cast_fp16")]; tensor input_339_axes_0 = const()[name = tensor("input_339_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_5_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152111680)))]; - tensor encoder_module_layers_5_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152113792)))]; + tensor encoder_module_layers_5_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152422592)))]; + tensor encoder_module_layers_5_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_5_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152424704)))]; tensor input_339_cast_fp16 = layer_norm(axes = input_339_axes_0, beta = encoder_module_layers_5_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_5_norm_out_weight_to_fp16, x = input_337_cast_fp16)[name = tensor("input_339_cast_fp16")]; tensor input_341_axes_0 = const()[name = tensor("input_341_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152115904)))]; - tensor 
encoder_module_layers_6_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152118016)))]; + tensor encoder_module_layers_6_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152426816)))]; + tensor encoder_module_layers_6_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152428928)))]; tensor input_341_cast_fp16 = layer_norm(axes = input_341_axes_0, beta = encoder_module_layers_6_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_feed_forward1_weight_to_fp16, x = input_339_cast_fp16)[name = tensor("input_341_cast_fp16")]; - tensor encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152120128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156314496))), name = tensor("encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_palettized, x = input_341_cast_fp16)[name = tensor("linear_55_cast_fp16")]; + tensor encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152431040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156629568))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156625408)))]; + tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward1_linear1_weight_to_fp16_quantized, x = input_341_cast_fp16)[name = tensor("linear_55_cast_fp16")]; tensor input_345_cast_fp16 = silu(x = linear_55_cast_fp16)[name = tensor("input_345_cast_fp16")]; - tensor encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156315072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160509440))), name = tensor("encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_56_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_palettized, x = input_345_cast_fp16)[name = tensor("linear_56_cast_fp16")]; + tensor encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156637824))), scale = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(160833280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160832192)))]; + tensor linear_56_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward1_linear2_weight_to_fp16_quantized, x = input_345_cast_fp16)[name = tensor("linear_56_cast_fp16")]; tensor var_1339_to_fp16 = const()[name = tensor("op_1339_to_fp16"), val = tensor(0x1p-1)]; tensor var_1340_cast_fp16 = mul(x = linear_56_cast_fp16, y = var_1339_to_fp16)[name = tensor("op_1340_cast_fp16")]; tensor input_351_cast_fp16 = add(x = input_339_cast_fp16, y = var_1340_cast_fp16)[name = tensor("input_351_cast_fp16")]; tensor query_13_axes_0 = const()[name = tensor("query_13_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160510016)))]; - tensor encoder_module_layers_6_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160512128)))]; + tensor encoder_module_layers_6_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160835392)))]; + tensor encoder_module_layers_6_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160837504)))]; tensor query_13_cast_fp16 = layer_norm(axes = query_13_axes_0, beta = encoder_module_layers_6_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_self_att_weight_to_fp16, x = input_351_cast_fp16)[name = tensor("query_13_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160514240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161562880))), name = tensor("encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_57_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_palettized, x = query_13_cast_fp16)[name = tensor("linear_57_cast_fp16")]; + tensor encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160839616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161889344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161888256)))]; + tensor linear_57_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_q_weight_to_fp16_quantized, x = query_13_cast_fp16)[name = tensor("linear_57_cast_fp16")]; tensor var_1356 = const()[name = tensor("op_1356"), val = tensor([1, -1, 8, 128])]; tensor q_37_cast_fp16 = 
reshape(shape = var_1356, x = linear_57_cast_fp16)[name = tensor("q_37_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161563456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162612096))), name = tensor("encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_58_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_palettized, x = query_13_cast_fp16)[name = tensor("linear_58_cast_fp16")]; + tensor encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161891456))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162941184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162940096)))]; + tensor linear_58_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_k_weight_to_fp16_quantized, x = query_13_cast_fp16)[name = tensor("linear_58_cast_fp16")]; tensor var_1360 = const()[name = tensor("op_1360"), val = tensor([1, -1, 8, 128])]; tensor k_25_cast_fp16 = reshape(shape = var_1360, x = linear_58_cast_fp16)[name = tensor("k_25_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162612672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163661312))), name = tensor("encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_59_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_palettized, x = query_13_cast_fp16)[name = tensor("linear_59_cast_fp16")]; + tensor encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162943296))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163993024))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163991936)))]; + tensor linear_59_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_v_weight_to_fp16_quantized, x = query_13_cast_fp16)[name = tensor("linear_59_cast_fp16")]; tensor var_1364 = const()[name = tensor("op_1364"), val = tensor([1, -1, 8, 128])]; tensor v_13_cast_fp16 = reshape(shape = var_1364, x = linear_59_cast_fp16)[name = tensor("v_13_cast_fp16")]; tensor value_17_perm_0 = const()[name = tensor("value_17_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_6_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_6_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(163661888)))]; + tensor encoder_module_layers_6_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_6_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163995136)))]; tensor var_1376_cast_fp16 = add(x = q_37_cast_fp16, y = encoder_module_layers_6_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1376_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_6_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163664000)))]; + tensor encoder_module_layers_6_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_6_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163997248)))]; tensor var_1378_cast_fp16 = add(x = q_37_cast_fp16, y = encoder_module_layers_6_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1378_cast_fp16")]; tensor q_with_bias_v_13_perm_0 = const()[name = tensor("q_with_bias_v_13_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_155_transpose_x_0 = const()[name = tensor("x_155_transpose_x_0"), val = tensor(false)]; tensor x_155_transpose_y_0 = const()[name = tensor("x_155_transpose_y_0"), val = tensor(false)]; - tensor op_1380_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163666112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164050176))), name = tensor("op_1380_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1380_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1380_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163999360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164383872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164383424)))]; tensor q_with_bias_v_13_cast_fp16 = transpose(perm = q_with_bias_v_13_perm_0, x = var_1378_cast_fp16)[name = tensor("transpose_270")]; - tensor x_155_cast_fp16 = matmul(transpose_x = x_155_transpose_x_0, transpose_y = x_155_transpose_y_0, x = q_with_bias_v_13_cast_fp16, y = op_1380_to_fp16_palettized)[name = tensor("x_155_cast_fp16")]; + tensor x_155_cast_fp16 = matmul(transpose_x = x_155_transpose_x_0, transpose_y = x_155_transpose_y_0, x = q_with_bias_v_13_cast_fp16, y = op_1380_to_fp16_quantized)[name = tensor("x_155_cast_fp16")]; tensor x_157_pad_0 = const()[name = tensor("x_157_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_157_mode_0 = const()[name = tensor("x_157_mode_0"), val = tensor("constant")]; tensor const_89_to_fp16 = const()[name = tensor("const_89_to_fp16"), val = tensor(0x0p+0)]; @@ -1121,12 +1121,12 @@ program(1.0) tensor var_1413 = const()[name = tensor("op_1413"), val = tensor([1, -1, 1024])]; tensor var_1412_cast_fp16 = transpose(perm = var_1412_perm_0, x = x_161_cast_fp16)[name = tensor("transpose_266")]; tensor input_355_cast_fp16 = reshape(shape = var_1413, x = var_1412_cast_fp16)[name = tensor("input_355_cast_fp16")]; - tensor encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(164050752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165099392))), name = tensor("encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_61_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_palettized, x = input_355_cast_fp16)[name = tensor("linear_61_cast_fp16")]; + tensor encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164384704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165434432))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165433344)))]; + tensor linear_61_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_self_attn_linear_out_weight_to_fp16_quantized, x = input_355_cast_fp16)[name = tensor("linear_61_cast_fp16")]; tensor input_359_cast_fp16 = add(x = input_351_cast_fp16, y = linear_61_cast_fp16)[name = tensor("input_359_cast_fp16")]; tensor x_165_axes_0 = const()[name = tensor("x_165_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165099968)))]; - tensor encoder_module_layers_6_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165102080)))]; + tensor encoder_module_layers_6_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165436544)))]; + tensor encoder_module_layers_6_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165438656)))]; tensor x_165_cast_fp16 = layer_norm(axes = x_165_axes_0, beta = encoder_module_layers_6_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_conv_weight_to_fp16, x = input_359_cast_fp16)[name = tensor("x_165_cast_fp16")]; tensor input_361_perm_0 = const()[name = tensor("input_361_perm_0"), val = tensor([0, 2, 1])]; tensor input_363_pad_type_0 = const()[name = tensor("input_363_pad_type_0"), val = tensor("valid")]; @@ -1134,9 +1134,9 @@ program(1.0) tensor input_363_pad_0 = const()[name = tensor("input_363_pad_0"), val = tensor([0, 0])]; tensor input_363_dilations_0 = const()[name = tensor("input_363_dilations_0"), val = tensor([1])]; tensor input_363_groups_0 = const()[name = tensor("input_363_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165104192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167201408))), name = 
tensor("encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165440768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167540096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167537984)))]; tensor input_361_cast_fp16 = transpose(perm = input_361_perm_0, x = x_165_cast_fp16)[name = tensor("transpose_265")]; - tensor input_363_cast_fp16 = conv(dilations = input_363_dilations_0, groups = input_363_groups_0, pad = input_363_pad_0, pad_type = input_363_pad_type_0, strides = input_363_strides_0, weight = encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_361_cast_fp16)[name = tensor("input_363_cast_fp16")]; + tensor input_363_cast_fp16 = conv(dilations = input_363_dilations_0, groups = input_363_groups_0, pad = input_363_pad_0, pad_type = input_363_pad_type_0, strides = input_363_strides_0, weight = encoder_module_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_361_cast_fp16)[name = tensor("input_363_cast_fp16")]; tensor x_167_split_num_splits_0 = const()[name = tensor("x_167_split_num_splits_0"), val = tensor(2)]; tensor x_167_split_axis_0 = const()[name = tensor("x_167_split_axis_0"), val = tensor(1)]; tensor x_167_split_cast_fp16_0, tensor x_167_split_cast_fp16_1 = split(axis = x_167_split_axis_0, num_splits = x_167_split_num_splits_0, x = input_363_cast_fp16)[name = tensor("x_167_split_cast_fp16")]; @@ -1152,75 +1152,75 @@ program(1.0) tensor input_369_strides_0 = const()[name = tensor("input_369_strides_0"), val = tensor([1])]; tensor input_369_pad_0 = const()[name = tensor("input_369_pad_0"), val = tensor([0, 0])]; tensor input_369_dilations_0 = const()[name = tensor("input_369_dilations_0"), val = tensor([1])]; - tensor const_275_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167201984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167211264))), name = tensor("const_275_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_276_to_fp16 = const()[name = tensor("const_276_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167211840)))]; - tensor input_371_cast_fp16 = conv(bias = const_276_to_fp16, dilations = input_369_dilations_0, groups = input_369_groups_0, pad = input_369_pad_0, pad_type = input_369_pad_type_0, strides = input_369_strides_0, weight = const_275_to_fp16_palettized, x = input_367_cast_fp16)[name = tensor("input_371_cast_fp16")]; + tensor const_275_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_275_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167544256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167554624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167553536)))]; + tensor const_276_to_fp16 = const()[name = tensor("const_276_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(167556736)))]; + tensor input_371_cast_fp16 = conv(bias = const_276_to_fp16, dilations = input_369_dilations_0, groups = input_369_groups_0, pad = input_369_pad_0, pad_type = input_369_pad_type_0, strides = input_369_strides_0, weight = const_275_to_fp16_quantized, x = input_367_cast_fp16)[name = tensor("input_371_cast_fp16")]; tensor input_373_cast_fp16 = silu(x = input_371_cast_fp16)[name = tensor("input_373_cast_fp16")]; tensor x_169_pad_type_0 = const()[name = tensor("x_169_pad_type_0"), val = tensor("valid")]; tensor x_169_strides_0 = const()[name = tensor("x_169_strides_0"), val = tensor([1])]; tensor x_169_pad_0 = const()[name = tensor("x_169_pad_0"), val = tensor([0, 0])]; tensor x_169_dilations_0 = const()[name = tensor("x_169_dilations_0"), val = tensor([1])]; tensor x_169_groups_0 = const()[name = tensor("x_169_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167213952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168262592))), name = tensor("encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_169_cast_fp16 = conv(dilations = x_169_dilations_0, groups = x_169_groups_0, pad = x_169_pad_0, pad_type = x_169_pad_type_0, strides = x_169_strides_0, weight = encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_373_cast_fp16)[name = tensor("x_169_cast_fp16")]; + tensor encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167558848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168608576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168607488)))]; + tensor x_169_cast_fp16 = conv(dilations = x_169_dilations_0, groups = x_169_groups_0, pad = x_169_pad_0, pad_type = x_169_pad_type_0, strides = x_169_strides_0, weight = encoder_module_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_373_cast_fp16)[name = tensor("x_169_cast_fp16")]; tensor input_375_perm_0 = const()[name = tensor("input_375_perm_0"), val = tensor([0, 2, 1])]; tensor input_375_cast_fp16 = transpose(perm = input_375_perm_0, x = x_169_cast_fp16)[name = tensor("transpose_264")]; tensor input_377_cast_fp16 = add(x = input_359_cast_fp16, y = input_375_cast_fp16)[name = tensor("input_377_cast_fp16")]; tensor input_379_axes_0 = const()[name = tensor("input_379_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168263168)))]; - tensor encoder_module_layers_6_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168265280)))]; + tensor encoder_module_layers_6_norm_feed_forward2_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_6_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168610688)))]; + tensor encoder_module_layers_6_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168612800)))]; tensor input_379_cast_fp16 = layer_norm(axes = input_379_axes_0, beta = encoder_module_layers_6_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_feed_forward2_weight_to_fp16, x = input_377_cast_fp16)[name = tensor("input_379_cast_fp16")]; - tensor encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168267392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172461760))), name = tensor("encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_palettized, x = input_379_cast_fp16)[name = tensor("linear_62_cast_fp16")]; + tensor encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(168614912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172813440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172809280)))]; + tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward2_linear1_weight_to_fp16_quantized, x = input_379_cast_fp16)[name = tensor("linear_62_cast_fp16")]; tensor input_383_cast_fp16 = silu(x = linear_62_cast_fp16)[name = tensor("input_383_cast_fp16")]; - tensor encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172462336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176656704))), name = tensor("encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_63_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_palettized, x = input_383_cast_fp16)[name = tensor("linear_63_cast_fp16")]; + tensor encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172821696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177017152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177016064)))]; + tensor linear_63_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_6_feed_forward2_linear2_weight_to_fp16_quantized, x = input_383_cast_fp16)[name = tensor("linear_63_cast_fp16")]; tensor var_1473_to_fp16 = const()[name = tensor("op_1473_to_fp16"), val = tensor(0x1p-1)]; tensor var_1474_cast_fp16 = mul(x = linear_63_cast_fp16, y = var_1473_to_fp16)[name = tensor("op_1474_cast_fp16")]; tensor input_389_cast_fp16 = add(x = input_377_cast_fp16, y = var_1474_cast_fp16)[name = tensor("input_389_cast_fp16")]; tensor input_391_axes_0 = const()[name = tensor("input_391_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_6_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176657280)))]; - tensor encoder_module_layers_6_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176659392)))]; + tensor encoder_module_layers_6_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177019264)))]; + tensor encoder_module_layers_6_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_6_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177021376)))]; tensor input_391_cast_fp16 = layer_norm(axes = input_391_axes_0, beta = encoder_module_layers_6_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_6_norm_out_weight_to_fp16, x = input_389_cast_fp16)[name = tensor("input_391_cast_fp16")]; tensor input_393_axes_0 = const()[name = tensor("input_393_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_7_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176661504)))]; - tensor encoder_module_layers_7_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176663616)))]; + tensor encoder_module_layers_7_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177023488)))]; + tensor encoder_module_layers_7_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177025600)))]; tensor input_393_cast_fp16 = layer_norm(axes = input_393_axes_0, beta = encoder_module_layers_7_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_feed_forward1_weight_to_fp16, x = input_391_cast_fp16)[name = tensor("input_393_cast_fp16")]; - tensor encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176665728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(180860096))), name = tensor("encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_palettized"), shape = 
tensor([4096, 1024])]; - tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_palettized, x = input_393_cast_fp16)[name = tensor("linear_64_cast_fp16")]; + tensor encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(177027712))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181226240))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181222080)))]; + tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward1_linear1_weight_to_fp16_quantized, x = input_393_cast_fp16)[name = tensor("linear_64_cast_fp16")]; tensor input_397_cast_fp16 = silu(x = linear_64_cast_fp16)[name = tensor("input_397_cast_fp16")]; - tensor encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(180860672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185055040))), name = tensor("encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_65_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_palettized, x = input_397_cast_fp16)[name = tensor("linear_65_cast_fp16")]; + tensor encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181234496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185429952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185428864)))]; + tensor linear_65_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward1_linear2_weight_to_fp16_quantized, x = input_397_cast_fp16)[name = tensor("linear_65_cast_fp16")]; tensor var_1502_to_fp16 = const()[name = tensor("op_1502_to_fp16"), val = tensor(0x1p-1)]; tensor var_1503_cast_fp16 = mul(x = linear_65_cast_fp16, y = var_1502_to_fp16)[name = tensor("op_1503_cast_fp16")]; tensor input_403_cast_fp16 = add(x = input_391_cast_fp16, y = var_1503_cast_fp16)[name = tensor("input_403_cast_fp16")]; tensor query_15_axes_0 = const()[name = tensor("query_15_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_7_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185055616)))]; - tensor encoder_module_layers_7_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185057728)))]; + tensor encoder_module_layers_7_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_self_att_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185432064)))]; + tensor encoder_module_layers_7_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185434176)))]; tensor query_15_cast_fp16 = layer_norm(axes = query_15_axes_0, beta = encoder_module_layers_7_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_self_att_weight_to_fp16, x = input_403_cast_fp16)[name = tensor("query_15_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185059840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186108480))), name = tensor("encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_66_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_palettized, x = query_15_cast_fp16)[name = tensor("linear_66_cast_fp16")]; + tensor encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185436288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186486016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186484928)))]; + tensor linear_66_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_q_weight_to_fp16_quantized, x = query_15_cast_fp16)[name = tensor("linear_66_cast_fp16")]; tensor var_1519 = const()[name = tensor("op_1519"), val = tensor([1, -1, 8, 128])]; tensor q_43_cast_fp16 = reshape(shape = var_1519, x = linear_66_cast_fp16)[name = tensor("q_43_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186109056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187157696))), name = tensor("encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_67_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_palettized, x = query_15_cast_fp16)[name = tensor("linear_67_cast_fp16")]; + tensor encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186488128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187537856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187536768)))]; + tensor linear_67_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_k_weight_to_fp16_quantized, x = query_15_cast_fp16)[name = 
tensor("linear_67_cast_fp16")]; tensor var_1523 = const()[name = tensor("op_1523"), val = tensor([1, -1, 8, 128])]; tensor k_29_cast_fp16 = reshape(shape = var_1523, x = linear_67_cast_fp16)[name = tensor("k_29_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187158272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188206912))), name = tensor("encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_68_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_palettized, x = query_15_cast_fp16)[name = tensor("linear_68_cast_fp16")]; + tensor encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187539968))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188589696))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188588608)))]; + tensor linear_68_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_v_weight_to_fp16_quantized, x = query_15_cast_fp16)[name = tensor("linear_68_cast_fp16")]; tensor var_1527 = const()[name = tensor("op_1527"), val = tensor([1, -1, 8, 128])]; tensor v_15_cast_fp16 = reshape(shape = var_1527, x = linear_68_cast_fp16)[name = tensor("v_15_cast_fp16")]; tensor value_19_perm_0 = const()[name = tensor("value_19_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_7_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_7_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188207488)))]; + tensor encoder_module_layers_7_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_7_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188591808)))]; tensor var_1539_cast_fp16 = add(x = q_43_cast_fp16, y = encoder_module_layers_7_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1539_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_7_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188209600)))]; + tensor encoder_module_layers_7_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_7_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188593920)))]; tensor var_1541_cast_fp16 = add(x = q_43_cast_fp16, y = encoder_module_layers_7_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1541_cast_fp16")]; tensor q_with_bias_v_15_perm_0 = const()[name = tensor("q_with_bias_v_15_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_177_transpose_x_0 = const()[name = tensor("x_177_transpose_x_0"), val = tensor(false)]; tensor x_177_transpose_y_0 = const()[name = tensor("x_177_transpose_y_0"), val = tensor(false)]; - tensor op_1543_to_fp16_palettized = 
constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188211712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188595776))), name = tensor("op_1543_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1543_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1543_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188596032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188980544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188980096)))]; tensor q_with_bias_v_15_cast_fp16 = transpose(perm = q_with_bias_v_15_perm_0, x = var_1541_cast_fp16)[name = tensor("transpose_263")]; - tensor x_177_cast_fp16 = matmul(transpose_x = x_177_transpose_x_0, transpose_y = x_177_transpose_y_0, x = q_with_bias_v_15_cast_fp16, y = op_1543_to_fp16_palettized)[name = tensor("x_177_cast_fp16")]; + tensor x_177_cast_fp16 = matmul(transpose_x = x_177_transpose_x_0, transpose_y = x_177_transpose_y_0, x = q_with_bias_v_15_cast_fp16, y = op_1543_to_fp16_quantized)[name = tensor("x_177_cast_fp16")]; tensor x_179_pad_0 = const()[name = tensor("x_179_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_179_mode_0 = const()[name = tensor("x_179_mode_0"), val = tensor("constant")]; tensor const_99_to_fp16 = const()[name = tensor("const_99_to_fp16"), val = tensor(0x0p+0)]; @@ -1258,12 +1258,12 @@ program(1.0) tensor var_1576 = const()[name = tensor("op_1576"), val = tensor([1, -1, 1024])]; tensor var_1575_cast_fp16 = transpose(perm = var_1575_perm_0, x = x_183_cast_fp16)[name = tensor("transpose_259")]; tensor input_407_cast_fp16 = reshape(shape = var_1576, x = var_1575_cast_fp16)[name = tensor("input_407_cast_fp16")]; - tensor encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188596352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189644992))), name = tensor("encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_70_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_palettized, x = input_407_cast_fp16)[name = tensor("linear_70_cast_fp16")]; + tensor encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188981376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190031104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190030016)))]; + tensor linear_70_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_self_attn_linear_out_weight_to_fp16_quantized, x = input_407_cast_fp16)[name = tensor("linear_70_cast_fp16")]; tensor input_411_cast_fp16 = add(x = input_403_cast_fp16, y = linear_70_cast_fp16)[name = tensor("input_411_cast_fp16")]; tensor x_187_axes_0 = const()[name = tensor("x_187_axes_0"), val = tensor([-1])]; - tensor 
encoder_module_layers_7_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189645568)))]; - tensor encoder_module_layers_7_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189647680)))]; + tensor encoder_module_layers_7_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190033216)))]; + tensor encoder_module_layers_7_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190035328)))]; tensor x_187_cast_fp16 = layer_norm(axes = x_187_axes_0, beta = encoder_module_layers_7_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_conv_weight_to_fp16, x = input_411_cast_fp16)[name = tensor("x_187_cast_fp16")]; tensor input_413_perm_0 = const()[name = tensor("input_413_perm_0"), val = tensor([0, 2, 1])]; tensor input_415_pad_type_0 = const()[name = tensor("input_415_pad_type_0"), val = tensor("valid")]; @@ -1271,9 +1271,9 @@ program(1.0) tensor input_415_pad_0 = const()[name = tensor("input_415_pad_0"), val = tensor([0, 0])]; tensor input_415_dilations_0 = const()[name = tensor("input_415_dilations_0"), val = tensor([1])]; tensor input_415_groups_0 = const()[name = tensor("input_415_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189649792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191747008))), name = tensor("encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190037440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192136768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192134656)))]; tensor input_413_cast_fp16 = transpose(perm = input_413_perm_0, x = x_187_cast_fp16)[name = tensor("transpose_258")]; - tensor input_415_cast_fp16 = conv(dilations = input_415_dilations_0, groups = input_415_groups_0, pad = input_415_pad_0, pad_type = input_415_pad_type_0, strides = input_415_strides_0, weight = encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_413_cast_fp16)[name = tensor("input_415_cast_fp16")]; + tensor input_415_cast_fp16 = conv(dilations = input_415_dilations_0, groups = input_415_groups_0, pad = input_415_pad_0, pad_type = input_415_pad_type_0, strides = input_415_strides_0, weight = encoder_module_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_413_cast_fp16)[name = tensor("input_415_cast_fp16")]; tensor x_189_split_num_splits_0 = const()[name = tensor("x_189_split_num_splits_0"), val = 
tensor(2)]; tensor x_189_split_axis_0 = const()[name = tensor("x_189_split_axis_0"), val = tensor(1)]; tensor x_189_split_cast_fp16_0, tensor x_189_split_cast_fp16_1 = split(axis = x_189_split_axis_0, num_splits = x_189_split_num_splits_0, x = input_415_cast_fp16)[name = tensor("x_189_split_cast_fp16")]; @@ -1289,75 +1289,75 @@ program(1.0) tensor input_421_strides_0 = const()[name = tensor("input_421_strides_0"), val = tensor([1])]; tensor input_421_pad_0 = const()[name = tensor("input_421_pad_0"), val = tensor([0, 0])]; tensor input_421_dilations_0 = const()[name = tensor("input_421_dilations_0"), val = tensor([1])]; - tensor const_277_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191747584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191756864))), name = tensor("const_277_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_278_to_fp16 = const()[name = tensor("const_278_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191757440)))]; - tensor input_423_cast_fp16 = conv(bias = const_278_to_fp16, dilations = input_421_dilations_0, groups = input_421_groups_0, pad = input_421_pad_0, pad_type = input_421_pad_type_0, strides = input_421_strides_0, weight = const_277_to_fp16_palettized, x = input_419_cast_fp16)[name = tensor("input_423_cast_fp16")]; + tensor const_277_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_277_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192140928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192151296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192150208)))]; + tensor const_278_to_fp16 = const()[name = tensor("const_278_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192153408)))]; + tensor input_423_cast_fp16 = conv(bias = const_278_to_fp16, dilations = input_421_dilations_0, groups = input_421_groups_0, pad = input_421_pad_0, pad_type = input_421_pad_type_0, strides = input_421_strides_0, weight = const_277_to_fp16_quantized, x = input_419_cast_fp16)[name = tensor("input_423_cast_fp16")]; tensor input_425_cast_fp16 = silu(x = input_423_cast_fp16)[name = tensor("input_425_cast_fp16")]; tensor x_191_pad_type_0 = const()[name = tensor("x_191_pad_type_0"), val = tensor("valid")]; tensor x_191_strides_0 = const()[name = tensor("x_191_strides_0"), val = tensor([1])]; tensor x_191_pad_0 = const()[name = tensor("x_191_pad_0"), val = tensor([0, 0])]; tensor x_191_dilations_0 = const()[name = tensor("x_191_dilations_0"), val = tensor([1])]; tensor x_191_groups_0 = const()[name = tensor("x_191_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191759552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192808192))), name = tensor("encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight 
= encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_425_cast_fp16)[name = tensor("x_191_cast_fp16")]; + tensor encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192155520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193205248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193204160)))]; + tensor x_191_cast_fp16 = conv(dilations = x_191_dilations_0, groups = x_191_groups_0, pad = x_191_pad_0, pad_type = x_191_pad_type_0, strides = x_191_strides_0, weight = encoder_module_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_425_cast_fp16)[name = tensor("x_191_cast_fp16")]; tensor input_427_perm_0 = const()[name = tensor("input_427_perm_0"), val = tensor([0, 2, 1])]; tensor input_427_cast_fp16 = transpose(perm = input_427_perm_0, x = x_191_cast_fp16)[name = tensor("transpose_257")]; tensor input_429_cast_fp16 = add(x = input_411_cast_fp16, y = input_427_cast_fp16)[name = tensor("input_429_cast_fp16")]; tensor input_431_axes_0 = const()[name = tensor("input_431_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_7_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192808768)))]; - tensor encoder_module_layers_7_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192810880)))]; + tensor encoder_module_layers_7_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193207360)))]; + tensor encoder_module_layers_7_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193209472)))]; tensor input_431_cast_fp16 = layer_norm(axes = input_431_axes_0, beta = encoder_module_layers_7_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_feed_forward2_weight_to_fp16, x = input_429_cast_fp16)[name = tensor("input_431_cast_fp16")]; - tensor encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(192812992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197007360))), name = tensor("encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_palettized, x = input_431_cast_fp16)[name = tensor("linear_71_cast_fp16")]; + tensor encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(193211584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197410112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197405952)))]; + tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward2_linear1_weight_to_fp16_quantized, x = input_431_cast_fp16)[name = tensor("linear_71_cast_fp16")]; tensor input_435_cast_fp16 = silu(x = linear_71_cast_fp16)[name = tensor("input_435_cast_fp16")]; - tensor encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197007936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201202304))), name = tensor("encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_72_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_palettized, x = input_435_cast_fp16)[name = tensor("linear_72_cast_fp16")]; + tensor encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197418368))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201613824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201612736)))]; + tensor linear_72_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_7_feed_forward2_linear2_weight_to_fp16_quantized, x = input_435_cast_fp16)[name = tensor("linear_72_cast_fp16")]; tensor var_1636_to_fp16 = const()[name = tensor("op_1636_to_fp16"), val = tensor(0x1p-1)]; tensor var_1637_cast_fp16 = mul(x = linear_72_cast_fp16, y = var_1636_to_fp16)[name = tensor("op_1637_cast_fp16")]; tensor input_441_cast_fp16 = add(x = input_429_cast_fp16, y = var_1637_cast_fp16)[name = tensor("input_441_cast_fp16")]; tensor input_443_axes_0 = const()[name = tensor("input_443_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_7_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201202880)))]; - tensor encoder_module_layers_7_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201204992)))]; + tensor encoder_module_layers_7_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201615936)))]; + tensor encoder_module_layers_7_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_7_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201618048)))]; tensor input_443_cast_fp16 = layer_norm(axes = input_443_axes_0, beta = 
encoder_module_layers_7_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_7_norm_out_weight_to_fp16, x = input_441_cast_fp16)[name = tensor("input_443_cast_fp16")]; tensor input_445_axes_0 = const()[name = tensor("input_445_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201207104)))]; - tensor encoder_module_layers_8_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201209216)))]; + tensor encoder_module_layers_8_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201620160)))]; + tensor encoder_module_layers_8_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201622272)))]; tensor input_445_cast_fp16 = layer_norm(axes = input_445_axes_0, beta = encoder_module_layers_8_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_feed_forward1_weight_to_fp16, x = input_443_cast_fp16)[name = tensor("input_445_cast_fp16")]; - tensor encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201211328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205405696))), name = tensor("encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_palettized, x = input_445_cast_fp16)[name = tensor("linear_73_cast_fp16")]; + tensor encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201624384))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205822912))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205818752)))]; + tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward1_linear1_weight_to_fp16_quantized, x = input_445_cast_fp16)[name = tensor("linear_73_cast_fp16")]; tensor input_449_cast_fp16 = silu(x = linear_73_cast_fp16)[name = tensor("input_449_cast_fp16")]; - tensor encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205406272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209600640))), name = tensor("encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_74_cast_fp16 
= linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_palettized, x = input_449_cast_fp16)[name = tensor("linear_74_cast_fp16")]; + tensor encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(205831168))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210026624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210025536)))]; + tensor linear_74_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward1_linear2_weight_to_fp16_quantized, x = input_449_cast_fp16)[name = tensor("linear_74_cast_fp16")]; tensor var_1665_to_fp16 = const()[name = tensor("op_1665_to_fp16"), val = tensor(0x1p-1)]; tensor var_1666_cast_fp16 = mul(x = linear_74_cast_fp16, y = var_1665_to_fp16)[name = tensor("op_1666_cast_fp16")]; tensor input_455_cast_fp16 = add(x = input_443_cast_fp16, y = var_1666_cast_fp16)[name = tensor("input_455_cast_fp16")]; tensor query_17_axes_0 = const()[name = tensor("query_17_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209601216)))]; - tensor encoder_module_layers_8_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209603328)))]; + tensor encoder_module_layers_8_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210028736)))]; + tensor encoder_module_layers_8_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210030848)))]; tensor query_17_cast_fp16 = layer_norm(axes = query_17_axes_0, beta = encoder_module_layers_8_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_self_att_weight_to_fp16, x = input_455_cast_fp16)[name = tensor("query_17_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209605440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210654080))), name = tensor("encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_75_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_palettized, x = query_17_cast_fp16)[name = tensor("linear_75_cast_fp16")]; + tensor encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(210032960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211082688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211081600)))]; + tensor linear_75_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_q_weight_to_fp16_quantized, x = query_17_cast_fp16)[name = tensor("linear_75_cast_fp16")]; tensor var_1682 = const()[name = tensor("op_1682"), val = tensor([1, -1, 8, 128])]; tensor q_49_cast_fp16 = reshape(shape = var_1682, x = linear_75_cast_fp16)[name = tensor("q_49_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210654656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211703296))), name = tensor("encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_76_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_palettized, x = query_17_cast_fp16)[name = tensor("linear_76_cast_fp16")]; + tensor encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211084800))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212134528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212133440)))]; + tensor linear_76_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_k_weight_to_fp16_quantized, x = query_17_cast_fp16)[name = tensor("linear_76_cast_fp16")]; tensor var_1686 = const()[name = tensor("op_1686"), val = tensor([1, -1, 8, 128])]; tensor k_33_cast_fp16 = reshape(shape = var_1686, x = linear_76_cast_fp16)[name = tensor("k_33_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211703872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212752512))), name = tensor("encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_77_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_palettized, x = query_17_cast_fp16)[name = tensor("linear_77_cast_fp16")]; + tensor encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212136640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213186368))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213185280)))]; + tensor linear_77_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_v_weight_to_fp16_quantized, x = 
query_17_cast_fp16)[name = tensor("linear_77_cast_fp16")]; tensor var_1690 = const()[name = tensor("op_1690"), val = tensor([1, -1, 8, 128])]; tensor v_17_cast_fp16 = reshape(shape = var_1690, x = linear_77_cast_fp16)[name = tensor("v_17_cast_fp16")]; tensor value_21_perm_0 = const()[name = tensor("value_21_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_8_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_8_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212753088)))]; + tensor encoder_module_layers_8_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_8_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213188480)))]; tensor var_1702_cast_fp16 = add(x = q_49_cast_fp16, y = encoder_module_layers_8_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1702_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_8_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212755200)))]; + tensor encoder_module_layers_8_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_8_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213190592)))]; tensor var_1704_cast_fp16 = add(x = q_49_cast_fp16, y = encoder_module_layers_8_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1704_cast_fp16")]; tensor q_with_bias_v_17_perm_0 = const()[name = tensor("q_with_bias_v_17_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_199_transpose_x_0 = const()[name = tensor("x_199_transpose_x_0"), val = tensor(false)]; tensor x_199_transpose_y_0 = const()[name = tensor("x_199_transpose_y_0"), val = tensor(false)]; - tensor op_1706_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212757312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213141376))), name = tensor("op_1706_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1706_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1706_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213192704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213577216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213576768)))]; tensor q_with_bias_v_17_cast_fp16 = transpose(perm = q_with_bias_v_17_perm_0, x = var_1704_cast_fp16)[name = tensor("transpose_256")]; - tensor x_199_cast_fp16 = matmul(transpose_x = x_199_transpose_x_0, transpose_y = x_199_transpose_y_0, x = q_with_bias_v_17_cast_fp16, y = op_1706_to_fp16_palettized)[name = tensor("x_199_cast_fp16")]; + tensor x_199_cast_fp16 = matmul(transpose_x = x_199_transpose_x_0, transpose_y = x_199_transpose_y_0, x = q_with_bias_v_17_cast_fp16, y = op_1706_to_fp16_quantized)[name = tensor("x_199_cast_fp16")]; tensor x_201_pad_0 = const()[name = tensor("x_201_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_201_mode_0 = const()[name = tensor("x_201_mode_0"), val = tensor("constant")]; tensor const_109_to_fp16 = const()[name = tensor("const_109_to_fp16"), val = 
tensor(0x0p+0)]; @@ -1395,12 +1395,12 @@ program(1.0) tensor var_1739 = const()[name = tensor("op_1739"), val = tensor([1, -1, 1024])]; tensor var_1738_cast_fp16 = transpose(perm = var_1738_perm_0, x = x_205_cast_fp16)[name = tensor("transpose_252")]; tensor input_459_cast_fp16 = reshape(shape = var_1739, x = var_1738_cast_fp16)[name = tensor("input_459_cast_fp16")]; - tensor encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213141952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214190592))), name = tensor("encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_79_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_palettized, x = input_459_cast_fp16)[name = tensor("linear_79_cast_fp16")]; + tensor encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213578048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214627776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214626688)))]; + tensor linear_79_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_self_attn_linear_out_weight_to_fp16_quantized, x = input_459_cast_fp16)[name = tensor("linear_79_cast_fp16")]; tensor input_463_cast_fp16 = add(x = input_455_cast_fp16, y = linear_79_cast_fp16)[name = tensor("input_463_cast_fp16")]; tensor x_209_axes_0 = const()[name = tensor("x_209_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214191168)))]; - tensor encoder_module_layers_8_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214193280)))]; + tensor encoder_module_layers_8_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214629888)))]; + tensor encoder_module_layers_8_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214632000)))]; tensor x_209_cast_fp16 = layer_norm(axes = x_209_axes_0, beta = encoder_module_layers_8_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_conv_weight_to_fp16, x = input_463_cast_fp16)[name = tensor("x_209_cast_fp16")]; tensor input_465_perm_0 = const()[name = tensor("input_465_perm_0"), val = tensor([0, 2, 1])]; tensor input_467_pad_type_0 = const()[name = tensor("input_467_pad_type_0"), val = tensor("valid")]; @@ -1408,9 +1408,9 @@ program(1.0) tensor input_467_pad_0 = const()[name = tensor("input_467_pad_0"), val = tensor([0, 0])]; tensor input_467_dilations_0 = const()[name = 
tensor("input_467_dilations_0"), val = tensor([1])]; tensor input_467_groups_0 = const()[name = tensor("input_467_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214195392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216292608))), name = tensor("encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214634112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216733440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216731328)))]; tensor input_465_cast_fp16 = transpose(perm = input_465_perm_0, x = x_209_cast_fp16)[name = tensor("transpose_251")]; - tensor input_467_cast_fp16 = conv(dilations = input_467_dilations_0, groups = input_467_groups_0, pad = input_467_pad_0, pad_type = input_467_pad_type_0, strides = input_467_strides_0, weight = encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_465_cast_fp16)[name = tensor("input_467_cast_fp16")]; + tensor input_467_cast_fp16 = conv(dilations = input_467_dilations_0, groups = input_467_groups_0, pad = input_467_pad_0, pad_type = input_467_pad_type_0, strides = input_467_strides_0, weight = encoder_module_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_465_cast_fp16)[name = tensor("input_467_cast_fp16")]; tensor x_211_split_num_splits_0 = const()[name = tensor("x_211_split_num_splits_0"), val = tensor(2)]; tensor x_211_split_axis_0 = const()[name = tensor("x_211_split_axis_0"), val = tensor(1)]; tensor x_211_split_cast_fp16_0, tensor x_211_split_cast_fp16_1 = split(axis = x_211_split_axis_0, num_splits = x_211_split_num_splits_0, x = input_467_cast_fp16)[name = tensor("x_211_split_cast_fp16")]; @@ -1426,75 +1426,75 @@ program(1.0) tensor input_473_strides_0 = const()[name = tensor("input_473_strides_0"), val = tensor([1])]; tensor input_473_pad_0 = const()[name = tensor("input_473_pad_0"), val = tensor([0, 0])]; tensor input_473_dilations_0 = const()[name = tensor("input_473_dilations_0"), val = tensor([1])]; - tensor const_279_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216293184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216302464))), name = tensor("const_279_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_280_to_fp16 = const()[name = tensor("const_280_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216303040)))]; - tensor input_475_cast_fp16 = conv(bias = const_280_to_fp16, dilations = input_473_dilations_0, groups = input_473_groups_0, pad = input_473_pad_0, pad_type = input_473_pad_type_0, strides = input_473_strides_0, weight = const_279_to_fp16_palettized, x = input_471_cast_fp16)[name = tensor("input_475_cast_fp16")]; + tensor const_279_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("const_279_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216737600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216747968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216746880)))]; + tensor const_280_to_fp16 = const()[name = tensor("const_280_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216750080)))]; + tensor input_475_cast_fp16 = conv(bias = const_280_to_fp16, dilations = input_473_dilations_0, groups = input_473_groups_0, pad = input_473_pad_0, pad_type = input_473_pad_type_0, strides = input_473_strides_0, weight = const_279_to_fp16_quantized, x = input_471_cast_fp16)[name = tensor("input_475_cast_fp16")]; tensor input_477_cast_fp16 = silu(x = input_475_cast_fp16)[name = tensor("input_477_cast_fp16")]; tensor x_213_pad_type_0 = const()[name = tensor("x_213_pad_type_0"), val = tensor("valid")]; tensor x_213_strides_0 = const()[name = tensor("x_213_strides_0"), val = tensor([1])]; tensor x_213_pad_0 = const()[name = tensor("x_213_pad_0"), val = tensor([0, 0])]; tensor x_213_dilations_0 = const()[name = tensor("x_213_dilations_0"), val = tensor([1])]; tensor x_213_groups_0 = const()[name = tensor("x_213_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216305152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217353792))), name = tensor("encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_213_cast_fp16 = conv(dilations = x_213_dilations_0, groups = x_213_groups_0, pad = x_213_pad_0, pad_type = x_213_pad_type_0, strides = x_213_strides_0, weight = encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_477_cast_fp16)[name = tensor("x_213_cast_fp16")]; + tensor encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216752192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217801920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217800832)))]; + tensor x_213_cast_fp16 = conv(dilations = x_213_dilations_0, groups = x_213_groups_0, pad = x_213_pad_0, pad_type = x_213_pad_type_0, strides = x_213_strides_0, weight = encoder_module_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_477_cast_fp16)[name = tensor("x_213_cast_fp16")]; tensor input_479_perm_0 = const()[name = tensor("input_479_perm_0"), val = tensor([0, 2, 1])]; tensor input_479_cast_fp16 = transpose(perm = input_479_perm_0, x = x_213_cast_fp16)[name = tensor("transpose_250")]; tensor input_481_cast_fp16 = add(x = input_463_cast_fp16, y = input_479_cast_fp16)[name = tensor("input_481_cast_fp16")]; tensor input_483_axes_0 = const()[name = tensor("input_483_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward2_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217354368)))]; - tensor encoder_module_layers_8_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217356480)))]; + tensor encoder_module_layers_8_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217804032)))]; + tensor encoder_module_layers_8_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217806144)))]; tensor input_483_cast_fp16 = layer_norm(axes = input_483_axes_0, beta = encoder_module_layers_8_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_feed_forward2_weight_to_fp16, x = input_481_cast_fp16)[name = tensor("input_483_cast_fp16")]; - tensor encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217358592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221552960))), name = tensor("encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_palettized, x = input_483_cast_fp16)[name = tensor("linear_80_cast_fp16")]; + tensor encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217808256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222006784))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222002624)))]; + tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward2_linear1_weight_to_fp16_quantized, x = input_483_cast_fp16)[name = tensor("linear_80_cast_fp16")]; tensor input_487_cast_fp16 = silu(x = linear_80_cast_fp16)[name = tensor("input_487_cast_fp16")]; - tensor encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221553536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225747904))), name = tensor("encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_81_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_palettized, x = input_487_cast_fp16)[name = tensor("linear_81_cast_fp16")]; + tensor encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path 
= tensor("@model_path/weights/weight.bin"), offset = tensor(222015040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226210496))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226209408)))]; + tensor linear_81_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_8_feed_forward2_linear2_weight_to_fp16_quantized, x = input_487_cast_fp16)[name = tensor("linear_81_cast_fp16")]; tensor var_1799_to_fp16 = const()[name = tensor("op_1799_to_fp16"), val = tensor(0x1p-1)]; tensor var_1800_cast_fp16 = mul(x = linear_81_cast_fp16, y = var_1799_to_fp16)[name = tensor("op_1800_cast_fp16")]; tensor input_493_cast_fp16 = add(x = input_481_cast_fp16, y = var_1800_cast_fp16)[name = tensor("input_493_cast_fp16")]; tensor input_495_axes_0 = const()[name = tensor("input_495_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_8_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225748480)))]; - tensor encoder_module_layers_8_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225750592)))]; + tensor encoder_module_layers_8_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226212608)))]; + tensor encoder_module_layers_8_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_8_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226214720)))]; tensor input_495_cast_fp16 = layer_norm(axes = input_495_axes_0, beta = encoder_module_layers_8_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_8_norm_out_weight_to_fp16, x = input_493_cast_fp16)[name = tensor("input_495_cast_fp16")]; tensor input_497_axes_0 = const()[name = tensor("input_497_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225752704)))]; - tensor encoder_module_layers_9_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225754816)))]; + tensor encoder_module_layers_9_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226216832)))]; + tensor encoder_module_layers_9_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226218944)))]; tensor input_497_cast_fp16 = layer_norm(axes = input_497_axes_0, beta = encoder_module_layers_9_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_feed_forward1_weight_to_fp16, x = input_495_cast_fp16)[name = tensor("input_497_cast_fp16")]; - tensor 
encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225756928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(229951296))), name = tensor("encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_82_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_palettized, x = input_497_cast_fp16)[name = tensor("linear_82_cast_fp16")]; + tensor encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226221056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(230419584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(230415424)))]; + tensor linear_82_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward1_linear1_weight_to_fp16_quantized, x = input_497_cast_fp16)[name = tensor("linear_82_cast_fp16")]; tensor input_501_cast_fp16 = silu(x = linear_82_cast_fp16)[name = tensor("input_501_cast_fp16")]; - tensor encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(229951872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234146240))), name = tensor("encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_83_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_palettized, x = input_501_cast_fp16)[name = tensor("linear_83_cast_fp16")]; + tensor encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(230427840))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234623296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234622208)))]; + tensor linear_83_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward1_linear2_weight_to_fp16_quantized, x = input_501_cast_fp16)[name = tensor("linear_83_cast_fp16")]; tensor var_1828_to_fp16 = const()[name = tensor("op_1828_to_fp16"), val = tensor(0x1p-1)]; tensor var_1829_cast_fp16 = mul(x = linear_83_cast_fp16, y = var_1828_to_fp16)[name = tensor("op_1829_cast_fp16")]; tensor input_507_cast_fp16 = add(x = input_495_cast_fp16, y = var_1829_cast_fp16)[name = tensor("input_507_cast_fp16")]; tensor query_19_axes_0 = const()[name = tensor("query_19_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(234146816)))]; - tensor encoder_module_layers_9_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234148928)))]; + tensor encoder_module_layers_9_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234625408)))]; + tensor encoder_module_layers_9_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234627520)))]; tensor query_19_cast_fp16 = layer_norm(axes = query_19_axes_0, beta = encoder_module_layers_9_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_self_att_weight_to_fp16, x = input_507_cast_fp16)[name = tensor("query_19_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234151040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235199680))), name = tensor("encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_84_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_palettized, x = query_19_cast_fp16)[name = tensor("linear_84_cast_fp16")]; + tensor encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234629632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235679360))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235678272)))]; + tensor linear_84_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_q_weight_to_fp16_quantized, x = query_19_cast_fp16)[name = tensor("linear_84_cast_fp16")]; tensor var_1845 = const()[name = tensor("op_1845"), val = tensor([1, -1, 8, 128])]; tensor q_55_cast_fp16 = reshape(shape = var_1845, x = linear_84_cast_fp16)[name = tensor("q_55_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235200256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236248896))), name = tensor("encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_85_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_palettized, x = query_19_cast_fp16)[name = tensor("linear_85_cast_fp16")]; + tensor encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(235681472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236731200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236730112)))]; + tensor linear_85_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_k_weight_to_fp16_quantized, x = query_19_cast_fp16)[name = tensor("linear_85_cast_fp16")]; tensor var_1849 = const()[name = tensor("op_1849"), val = tensor([1, -1, 8, 128])]; tensor k_37_cast_fp16 = reshape(shape = var_1849, x = linear_85_cast_fp16)[name = tensor("k_37_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236249472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237298112))), name = tensor("encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_86_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_palettized, x = query_19_cast_fp16)[name = tensor("linear_86_cast_fp16")]; + tensor encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236733312))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237783040))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237781952)))]; + tensor linear_86_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_v_weight_to_fp16_quantized, x = query_19_cast_fp16)[name = tensor("linear_86_cast_fp16")]; tensor var_1853 = const()[name = tensor("op_1853"), val = tensor([1, -1, 8, 128])]; tensor v_19_cast_fp16 = reshape(shape = var_1853, x = linear_86_cast_fp16)[name = tensor("v_19_cast_fp16")]; tensor value_23_perm_0 = const()[name = tensor("value_23_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_9_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_9_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237298688)))]; + tensor encoder_module_layers_9_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_9_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237785152)))]; tensor var_1865_cast_fp16 = add(x = q_55_cast_fp16, y = encoder_module_layers_9_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1865_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_9_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237300800)))]; + tensor encoder_module_layers_9_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_9_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237787264)))]; tensor var_1867_cast_fp16 = add(x = q_55_cast_fp16, y = 
encoder_module_layers_9_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1867_cast_fp16")]; tensor q_with_bias_v_19_perm_0 = const()[name = tensor("q_with_bias_v_19_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_221_transpose_x_0 = const()[name = tensor("x_221_transpose_x_0"), val = tensor(false)]; tensor x_221_transpose_y_0 = const()[name = tensor("x_221_transpose_y_0"), val = tensor(false)]; - tensor op_1869_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237302912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237686976))), name = tensor("op_1869_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_1869_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1869_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237789376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238173888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238173440)))]; tensor q_with_bias_v_19_cast_fp16 = transpose(perm = q_with_bias_v_19_perm_0, x = var_1867_cast_fp16)[name = tensor("transpose_249")]; - tensor x_221_cast_fp16 = matmul(transpose_x = x_221_transpose_x_0, transpose_y = x_221_transpose_y_0, x = q_with_bias_v_19_cast_fp16, y = op_1869_to_fp16_palettized)[name = tensor("x_221_cast_fp16")]; + tensor x_221_cast_fp16 = matmul(transpose_x = x_221_transpose_x_0, transpose_y = x_221_transpose_y_0, x = q_with_bias_v_19_cast_fp16, y = op_1869_to_fp16_quantized)[name = tensor("x_221_cast_fp16")]; tensor x_223_pad_0 = const()[name = tensor("x_223_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_223_mode_0 = const()[name = tensor("x_223_mode_0"), val = tensor("constant")]; tensor const_119_to_fp16 = const()[name = tensor("const_119_to_fp16"), val = tensor(0x0p+0)]; @@ -1532,12 +1532,12 @@ program(1.0) tensor var_1902 = const()[name = tensor("op_1902"), val = tensor([1, -1, 1024])]; tensor var_1901_cast_fp16 = transpose(perm = var_1901_perm_0, x = x_227_cast_fp16)[name = tensor("transpose_245")]; tensor input_511_cast_fp16 = reshape(shape = var_1902, x = var_1901_cast_fp16)[name = tensor("input_511_cast_fp16")]; - tensor encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237687552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238736192))), name = tensor("encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_88_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_palettized, x = input_511_cast_fp16)[name = tensor("linear_88_cast_fp16")]; + tensor encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238174720))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239224448))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(239223360)))]; + tensor linear_88_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_self_attn_linear_out_weight_to_fp16_quantized, x = input_511_cast_fp16)[name = tensor("linear_88_cast_fp16")]; tensor input_515_cast_fp16 = add(x = input_507_cast_fp16, y = linear_88_cast_fp16)[name = tensor("input_515_cast_fp16")]; tensor x_231_axes_0 = const()[name = tensor("x_231_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238736768)))]; - tensor encoder_module_layers_9_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238738880)))]; + tensor encoder_module_layers_9_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239226560)))]; + tensor encoder_module_layers_9_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239228672)))]; tensor x_231_cast_fp16 = layer_norm(axes = x_231_axes_0, beta = encoder_module_layers_9_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_conv_weight_to_fp16, x = input_515_cast_fp16)[name = tensor("x_231_cast_fp16")]; tensor input_517_perm_0 = const()[name = tensor("input_517_perm_0"), val = tensor([0, 2, 1])]; tensor input_519_pad_type_0 = const()[name = tensor("input_519_pad_type_0"), val = tensor("valid")]; @@ -1545,9 +1545,9 @@ program(1.0) tensor input_519_pad_0 = const()[name = tensor("input_519_pad_0"), val = tensor([0, 0])]; tensor input_519_dilations_0 = const()[name = tensor("input_519_dilations_0"), val = tensor([1])]; tensor input_519_groups_0 = const()[name = tensor("input_519_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238740992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240838208))), name = tensor("encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239230784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241330112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241328000)))]; tensor input_517_cast_fp16 = transpose(perm = input_517_perm_0, x = x_231_cast_fp16)[name = tensor("transpose_244")]; - tensor input_519_cast_fp16 = conv(dilations = input_519_dilations_0, groups = input_519_groups_0, pad = input_519_pad_0, pad_type = input_519_pad_type_0, strides = input_519_strides_0, weight = encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_517_cast_fp16)[name = 
tensor("input_519_cast_fp16")]; + tensor input_519_cast_fp16 = conv(dilations = input_519_dilations_0, groups = input_519_groups_0, pad = input_519_pad_0, pad_type = input_519_pad_type_0, strides = input_519_strides_0, weight = encoder_module_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_517_cast_fp16)[name = tensor("input_519_cast_fp16")]; tensor x_233_split_num_splits_0 = const()[name = tensor("x_233_split_num_splits_0"), val = tensor(2)]; tensor x_233_split_axis_0 = const()[name = tensor("x_233_split_axis_0"), val = tensor(1)]; tensor x_233_split_cast_fp16_0, tensor x_233_split_cast_fp16_1 = split(axis = x_233_split_axis_0, num_splits = x_233_split_num_splits_0, x = input_519_cast_fp16)[name = tensor("x_233_split_cast_fp16")]; @@ -1563,75 +1563,75 @@ program(1.0) tensor input_525_strides_0 = const()[name = tensor("input_525_strides_0"), val = tensor([1])]; tensor input_525_pad_0 = const()[name = tensor("input_525_pad_0"), val = tensor([0, 0])]; tensor input_525_dilations_0 = const()[name = tensor("input_525_dilations_0"), val = tensor([1])]; - tensor const_281_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240838784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240848064))), name = tensor("const_281_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_282_to_fp16 = const()[name = tensor("const_282_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240848640)))]; - tensor input_527_cast_fp16 = conv(bias = const_282_to_fp16, dilations = input_525_dilations_0, groups = input_525_groups_0, pad = input_525_pad_0, pad_type = input_525_pad_type_0, strides = input_525_strides_0, weight = const_281_to_fp16_palettized, x = input_523_cast_fp16)[name = tensor("input_527_cast_fp16")]; + tensor const_281_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_281_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241334272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241344640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241343552)))]; + tensor const_282_to_fp16 = const()[name = tensor("const_282_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241346752)))]; + tensor input_527_cast_fp16 = conv(bias = const_282_to_fp16, dilations = input_525_dilations_0, groups = input_525_groups_0, pad = input_525_pad_0, pad_type = input_525_pad_type_0, strides = input_525_strides_0, weight = const_281_to_fp16_quantized, x = input_523_cast_fp16)[name = tensor("input_527_cast_fp16")]; tensor input_529_cast_fp16 = silu(x = input_527_cast_fp16)[name = tensor("input_529_cast_fp16")]; tensor x_235_pad_type_0 = const()[name = tensor("x_235_pad_type_0"), val = tensor("valid")]; tensor x_235_strides_0 = const()[name = tensor("x_235_strides_0"), val = tensor([1])]; tensor x_235_pad_0 = const()[name = tensor("x_235_pad_0"), val = tensor([0, 0])]; tensor x_235_dilations_0 = const()[name = tensor("x_235_dilations_0"), val = tensor([1])]; tensor x_235_groups_0 = const()[name = tensor("x_235_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(240850752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241899392))), name = tensor("encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_235_cast_fp16 = conv(dilations = x_235_dilations_0, groups = x_235_groups_0, pad = x_235_pad_0, pad_type = x_235_pad_type_0, strides = x_235_strides_0, weight = encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_529_cast_fp16)[name = tensor("x_235_cast_fp16")]; + tensor encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241348864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242398592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242397504)))]; + tensor x_235_cast_fp16 = conv(dilations = x_235_dilations_0, groups = x_235_groups_0, pad = x_235_pad_0, pad_type = x_235_pad_type_0, strides = x_235_strides_0, weight = encoder_module_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_529_cast_fp16)[name = tensor("x_235_cast_fp16")]; tensor input_531_perm_0 = const()[name = tensor("input_531_perm_0"), val = tensor([0, 2, 1])]; tensor input_531_cast_fp16 = transpose(perm = input_531_perm_0, x = x_235_cast_fp16)[name = tensor("transpose_243")]; tensor input_533_cast_fp16 = add(x = input_515_cast_fp16, y = input_531_cast_fp16)[name = tensor("input_533_cast_fp16")]; tensor input_535_axes_0 = const()[name = tensor("input_535_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241899968)))]; - tensor encoder_module_layers_9_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241902080)))]; + tensor encoder_module_layers_9_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242400704)))]; + tensor encoder_module_layers_9_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242402816)))]; tensor input_535_cast_fp16 = layer_norm(axes = input_535_axes_0, beta = encoder_module_layers_9_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_feed_forward2_weight_to_fp16, x = input_533_cast_fp16)[name = tensor("input_535_cast_fp16")]; - tensor encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241904192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246098560))), name = 
tensor("encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_palettized, x = input_535_cast_fp16)[name = tensor("linear_89_cast_fp16")]; + tensor encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242404928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246603456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246599296)))]; + tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward2_linear1_weight_to_fp16_quantized, x = input_535_cast_fp16)[name = tensor("linear_89_cast_fp16")]; tensor input_539_cast_fp16 = silu(x = linear_89_cast_fp16)[name = tensor("input_539_cast_fp16")]; - tensor encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246099136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250293504))), name = tensor("encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_90_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_palettized, x = input_539_cast_fp16)[name = tensor("linear_90_cast_fp16")]; + tensor encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246611712))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250807168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250806080)))]; + tensor linear_90_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_9_feed_forward2_linear2_weight_to_fp16_quantized, x = input_539_cast_fp16)[name = tensor("linear_90_cast_fp16")]; tensor var_1962_to_fp16 = const()[name = tensor("op_1962_to_fp16"), val = tensor(0x1p-1)]; tensor var_1963_cast_fp16 = mul(x = linear_90_cast_fp16, y = var_1962_to_fp16)[name = tensor("op_1963_cast_fp16")]; tensor input_545_cast_fp16 = add(x = input_533_cast_fp16, y = var_1963_cast_fp16)[name = tensor("input_545_cast_fp16")]; tensor input_547_axes_0 = const()[name = tensor("input_547_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_9_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250294080)))]; - tensor encoder_module_layers_9_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250296192)))]; + tensor encoder_module_layers_9_norm_out_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_9_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250809280)))]; + tensor encoder_module_layers_9_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_9_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250811392)))]; tensor input_547_cast_fp16 = layer_norm(axes = input_547_axes_0, beta = encoder_module_layers_9_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_9_norm_out_weight_to_fp16, x = input_545_cast_fp16)[name = tensor("input_547_cast_fp16")]; tensor input_549_axes_0 = const()[name = tensor("input_549_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250298304)))]; - tensor encoder_module_layers_10_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250300416)))]; + tensor encoder_module_layers_10_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250813504)))]; + tensor encoder_module_layers_10_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250815616)))]; tensor input_549_cast_fp16 = layer_norm(axes = input_549_axes_0, beta = encoder_module_layers_10_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_feed_forward1_weight_to_fp16, x = input_547_cast_fp16)[name = tensor("input_549_cast_fp16")]; - tensor encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250302528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254496896))), name = tensor("encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_palettized, x = input_549_cast_fp16)[name = tensor("linear_91_cast_fp16")]; + tensor encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250817728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255016256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255012096)))]; + tensor linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward1_linear1_weight_to_fp16_quantized, x = input_549_cast_fp16)[name = tensor("linear_91_cast_fp16")]; tensor input_553_cast_fp16 = silu(x = linear_91_cast_fp16)[name = 
tensor("input_553_cast_fp16")]; - tensor encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254497472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258691840))), name = tensor("encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_92_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_palettized, x = input_553_cast_fp16)[name = tensor("linear_92_cast_fp16")]; + tensor encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255024512))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259219968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259218880)))]; + tensor linear_92_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward1_linear2_weight_to_fp16_quantized, x = input_553_cast_fp16)[name = tensor("linear_92_cast_fp16")]; tensor var_1991_to_fp16 = const()[name = tensor("op_1991_to_fp16"), val = tensor(0x1p-1)]; tensor var_1992_cast_fp16 = mul(x = linear_92_cast_fp16, y = var_1991_to_fp16)[name = tensor("op_1992_cast_fp16")]; tensor input_559_cast_fp16 = add(x = input_547_cast_fp16, y = var_1992_cast_fp16)[name = tensor("input_559_cast_fp16")]; tensor query_21_axes_0 = const()[name = tensor("query_21_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258692416)))]; - tensor encoder_module_layers_10_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258694528)))]; + tensor encoder_module_layers_10_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259222080)))]; + tensor encoder_module_layers_10_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259224192)))]; tensor query_21_cast_fp16 = layer_norm(axes = query_21_axes_0, beta = encoder_module_layers_10_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_self_att_weight_to_fp16, x = input_559_cast_fp16)[name = tensor("query_21_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258696640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259745280))), name = tensor("encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - 
tensor linear_93_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_palettized, x = query_21_cast_fp16)[name = tensor("linear_93_cast_fp16")]; + tensor encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259226304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260276032))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260274944)))]; + tensor linear_93_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_q_weight_to_fp16_quantized, x = query_21_cast_fp16)[name = tensor("linear_93_cast_fp16")]; tensor var_2008 = const()[name = tensor("op_2008"), val = tensor([1, -1, 8, 128])]; tensor q_61_cast_fp16 = reshape(shape = var_2008, x = linear_93_cast_fp16)[name = tensor("q_61_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259745856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260794496))), name = tensor("encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_94_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_palettized, x = query_21_cast_fp16)[name = tensor("linear_94_cast_fp16")]; + tensor encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260278144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261327872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261326784)))]; + tensor linear_94_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_k_weight_to_fp16_quantized, x = query_21_cast_fp16)[name = tensor("linear_94_cast_fp16")]; tensor var_2012 = const()[name = tensor("op_2012"), val = tensor([1, -1, 8, 128])]; tensor k_41_cast_fp16 = reshape(shape = var_2012, x = linear_94_cast_fp16)[name = tensor("k_41_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260795072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261843712))), name = tensor("encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_95_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_palettized, x = query_21_cast_fp16)[name = tensor("linear_95_cast_fp16")]; + tensor encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261329984))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262379712))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262378624)))]; + tensor linear_95_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_v_weight_to_fp16_quantized, x = query_21_cast_fp16)[name = tensor("linear_95_cast_fp16")]; tensor var_2016 = const()[name = tensor("op_2016"), val = tensor([1, -1, 8, 128])]; tensor v_21_cast_fp16 = reshape(shape = var_2016, x = linear_95_cast_fp16)[name = tensor("v_21_cast_fp16")]; tensor value_25_perm_0 = const()[name = tensor("value_25_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_10_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_10_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261844288)))]; + tensor encoder_module_layers_10_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_10_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262381824)))]; tensor var_2028_cast_fp16 = add(x = q_61_cast_fp16, y = encoder_module_layers_10_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2028_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_10_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261846400)))]; + tensor encoder_module_layers_10_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_10_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262383936)))]; tensor var_2030_cast_fp16 = add(x = q_61_cast_fp16, y = encoder_module_layers_10_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2030_cast_fp16")]; tensor q_with_bias_v_21_perm_0 = const()[name = tensor("q_with_bias_v_21_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_243_transpose_x_0 = const()[name = tensor("x_243_transpose_x_0"), val = tensor(false)]; tensor x_243_transpose_y_0 = const()[name = tensor("x_243_transpose_y_0"), val = tensor(false)]; - tensor op_2032_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261848512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262232576))), name = tensor("op_2032_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2032_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2032_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262386048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262770560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262770112)))]; tensor q_with_bias_v_21_cast_fp16 = transpose(perm = q_with_bias_v_21_perm_0, x = var_2030_cast_fp16)[name = tensor("transpose_242")]; - tensor x_243_cast_fp16 = matmul(transpose_x = x_243_transpose_x_0, transpose_y = x_243_transpose_y_0, x = 
q_with_bias_v_21_cast_fp16, y = op_2032_to_fp16_palettized)[name = tensor("x_243_cast_fp16")]; + tensor x_243_cast_fp16 = matmul(transpose_x = x_243_transpose_x_0, transpose_y = x_243_transpose_y_0, x = q_with_bias_v_21_cast_fp16, y = op_2032_to_fp16_quantized)[name = tensor("x_243_cast_fp16")]; tensor x_245_pad_0 = const()[name = tensor("x_245_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_245_mode_0 = const()[name = tensor("x_245_mode_0"), val = tensor("constant")]; tensor const_129_to_fp16 = const()[name = tensor("const_129_to_fp16"), val = tensor(0x0p+0)]; @@ -1669,12 +1669,12 @@ program(1.0) tensor var_2065 = const()[name = tensor("op_2065"), val = tensor([1, -1, 1024])]; tensor var_2064_cast_fp16 = transpose(perm = var_2064_perm_0, x = x_249_cast_fp16)[name = tensor("transpose_238")]; tensor input_563_cast_fp16 = reshape(shape = var_2065, x = var_2064_cast_fp16)[name = tensor("input_563_cast_fp16")]; - tensor encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262233152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263281792))), name = tensor("encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_97_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_palettized, x = input_563_cast_fp16)[name = tensor("linear_97_cast_fp16")]; + tensor encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262771392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263821120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263820032)))]; + tensor linear_97_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_self_attn_linear_out_weight_to_fp16_quantized, x = input_563_cast_fp16)[name = tensor("linear_97_cast_fp16")]; tensor input_567_cast_fp16 = add(x = input_559_cast_fp16, y = linear_97_cast_fp16)[name = tensor("input_567_cast_fp16")]; tensor x_253_axes_0 = const()[name = tensor("x_253_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263282368)))]; - tensor encoder_module_layers_10_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263284480)))]; + tensor encoder_module_layers_10_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263823232)))]; + tensor encoder_module_layers_10_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263825344)))]; tensor x_253_cast_fp16 = layer_norm(axes = x_253_axes_0, beta 
= encoder_module_layers_10_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_conv_weight_to_fp16, x = input_567_cast_fp16)[name = tensor("x_253_cast_fp16")]; tensor input_569_perm_0 = const()[name = tensor("input_569_perm_0"), val = tensor([0, 2, 1])]; tensor input_571_pad_type_0 = const()[name = tensor("input_571_pad_type_0"), val = tensor("valid")]; @@ -1682,9 +1682,9 @@ program(1.0) tensor input_571_pad_0 = const()[name = tensor("input_571_pad_0"), val = tensor([0, 0])]; tensor input_571_dilations_0 = const()[name = tensor("input_571_dilations_0"), val = tensor([1])]; tensor input_571_groups_0 = const()[name = tensor("input_571_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263286592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265383808))), name = tensor("encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263827456))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265926784))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265924672)))]; tensor input_569_cast_fp16 = transpose(perm = input_569_perm_0, x = x_253_cast_fp16)[name = tensor("transpose_237")]; - tensor input_571_cast_fp16 = conv(dilations = input_571_dilations_0, groups = input_571_groups_0, pad = input_571_pad_0, pad_type = input_571_pad_type_0, strides = input_571_strides_0, weight = encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_569_cast_fp16)[name = tensor("input_571_cast_fp16")]; + tensor input_571_cast_fp16 = conv(dilations = input_571_dilations_0, groups = input_571_groups_0, pad = input_571_pad_0, pad_type = input_571_pad_type_0, strides = input_571_strides_0, weight = encoder_module_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_569_cast_fp16)[name = tensor("input_571_cast_fp16")]; tensor x_255_split_num_splits_0 = const()[name = tensor("x_255_split_num_splits_0"), val = tensor(2)]; tensor x_255_split_axis_0 = const()[name = tensor("x_255_split_axis_0"), val = tensor(1)]; tensor x_255_split_cast_fp16_0, tensor x_255_split_cast_fp16_1 = split(axis = x_255_split_axis_0, num_splits = x_255_split_num_splits_0, x = input_571_cast_fp16)[name = tensor("x_255_split_cast_fp16")]; @@ -1700,75 +1700,75 @@ program(1.0) tensor input_577_strides_0 = const()[name = tensor("input_577_strides_0"), val = tensor([1])]; tensor input_577_pad_0 = const()[name = tensor("input_577_pad_0"), val = tensor([0, 0])]; tensor input_577_dilations_0 = const()[name = tensor("input_577_dilations_0"), val = tensor([1])]; - tensor const_283_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265384384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265393664))), name = tensor("const_283_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor 
const_284_to_fp16 = const()[name = tensor("const_284_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265394240)))]; - tensor input_579_cast_fp16 = conv(bias = const_284_to_fp16, dilations = input_577_dilations_0, groups = input_577_groups_0, pad = input_577_pad_0, pad_type = input_577_pad_type_0, strides = input_577_strides_0, weight = const_283_to_fp16_palettized, x = input_575_cast_fp16)[name = tensor("input_579_cast_fp16")]; + tensor const_283_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_283_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265930944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265941312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265940224)))]; + tensor const_284_to_fp16 = const()[name = tensor("const_284_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265943424)))]; + tensor input_579_cast_fp16 = conv(bias = const_284_to_fp16, dilations = input_577_dilations_0, groups = input_577_groups_0, pad = input_577_pad_0, pad_type = input_577_pad_type_0, strides = input_577_strides_0, weight = const_283_to_fp16_quantized, x = input_575_cast_fp16)[name = tensor("input_579_cast_fp16")]; tensor input_581_cast_fp16 = silu(x = input_579_cast_fp16)[name = tensor("input_581_cast_fp16")]; tensor x_257_pad_type_0 = const()[name = tensor("x_257_pad_type_0"), val = tensor("valid")]; tensor x_257_strides_0 = const()[name = tensor("x_257_strides_0"), val = tensor([1])]; tensor x_257_pad_0 = const()[name = tensor("x_257_pad_0"), val = tensor([0, 0])]; tensor x_257_dilations_0 = const()[name = tensor("x_257_dilations_0"), val = tensor([1])]; tensor x_257_groups_0 = const()[name = tensor("x_257_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265396352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266444992))), name = tensor("encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_257_cast_fp16 = conv(dilations = x_257_dilations_0, groups = x_257_groups_0, pad = x_257_pad_0, pad_type = x_257_pad_type_0, strides = x_257_strides_0, weight = encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_581_cast_fp16)[name = tensor("x_257_cast_fp16")]; + tensor encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(265945536))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266995264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266994176)))]; + tensor x_257_cast_fp16 = conv(dilations = x_257_dilations_0, groups = x_257_groups_0, pad = x_257_pad_0, pad_type = x_257_pad_type_0, strides = x_257_strides_0, weight = encoder_module_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_581_cast_fp16)[name = tensor("x_257_cast_fp16")]; 
tensor input_583_perm_0 = const()[name = tensor("input_583_perm_0"), val = tensor([0, 2, 1])]; tensor input_583_cast_fp16 = transpose(perm = input_583_perm_0, x = x_257_cast_fp16)[name = tensor("transpose_236")]; tensor input_585_cast_fp16 = add(x = input_567_cast_fp16, y = input_583_cast_fp16)[name = tensor("input_585_cast_fp16")]; tensor input_587_axes_0 = const()[name = tensor("input_587_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266445568)))]; - tensor encoder_module_layers_10_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266447680)))]; + tensor encoder_module_layers_10_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266997376)))]; + tensor encoder_module_layers_10_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266999488)))]; tensor input_587_cast_fp16 = layer_norm(axes = input_587_axes_0, beta = encoder_module_layers_10_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_feed_forward2_weight_to_fp16, x = input_585_cast_fp16)[name = tensor("input_587_cast_fp16")]; - tensor encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266449792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270644160))), name = tensor("encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_98_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_palettized, x = input_587_cast_fp16)[name = tensor("linear_98_cast_fp16")]; + tensor encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(267001600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271200128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271195968)))]; + tensor linear_98_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward2_linear1_weight_to_fp16_quantized, x = input_587_cast_fp16)[name = tensor("linear_98_cast_fp16")]; tensor input_591_cast_fp16 = silu(x = linear_98_cast_fp16)[name = tensor("input_591_cast_fp16")]; - tensor encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270644736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(274839104))), name = tensor("encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_99_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_palettized, x = input_591_cast_fp16)[name = tensor("linear_99_cast_fp16")]; + tensor encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271208384))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275403840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275402752)))]; + tensor linear_99_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_10_feed_forward2_linear2_weight_to_fp16_quantized, x = input_591_cast_fp16)[name = tensor("linear_99_cast_fp16")]; tensor var_2125_to_fp16 = const()[name = tensor("op_2125_to_fp16"), val = tensor(0x1p-1)]; tensor var_2126_cast_fp16 = mul(x = linear_99_cast_fp16, y = var_2125_to_fp16)[name = tensor("op_2126_cast_fp16")]; tensor input_597_cast_fp16 = add(x = input_585_cast_fp16, y = var_2126_cast_fp16)[name = tensor("input_597_cast_fp16")]; tensor input_599_axes_0 = const()[name = tensor("input_599_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_10_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274839680)))]; - tensor encoder_module_layers_10_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274841792)))]; + tensor encoder_module_layers_10_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275405952)))]; + tensor encoder_module_layers_10_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_10_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275408064)))]; tensor input_599_cast_fp16 = layer_norm(axes = input_599_axes_0, beta = encoder_module_layers_10_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_10_norm_out_weight_to_fp16, x = input_597_cast_fp16)[name = tensor("input_599_cast_fp16")]; tensor input_601_axes_0 = const()[name = tensor("input_601_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274843904)))]; - tensor encoder_module_layers_11_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274846016)))]; + tensor encoder_module_layers_11_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(275410176)))]; + tensor encoder_module_layers_11_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275412288)))]; tensor input_601_cast_fp16 = layer_norm(axes = input_601_axes_0, beta = encoder_module_layers_11_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_feed_forward1_weight_to_fp16, x = input_599_cast_fp16)[name = tensor("input_601_cast_fp16")]; - tensor encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274848128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279042496))), name = tensor("encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_palettized, x = input_601_cast_fp16)[name = tensor("linear_100_cast_fp16")]; + tensor encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275414400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279612928))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279608768)))]; + tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward1_linear1_weight_to_fp16_quantized, x = input_601_cast_fp16)[name = tensor("linear_100_cast_fp16")]; tensor input_605_cast_fp16 = silu(x = linear_100_cast_fp16)[name = tensor("input_605_cast_fp16")]; - tensor encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279043072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283237440))), name = tensor("encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_101_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_palettized, x = input_605_cast_fp16)[name = tensor("linear_101_cast_fp16")]; + tensor encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279621184))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283816640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283815552)))]; + tensor linear_101_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward1_linear2_weight_to_fp16_quantized, x = input_605_cast_fp16)[name = tensor("linear_101_cast_fp16")]; 
tensor var_2154_to_fp16 = const()[name = tensor("op_2154_to_fp16"), val = tensor(0x1p-1)]; tensor var_2155_cast_fp16 = mul(x = linear_101_cast_fp16, y = var_2154_to_fp16)[name = tensor("op_2155_cast_fp16")]; tensor input_611_cast_fp16 = add(x = input_599_cast_fp16, y = var_2155_cast_fp16)[name = tensor("input_611_cast_fp16")]; tensor query_23_axes_0 = const()[name = tensor("query_23_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283238016)))]; - tensor encoder_module_layers_11_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283240128)))]; + tensor encoder_module_layers_11_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283818752)))]; + tensor encoder_module_layers_11_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283820864)))]; tensor query_23_cast_fp16 = layer_norm(axes = query_23_axes_0, beta = encoder_module_layers_11_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_self_att_weight_to_fp16, x = input_611_cast_fp16)[name = tensor("query_23_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283242240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284290880))), name = tensor("encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_102_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_palettized, x = query_23_cast_fp16)[name = tensor("linear_102_cast_fp16")]; + tensor encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283822976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284872704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284871616)))]; + tensor linear_102_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_q_weight_to_fp16_quantized, x = query_23_cast_fp16)[name = tensor("linear_102_cast_fp16")]; tensor var_2171 = const()[name = tensor("op_2171"), val = tensor([1, -1, 8, 128])]; tensor q_67_cast_fp16 = reshape(shape = var_2171, x = linear_102_cast_fp16)[name = tensor("q_67_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284291456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset 
= tensor(285340096))), name = tensor("encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_103_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_palettized, x = query_23_cast_fp16)[name = tensor("linear_103_cast_fp16")]; + tensor encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284874816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285924544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285923456)))]; + tensor linear_103_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_k_weight_to_fp16_quantized, x = query_23_cast_fp16)[name = tensor("linear_103_cast_fp16")]; tensor var_2175 = const()[name = tensor("op_2175"), val = tensor([1, -1, 8, 128])]; tensor k_45_cast_fp16 = reshape(shape = var_2175, x = linear_103_cast_fp16)[name = tensor("k_45_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285340672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286389312))), name = tensor("encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_104_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_palettized, x = query_23_cast_fp16)[name = tensor("linear_104_cast_fp16")]; + tensor encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285926656))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286976384))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286975296)))]; + tensor linear_104_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_v_weight_to_fp16_quantized, x = query_23_cast_fp16)[name = tensor("linear_104_cast_fp16")]; tensor var_2179 = const()[name = tensor("op_2179"), val = tensor([1, -1, 8, 128])]; tensor v_23_cast_fp16 = reshape(shape = var_2179, x = linear_104_cast_fp16)[name = tensor("v_23_cast_fp16")]; tensor value_27_perm_0 = const()[name = tensor("value_27_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_11_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_11_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286389888)))]; + tensor encoder_module_layers_11_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_11_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286978496)))]; tensor var_2191_cast_fp16 = add(x = q_67_cast_fp16, y = 
encoder_module_layers_11_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2191_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_11_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286392000)))]; + tensor encoder_module_layers_11_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_11_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286980608)))]; tensor var_2193_cast_fp16 = add(x = q_67_cast_fp16, y = encoder_module_layers_11_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2193_cast_fp16")]; tensor q_with_bias_v_23_perm_0 = const()[name = tensor("q_with_bias_v_23_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_265_transpose_x_0 = const()[name = tensor("x_265_transpose_x_0"), val = tensor(false)]; tensor x_265_transpose_y_0 = const()[name = tensor("x_265_transpose_y_0"), val = tensor(false)]; - tensor op_2195_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286394112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286778176))), name = tensor("op_2195_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2195_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2195_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286982720))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287367232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287366784)))]; tensor q_with_bias_v_23_cast_fp16 = transpose(perm = q_with_bias_v_23_perm_0, x = var_2193_cast_fp16)[name = tensor("transpose_235")]; - tensor x_265_cast_fp16 = matmul(transpose_x = x_265_transpose_x_0, transpose_y = x_265_transpose_y_0, x = q_with_bias_v_23_cast_fp16, y = op_2195_to_fp16_palettized)[name = tensor("x_265_cast_fp16")]; + tensor x_265_cast_fp16 = matmul(transpose_x = x_265_transpose_x_0, transpose_y = x_265_transpose_y_0, x = q_with_bias_v_23_cast_fp16, y = op_2195_to_fp16_quantized)[name = tensor("x_265_cast_fp16")]; tensor x_267_pad_0 = const()[name = tensor("x_267_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_267_mode_0 = const()[name = tensor("x_267_mode_0"), val = tensor("constant")]; tensor const_139_to_fp16 = const()[name = tensor("const_139_to_fp16"), val = tensor(0x0p+0)]; @@ -1806,12 +1806,12 @@ program(1.0) tensor var_2228 = const()[name = tensor("op_2228"), val = tensor([1, -1, 1024])]; tensor var_2227_cast_fp16 = transpose(perm = var_2227_perm_0, x = x_271_cast_fp16)[name = tensor("transpose_231")]; tensor input_615_cast_fp16 = reshape(shape = var_2228, x = var_2227_cast_fp16)[name = tensor("input_615_cast_fp16")]; - tensor encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286778752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287827392))), name = tensor("encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_106_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_palettized, x = input_615_cast_fp16)[name = tensor("linear_106_cast_fp16")]; + tensor encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287368064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288417792))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288416704)))]; + tensor linear_106_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_self_attn_linear_out_weight_to_fp16_quantized, x = input_615_cast_fp16)[name = tensor("linear_106_cast_fp16")]; tensor input_619_cast_fp16 = add(x = input_611_cast_fp16, y = linear_106_cast_fp16)[name = tensor("input_619_cast_fp16")]; tensor x_275_axes_0 = const()[name = tensor("x_275_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287827968)))]; - tensor encoder_module_layers_11_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287830080)))]; + tensor encoder_module_layers_11_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288419904)))]; + tensor encoder_module_layers_11_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288422016)))]; tensor x_275_cast_fp16 = layer_norm(axes = x_275_axes_0, beta = encoder_module_layers_11_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_conv_weight_to_fp16, x = input_619_cast_fp16)[name = tensor("x_275_cast_fp16")]; tensor input_621_perm_0 = const()[name = tensor("input_621_perm_0"), val = tensor([0, 2, 1])]; tensor input_623_pad_type_0 = const()[name = tensor("input_623_pad_type_0"), val = tensor("valid")]; @@ -1819,9 +1819,9 @@ program(1.0) tensor input_623_pad_0 = const()[name = tensor("input_623_pad_0"), val = tensor([0, 0])]; tensor input_623_dilations_0 = const()[name = tensor("input_623_dilations_0"), val = tensor([1])]; tensor input_623_groups_0 = const()[name = tensor("input_623_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287832192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289929408))), name = tensor("encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(288424128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290523456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290521344)))]; tensor input_621_cast_fp16 = transpose(perm = input_621_perm_0, x = x_275_cast_fp16)[name = tensor("transpose_230")]; - tensor input_623_cast_fp16 = conv(dilations = input_623_dilations_0, groups = input_623_groups_0, pad = input_623_pad_0, pad_type = input_623_pad_type_0, strides = input_623_strides_0, weight = encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_621_cast_fp16)[name = tensor("input_623_cast_fp16")]; + tensor input_623_cast_fp16 = conv(dilations = input_623_dilations_0, groups = input_623_groups_0, pad = input_623_pad_0, pad_type = input_623_pad_type_0, strides = input_623_strides_0, weight = encoder_module_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_621_cast_fp16)[name = tensor("input_623_cast_fp16")]; tensor x_277_split_num_splits_0 = const()[name = tensor("x_277_split_num_splits_0"), val = tensor(2)]; tensor x_277_split_axis_0 = const()[name = tensor("x_277_split_axis_0"), val = tensor(1)]; tensor x_277_split_cast_fp16_0, tensor x_277_split_cast_fp16_1 = split(axis = x_277_split_axis_0, num_splits = x_277_split_num_splits_0, x = input_623_cast_fp16)[name = tensor("x_277_split_cast_fp16")]; @@ -1837,75 +1837,75 @@ program(1.0) tensor input_629_strides_0 = const()[name = tensor("input_629_strides_0"), val = tensor([1])]; tensor input_629_pad_0 = const()[name = tensor("input_629_pad_0"), val = tensor([0, 0])]; tensor input_629_dilations_0 = const()[name = tensor("input_629_dilations_0"), val = tensor([1])]; - tensor const_285_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289929984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289939264))), name = tensor("const_285_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_286_to_fp16 = const()[name = tensor("const_286_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289939840)))]; - tensor input_631_cast_fp16 = conv(bias = const_286_to_fp16, dilations = input_629_dilations_0, groups = input_629_groups_0, pad = input_629_pad_0, pad_type = input_629_pad_type_0, strides = input_629_strides_0, weight = const_285_to_fp16_palettized, x = input_627_cast_fp16)[name = tensor("input_631_cast_fp16")]; + tensor const_285_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_285_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290527616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290537984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290536896)))]; + tensor const_286_to_fp16 = const()[name = tensor("const_286_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290540096)))]; + tensor input_631_cast_fp16 = conv(bias = const_286_to_fp16, dilations = input_629_dilations_0, groups = input_629_groups_0, pad = input_629_pad_0, pad_type = input_629_pad_type_0, strides = input_629_strides_0, weight = const_285_to_fp16_quantized, x = input_627_cast_fp16)[name = 
tensor("input_631_cast_fp16")]; tensor input_633_cast_fp16 = silu(x = input_631_cast_fp16)[name = tensor("input_633_cast_fp16")]; tensor x_279_pad_type_0 = const()[name = tensor("x_279_pad_type_0"), val = tensor("valid")]; tensor x_279_strides_0 = const()[name = tensor("x_279_strides_0"), val = tensor([1])]; tensor x_279_pad_0 = const()[name = tensor("x_279_pad_0"), val = tensor([0, 0])]; tensor x_279_dilations_0 = const()[name = tensor("x_279_dilations_0"), val = tensor([1])]; tensor x_279_groups_0 = const()[name = tensor("x_279_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289941952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290990592))), name = tensor("encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_279_cast_fp16 = conv(dilations = x_279_dilations_0, groups = x_279_groups_0, pad = x_279_pad_0, pad_type = x_279_pad_type_0, strides = x_279_strides_0, weight = encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_633_cast_fp16)[name = tensor("x_279_cast_fp16")]; + tensor encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290542208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291591936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291590848)))]; + tensor x_279_cast_fp16 = conv(dilations = x_279_dilations_0, groups = x_279_groups_0, pad = x_279_pad_0, pad_type = x_279_pad_type_0, strides = x_279_strides_0, weight = encoder_module_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_633_cast_fp16)[name = tensor("x_279_cast_fp16")]; tensor input_635_perm_0 = const()[name = tensor("input_635_perm_0"), val = tensor([0, 2, 1])]; tensor input_635_cast_fp16 = transpose(perm = input_635_perm_0, x = x_279_cast_fp16)[name = tensor("transpose_229")]; tensor input_637_cast_fp16 = add(x = input_619_cast_fp16, y = input_635_cast_fp16)[name = tensor("input_637_cast_fp16")]; tensor input_639_axes_0 = const()[name = tensor("input_639_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290991168)))]; - tensor encoder_module_layers_11_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290993280)))]; + tensor encoder_module_layers_11_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291594048)))]; + tensor encoder_module_layers_11_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(291596160)))]; tensor input_639_cast_fp16 = layer_norm(axes = input_639_axes_0, beta = encoder_module_layers_11_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_feed_forward2_weight_to_fp16, x = input_637_cast_fp16)[name = tensor("input_639_cast_fp16")]; - tensor encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(290995392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295189760))), name = tensor("encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_palettized, x = input_639_cast_fp16)[name = tensor("linear_107_cast_fp16")]; + tensor encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291598272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295796800))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295792640)))]; + tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward2_linear1_weight_to_fp16_quantized, x = input_639_cast_fp16)[name = tensor("linear_107_cast_fp16")]; tensor input_643_cast_fp16 = silu(x = linear_107_cast_fp16)[name = tensor("input_643_cast_fp16")]; - tensor encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295190336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299384704))), name = tensor("encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_108_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_palettized, x = input_643_cast_fp16)[name = tensor("linear_108_cast_fp16")]; + tensor encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295805056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300000512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299999424)))]; + tensor linear_108_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_11_feed_forward2_linear2_weight_to_fp16_quantized, x = input_643_cast_fp16)[name = tensor("linear_108_cast_fp16")]; tensor var_2288_to_fp16 = const()[name = tensor("op_2288_to_fp16"), val = tensor(0x1p-1)]; tensor var_2289_cast_fp16 = mul(x = linear_108_cast_fp16, y = var_2288_to_fp16)[name = tensor("op_2289_cast_fp16")]; tensor input_649_cast_fp16 = add(x = 
input_637_cast_fp16, y = var_2289_cast_fp16)[name = tensor("input_649_cast_fp16")]; tensor input_651_axes_0 = const()[name = tensor("input_651_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_11_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299385280)))]; - tensor encoder_module_layers_11_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299387392)))]; + tensor encoder_module_layers_11_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300002624)))]; + tensor encoder_module_layers_11_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_11_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300004736)))]; tensor input_651_cast_fp16 = layer_norm(axes = input_651_axes_0, beta = encoder_module_layers_11_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_11_norm_out_weight_to_fp16, x = input_649_cast_fp16)[name = tensor("input_651_cast_fp16")]; tensor input_653_axes_0 = const()[name = tensor("input_653_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299389504)))]; - tensor encoder_module_layers_12_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299391616)))]; + tensor encoder_module_layers_12_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300006848)))]; + tensor encoder_module_layers_12_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300008960)))]; tensor input_653_cast_fp16 = layer_norm(axes = input_653_axes_0, beta = encoder_module_layers_12_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_feed_forward1_weight_to_fp16, x = input_651_cast_fp16)[name = tensor("input_653_cast_fp16")]; - tensor encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299393728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303588096))), name = tensor("encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_palettized, x = input_653_cast_fp16)[name = tensor("linear_109_cast_fp16")]; + tensor encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_quantized = 
constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300011072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304209600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304205440)))]; + tensor linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward1_linear1_weight_to_fp16_quantized, x = input_653_cast_fp16)[name = tensor("linear_109_cast_fp16")]; tensor input_657_cast_fp16 = silu(x = linear_109_cast_fp16)[name = tensor("input_657_cast_fp16")]; - tensor encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303588672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307783040))), name = tensor("encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_110_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_palettized, x = input_657_cast_fp16)[name = tensor("linear_110_cast_fp16")]; + tensor encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304217856))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308413312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308412224)))]; + tensor linear_110_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward1_linear2_weight_to_fp16_quantized, x = input_657_cast_fp16)[name = tensor("linear_110_cast_fp16")]; tensor var_2317_to_fp16 = const()[name = tensor("op_2317_to_fp16"), val = tensor(0x1p-1)]; tensor var_2318_cast_fp16 = mul(x = linear_110_cast_fp16, y = var_2317_to_fp16)[name = tensor("op_2318_cast_fp16")]; tensor input_663_cast_fp16 = add(x = input_651_cast_fp16, y = var_2318_cast_fp16)[name = tensor("input_663_cast_fp16")]; tensor query_25_axes_0 = const()[name = tensor("query_25_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307783616)))]; - tensor encoder_module_layers_12_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307785728)))]; + tensor encoder_module_layers_12_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308415424)))]; + tensor encoder_module_layers_12_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(308417536)))]; tensor query_25_cast_fp16 = layer_norm(axes = query_25_axes_0, beta = encoder_module_layers_12_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_self_att_weight_to_fp16, x = input_663_cast_fp16)[name = tensor("query_25_cast_fp16")]; - tensor encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307787840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308836480))), name = tensor("encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_111_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_palettized, x = query_25_cast_fp16)[name = tensor("linear_111_cast_fp16")]; + tensor encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308419648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309469376))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309468288)))]; + tensor linear_111_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_q_weight_to_fp16_quantized, x = query_25_cast_fp16)[name = tensor("linear_111_cast_fp16")]; tensor var_2334 = const()[name = tensor("op_2334"), val = tensor([1, -1, 8, 128])]; tensor q_73_cast_fp16 = reshape(shape = var_2334, x = linear_111_cast_fp16)[name = tensor("q_73_cast_fp16")]; - tensor encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308837056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309885696))), name = tensor("encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_112_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_palettized, x = query_25_cast_fp16)[name = tensor("linear_112_cast_fp16")]; + tensor encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309471488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310521216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310520128)))]; + tensor linear_112_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_k_weight_to_fp16_quantized, x = query_25_cast_fp16)[name = tensor("linear_112_cast_fp16")]; tensor var_2338 = const()[name = tensor("op_2338"), val = tensor([1, -1, 8, 128])]; tensor k_49_cast_fp16 = reshape(shape = var_2338, x = linear_112_cast_fp16)[name = tensor("k_49_cast_fp16")]; - tensor 
encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309886272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310934912))), name = tensor("encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_113_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_palettized, x = query_25_cast_fp16)[name = tensor("linear_113_cast_fp16")]; + tensor encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310523328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311573056))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311571968)))]; + tensor linear_113_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_v_weight_to_fp16_quantized, x = query_25_cast_fp16)[name = tensor("linear_113_cast_fp16")]; tensor var_2342 = const()[name = tensor("op_2342"), val = tensor([1, -1, 8, 128])]; tensor v_25_cast_fp16 = reshape(shape = var_2342, x = linear_113_cast_fp16)[name = tensor("v_25_cast_fp16")]; tensor value_29_perm_0 = const()[name = tensor("value_29_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_12_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_12_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310935488)))]; + tensor encoder_module_layers_12_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_12_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311575168)))]; tensor var_2354_cast_fp16 = add(x = q_73_cast_fp16, y = encoder_module_layers_12_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2354_cast_fp16")]; - tensor encoder_module_layers_12_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_12_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310937600)))]; + tensor encoder_module_layers_12_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_12_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311577280)))]; tensor var_2356_cast_fp16 = add(x = q_73_cast_fp16, y = encoder_module_layers_12_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2356_cast_fp16")]; tensor q_with_bias_v_25_perm_0 = const()[name = tensor("q_with_bias_v_25_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_287_transpose_x_0 = const()[name = tensor("x_287_transpose_x_0"), val = tensor(false)]; tensor x_287_transpose_y_0 = const()[name = tensor("x_287_transpose_y_0"), val = tensor(false)]; - tensor op_2358_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310939712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311323776))), name = 
tensor("op_2358_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2358_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2358_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311579392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311963904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311963456)))]; tensor q_with_bias_v_25_cast_fp16 = transpose(perm = q_with_bias_v_25_perm_0, x = var_2356_cast_fp16)[name = tensor("transpose_228")]; - tensor x_287_cast_fp16 = matmul(transpose_x = x_287_transpose_x_0, transpose_y = x_287_transpose_y_0, x = q_with_bias_v_25_cast_fp16, y = op_2358_to_fp16_palettized)[name = tensor("x_287_cast_fp16")]; + tensor x_287_cast_fp16 = matmul(transpose_x = x_287_transpose_x_0, transpose_y = x_287_transpose_y_0, x = q_with_bias_v_25_cast_fp16, y = op_2358_to_fp16_quantized)[name = tensor("x_287_cast_fp16")]; tensor x_289_pad_0 = const()[name = tensor("x_289_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_289_mode_0 = const()[name = tensor("x_289_mode_0"), val = tensor("constant")]; tensor const_149_to_fp16 = const()[name = tensor("const_149_to_fp16"), val = tensor(0x0p+0)]; @@ -1943,12 +1943,12 @@ program(1.0) tensor var_2391 = const()[name = tensor("op_2391"), val = tensor([1, -1, 1024])]; tensor var_2390_cast_fp16 = transpose(perm = var_2390_perm_0, x = x_293_cast_fp16)[name = tensor("transpose_224")]; tensor input_667_cast_fp16 = reshape(shape = var_2391, x = var_2390_cast_fp16)[name = tensor("input_667_cast_fp16")]; - tensor encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311324352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312372992))), name = tensor("encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_115_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_palettized, x = input_667_cast_fp16)[name = tensor("linear_115_cast_fp16")]; + tensor encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311964736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313014464))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313013376)))]; + tensor linear_115_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_self_attn_linear_out_weight_to_fp16_quantized, x = input_667_cast_fp16)[name = tensor("linear_115_cast_fp16")]; tensor input_671_cast_fp16 = add(x = input_663_cast_fp16, y = linear_115_cast_fp16)[name = tensor("input_671_cast_fp16")]; tensor x_297_axes_0 = const()[name = tensor("x_297_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(312373568)))]; - tensor encoder_module_layers_12_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312375680)))]; + tensor encoder_module_layers_12_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313016576)))]; + tensor encoder_module_layers_12_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313018688)))]; tensor x_297_cast_fp16 = layer_norm(axes = x_297_axes_0, beta = encoder_module_layers_12_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_conv_weight_to_fp16, x = input_671_cast_fp16)[name = tensor("x_297_cast_fp16")]; tensor input_673_perm_0 = const()[name = tensor("input_673_perm_0"), val = tensor([0, 2, 1])]; tensor input_675_pad_type_0 = const()[name = tensor("input_675_pad_type_0"), val = tensor("valid")]; @@ -1956,9 +1956,9 @@ program(1.0) tensor input_675_pad_0 = const()[name = tensor("input_675_pad_0"), val = tensor([0, 0])]; tensor input_675_dilations_0 = const()[name = tensor("input_675_dilations_0"), val = tensor([1])]; tensor input_675_groups_0 = const()[name = tensor("input_675_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312377792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314475008))), name = tensor("encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313020800))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315120128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315118016)))]; tensor input_673_cast_fp16 = transpose(perm = input_673_perm_0, x = x_297_cast_fp16)[name = tensor("transpose_223")]; - tensor input_675_cast_fp16 = conv(dilations = input_675_dilations_0, groups = input_675_groups_0, pad = input_675_pad_0, pad_type = input_675_pad_type_0, strides = input_675_strides_0, weight = encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_673_cast_fp16)[name = tensor("input_675_cast_fp16")]; + tensor input_675_cast_fp16 = conv(dilations = input_675_dilations_0, groups = input_675_groups_0, pad = input_675_pad_0, pad_type = input_675_pad_type_0, strides = input_675_strides_0, weight = encoder_module_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_673_cast_fp16)[name = tensor("input_675_cast_fp16")]; tensor x_299_split_num_splits_0 = const()[name = tensor("x_299_split_num_splits_0"), val = tensor(2)]; tensor x_299_split_axis_0 = const()[name = tensor("x_299_split_axis_0"), val = tensor(1)]; tensor x_299_split_cast_fp16_0, tensor x_299_split_cast_fp16_1 = split(axis = 
x_299_split_axis_0, num_splits = x_299_split_num_splits_0, x = input_675_cast_fp16)[name = tensor("x_299_split_cast_fp16")]; @@ -1974,75 +1974,75 @@ program(1.0) tensor input_681_strides_0 = const()[name = tensor("input_681_strides_0"), val = tensor([1])]; tensor input_681_pad_0 = const()[name = tensor("input_681_pad_0"), val = tensor([0, 0])]; tensor input_681_dilations_0 = const()[name = tensor("input_681_dilations_0"), val = tensor([1])]; - tensor const_287_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314475584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314484864))), name = tensor("const_287_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_288_to_fp16 = const()[name = tensor("const_288_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314485440)))]; - tensor input_683_cast_fp16 = conv(bias = const_288_to_fp16, dilations = input_681_dilations_0, groups = input_681_groups_0, pad = input_681_pad_0, pad_type = input_681_pad_type_0, strides = input_681_strides_0, weight = const_287_to_fp16_palettized, x = input_679_cast_fp16)[name = tensor("input_683_cast_fp16")]; + tensor const_287_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_287_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315124288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315134656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315133568)))]; + tensor const_288_to_fp16 = const()[name = tensor("const_288_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315136768)))]; + tensor input_683_cast_fp16 = conv(bias = const_288_to_fp16, dilations = input_681_dilations_0, groups = input_681_groups_0, pad = input_681_pad_0, pad_type = input_681_pad_type_0, strides = input_681_strides_0, weight = const_287_to_fp16_quantized, x = input_679_cast_fp16)[name = tensor("input_683_cast_fp16")]; tensor input_685_cast_fp16 = silu(x = input_683_cast_fp16)[name = tensor("input_685_cast_fp16")]; tensor x_301_pad_type_0 = const()[name = tensor("x_301_pad_type_0"), val = tensor("valid")]; tensor x_301_strides_0 = const()[name = tensor("x_301_strides_0"), val = tensor([1])]; tensor x_301_pad_0 = const()[name = tensor("x_301_pad_0"), val = tensor([0, 0])]; tensor x_301_dilations_0 = const()[name = tensor("x_301_dilations_0"), val = tensor([1])]; tensor x_301_groups_0 = const()[name = tensor("x_301_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(314487552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315536192))), name = tensor("encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_301_cast_fp16 = conv(dilations = x_301_dilations_0, groups = x_301_groups_0, pad = x_301_pad_0, pad_type = x_301_pad_type_0, strides = x_301_strides_0, weight = encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_685_cast_fp16)[name = tensor("x_301_cast_fp16")]; + tensor 
encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315138880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316188608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316187520)))]; + tensor x_301_cast_fp16 = conv(dilations = x_301_dilations_0, groups = x_301_groups_0, pad = x_301_pad_0, pad_type = x_301_pad_type_0, strides = x_301_strides_0, weight = encoder_module_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_685_cast_fp16)[name = tensor("x_301_cast_fp16")]; tensor input_687_perm_0 = const()[name = tensor("input_687_perm_0"), val = tensor([0, 2, 1])]; tensor input_687_cast_fp16 = transpose(perm = input_687_perm_0, x = x_301_cast_fp16)[name = tensor("transpose_222")]; tensor input_689_cast_fp16 = add(x = input_671_cast_fp16, y = input_687_cast_fp16)[name = tensor("input_689_cast_fp16")]; tensor input_691_axes_0 = const()[name = tensor("input_691_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315536768)))]; - tensor encoder_module_layers_12_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315538880)))]; + tensor encoder_module_layers_12_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316190720)))]; + tensor encoder_module_layers_12_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316192832)))]; tensor input_691_cast_fp16 = layer_norm(axes = input_691_axes_0, beta = encoder_module_layers_12_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_feed_forward2_weight_to_fp16, x = input_689_cast_fp16)[name = tensor("input_691_cast_fp16")]; - tensor encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(315540992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(319735360))), name = tensor("encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_116_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_palettized, x = input_691_cast_fp16)[name = tensor("linear_116_cast_fp16")]; + tensor encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(316194944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320393472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320389312)))]; + tensor linear_116_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward2_linear1_weight_to_fp16_quantized, x = input_691_cast_fp16)[name = tensor("linear_116_cast_fp16")]; tensor input_695_cast_fp16 = silu(x = linear_116_cast_fp16)[name = tensor("input_695_cast_fp16")]; - tensor encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(319735936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323930304))), name = tensor("encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_117_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_palettized, x = input_695_cast_fp16)[name = tensor("linear_117_cast_fp16")]; + tensor encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320401728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324597184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324596096)))]; + tensor linear_117_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_12_feed_forward2_linear2_weight_to_fp16_quantized, x = input_695_cast_fp16)[name = tensor("linear_117_cast_fp16")]; tensor var_2451_to_fp16 = const()[name = tensor("op_2451_to_fp16"), val = tensor(0x1p-1)]; tensor var_2452_cast_fp16 = mul(x = linear_117_cast_fp16, y = var_2451_to_fp16)[name = tensor("op_2452_cast_fp16")]; tensor input_701_cast_fp16 = add(x = input_689_cast_fp16, y = var_2452_cast_fp16)[name = tensor("input_701_cast_fp16")]; tensor input_703_axes_0 = const()[name = tensor("input_703_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_12_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323930880)))]; - tensor encoder_module_layers_12_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323932992)))]; + tensor encoder_module_layers_12_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324599296)))]; + tensor encoder_module_layers_12_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_12_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324601408)))]; tensor input_703_cast_fp16 = layer_norm(axes = input_703_axes_0, beta = encoder_module_layers_12_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_12_norm_out_weight_to_fp16, x = 
input_701_cast_fp16)[name = tensor("input_703_cast_fp16")]; tensor input_705_axes_0 = const()[name = tensor("input_705_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323935104)))]; - tensor encoder_module_layers_13_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323937216)))]; + tensor encoder_module_layers_13_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324603520)))]; + tensor encoder_module_layers_13_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324605632)))]; tensor input_705_cast_fp16 = layer_norm(axes = input_705_axes_0, beta = encoder_module_layers_13_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_feed_forward1_weight_to_fp16, x = input_703_cast_fp16)[name = tensor("input_705_cast_fp16")]; - tensor encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323939328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328133696))), name = tensor("encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_palettized, x = input_705_cast_fp16)[name = tensor("linear_118_cast_fp16")]; + tensor encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324607744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328806272))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328802112)))]; + tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward1_linear1_weight_to_fp16_quantized, x = input_705_cast_fp16)[name = tensor("linear_118_cast_fp16")]; tensor input_709_cast_fp16 = silu(x = linear_118_cast_fp16)[name = tensor("input_709_cast_fp16")]; - tensor encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328134272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332328640))), name = tensor("encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_119_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_palettized, x = input_709_cast_fp16)[name = tensor("linear_119_cast_fp16")]; + tensor encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(328814528))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333009984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333008896)))]; + tensor linear_119_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward1_linear2_weight_to_fp16_quantized, x = input_709_cast_fp16)[name = tensor("linear_119_cast_fp16")]; tensor var_2480_to_fp16 = const()[name = tensor("op_2480_to_fp16"), val = tensor(0x1p-1)]; tensor var_2481_cast_fp16 = mul(x = linear_119_cast_fp16, y = var_2480_to_fp16)[name = tensor("op_2481_cast_fp16")]; tensor input_715_cast_fp16 = add(x = input_703_cast_fp16, y = var_2481_cast_fp16)[name = tensor("input_715_cast_fp16")]; tensor query_27_axes_0 = const()[name = tensor("query_27_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332329216)))]; - tensor encoder_module_layers_13_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332331328)))]; + tensor encoder_module_layers_13_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333012096)))]; + tensor encoder_module_layers_13_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333014208)))]; tensor query_27_cast_fp16 = layer_norm(axes = query_27_axes_0, beta = encoder_module_layers_13_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_self_att_weight_to_fp16, x = input_715_cast_fp16)[name = tensor("query_27_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332333440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333382080))), name = tensor("encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_120_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_palettized, x = query_27_cast_fp16)[name = tensor("linear_120_cast_fp16")]; + tensor encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333016320))), scale 
= tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334066048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334064960)))]; + tensor linear_120_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_q_weight_to_fp16_quantized, x = query_27_cast_fp16)[name = tensor("linear_120_cast_fp16")]; tensor var_2497 = const()[name = tensor("op_2497"), val = tensor([1, -1, 8, 128])]; tensor q_79_cast_fp16 = reshape(shape = var_2497, x = linear_120_cast_fp16)[name = tensor("q_79_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333382656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334431296))), name = tensor("encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_121_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_palettized, x = query_27_cast_fp16)[name = tensor("linear_121_cast_fp16")]; + tensor encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334068160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335117888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335116800)))]; + tensor linear_121_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_k_weight_to_fp16_quantized, x = query_27_cast_fp16)[name = tensor("linear_121_cast_fp16")]; tensor var_2501 = const()[name = tensor("op_2501"), val = tensor([1, -1, 8, 128])]; tensor k_53_cast_fp16 = reshape(shape = var_2501, x = linear_121_cast_fp16)[name = tensor("k_53_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334431872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335480512))), name = tensor("encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_122_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_palettized, x = query_27_cast_fp16)[name = tensor("linear_122_cast_fp16")]; + tensor encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335120000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336169728))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336168640)))]; + tensor linear_122_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_v_weight_to_fp16_quantized, x = 
query_27_cast_fp16)[name = tensor("linear_122_cast_fp16")]; tensor var_2505 = const()[name = tensor("op_2505"), val = tensor([1, -1, 8, 128])]; tensor v_27_cast_fp16 = reshape(shape = var_2505, x = linear_122_cast_fp16)[name = tensor("v_27_cast_fp16")]; tensor value_31_perm_0 = const()[name = tensor("value_31_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_13_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_13_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335481088)))]; + tensor encoder_module_layers_13_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_13_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336171840)))]; tensor var_2517_cast_fp16 = add(x = q_79_cast_fp16, y = encoder_module_layers_13_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2517_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_13_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335483200)))]; + tensor encoder_module_layers_13_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_13_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336173952)))]; tensor var_2519_cast_fp16 = add(x = q_79_cast_fp16, y = encoder_module_layers_13_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2519_cast_fp16")]; tensor q_with_bias_v_27_perm_0 = const()[name = tensor("q_with_bias_v_27_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_309_transpose_x_0 = const()[name = tensor("x_309_transpose_x_0"), val = tensor(false)]; tensor x_309_transpose_y_0 = const()[name = tensor("x_309_transpose_y_0"), val = tensor(false)]; - tensor op_2521_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335485312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335869376))), name = tensor("op_2521_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2521_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2521_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336176064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336560576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336560128)))]; tensor q_with_bias_v_27_cast_fp16 = transpose(perm = q_with_bias_v_27_perm_0, x = var_2519_cast_fp16)[name = tensor("transpose_221")]; - tensor x_309_cast_fp16 = matmul(transpose_x = x_309_transpose_x_0, transpose_y = x_309_transpose_y_0, x = q_with_bias_v_27_cast_fp16, y = op_2521_to_fp16_palettized)[name = tensor("x_309_cast_fp16")]; + tensor x_309_cast_fp16 = matmul(transpose_x = x_309_transpose_x_0, transpose_y = x_309_transpose_y_0, x = q_with_bias_v_27_cast_fp16, y = op_2521_to_fp16_quantized)[name = tensor("x_309_cast_fp16")]; tensor x_311_pad_0 = const()[name = tensor("x_311_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_311_mode_0 = const()[name = tensor("x_311_mode_0"), val = tensor("constant")]; tensor const_159_to_fp16 = const()[name = tensor("const_159_to_fp16"), val = 
tensor(0x0p+0)]; @@ -2080,12 +2080,12 @@ program(1.0) tensor var_2554 = const()[name = tensor("op_2554"), val = tensor([1, -1, 1024])]; tensor var_2553_cast_fp16 = transpose(perm = var_2553_perm_0, x = x_315_cast_fp16)[name = tensor("transpose_217")]; tensor input_719_cast_fp16 = reshape(shape = var_2554, x = var_2553_cast_fp16)[name = tensor("input_719_cast_fp16")]; - tensor encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335869952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336918592))), name = tensor("encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_124_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_palettized, x = input_719_cast_fp16)[name = tensor("linear_124_cast_fp16")]; + tensor encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336561408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337611136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337610048)))]; + tensor linear_124_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_self_attn_linear_out_weight_to_fp16_quantized, x = input_719_cast_fp16)[name = tensor("linear_124_cast_fp16")]; tensor input_723_cast_fp16 = add(x = input_715_cast_fp16, y = linear_124_cast_fp16)[name = tensor("input_723_cast_fp16")]; tensor x_319_axes_0 = const()[name = tensor("x_319_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336919168)))]; - tensor encoder_module_layers_13_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336921280)))]; + tensor encoder_module_layers_13_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337613248)))]; + tensor encoder_module_layers_13_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337615360)))]; tensor x_319_cast_fp16 = layer_norm(axes = x_319_axes_0, beta = encoder_module_layers_13_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_conv_weight_to_fp16, x = input_723_cast_fp16)[name = tensor("x_319_cast_fp16")]; tensor input_725_perm_0 = const()[name = tensor("input_725_perm_0"), val = tensor([0, 2, 1])]; tensor input_727_pad_type_0 = const()[name = tensor("input_727_pad_type_0"), val = tensor("valid")]; @@ -2093,9 +2093,9 @@ program(1.0) tensor input_727_pad_0 = const()[name = tensor("input_727_pad_0"), val = tensor([0, 0])]; tensor input_727_dilations_0 = 
const()[name = tensor("input_727_dilations_0"), val = tensor([1])]; tensor input_727_groups_0 = const()[name = tensor("input_727_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336923392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339020608))), name = tensor("encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337617472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339716800))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339714688)))]; tensor input_725_cast_fp16 = transpose(perm = input_725_perm_0, x = x_319_cast_fp16)[name = tensor("transpose_216")]; - tensor input_727_cast_fp16 = conv(dilations = input_727_dilations_0, groups = input_727_groups_0, pad = input_727_pad_0, pad_type = input_727_pad_type_0, strides = input_727_strides_0, weight = encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_725_cast_fp16)[name = tensor("input_727_cast_fp16")]; + tensor input_727_cast_fp16 = conv(dilations = input_727_dilations_0, groups = input_727_groups_0, pad = input_727_pad_0, pad_type = input_727_pad_type_0, strides = input_727_strides_0, weight = encoder_module_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_725_cast_fp16)[name = tensor("input_727_cast_fp16")]; tensor x_321_split_num_splits_0 = const()[name = tensor("x_321_split_num_splits_0"), val = tensor(2)]; tensor x_321_split_axis_0 = const()[name = tensor("x_321_split_axis_0"), val = tensor(1)]; tensor x_321_split_cast_fp16_0, tensor x_321_split_cast_fp16_1 = split(axis = x_321_split_axis_0, num_splits = x_321_split_num_splits_0, x = input_727_cast_fp16)[name = tensor("x_321_split_cast_fp16")]; @@ -2111,75 +2111,75 @@ program(1.0) tensor input_733_strides_0 = const()[name = tensor("input_733_strides_0"), val = tensor([1])]; tensor input_733_pad_0 = const()[name = tensor("input_733_pad_0"), val = tensor([0, 0])]; tensor input_733_dilations_0 = const()[name = tensor("input_733_dilations_0"), val = tensor([1])]; - tensor const_289_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339021184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339030464))), name = tensor("const_289_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_290_to_fp16 = const()[name = tensor("const_290_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339031040)))]; - tensor input_735_cast_fp16 = conv(bias = const_290_to_fp16, dilations = input_733_dilations_0, groups = input_733_groups_0, pad = input_733_pad_0, pad_type = input_733_pad_type_0, strides = input_733_strides_0, weight = const_289_to_fp16_palettized, x = input_731_cast_fp16)[name = tensor("input_735_cast_fp16")]; + tensor const_289_to_fp16_quantized = constexpr_affine_dequantize()[axis 
= tensor(0), name = tensor("const_289_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339720960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339731328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339730240)))]; + tensor const_290_to_fp16 = const()[name = tensor("const_290_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339733440)))]; + tensor input_735_cast_fp16 = conv(bias = const_290_to_fp16, dilations = input_733_dilations_0, groups = input_733_groups_0, pad = input_733_pad_0, pad_type = input_733_pad_type_0, strides = input_733_strides_0, weight = const_289_to_fp16_quantized, x = input_731_cast_fp16)[name = tensor("input_735_cast_fp16")]; tensor input_737_cast_fp16 = silu(x = input_735_cast_fp16)[name = tensor("input_737_cast_fp16")]; tensor x_323_pad_type_0 = const()[name = tensor("x_323_pad_type_0"), val = tensor("valid")]; tensor x_323_strides_0 = const()[name = tensor("x_323_strides_0"), val = tensor([1])]; tensor x_323_pad_0 = const()[name = tensor("x_323_pad_0"), val = tensor([0, 0])]; tensor x_323_dilations_0 = const()[name = tensor("x_323_dilations_0"), val = tensor([1])]; tensor x_323_groups_0 = const()[name = tensor("x_323_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339033152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340081792))), name = tensor("encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_323_cast_fp16 = conv(dilations = x_323_dilations_0, groups = x_323_groups_0, pad = x_323_pad_0, pad_type = x_323_pad_type_0, strides = x_323_strides_0, weight = encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_737_cast_fp16)[name = tensor("x_323_cast_fp16")]; + tensor encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(339735552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340785280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340784192)))]; + tensor x_323_cast_fp16 = conv(dilations = x_323_dilations_0, groups = x_323_groups_0, pad = x_323_pad_0, pad_type = x_323_pad_type_0, strides = x_323_strides_0, weight = encoder_module_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_737_cast_fp16)[name = tensor("x_323_cast_fp16")]; tensor input_739_perm_0 = const()[name = tensor("input_739_perm_0"), val = tensor([0, 2, 1])]; tensor input_739_cast_fp16 = transpose(perm = input_739_perm_0, x = x_323_cast_fp16)[name = tensor("transpose_215")]; tensor input_741_cast_fp16 = add(x = input_723_cast_fp16, y = input_739_cast_fp16)[name = tensor("input_741_cast_fp16")]; tensor input_743_axes_0 = const()[name = tensor("input_743_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_feed_forward2_weight_to_fp16 = const()[name = 
tensor("encoder_module_layers_13_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340082368)))]; - tensor encoder_module_layers_13_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340084480)))]; + tensor encoder_module_layers_13_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340787392)))]; + tensor encoder_module_layers_13_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340789504)))]; tensor input_743_cast_fp16 = layer_norm(axes = input_743_axes_0, beta = encoder_module_layers_13_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_feed_forward2_weight_to_fp16, x = input_741_cast_fp16)[name = tensor("input_743_cast_fp16")]; - tensor encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340086592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344280960))), name = tensor("encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_palettized, x = input_743_cast_fp16)[name = tensor("linear_125_cast_fp16")]; + tensor encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(340791616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344990144))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344985984)))]; + tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward2_linear1_weight_to_fp16_quantized, x = input_743_cast_fp16)[name = tensor("linear_125_cast_fp16")]; tensor input_747_cast_fp16 = silu(x = linear_125_cast_fp16)[name = tensor("input_747_cast_fp16")]; - tensor encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344281536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348475904))), name = tensor("encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_126_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_palettized, x = input_747_cast_fp16)[name = tensor("linear_126_cast_fp16")]; + tensor encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(344998400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349193856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349192768)))]; + tensor linear_126_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_13_feed_forward2_linear2_weight_to_fp16_quantized, x = input_747_cast_fp16)[name = tensor("linear_126_cast_fp16")]; tensor var_2614_to_fp16 = const()[name = tensor("op_2614_to_fp16"), val = tensor(0x1p-1)]; tensor var_2615_cast_fp16 = mul(x = linear_126_cast_fp16, y = var_2614_to_fp16)[name = tensor("op_2615_cast_fp16")]; tensor input_753_cast_fp16 = add(x = input_741_cast_fp16, y = var_2615_cast_fp16)[name = tensor("input_753_cast_fp16")]; tensor input_755_axes_0 = const()[name = tensor("input_755_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_13_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348476480)))]; - tensor encoder_module_layers_13_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348478592)))]; + tensor encoder_module_layers_13_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349195968)))]; + tensor encoder_module_layers_13_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_13_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349198080)))]; tensor input_755_cast_fp16 = layer_norm(axes = input_755_axes_0, beta = encoder_module_layers_13_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_13_norm_out_weight_to_fp16, x = input_753_cast_fp16)[name = tensor("input_755_cast_fp16")]; tensor input_757_axes_0 = const()[name = tensor("input_757_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348480704)))]; - tensor encoder_module_layers_14_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348482816)))]; + tensor encoder_module_layers_14_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349200192)))]; + tensor encoder_module_layers_14_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349202304)))]; tensor input_757_cast_fp16 = layer_norm(axes = input_757_axes_0, beta = encoder_module_layers_14_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = 
encoder_module_layers_14_norm_feed_forward1_weight_to_fp16, x = input_755_cast_fp16)[name = tensor("input_757_cast_fp16")]; - tensor encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348484928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352679296))), name = tensor("encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_palettized, x = input_757_cast_fp16)[name = tensor("linear_127_cast_fp16")]; + tensor encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349204416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(353402944))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(353398784)))]; + tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward1_linear1_weight_to_fp16_quantized, x = input_757_cast_fp16)[name = tensor("linear_127_cast_fp16")]; tensor input_761_cast_fp16 = silu(x = linear_127_cast_fp16)[name = tensor("input_761_cast_fp16")]; - tensor encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352679872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356874240))), name = tensor("encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_128_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_palettized, x = input_761_cast_fp16)[name = tensor("linear_128_cast_fp16")]; + tensor encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(353411200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357606656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357605568)))]; + tensor linear_128_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward1_linear2_weight_to_fp16_quantized, x = input_761_cast_fp16)[name = tensor("linear_128_cast_fp16")]; tensor var_2643_to_fp16 = const()[name = tensor("op_2643_to_fp16"), val = tensor(0x1p-1)]; tensor var_2644_cast_fp16 = mul(x = linear_128_cast_fp16, y = var_2643_to_fp16)[name = tensor("op_2644_cast_fp16")]; tensor input_767_cast_fp16 = add(x = input_755_cast_fp16, y = var_2644_cast_fp16)[name = tensor("input_767_cast_fp16")]; tensor query_29_axes_0 = const()[name = tensor("query_29_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_self_att_weight_to_fp16 = 
const()[name = tensor("encoder_module_layers_14_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356874816)))]; - tensor encoder_module_layers_14_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356876928)))]; + tensor encoder_module_layers_14_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357608768)))]; + tensor encoder_module_layers_14_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357610880)))]; tensor query_29_cast_fp16 = layer_norm(axes = query_29_axes_0, beta = encoder_module_layers_14_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_14_norm_self_att_weight_to_fp16, x = input_767_cast_fp16)[name = tensor("query_29_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356879040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357927680))), name = tensor("encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_129_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_palettized, x = query_29_cast_fp16)[name = tensor("linear_129_cast_fp16")]; + tensor encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357612992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358662720))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358661632)))]; + tensor linear_129_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_q_weight_to_fp16_quantized, x = query_29_cast_fp16)[name = tensor("linear_129_cast_fp16")]; tensor var_2660 = const()[name = tensor("op_2660"), val = tensor([1, -1, 8, 128])]; tensor q_85_cast_fp16 = reshape(shape = var_2660, x = linear_129_cast_fp16)[name = tensor("q_85_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357928256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358976896))), name = tensor("encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_130_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_palettized, x = query_29_cast_fp16)[name = tensor("linear_130_cast_fp16")]; + tensor encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = 
tensor(0), name = tensor("encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358664832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359714560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359713472)))]; + tensor linear_130_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_k_weight_to_fp16_quantized, x = query_29_cast_fp16)[name = tensor("linear_130_cast_fp16")]; tensor var_2664 = const()[name = tensor("op_2664"), val = tensor([1, -1, 8, 128])]; tensor k_57_cast_fp16 = reshape(shape = var_2664, x = linear_130_cast_fp16)[name = tensor("k_57_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358977472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360026112))), name = tensor("encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_131_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_palettized, x = query_29_cast_fp16)[name = tensor("linear_131_cast_fp16")]; + tensor encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359716672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360766400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360765312)))]; + tensor linear_131_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_v_weight_to_fp16_quantized, x = query_29_cast_fp16)[name = tensor("linear_131_cast_fp16")]; tensor var_2668 = const()[name = tensor("op_2668"), val = tensor([1, -1, 8, 128])]; tensor v_29_cast_fp16 = reshape(shape = var_2668, x = linear_131_cast_fp16)[name = tensor("v_29_cast_fp16")]; tensor value_33_perm_0 = const()[name = tensor("value_33_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_14_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_14_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360026688)))]; + tensor encoder_module_layers_14_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_14_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360768512)))]; tensor var_2680_cast_fp16 = add(x = q_85_cast_fp16, y = encoder_module_layers_14_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2680_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_14_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360028800)))]; + tensor encoder_module_layers_14_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_14_self_attn_pos_bias_v_to_fp16"), 
val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360770624)))]; tensor var_2682_cast_fp16 = add(x = q_85_cast_fp16, y = encoder_module_layers_14_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2682_cast_fp16")]; tensor q_with_bias_v_29_perm_0 = const()[name = tensor("q_with_bias_v_29_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_331_transpose_x_0 = const()[name = tensor("x_331_transpose_x_0"), val = tensor(false)]; tensor x_331_transpose_y_0 = const()[name = tensor("x_331_transpose_y_0"), val = tensor(false)]; - tensor op_2684_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360030912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360414976))), name = tensor("op_2684_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2684_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2684_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360772736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361157248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361156800)))]; tensor q_with_bias_v_29_cast_fp16 = transpose(perm = q_with_bias_v_29_perm_0, x = var_2682_cast_fp16)[name = tensor("transpose_214")]; - tensor x_331_cast_fp16 = matmul(transpose_x = x_331_transpose_x_0, transpose_y = x_331_transpose_y_0, x = q_with_bias_v_29_cast_fp16, y = op_2684_to_fp16_palettized)[name = tensor("x_331_cast_fp16")]; + tensor x_331_cast_fp16 = matmul(transpose_x = x_331_transpose_x_0, transpose_y = x_331_transpose_y_0, x = q_with_bias_v_29_cast_fp16, y = op_2684_to_fp16_quantized)[name = tensor("x_331_cast_fp16")]; tensor x_333_pad_0 = const()[name = tensor("x_333_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_333_mode_0 = const()[name = tensor("x_333_mode_0"), val = tensor("constant")]; tensor const_169_to_fp16 = const()[name = tensor("const_169_to_fp16"), val = tensor(0x0p+0)]; @@ -2217,12 +2217,12 @@ program(1.0) tensor var_2717 = const()[name = tensor("op_2717"), val = tensor([1, -1, 1024])]; tensor var_2716_cast_fp16 = transpose(perm = var_2716_perm_0, x = x_337_cast_fp16)[name = tensor("transpose_210")]; tensor input_771_cast_fp16 = reshape(shape = var_2717, x = var_2716_cast_fp16)[name = tensor("input_771_cast_fp16")]; - tensor encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360415552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361464192))), name = tensor("encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_133_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_palettized, x = input_771_cast_fp16)[name = tensor("linear_133_cast_fp16")]; + tensor encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361158080))), scale = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(362207808))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362206720)))]; + tensor linear_133_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_self_attn_linear_out_weight_to_fp16_quantized, x = input_771_cast_fp16)[name = tensor("linear_133_cast_fp16")]; tensor input_775_cast_fp16 = add(x = input_767_cast_fp16, y = linear_133_cast_fp16)[name = tensor("input_775_cast_fp16")]; tensor x_341_axes_0 = const()[name = tensor("x_341_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361464768)))]; - tensor encoder_module_layers_14_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361466880)))]; + tensor encoder_module_layers_14_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362209920)))]; + tensor encoder_module_layers_14_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362212032)))]; tensor x_341_cast_fp16 = layer_norm(axes = x_341_axes_0, beta = encoder_module_layers_14_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_14_norm_conv_weight_to_fp16, x = input_775_cast_fp16)[name = tensor("x_341_cast_fp16")]; tensor input_777_perm_0 = const()[name = tensor("input_777_perm_0"), val = tensor([0, 2, 1])]; tensor input_779_pad_type_0 = const()[name = tensor("input_779_pad_type_0"), val = tensor("valid")]; @@ -2230,9 +2230,9 @@ program(1.0) tensor input_779_pad_0 = const()[name = tensor("input_779_pad_0"), val = tensor([0, 0])]; tensor input_779_dilations_0 = const()[name = tensor("input_779_dilations_0"), val = tensor([1])]; tensor input_779_groups_0 = const()[name = tensor("input_779_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361468992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363566208))), name = tensor("encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362214144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364313472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364311360)))]; tensor input_777_cast_fp16 = transpose(perm = input_777_perm_0, x = x_341_cast_fp16)[name = tensor("transpose_209")]; - tensor input_779_cast_fp16 = conv(dilations = input_779_dilations_0, groups = input_779_groups_0, pad = input_779_pad_0, pad_type = 
input_779_pad_type_0, strides = input_779_strides_0, weight = encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_777_cast_fp16)[name = tensor("input_779_cast_fp16")]; + tensor input_779_cast_fp16 = conv(dilations = input_779_dilations_0, groups = input_779_groups_0, pad = input_779_pad_0, pad_type = input_779_pad_type_0, strides = input_779_strides_0, weight = encoder_module_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_777_cast_fp16)[name = tensor("input_779_cast_fp16")]; tensor x_343_split_num_splits_0 = const()[name = tensor("x_343_split_num_splits_0"), val = tensor(2)]; tensor x_343_split_axis_0 = const()[name = tensor("x_343_split_axis_0"), val = tensor(1)]; tensor x_343_split_cast_fp16_0, tensor x_343_split_cast_fp16_1 = split(axis = x_343_split_axis_0, num_splits = x_343_split_num_splits_0, x = input_779_cast_fp16)[name = tensor("x_343_split_cast_fp16")]; @@ -2248,75 +2248,75 @@ program(1.0) tensor input_785_strides_0 = const()[name = tensor("input_785_strides_0"), val = tensor([1])]; tensor input_785_pad_0 = const()[name = tensor("input_785_pad_0"), val = tensor([0, 0])]; tensor input_785_dilations_0 = const()[name = tensor("input_785_dilations_0"), val = tensor([1])]; - tensor const_291_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363566784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363576064))), name = tensor("const_291_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_292_to_fp16 = const()[name = tensor("const_292_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363576640)))]; - tensor input_787_cast_fp16 = conv(bias = const_292_to_fp16, dilations = input_785_dilations_0, groups = input_785_groups_0, pad = input_785_pad_0, pad_type = input_785_pad_type_0, strides = input_785_strides_0, weight = const_291_to_fp16_palettized, x = input_783_cast_fp16)[name = tensor("input_787_cast_fp16")]; + tensor const_291_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_291_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364317632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364328000))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364326912)))]; + tensor const_292_to_fp16 = const()[name = tensor("const_292_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364330112)))]; + tensor input_787_cast_fp16 = conv(bias = const_292_to_fp16, dilations = input_785_dilations_0, groups = input_785_groups_0, pad = input_785_pad_0, pad_type = input_785_pad_type_0, strides = input_785_strides_0, weight = const_291_to_fp16_quantized, x = input_783_cast_fp16)[name = tensor("input_787_cast_fp16")]; tensor input_789_cast_fp16 = silu(x = input_787_cast_fp16)[name = tensor("input_789_cast_fp16")]; tensor x_345_pad_type_0 = const()[name = tensor("x_345_pad_type_0"), val = tensor("valid")]; tensor x_345_strides_0 = const()[name = tensor("x_345_strides_0"), val = tensor([1])]; tensor x_345_pad_0 = const()[name = tensor("x_345_pad_0"), val = tensor([0, 0])]; tensor x_345_dilations_0 = const()[name = tensor("x_345_dilations_0"), val = tensor([1])]; tensor x_345_groups_0 = const()[name = tensor("x_345_groups_0"), 
val = tensor(1)]; - tensor encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363578752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364627392))), name = tensor("encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_345_cast_fp16 = conv(dilations = x_345_dilations_0, groups = x_345_groups_0, pad = x_345_pad_0, pad_type = x_345_pad_type_0, strides = x_345_strides_0, weight = encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_789_cast_fp16)[name = tensor("x_345_cast_fp16")]; + tensor encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364332224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365381952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365380864)))]; + tensor x_345_cast_fp16 = conv(dilations = x_345_dilations_0, groups = x_345_groups_0, pad = x_345_pad_0, pad_type = x_345_pad_type_0, strides = x_345_strides_0, weight = encoder_module_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_789_cast_fp16)[name = tensor("x_345_cast_fp16")]; tensor input_791_perm_0 = const()[name = tensor("input_791_perm_0"), val = tensor([0, 2, 1])]; tensor input_791_cast_fp16 = transpose(perm = input_791_perm_0, x = x_345_cast_fp16)[name = tensor("transpose_208")]; tensor input_793_cast_fp16 = add(x = input_775_cast_fp16, y = input_791_cast_fp16)[name = tensor("input_793_cast_fp16")]; tensor input_795_axes_0 = const()[name = tensor("input_795_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364627968)))]; - tensor encoder_module_layers_14_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(364630080)))]; + tensor encoder_module_layers_14_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365384064)))]; + tensor encoder_module_layers_14_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365386176)))]; tensor input_795_cast_fp16 = layer_norm(axes = input_795_axes_0, beta = encoder_module_layers_14_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_14_norm_feed_forward2_weight_to_fp16, x = input_793_cast_fp16)[name = tensor("input_795_cast_fp16")]; - tensor encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(364632192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368826560))), name = tensor("encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_palettized, x = input_795_cast_fp16)[name = tensor("linear_134_cast_fp16")]; + tensor encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365388288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(369586816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(369582656)))]; + tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward2_linear1_weight_to_fp16_quantized, x = input_795_cast_fp16)[name = tensor("linear_134_cast_fp16")]; tensor input_799_cast_fp16 = silu(x = linear_134_cast_fp16)[name = tensor("input_799_cast_fp16")]; - tensor encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368827136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373021504))), name = tensor("encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_135_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_palettized, x = input_799_cast_fp16)[name = tensor("linear_135_cast_fp16")]; + tensor encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(369595072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373790528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373789440)))]; + tensor linear_135_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_14_feed_forward2_linear2_weight_to_fp16_quantized, x = input_799_cast_fp16)[name = tensor("linear_135_cast_fp16")]; tensor var_2777_to_fp16 = const()[name = tensor("op_2777_to_fp16"), val = tensor(0x1p-1)]; tensor var_2778_cast_fp16 = mul(x = linear_135_cast_fp16, y = var_2777_to_fp16)[name = tensor("op_2778_cast_fp16")]; tensor input_805_cast_fp16 = add(x = input_793_cast_fp16, y = var_2778_cast_fp16)[name = tensor("input_805_cast_fp16")]; tensor input_807_axes_0 = const()[name = tensor("input_807_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_14_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373022080)))]; - tensor encoder_module_layers_14_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_out_bias_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373024192)))]; + tensor encoder_module_layers_14_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373792640)))]; + tensor encoder_module_layers_14_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_14_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373794752)))]; tensor input_807_cast_fp16 = layer_norm(axes = input_807_axes_0, beta = encoder_module_layers_14_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_14_norm_out_weight_to_fp16, x = input_805_cast_fp16)[name = tensor("input_807_cast_fp16")]; tensor input_809_axes_0 = const()[name = tensor("input_809_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373026304)))]; - tensor encoder_module_layers_15_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373028416)))]; + tensor encoder_module_layers_15_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373796864)))]; + tensor encoder_module_layers_15_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373798976)))]; tensor input_809_cast_fp16 = layer_norm(axes = input_809_axes_0, beta = encoder_module_layers_15_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_feed_forward1_weight_to_fp16, x = input_807_cast_fp16)[name = tensor("input_809_cast_fp16")]; - tensor encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373030528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377224896))), name = tensor("encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_palettized, x = input_809_cast_fp16)[name = tensor("linear_136_cast_fp16")]; + tensor encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373801088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377999616))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377995456)))]; + tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = 
encoder_module_layers_15_feed_forward1_linear1_weight_to_fp16_quantized, x = input_809_cast_fp16)[name = tensor("linear_136_cast_fp16")]; tensor input_813_cast_fp16 = silu(x = linear_136_cast_fp16)[name = tensor("input_813_cast_fp16")]; - tensor encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(377225472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381419840))), name = tensor("encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_137_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_palettized, x = input_813_cast_fp16)[name = tensor("linear_137_cast_fp16")]; + tensor encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(378007872))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382203328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382202240)))]; + tensor linear_137_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward1_linear2_weight_to_fp16_quantized, x = input_813_cast_fp16)[name = tensor("linear_137_cast_fp16")]; tensor var_2806_to_fp16 = const()[name = tensor("op_2806_to_fp16"), val = tensor(0x1p-1)]; tensor var_2807_cast_fp16 = mul(x = linear_137_cast_fp16, y = var_2806_to_fp16)[name = tensor("op_2807_cast_fp16")]; tensor input_819_cast_fp16 = add(x = input_807_cast_fp16, y = var_2807_cast_fp16)[name = tensor("input_819_cast_fp16")]; tensor query_31_axes_0 = const()[name = tensor("query_31_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381420416)))]; - tensor encoder_module_layers_15_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381422528)))]; + tensor encoder_module_layers_15_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382205440)))]; + tensor encoder_module_layers_15_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382207552)))]; tensor query_31_cast_fp16 = layer_norm(axes = query_31_axes_0, beta = encoder_module_layers_15_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_self_att_weight_to_fp16, x = input_819_cast_fp16)[name = tensor("query_31_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381424640))), lut = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382473280))), name = tensor("encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_138_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_palettized, x = query_31_cast_fp16)[name = tensor("linear_138_cast_fp16")]; + tensor encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382209664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383259392))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383258304)))]; + tensor linear_138_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_q_weight_to_fp16_quantized, x = query_31_cast_fp16)[name = tensor("linear_138_cast_fp16")]; tensor var_2823 = const()[name = tensor("op_2823"), val = tensor([1, -1, 8, 128])]; tensor q_91_cast_fp16 = reshape(shape = var_2823, x = linear_138_cast_fp16)[name = tensor("q_91_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382473856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383522496))), name = tensor("encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_139_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_palettized, x = query_31_cast_fp16)[name = tensor("linear_139_cast_fp16")]; + tensor encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383261504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384311232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384310144)))]; + tensor linear_139_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_k_weight_to_fp16_quantized, x = query_31_cast_fp16)[name = tensor("linear_139_cast_fp16")]; tensor var_2827 = const()[name = tensor("op_2827"), val = tensor([1, -1, 8, 128])]; tensor k_61_cast_fp16 = reshape(shape = var_2827, x = linear_139_cast_fp16)[name = tensor("k_61_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383523072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384571712))), name = tensor("encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_140_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_palettized, x = 
query_31_cast_fp16)[name = tensor("linear_140_cast_fp16")]; + tensor encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384313344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385363072))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385361984)))]; + tensor linear_140_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_v_weight_to_fp16_quantized, x = query_31_cast_fp16)[name = tensor("linear_140_cast_fp16")]; tensor var_2831 = const()[name = tensor("op_2831"), val = tensor([1, -1, 8, 128])]; tensor v_31_cast_fp16 = reshape(shape = var_2831, x = linear_140_cast_fp16)[name = tensor("v_31_cast_fp16")]; tensor value_35_perm_0 = const()[name = tensor("value_35_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_15_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_15_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384572288)))]; + tensor encoder_module_layers_15_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_15_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385365184)))]; tensor var_2843_cast_fp16 = add(x = q_91_cast_fp16, y = encoder_module_layers_15_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2843_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_15_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384574400)))]; + tensor encoder_module_layers_15_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_15_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385367296)))]; tensor var_2845_cast_fp16 = add(x = q_91_cast_fp16, y = encoder_module_layers_15_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2845_cast_fp16")]; tensor q_with_bias_v_31_perm_0 = const()[name = tensor("q_with_bias_v_31_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_353_transpose_x_0 = const()[name = tensor("x_353_transpose_x_0"), val = tensor(false)]; tensor x_353_transpose_y_0 = const()[name = tensor("x_353_transpose_y_0"), val = tensor(false)]; - tensor op_2847_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384576512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384960576))), name = tensor("op_2847_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_2847_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2847_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385369408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385753920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385753472)))]; tensor q_with_bias_v_31_cast_fp16 = transpose(perm = 
q_with_bias_v_31_perm_0, x = var_2845_cast_fp16)[name = tensor("transpose_207")]; - tensor x_353_cast_fp16 = matmul(transpose_x = x_353_transpose_x_0, transpose_y = x_353_transpose_y_0, x = q_with_bias_v_31_cast_fp16, y = op_2847_to_fp16_palettized)[name = tensor("x_353_cast_fp16")]; + tensor x_353_cast_fp16 = matmul(transpose_x = x_353_transpose_x_0, transpose_y = x_353_transpose_y_0, x = q_with_bias_v_31_cast_fp16, y = op_2847_to_fp16_quantized)[name = tensor("x_353_cast_fp16")]; tensor x_355_pad_0 = const()[name = tensor("x_355_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_355_mode_0 = const()[name = tensor("x_355_mode_0"), val = tensor("constant")]; tensor const_179_to_fp16 = const()[name = tensor("const_179_to_fp16"), val = tensor(0x0p+0)]; @@ -2354,12 +2354,12 @@ program(1.0) tensor var_2880 = const()[name = tensor("op_2880"), val = tensor([1, -1, 1024])]; tensor var_2879_cast_fp16 = transpose(perm = var_2879_perm_0, x = x_359_cast_fp16)[name = tensor("transpose_203")]; tensor input_823_cast_fp16 = reshape(shape = var_2880, x = var_2879_cast_fp16)[name = tensor("input_823_cast_fp16")]; - tensor encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384961152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386009792))), name = tensor("encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_142_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_palettized, x = input_823_cast_fp16)[name = tensor("linear_142_cast_fp16")]; + tensor encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385754752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386804480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386803392)))]; + tensor linear_142_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_self_attn_linear_out_weight_to_fp16_quantized, x = input_823_cast_fp16)[name = tensor("linear_142_cast_fp16")]; tensor input_827_cast_fp16 = add(x = input_819_cast_fp16, y = linear_142_cast_fp16)[name = tensor("input_827_cast_fp16")]; tensor x_363_axes_0 = const()[name = tensor("x_363_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386010368)))]; - tensor encoder_module_layers_15_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386012480)))]; + tensor encoder_module_layers_15_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386806592)))]; + tensor encoder_module_layers_15_norm_conv_bias_to_fp16 = const()[name = 
tensor("encoder_module_layers_15_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386808704)))]; tensor x_363_cast_fp16 = layer_norm(axes = x_363_axes_0, beta = encoder_module_layers_15_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_conv_weight_to_fp16, x = input_827_cast_fp16)[name = tensor("x_363_cast_fp16")]; tensor input_829_perm_0 = const()[name = tensor("input_829_perm_0"), val = tensor([0, 2, 1])]; tensor input_831_pad_type_0 = const()[name = tensor("input_831_pad_type_0"), val = tensor("valid")]; @@ -2367,9 +2367,9 @@ program(1.0) tensor input_831_pad_0 = const()[name = tensor("input_831_pad_0"), val = tensor([0, 0])]; tensor input_831_dilations_0 = const()[name = tensor("input_831_dilations_0"), val = tensor([1])]; tensor input_831_groups_0 = const()[name = tensor("input_831_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386014592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388111808))), name = tensor("encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386810816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388910144))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388908032)))]; tensor input_829_cast_fp16 = transpose(perm = input_829_perm_0, x = x_363_cast_fp16)[name = tensor("transpose_202")]; - tensor input_831_cast_fp16 = conv(dilations = input_831_dilations_0, groups = input_831_groups_0, pad = input_831_pad_0, pad_type = input_831_pad_type_0, strides = input_831_strides_0, weight = encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_829_cast_fp16)[name = tensor("input_831_cast_fp16")]; + tensor input_831_cast_fp16 = conv(dilations = input_831_dilations_0, groups = input_831_groups_0, pad = input_831_pad_0, pad_type = input_831_pad_type_0, strides = input_831_strides_0, weight = encoder_module_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_829_cast_fp16)[name = tensor("input_831_cast_fp16")]; tensor x_365_split_num_splits_0 = const()[name = tensor("x_365_split_num_splits_0"), val = tensor(2)]; tensor x_365_split_axis_0 = const()[name = tensor("x_365_split_axis_0"), val = tensor(1)]; tensor x_365_split_cast_fp16_0, tensor x_365_split_cast_fp16_1 = split(axis = x_365_split_axis_0, num_splits = x_365_split_num_splits_0, x = input_831_cast_fp16)[name = tensor("x_365_split_cast_fp16")]; @@ -2385,75 +2385,75 @@ program(1.0) tensor input_837_strides_0 = const()[name = tensor("input_837_strides_0"), val = tensor([1])]; tensor input_837_pad_0 = const()[name = tensor("input_837_pad_0"), val = tensor([0, 0])]; tensor input_837_dilations_0 = const()[name = tensor("input_837_dilations_0"), val = tensor([1])]; - tensor const_293_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(388112384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388121664))), name = tensor("const_293_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_294_to_fp16 = const()[name = tensor("const_294_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388122240)))]; - tensor input_839_cast_fp16 = conv(bias = const_294_to_fp16, dilations = input_837_dilations_0, groups = input_837_groups_0, pad = input_837_pad_0, pad_type = input_837_pad_type_0, strides = input_837_strides_0, weight = const_293_to_fp16_palettized, x = input_835_cast_fp16)[name = tensor("input_839_cast_fp16")]; + tensor const_293_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_293_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388914304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388924672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388923584)))]; + tensor const_294_to_fp16 = const()[name = tensor("const_294_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388926784)))]; + tensor input_839_cast_fp16 = conv(bias = const_294_to_fp16, dilations = input_837_dilations_0, groups = input_837_groups_0, pad = input_837_pad_0, pad_type = input_837_pad_type_0, strides = input_837_strides_0, weight = const_293_to_fp16_quantized, x = input_835_cast_fp16)[name = tensor("input_839_cast_fp16")]; tensor input_841_cast_fp16 = silu(x = input_839_cast_fp16)[name = tensor("input_841_cast_fp16")]; tensor x_367_pad_type_0 = const()[name = tensor("x_367_pad_type_0"), val = tensor("valid")]; tensor x_367_strides_0 = const()[name = tensor("x_367_strides_0"), val = tensor([1])]; tensor x_367_pad_0 = const()[name = tensor("x_367_pad_0"), val = tensor([0, 0])]; tensor x_367_dilations_0 = const()[name = tensor("x_367_dilations_0"), val = tensor([1])]; tensor x_367_groups_0 = const()[name = tensor("x_367_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388124352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389172992))), name = tensor("encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_841_cast_fp16)[name = tensor("x_367_cast_fp16")]; + tensor encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388928896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389978624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389977536)))]; + tensor x_367_cast_fp16 = conv(dilations = x_367_dilations_0, groups = x_367_groups_0, 
pad = x_367_pad_0, pad_type = x_367_pad_type_0, strides = x_367_strides_0, weight = encoder_module_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_841_cast_fp16)[name = tensor("x_367_cast_fp16")]; tensor input_843_perm_0 = const()[name = tensor("input_843_perm_0"), val = tensor([0, 2, 1])]; tensor input_843_cast_fp16 = transpose(perm = input_843_perm_0, x = x_367_cast_fp16)[name = tensor("transpose_201")]; tensor input_845_cast_fp16 = add(x = input_827_cast_fp16, y = input_843_cast_fp16)[name = tensor("input_845_cast_fp16")]; tensor input_847_axes_0 = const()[name = tensor("input_847_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389173568)))]; - tensor encoder_module_layers_15_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389175680)))]; + tensor encoder_module_layers_15_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389980736)))]; + tensor encoder_module_layers_15_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389982848)))]; tensor input_847_cast_fp16 = layer_norm(axes = input_847_axes_0, beta = encoder_module_layers_15_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_feed_forward2_weight_to_fp16, x = input_845_cast_fp16)[name = tensor("input_847_cast_fp16")]; - tensor encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389177792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(393372160))), name = tensor("encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_palettized, x = input_847_cast_fp16)[name = tensor("linear_143_cast_fp16")]; + tensor encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389984960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394183488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394179328)))]; + tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward2_linear1_weight_to_fp16_quantized, x = input_847_cast_fp16)[name = tensor("linear_143_cast_fp16")]; tensor input_851_cast_fp16 = silu(x = linear_143_cast_fp16)[name = tensor("input_851_cast_fp16")]; - tensor encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_palettized = 
constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(393372736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397567104))), name = tensor("encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_144_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_palettized, x = input_851_cast_fp16)[name = tensor("linear_144_cast_fp16")]; + tensor encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394191744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398387200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398386112)))]; + tensor linear_144_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_15_feed_forward2_linear2_weight_to_fp16_quantized, x = input_851_cast_fp16)[name = tensor("linear_144_cast_fp16")]; tensor var_2940_to_fp16 = const()[name = tensor("op_2940_to_fp16"), val = tensor(0x1p-1)]; tensor var_2941_cast_fp16 = mul(x = linear_144_cast_fp16, y = var_2940_to_fp16)[name = tensor("op_2941_cast_fp16")]; tensor input_857_cast_fp16 = add(x = input_845_cast_fp16, y = var_2941_cast_fp16)[name = tensor("input_857_cast_fp16")]; tensor input_859_axes_0 = const()[name = tensor("input_859_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_15_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397567680)))]; - tensor encoder_module_layers_15_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397569792)))]; + tensor encoder_module_layers_15_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398389312)))]; + tensor encoder_module_layers_15_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_15_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398391424)))]; tensor input_859_cast_fp16 = layer_norm(axes = input_859_axes_0, beta = encoder_module_layers_15_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_15_norm_out_weight_to_fp16, x = input_857_cast_fp16)[name = tensor("input_859_cast_fp16")]; tensor input_861_axes_0 = const()[name = tensor("input_861_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397571904)))]; - tensor encoder_module_layers_16_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(397574016)))]; + tensor encoder_module_layers_16_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398393536)))]; + tensor encoder_module_layers_16_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398395648)))]; tensor input_861_cast_fp16 = layer_norm(axes = input_861_axes_0, beta = encoder_module_layers_16_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_feed_forward1_weight_to_fp16, x = input_859_cast_fp16)[name = tensor("input_861_cast_fp16")]; - tensor encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397576128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401770496))), name = tensor("encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_palettized, x = input_861_cast_fp16)[name = tensor("linear_145_cast_fp16")]; + tensor encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398397760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(402596288))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(402592128)))]; + tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward1_linear1_weight_to_fp16_quantized, x = input_861_cast_fp16)[name = tensor("linear_145_cast_fp16")]; tensor input_865_cast_fp16 = silu(x = linear_145_cast_fp16)[name = tensor("input_865_cast_fp16")]; - tensor encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401771072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405965440))), name = tensor("encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_146_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_palettized, x = input_865_cast_fp16)[name = tensor("linear_146_cast_fp16")]; + tensor encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(402604544))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406800000))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406798912)))]; + tensor 
linear_146_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward1_linear2_weight_to_fp16_quantized, x = input_865_cast_fp16)[name = tensor("linear_146_cast_fp16")]; tensor var_2969_to_fp16 = const()[name = tensor("op_2969_to_fp16"), val = tensor(0x1p-1)]; tensor var_2970_cast_fp16 = mul(x = linear_146_cast_fp16, y = var_2969_to_fp16)[name = tensor("op_2970_cast_fp16")]; tensor input_871_cast_fp16 = add(x = input_859_cast_fp16, y = var_2970_cast_fp16)[name = tensor("input_871_cast_fp16")]; tensor query_33_axes_0 = const()[name = tensor("query_33_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405966016)))]; - tensor encoder_module_layers_16_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405968128)))]; + tensor encoder_module_layers_16_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406802112)))]; + tensor encoder_module_layers_16_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406804224)))]; tensor query_33_cast_fp16 = layer_norm(axes = query_33_axes_0, beta = encoder_module_layers_16_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_self_att_weight_to_fp16, x = input_871_cast_fp16)[name = tensor("query_33_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405970240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407018880))), name = tensor("encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_147_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_palettized, x = query_33_cast_fp16)[name = tensor("linear_147_cast_fp16")]; + tensor encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406806336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407856064))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407854976)))]; + tensor linear_147_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_q_weight_to_fp16_quantized, x = query_33_cast_fp16)[name = tensor("linear_147_cast_fp16")]; tensor var_2986 = const()[name = tensor("op_2986"), val = tensor([1, -1, 8, 128])]; tensor q_97_cast_fp16 = reshape(shape = var_2986, x = linear_147_cast_fp16)[name = tensor("q_97_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_palettized 
= constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407019456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408068096))), name = tensor("encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_148_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_palettized, x = query_33_cast_fp16)[name = tensor("linear_148_cast_fp16")]; + tensor encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407858176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408907904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408906816)))]; + tensor linear_148_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_k_weight_to_fp16_quantized, x = query_33_cast_fp16)[name = tensor("linear_148_cast_fp16")]; tensor var_2990 = const()[name = tensor("op_2990"), val = tensor([1, -1, 8, 128])]; tensor k_65_cast_fp16 = reshape(shape = var_2990, x = linear_148_cast_fp16)[name = tensor("k_65_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408068672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409117312))), name = tensor("encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_149_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_palettized, x = query_33_cast_fp16)[name = tensor("linear_149_cast_fp16")]; + tensor encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408910016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409959744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409958656)))]; + tensor linear_149_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_v_weight_to_fp16_quantized, x = query_33_cast_fp16)[name = tensor("linear_149_cast_fp16")]; tensor var_2994 = const()[name = tensor("op_2994"), val = tensor([1, -1, 8, 128])]; tensor v_33_cast_fp16 = reshape(shape = var_2994, x = linear_149_cast_fp16)[name = tensor("v_33_cast_fp16")]; tensor value_37_perm_0 = const()[name = tensor("value_37_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_16_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_16_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409117888)))]; + tensor encoder_module_layers_16_self_attn_pos_bias_u_to_fp16 = const()[name = 
tensor("encoder_module_layers_16_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409961856)))]; tensor var_3006_cast_fp16 = add(x = q_97_cast_fp16, y = encoder_module_layers_16_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3006_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_16_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409120000)))]; + tensor encoder_module_layers_16_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_16_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409963968)))]; tensor var_3008_cast_fp16 = add(x = q_97_cast_fp16, y = encoder_module_layers_16_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3008_cast_fp16")]; tensor q_with_bias_v_33_perm_0 = const()[name = tensor("q_with_bias_v_33_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_375_transpose_x_0 = const()[name = tensor("x_375_transpose_x_0"), val = tensor(false)]; tensor x_375_transpose_y_0 = const()[name = tensor("x_375_transpose_y_0"), val = tensor(false)]; - tensor op_3010_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409122112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409506176))), name = tensor("op_3010_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3010_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3010_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409966080))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410350592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410350144)))]; tensor q_with_bias_v_33_cast_fp16 = transpose(perm = q_with_bias_v_33_perm_0, x = var_3008_cast_fp16)[name = tensor("transpose_200")]; - tensor x_375_cast_fp16 = matmul(transpose_x = x_375_transpose_x_0, transpose_y = x_375_transpose_y_0, x = q_with_bias_v_33_cast_fp16, y = op_3010_to_fp16_palettized)[name = tensor("x_375_cast_fp16")]; + tensor x_375_cast_fp16 = matmul(transpose_x = x_375_transpose_x_0, transpose_y = x_375_transpose_y_0, x = q_with_bias_v_33_cast_fp16, y = op_3010_to_fp16_quantized)[name = tensor("x_375_cast_fp16")]; tensor x_377_pad_0 = const()[name = tensor("x_377_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_377_mode_0 = const()[name = tensor("x_377_mode_0"), val = tensor("constant")]; tensor const_189_to_fp16 = const()[name = tensor("const_189_to_fp16"), val = tensor(0x0p+0)]; @@ -2491,12 +2491,12 @@ program(1.0) tensor var_3043 = const()[name = tensor("op_3043"), val = tensor([1, -1, 1024])]; tensor var_3042_cast_fp16 = transpose(perm = var_3042_perm_0, x = x_381_cast_fp16)[name = tensor("transpose_196")]; tensor input_875_cast_fp16 = reshape(shape = var_3043, x = var_3042_cast_fp16)[name = tensor("input_875_cast_fp16")]; - tensor encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409506752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(410555392))), name = tensor("encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_151_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_palettized, x = input_875_cast_fp16)[name = tensor("linear_151_cast_fp16")]; + tensor encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410351424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411401152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411400064)))]; + tensor linear_151_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_self_attn_linear_out_weight_to_fp16_quantized, x = input_875_cast_fp16)[name = tensor("linear_151_cast_fp16")]; tensor input_879_cast_fp16 = add(x = input_871_cast_fp16, y = linear_151_cast_fp16)[name = tensor("input_879_cast_fp16")]; tensor x_385_axes_0 = const()[name = tensor("x_385_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410555968)))]; - tensor encoder_module_layers_16_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410558080)))]; + tensor encoder_module_layers_16_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411403264)))]; + tensor encoder_module_layers_16_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411405376)))]; tensor x_385_cast_fp16 = layer_norm(axes = x_385_axes_0, beta = encoder_module_layers_16_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_conv_weight_to_fp16, x = input_879_cast_fp16)[name = tensor("x_385_cast_fp16")]; tensor input_881_perm_0 = const()[name = tensor("input_881_perm_0"), val = tensor([0, 2, 1])]; tensor input_883_pad_type_0 = const()[name = tensor("input_883_pad_type_0"), val = tensor("valid")]; @@ -2504,9 +2504,9 @@ program(1.0) tensor input_883_pad_0 = const()[name = tensor("input_883_pad_0"), val = tensor([0, 0])]; tensor input_883_dilations_0 = const()[name = tensor("input_883_dilations_0"), val = tensor([1])]; tensor input_883_groups_0 = const()[name = tensor("input_883_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410560192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412657408))), name = tensor("encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor 
encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411407488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413506816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413504704)))]; tensor input_881_cast_fp16 = transpose(perm = input_881_perm_0, x = x_385_cast_fp16)[name = tensor("transpose_195")]; - tensor input_883_cast_fp16 = conv(dilations = input_883_dilations_0, groups = input_883_groups_0, pad = input_883_pad_0, pad_type = input_883_pad_type_0, strides = input_883_strides_0, weight = encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_881_cast_fp16)[name = tensor("input_883_cast_fp16")]; + tensor input_883_cast_fp16 = conv(dilations = input_883_dilations_0, groups = input_883_groups_0, pad = input_883_pad_0, pad_type = input_883_pad_type_0, strides = input_883_strides_0, weight = encoder_module_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_881_cast_fp16)[name = tensor("input_883_cast_fp16")]; tensor x_387_split_num_splits_0 = const()[name = tensor("x_387_split_num_splits_0"), val = tensor(2)]; tensor x_387_split_axis_0 = const()[name = tensor("x_387_split_axis_0"), val = tensor(1)]; tensor x_387_split_cast_fp16_0, tensor x_387_split_cast_fp16_1 = split(axis = x_387_split_axis_0, num_splits = x_387_split_num_splits_0, x = input_883_cast_fp16)[name = tensor("x_387_split_cast_fp16")]; @@ -2522,75 +2522,75 @@ program(1.0) tensor input_889_strides_0 = const()[name = tensor("input_889_strides_0"), val = tensor([1])]; tensor input_889_pad_0 = const()[name = tensor("input_889_pad_0"), val = tensor([0, 0])]; tensor input_889_dilations_0 = const()[name = tensor("input_889_dilations_0"), val = tensor([1])]; - tensor const_295_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412657984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412667264))), name = tensor("const_295_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_296_to_fp16 = const()[name = tensor("const_296_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412667840)))]; - tensor input_891_cast_fp16 = conv(bias = const_296_to_fp16, dilations = input_889_dilations_0, groups = input_889_groups_0, pad = input_889_pad_0, pad_type = input_889_pad_type_0, strides = input_889_strides_0, weight = const_295_to_fp16_palettized, x = input_887_cast_fp16)[name = tensor("input_891_cast_fp16")]; + tensor const_295_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_295_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413510976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413521344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413520256)))]; + tensor const_296_to_fp16 = const()[name = tensor("const_296_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413523456)))]; + tensor input_891_cast_fp16 = conv(bias = 
const_296_to_fp16, dilations = input_889_dilations_0, groups = input_889_groups_0, pad = input_889_pad_0, pad_type = input_889_pad_type_0, strides = input_889_strides_0, weight = const_295_to_fp16_quantized, x = input_887_cast_fp16)[name = tensor("input_891_cast_fp16")]; tensor input_893_cast_fp16 = silu(x = input_891_cast_fp16)[name = tensor("input_893_cast_fp16")]; tensor x_389_pad_type_0 = const()[name = tensor("x_389_pad_type_0"), val = tensor("valid")]; tensor x_389_strides_0 = const()[name = tensor("x_389_strides_0"), val = tensor([1])]; tensor x_389_pad_0 = const()[name = tensor("x_389_pad_0"), val = tensor([0, 0])]; tensor x_389_dilations_0 = const()[name = tensor("x_389_dilations_0"), val = tensor([1])]; tensor x_389_groups_0 = const()[name = tensor("x_389_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412669952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413718592))), name = tensor("encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_389_cast_fp16 = conv(dilations = x_389_dilations_0, groups = x_389_groups_0, pad = x_389_pad_0, pad_type = x_389_pad_type_0, strides = x_389_strides_0, weight = encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_893_cast_fp16)[name = tensor("x_389_cast_fp16")]; + tensor encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413525568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(414575296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(414574208)))]; + tensor x_389_cast_fp16 = conv(dilations = x_389_dilations_0, groups = x_389_groups_0, pad = x_389_pad_0, pad_type = x_389_pad_type_0, strides = x_389_strides_0, weight = encoder_module_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_893_cast_fp16)[name = tensor("x_389_cast_fp16")]; tensor input_895_perm_0 = const()[name = tensor("input_895_perm_0"), val = tensor([0, 2, 1])]; tensor input_895_cast_fp16 = transpose(perm = input_895_perm_0, x = x_389_cast_fp16)[name = tensor("transpose_194")]; tensor input_897_cast_fp16 = add(x = input_879_cast_fp16, y = input_895_cast_fp16)[name = tensor("input_897_cast_fp16")]; tensor input_899_axes_0 = const()[name = tensor("input_899_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413719168)))]; - tensor encoder_module_layers_16_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413721280)))]; + tensor encoder_module_layers_16_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(414577408)))]; + tensor encoder_module_layers_16_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(414579520)))]; tensor input_899_cast_fp16 = layer_norm(axes = input_899_axes_0, beta = encoder_module_layers_16_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_feed_forward2_weight_to_fp16, x = input_897_cast_fp16)[name = tensor("input_899_cast_fp16")]; - tensor encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413723392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417917760))), name = tensor("encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_152_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_palettized, x = input_899_cast_fp16)[name = tensor("linear_152_cast_fp16")]; + tensor encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(414581632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(418780160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(418776000)))]; + tensor linear_152_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward2_linear1_weight_to_fp16_quantized, x = input_899_cast_fp16)[name = tensor("linear_152_cast_fp16")]; tensor input_903_cast_fp16 = silu(x = linear_152_cast_fp16)[name = tensor("input_903_cast_fp16")]; - tensor encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417918336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422112704))), name = tensor("encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_153_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_palettized, x = input_903_cast_fp16)[name = tensor("linear_153_cast_fp16")]; + tensor encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(418788416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422983872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422982784)))]; + tensor linear_153_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_16_feed_forward2_linear2_weight_to_fp16_quantized, x = input_903_cast_fp16)[name = tensor("linear_153_cast_fp16")]; 
tensor var_3103_to_fp16 = const()[name = tensor("op_3103_to_fp16"), val = tensor(0x1p-1)]; tensor var_3104_cast_fp16 = mul(x = linear_153_cast_fp16, y = var_3103_to_fp16)[name = tensor("op_3104_cast_fp16")]; tensor input_909_cast_fp16 = add(x = input_897_cast_fp16, y = var_3104_cast_fp16)[name = tensor("input_909_cast_fp16")]; tensor input_911_axes_0 = const()[name = tensor("input_911_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_16_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422113280)))]; - tensor encoder_module_layers_16_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422115392)))]; + tensor encoder_module_layers_16_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422985984)))]; + tensor encoder_module_layers_16_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_16_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422988096)))]; tensor input_911_cast_fp16 = layer_norm(axes = input_911_axes_0, beta = encoder_module_layers_16_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_16_norm_out_weight_to_fp16, x = input_909_cast_fp16)[name = tensor("input_911_cast_fp16")]; tensor input_913_axes_0 = const()[name = tensor("input_913_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_17_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422117504)))]; - tensor encoder_module_layers_17_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422119616)))]; + tensor encoder_module_layers_17_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422990208)))]; + tensor encoder_module_layers_17_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422992320)))]; tensor input_913_cast_fp16 = layer_norm(axes = input_913_axes_0, beta = encoder_module_layers_17_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_feed_forward1_weight_to_fp16, x = input_911_cast_fp16)[name = tensor("input_913_cast_fp16")]; - tensor encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422121728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(426316096))), name = tensor("encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_154_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = 
encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_palettized, x = input_913_cast_fp16)[name = tensor("linear_154_cast_fp16")]; + tensor encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422994432))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(427192960))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(427188800)))]; + tensor linear_154_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward1_linear1_weight_to_fp16_quantized, x = input_913_cast_fp16)[name = tensor("linear_154_cast_fp16")]; tensor input_917_cast_fp16 = silu(x = linear_154_cast_fp16)[name = tensor("input_917_cast_fp16")]; - tensor encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(426316672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430511040))), name = tensor("encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_155_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_palettized, x = input_917_cast_fp16)[name = tensor("linear_155_cast_fp16")]; + tensor encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(427201216))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431396672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431395584)))]; + tensor linear_155_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward1_linear2_weight_to_fp16_quantized, x = input_917_cast_fp16)[name = tensor("linear_155_cast_fp16")]; tensor var_3132_to_fp16 = const()[name = tensor("op_3132_to_fp16"), val = tensor(0x1p-1)]; tensor var_3133_cast_fp16 = mul(x = linear_155_cast_fp16, y = var_3132_to_fp16)[name = tensor("op_3133_cast_fp16")]; tensor input_923_cast_fp16 = add(x = input_911_cast_fp16, y = var_3133_cast_fp16)[name = tensor("input_923_cast_fp16")]; tensor query_35_axes_0 = const()[name = tensor("query_35_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_17_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430511616)))]; - tensor encoder_module_layers_17_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430513728)))]; + tensor encoder_module_layers_17_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(431398784)))]; + tensor encoder_module_layers_17_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431400896)))]; tensor query_35_cast_fp16 = layer_norm(axes = query_35_axes_0, beta = encoder_module_layers_17_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_self_att_weight_to_fp16, x = input_923_cast_fp16)[name = tensor("query_35_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430515840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431564480))), name = tensor("encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_156_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_palettized, x = query_35_cast_fp16)[name = tensor("linear_156_cast_fp16")]; + tensor encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431403008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432452736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432451648)))]; + tensor linear_156_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_q_weight_to_fp16_quantized, x = query_35_cast_fp16)[name = tensor("linear_156_cast_fp16")]; tensor var_3149 = const()[name = tensor("op_3149"), val = tensor([1, -1, 8, 128])]; tensor q_103_cast_fp16 = reshape(shape = var_3149, x = linear_156_cast_fp16)[name = tensor("q_103_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431565056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432613696))), name = tensor("encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_157_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_palettized, x = query_35_cast_fp16)[name = tensor("linear_157_cast_fp16")]; + tensor encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432454848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433504576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433503488)))]; + tensor linear_157_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_k_weight_to_fp16_quantized, x = query_35_cast_fp16)[name = tensor("linear_157_cast_fp16")]; tensor var_3153 = 
const()[name = tensor("op_3153"), val = tensor([1, -1, 8, 128])]; tensor k_69_cast_fp16 = reshape(shape = var_3153, x = linear_157_cast_fp16)[name = tensor("k_69_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432614272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433662912))), name = tensor("encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_158_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_palettized, x = query_35_cast_fp16)[name = tensor("linear_158_cast_fp16")]; + tensor encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433506688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434556416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434555328)))]; + tensor linear_158_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_v_weight_to_fp16_quantized, x = query_35_cast_fp16)[name = tensor("linear_158_cast_fp16")]; tensor var_3157 = const()[name = tensor("op_3157"), val = tensor([1, -1, 8, 128])]; tensor v_35_cast_fp16 = reshape(shape = var_3157, x = linear_158_cast_fp16)[name = tensor("v_35_cast_fp16")]; tensor value_39_perm_0 = const()[name = tensor("value_39_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_17_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_17_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433663488)))]; + tensor encoder_module_layers_17_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_17_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434558528)))]; tensor var_3169_cast_fp16 = add(x = q_103_cast_fp16, y = encoder_module_layers_17_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3169_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_17_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(433665600)))]; + tensor encoder_module_layers_17_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_17_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434560640)))]; tensor var_3171_cast_fp16 = add(x = q_103_cast_fp16, y = encoder_module_layers_17_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3171_cast_fp16")]; tensor q_with_bias_v_35_perm_0 = const()[name = tensor("q_with_bias_v_35_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_397_transpose_x_0 = const()[name = tensor("x_397_transpose_x_0"), val = tensor(false)]; tensor x_397_transpose_y_0 = const()[name = tensor("x_397_transpose_y_0"), val = tensor(false)]; - tensor op_3173_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path 
= tensor("@model_path/weights/weight.bin"), offset = tensor(433667712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434051776))), name = tensor("op_3173_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3173_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3173_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434562752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434947264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434946816)))]; tensor q_with_bias_v_35_cast_fp16 = transpose(perm = q_with_bias_v_35_perm_0, x = var_3171_cast_fp16)[name = tensor("transpose_193")]; - tensor x_397_cast_fp16 = matmul(transpose_x = x_397_transpose_x_0, transpose_y = x_397_transpose_y_0, x = q_with_bias_v_35_cast_fp16, y = op_3173_to_fp16_palettized)[name = tensor("x_397_cast_fp16")]; + tensor x_397_cast_fp16 = matmul(transpose_x = x_397_transpose_x_0, transpose_y = x_397_transpose_y_0, x = q_with_bias_v_35_cast_fp16, y = op_3173_to_fp16_quantized)[name = tensor("x_397_cast_fp16")]; tensor x_399_pad_0 = const()[name = tensor("x_399_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_399_mode_0 = const()[name = tensor("x_399_mode_0"), val = tensor("constant")]; tensor const_199_to_fp16 = const()[name = tensor("const_199_to_fp16"), val = tensor(0x0p+0)]; @@ -2628,12 +2628,12 @@ program(1.0) tensor var_3206 = const()[name = tensor("op_3206"), val = tensor([1, -1, 1024])]; tensor var_3205_cast_fp16 = transpose(perm = var_3205_perm_0, x = x_403_cast_fp16)[name = tensor("transpose_189")]; tensor input_927_cast_fp16 = reshape(shape = var_3206, x = var_3205_cast_fp16)[name = tensor("input_927_cast_fp16")]; - tensor encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434052352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435100992))), name = tensor("encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_160_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_palettized, x = input_927_cast_fp16)[name = tensor("linear_160_cast_fp16")]; + tensor encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434948096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435997824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435996736)))]; + tensor linear_160_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_self_attn_linear_out_weight_to_fp16_quantized, x = input_927_cast_fp16)[name = tensor("linear_160_cast_fp16")]; tensor input_931_cast_fp16 = add(x = input_923_cast_fp16, y = linear_160_cast_fp16)[name = tensor("input_931_cast_fp16")]; tensor x_407_axes_0 = const()[name = tensor("x_407_axes_0"), val = tensor([-1])]; - tensor 
encoder_module_layers_17_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435101568)))]; - tensor encoder_module_layers_17_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435103680)))]; + tensor encoder_module_layers_17_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435999936)))]; + tensor encoder_module_layers_17_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436002048)))]; tensor x_407_cast_fp16 = layer_norm(axes = x_407_axes_0, beta = encoder_module_layers_17_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_conv_weight_to_fp16, x = input_931_cast_fp16)[name = tensor("x_407_cast_fp16")]; tensor input_933_perm_0 = const()[name = tensor("input_933_perm_0"), val = tensor([0, 2, 1])]; tensor input_935_pad_type_0 = const()[name = tensor("input_935_pad_type_0"), val = tensor("valid")]; @@ -2641,9 +2641,9 @@ program(1.0) tensor input_935_pad_0 = const()[name = tensor("input_935_pad_0"), val = tensor([0, 0])]; tensor input_935_dilations_0 = const()[name = tensor("input_935_dilations_0"), val = tensor([1])]; tensor input_935_groups_0 = const()[name = tensor("input_935_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435105792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437203008))), name = tensor("encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436004160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438103488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438101376)))]; tensor input_933_cast_fp16 = transpose(perm = input_933_perm_0, x = x_407_cast_fp16)[name = tensor("transpose_188")]; - tensor input_935_cast_fp16 = conv(dilations = input_935_dilations_0, groups = input_935_groups_0, pad = input_935_pad_0, pad_type = input_935_pad_type_0, strides = input_935_strides_0, weight = encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_933_cast_fp16)[name = tensor("input_935_cast_fp16")]; + tensor input_935_cast_fp16 = conv(dilations = input_935_dilations_0, groups = input_935_groups_0, pad = input_935_pad_0, pad_type = input_935_pad_type_0, strides = input_935_strides_0, weight = encoder_module_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_933_cast_fp16)[name = tensor("input_935_cast_fp16")]; tensor x_409_split_num_splits_0 = const()[name = 
tensor("x_409_split_num_splits_0"), val = tensor(2)]; tensor x_409_split_axis_0 = const()[name = tensor("x_409_split_axis_0"), val = tensor(1)]; tensor x_409_split_cast_fp16_0, tensor x_409_split_cast_fp16_1 = split(axis = x_409_split_axis_0, num_splits = x_409_split_num_splits_0, x = input_935_cast_fp16)[name = tensor("x_409_split_cast_fp16")]; @@ -2659,75 +2659,75 @@ program(1.0) tensor input_941_strides_0 = const()[name = tensor("input_941_strides_0"), val = tensor([1])]; tensor input_941_pad_0 = const()[name = tensor("input_941_pad_0"), val = tensor([0, 0])]; tensor input_941_dilations_0 = const()[name = tensor("input_941_dilations_0"), val = tensor([1])]; - tensor const_297_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437203584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437212864))), name = tensor("const_297_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_298_to_fp16 = const()[name = tensor("const_298_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437213440)))]; - tensor input_943_cast_fp16 = conv(bias = const_298_to_fp16, dilations = input_941_dilations_0, groups = input_941_groups_0, pad = input_941_pad_0, pad_type = input_941_pad_type_0, strides = input_941_strides_0, weight = const_297_to_fp16_palettized, x = input_939_cast_fp16)[name = tensor("input_943_cast_fp16")]; + tensor const_297_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_297_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438107648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438118016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438116928)))]; + tensor const_298_to_fp16 = const()[name = tensor("const_298_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438120128)))]; + tensor input_943_cast_fp16 = conv(bias = const_298_to_fp16, dilations = input_941_dilations_0, groups = input_941_groups_0, pad = input_941_pad_0, pad_type = input_941_pad_type_0, strides = input_941_strides_0, weight = const_297_to_fp16_quantized, x = input_939_cast_fp16)[name = tensor("input_943_cast_fp16")]; tensor input_945_cast_fp16 = silu(x = input_943_cast_fp16)[name = tensor("input_945_cast_fp16")]; tensor x_411_pad_type_0 = const()[name = tensor("x_411_pad_type_0"), val = tensor("valid")]; tensor x_411_strides_0 = const()[name = tensor("x_411_strides_0"), val = tensor([1])]; tensor x_411_pad_0 = const()[name = tensor("x_411_pad_0"), val = tensor([0, 0])]; tensor x_411_dilations_0 = const()[name = tensor("x_411_dilations_0"), val = tensor([1])]; tensor x_411_groups_0 = const()[name = tensor("x_411_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(437215552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438264192))), name = tensor("encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_411_cast_fp16 = conv(dilations = x_411_dilations_0, groups = x_411_groups_0, pad = x_411_pad_0, pad_type = 
x_411_pad_type_0, strides = x_411_strides_0, weight = encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_945_cast_fp16)[name = tensor("x_411_cast_fp16")]; + tensor encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438122240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439171968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439170880)))]; + tensor x_411_cast_fp16 = conv(dilations = x_411_dilations_0, groups = x_411_groups_0, pad = x_411_pad_0, pad_type = x_411_pad_type_0, strides = x_411_strides_0, weight = encoder_module_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_945_cast_fp16)[name = tensor("x_411_cast_fp16")]; tensor input_947_perm_0 = const()[name = tensor("input_947_perm_0"), val = tensor([0, 2, 1])]; tensor input_947_cast_fp16 = transpose(perm = input_947_perm_0, x = x_411_cast_fp16)[name = tensor("transpose_187")]; tensor input_949_cast_fp16 = add(x = input_931_cast_fp16, y = input_947_cast_fp16)[name = tensor("input_949_cast_fp16")]; tensor input_951_axes_0 = const()[name = tensor("input_951_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_17_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438264768)))]; - tensor encoder_module_layers_17_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438266880)))]; + tensor encoder_module_layers_17_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439174080)))]; + tensor encoder_module_layers_17_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439176192)))]; tensor input_951_cast_fp16 = layer_norm(axes = input_951_axes_0, beta = encoder_module_layers_17_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_feed_forward2_weight_to_fp16, x = input_949_cast_fp16)[name = tensor("input_951_cast_fp16")]; - tensor encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(438268992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(442463360))), name = tensor("encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_palettized, x = input_951_cast_fp16)[name = tensor("linear_161_cast_fp16")]; + tensor encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), 
name = tensor("encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439178304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443376832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443372672)))]; + tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward2_linear1_weight_to_fp16_quantized, x = input_951_cast_fp16)[name = tensor("linear_161_cast_fp16")]; tensor input_955_cast_fp16 = silu(x = linear_161_cast_fp16)[name = tensor("input_955_cast_fp16")]; - tensor encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(442463936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446658304))), name = tensor("encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_162_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_palettized, x = input_955_cast_fp16)[name = tensor("linear_162_cast_fp16")]; + tensor encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(443385088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447580544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447579456)))]; + tensor linear_162_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_17_feed_forward2_linear2_weight_to_fp16_quantized, x = input_955_cast_fp16)[name = tensor("linear_162_cast_fp16")]; tensor var_3266_to_fp16 = const()[name = tensor("op_3266_to_fp16"), val = tensor(0x1p-1)]; tensor var_3267_cast_fp16 = mul(x = linear_162_cast_fp16, y = var_3266_to_fp16)[name = tensor("op_3267_cast_fp16")]; tensor input_961_cast_fp16 = add(x = input_949_cast_fp16, y = var_3267_cast_fp16)[name = tensor("input_961_cast_fp16")]; tensor input_963_axes_0 = const()[name = tensor("input_963_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_17_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446658880)))]; - tensor encoder_module_layers_17_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446660992)))]; + tensor encoder_module_layers_17_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447582656)))]; + tensor encoder_module_layers_17_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_17_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447584768)))]; tensor input_963_cast_fp16 = layer_norm(axes 
= input_963_axes_0, beta = encoder_module_layers_17_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_17_norm_out_weight_to_fp16, x = input_961_cast_fp16)[name = tensor("input_963_cast_fp16")]; tensor input_965_axes_0 = const()[name = tensor("input_965_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_18_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446663104)))]; - tensor encoder_module_layers_18_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446665216)))]; + tensor encoder_module_layers_18_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447586880)))]; + tensor encoder_module_layers_18_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447588992)))]; tensor input_965_cast_fp16 = layer_norm(axes = input_965_axes_0, beta = encoder_module_layers_18_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_feed_forward1_weight_to_fp16, x = input_963_cast_fp16)[name = tensor("input_965_cast_fp16")]; - tensor encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(446667328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(450861696))), name = tensor("encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_palettized, x = input_965_cast_fp16)[name = tensor("linear_163_cast_fp16")]; + tensor encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447591104))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451789632))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451785472)))]; + tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward1_linear1_weight_to_fp16_quantized, x = input_965_cast_fp16)[name = tensor("linear_163_cast_fp16")]; tensor input_969_cast_fp16 = silu(x = linear_163_cast_fp16)[name = tensor("input_969_cast_fp16")]; - tensor encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(450862272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455056640))), name = tensor("encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_palettized"), shape = 
tensor([1024, 4096])]; - tensor linear_164_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_palettized, x = input_969_cast_fp16)[name = tensor("linear_164_cast_fp16")]; + tensor encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451797888))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455993344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455992256)))]; + tensor linear_164_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward1_linear2_weight_to_fp16_quantized, x = input_969_cast_fp16)[name = tensor("linear_164_cast_fp16")]; tensor var_3295_to_fp16 = const()[name = tensor("op_3295_to_fp16"), val = tensor(0x1p-1)]; tensor var_3296_cast_fp16 = mul(x = linear_164_cast_fp16, y = var_3295_to_fp16)[name = tensor("op_3296_cast_fp16")]; tensor input_975_cast_fp16 = add(x = input_963_cast_fp16, y = var_3296_cast_fp16)[name = tensor("input_975_cast_fp16")]; tensor query_37_axes_0 = const()[name = tensor("query_37_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_18_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455057216)))]; - tensor encoder_module_layers_18_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455059328)))]; + tensor encoder_module_layers_18_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455995456)))]; + tensor encoder_module_layers_18_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455997568)))]; tensor query_37_cast_fp16 = layer_norm(axes = query_37_axes_0, beta = encoder_module_layers_18_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_self_att_weight_to_fp16, x = input_975_cast_fp16)[name = tensor("query_37_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455061440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456110080))), name = tensor("encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_165_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_palettized, x = query_37_cast_fp16)[name = tensor("linear_165_cast_fp16")]; + tensor encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data 
= tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455999680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457049408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457048320)))]; + tensor linear_165_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_q_weight_to_fp16_quantized, x = query_37_cast_fp16)[name = tensor("linear_165_cast_fp16")]; tensor var_3312 = const()[name = tensor("op_3312"), val = tensor([1, -1, 8, 128])]; tensor q_109_cast_fp16 = reshape(shape = var_3312, x = linear_165_cast_fp16)[name = tensor("q_109_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456110656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457159296))), name = tensor("encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_166_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_palettized, x = query_37_cast_fp16)[name = tensor("linear_166_cast_fp16")]; + tensor encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457051520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458101248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458100160)))]; + tensor linear_166_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_k_weight_to_fp16_quantized, x = query_37_cast_fp16)[name = tensor("linear_166_cast_fp16")]; tensor var_3316 = const()[name = tensor("op_3316"), val = tensor([1, -1, 8, 128])]; tensor k_73_cast_fp16 = reshape(shape = var_3316, x = linear_166_cast_fp16)[name = tensor("k_73_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457159872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458208512))), name = tensor("encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_167_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_palettized, x = query_37_cast_fp16)[name = tensor("linear_167_cast_fp16")]; + tensor encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458103360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459153088))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459152000)))]; + tensor linear_167_cast_fp16 = linear(bias = 
linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_v_weight_to_fp16_quantized, x = query_37_cast_fp16)[name = tensor("linear_167_cast_fp16")]; tensor var_3320 = const()[name = tensor("op_3320"), val = tensor([1, -1, 8, 128])]; tensor v_37_cast_fp16 = reshape(shape = var_3320, x = linear_167_cast_fp16)[name = tensor("v_37_cast_fp16")]; tensor value_41_perm_0 = const()[name = tensor("value_41_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_18_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_18_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458209088)))]; + tensor encoder_module_layers_18_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_18_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459155200)))]; tensor var_3332_cast_fp16 = add(x = q_109_cast_fp16, y = encoder_module_layers_18_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3332_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_18_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458211200)))]; + tensor encoder_module_layers_18_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_18_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459157312)))]; tensor var_3334_cast_fp16 = add(x = q_109_cast_fp16, y = encoder_module_layers_18_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3334_cast_fp16")]; tensor q_with_bias_v_37_perm_0 = const()[name = tensor("q_with_bias_v_37_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_419_transpose_x_0 = const()[name = tensor("x_419_transpose_x_0"), val = tensor(false)]; tensor x_419_transpose_y_0 = const()[name = tensor("x_419_transpose_y_0"), val = tensor(false)]; - tensor op_3336_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458213312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458597376))), name = tensor("op_3336_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3336_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3336_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459159424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459543936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459543488)))]; tensor q_with_bias_v_37_cast_fp16 = transpose(perm = q_with_bias_v_37_perm_0, x = var_3334_cast_fp16)[name = tensor("transpose_186")]; - tensor x_419_cast_fp16 = matmul(transpose_x = x_419_transpose_x_0, transpose_y = x_419_transpose_y_0, x = q_with_bias_v_37_cast_fp16, y = op_3336_to_fp16_palettized)[name = tensor("x_419_cast_fp16")]; + tensor x_419_cast_fp16 = matmul(transpose_x = x_419_transpose_x_0, transpose_y = x_419_transpose_y_0, x = q_with_bias_v_37_cast_fp16, y = op_3336_to_fp16_quantized)[name = tensor("x_419_cast_fp16")]; tensor x_421_pad_0 = const()[name = tensor("x_421_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_421_mode_0 = const()[name = 
tensor("x_421_mode_0"), val = tensor("constant")]; tensor const_209_to_fp16 = const()[name = tensor("const_209_to_fp16"), val = tensor(0x0p+0)]; @@ -2765,12 +2765,12 @@ program(1.0) tensor var_3369 = const()[name = tensor("op_3369"), val = tensor([1, -1, 1024])]; tensor var_3368_cast_fp16 = transpose(perm = var_3368_perm_0, x = x_425_cast_fp16)[name = tensor("transpose_182")]; tensor input_979_cast_fp16 = reshape(shape = var_3369, x = var_3368_cast_fp16)[name = tensor("input_979_cast_fp16")]; - tensor encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(458597952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459646592))), name = tensor("encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_169_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_palettized, x = input_979_cast_fp16)[name = tensor("linear_169_cast_fp16")]; + tensor encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459544768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460594496))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460593408)))]; + tensor linear_169_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_self_attn_linear_out_weight_to_fp16_quantized, x = input_979_cast_fp16)[name = tensor("linear_169_cast_fp16")]; tensor input_983_cast_fp16 = add(x = input_975_cast_fp16, y = linear_169_cast_fp16)[name = tensor("input_983_cast_fp16")]; tensor x_429_axes_0 = const()[name = tensor("x_429_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_18_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459647168)))]; - tensor encoder_module_layers_18_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459649280)))]; + tensor encoder_module_layers_18_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460596608)))]; + tensor encoder_module_layers_18_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460598720)))]; tensor x_429_cast_fp16 = layer_norm(axes = x_429_axes_0, beta = encoder_module_layers_18_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_conv_weight_to_fp16, x = input_983_cast_fp16)[name = tensor("x_429_cast_fp16")]; tensor input_985_perm_0 = const()[name = tensor("input_985_perm_0"), val = tensor([0, 2, 1])]; tensor input_987_pad_type_0 = const()[name = tensor("input_987_pad_type_0"), val = tensor("valid")]; @@ -2778,9 +2778,9 @@ program(1.0) 
tensor input_987_pad_0 = const()[name = tensor("input_987_pad_0"), val = tensor([0, 0])]; tensor input_987_dilations_0 = const()[name = tensor("input_987_dilations_0"), val = tensor([1])]; tensor input_987_groups_0 = const()[name = tensor("input_987_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459651392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461748608))), name = tensor("encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460600832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462700160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462698048)))]; tensor input_985_cast_fp16 = transpose(perm = input_985_perm_0, x = x_429_cast_fp16)[name = tensor("transpose_181")]; - tensor input_987_cast_fp16 = conv(dilations = input_987_dilations_0, groups = input_987_groups_0, pad = input_987_pad_0, pad_type = input_987_pad_type_0, strides = input_987_strides_0, weight = encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_985_cast_fp16)[name = tensor("input_987_cast_fp16")]; + tensor input_987_cast_fp16 = conv(dilations = input_987_dilations_0, groups = input_987_groups_0, pad = input_987_pad_0, pad_type = input_987_pad_type_0, strides = input_987_strides_0, weight = encoder_module_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_985_cast_fp16)[name = tensor("input_987_cast_fp16")]; tensor x_431_split_num_splits_0 = const()[name = tensor("x_431_split_num_splits_0"), val = tensor(2)]; tensor x_431_split_axis_0 = const()[name = tensor("x_431_split_axis_0"), val = tensor(1)]; tensor x_431_split_cast_fp16_0, tensor x_431_split_cast_fp16_1 = split(axis = x_431_split_axis_0, num_splits = x_431_split_num_splits_0, x = input_987_cast_fp16)[name = tensor("x_431_split_cast_fp16")]; @@ -2796,75 +2796,75 @@ program(1.0) tensor input_993_strides_0 = const()[name = tensor("input_993_strides_0"), val = tensor([1])]; tensor input_993_pad_0 = const()[name = tensor("input_993_pad_0"), val = tensor([0, 0])]; tensor input_993_dilations_0 = const()[name = tensor("input_993_dilations_0"), val = tensor([1])]; - tensor const_299_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461749184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461758464))), name = tensor("const_299_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_300_to_fp16 = const()[name = tensor("const_300_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461759040)))]; - tensor input_995_cast_fp16 = conv(bias = const_300_to_fp16, dilations = input_993_dilations_0, groups = input_993_groups_0, pad = input_993_pad_0, pad_type = input_993_pad_type_0, strides = input_993_strides_0, weight = const_299_to_fp16_palettized, x = 
input_991_cast_fp16)[name = tensor("input_995_cast_fp16")]; + tensor const_299_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_299_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462704320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462714688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462713600)))]; + tensor const_300_to_fp16 = const()[name = tensor("const_300_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462716800)))]; + tensor input_995_cast_fp16 = conv(bias = const_300_to_fp16, dilations = input_993_dilations_0, groups = input_993_groups_0, pad = input_993_pad_0, pad_type = input_993_pad_type_0, strides = input_993_strides_0, weight = const_299_to_fp16_quantized, x = input_991_cast_fp16)[name = tensor("input_995_cast_fp16")]; tensor input_997_cast_fp16 = silu(x = input_995_cast_fp16)[name = tensor("input_997_cast_fp16")]; tensor x_433_pad_type_0 = const()[name = tensor("x_433_pad_type_0"), val = tensor("valid")]; tensor x_433_strides_0 = const()[name = tensor("x_433_strides_0"), val = tensor([1])]; tensor x_433_pad_0 = const()[name = tensor("x_433_pad_0"), val = tensor([0, 0])]; tensor x_433_dilations_0 = const()[name = tensor("x_433_dilations_0"), val = tensor([1])]; tensor x_433_groups_0 = const()[name = tensor("x_433_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461761152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462809792))), name = tensor("encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_433_cast_fp16 = conv(dilations = x_433_dilations_0, groups = x_433_groups_0, pad = x_433_pad_0, pad_type = x_433_pad_type_0, strides = x_433_strides_0, weight = encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_997_cast_fp16)[name = tensor("x_433_cast_fp16")]; + tensor encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462718912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463768640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463767552)))]; + tensor x_433_cast_fp16 = conv(dilations = x_433_dilations_0, groups = x_433_groups_0, pad = x_433_pad_0, pad_type = x_433_pad_type_0, strides = x_433_strides_0, weight = encoder_module_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_997_cast_fp16)[name = tensor("x_433_cast_fp16")]; tensor input_999_perm_0 = const()[name = tensor("input_999_perm_0"), val = tensor([0, 2, 1])]; tensor input_999_cast_fp16 = transpose(perm = input_999_perm_0, x = x_433_cast_fp16)[name = tensor("transpose_180")]; tensor input_1001_cast_fp16 = add(x = input_983_cast_fp16, y = input_999_cast_fp16)[name = tensor("input_1001_cast_fp16")]; tensor input_1003_axes_0 = const()[name = tensor("input_1003_axes_0"), val = 
tensor([-1])]; - tensor encoder_module_layers_18_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462810368)))]; - tensor encoder_module_layers_18_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462812480)))]; + tensor encoder_module_layers_18_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463770752)))]; + tensor encoder_module_layers_18_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463772864)))]; tensor input_1003_cast_fp16 = layer_norm(axes = input_1003_axes_0, beta = encoder_module_layers_18_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_feed_forward2_weight_to_fp16, x = input_1001_cast_fp16)[name = tensor("input_1003_cast_fp16")]; - tensor encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(462814592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467008960))), name = tensor("encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1003_cast_fp16)[name = tensor("linear_170_cast_fp16")]; + tensor encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(463774976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467973504))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467969344)))]; + tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1003_cast_fp16)[name = tensor("linear_170_cast_fp16")]; tensor input_1007_cast_fp16 = silu(x = linear_170_cast_fp16)[name = tensor("input_1007_cast_fp16")]; - tensor encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467009536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471203904))), name = tensor("encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_171_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1007_cast_fp16)[name = tensor("linear_171_cast_fp16")]; + tensor 
encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467981760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472177216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472176128)))]; + tensor linear_171_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_18_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1007_cast_fp16)[name = tensor("linear_171_cast_fp16")]; tensor var_3429_to_fp16 = const()[name = tensor("op_3429_to_fp16"), val = tensor(0x1p-1)]; tensor var_3430_cast_fp16 = mul(x = linear_171_cast_fp16, y = var_3429_to_fp16)[name = tensor("op_3430_cast_fp16")]; tensor input_1013_cast_fp16 = add(x = input_1001_cast_fp16, y = var_3430_cast_fp16)[name = tensor("input_1013_cast_fp16")]; tensor input_1015_axes_0 = const()[name = tensor("input_1015_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_18_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471204480)))]; - tensor encoder_module_layers_18_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471206592)))]; + tensor encoder_module_layers_18_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472179328)))]; + tensor encoder_module_layers_18_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_18_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472181440)))]; tensor input_1015_cast_fp16 = layer_norm(axes = input_1015_axes_0, beta = encoder_module_layers_18_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_18_norm_out_weight_to_fp16, x = input_1013_cast_fp16)[name = tensor("input_1015_cast_fp16")]; tensor input_1017_axes_0 = const()[name = tensor("input_1017_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471208704)))]; - tensor encoder_module_layers_19_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471210816)))]; + tensor encoder_module_layers_19_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472183552)))]; + tensor encoder_module_layers_19_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472185664)))]; tensor input_1017_cast_fp16 = layer_norm(axes = 
input_1017_axes_0, beta = encoder_module_layers_19_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_19_norm_feed_forward1_weight_to_fp16, x = input_1015_cast_fp16)[name = tensor("input_1017_cast_fp16")]; - tensor encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(471212928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(475407296))), name = tensor("encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_172_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1017_cast_fp16)[name = tensor("linear_172_cast_fp16")]; + tensor encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(472187776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476386304))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476382144)))]; + tensor linear_172_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1017_cast_fp16)[name = tensor("linear_172_cast_fp16")]; tensor input_1021_cast_fp16 = silu(x = linear_172_cast_fp16)[name = tensor("input_1021_cast_fp16")]; - tensor encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(475407872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479602240))), name = tensor("encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_173_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1021_cast_fp16)[name = tensor("linear_173_cast_fp16")]; + tensor encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476394560))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480590016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480588928)))]; + tensor linear_173_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1021_cast_fp16)[name = tensor("linear_173_cast_fp16")]; tensor var_3458_to_fp16 = const()[name = tensor("op_3458_to_fp16"), val = tensor(0x1p-1)]; tensor var_3459_cast_fp16 = mul(x = linear_173_cast_fp16, y = var_3458_to_fp16)[name = tensor("op_3459_cast_fp16")]; tensor input_1027_cast_fp16 = add(x = input_1015_cast_fp16, y = var_3459_cast_fp16)[name = tensor("input_1027_cast_fp16")]; tensor query_39_axes_0 = 
const()[name = tensor("query_39_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479602816)))]; - tensor encoder_module_layers_19_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479604928)))]; + tensor encoder_module_layers_19_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480592128)))]; + tensor encoder_module_layers_19_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480594240)))]; tensor query_39_cast_fp16 = layer_norm(axes = query_39_axes_0, beta = encoder_module_layers_19_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_19_norm_self_att_weight_to_fp16, x = input_1027_cast_fp16)[name = tensor("query_39_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479607040))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480655680))), name = tensor("encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_174_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_palettized, x = query_39_cast_fp16)[name = tensor("linear_174_cast_fp16")]; + tensor encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480596352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481646080))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481644992)))]; + tensor linear_174_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_q_weight_to_fp16_quantized, x = query_39_cast_fp16)[name = tensor("linear_174_cast_fp16")]; tensor var_3475 = const()[name = tensor("op_3475"), val = tensor([1, -1, 8, 128])]; tensor q_115_cast_fp16 = reshape(shape = var_3475, x = linear_174_cast_fp16)[name = tensor("q_115_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480656256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481704896))), name = tensor("encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_175_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_palettized, x = query_39_cast_fp16)[name = 
tensor("linear_175_cast_fp16")]; + tensor encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481648192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482697920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482696832)))]; + tensor linear_175_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_k_weight_to_fp16_quantized, x = query_39_cast_fp16)[name = tensor("linear_175_cast_fp16")]; tensor var_3479 = const()[name = tensor("op_3479"), val = tensor([1, -1, 8, 128])]; tensor k_77_cast_fp16 = reshape(shape = var_3479, x = linear_175_cast_fp16)[name = tensor("k_77_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481705472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482754112))), name = tensor("encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_176_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_palettized, x = query_39_cast_fp16)[name = tensor("linear_176_cast_fp16")]; + tensor encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482700032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483749760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483748672)))]; + tensor linear_176_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_v_weight_to_fp16_quantized, x = query_39_cast_fp16)[name = tensor("linear_176_cast_fp16")]; tensor var_3483 = const()[name = tensor("op_3483"), val = tensor([1, -1, 8, 128])]; tensor v_39_cast_fp16 = reshape(shape = var_3483, x = linear_176_cast_fp16)[name = tensor("v_39_cast_fp16")]; tensor value_43_perm_0 = const()[name = tensor("value_43_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_19_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_19_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482754688)))]; + tensor encoder_module_layers_19_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_19_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483751872)))]; tensor var_3495_cast_fp16 = add(x = q_115_cast_fp16, y = encoder_module_layers_19_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3495_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_19_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(482756800)))]; + tensor encoder_module_layers_19_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_19_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483753984)))]; tensor var_3497_cast_fp16 = add(x = q_115_cast_fp16, y = encoder_module_layers_19_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3497_cast_fp16")]; tensor q_with_bias_v_39_perm_0 = const()[name = tensor("q_with_bias_v_39_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_441_transpose_x_0 = const()[name = tensor("x_441_transpose_x_0"), val = tensor(false)]; tensor x_441_transpose_y_0 = const()[name = tensor("x_441_transpose_y_0"), val = tensor(false)]; - tensor op_3499_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(482758912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483142976))), name = tensor("op_3499_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3499_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3499_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483756096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484140608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484140160)))]; tensor q_with_bias_v_39_cast_fp16 = transpose(perm = q_with_bias_v_39_perm_0, x = var_3497_cast_fp16)[name = tensor("transpose_179")]; - tensor x_441_cast_fp16 = matmul(transpose_x = x_441_transpose_x_0, transpose_y = x_441_transpose_y_0, x = q_with_bias_v_39_cast_fp16, y = op_3499_to_fp16_palettized)[name = tensor("x_441_cast_fp16")]; + tensor x_441_cast_fp16 = matmul(transpose_x = x_441_transpose_x_0, transpose_y = x_441_transpose_y_0, x = q_with_bias_v_39_cast_fp16, y = op_3499_to_fp16_quantized)[name = tensor("x_441_cast_fp16")]; tensor x_443_pad_0 = const()[name = tensor("x_443_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_443_mode_0 = const()[name = tensor("x_443_mode_0"), val = tensor("constant")]; tensor const_219_to_fp16 = const()[name = tensor("const_219_to_fp16"), val = tensor(0x0p+0)]; @@ -2902,12 +2902,12 @@ program(1.0) tensor var_3532 = const()[name = tensor("op_3532"), val = tensor([1, -1, 1024])]; tensor var_3531_cast_fp16 = transpose(perm = var_3531_perm_0, x = x_447_cast_fp16)[name = tensor("transpose_175")]; tensor input_1031_cast_fp16 = reshape(shape = var_3532, x = var_3531_cast_fp16)[name = tensor("input_1031_cast_fp16")]; - tensor encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483143552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484192192))), name = tensor("encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_178_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_palettized, x = input_1031_cast_fp16)[name = tensor("linear_178_cast_fp16")]; + tensor encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484141440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485191168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485190080)))]; + tensor linear_178_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_self_attn_linear_out_weight_to_fp16_quantized, x = input_1031_cast_fp16)[name = tensor("linear_178_cast_fp16")]; tensor input_1035_cast_fp16 = add(x = input_1027_cast_fp16, y = linear_178_cast_fp16)[name = tensor("input_1035_cast_fp16")]; tensor x_451_axes_0 = const()[name = tensor("x_451_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484192768)))]; - tensor encoder_module_layers_19_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484194880)))]; + tensor encoder_module_layers_19_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485193280)))]; + tensor encoder_module_layers_19_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485195392)))]; tensor x_451_cast_fp16 = layer_norm(axes = x_451_axes_0, beta = encoder_module_layers_19_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_19_norm_conv_weight_to_fp16, x = input_1035_cast_fp16)[name = tensor("x_451_cast_fp16")]; tensor input_1037_perm_0 = const()[name = tensor("input_1037_perm_0"), val = tensor([0, 2, 1])]; tensor input_1039_pad_type_0 = const()[name = tensor("input_1039_pad_type_0"), val = tensor("valid")]; @@ -2915,9 +2915,9 @@ program(1.0) tensor input_1039_pad_0 = const()[name = tensor("input_1039_pad_0"), val = tensor([0, 0])]; tensor input_1039_dilations_0 = const()[name = tensor("input_1039_dilations_0"), val = tensor([1])]; tensor input_1039_groups_0 = const()[name = tensor("input_1039_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484196992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486294208))), name = tensor("encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(485197504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487296832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487294720)))]; tensor 
input_1037_cast_fp16 = transpose(perm = input_1037_perm_0, x = x_451_cast_fp16)[name = tensor("transpose_174")]; - tensor input_1039_cast_fp16 = conv(dilations = input_1039_dilations_0, groups = input_1039_groups_0, pad = input_1039_pad_0, pad_type = input_1039_pad_type_0, strides = input_1039_strides_0, weight = encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1037_cast_fp16)[name = tensor("input_1039_cast_fp16")]; + tensor input_1039_cast_fp16 = conv(dilations = input_1039_dilations_0, groups = input_1039_groups_0, pad = input_1039_pad_0, pad_type = input_1039_pad_type_0, strides = input_1039_strides_0, weight = encoder_module_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1037_cast_fp16)[name = tensor("input_1039_cast_fp16")]; tensor x_453_split_num_splits_0 = const()[name = tensor("x_453_split_num_splits_0"), val = tensor(2)]; tensor x_453_split_axis_0 = const()[name = tensor("x_453_split_axis_0"), val = tensor(1)]; tensor x_453_split_cast_fp16_0, tensor x_453_split_cast_fp16_1 = split(axis = x_453_split_axis_0, num_splits = x_453_split_num_splits_0, x = input_1039_cast_fp16)[name = tensor("x_453_split_cast_fp16")]; @@ -2933,75 +2933,75 @@ program(1.0) tensor input_1045_strides_0 = const()[name = tensor("input_1045_strides_0"), val = tensor([1])]; tensor input_1045_pad_0 = const()[name = tensor("input_1045_pad_0"), val = tensor([0, 0])]; tensor input_1045_dilations_0 = const()[name = tensor("input_1045_dilations_0"), val = tensor([1])]; - tensor const_301_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486294784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486304064))), name = tensor("const_301_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_302_to_fp16 = const()[name = tensor("const_302_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486304640)))]; - tensor input_1047_cast_fp16 = conv(bias = const_302_to_fp16, dilations = input_1045_dilations_0, groups = input_1045_groups_0, pad = input_1045_pad_0, pad_type = input_1045_pad_type_0, strides = input_1045_strides_0, weight = const_301_to_fp16_palettized, x = input_1043_cast_fp16)[name = tensor("input_1047_cast_fp16")]; + tensor const_301_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_301_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487300992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487311360))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487310272)))]; + tensor const_302_to_fp16 = const()[name = tensor("const_302_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487313472)))]; + tensor input_1047_cast_fp16 = conv(bias = const_302_to_fp16, dilations = input_1045_dilations_0, groups = input_1045_groups_0, pad = input_1045_pad_0, pad_type = input_1045_pad_type_0, strides = input_1045_strides_0, weight = const_301_to_fp16_quantized, x = input_1043_cast_fp16)[name = tensor("input_1047_cast_fp16")]; tensor input_1049_cast_fp16 = silu(x = input_1047_cast_fp16)[name = tensor("input_1049_cast_fp16")]; tensor x_455_pad_type_0 = const()[name = tensor("x_455_pad_type_0"), val = tensor("valid")]; tensor x_455_strides_0 = const()[name 
= tensor("x_455_strides_0"), val = tensor([1])]; tensor x_455_pad_0 = const()[name = tensor("x_455_pad_0"), val = tensor([0, 0])]; tensor x_455_dilations_0 = const()[name = tensor("x_455_dilations_0"), val = tensor([1])]; tensor x_455_groups_0 = const()[name = tensor("x_455_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486306752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487355392))), name = tensor("encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_455_cast_fp16 = conv(dilations = x_455_dilations_0, groups = x_455_groups_0, pad = x_455_pad_0, pad_type = x_455_pad_type_0, strides = x_455_strides_0, weight = encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1049_cast_fp16)[name = tensor("x_455_cast_fp16")]; + tensor encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487315584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488365312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488364224)))]; + tensor x_455_cast_fp16 = conv(dilations = x_455_dilations_0, groups = x_455_groups_0, pad = x_455_pad_0, pad_type = x_455_pad_type_0, strides = x_455_strides_0, weight = encoder_module_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1049_cast_fp16)[name = tensor("x_455_cast_fp16")]; tensor input_1051_perm_0 = const()[name = tensor("input_1051_perm_0"), val = tensor([0, 2, 1])]; tensor input_1051_cast_fp16 = transpose(perm = input_1051_perm_0, x = x_455_cast_fp16)[name = tensor("transpose_173")]; tensor input_1053_cast_fp16 = add(x = input_1035_cast_fp16, y = input_1051_cast_fp16)[name = tensor("input_1053_cast_fp16")]; tensor input_1055_axes_0 = const()[name = tensor("input_1055_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487355968)))]; - tensor encoder_module_layers_19_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487358080)))]; + tensor encoder_module_layers_19_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488367424)))]; + tensor encoder_module_layers_19_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488369536)))]; tensor input_1055_cast_fp16 = layer_norm(axes = input_1055_axes_0, beta = encoder_module_layers_19_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = 
encoder_module_layers_19_norm_feed_forward2_weight_to_fp16, x = input_1053_cast_fp16)[name = tensor("input_1055_cast_fp16")]; - tensor encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(487360192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(491554560))), name = tensor("encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_179_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1055_cast_fp16)[name = tensor("linear_179_cast_fp16")]; + tensor encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(488371648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(492570176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(492566016)))]; + tensor linear_179_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1055_cast_fp16)[name = tensor("linear_179_cast_fp16")]; tensor input_1059_cast_fp16 = silu(x = linear_179_cast_fp16)[name = tensor("input_1059_cast_fp16")]; - tensor encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(491555136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495749504))), name = tensor("encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_180_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1059_cast_fp16)[name = tensor("linear_180_cast_fp16")]; + tensor encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(492578432))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496773888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496772800)))]; + tensor linear_180_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_19_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1059_cast_fp16)[name = tensor("linear_180_cast_fp16")]; tensor var_3592_to_fp16 = const()[name = tensor("op_3592_to_fp16"), val = tensor(0x1p-1)]; tensor var_3593_cast_fp16 = mul(x = linear_180_cast_fp16, y = var_3592_to_fp16)[name = tensor("op_3593_cast_fp16")]; tensor input_1065_cast_fp16 = add(x = input_1053_cast_fp16, y = var_3593_cast_fp16)[name = tensor("input_1065_cast_fp16")]; tensor input_1067_axes_0 = const()[name = tensor("input_1067_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_19_norm_out_weight_to_fp16 = 
const()[name = tensor("encoder_module_layers_19_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495750080)))]; - tensor encoder_module_layers_19_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495752192)))]; + tensor encoder_module_layers_19_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496776000)))]; + tensor encoder_module_layers_19_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_19_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496778112)))]; tensor input_1067_cast_fp16 = layer_norm(axes = input_1067_axes_0, beta = encoder_module_layers_19_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_19_norm_out_weight_to_fp16, x = input_1065_cast_fp16)[name = tensor("input_1067_cast_fp16")]; tensor input_1069_axes_0 = const()[name = tensor("input_1069_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495754304)))]; - tensor encoder_module_layers_20_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495756416)))]; + tensor encoder_module_layers_20_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496780224)))]; + tensor encoder_module_layers_20_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496782336)))]; tensor input_1069_cast_fp16 = layer_norm(axes = input_1069_axes_0, beta = encoder_module_layers_20_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_20_norm_feed_forward1_weight_to_fp16, x = input_1067_cast_fp16)[name = tensor("input_1069_cast_fp16")]; - tensor encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495758528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(499952896))), name = tensor("encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1069_cast_fp16)[name = tensor("linear_181_cast_fp16")]; + tensor encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496784448))), scale 
= tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(500982976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(500978816)))]; + tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1069_cast_fp16)[name = tensor("linear_181_cast_fp16")]; tensor input_1073_cast_fp16 = silu(x = linear_181_cast_fp16)[name = tensor("input_1073_cast_fp16")]; - tensor encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(499953472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504147840))), name = tensor("encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_182_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1073_cast_fp16)[name = tensor("linear_182_cast_fp16")]; + tensor encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(500991232))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505186688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505185600)))]; + tensor linear_182_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1073_cast_fp16)[name = tensor("linear_182_cast_fp16")]; tensor var_3621_to_fp16 = const()[name = tensor("op_3621_to_fp16"), val = tensor(0x1p-1)]; tensor var_3622_cast_fp16 = mul(x = linear_182_cast_fp16, y = var_3621_to_fp16)[name = tensor("op_3622_cast_fp16")]; tensor input_1079_cast_fp16 = add(x = input_1067_cast_fp16, y = var_3622_cast_fp16)[name = tensor("input_1079_cast_fp16")]; tensor query_41_axes_0 = const()[name = tensor("query_41_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504148416)))]; - tensor encoder_module_layers_20_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504150528)))]; + tensor encoder_module_layers_20_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505188800)))]; + tensor encoder_module_layers_20_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505190912)))]; tensor query_41_cast_fp16 = layer_norm(axes = query_41_axes_0, beta = encoder_module_layers_20_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = 
encoder_module_layers_20_norm_self_att_weight_to_fp16, x = input_1079_cast_fp16)[name = tensor("query_41_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504152640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505201280))), name = tensor("encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_183_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_palettized, x = query_41_cast_fp16)[name = tensor("linear_183_cast_fp16")]; + tensor encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505193024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506242752))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506241664)))]; + tensor linear_183_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_q_weight_to_fp16_quantized, x = query_41_cast_fp16)[name = tensor("linear_183_cast_fp16")]; tensor var_3638 = const()[name = tensor("op_3638"), val = tensor([1, -1, 8, 128])]; tensor q_121_cast_fp16 = reshape(shape = var_3638, x = linear_183_cast_fp16)[name = tensor("q_121_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505201856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506250496))), name = tensor("encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_184_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_palettized, x = query_41_cast_fp16)[name = tensor("linear_184_cast_fp16")]; + tensor encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506244864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507294592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507293504)))]; + tensor linear_184_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_k_weight_to_fp16_quantized, x = query_41_cast_fp16)[name = tensor("linear_184_cast_fp16")]; tensor var_3642 = const()[name = tensor("op_3642"), val = tensor([1, -1, 8, 128])]; tensor k_81_cast_fp16 = reshape(shape = var_3642, x = linear_184_cast_fp16)[name = tensor("k_81_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506251072))), lut = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(507299712))), name = tensor("encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_185_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_palettized, x = query_41_cast_fp16)[name = tensor("linear_185_cast_fp16")]; + tensor encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507296704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508346432))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508345344)))]; + tensor linear_185_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_v_weight_to_fp16_quantized, x = query_41_cast_fp16)[name = tensor("linear_185_cast_fp16")]; tensor var_3646 = const()[name = tensor("op_3646"), val = tensor([1, -1, 8, 128])]; tensor v_41_cast_fp16 = reshape(shape = var_3646, x = linear_185_cast_fp16)[name = tensor("v_41_cast_fp16")]; tensor value_45_perm_0 = const()[name = tensor("value_45_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_20_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_20_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507300288)))]; + tensor encoder_module_layers_20_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_20_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508348544)))]; tensor var_3658_cast_fp16 = add(x = q_121_cast_fp16, y = encoder_module_layers_20_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3658_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_20_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507302400)))]; + tensor encoder_module_layers_20_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_20_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508350656)))]; tensor var_3660_cast_fp16 = add(x = q_121_cast_fp16, y = encoder_module_layers_20_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3660_cast_fp16")]; tensor q_with_bias_v_41_perm_0 = const()[name = tensor("q_with_bias_v_41_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_463_transpose_x_0 = const()[name = tensor("x_463_transpose_x_0"), val = tensor(false)]; tensor x_463_transpose_y_0 = const()[name = tensor("x_463_transpose_y_0"), val = tensor(false)]; - tensor op_3662_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507304512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507688576))), name = tensor("op_3662_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3662_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3662_to_fp16_quantized"), quantized_data = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508352768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508737280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508736832)))]; tensor q_with_bias_v_41_cast_fp16 = transpose(perm = q_with_bias_v_41_perm_0, x = var_3660_cast_fp16)[name = tensor("transpose_172")]; - tensor x_463_cast_fp16 = matmul(transpose_x = x_463_transpose_x_0, transpose_y = x_463_transpose_y_0, x = q_with_bias_v_41_cast_fp16, y = op_3662_to_fp16_palettized)[name = tensor("x_463_cast_fp16")]; + tensor x_463_cast_fp16 = matmul(transpose_x = x_463_transpose_x_0, transpose_y = x_463_transpose_y_0, x = q_with_bias_v_41_cast_fp16, y = op_3662_to_fp16_quantized)[name = tensor("x_463_cast_fp16")]; tensor x_465_pad_0 = const()[name = tensor("x_465_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_465_mode_0 = const()[name = tensor("x_465_mode_0"), val = tensor("constant")]; tensor const_229_to_fp16 = const()[name = tensor("const_229_to_fp16"), val = tensor(0x0p+0)]; @@ -3039,12 +3039,12 @@ program(1.0) tensor var_3695 = const()[name = tensor("op_3695"), val = tensor([1, -1, 1024])]; tensor var_3694_cast_fp16 = transpose(perm = var_3694_perm_0, x = x_469_cast_fp16)[name = tensor("transpose_168")]; tensor input_1083_cast_fp16 = reshape(shape = var_3695, x = var_3694_cast_fp16)[name = tensor("input_1083_cast_fp16")]; - tensor encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(507689152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508737792))), name = tensor("encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_187_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_palettized, x = input_1083_cast_fp16)[name = tensor("linear_187_cast_fp16")]; + tensor encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508738112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509787840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509786752)))]; + tensor linear_187_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_self_attn_linear_out_weight_to_fp16_quantized, x = input_1083_cast_fp16)[name = tensor("linear_187_cast_fp16")]; tensor input_1087_cast_fp16 = add(x = input_1079_cast_fp16, y = linear_187_cast_fp16)[name = tensor("input_1087_cast_fp16")]; tensor x_473_axes_0 = const()[name = tensor("x_473_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508738368)))]; - tensor encoder_module_layers_20_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(508740480)))]; + tensor encoder_module_layers_20_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509789952)))]; + tensor encoder_module_layers_20_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509792064)))]; tensor x_473_cast_fp16 = layer_norm(axes = x_473_axes_0, beta = encoder_module_layers_20_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_20_norm_conv_weight_to_fp16, x = input_1087_cast_fp16)[name = tensor("x_473_cast_fp16")]; tensor input_1089_perm_0 = const()[name = tensor("input_1089_perm_0"), val = tensor([0, 2, 1])]; tensor input_1091_pad_type_0 = const()[name = tensor("input_1091_pad_type_0"), val = tensor("valid")]; @@ -3052,9 +3052,9 @@ program(1.0) tensor input_1091_pad_0 = const()[name = tensor("input_1091_pad_0"), val = tensor([0, 0])]; tensor input_1091_dilations_0 = const()[name = tensor("input_1091_dilations_0"), val = tensor([1])]; tensor input_1091_groups_0 = const()[name = tensor("input_1091_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508742592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510839808))), name = tensor("encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509794176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511893504))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511891392)))]; tensor input_1089_cast_fp16 = transpose(perm = input_1089_perm_0, x = x_473_cast_fp16)[name = tensor("transpose_167")]; - tensor input_1091_cast_fp16 = conv(dilations = input_1091_dilations_0, groups = input_1091_groups_0, pad = input_1091_pad_0, pad_type = input_1091_pad_type_0, strides = input_1091_strides_0, weight = encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1089_cast_fp16)[name = tensor("input_1091_cast_fp16")]; + tensor input_1091_cast_fp16 = conv(dilations = input_1091_dilations_0, groups = input_1091_groups_0, pad = input_1091_pad_0, pad_type = input_1091_pad_type_0, strides = input_1091_strides_0, weight = encoder_module_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1089_cast_fp16)[name = tensor("input_1091_cast_fp16")]; tensor x_475_split_num_splits_0 = const()[name = tensor("x_475_split_num_splits_0"), val = tensor(2)]; tensor x_475_split_axis_0 = const()[name = tensor("x_475_split_axis_0"), val = tensor(1)]; tensor x_475_split_cast_fp16_0, tensor x_475_split_cast_fp16_1 = split(axis = x_475_split_axis_0, num_splits = x_475_split_num_splits_0, x = input_1091_cast_fp16)[name = tensor("x_475_split_cast_fp16")]; @@ -3070,75 +3070,75 @@ program(1.0) tensor input_1097_strides_0 = const()[name = 
tensor("input_1097_strides_0"), val = tensor([1])]; tensor input_1097_pad_0 = const()[name = tensor("input_1097_pad_0"), val = tensor([0, 0])]; tensor input_1097_dilations_0 = const()[name = tensor("input_1097_dilations_0"), val = tensor([1])]; - tensor const_303_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510840384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510849664))), name = tensor("const_303_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_304_to_fp16 = const()[name = tensor("const_304_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510850240)))]; - tensor input_1099_cast_fp16 = conv(bias = const_304_to_fp16, dilations = input_1097_dilations_0, groups = input_1097_groups_0, pad = input_1097_pad_0, pad_type = input_1097_pad_type_0, strides = input_1097_strides_0, weight = const_303_to_fp16_palettized, x = input_1095_cast_fp16)[name = tensor("input_1099_cast_fp16")]; + tensor const_303_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_303_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511897664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511908032))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511906944)))]; + tensor const_304_to_fp16 = const()[name = tensor("const_304_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511910144)))]; + tensor input_1099_cast_fp16 = conv(bias = const_304_to_fp16, dilations = input_1097_dilations_0, groups = input_1097_groups_0, pad = input_1097_pad_0, pad_type = input_1097_pad_type_0, strides = input_1097_strides_0, weight = const_303_to_fp16_quantized, x = input_1095_cast_fp16)[name = tensor("input_1099_cast_fp16")]; tensor input_1101_cast_fp16 = silu(x = input_1099_cast_fp16)[name = tensor("input_1101_cast_fp16")]; tensor x_477_pad_type_0 = const()[name = tensor("x_477_pad_type_0"), val = tensor("valid")]; tensor x_477_strides_0 = const()[name = tensor("x_477_strides_0"), val = tensor([1])]; tensor x_477_pad_0 = const()[name = tensor("x_477_pad_0"), val = tensor([0, 0])]; tensor x_477_dilations_0 = const()[name = tensor("x_477_dilations_0"), val = tensor([1])]; tensor x_477_groups_0 = const()[name = tensor("x_477_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(510852352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511900992))), name = tensor("encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_477_cast_fp16 = conv(dilations = x_477_dilations_0, groups = x_477_groups_0, pad = x_477_pad_0, pad_type = x_477_pad_type_0, strides = x_477_strides_0, weight = encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1101_cast_fp16)[name = tensor("x_477_cast_fp16")]; + tensor encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized"), 
quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511912256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512961984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512960896)))]; + tensor x_477_cast_fp16 = conv(dilations = x_477_dilations_0, groups = x_477_groups_0, pad = x_477_pad_0, pad_type = x_477_pad_type_0, strides = x_477_strides_0, weight = encoder_module_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1101_cast_fp16)[name = tensor("x_477_cast_fp16")]; tensor input_1103_perm_0 = const()[name = tensor("input_1103_perm_0"), val = tensor([0, 2, 1])]; tensor input_1103_cast_fp16 = transpose(perm = input_1103_perm_0, x = x_477_cast_fp16)[name = tensor("transpose_166")]; tensor input_1105_cast_fp16 = add(x = input_1087_cast_fp16, y = input_1103_cast_fp16)[name = tensor("input_1105_cast_fp16")]; tensor input_1107_axes_0 = const()[name = tensor("input_1107_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511901568)))]; - tensor encoder_module_layers_20_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511903680)))]; + tensor encoder_module_layers_20_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512964096)))]; + tensor encoder_module_layers_20_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512966208)))]; tensor input_1107_cast_fp16 = layer_norm(axes = input_1107_axes_0, beta = encoder_module_layers_20_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_20_norm_feed_forward2_weight_to_fp16, x = input_1105_cast_fp16)[name = tensor("input_1107_cast_fp16")]; - tensor encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511905792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(516100160))), name = tensor("encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1107_cast_fp16)[name = tensor("linear_188_cast_fp16")]; + tensor encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512968320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517166848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset 
= tensor(517162688)))]; + tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1107_cast_fp16)[name = tensor("linear_188_cast_fp16")]; tensor input_1111_cast_fp16 = silu(x = linear_188_cast_fp16)[name = tensor("input_1111_cast_fp16")]; - tensor encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(516100736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520295104))), name = tensor("encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_189_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1111_cast_fp16)[name = tensor("linear_189_cast_fp16")]; + tensor encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517175104))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521370560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521369472)))]; + tensor linear_189_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_20_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1111_cast_fp16)[name = tensor("linear_189_cast_fp16")]; tensor var_3755_to_fp16 = const()[name = tensor("op_3755_to_fp16"), val = tensor(0x1p-1)]; tensor var_3756_cast_fp16 = mul(x = linear_189_cast_fp16, y = var_3755_to_fp16)[name = tensor("op_3756_cast_fp16")]; tensor input_1117_cast_fp16 = add(x = input_1105_cast_fp16, y = var_3756_cast_fp16)[name = tensor("input_1117_cast_fp16")]; tensor input_1119_axes_0 = const()[name = tensor("input_1119_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_20_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520295680)))]; - tensor encoder_module_layers_20_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520297792)))]; + tensor encoder_module_layers_20_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521372672)))]; + tensor encoder_module_layers_20_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_20_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521374784)))]; tensor input_1119_cast_fp16 = layer_norm(axes = input_1119_axes_0, beta = encoder_module_layers_20_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_20_norm_out_weight_to_fp16, x = input_1117_cast_fp16)[name = tensor("input_1119_cast_fp16")]; tensor input_1121_axes_0 = const()[name = tensor("input_1121_axes_0"), val = tensor([-1])]; - tensor 
encoder_module_layers_21_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520299904)))]; - tensor encoder_module_layers_21_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520302016)))]; + tensor encoder_module_layers_21_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521376896)))]; + tensor encoder_module_layers_21_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521379008)))]; tensor input_1121_cast_fp16 = layer_norm(axes = input_1121_axes_0, beta = encoder_module_layers_21_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_feed_forward1_weight_to_fp16, x = input_1119_cast_fp16)[name = tensor("input_1121_cast_fp16")]; - tensor encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(520304128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(524498496))), name = tensor("encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1121_cast_fp16)[name = tensor("linear_190_cast_fp16")]; + tensor encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521381120))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525579648))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525575488)))]; + tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1121_cast_fp16)[name = tensor("linear_190_cast_fp16")]; tensor input_1125_cast_fp16 = silu(x = linear_190_cast_fp16)[name = tensor("input_1125_cast_fp16")]; - tensor encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(524499072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528693440))), name = tensor("encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_191_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1125_cast_fp16)[name = tensor("linear_191_cast_fp16")]; + tensor 
encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525587904))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529783360))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529782272)))]; + tensor linear_191_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1125_cast_fp16)[name = tensor("linear_191_cast_fp16")]; tensor var_3784_to_fp16 = const()[name = tensor("op_3784_to_fp16"), val = tensor(0x1p-1)]; tensor var_3785_cast_fp16 = mul(x = linear_191_cast_fp16, y = var_3784_to_fp16)[name = tensor("op_3785_cast_fp16")]; tensor input_1131_cast_fp16 = add(x = input_1119_cast_fp16, y = var_3785_cast_fp16)[name = tensor("input_1131_cast_fp16")]; tensor query_43_axes_0 = const()[name = tensor("query_43_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_21_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528694016)))]; - tensor encoder_module_layers_21_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528696128)))]; + tensor encoder_module_layers_21_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529785472)))]; + tensor encoder_module_layers_21_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529787584)))]; tensor query_43_cast_fp16 = layer_norm(axes = query_43_axes_0, beta = encoder_module_layers_21_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_self_att_weight_to_fp16, x = input_1131_cast_fp16)[name = tensor("query_43_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528698240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529746880))), name = tensor("encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_192_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_palettized, x = query_43_cast_fp16)[name = tensor("linear_192_cast_fp16")]; + tensor encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529789696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530839424))), zero_point = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(530838336)))]; + tensor linear_192_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_q_weight_to_fp16_quantized, x = query_43_cast_fp16)[name = tensor("linear_192_cast_fp16")]; tensor var_3801 = const()[name = tensor("op_3801"), val = tensor([1, -1, 8, 128])]; tensor q_127_cast_fp16 = reshape(shape = var_3801, x = linear_192_cast_fp16)[name = tensor("q_127_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529747456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530796096))), name = tensor("encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_193_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_palettized, x = query_43_cast_fp16)[name = tensor("linear_193_cast_fp16")]; + tensor encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530841536))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531891264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531890176)))]; + tensor linear_193_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_k_weight_to_fp16_quantized, x = query_43_cast_fp16)[name = tensor("linear_193_cast_fp16")]; tensor var_3805 = const()[name = tensor("op_3805"), val = tensor([1, -1, 8, 128])]; tensor k_85_cast_fp16 = reshape(shape = var_3805, x = linear_193_cast_fp16)[name = tensor("k_85_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530796672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531845312))), name = tensor("encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_194_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_palettized, x = query_43_cast_fp16)[name = tensor("linear_194_cast_fp16")]; + tensor encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531893376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532943104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532942016)))]; + tensor linear_194_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_v_weight_to_fp16_quantized, x = query_43_cast_fp16)[name = tensor("linear_194_cast_fp16")]; tensor var_3809 = const()[name = tensor("op_3809"), val = tensor([1, -1, 8, 
128])]; tensor v_43_cast_fp16 = reshape(shape = var_3809, x = linear_194_cast_fp16)[name = tensor("v_43_cast_fp16")]; tensor value_47_perm_0 = const()[name = tensor("value_47_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_21_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_21_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531845888)))]; + tensor encoder_module_layers_21_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_21_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532945216)))]; tensor var_3821_cast_fp16 = add(x = q_127_cast_fp16, y = encoder_module_layers_21_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3821_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_21_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531848000)))]; + tensor encoder_module_layers_21_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_21_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532947328)))]; tensor var_3823_cast_fp16 = add(x = q_127_cast_fp16, y = encoder_module_layers_21_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3823_cast_fp16")]; tensor q_with_bias_v_43_perm_0 = const()[name = tensor("q_with_bias_v_43_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_485_transpose_x_0 = const()[name = tensor("x_485_transpose_x_0"), val = tensor(false)]; tensor x_485_transpose_y_0 = const()[name = tensor("x_485_transpose_y_0"), val = tensor(false)]; - tensor op_3825_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(531850112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532234176))), name = tensor("op_3825_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3825_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3825_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532949440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533333952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533333504)))]; tensor q_with_bias_v_43_cast_fp16 = transpose(perm = q_with_bias_v_43_perm_0, x = var_3823_cast_fp16)[name = tensor("transpose_165")]; - tensor x_485_cast_fp16 = matmul(transpose_x = x_485_transpose_x_0, transpose_y = x_485_transpose_y_0, x = q_with_bias_v_43_cast_fp16, y = op_3825_to_fp16_palettized)[name = tensor("x_485_cast_fp16")]; + tensor x_485_cast_fp16 = matmul(transpose_x = x_485_transpose_x_0, transpose_y = x_485_transpose_y_0, x = q_with_bias_v_43_cast_fp16, y = op_3825_to_fp16_quantized)[name = tensor("x_485_cast_fp16")]; tensor x_487_pad_0 = const()[name = tensor("x_487_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_487_mode_0 = const()[name = tensor("x_487_mode_0"), val = tensor("constant")]; tensor const_239_to_fp16 = const()[name = tensor("const_239_to_fp16"), val = tensor(0x0p+0)]; @@ -3176,12 +3176,12 @@ program(1.0) tensor var_3858 = const()[name = tensor("op_3858"), val = tensor([1, -1, 1024])]; 
tensor var_3857_cast_fp16 = transpose(perm = var_3857_perm_0, x = x_491_cast_fp16)[name = tensor("transpose_161")]; tensor input_1135_cast_fp16 = reshape(shape = var_3858, x = var_3857_cast_fp16)[name = tensor("input_1135_cast_fp16")]; - tensor encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532234752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533283392))), name = tensor("encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_196_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_palettized, x = input_1135_cast_fp16)[name = tensor("linear_196_cast_fp16")]; + tensor encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533334784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534384512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534383424)))]; + tensor linear_196_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_self_attn_linear_out_weight_to_fp16_quantized, x = input_1135_cast_fp16)[name = tensor("linear_196_cast_fp16")]; tensor input_1139_cast_fp16 = add(x = input_1131_cast_fp16, y = linear_196_cast_fp16)[name = tensor("input_1139_cast_fp16")]; tensor x_495_axes_0 = const()[name = tensor("x_495_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_21_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533283968)))]; - tensor encoder_module_layers_21_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533286080)))]; + tensor encoder_module_layers_21_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534386624)))]; + tensor encoder_module_layers_21_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534388736)))]; tensor x_495_cast_fp16 = layer_norm(axes = x_495_axes_0, beta = encoder_module_layers_21_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_conv_weight_to_fp16, x = input_1139_cast_fp16)[name = tensor("x_495_cast_fp16")]; tensor input_1141_perm_0 = const()[name = tensor("input_1141_perm_0"), val = tensor([0, 2, 1])]; tensor input_1143_pad_type_0 = const()[name = tensor("input_1143_pad_type_0"), val = tensor("valid")]; @@ -3189,9 +3189,9 @@ program(1.0) tensor input_1143_pad_0 = const()[name = tensor("input_1143_pad_0"), val = tensor([0, 0])]; tensor input_1143_dilations_0 = const()[name = tensor("input_1143_dilations_0"), val = tensor([1])]; tensor input_1143_groups_0 = const()[name = 
tensor("input_1143_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533288192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535385408))), name = tensor("encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(534390848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536490176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536488064)))]; tensor input_1141_cast_fp16 = transpose(perm = input_1141_perm_0, x = x_495_cast_fp16)[name = tensor("transpose_160")]; - tensor input_1143_cast_fp16 = conv(dilations = input_1143_dilations_0, groups = input_1143_groups_0, pad = input_1143_pad_0, pad_type = input_1143_pad_type_0, strides = input_1143_strides_0, weight = encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1141_cast_fp16)[name = tensor("input_1143_cast_fp16")]; + tensor input_1143_cast_fp16 = conv(dilations = input_1143_dilations_0, groups = input_1143_groups_0, pad = input_1143_pad_0, pad_type = input_1143_pad_type_0, strides = input_1143_strides_0, weight = encoder_module_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1141_cast_fp16)[name = tensor("input_1143_cast_fp16")]; tensor x_497_split_num_splits_0 = const()[name = tensor("x_497_split_num_splits_0"), val = tensor(2)]; tensor x_497_split_axis_0 = const()[name = tensor("x_497_split_axis_0"), val = tensor(1)]; tensor x_497_split_cast_fp16_0, tensor x_497_split_cast_fp16_1 = split(axis = x_497_split_axis_0, num_splits = x_497_split_num_splits_0, x = input_1143_cast_fp16)[name = tensor("x_497_split_cast_fp16")]; @@ -3207,75 +3207,75 @@ program(1.0) tensor input_1149_strides_0 = const()[name = tensor("input_1149_strides_0"), val = tensor([1])]; tensor input_1149_pad_0 = const()[name = tensor("input_1149_pad_0"), val = tensor([0, 0])]; tensor input_1149_dilations_0 = const()[name = tensor("input_1149_dilations_0"), val = tensor([1])]; - tensor const_305_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535385984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535395264))), name = tensor("const_305_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_306_to_fp16 = const()[name = tensor("const_306_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535395840)))]; - tensor input_1151_cast_fp16 = conv(bias = const_306_to_fp16, dilations = input_1149_dilations_0, groups = input_1149_groups_0, pad = input_1149_pad_0, pad_type = input_1149_pad_type_0, strides = input_1149_strides_0, weight = const_305_to_fp16_palettized, x = input_1147_cast_fp16)[name = tensor("input_1151_cast_fp16")]; + tensor const_305_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_305_to_fp16_quantized"), quantized_data = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536494336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536504704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536503616)))]; + tensor const_306_to_fp16 = const()[name = tensor("const_306_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536506816)))]; + tensor input_1151_cast_fp16 = conv(bias = const_306_to_fp16, dilations = input_1149_dilations_0, groups = input_1149_groups_0, pad = input_1149_pad_0, pad_type = input_1149_pad_type_0, strides = input_1149_strides_0, weight = const_305_to_fp16_quantized, x = input_1147_cast_fp16)[name = tensor("input_1151_cast_fp16")]; tensor input_1153_cast_fp16 = silu(x = input_1151_cast_fp16)[name = tensor("input_1153_cast_fp16")]; tensor x_499_pad_type_0 = const()[name = tensor("x_499_pad_type_0"), val = tensor("valid")]; tensor x_499_strides_0 = const()[name = tensor("x_499_strides_0"), val = tensor([1])]; tensor x_499_pad_0 = const()[name = tensor("x_499_pad_0"), val = tensor([0, 0])]; tensor x_499_dilations_0 = const()[name = tensor("x_499_dilations_0"), val = tensor([1])]; tensor x_499_groups_0 = const()[name = tensor("x_499_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535397952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536446592))), name = tensor("encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_499_cast_fp16 = conv(dilations = x_499_dilations_0, groups = x_499_groups_0, pad = x_499_pad_0, pad_type = x_499_pad_type_0, strides = x_499_strides_0, weight = encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1153_cast_fp16)[name = tensor("x_499_cast_fp16")]; + tensor encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536508928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537558656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537557568)))]; + tensor x_499_cast_fp16 = conv(dilations = x_499_dilations_0, groups = x_499_groups_0, pad = x_499_pad_0, pad_type = x_499_pad_type_0, strides = x_499_strides_0, weight = encoder_module_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1153_cast_fp16)[name = tensor("x_499_cast_fp16")]; tensor input_1155_perm_0 = const()[name = tensor("input_1155_perm_0"), val = tensor([0, 2, 1])]; tensor input_1155_cast_fp16 = transpose(perm = input_1155_perm_0, x = x_499_cast_fp16)[name = tensor("transpose_159")]; tensor input_1157_cast_fp16 = add(x = input_1139_cast_fp16, y = input_1155_cast_fp16)[name = tensor("input_1157_cast_fp16")]; tensor input_1159_axes_0 = const()[name = tensor("input_1159_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_21_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(536447168)))]; - tensor encoder_module_layers_21_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536449280)))]; + tensor encoder_module_layers_21_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537560768)))]; + tensor encoder_module_layers_21_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537562880)))]; tensor input_1159_cast_fp16 = layer_norm(axes = input_1159_axes_0, beta = encoder_module_layers_21_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_feed_forward2_weight_to_fp16, x = input_1157_cast_fp16)[name = tensor("input_1159_cast_fp16")]; - tensor encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536451392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(540645760))), name = tensor("encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_197_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1159_cast_fp16)[name = tensor("linear_197_cast_fp16")]; + tensor encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(537564992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(541763520))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(541759360)))]; + tensor linear_197_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1159_cast_fp16)[name = tensor("linear_197_cast_fp16")]; tensor input_1163_cast_fp16 = silu(x = linear_197_cast_fp16)[name = tensor("input_1163_cast_fp16")]; - tensor encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(540646336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544840704))), name = tensor("encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_198_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1163_cast_fp16)[name = tensor("linear_198_cast_fp16")]; + tensor encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(541771776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545967232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545966144)))]; + tensor linear_198_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_21_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1163_cast_fp16)[name = tensor("linear_198_cast_fp16")]; tensor var_3918_to_fp16 = const()[name = tensor("op_3918_to_fp16"), val = tensor(0x1p-1)]; tensor var_3919_cast_fp16 = mul(x = linear_198_cast_fp16, y = var_3918_to_fp16)[name = tensor("op_3919_cast_fp16")]; tensor input_1169_cast_fp16 = add(x = input_1157_cast_fp16, y = var_3919_cast_fp16)[name = tensor("input_1169_cast_fp16")]; tensor input_1171_axes_0 = const()[name = tensor("input_1171_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_21_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544841280)))]; - tensor encoder_module_layers_21_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544843392)))]; + tensor encoder_module_layers_21_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545969344)))]; + tensor encoder_module_layers_21_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_21_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545971456)))]; tensor input_1171_cast_fp16 = layer_norm(axes = input_1171_axes_0, beta = encoder_module_layers_21_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_21_norm_out_weight_to_fp16, x = input_1169_cast_fp16)[name = tensor("input_1171_cast_fp16")]; tensor input_1173_axes_0 = const()[name = tensor("input_1173_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544845504)))]; - tensor encoder_module_layers_22_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544847616)))]; + tensor encoder_module_layers_22_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545973568)))]; + tensor encoder_module_layers_22_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545975680)))]; tensor input_1173_cast_fp16 = layer_norm(axes = input_1173_axes_0, beta = encoder_module_layers_22_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_feed_forward1_weight_to_fp16, x = input_1171_cast_fp16)[name = 
tensor("input_1173_cast_fp16")]; - tensor encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(544849728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(549044096))), name = tensor("encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_199_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1173_cast_fp16)[name = tensor("linear_199_cast_fp16")]; + tensor encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(545977792))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550176320))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550172160)))]; + tensor linear_199_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1173_cast_fp16)[name = tensor("linear_199_cast_fp16")]; tensor input_1177_cast_fp16 = silu(x = linear_199_cast_fp16)[name = tensor("input_1177_cast_fp16")]; - tensor encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(549044672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553239040))), name = tensor("encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_200_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1177_cast_fp16)[name = tensor("linear_200_cast_fp16")]; + tensor encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550184576))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554380032))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554378944)))]; + tensor linear_200_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1177_cast_fp16)[name = tensor("linear_200_cast_fp16")]; tensor var_3947_to_fp16 = const()[name = tensor("op_3947_to_fp16"), val = tensor(0x1p-1)]; tensor var_3948_cast_fp16 = mul(x = linear_200_cast_fp16, y = var_3947_to_fp16)[name = tensor("op_3948_cast_fp16")]; tensor input_1183_cast_fp16 = add(x = input_1171_cast_fp16, y = var_3948_cast_fp16)[name = tensor("input_1183_cast_fp16")]; tensor query_45_axes_0 = const()[name = tensor("query_45_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_self_att_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553239616)))]; - tensor encoder_module_layers_22_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553241728)))]; + tensor encoder_module_layers_22_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554382144)))]; + tensor encoder_module_layers_22_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554384256)))]; tensor query_45_cast_fp16 = layer_norm(axes = query_45_axes_0, beta = encoder_module_layers_22_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_self_att_weight_to_fp16, x = input_1183_cast_fp16)[name = tensor("query_45_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553243840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554292480))), name = tensor("encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_201_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_palettized, x = query_45_cast_fp16)[name = tensor("linear_201_cast_fp16")]; + tensor encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554386368))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555436096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555435008)))]; + tensor linear_201_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_q_weight_to_fp16_quantized, x = query_45_cast_fp16)[name = tensor("linear_201_cast_fp16")]; tensor var_3964 = const()[name = tensor("op_3964"), val = tensor([1, -1, 8, 128])]; tensor q_133_cast_fp16 = reshape(shape = var_3964, x = linear_201_cast_fp16)[name = tensor("q_133_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554293056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555341696))), name = tensor("encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_202_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_palettized, x = query_45_cast_fp16)[name = tensor("linear_202_cast_fp16")]; + tensor encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = 
tensor("encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555438208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556487936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556486848)))]; + tensor linear_202_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_k_weight_to_fp16_quantized, x = query_45_cast_fp16)[name = tensor("linear_202_cast_fp16")]; tensor var_3968 = const()[name = tensor("op_3968"), val = tensor([1, -1, 8, 128])]; tensor k_89_cast_fp16 = reshape(shape = var_3968, x = linear_202_cast_fp16)[name = tensor("k_89_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555342272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556390912))), name = tensor("encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_203_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_palettized, x = query_45_cast_fp16)[name = tensor("linear_203_cast_fp16")]; + tensor encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556490048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557539776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557538688)))]; + tensor linear_203_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_v_weight_to_fp16_quantized, x = query_45_cast_fp16)[name = tensor("linear_203_cast_fp16")]; tensor var_3972 = const()[name = tensor("op_3972"), val = tensor([1, -1, 8, 128])]; tensor v_45_cast_fp16 = reshape(shape = var_3972, x = linear_203_cast_fp16)[name = tensor("v_45_cast_fp16")]; tensor value_49_perm_0 = const()[name = tensor("value_49_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_22_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_22_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556391488)))]; + tensor encoder_module_layers_22_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_22_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557541888)))]; tensor var_3984_cast_fp16 = add(x = q_133_cast_fp16, y = encoder_module_layers_22_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3984_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_22_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556393600)))]; + tensor encoder_module_layers_22_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_22_self_attn_pos_bias_v_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557544000)))]; tensor var_3986_cast_fp16 = add(x = q_133_cast_fp16, y = encoder_module_layers_22_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3986_cast_fp16")]; tensor q_with_bias_v_45_perm_0 = const()[name = tensor("q_with_bias_v_45_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_507_transpose_x_0 = const()[name = tensor("x_507_transpose_x_0"), val = tensor(false)]; tensor x_507_transpose_y_0 = const()[name = tensor("x_507_transpose_y_0"), val = tensor(false)]; - tensor op_3988_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556395712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556779776))), name = tensor("op_3988_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_3988_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3988_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557546112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557930624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557930176)))]; tensor q_with_bias_v_45_cast_fp16 = transpose(perm = q_with_bias_v_45_perm_0, x = var_3986_cast_fp16)[name = tensor("transpose_158")]; - tensor x_507_cast_fp16 = matmul(transpose_x = x_507_transpose_x_0, transpose_y = x_507_transpose_y_0, x = q_with_bias_v_45_cast_fp16, y = op_3988_to_fp16_palettized)[name = tensor("x_507_cast_fp16")]; + tensor x_507_cast_fp16 = matmul(transpose_x = x_507_transpose_x_0, transpose_y = x_507_transpose_y_0, x = q_with_bias_v_45_cast_fp16, y = op_3988_to_fp16_quantized)[name = tensor("x_507_cast_fp16")]; tensor x_509_pad_0 = const()[name = tensor("x_509_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_509_mode_0 = const()[name = tensor("x_509_mode_0"), val = tensor("constant")]; tensor const_249_to_fp16 = const()[name = tensor("const_249_to_fp16"), val = tensor(0x0p+0)]; @@ -3313,12 +3313,12 @@ program(1.0) tensor var_4021 = const()[name = tensor("op_4021"), val = tensor([1, -1, 1024])]; tensor var_4020_cast_fp16 = transpose(perm = var_4020_perm_0, x = x_513_cast_fp16)[name = tensor("transpose_154")]; tensor input_1187_cast_fp16 = reshape(shape = var_4021, x = var_4020_cast_fp16)[name = tensor("input_1187_cast_fp16")]; - tensor encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(556780352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557828992))), name = tensor("encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_205_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_palettized, x = input_1187_cast_fp16)[name = tensor("linear_205_cast_fp16")]; + tensor encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557931456))), scale = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(558981184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558980096)))]; + tensor linear_205_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_self_attn_linear_out_weight_to_fp16_quantized, x = input_1187_cast_fp16)[name = tensor("linear_205_cast_fp16")]; tensor input_1191_cast_fp16 = add(x = input_1183_cast_fp16, y = linear_205_cast_fp16)[name = tensor("input_1191_cast_fp16")]; tensor x_517_axes_0 = const()[name = tensor("x_517_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557829568)))]; - tensor encoder_module_layers_22_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557831680)))]; + tensor encoder_module_layers_22_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558983296)))]; + tensor encoder_module_layers_22_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558985408)))]; tensor x_517_cast_fp16 = layer_norm(axes = x_517_axes_0, beta = encoder_module_layers_22_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_conv_weight_to_fp16, x = input_1191_cast_fp16)[name = tensor("x_517_cast_fp16")]; tensor input_1193_perm_0 = const()[name = tensor("input_1193_perm_0"), val = tensor([0, 2, 1])]; tensor input_1195_pad_type_0 = const()[name = tensor("input_1195_pad_type_0"), val = tensor("valid")]; @@ -3326,9 +3326,9 @@ program(1.0) tensor input_1195_pad_0 = const()[name = tensor("input_1195_pad_0"), val = tensor([0, 0])]; tensor input_1195_dilations_0 = const()[name = tensor("input_1195_dilations_0"), val = tensor([1])]; tensor input_1195_groups_0 = const()[name = tensor("input_1195_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557833792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559931008))), name = tensor("encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558987520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561086848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561084736)))]; tensor input_1193_cast_fp16 = transpose(perm = input_1193_perm_0, x = x_517_cast_fp16)[name = tensor("transpose_153")]; - tensor input_1195_cast_fp16 = conv(dilations = input_1195_dilations_0, groups = input_1195_groups_0, pad = 
input_1195_pad_0, pad_type = input_1195_pad_type_0, strides = input_1195_strides_0, weight = encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1193_cast_fp16)[name = tensor("input_1195_cast_fp16")]; + tensor input_1195_cast_fp16 = conv(dilations = input_1195_dilations_0, groups = input_1195_groups_0, pad = input_1195_pad_0, pad_type = input_1195_pad_type_0, strides = input_1195_strides_0, weight = encoder_module_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1193_cast_fp16)[name = tensor("input_1195_cast_fp16")]; tensor x_519_split_num_splits_0 = const()[name = tensor("x_519_split_num_splits_0"), val = tensor(2)]; tensor x_519_split_axis_0 = const()[name = tensor("x_519_split_axis_0"), val = tensor(1)]; tensor x_519_split_cast_fp16_0, tensor x_519_split_cast_fp16_1 = split(axis = x_519_split_axis_0, num_splits = x_519_split_num_splits_0, x = input_1195_cast_fp16)[name = tensor("x_519_split_cast_fp16")]; @@ -3344,75 +3344,75 @@ program(1.0) tensor input_1201_strides_0 = const()[name = tensor("input_1201_strides_0"), val = tensor([1])]; tensor input_1201_pad_0 = const()[name = tensor("input_1201_pad_0"), val = tensor([0, 0])]; tensor input_1201_dilations_0 = const()[name = tensor("input_1201_dilations_0"), val = tensor([1])]; - tensor const_307_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559931584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559940864))), name = tensor("const_307_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_308_to_fp16 = const()[name = tensor("const_308_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559941440)))]; - tensor input_1203_cast_fp16 = conv(bias = const_308_to_fp16, dilations = input_1201_dilations_0, groups = input_1201_groups_0, pad = input_1201_pad_0, pad_type = input_1201_pad_type_0, strides = input_1201_strides_0, weight = const_307_to_fp16_palettized, x = input_1199_cast_fp16)[name = tensor("input_1203_cast_fp16")]; + tensor const_307_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_307_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561091008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561101376))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561100288)))]; + tensor const_308_to_fp16 = const()[name = tensor("const_308_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561103488)))]; + tensor input_1203_cast_fp16 = conv(bias = const_308_to_fp16, dilations = input_1201_dilations_0, groups = input_1201_groups_0, pad = input_1201_pad_0, pad_type = input_1201_pad_type_0, strides = input_1201_strides_0, weight = const_307_to_fp16_quantized, x = input_1199_cast_fp16)[name = tensor("input_1203_cast_fp16")]; tensor input_1205_cast_fp16 = silu(x = input_1203_cast_fp16)[name = tensor("input_1205_cast_fp16")]; tensor x_521_pad_type_0 = const()[name = tensor("x_521_pad_type_0"), val = tensor("valid")]; tensor x_521_strides_0 = const()[name = tensor("x_521_strides_0"), val = tensor([1])]; tensor x_521_pad_0 = const()[name = tensor("x_521_pad_0"), val = tensor([0, 0])]; tensor x_521_dilations_0 = const()[name = tensor("x_521_dilations_0"), val = tensor([1])]; 
tensor x_521_groups_0 = const()[name = tensor("x_521_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(559943552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560992192))), name = tensor("encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_521_cast_fp16 = conv(dilations = x_521_dilations_0, groups = x_521_groups_0, pad = x_521_pad_0, pad_type = x_521_pad_type_0, strides = x_521_strides_0, weight = encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1205_cast_fp16)[name = tensor("x_521_cast_fp16")]; + tensor encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(561105600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562155328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562154240)))]; + tensor x_521_cast_fp16 = conv(dilations = x_521_dilations_0, groups = x_521_groups_0, pad = x_521_pad_0, pad_type = x_521_pad_type_0, strides = x_521_strides_0, weight = encoder_module_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1205_cast_fp16)[name = tensor("x_521_cast_fp16")]; tensor input_1207_perm_0 = const()[name = tensor("input_1207_perm_0"), val = tensor([0, 2, 1])]; tensor input_1207_cast_fp16 = transpose(perm = input_1207_perm_0, x = x_521_cast_fp16)[name = tensor("transpose_152")]; tensor input_1209_cast_fp16 = add(x = input_1191_cast_fp16, y = input_1207_cast_fp16)[name = tensor("input_1209_cast_fp16")]; tensor input_1211_axes_0 = const()[name = tensor("input_1211_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560992768)))]; - tensor encoder_module_layers_22_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560994880)))]; + tensor encoder_module_layers_22_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562157440)))]; + tensor encoder_module_layers_22_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562159552)))]; tensor input_1211_cast_fp16 = layer_norm(axes = input_1211_axes_0, beta = encoder_module_layers_22_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_feed_forward2_weight_to_fp16, x = input_1209_cast_fp16)[name = tensor("input_1211_cast_fp16")]; - tensor encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(560996992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565191360))), name = tensor("encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_206_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1211_cast_fp16)[name = tensor("linear_206_cast_fp16")]; + tensor encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562161664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566360192))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566356032)))]; + tensor linear_206_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1211_cast_fp16)[name = tensor("linear_206_cast_fp16")]; tensor input_1215_cast_fp16 = silu(x = linear_206_cast_fp16)[name = tensor("input_1215_cast_fp16")]; - tensor encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565191936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569386304))), name = tensor("encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_207_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1215_cast_fp16)[name = tensor("linear_207_cast_fp16")]; + tensor encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566368448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570563904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570562816)))]; + tensor linear_207_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_22_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1215_cast_fp16)[name = tensor("linear_207_cast_fp16")]; tensor var_4081_to_fp16 = const()[name = tensor("op_4081_to_fp16"), val = tensor(0x1p-1)]; tensor var_4082_cast_fp16 = mul(x = linear_207_cast_fp16, y = var_4081_to_fp16)[name = tensor("op_4082_cast_fp16")]; tensor input_1221_cast_fp16 = add(x = input_1209_cast_fp16, y = var_4082_cast_fp16)[name = tensor("input_1221_cast_fp16")]; tensor input_1223_axes_0 = const()[name = tensor("input_1223_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_22_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569386880)))]; - tensor encoder_module_layers_22_norm_out_bias_to_fp16 = 
const()[name = tensor("encoder_module_layers_22_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569388992)))]; + tensor encoder_module_layers_22_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570566016)))]; + tensor encoder_module_layers_22_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_22_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570568128)))]; tensor input_1223_cast_fp16 = layer_norm(axes = input_1223_axes_0, beta = encoder_module_layers_22_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_22_norm_out_weight_to_fp16, x = input_1221_cast_fp16)[name = tensor("input_1223_cast_fp16")]; tensor input_1225_axes_0 = const()[name = tensor("input_1225_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569391104)))]; - tensor encoder_module_layers_23_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569393216)))]; + tensor encoder_module_layers_23_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570570240)))]; + tensor encoder_module_layers_23_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570572352)))]; tensor input_1225_cast_fp16 = layer_norm(axes = input_1225_axes_0, beta = encoder_module_layers_23_norm_feed_forward1_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_feed_forward1_weight_to_fp16, x = input_1223_cast_fp16)[name = tensor("input_1225_cast_fp16")]; - tensor encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(569395328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(573589696))), name = tensor("encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_208_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_palettized, x = input_1225_cast_fp16)[name = tensor("linear_208_cast_fp16")]; + tensor encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570574464))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(574772992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(574768832)))]; + tensor linear_208_cast_fp16 
= linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1225_cast_fp16)[name = tensor("linear_208_cast_fp16")]; tensor input_1229_cast_fp16 = silu(x = linear_208_cast_fp16)[name = tensor("input_1229_cast_fp16")]; - tensor encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(573590272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577784640))), name = tensor("encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_209_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_palettized, x = input_1229_cast_fp16)[name = tensor("linear_209_cast_fp16")]; + tensor encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(574781248))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578976704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578975616)))]; + tensor linear_209_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1229_cast_fp16)[name = tensor("linear_209_cast_fp16")]; tensor var_4110_to_fp16 = const()[name = tensor("op_4110_to_fp16"), val = tensor(0x1p-1)]; tensor var_4111_cast_fp16 = mul(x = linear_209_cast_fp16, y = var_4110_to_fp16)[name = tensor("op_4111_cast_fp16")]; tensor input_1235_cast_fp16 = add(x = input_1223_cast_fp16, y = var_4111_cast_fp16)[name = tensor("input_1235_cast_fp16")]; tensor query_axes_0 = const()[name = tensor("query_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577785216)))]; - tensor encoder_module_layers_23_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577787328)))]; + tensor encoder_module_layers_23_norm_self_att_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578978816)))]; + tensor encoder_module_layers_23_norm_self_att_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578980928)))]; tensor query_cast_fp16 = layer_norm(axes = query_axes_0, beta = encoder_module_layers_23_norm_self_att_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_self_att_weight_to_fp16, x = input_1235_cast_fp16)[name = tensor("query_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), 
offset = tensor(577789440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578838080))), name = tensor("encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_210_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_palettized, x = query_cast_fp16)[name = tensor("linear_210_cast_fp16")]; + tensor encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578983040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580032768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580031680)))]; + tensor linear_210_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_q_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor("linear_210_cast_fp16")]; tensor var_4127 = const()[name = tensor("op_4127"), val = tensor([1, -1, 8, 128])]; tensor q_139_cast_fp16 = reshape(shape = var_4127, x = linear_210_cast_fp16)[name = tensor("q_139_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578838656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579887296))), name = tensor("encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_211_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_palettized, x = query_cast_fp16)[name = tensor("linear_211_cast_fp16")]; + tensor encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580034880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581084608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581083520)))]; + tensor linear_211_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_k_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor("linear_211_cast_fp16")]; tensor var_4131 = const()[name = tensor("op_4131"), val = tensor([1, -1, 8, 128])]; tensor k_93_cast_fp16 = reshape(shape = var_4131, x = linear_211_cast_fp16)[name = tensor("k_93_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579887872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580936512))), name = tensor("encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_212_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = 
encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_palettized, x = query_cast_fp16)[name = tensor("linear_212_cast_fp16")]; + tensor encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581086720))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582136448))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582135360)))]; + tensor linear_212_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_v_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor("linear_212_cast_fp16")]; tensor var_4135 = const()[name = tensor("op_4135"), val = tensor([1, -1, 8, 128])]; tensor v_cast_fp16 = reshape(shape = var_4135, x = linear_212_cast_fp16)[name = tensor("v_cast_fp16")]; tensor value_perm_0 = const()[name = tensor("value_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor encoder_module_layers_23_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_23_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580937088)))]; + tensor encoder_module_layers_23_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("encoder_module_layers_23_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582138560)))]; tensor var_4147_cast_fp16 = add(x = q_139_cast_fp16, y = encoder_module_layers_23_self_attn_pos_bias_u_to_fp16)[name = tensor("op_4147_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_23_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580939200)))]; + tensor encoder_module_layers_23_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("encoder_module_layers_23_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582140672)))]; tensor var_4149_cast_fp16 = add(x = q_139_cast_fp16, y = encoder_module_layers_23_self_attn_pos_bias_v_to_fp16)[name = tensor("op_4149_cast_fp16")]; tensor q_with_bias_v_perm_0 = const()[name = tensor("q_with_bias_v_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_529_transpose_x_0 = const()[name = tensor("x_529_transpose_x_0"), val = tensor(false)]; tensor x_529_transpose_y_0 = const()[name = tensor("x_529_transpose_y_0"), val = tensor(false)]; - tensor op_4151_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(580941312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581325376))), name = tensor("op_4151_to_fp16_palettized"), shape = tensor([1, 8, 128, 375])]; + tensor op_4151_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_4151_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582142784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582527296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582526848)))]; 
tensor q_with_bias_v_cast_fp16 = transpose(perm = q_with_bias_v_perm_0, x = var_4149_cast_fp16)[name = tensor("transpose_151")]; - tensor x_529_cast_fp16 = matmul(transpose_x = x_529_transpose_x_0, transpose_y = x_529_transpose_y_0, x = q_with_bias_v_cast_fp16, y = op_4151_to_fp16_palettized)[name = tensor("x_529_cast_fp16")]; + tensor x_529_cast_fp16 = matmul(transpose_x = x_529_transpose_x_0, transpose_y = x_529_transpose_y_0, x = q_with_bias_v_cast_fp16, y = op_4151_to_fp16_quantized)[name = tensor("x_529_cast_fp16")]; tensor x_531_pad_0 = const()[name = tensor("x_531_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; tensor x_531_mode_0 = const()[name = tensor("x_531_mode_0"), val = tensor("constant")]; tensor const_259_to_fp16 = const()[name = tensor("const_259_to_fp16"), val = tensor(0x0p+0)]; @@ -3450,12 +3450,12 @@ program(1.0) tensor var_4184 = const()[name = tensor("op_4184"), val = tensor([1, -1, 1024])]; tensor var_4183_cast_fp16 = transpose(perm = var_4183_perm_0, x = x_535_cast_fp16)[name = tensor("transpose_147")]; tensor input_1239_cast_fp16 = reshape(shape = var_4184, x = var_4183_cast_fp16)[name = tensor("input_1239_cast_fp16")]; - tensor encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581325952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582374592))), name = tensor("encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_palettized"), shape = tensor([1024, 1024])]; - tensor linear_214_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_palettized, x = input_1239_cast_fp16)[name = tensor("linear_214_cast_fp16")]; + tensor encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582528128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583577856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583576768)))]; + tensor linear_214_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_self_attn_linear_out_weight_to_fp16_quantized, x = input_1239_cast_fp16)[name = tensor("linear_214_cast_fp16")]; tensor input_1243_cast_fp16 = add(x = input_1235_cast_fp16, y = linear_214_cast_fp16)[name = tensor("input_1243_cast_fp16")]; tensor x_539_axes_0 = const()[name = tensor("x_539_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582375168)))]; - tensor encoder_module_layers_23_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582377280)))]; + tensor encoder_module_layers_23_norm_conv_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583579968)))]; + tensor 
encoder_module_layers_23_norm_conv_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583582080)))]; tensor x_539_cast_fp16 = layer_norm(axes = x_539_axes_0, beta = encoder_module_layers_23_norm_conv_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_conv_weight_to_fp16, x = input_1243_cast_fp16)[name = tensor("x_539_cast_fp16")]; tensor input_1245_perm_0 = const()[name = tensor("input_1245_perm_0"), val = tensor([0, 2, 1])]; tensor input_1247_pad_type_0 = const()[name = tensor("input_1247_pad_type_0"), val = tensor("valid")]; @@ -3463,9 +3463,9 @@ program(1.0) tensor input_1247_pad_0 = const()[name = tensor("input_1247_pad_0"), val = tensor([0, 0])]; tensor input_1247_dilations_0 = const()[name = tensor("input_1247_dilations_0"), val = tensor([1])]; tensor input_1247_groups_0 = const()[name = tensor("input_1247_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582379392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584476608))), name = tensor("encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_palettized"), shape = tensor([2048, 1024, 1])]; + tensor encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(583584192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585683520))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585681408)))]; tensor input_1245_cast_fp16 = transpose(perm = input_1245_perm_0, x = x_539_cast_fp16)[name = tensor("transpose_146")]; - tensor input_1247_cast_fp16 = conv(dilations = input_1247_dilations_0, groups = input_1247_groups_0, pad = input_1247_pad_0, pad_type = input_1247_pad_type_0, strides = input_1247_strides_0, weight = encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_palettized, x = input_1245_cast_fp16)[name = tensor("input_1247_cast_fp16")]; + tensor input_1247_cast_fp16 = conv(dilations = input_1247_dilations_0, groups = input_1247_groups_0, pad = input_1247_pad_0, pad_type = input_1247_pad_type_0, strides = input_1247_strides_0, weight = encoder_module_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1245_cast_fp16)[name = tensor("input_1247_cast_fp16")]; tensor x_541_split_num_splits_0 = const()[name = tensor("x_541_split_num_splits_0"), val = tensor(2)]; tensor x_541_split_axis_0 = const()[name = tensor("x_541_split_axis_0"), val = tensor(1)]; tensor x_541_split_cast_fp16_0, tensor x_541_split_cast_fp16_1 = split(axis = x_541_split_axis_0, num_splits = x_541_split_num_splits_0, x = input_1247_cast_fp16)[name = tensor("x_541_split_cast_fp16")]; @@ -3481,35 +3481,35 @@ program(1.0) tensor input_1253_strides_0 = const()[name = tensor("input_1253_strides_0"), val = tensor([1])]; tensor input_1253_pad_0 = const()[name = tensor("input_1253_pad_0"), val = tensor([0, 0])]; tensor input_1253_dilations_0 = const()[name = tensor("input_1253_dilations_0"), val = tensor([1])]; - tensor const_309_to_fp16_palettized = 
constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584477184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584486464))), name = tensor("const_309_to_fp16_palettized"), shape = tensor([1024, 1, 9])]; - tensor const_310_to_fp16 = const()[name = tensor("const_310_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584487040)))]; - tensor input_1255_cast_fp16 = conv(bias = const_310_to_fp16, dilations = input_1253_dilations_0, groups = input_1253_groups_0, pad = input_1253_pad_0, pad_type = input_1253_pad_type_0, strides = input_1253_strides_0, weight = const_309_to_fp16_palettized, x = input_1251_cast_fp16)[name = tensor("input_1255_cast_fp16")]; + tensor const_309_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_309_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585687680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585698048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585696960)))]; + tensor const_310_to_fp16 = const()[name = tensor("const_310_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585700160)))]; + tensor input_1255_cast_fp16 = conv(bias = const_310_to_fp16, dilations = input_1253_dilations_0, groups = input_1253_groups_0, pad = input_1253_pad_0, pad_type = input_1253_pad_type_0, strides = input_1253_strides_0, weight = const_309_to_fp16_quantized, x = input_1251_cast_fp16)[name = tensor("input_1255_cast_fp16")]; tensor input_1257_cast_fp16 = silu(x = input_1255_cast_fp16)[name = tensor("input_1257_cast_fp16")]; tensor x_543_pad_type_0 = const()[name = tensor("x_543_pad_type_0"), val = tensor("valid")]; tensor x_543_strides_0 = const()[name = tensor("x_543_strides_0"), val = tensor([1])]; tensor x_543_pad_0 = const()[name = tensor("x_543_pad_0"), val = tensor([0, 0])]; tensor x_543_dilations_0 = const()[name = tensor("x_543_dilations_0"), val = tensor([1])]; tensor x_543_groups_0 = const()[name = tensor("x_543_groups_0"), val = tensor(1)]; - tensor encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(584489152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585537792))), name = tensor("encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_palettized"), shape = tensor([1024, 1024, 1])]; - tensor x_543_cast_fp16 = conv(dilations = x_543_dilations_0, groups = x_543_groups_0, pad = x_543_pad_0, pad_type = x_543_pad_type_0, strides = x_543_strides_0, weight = encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_palettized, x = input_1257_cast_fp16)[name = tensor("x_543_cast_fp16")]; + tensor encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585702272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586752000))), zero_point = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(586750912)))]; + tensor x_543_cast_fp16 = conv(dilations = x_543_dilations_0, groups = x_543_groups_0, pad = x_543_pad_0, pad_type = x_543_pad_type_0, strides = x_543_strides_0, weight = encoder_module_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1257_cast_fp16)[name = tensor("x_543_cast_fp16")]; tensor input_1259_perm_0 = const()[name = tensor("input_1259_perm_0"), val = tensor([0, 2, 1])]; tensor input_1259_cast_fp16 = transpose(perm = input_1259_perm_0, x = x_543_cast_fp16)[name = tensor("transpose_145")]; tensor input_1261_cast_fp16 = add(x = input_1243_cast_fp16, y = input_1259_cast_fp16)[name = tensor("input_1261_cast_fp16")]; tensor input_1263_axes_0 = const()[name = tensor("input_1263_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585538368)))]; - tensor encoder_module_layers_23_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585540480)))]; + tensor encoder_module_layers_23_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586754112)))]; + tensor encoder_module_layers_23_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586756224)))]; tensor input_1263_cast_fp16 = layer_norm(axes = input_1263_axes_0, beta = encoder_module_layers_23_norm_feed_forward2_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_feed_forward2_weight_to_fp16, x = input_1261_cast_fp16)[name = tensor("input_1263_cast_fp16")]; - tensor encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585542592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(589736960))), name = tensor("encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_palettized"), shape = tensor([4096, 1024])]; - tensor linear_215_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_palettized, x = input_1263_cast_fp16)[name = tensor("linear_215_cast_fp16")]; + tensor encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586758336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(590956864))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(590952704)))]; + tensor linear_215_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1263_cast_fp16)[name = tensor("linear_215_cast_fp16")]; tensor 
input_1267_cast_fp16 = silu(x = linear_215_cast_fp16)[name = tensor("input_1267_cast_fp16")]; - tensor encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(589737536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(593931904))), name = tensor("encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_palettized"), shape = tensor([1024, 4096])]; - tensor linear_216_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_palettized, x = input_1267_cast_fp16)[name = tensor("linear_216_cast_fp16")]; + tensor encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(590965120))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(595160576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(595159488)))]; + tensor linear_216_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = encoder_module_layers_23_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1267_cast_fp16)[name = tensor("linear_216_cast_fp16")]; tensor var_4244_to_fp16 = const()[name = tensor("op_4244_to_fp16"), val = tensor(0x1p-1)]; tensor var_4245_cast_fp16 = mul(x = linear_216_cast_fp16, y = var_4244_to_fp16)[name = tensor("op_4245_cast_fp16")]; tensor input_cast_fp16 = add(x = input_1261_cast_fp16, y = var_4245_cast_fp16)[name = tensor("input_cast_fp16")]; tensor audio_signal_axes_0 = const()[name = tensor("audio_signal_axes_0"), val = tensor([-1])]; - tensor encoder_module_layers_23_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(593932480)))]; - tensor encoder_module_layers_23_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(593934592)))]; + tensor encoder_module_layers_23_norm_out_weight_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(595162688)))]; + tensor encoder_module_layers_23_norm_out_bias_to_fp16 = const()[name = tensor("encoder_module_layers_23_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(595164800)))]; tensor audio_signal_cast_fp16 = layer_norm(axes = audio_signal_axes_0, beta = encoder_module_layers_23_norm_out_bias_to_fp16, epsilon = var_156_to_fp16, gamma = encoder_module_layers_23_norm_out_weight_to_fp16, x = input_cast_fp16)[name = tensor("audio_signal_cast_fp16")]; tensor obj_3_perm_0 = const()[name = tensor("obj_3_perm_0"), val = tensor([0, 2, 1])]; tensor obj_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("obj_3_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")];
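Note: every hunk in this patch follows the same pattern — a palettized weight (`constexpr_lut_to_dense`, which expands small integer indices through a lookup table) is replaced by an affine-quantized weight (`constexpr_affine_dequantize`, which rescales integer data with a per-axis scale and zero point), and the `weight.bin` blob offsets shift accordingly. As a rough illustration of the numerical difference between the two constexpr schemes, the following is a minimal NumPy sketch; the array sizes and values are placeholders, not the tensors referenced in this diff.

import numpy as np

# constexpr_lut_to_dense (palettization): indices select entries from a small LUT.
lut = np.array([-0.5, -0.1, 0.1, 0.5], dtype=np.float16)    # hypothetical 2-bit palette
indices = np.array([0, 3, 2, 1, 1, 0], dtype=np.uint8)      # per-element LUT indices
dense_from_lut = lut[indices]                                # dense fp16 weights

# constexpr_affine_dequantize: integer data rescaled per axis with scale / zero_point.
quantized_data = np.array([[12, 250, 7], [99, 0, 255]], dtype=np.uint8)  # hypothetical
scale = np.array([0.01, 0.02], dtype=np.float16)             # one scale per row (axis = 0)
zero_point = np.array([128, 128], dtype=np.uint8)            # one zero point per row
dense_from_affine = (
    (quantized_data.astype(np.float32) - zero_point[:, None].astype(np.float32))
    * scale[:, None].astype(np.float32)
).astype(np.float16)

print(dense_from_lut)
print(dense_from_affine)

Both ops expand to dense fp16 constants at load time; the affine form stores one integer per weight element plus per-axis scale and zero-point vectors rather than shared indices and a LUT, which is consistent with the slightly larger, shifted blob regions referenced by the new hunks.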