2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 /** @file brw_eu_compact.c
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
40 * Instruction compaction capabilities vary subtly by generation.
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
49 * Gen5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
53 * Gen6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
58 * On Gen7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gen6
62 * since the jump count field is not in DW3.
68 * else JIP (plus UIP on BDW+)
70 * while JIP (must be negative)
72 * Gen 8 adds support for compacting 3-src instructions.
76 #include "brw_shader.h"
77 #include "brw_disasm_info.h"
78 #include "dev/gen_debug.h"
80 static const uint32_t g45_control_index_table
[32] = {
/* G45 data-type patterns (18 bits each); the position of an entry is the
 * compacted datatype index that stands for it.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000,
};
150 static const uint16_t g45_subreg_table
[32] = {
185 static const uint16_t g45_src_index_table
[32] = {
220 static const uint32_t gen6_control_index_table
[32] = {
/* Gen6 data-type patterns (18 bits each); the position of an entry is the
 * compacted datatype index that stands for it.  Entries 23 and 25 are
 * identical in the table as given.
 */
static const uint32_t gen6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
290 static const uint16_t gen6_subreg_table
[32] = {
325 static const uint16_t gen6_src_index_table
[32] = {
/* Gen7 control patterns (19 bits each); the position of an entry is the
 * compacted control index that stands for it.
 */
static const uint32_t gen7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
/* Gen7 data-type patterns (18 bits each); the position of an entry is the
 * compacted datatype index that stands for it.
 */
static const uint32_t gen7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
430 static const uint16_t gen7_subreg_table
[32] = {
465 static const uint16_t gen7_src_index_table
[32] = {
/* Gen8 control patterns (19 bits each); the position of an entry is the
 * compacted control index that stands for it.
 */
static const uint32_t gen8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
/* Gen8 data-type patterns (21 bits each); the position of an entry is the
 * compacted datatype index that stands for it.
 */
static const uint32_t gen8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
570 static const uint16_t gen8_subreg_table
[32] = {
605 static const uint16_t gen8_src_index_table
[32] = {
/* Gen11 data-type patterns (21 bits each); the position of an entry is the
 * compacted datatype index that stands for it.
 */
static const uint32_t gen11_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101100101,
   0b001000000101111100101,
   0b001000000100101000001,
   0b001000000100101000101,
   0b001000000100101100101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001100100100101100101,
   0b001100101100100100101,
   0b001100101100101100100,
   0b001100101100101100101,
   0b001100111100101100100,
   0b000000000010000001100,
   0b001000000000001100101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001101111100101100101,
   0b001100111100101100101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
/* This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the top of each entry.
 *
 * The low 24 bits have the same mappings on both hardware.
 */
static const uint32_t gen8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001,
};
/* This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 */
static const uint64_t gen8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000,
};
/* Lookup tables currently in effect.  The compaction and uncompaction helpers
 * in this file read through these pointers; they start out NULL and must be
 * pointed at a generation's tables before any of those helpers run.
 */
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint16_t *subreg_table;
static const uint16_t *src_index_table;
708 set_control_index(const struct gen_device_info
*devinfo
,
709 brw_compact_inst
*dst
, const brw_inst
*src
)
711 uint32_t uncompacted
; /* 17b/G45; 19b/IVB+ */
713 if (devinfo
->gen
>= 8) {
714 uncompacted
= (brw_inst_bits(src
, 33, 31) << 16) | /* 3b */
715 (brw_inst_bits(src
, 23, 12) << 4) | /* 12b */
716 (brw_inst_bits(src
, 10, 9) << 2) | /* 2b */
717 (brw_inst_bits(src
, 34, 34) << 1) | /* 1b */
718 (brw_inst_bits(src
, 8, 8)); /* 1b */
720 uncompacted
= (brw_inst_bits(src
, 31, 31) << 16) | /* 1b */
721 (brw_inst_bits(src
, 23, 8)); /* 16b */
723 /* On gen7, the flag register and subregister numbers are integrated into
726 if (devinfo
->gen
== 7)
727 uncompacted
|= brw_inst_bits(src
, 90, 89) << 17; /* 2b */
730 for (int i
= 0; i
< 32; i
++) {
731 if (control_index_table
[i
] == uncompacted
) {
732 brw_compact_inst_set_control_index(devinfo
, dst
, i
);
741 set_datatype_index(const struct gen_device_info
*devinfo
, brw_compact_inst
*dst
,
744 uint32_t uncompacted
; /* 18b/G45+; 21b/BDW+ */
746 if (devinfo
->gen
>= 8) {
747 uncompacted
= (brw_inst_bits(src
, 63, 61) << 18) | /* 3b */
748 (brw_inst_bits(src
, 94, 89) << 12) | /* 6b */
749 (brw_inst_bits(src
, 46, 35)); /* 12b */
751 uncompacted
= (brw_inst_bits(src
, 63, 61) << 15) | /* 3b */
752 (brw_inst_bits(src
, 46, 32)); /* 15b */
755 for (int i
= 0; i
< 32; i
++) {
756 if (datatype_table
[i
] == uncompacted
) {
757 brw_compact_inst_set_datatype_index(devinfo
, dst
, i
);
766 set_subreg_index(const struct gen_device_info
*devinfo
, brw_compact_inst
*dst
,
767 const brw_inst
*src
, bool is_immediate
)
769 uint16_t uncompacted
= /* 15b */
770 (brw_inst_bits(src
, 52, 48) << 0) | /* 5b */
771 (brw_inst_bits(src
, 68, 64) << 5); /* 5b */
774 uncompacted
|= brw_inst_bits(src
, 100, 96) << 10; /* 5b */
776 for (int i
= 0; i
< 32; i
++) {
777 if (subreg_table
[i
] == uncompacted
) {
778 brw_compact_inst_set_subreg_index(devinfo
, dst
, i
);
787 get_src_index(uint16_t uncompacted
,
790 for (int i
= 0; i
< 32; i
++) {
791 if (src_index_table
[i
] == uncompacted
) {
801 set_src0_index(const struct gen_device_info
*devinfo
,
802 brw_compact_inst
*dst
, const brw_inst
*src
)
805 uint16_t uncompacted
= brw_inst_bits(src
, 88, 77); /* 12b */
807 if (!get_src_index(uncompacted
, &compacted
))
810 brw_compact_inst_set_src0_index(devinfo
, dst
, compacted
);
816 set_src1_index(const struct gen_device_info
*devinfo
, brw_compact_inst
*dst
,
817 const brw_inst
*src
, bool is_immediate
)
822 compacted
= (brw_inst_imm_ud(devinfo
, src
) >> 8) & 0x1f;
824 uint16_t uncompacted
= brw_inst_bits(src
, 120, 109); /* 12b */
826 if (!get_src_index(uncompacted
, &compacted
))
830 brw_compact_inst_set_src1_index(devinfo
, dst
, compacted
);
836 set_3src_control_index(const struct gen_device_info
*devinfo
,
837 brw_compact_inst
*dst
, const brw_inst
*src
)
839 assert(devinfo
->gen
>= 8);
841 uint32_t uncompacted
= /* 24b/BDW; 26b/CHV */
842 (brw_inst_bits(src
, 34, 32) << 21) | /* 3b */
843 (brw_inst_bits(src
, 28, 8)); /* 21b */
845 if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
)
846 uncompacted
|= brw_inst_bits(src
, 36, 35) << 24; /* 2b */
848 for (unsigned i
= 0; i
< ARRAY_SIZE(gen8_3src_control_index_table
); i
++) {
849 if (gen8_3src_control_index_table
[i
] == uncompacted
) {
850 brw_compact_inst_set_3src_control_index(devinfo
, dst
, i
);
859 set_3src_source_index(const struct gen_device_info
*devinfo
,
860 brw_compact_inst
*dst
, const brw_inst
*src
)
862 assert(devinfo
->gen
>= 8);
864 uint64_t uncompacted
= /* 46b/BDW; 49b/CHV */
865 (brw_inst_bits(src
, 83, 83) << 43) | /* 1b */
866 (brw_inst_bits(src
, 114, 107) << 35) | /* 8b */
867 (brw_inst_bits(src
, 93, 86) << 27) | /* 8b */
868 (brw_inst_bits(src
, 72, 65) << 19) | /* 8b */
869 (brw_inst_bits(src
, 55, 37)); /* 19b */
871 if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
) {
873 (brw_inst_bits(src
, 126, 125) << 47) | /* 2b */
874 (brw_inst_bits(src
, 105, 104) << 45) | /* 2b */
875 (brw_inst_bits(src
, 84, 84) << 44); /* 1b */
878 (brw_inst_bits(src
, 125, 125) << 45) | /* 1b */
879 (brw_inst_bits(src
, 104, 104) << 44); /* 1b */
882 for (unsigned i
= 0; i
< ARRAY_SIZE(gen8_3src_source_index_table
); i
++) {
883 if (gen8_3src_source_index_table
[i
] == uncompacted
) {
884 brw_compact_inst_set_3src_source_index(devinfo
, dst
, i
);
893 has_unmapped_bits(const struct gen_device_info
*devinfo
, const brw_inst
*src
)
895 /* EOT can only be mapped on a send if the src1 is an immediate */
896 if ((brw_inst_opcode(devinfo
, src
) == BRW_OPCODE_SENDC
||
897 brw_inst_opcode(devinfo
, src
) == BRW_OPCODE_SEND
) &&
898 brw_inst_eot(devinfo
, src
))
901 /* Check for instruction bits that don't map to any of the fields of the
902 * compacted instruction. The instruction cannot be compacted if any of
903 * them are set. They overlap with:
904 * - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
905 * - Dst.AddrImm[9] (bit 47 on Gen8)
906 * - Src0.AddrImm[9] (bit 95 on Gen8)
907 * - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
908 * - UIP[31] (bit 95 on Gen8)
910 if (devinfo
->gen
>= 8) {
911 assert(!brw_inst_bits(src
, 7, 7));
912 return brw_inst_bits(src
, 95, 95) ||
913 brw_inst_bits(src
, 47, 47) ||
914 brw_inst_bits(src
, 11, 11);
916 assert(!brw_inst_bits(src
, 7, 7) &&
917 !(devinfo
->gen
< 7 && brw_inst_bits(src
, 90, 90)));
918 return brw_inst_bits(src
, 95, 91) ||
919 brw_inst_bits(src
, 47, 47);
924 has_3src_unmapped_bits(const struct gen_device_info
*devinfo
,
927 /* Check for three-source instruction bits that don't map to any of the
928 * fields of the compacted instruction. All of them seem to be reserved
931 if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
) {
932 assert(!brw_inst_bits(src
, 127, 127) &&
933 !brw_inst_bits(src
, 7, 7));
935 assert(devinfo
->gen
>= 8);
936 assert(!brw_inst_bits(src
, 127, 126) &&
937 !brw_inst_bits(src
, 105, 105) &&
938 !brw_inst_bits(src
, 84, 84) &&
939 !brw_inst_bits(src
, 7, 7));
941 /* Src1Type and Src2Type, used for mixed-precision floating point */
942 if (brw_inst_bits(src
, 36, 35))
950 brw_try_compact_3src_instruction(const struct gen_device_info
*devinfo
,
951 brw_compact_inst
*dst
, const brw_inst
*src
)
953 assert(devinfo
->gen
>= 8);
955 if (has_3src_unmapped_bits(devinfo
, src
))
958 #define compact(field) \
959 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
960 #define compact_a16(field) \
961 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
965 if (!set_3src_control_index(devinfo
, dst
, src
))
968 if (!set_3src_source_index(devinfo
, dst
, src
))
972 compact_a16(src0_rep_ctrl
);
973 brw_compact_inst_set_3src_cmpt_control(devinfo
, dst
, true);
974 compact(debug_control
);
976 compact_a16(src1_rep_ctrl
);
977 compact_a16(src2_rep_ctrl
);
978 compact(src0_reg_nr
);
979 compact(src1_reg_nr
);
980 compact(src2_reg_nr
);
981 compact_a16(src0_subreg_nr
);
982 compact_a16(src1_subreg_nr
);
983 compact_a16(src2_subreg_nr
);
/* Compacted instructions have 12-bits for immediate sources, and a 13th bit
 * that's replicated through the high 20 bits.
 *
 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
 * of packed vectors as compactable immediates.
 *
 * Returns true when imm fits that shape, i.e. when bits 31:12 are either all
 * clear or all set.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* We get the low 12 bits as-is. */
   imm &= ~0xfff;

   /* We get one bit replicated through the top 20 bits. */
   return imm == 0 || imm == 0xfffff000;
}
1008 * Applies some small changes to instruction types to increase chances of
1012 precompact(const struct gen_device_info
*devinfo
, brw_inst inst
)
1014 if (brw_inst_src0_reg_file(devinfo
, &inst
) != BRW_IMMEDIATE_VALUE
)
1017 /* The Bspec's section titled "Non-present Operands" claims that if src0
1018 * is an immediate that src1's type must be the same as that of src0.
1020 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1021 * that do not follow this rule. E.g., from the IVB/HSW table:
1023 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1024 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
1026 * And from the SNB table:
1028 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1029 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
1031 * Neither of these cause warnings from the simulator when used,
1032 * compacted or otherwise. In fact, all compaction mappings that have an
1033 * immediate in src0 use a:ud for src1.
1035 * The GM45 instruction compaction tables do not contain mapped meanings
1036 * so it's not clear whether it has the restriction. We'll assume it was
1037 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1039 * Don't do any of this for 64-bit immediates, since the src1 fields
1040 * overlap with the immediate and setting them would overwrite the
1043 if (devinfo
->gen
>= 6 &&
1044 !(devinfo
->is_haswell
&&
1045 brw_inst_opcode(devinfo
, &inst
) == BRW_OPCODE_DIM
) &&
1046 !(devinfo
->gen
>= 8 &&
1047 (brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_DF
||
1048 brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_UQ
||
1049 brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_Q
))) {
1050 enum brw_reg_file file
= brw_inst_src1_reg_file(devinfo
, &inst
);
1051 brw_inst_set_src1_file_type(devinfo
, &inst
, file
, BRW_REGISTER_TYPE_UD
);
1054 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1055 * for immediate values. Presumably the hardware engineers realized
1056 * that the only useful floating-point value that could be represented
1057 * in this format is 0.0, which can also be represented as a VF-typed
1058 * immediate, so they gave us the previously mentioned mapping on IVB+.
1060 * Strangely, we do have a mapping for imm:f in src1, so we don't need
1063 * If we see a 0.0:F, change the type to VF so that it can be compacted.
1065 if (brw_inst_imm_ud(devinfo
, &inst
) == 0x0 &&
1066 brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_F
&&
1067 brw_inst_dst_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_F
&&
1068 brw_inst_dst_hstride(devinfo
, &inst
) == BRW_HORIZONTAL_STRIDE_1
) {
1069 enum brw_reg_file file
= brw_inst_src0_reg_file(devinfo
, &inst
);
1070 brw_inst_set_src0_file_type(devinfo
, &inst
, file
, BRW_REGISTER_TYPE_VF
);
1073 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1074 * set the types to :UD so the instruction can be compacted.
1076 if (is_compactable_immediate(brw_inst_imm_ud(devinfo
, &inst
)) &&
1077 brw_inst_cond_modifier(devinfo
, &inst
) == BRW_CONDITIONAL_NONE
&&
1078 brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_D
&&
1079 brw_inst_dst_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_D
) {
1080 enum brw_reg_file src_file
= brw_inst_src0_reg_file(devinfo
, &inst
);
1081 enum brw_reg_file dst_file
= brw_inst_dst_reg_file(devinfo
, &inst
);
1083 brw_inst_set_src0_file_type(devinfo
, &inst
, src_file
, BRW_REGISTER_TYPE_UD
);
1084 brw_inst_set_dst_file_type(devinfo
, &inst
, dst_file
, BRW_REGISTER_TYPE_UD
);
1091 * Tries to compact instruction src into dst.
1093 * It doesn't modify dst unless src is compactable, which is relied on by
1094 * brw_compact_instructions().
1097 brw_try_compact_instruction(const struct gen_device_info
*devinfo
,
1098 brw_compact_inst
*dst
, const brw_inst
*src
)
1100 brw_compact_inst temp
;
1102 assert(brw_inst_cmpt_control(devinfo
, src
) == 0);
1104 if (is_3src(devinfo
, brw_inst_opcode(devinfo
, src
))) {
1105 if (devinfo
->gen
>= 8) {
1106 memset(&temp
, 0, sizeof(temp
));
1107 if (brw_try_compact_3src_instruction(devinfo
, &temp
, src
)) {
1119 brw_inst_src0_reg_file(devinfo
, src
) == BRW_IMMEDIATE_VALUE
||
1120 brw_inst_src1_reg_file(devinfo
, src
) == BRW_IMMEDIATE_VALUE
;
1122 (devinfo
->gen
< 6 ||
1123 !is_compactable_immediate(brw_inst_imm_ud(devinfo
, src
)))) {
1127 if (has_unmapped_bits(devinfo
, src
))
1130 memset(&temp
, 0, sizeof(temp
));
1132 #define compact(field) \
1133 brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1136 compact(debug_control
);
1138 if (!set_control_index(devinfo
, &temp
, src
))
1140 if (!set_datatype_index(devinfo
, &temp
, src
))
1142 if (!set_subreg_index(devinfo
, &temp
, src
, is_immediate
))
1145 if (devinfo
->gen
>= 6) {
1146 compact(acc_wr_control
);
1148 compact(mask_control_ex
);
1151 compact(cond_modifier
);
1153 if (devinfo
->gen
<= 6)
1154 compact(flag_subreg_nr
);
1156 brw_compact_inst_set_cmpt_control(devinfo
, &temp
, true);
1158 if (!set_src0_index(devinfo
, &temp
, src
))
1160 if (!set_src1_index(devinfo
, &temp
, src
, is_immediate
))
1163 brw_compact_inst_set_dst_reg_nr(devinfo
, &temp
,
1164 brw_inst_dst_da_reg_nr(devinfo
, src
));
1165 brw_compact_inst_set_src0_reg_nr(devinfo
, &temp
,
1166 brw_inst_src0_da_reg_nr(devinfo
, src
));
1169 brw_compact_inst_set_src1_reg_nr(devinfo
, &temp
,
1170 brw_inst_imm_ud(devinfo
, src
) & 0xff);
1172 brw_compact_inst_set_src1_reg_nr(devinfo
, &temp
,
1173 brw_inst_src1_da_reg_nr(devinfo
, src
));
1184 set_uncompacted_control(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1185 brw_compact_inst
*src
)
1187 uint32_t uncompacted
=
1188 control_index_table
[brw_compact_inst_control_index(devinfo
, src
)];
1190 if (devinfo
->gen
>= 8) {
1191 brw_inst_set_bits(dst
, 33, 31, (uncompacted
>> 16));
1192 brw_inst_set_bits(dst
, 23, 12, (uncompacted
>> 4) & 0xfff);
1193 brw_inst_set_bits(dst
, 10, 9, (uncompacted
>> 2) & 0x3);
1194 brw_inst_set_bits(dst
, 34, 34, (uncompacted
>> 1) & 0x1);
1195 brw_inst_set_bits(dst
, 8, 8, (uncompacted
>> 0) & 0x1);
1197 brw_inst_set_bits(dst
, 31, 31, (uncompacted
>> 16) & 0x1);
1198 brw_inst_set_bits(dst
, 23, 8, (uncompacted
& 0xffff));
1200 if (devinfo
->gen
== 7)
1201 brw_inst_set_bits(dst
, 90, 89, uncompacted
>> 17);
1206 set_uncompacted_datatype(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1207 brw_compact_inst
*src
)
1209 uint32_t uncompacted
=
1210 datatype_table
[brw_compact_inst_datatype_index(devinfo
, src
)];
1212 if (devinfo
->gen
>= 8) {
1213 brw_inst_set_bits(dst
, 63, 61, (uncompacted
>> 18));
1214 brw_inst_set_bits(dst
, 94, 89, (uncompacted
>> 12) & 0x3f);
1215 brw_inst_set_bits(dst
, 46, 35, (uncompacted
>> 0) & 0xfff);
1217 brw_inst_set_bits(dst
, 63, 61, (uncompacted
>> 15));
1218 brw_inst_set_bits(dst
, 46, 32, (uncompacted
& 0x7fff));
1223 set_uncompacted_subreg(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1224 brw_compact_inst
*src
)
1226 uint16_t uncompacted
=
1227 subreg_table
[brw_compact_inst_subreg_index(devinfo
, src
)];
1229 brw_inst_set_bits(dst
, 100, 96, (uncompacted
>> 10));
1230 brw_inst_set_bits(dst
, 68, 64, (uncompacted
>> 5) & 0x1f);
1231 brw_inst_set_bits(dst
, 52, 48, (uncompacted
>> 0) & 0x1f);
1235 set_uncompacted_src0(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1236 brw_compact_inst
*src
)
1238 uint32_t compacted
= brw_compact_inst_src0_index(devinfo
, src
);
1239 uint16_t uncompacted
= src_index_table
[compacted
];
1241 brw_inst_set_bits(dst
, 88, 77, uncompacted
);
1245 set_uncompacted_src1(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1246 brw_compact_inst
*src
, bool is_immediate
)
1249 signed high5
= brw_compact_inst_src1_index(devinfo
, src
);
1250 /* Replicate top bit of src1_index into high 20 bits of the immediate. */
1251 brw_inst_set_imm_ud(devinfo
, dst
, (high5
<< 27) >> 19);
1253 uint16_t uncompacted
=
1254 src_index_table
[brw_compact_inst_src1_index(devinfo
, src
)];
1256 brw_inst_set_bits(dst
, 120, 109, uncompacted
);
1261 set_uncompacted_3src_control_index(const struct gen_device_info
*devinfo
,
1262 brw_inst
*dst
, brw_compact_inst
*src
)
1264 assert(devinfo
->gen
>= 8);
1266 uint32_t compacted
= brw_compact_inst_3src_control_index(devinfo
, src
);
1267 uint32_t uncompacted
= gen8_3src_control_index_table
[compacted
];
1269 brw_inst_set_bits(dst
, 34, 32, (uncompacted
>> 21) & 0x7);
1270 brw_inst_set_bits(dst
, 28, 8, (uncompacted
>> 0) & 0x1fffff);
1272 if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
)
1273 brw_inst_set_bits(dst
, 36, 35, (uncompacted
>> 24) & 0x3);
1277 set_uncompacted_3src_source_index(const struct gen_device_info
*devinfo
,
1278 brw_inst
*dst
, brw_compact_inst
*src
)
1280 assert(devinfo
->gen
>= 8);
1282 uint32_t compacted
= brw_compact_inst_3src_source_index(devinfo
, src
);
1283 uint64_t uncompacted
= gen8_3src_source_index_table
[compacted
];
1285 brw_inst_set_bits(dst
, 83, 83, (uncompacted
>> 43) & 0x1);
1286 brw_inst_set_bits(dst
, 114, 107, (uncompacted
>> 35) & 0xff);
1287 brw_inst_set_bits(dst
, 93, 86, (uncompacted
>> 27) & 0xff);
1288 brw_inst_set_bits(dst
, 72, 65, (uncompacted
>> 19) & 0xff);
1289 brw_inst_set_bits(dst
, 55, 37, (uncompacted
>> 0) & 0x7ffff);
1291 if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
) {
1292 brw_inst_set_bits(dst
, 126, 125, (uncompacted
>> 47) & 0x3);
1293 brw_inst_set_bits(dst
, 105, 104, (uncompacted
>> 45) & 0x3);
1294 brw_inst_set_bits(dst
, 84, 84, (uncompacted
>> 44) & 0x1);
1296 brw_inst_set_bits(dst
, 125, 125, (uncompacted
>> 45) & 0x1);
1297 brw_inst_set_bits(dst
, 104, 104, (uncompacted
>> 44) & 0x1);
1302 brw_uncompact_3src_instruction(const struct gen_device_info
*devinfo
,
1303 brw_inst
*dst
, brw_compact_inst
*src
)
1305 assert(devinfo
->gen
>= 8);
1307 #define uncompact(field) \
1308 brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1309 #define uncompact_a16(field) \
1310 brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1312 uncompact(hw_opcode
);
1314 set_uncompacted_3src_control_index(devinfo
, dst
, src
);
1315 set_uncompacted_3src_source_index(devinfo
, dst
, src
);
1317 uncompact(dst_reg_nr
);
1318 uncompact_a16(src0_rep_ctrl
);
1319 brw_inst_set_3src_cmpt_control(devinfo
, dst
, false);
1320 uncompact(debug_control
);
1321 uncompact(saturate
);
1322 uncompact_a16(src1_rep_ctrl
);
1323 uncompact_a16(src2_rep_ctrl
);
1324 uncompact(src0_reg_nr
);
1325 uncompact(src1_reg_nr
);
1326 uncompact(src2_reg_nr
);
1327 uncompact_a16(src0_subreg_nr
);
1328 uncompact_a16(src1_subreg_nr
);
1329 uncompact_a16(src2_subreg_nr
);
1332 #undef uncompact_a16
1336 brw_uncompact_instruction(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1337 brw_compact_inst
*src
)
1339 memset(dst
, 0, sizeof(*dst
));
1341 if (devinfo
->gen
>= 8 &&
1342 is_3src(devinfo
, brw_opcode_decode(
1343 devinfo
, brw_compact_inst_3src_hw_opcode(devinfo
, src
)))) {
1344 brw_uncompact_3src_instruction(devinfo
, dst
, src
);
1348 #define uncompact(field) \
1349 brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
1351 uncompact(hw_opcode
);
1352 uncompact(debug_control
);
1354 set_uncompacted_control(devinfo
, dst
, src
);
1355 set_uncompacted_datatype(devinfo
, dst
, src
);
1357 /* src0/1 register file fields are in the datatype table. */
1358 bool is_immediate
= brw_inst_src0_reg_file(devinfo
, dst
) == BRW_IMMEDIATE_VALUE
||
1359 brw_inst_src1_reg_file(devinfo
, dst
) == BRW_IMMEDIATE_VALUE
;
1361 set_uncompacted_subreg(devinfo
, dst
, src
);
1363 if (devinfo
->gen
>= 6) {
1364 uncompact(acc_wr_control
);
1366 uncompact(mask_control_ex
);
1369 uncompact(cond_modifier
);
1371 if (devinfo
->gen
<= 6)
1372 uncompact(flag_subreg_nr
);
1374 set_uncompacted_src0(devinfo
, dst
, src
);
1375 set_uncompacted_src1(devinfo
, dst
, src
, is_immediate
);
1377 brw_inst_set_dst_da_reg_nr(devinfo
, dst
,
1378 brw_compact_inst_dst_reg_nr(devinfo
, src
));
1379 brw_inst_set_src0_da_reg_nr(devinfo
, dst
,
1380 brw_compact_inst_src0_reg_nr(devinfo
, src
));
1383 brw_inst_set_imm_ud(devinfo
, dst
,
1384 brw_inst_imm_ud(devinfo
, dst
) |
1385 brw_compact_inst_src1_reg_nr(devinfo
, src
));
1387 brw_inst_set_src1_da_reg_nr(devinfo
, dst
,
1388 brw_compact_inst_src1_reg_nr(devinfo
, src
));
1394 void brw_debug_compact_uncompact(const struct gen_device_info
*devinfo
,
1396 brw_inst
*uncompacted
)
1398 fprintf(stderr
, "Instruction compact/uncompact changed (gen%d):\n",
1401 fprintf(stderr
, " before: ");
1402 brw_disassemble_inst(stderr
, devinfo
, orig
, true);
1404 fprintf(stderr
, " after: ");
1405 brw_disassemble_inst(stderr
, devinfo
, uncompacted
, false);
1407 uint32_t *before_bits
= (uint32_t *)orig
;
1408 uint32_t *after_bits
= (uint32_t *)uncompacted
;
1409 fprintf(stderr
, " changed bits:\n");
1410 for (int i
= 0; i
< 128; i
++) {
1411 uint32_t before
= before_bits
[i
/ 32] & (1 << (i
& 31));
1412 uint32_t after
= after_bits
[i
/ 32] & (1 << (i
& 31));
1414 if (before
!= after
) {
1415 fprintf(stderr
, " bit %d, %s to %s\n", i
,
1416 before
? "set" : "unset",
1417 after
? "set" : "unset");
/**
 * Difference between the compacted-instruction counts recorded for two
 * pre-compaction instruction indices.
 *
 * @param old_ip            index of the jumping instruction before compaction
 * @param old_target_ip     index of the jump target before compaction
 * @param compacted_counts  per-instruction counts, indexed by old IP
 * @return compacted_counts[old_target_ip] - compacted_counts[old_ip]; the
 *         result is negative when the target's count is the smaller one
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1431 update_uip_jip(const struct gen_device_info
*devinfo
, brw_inst
*insn
,
1432 int this_old_ip
, int *compacted_counts
)
1434 /* JIP and UIP are in units of:
1435 * - bytes on Gen8+; and
1436 * - compacted instructions on Gen6+.
1438 int shift
= devinfo
->gen
>= 8 ? 3 : 0;
1440 int32_t jip_compacted
= brw_inst_jip(devinfo
, insn
) >> shift
;
1441 jip_compacted
-= compacted_between(this_old_ip
,
1442 this_old_ip
+ (jip_compacted
/ 2),
1444 brw_inst_set_jip(devinfo
, insn
, jip_compacted
<< shift
);
1446 if (brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_ENDIF
||
1447 brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_WHILE
||
1448 (brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_ELSE
&& devinfo
->gen
<= 7))
1451 int32_t uip_compacted
= brw_inst_uip(devinfo
, insn
) >> shift
;
1452 uip_compacted
-= compacted_between(this_old_ip
,
1453 this_old_ip
+ (uip_compacted
/ 2),
1455 brw_inst_set_uip(devinfo
, insn
, uip_compacted
<< shift
);
1459 update_gen4_jump_count(const struct gen_device_info
*devinfo
, brw_inst
*insn
,
1460 int this_old_ip
, int *compacted_counts
)
1462 assert(devinfo
->gen
== 5 || devinfo
->is_g4x
);
1464 /* Jump Count is in units of:
1465 * - uncompacted instructions on G45; and
1466 * - compacted instructions on Gen5.
1468 int shift
= devinfo
->is_g4x
? 1 : 0;
1470 int jump_count_compacted
= brw_inst_gen4_jump_count(devinfo
, insn
) << shift
;
1472 int target_old_ip
= this_old_ip
+ (jump_count_compacted
/ 2);
1474 int this_compacted_count
= compacted_counts
[this_old_ip
];
1475 int target_compacted_count
= compacted_counts
[target_old_ip
];
1477 jump_count_compacted
-= (target_compacted_count
- this_compacted_count
);
1478 brw_inst_set_gen4_jump_count(devinfo
, insn
, jump_count_compacted
>> shift
);
/**
 * Point the shared compaction lookup tables at the set matching the device
 * generation.
 *
 * First asserts that the final entry of every static table is non-zero, then
 * switches on devinfo->gen to assign control_index_table, datatype_table,
 * subreg_table and src_index_table.
 *
 * @param devinfo  device description; only its gen field is read here
 */
1482 brw_init_compaction_tables(const struct gen_device_info
*devinfo
)
/* Sanity checks on the last element of each table.
 * NOTE(review): presumably this catches an initializer that is shorter than
 * the declared array size (missing entries would be zero) -- confirm against
 * the table definitions.
 */
1484 assert(g45_control_index_table
[ARRAY_SIZE(g45_control_index_table
) - 1] != 0);
1485 assert(g45_datatype_table
[ARRAY_SIZE(g45_datatype_table
) - 1] != 0);
1486 assert(g45_subreg_table
[ARRAY_SIZE(g45_subreg_table
) - 1] != 0);
1487 assert(g45_src_index_table
[ARRAY_SIZE(g45_src_index_table
) - 1] != 0);
1488 assert(gen6_control_index_table
[ARRAY_SIZE(gen6_control_index_table
) - 1] != 0);
1489 assert(gen6_datatype_table
[ARRAY_SIZE(gen6_datatype_table
) - 1] != 0);
1490 assert(gen6_subreg_table
[ARRAY_SIZE(gen6_subreg_table
) - 1] != 0);
1491 assert(gen6_src_index_table
[ARRAY_SIZE(gen6_src_index_table
) - 1] != 0);
1492 assert(gen7_control_index_table
[ARRAY_SIZE(gen7_control_index_table
) - 1] != 0);
1493 assert(gen7_datatype_table
[ARRAY_SIZE(gen7_datatype_table
) - 1] != 0);
1494 assert(gen7_subreg_table
[ARRAY_SIZE(gen7_subreg_table
) - 1] != 0);
1495 assert(gen7_src_index_table
[ARRAY_SIZE(gen7_src_index_table
) - 1] != 0);
1496 assert(gen8_control_index_table
[ARRAY_SIZE(gen8_control_index_table
) - 1] != 0);
1497 assert(gen8_datatype_table
[ARRAY_SIZE(gen8_datatype_table
) - 1] != 0);
1498 assert(gen8_subreg_table
[ARRAY_SIZE(gen8_subreg_table
) - 1] != 0);
1499 assert(gen8_src_index_table
[ARRAY_SIZE(gen8_src_index_table
) - 1] != 0);
1500 assert(gen11_datatype_table
[ARRAY_SIZE(gen11_datatype_table
) - 1] != 0);
/* Pick the table set by hardware generation. */
1502 switch (devinfo
->gen
) {
/* No tables for this case: all four pointers are set to NULL. */
1504 control_index_table
= NULL
;
1505 datatype_table
= NULL
;
1506 subreg_table
= NULL
;
1507 src_index_table
= NULL
;
/* The gen8 tables, except that the datatype table is the gen11 one. */
1510 control_index_table
= gen8_control_index_table
;
1511 datatype_table
= gen11_datatype_table
;
1512 subreg_table
= gen8_subreg_table
;
1513 src_index_table
= gen8_src_index_table
;
/* The complete gen8 table set. */
1518 control_index_table
= gen8_control_index_table
;
1519 datatype_table
= gen8_datatype_table
;
1520 subreg_table
= gen8_subreg_table
;
1521 src_index_table
= gen8_src_index_table
;
/* The complete gen7 table set. */
1524 control_index_table
= gen7_control_index_table
;
1525 datatype_table
= gen7_datatype_table
;
1526 subreg_table
= gen7_subreg_table
;
1527 src_index_table
= gen7_src_index_table
;
/* The complete gen6 table set. */
1530 control_index_table
= gen6_control_index_table
;
1531 datatype_table
= gen6_datatype_table
;
1532 subreg_table
= gen6_subreg_table
;
1533 src_index_table
= gen6_src_index_table
;
/* The complete g45 table set. */
1537 control_index_table
= g45_control_index_table
;
1538 datatype_table
= g45_datatype_table
;
1539 subreg_table
= g45_subreg_table
;
1540 src_index_table
= g45_src_index_table
;
/* Any generation without a table set above is treated as a programming error. */
1543 unreachable("unknown generation");
/**
 * Compact, in place, the instructions of p that start at byte offset
 * start_offset.
 *
 * Each instruction is offered to brw_try_compact_instruction(). Compacted
 * instructions occupy sizeof(brw_compact_inst) bytes; the others are moved
 * down so the program stays contiguous. The jump fields of control-flow
 * instructions are then patched using the old_ip / compacted_counts
 * bookkeeping, p->next_insn_offset and p->nr_insn are updated, and the
 * offsets of the groups in disasm->group_list are rewritten.
 *
 * @param p             code generator state; p->devinfo, p->store and
 *                      p->next_insn_offset are read, and p->next_insn_offset
 *                      and p->nr_insn are written
 * @param start_offset  byte offset of the first instruction to compact
 * @param disasm        disassembly info whose group offsets are updated
 */
1548 brw_compact_instructions(struct brw_codegen
*p
, int start_offset
,
1549 struct disasm_info
*disasm
)
/* Bail-out condition: compaction disabled through INTEL_DEBUG, or a
 * generation above 11.
 */
1551 if (unlikely(INTEL_DEBUG
& DEBUG_NO_COMPACTION
) || p
->devinfo
->gen
> 11)
1554 const struct gen_device_info
*devinfo
= p
->devinfo
;
/* Base of the code being compacted.
 * NOTE(review): start_offset is divided by 16 before being added to
 * p->store, so this assumes p->store points at 16-byte elements -- confirm.
 */
1555 void *store
= p
->store
+ start_offset
/ 16;
1556 /* For an instruction at byte offset 16*i before compaction, this is the
1557 * number of compacted instructions minus the number of padding NOP/NENOPs
1560 int compacted_counts
[(p
->next_insn_offset
- start_offset
) / sizeof(brw_inst
)];
1561 /* For an instruction at byte offset 8*i after compaction, this was its IP
1562 * (in 16-byte units) before compaction.
1564 int old_ip
[(p
->next_insn_offset
- start_offset
) / sizeof(brw_compact_inst
) + 1];
1566 if (devinfo
->gen
== 4 && !devinfo
->is_g4x
)
1570 int compacted_count
= 0;
1571 for (int src_offset
= 0; src_offset
< p
->next_insn_offset
- start_offset
;
1572 src_offset
+= sizeof(brw_inst
)) {
1573 brw_inst
*src
= store
+ src_offset
;
1574 void *dst
= store
+ offset
;
1576 old_ip
[offset
/ sizeof(brw_compact_inst
)] = src_offset
/ sizeof(brw_inst
);
1577 compacted_counts
[src_offset
/ sizeof(brw_inst
)] = compacted_count
;
1579 brw_inst inst
= precompact(devinfo
, *src
);
1580 brw_inst saved
= inst
;
1582 if (brw_try_compact_instruction(devinfo
, dst
, &inst
)) {
1586 brw_inst uncompacted
;
1587 brw_uncompact_instruction(devinfo
, &uncompacted
, dst
);
1588 if (memcmp(&saved
, &uncompacted
, sizeof(uncompacted
))) {
1589 brw_debug_compact_uncompact(devinfo
, &saved
, &uncompacted
);
1593 offset
+= sizeof(brw_compact_inst
);
1595 /* All uncompacted instructions need to be aligned on G45. */
1596 if ((offset
& sizeof(brw_compact_inst
)) != 0 && devinfo
->is_g4x
){
1597 brw_compact_inst
*align
= store
+ offset
;
1598 memset(align
, 0, sizeof(*align
));
1599 brw_compact_inst_set_hw_opcode(
1600 devinfo
, align
, brw_opcode_encode(devinfo
, BRW_OPCODE_NENOP
));
1601 brw_compact_inst_set_cmpt_control(devinfo
, align
, true);
1602 offset
+= sizeof(brw_compact_inst
);
/* The padding NENOP moved this instruction by one compact slot, so its
 * bookkeeping entries are recorded again for the new offset.
 */
1604 compacted_counts
[src_offset
/ sizeof(brw_inst
)] = compacted_count
;
1605 old_ip
[offset
/ sizeof(brw_compact_inst
)] = src_offset
/ sizeof(brw_inst
);
1607 dst
= store
+ offset
;
1610 /* If we didn't compact this instruction, we need to move it down into
1613 if (offset
!= src_offset
) {
1614 memmove(dst
, src
, sizeof(brw_inst
));
1616 offset
+= sizeof(brw_inst
);
1620 /* Add an entry for the ending offset of the program. This greatly
1621 * simplifies the linked list walk at the end of the function.
1623 old_ip
[offset
/ sizeof(brw_compact_inst
)] =
1624 (p
->next_insn_offset
- start_offset
) / sizeof(brw_inst
);
1626 /* Fix up control flow offsets. */
1627 p
->next_insn_offset
= start_offset
+ offset
;
/* Walk the compacted program; next_offset() yields the offset of the
 * following instruction.
 */
1628 for (offset
= 0; offset
< p
->next_insn_offset
- start_offset
;
1629 offset
= next_offset(devinfo
, store
, offset
)) {
1630 brw_inst
*insn
= store
+ offset
;
1631 int this_old_ip
= old_ip
[offset
/ sizeof(brw_compact_inst
)];
1632 int this_compacted_count
= compacted_counts
[this_old_ip
];
1634 switch (brw_inst_opcode(devinfo
, insn
)) {
/* Break/continue/halt: JIP/UIP are patched on gen6+; the other call in this
 * group patches the gen4-style jump count.
 */
1635 case BRW_OPCODE_BREAK
:
1636 case BRW_OPCODE_CONTINUE
:
1637 case BRW_OPCODE_HALT
:
1638 if (devinfo
->gen
>= 6) {
1639 update_uip_jip(devinfo
, insn
, this_old_ip
, compacted_counts
);
1641 update_gen4_jump_count(devinfo
, insn
, this_old_ip
,
1647 case BRW_OPCODE_IFF
:
1648 case BRW_OPCODE_ELSE
:
1649 case BRW_OPCODE_ENDIF
:
1650 case BRW_OPCODE_WHILE
:
1651 if (devinfo
->gen
>= 7) {
/* A control-flow instruction that was itself compacted is expanded, patched,
 * and compacted again in place; the assert expects that to succeed.
 */
1652 if (brw_inst_cmpt_control(devinfo
, insn
)) {
1653 brw_inst uncompacted
;
1654 brw_uncompact_instruction(devinfo
, &uncompacted
,
1655 (brw_compact_inst
*)insn
);
1657 update_uip_jip(devinfo
, &uncompacted
, this_old_ip
,
1660 bool ret
= brw_try_compact_instruction(devinfo
,
1661 (brw_compact_inst
*)insn
,
1663 assert(ret
); (void)ret
;
1665 update_uip_jip(devinfo
, insn
, this_old_ip
, compacted_counts
);
1667 } else if (devinfo
->gen
== 6) {
1668 assert(!brw_inst_cmpt_control(devinfo
, insn
));
1670 /* Jump Count is in units of compacted instructions on Gen6. */
1671 int jump_count_compacted
= brw_inst_gen6_jump_count(devinfo
, insn
);
1673 int target_old_ip
= this_old_ip
+ (jump_count_compacted
/ 2);
1674 int target_compacted_count
= compacted_counts
[target_old_ip
];
1675 jump_count_compacted
-= (target_compacted_count
- this_compacted_count
);
1676 brw_inst_set_gen6_jump_count(devinfo
, insn
, jump_count_compacted
);
1678 update_gen4_jump_count(devinfo
, insn
, this_old_ip
,
1683 case BRW_OPCODE_ADD
:
1684 /* Add instructions modifying the IP register use an immediate src1,
1685 * and Gens that use this cannot compact instructions with immediate
1688 if (brw_inst_cmpt_control(devinfo
, insn
))
1691 if (brw_inst_dst_reg_file(devinfo
, insn
) == BRW_ARCHITECTURE_REGISTER_FILE
&&
1692 brw_inst_dst_da_reg_nr(devinfo
, insn
) == BRW_ARF_IP
) {
1693 assert(brw_inst_src1_reg_file(devinfo
, insn
) == BRW_IMMEDIATE_VALUE
);
1696 int jump_compacted
= brw_inst_imm_d(devinfo
, insn
) >> shift
;
1698 int target_old_ip
= this_old_ip
+ (jump_compacted
/ 2);
1699 int target_compacted_count
= compacted_counts
[target_old_ip
];
1700 jump_compacted
-= (target_compacted_count
- this_compacted_count
);
1701 brw_inst_set_imm_ud(devinfo
, insn
, jump_compacted
<< shift
);
1710 /* p->nr_insn is counting the number of uncompacted instructions still, so
1711 * divide. We do want to be sure there's a valid instruction in any
1712 * alignment padding, so that the next compression pass (for the FS 8/16
1713 * compile passes) parses correctly.
1715 if (p
->next_insn_offset
& sizeof(brw_compact_inst
)) {
1716 brw_compact_inst
*align
= store
+ offset
;
1717 memset(align
, 0, sizeof(*align
));
1718 brw_compact_inst_set_hw_opcode(
1719 devinfo
, align
, brw_opcode_encode(devinfo
, BRW_OPCODE_NOP
));
1720 brw_compact_inst_set_cmpt_control(devinfo
, align
, true);
1721 p
->next_insn_offset
+= sizeof(brw_compact_inst
);
1723 p
->nr_insn
= p
->next_insn_offset
/ sizeof(brw_inst
);
1725 /* Update the instruction offsets for each group. */
/* For every group, advance through the compacted program until reaching the
 * instruction whose pre-compaction offset equals the group's recorded
 * offset, then store that instruction's new offset in the group.
 */
1729 foreach_list_typed(struct inst_group
, group
, link
, &disasm
->group_list
) {
1730 while (start_offset
+ old_ip
[offset
/ sizeof(brw_compact_inst
)] *
1731 sizeof(brw_inst
) != group
->offset
) {
1732 assert(start_offset
+ old_ip
[offset
/ sizeof(brw_compact_inst
)] *
1733 sizeof(brw_inst
) < group
->offset
);
1734 offset
= next_offset(devinfo
, store
, offset
);
1737 group
->offset
= start_offset
+ offset
;
1739 offset
= next_offset(devinfo
, store
, offset
);