2 * Copyright © 2012-2018 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 /** @file brw_eu_compact.c
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
40 * Instruction compaction capabilities vary subtly by generation.
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
49 * Gen5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
53 * Gen6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
58 * On Gen7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gen6
62 * since the jump count field is not in DW3.
68 * else JIP (plus UIP on BDW+)
70 * while JIP (must be negative)
72 * Gen 8 adds support for compacting 3-src instructions.
74 * Gen12 reduces the number of bits that are available to compacted immediates from
75 * 13 to 12, but improves the compaction of floating-point immediates by
76 * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77 * three most significant bits of the mantissa), rather than the lowest bits of
82 #include "brw_shader.h"
83 #include "brw_disasm_info.h"
84 #include "dev/gen_debug.h"
86 static const uint32_t g45_control_index_table
[32] = {
121 static const uint32_t g45_datatype_table
[32] = {
122 0b001000000000100001,
123 0b001011010110101101,
124 0b001000001000110001,
125 0b001111011110111101,
126 0b001011010110101100,
127 0b001000000110101101,
128 0b001000000000100000,
129 0b010100010110110001,
130 0b001100011000101101,
131 0b001000000000100010,
132 0b001000001000110110,
133 0b010000001000110001,
134 0b001000001000110010,
135 0b011000001000110010,
136 0b001111011110111100,
137 0b001000000100101000,
138 0b010100011000110001,
139 0b001010010100101001,
140 0b001000001000101001,
141 0b010000001000110110,
142 0b101000001000110001,
143 0b001011011000101101,
144 0b001000000100001001,
145 0b001011011000101100,
146 0b110100011000110001,
147 0b001000001110111101,
148 0b110000001000110001,
149 0b011000000100101010,
150 0b101000001000101001,
151 0b001011010110001100,
152 0b001000000110100001,
153 0b001010010100001000,
156 static const uint16_t g45_subreg_table
[32] = {
191 static const uint16_t g45_src_index_table
[32] = {
226 static const uint32_t gen6_control_index_table
[32] = {
261 static const uint32_t gen6_datatype_table
[32] = {
262 0b001001110000000000,
263 0b001000110000100000,
264 0b001001110000000001,
265 0b001000000001100000,
266 0b001010110100101001,
267 0b001000000110101101,
268 0b001100011000101100,
269 0b001011110110101101,
270 0b001000000111101100,
271 0b001000000001100001,
272 0b001000110010100101,
273 0b001000000001000001,
274 0b001000001000110001,
275 0b001000001000101001,
276 0b001000000000100000,
277 0b001000001000110010,
278 0b001010010100101001,
279 0b001011010010100101,
280 0b001000000110100101,
281 0b001100011000101001,
282 0b001011011000101100,
283 0b001011010110100101,
284 0b001011110110100101,
285 0b001111011110111101,
286 0b001111011110111100,
287 0b001111011110111101,
288 0b001111011110011101,
289 0b001111011110111110,
290 0b001000000000100001,
291 0b001000000000100010,
292 0b001001111111011101,
293 0b001000001110111110,
296 static const uint16_t gen6_subreg_table
[32] = {
331 static const uint16_t gen6_src_index_table
[32] = {
366 static const uint32_t gen7_control_index_table
[32] = {
367 0b0000000000000000010,
368 0b0000100000000000000,
369 0b0000100000000000001,
370 0b0000100000000000010,
371 0b0000100000000000011,
372 0b0000100000000000100,
373 0b0000100000000000101,
374 0b0000100000000000111,
375 0b0000100000000001000,
376 0b0000100000000001001,
377 0b0000100000000001101,
378 0b0000110000000000000,
379 0b0000110000000000001,
380 0b0000110000000000010,
381 0b0000110000000000011,
382 0b0000110000000000100,
383 0b0000110000000000101,
384 0b0000110000000000111,
385 0b0000110000000001001,
386 0b0000110000000001101,
387 0b0000110000000010000,
388 0b0000110000100000000,
389 0b0001000000000000000,
390 0b0001000000000000010,
391 0b0001000000000000100,
392 0b0001000000100000000,
393 0b0010110000000000000,
394 0b0010110000000010000,
395 0b0011000000000000000,
396 0b0011000000100000000,
397 0b0101000000000000000,
398 0b0101000000100000000,
401 static const uint32_t gen7_datatype_table
[32] = {
402 0b001000000000000001,
403 0b001000000000100000,
404 0b001000000000100001,
405 0b001000000001100001,
406 0b001000000010111101,
407 0b001000001011111101,
408 0b001000001110100001,
409 0b001000001110100101,
410 0b001000001110111101,
411 0b001000010000100001,
412 0b001000110000100000,
413 0b001000110000100001,
414 0b001001010010100101,
415 0b001001110010100100,
416 0b001001110010100101,
417 0b001111001110111101,
418 0b001111011110011101,
419 0b001111011110111100,
420 0b001111011110111101,
421 0b001111111110111100,
422 0b000000001000001100,
423 0b001000000000111101,
424 0b001000000010100101,
425 0b001000010000100000,
426 0b001001010010100100,
427 0b001001110010000100,
428 0b001010010100001001,
429 0b001101111110111101,
430 0b001111111110111101,
431 0b001011110110101100,
432 0b001010010100101000,
433 0b001010110100101000,
436 static const uint16_t gen7_subreg_table
[32] = {
471 static const uint16_t gen7_src_index_table
[32] = {
506 static const uint32_t gen8_control_index_table
[32] = {
507 0b0000000000000000010,
508 0b0000100000000000000,
509 0b0000100000000000001,
510 0b0000100000000000010,
511 0b0000100000000000011,
512 0b0000100000000000100,
513 0b0000100000000000101,
514 0b0000100000000000111,
515 0b0000100000000001000,
516 0b0000100000000001001,
517 0b0000100000000001101,
518 0b0000110000000000000,
519 0b0000110000000000001,
520 0b0000110000000000010,
521 0b0000110000000000011,
522 0b0000110000000000100,
523 0b0000110000000000101,
524 0b0000110000000000111,
525 0b0000110000000001001,
526 0b0000110000000001101,
527 0b0000110000000010000,
528 0b0000110000100000000,
529 0b0001000000000000000,
530 0b0001000000000000010,
531 0b0001000000000000100,
532 0b0001000000100000000,
533 0b0010110000000000000,
534 0b0010110000000010000,
535 0b0011000000000000000,
536 0b0011000000100000000,
537 0b0101000000000000000,
538 0b0101000000100000000,
541 static const uint32_t gen8_datatype_table
[32] = {
542 0b001000000000000000001,
543 0b001000000000001000000,
544 0b001000000000001000001,
545 0b001000000000011000001,
546 0b001000000000101011101,
547 0b001000000010111011101,
548 0b001000000011101000001,
549 0b001000000011101000101,
550 0b001000000011101011101,
551 0b001000001000001000001,
552 0b001000011000001000000,
553 0b001000011000001000001,
554 0b001000101000101000101,
555 0b001000111000101000100,
556 0b001000111000101000101,
557 0b001011100011101011101,
558 0b001011101011100011101,
559 0b001011101011101011100,
560 0b001011101011101011101,
561 0b001011111011101011100,
562 0b000000000010000001100,
563 0b001000000000001011101,
564 0b001000000000101000101,
565 0b001000001000001000000,
566 0b001000101000101000100,
567 0b001000111000100000100,
568 0b001001001001000001001,
569 0b001010111011101011101,
570 0b001011111011101011101,
571 0b001001111001101001100,
572 0b001001001001001001000,
573 0b001001011001001001000,
576 static const uint16_t gen8_subreg_table
[32] = {
611 static const uint16_t gen8_src_index_table
[32] = {
646 static const uint32_t gen11_datatype_table
[32] = {
647 0b001000000000000000001,
648 0b001000000000001000000,
649 0b001000000000001000001,
650 0b001000000000011000001,
651 0b001000000000101100101,
652 0b001000000101111100101,
653 0b001000000100101000001,
654 0b001000000100101000101,
655 0b001000000100101100101,
656 0b001000001000001000001,
657 0b001000011000001000000,
658 0b001000011000001000001,
659 0b001000101000101000101,
660 0b001000111000101000100,
661 0b001000111000101000101,
662 0b001100100100101100101,
663 0b001100101100100100101,
664 0b001100101100101100100,
665 0b001100101100101100101,
666 0b001100111100101100100,
667 0b000000000010000001100,
668 0b001000000000001100101,
669 0b001000000000101000101,
670 0b001000001000001000000,
671 0b001000101000101000100,
672 0b001000111000100000100,
673 0b001001001001000001001,
674 0b001101111100101100101,
675 0b001100111100101100101,
676 0b001001111001101001100,
677 0b001001001001001001000,
678 0b001001011001001001000,
681 static const uint32_t gen12_control_index_table
[32] = {
682 0b000000000000000000100, /* (16|M0) */
683 0b000000000000000000011, /* (8|M0) */
684 0b000000010000000000000, /* (W) (1|M0) */
685 0b000000010000000000100, /* (W) (16|M0) */
686 0b000000010000000000011, /* (W) (8|M0) */
687 0b010000000000000000100, /* (16|M0) (ge)f0.0 */
688 0b000000000000000100100, /* (16|M16) */
689 0b010100000000000000100, /* (16|M0) (lt)f0.0 */
690 0b000000000000000000000, /* (1|M0) */
691 0b000010000000000000100, /* (16|M0) (sat) */
692 0b000000000000000010011, /* (8|M8) */
693 0b001100000000000000100, /* (16|M0) (gt)f0.0 */
694 0b000100000000000000100, /* (16|M0) (eq)f0.0 */
695 0b000100010000000000100, /* (W) (16|M0) (eq)f0.0 */
696 0b001000000000000000100, /* (16|M0) (ne)f0.0 */
697 0b000000000000100000100, /* (f0.0) (16|M0) */
698 0b010100000000000000011, /* (8|M0) (lt)f0.0 */
699 0b000000000000110000100, /* (f1.0) (16|M0) */
700 0b000000010000000000001, /* (W) (2|M0) */
701 0b000000000000101000100, /* (f0.1) (16|M0) */
702 0b000000000000111000100, /* (f1.1) (16|M0) */
703 0b010000010000000000100, /* (W) (16|M0) (ge)f0.0 */
704 0b000000000000000100011, /* (8|M16) */
705 0b000000000000000110011, /* (8|M24) */
706 0b010100010000000000100, /* (W) (16|M0) (lt)f0.0 */
707 0b010000000000000000011, /* (8|M0) (ge)f0.0 */
708 0b000100010000000000000, /* (W) (1|M0) (eq)f0.0 */
709 0b000010000000000000011, /* (8|M0) (sat) */
710 0b010100000000010000100, /* (16|M0) (lt)f1.0 */
711 0b000100000000000000011, /* (8|M0) (eq)f0.0 */
712 0b000001000000000000011, /* (8|M0) {AccWrEn} */
713 0b000000010000000100100, /* (W) (16|M16) */
716 static const uint32_t gen12_datatype_table
[32] = {
717 0b11010110100101010100, /* grf<1>:f grf:f grf:f */
718 0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
719 0b00000010101101010100, /* grf<1>:f imm:f arf:ub */
720 0b01010110110101010100, /* grf<1>:f grf:f imm:f */
721 0b11010100100101010100, /* arf<1>:f grf:f grf:f */
722 0b11010010100101010100, /* grf<1>:f arf:f grf:f */
723 0b01010100110101010100, /* arf<1>:f grf:f imm:f */
724 0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
725 0b11010000100101010100, /* arf<1>:f arf:f grf:f */
726 0b00101110110011001100, /* grf<1>:d grf:d imm:w */
727 0b10110110100011001100, /* grf<1>:d grf:d grf:d */
728 0b01010010110101010100, /* grf<1>:f arf:f imm:f */
729 0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
730 0b01010000110101010100, /* arf<1>:f arf:f imm:f */
731 0b00110110110011001100, /* grf<1>:d grf:d imm:d */
732 0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
733 0b00000111000101010100, /* grf<2>:f grf:f arf:ub */
734 0b00101100110011001100, /* arf<1>:d grf:d imm:w */
735 0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
736 0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
737 0b00100110110000101010, /* grf<1>:w grf:uw imm:uv */
738 0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
739 0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
740 0b00000110100101001100, /* grf<1>:d grf:f arf:ub */
741 0b10001100100011001100, /* arf<1>:d grf:d grf:uw */
742 0b00000110100001010100, /* grf<1>:f grf:ud arf:ub */
743 0b00101110110001001100, /* grf<1>:d grf:ud imm:w */
744 0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
745 0b00000110100000110100, /* grf<1>:f grf:uw arf:ub */
746 0b00000110100000010100, /* grf<1>:f grf:ub arf:ub */
747 0b00000110100011010100, /* grf<1>:f grf:d arf:ub */
748 0b00000010100101010100, /* grf<1>:f arf:f arf:ub */
751 static const uint16_t gen12_subreg_table
[32] = {
752 0b000000000000000, /* .0 .0 .0 */
753 0b100000000000000, /* .0 .0 .16 */
754 0b001000000000000, /* .0 .0 .4 */
755 0b011000000000000, /* .0 .0 .12 */
756 0b000000010000000, /* .0 .4 .0 */
757 0b010000000000000, /* .0 .0 .8 */
758 0b101000000000000, /* .0 .0 .20 */
759 0b000000000001000, /* .8 .0 .0 */
760 0b000000100000000, /* .0 .8 .0 */
761 0b110000000000000, /* .0 .0 .24 */
762 0b111000000000000, /* .0 .0 .28 */
763 0b000001000000000, /* .0 .16 .0 */
764 0b000000000000100, /* .4 .0 .0 */
765 0b000001100000000, /* .0 .24 .0 */
766 0b000001010000000, /* .0 .20 .0 */
767 0b000000110000000, /* .0 .12 .0 */
768 0b000001110000000, /* .0 .28 .0 */
769 0b000000000011100, /* .28 .0 .0 */
770 0b000000000010000, /* .16 .0 .0 */
771 0b000000000001100, /* .12 .0 .0 */
772 0b000000000011000, /* .24 .0 .0 */
773 0b000000000010100, /* .20 .0 .0 */
774 0b000000000000010, /* .2 .0 .0 */
775 0b000000101000000, /* .0 .10 .0 */
776 0b000000001000000, /* .0 .2 .0 */
777 0b000000010000100, /* .4 .4 .0 */
778 0b000000001011100, /* .28 .2 .0 */
779 0b000000001000010, /* .2 .2 .0 */
780 0b000000110001100, /* .12 .12 .0 */
781 0b000000000100000, /* .0 .1 .0 */
782 0b000000001100000, /* .0 .3 .0 */
783 0b110001100000000, /* .0 .24 .24 */
786 static const uint16_t gen12_src0_index_table
[16] = {
787 0b010001100100, /* r<8;8,1> */
788 0b000000000000, /* r<0;1,0> */
789 0b010001100110, /* -r<8;8,1> */
790 0b010001100101, /* (abs)r<8;8,1> */
791 0b000000000010, /* -r<0;1,0> */
792 0b001000000000, /* r<2;1,0> */
793 0b001001000000, /* r<2;4,0> */
794 0b001101000000, /* r<4;4,0> */
795 0b001000100100, /* r<2;2,1> */
796 0b001100000000, /* r<4;1,0> */
797 0b001000100110, /* -r<2;2,1> */
798 0b001101000100, /* r<4;4,1> */
799 0b010001100111, /* -(abs)r<8;8,1> */
800 0b000100000000, /* r<1;1,0> */
801 0b000000000001, /* (abs)r<0;1,0> */
802 0b111100010000, /* r[a]<1,0> */
805 static const uint16_t gen12_src1_index_table
[16] = {
806 0b000100011001, /* r<8;8,1> */
807 0b000000000000, /* r<0;1,0> */
808 0b100100011001, /* -r<8;8,1> */
809 0b100000000000, /* -r<0;1,0> */
810 0b010100011001, /* (abs)r<8;8,1> */
811 0b100011010000, /* -r<4;4,0> */
812 0b000010000000, /* r<2;1,0> */
813 0b000010001001, /* r<2;2,1> */
814 0b100010001001, /* -r<2;2,1> */
815 0b000011010000, /* r<4;4,0> */
816 0b000011010001, /* r<4;4,1> */
817 0b000011000000, /* r<4;1,0> */
818 0b110100011001, /* -(abs)r<8;8,1> */
819 0b010000000000, /* (abs)r<0;1,0> */
820 0b110000000000, /* -(abs)r<0;1,0> */
821 0b100011010001, /* -r<4;4,1> */
824 /* This is actually the control index table for Cherryview (26 bits), but the
825 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
828 * The low 24 bits have the same mappings on both hardware.
830 static const uint32_t gen8_3src_control_index_table
[4] = {
831 0b00100000000110000000000001,
832 0b00000000000110000000000001,
833 0b00000000001000000000000001,
834 0b00000000001000000000100001,
837 /* This is actually the control index table for Cherryview (49 bits), but the
838 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
841 * The low 44 bits have the same mappings on both hardware, and since the high
842 * three bits on Broadwell are zero, we can reuse Cherryview's table.
844 static const uint64_t gen8_3src_source_index_table
[4] = {
845 0b0000001110010011100100111001000001111000000000000,
846 0b0000001110010011100100111001000001111000000000010,
847 0b0000001110010011100100111001000001111000000001000,
848 0b0000001110010011100100111001000001111000000100000,
851 static const uint64_t gen12_3src_control_index_table
[32] = {
852 0b000001001010010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
853 0b000001001010010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
854 0b000001001000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
855 0b000001001010010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
856 0b000001001000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
857 0b000001001000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
858 0b000001001010010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
859 0b000001001000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
860 0b000001001010010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
861 0b000001001010010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
862 0b000001001000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
863 0b000001001010010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
864 0b000001001010010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
865 0b000001001000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
866 0b000001001010010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
867 0b000001001010010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
868 0b000001001000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
869 0b000001001000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
870 0b000001001010010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
871 0b000001001010010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
872 0b000001001000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
873 0b000001001000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
874 0b000001001010010101000000000000100011, /* (8|M16) grf<1>:f :f :f :f */
875 0b000001001010010101000000000000110011, /* (8|M24) grf<1>:f :f :f :f */
876 0b000001001000010101010000000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
877 0b000001001010010101010010000000000100, /* (W) (16|M0) (sat)grf<1>:f :f :f :f */
878 0b000001001010010101000010000000100100, /* (W) (16|M16) grf<1>:f :f :f :f */
879 0b000001001010010001000010000000000000, /* (W) (1|M0) grf<1>:ud :ud :ud :ud */
880 0b000001001000010101000000000000100100, /* (16|M16) arf<1>:f :f :f :f */
881 0b000001001010010101010000000000100100, /* (16|M16) (sat)grf<1>:f :f :f :f */
882 0b000001001010010101000010000000000010, /* (W) (4|M0) grf<1>:f :f :f :f */
883 0b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */
886 static const uint32_t gen12_3src_source_index_table
[32] = {
887 0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */
888 0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */
889 0b101101100001101000011, /* grf<8;1> grf<8;1> grf<1> */
890 0b100101100001101000011, /* grf<8;1> grf<8;1> grf<0> */
891 0b101100000000101000011, /* grf<8;1> grf<0;0> grf<1> */
892 0b101101100001101001011, /* -grf<8;1> grf<8;1> grf<1> */
893 0b101001100001101000011, /* grf<8;1> arf<8;1> grf<1> */
894 0b100001100001100000000, /* grf<0;0> arf<8;1> grf<0> */
895 0b101101100001100000000, /* grf<0;0> grf<8;1> grf<1> */
896 0b101101100101101000011, /* grf<8;1> grf<8;1> -grf<1> */
897 0b101101110001101000011, /* grf<8;1> -grf<8;1> grf<1> */
898 0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */
899 0b100001100001101000011, /* grf<8;1> arf<8;1> grf<0> */
900 0b100101110001100000000, /* grf<0;0> -grf<8;1> grf<0> */
901 0b100101110001101000011, /* grf<8;1> -grf<8;1> grf<0> */
902 0b100101100001101001011, /* -grf<8;1> grf<8;1> grf<0> */
903 0b100100000000101000011, /* grf<8;1> grf<0;0> grf<0> */
904 0b100101100001100001000, /* -grf<0;0> grf<8;1> grf<0> */
905 0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> */
906 0b101101110001100000000, /* grf<0;0> -grf<8;1> grf<1> */
907 0b100101100101100000000, /* grf<0;0> grf<8;1> -grf<0> */
908 0b101001100001100000000, /* grf<0;0> arf<8;1> grf<1> */
909 0b100101100101101000011, /* grf<8;1> grf<8;1> -grf<0> */
910 0b101101100101101001011, /* -grf<8;1> grf<8;1> -grf<1> */
911 0b101001100001101001011, /* -grf<8;1> arf<8;1> grf<1> */
912 0b101101110001101001011, /* -grf<8;1> -grf<8;1> grf<1> */
913 0b101100010000101000011, /* grf<8;1> -grf<0;0> grf<1> */
914 0b101100000100101000011, /* grf<8;1> grf<0;0> -grf<1> */
915 0b101101100001100001000, /* -grf<0;0> grf<8;1> grf<1> */
916 0b101101100101100000000, /* grf<0;0> grf<8;1> -grf<1> */
917 0b100100000100101000011, /* grf<8;1> grf<0;0> -grf<0> */
918 0b101001100101101000011, /* grf<8;1> arf<8;1> -grf<1> */
921 static const uint32_t gen12_3src_subreg_table
[32] = {
922 0b00000000000000000000, /* .0 .0 .0 .0 */
923 0b00100000000000000000, /* .0 .0 .0 .4 */
924 0b00000000000110000000, /* .0 .12 .0 .0 */
925 0b10100000000000000000, /* .0 .0 .0 .20 */
926 0b10000000001110000000, /* .0 .28 .0 .16 */
927 0b01100000000000000000, /* .0 .0 .0 .12 */
928 0b01000000000000000000, /* .0 .0 .0 .8 */
929 0b00000010000000000000, /* .0 .0 .8 .0 */
930 0b00000001000000000000, /* .0 .0 .4 .0 */
931 0b11000000000000000000, /* .0 .0 .0 .24 */
932 0b10000000000000000000, /* .0 .0 .0 .16 */
933 0b11100000000000000000, /* .0 .0 .0 .28 */
934 0b00000110000000000000, /* .0 .0 .24 .0 */
935 0b00000000000010000000, /* .0 .4 .0 .0 */
936 0b00000100000000000000, /* .0 .0 .16 .0 */
937 0b00000011000000000000, /* .0 .0 .12 .0 */
938 0b00000101000000000000, /* .0 .0 .20 .0 */
939 0b00000111000000000000, /* .0 .0 .28 .0 */
940 0b00000000000100000000, /* .0 .8 .0 .0 */
941 0b00000000001000000000, /* .0 .16 .0 .0 */
942 0b00000000001100000000, /* .0 .24 .0 .0 */
943 0b00000000001010000000, /* .0 .20 .0 .0 */
944 0b00000000001110000000, /* .0 .28 .0 .0 */
945 0b11000000001110000000, /* .0 .28 .0 .24 */
946 0b00100000000100000000, /* .0 .8 .0 .4 */
947 0b00100000000110000000, /* .0 .12 .0 .4 */
948 0b01000000000110000000, /* .0 .12 .0 .8 */
949 0b10000000001100000000, /* .0 .24 .0 .16 */
950 0b10000000001010000000, /* .0 .20 .0 .16 */
951 0b01100000000010000000, /* .0 .4 .0 .12 */
952 0b10100000001110000000, /* .0 .28 .0 .20 */
953 0b01000000000010000000, /* .0 .4 .0 .8 */
/* File-scope pointers to the lookup tables that the set_*_index() helpers
 * below scan: control_index_table, datatype_table and subreg_table are walked
 * over 32 entries, src0_index_table and src1_index_table over a
 * generation-dependent length.
 *
 * NOTE(review): nothing visible in this part of the file assigns these
 * pointers; presumably an init routine points them at the per-generation
 * tables defined above -- confirm.
 */
956 static const uint32_t *control_index_table
;
957 static const uint32_t *datatype_table
;
958 static const uint16_t *subreg_table
;
959 static const uint16_t *src0_index_table
;
960 static const uint16_t *src1_index_table
;
/**
 * Find the control index for a compacted instruction.
 *
 * Concatenates control bit-fields of \p src (read with brw_inst_bits()) into
 * one key, then scans the 32 entries of control_index_table for an equal
 * value.  On a hit the position is written to \p dst through
 * brw_compact_inst_set_control_index().
 *
 * Key layout, most-significant field first:
 *  - gen >= 12 (21 bits): 95:92, 34, 33, 32, 31, 28, 27:24, 23:22, 21:19, 18:16
 *  - gen >= 8  (19 bits): 33:31, 23:12, 10:9, 34, 8
 *  - remaining case (17 bits): 31, 23:8; when gen == 7, bits 90:89 are OR-ed
 *    in at bit 17, giving 19 bits.
 *
 * \param devinfo  device description; only its gen field is read here
 * \param dst      compacted instruction that receives the index
 * \param src      uncompacted instruction supplying the bits
 *
 * NOTE(review): the embedded line numbers skip, so the short lines of this
 * function (return type, braces, a presumable `else`, return statements)
 * appear to be missing from this copy.  The return value is therefore not
 * described here.
 */
963 set_control_index(const struct gen_device_info
*devinfo
,
964 brw_compact_inst
*dst
, const brw_inst
*src
)
966 uint32_t uncompacted
; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
968 if (devinfo
->gen
>= 12) {
969 uncompacted
= (brw_inst_bits(src
, 95, 92) << 17) | /* 4b */
970 (brw_inst_bits(src
, 34, 34) << 16) | /* 1b */
971 (brw_inst_bits(src
, 33, 33) << 15) | /* 1b */
972 (brw_inst_bits(src
, 32, 32) << 14) | /* 1b */
973 (brw_inst_bits(src
, 31, 31) << 13) | /* 1b */
974 (brw_inst_bits(src
, 28, 28) << 12) | /* 1b */
975 (brw_inst_bits(src
, 27, 24) << 8) | /* 4b */
976 (brw_inst_bits(src
, 23, 22) << 6) | /* 2b */
977 (brw_inst_bits(src
, 21, 19) << 3) | /* 3b */
978 (brw_inst_bits(src
, 18, 16)); /* 3b */
979 } else if (devinfo
->gen
>= 8) {
980 uncompacted
= (brw_inst_bits(src
, 33, 31) << 16) | /* 3b */
981 (brw_inst_bits(src
, 23, 12) << 4) | /* 12b */
982 (brw_inst_bits(src
, 10, 9) << 2) | /* 2b */
983 (brw_inst_bits(src
, 34, 34) << 1) | /* 1b */
984 (brw_inst_bits(src
, 8, 8)); /* 1b */
986 uncompacted
= (brw_inst_bits(src
, 31, 31) << 16) | /* 1b */
987 (brw_inst_bits(src
, 23, 8)); /* 16b */
989 /* On gen7, the flag register and subregister numbers are integrated into
992 if (devinfo
->gen
== 7)
993 uncompacted
|= brw_inst_bits(src
, 90, 89) << 17; /* 2b */
996 for (int i
= 0; i
< 32; i
++) {
997 if (control_index_table
[i
] == uncompacted
) {
998 brw_compact_inst_set_control_index(devinfo
, dst
, i
);
/**
 * Find the datatype index for a compacted instruction.
 *
 * Packs register-file / type / region related bit-fields of \p src into one
 * key and scans the 32 entries of datatype_table for an equal value.  On a
 * hit the position is written to \p dst through
 * brw_compact_inst_set_datatype_index().
 *
 * Key layout, most-significant field first:
 *  - gen >= 12 (20 bits): 91:88, 66, 50, 49:48, 47, 46, 43:40, 39:36, 35 in
 *    the low 19 bits; bit 98 is OR-ed in at bit 19 only when \p is_immediate
 *    is false.
 *  - gen >= 8  (21 bits): 63:61, 94:89, 46:35
 *  - remaining case (18 bits): 63:61, 46:32
 *
 * \param devinfo       device description; only its gen field is read here
 * \param dst           compacted instruction that receives the index
 * \param src           uncompacted instruction supplying the bits
 * \param is_immediate  when true, bit 98 is left out of the gen >= 12 key
 *
 * NOTE(review): the embedded line numbers skip, so the short lines of this
 * function (return type, braces, a presumable `else`, return statements)
 * appear to be missing from this copy.  The return value is therefore not
 * described here.
 */
1007 set_datatype_index(const struct gen_device_info
*devinfo
, brw_compact_inst
*dst
,
1008 const brw_inst
*src
, bool is_immediate
)
1010 uint32_t uncompacted
; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1012 if (devinfo
->gen
>= 12) {
1013 uncompacted
= (brw_inst_bits(src
, 91, 88) << 15) | /* 4b */
1014 (brw_inst_bits(src
, 66, 66) << 14) | /* 1b */
1015 (brw_inst_bits(src
, 50, 50) << 13) | /* 1b */
1016 (brw_inst_bits(src
, 49, 48) << 11) | /* 2b */
1017 (brw_inst_bits(src
, 47, 47) << 10) | /* 1b */
1018 (brw_inst_bits(src
, 46, 46) << 9) | /* 1b */
1019 (brw_inst_bits(src
, 43, 40) << 5) | /* 4b */
1020 (brw_inst_bits(src
, 39, 36) << 1) | /* 4b */
1021 (brw_inst_bits(src
, 35, 35)); /* 1b */
1023 /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1026 if (!is_immediate
) {
1027 uncompacted
|= brw_inst_bits(src
, 98, 98) << 19; /* 1b */
1029 } else if (devinfo
->gen
>= 8) {
1030 uncompacted
= (brw_inst_bits(src
, 63, 61) << 18) | /* 3b */
1031 (brw_inst_bits(src
, 94, 89) << 12) | /* 6b */
1032 (brw_inst_bits(src
, 46, 35)); /* 12b */
1034 uncompacted
= (brw_inst_bits(src
, 63, 61) << 15) | /* 3b */
1035 (brw_inst_bits(src
, 46, 32)); /* 15b */
1038 for (int i
= 0; i
< 32; i
++) {
1039 if (datatype_table
[i
] == uncompacted
) {
1040 brw_compact_inst_set_datatype_index(devinfo
, dst
, i
);
/**
 * Find the sub-register index for a compacted instruction.
 *
 * Builds a 15-bit key from three 5-bit fields of \p src and scans the 32
 * entries of subreg_table for an equal value.  On a hit the position is
 * written to \p dst through brw_compact_inst_set_subreg_index().
 *
 * Key layout (bits 4:0, 9:5, 14:10 of the key):
 *  - gen >= 12:      55:51, 71:67, 103:99
 *  - remaining case: 52:48, 68:64, 100:96
 *
 * \param devinfo       device description; only its gen field is read here
 * \param dst           compacted instruction that receives the index
 * \param src           uncompacted instruction supplying the bits
 * \param is_immediate  not referenced on any line visible here
 *
 * NOTE(review): the embedded line numbers skip, so short lines (return type,
 * braces, `else`, return statements) appear to be missing from this copy.
 * The top field is OR-ed in by a separate statement in each branch, so it is
 * presumably guarded by `!is_immediate` as in set_datatype_index() -- confirm
 * against the complete file.  The return value is not visible either.
 */
1049 set_subreg_index(const struct gen_device_info
*devinfo
, brw_compact_inst
*dst
,
1050 const brw_inst
*src
, bool is_immediate
)
1052 uint16_t uncompacted
; /* 15b */
1054 if (devinfo
->gen
>= 12) {
1055 uncompacted
= (brw_inst_bits(src
, 55, 51) << 0) | /* 5b */
1056 (brw_inst_bits(src
, 71, 67) << 5); /* 5b */
1059 uncompacted
|= brw_inst_bits(src
, 103, 99) << 10; /* 5b */
1061 uncompacted
= (brw_inst_bits(src
, 52, 48) << 0) | /* 5b */
1062 (brw_inst_bits(src
, 68, 64) << 5); /* 5b */
1065 uncompacted
|= brw_inst_bits(src
, 100, 96) << 10; /* 5b */
1068 for (int i
= 0; i
< 32; i
++) {
1069 if (subreg_table
[i
] == uncompacted
) {
1070 brw_compact_inst_set_subreg_index(devinfo
, dst
, i
);
/**
 * Find the src0 index for a compacted instruction.
 *
 * Builds a 12-bit key from \p src and scans the first table_len entries of
 * src0_index_table for an equal value.  On a hit the position is written to
 * \p dst through brw_compact_inst_set_src0_index().
 *
 *  - gen >= 12: table_len is the size of gen12_src0_index_table; the key is,
 *    most-significant field first, bits 87:84, 83:81, 80, 65:64, 45:44.
 *  - remaining case: table_len is the size of gen8_src_index_table; the key
 *    is the contiguous field 88:77.
 *
 * \param devinfo  device description; only its gen field is read here
 * \param dst      compacted instruction that receives the index
 * \param src      uncompacted instruction supplying the bits
 *
 * NOTE(review): the embedded line numbers skip, so short lines (return type,
 * the declaration of table_len, braces, `else`, return statements) appear to
 * be missing from this copy.  The return value is therefore not described.
 */
1079 set_src0_index(const struct gen_device_info
*devinfo
,
1080 brw_compact_inst
*dst
, const brw_inst
*src
)
1082 uint16_t uncompacted
; /* 12b */
1085 if (devinfo
->gen
>= 12) {
1086 table_len
= ARRAY_SIZE(gen12_src0_index_table
);
1087 uncompacted
= (brw_inst_bits(src
, 87, 84) << 8) | /* 4b */
1088 (brw_inst_bits(src
, 83, 81) << 5) | /* 3b */
1089 (brw_inst_bits(src
, 80, 80) << 4) | /* 1b */
1090 (brw_inst_bits(src
, 65, 64) << 2) | /* 2b */
1091 (brw_inst_bits(src
, 45, 44)); /* 2b */
1093 table_len
= ARRAY_SIZE(gen8_src_index_table
);
1094 uncompacted
= brw_inst_bits(src
, 88, 77); /* 12b */
1097 for (int i
= 0; i
< table_len
; i
++) {
1098 if (src0_index_table
[i
] == uncompacted
) {
1099 brw_compact_inst_set_src0_index(devinfo
, dst
, i
);
/**
 * Find (or directly encode) the src1 index for a compacted instruction.
 *
 * Immediate form: the src1 index field of \p dst is filled straight from
 * \p imm -- `imm & 0xf` on gen >= 12, `imm >> 8` otherwise.
 *
 * Register form: a 12-bit key is built from \p src and the first table_len
 * entries of src1_index_table are scanned for an equal value; on a hit the
 * position is written through brw_compact_inst_set_src1_index().
 *  - gen >= 12: key is, most-significant field first, bits 121:120, 119:116,
 *    115:113, 112, 97:96.  The scan bound is taken from
 *    gen12_src1_index_table so it follows the src1 table rather than the src0
 *    one; both Gen12 tables are declared with 16 entries, so the bound's value
 *    is unchanged.
 *  - remaining case: key is the contiguous field 120:109 and the bound is the
 *    size of gen8_src_index_table.
 *
 * \param devinfo       device description; only its gen field is read here
 * \param dst           compacted instruction that receives the index
 * \param src           uncompacted instruction supplying the bits
 * \param is_immediate  not referenced on any line visible here; presumably
 *                      selects the immediate form -- confirm
 * \param imm           compacted immediate value used by the immediate form
 *
 * NOTE(review): the embedded line numbers skip, so short lines (return type,
 * the `is_immediate` test, the declaration of table_len, braces, `else`,
 * return statements) appear to be missing from this copy.  The return value
 * is therefore not described.
 */
1108 set_src1_index(const struct gen_device_info
*devinfo
, brw_compact_inst
*dst
,
1109 const brw_inst
*src
, bool is_immediate
, unsigned imm
)
1112 if (devinfo
->gen
>= 12) {
1113 /* src1 index takes the low 4 bits of the 12-bit compacted value */
1114 brw_compact_inst_set_src1_index(devinfo
, dst
, imm
& 0xf);
1116 /* src1 index takes the high 5 bits of the 13-bit compacted value */
1117 brw_compact_inst_set_src1_index(devinfo
, dst
, imm
>> 8);
1121 uint16_t uncompacted
; /* 12b */
1124 if (devinfo
->gen
>= 12) {
1125 table_len
= ARRAY_SIZE(gen12_src1_index_table
);
1126 uncompacted
= (brw_inst_bits(src
, 121, 120) << 10) | /* 2b */
1127 (brw_inst_bits(src
, 119, 116) << 6) | /* 4b */
1128 (brw_inst_bits(src
, 115, 113) << 3) | /* 3b */
1129 (brw_inst_bits(src
, 112, 112) << 2) | /* 1b */
1130 (brw_inst_bits(src
, 97, 96)); /* 2b */
1132 table_len
= ARRAY_SIZE(gen8_src_index_table
);
1133 uncompacted
= brw_inst_bits(src
, 120, 109); /* 12b */
1136 for (int i
= 0; i
< table_len
; i
++) {
1137 if (src1_index_table
[i
] == uncompacted
) {
1138 brw_compact_inst_set_src1_index(devinfo
, dst
, i
);
/**
 * Find the control index for a compacted three-source instruction.
 *
 * Asserts gen >= 8, then builds a key from \p src and scans a fixed table for
 * an equal value.  On a hit the position is written to \p dst through
 * brw_compact_inst_set_3src_control_index().
 *
 *  - gen >= 12: 36-bit key, most-significant field first: 95:92, 90:88,
 *    82:80, 50, 48, 42:40, 39, 38:36, 34, 33, 32, 31, 28, 27:24, 23, 22,
 *    21:19, 18:16; searched in gen12_3src_control_index_table.
 *  - remaining case: 34:32 above 28:8 (24 bits); for gen >= 9 or Cherryview
 *    the line that follows shifts bits 36:35 to position 24; searched in
 *    gen8_3src_control_index_table.
 *
 * \param devinfo  device description; gen and is_cherryview are read here
 * \param dst      compacted instruction that receives the index
 * \param src      uncompacted instruction supplying the bits
 *
 * NOTE(review): the gen >= 12 key shifts fields by up to 32; this assumes
 * brw_inst_bits() yields a 64-bit value -- confirm.
 * NOTE(review): the embedded line numbers skip, so short lines (return type,
 * braces, `else`, the left-hand side of the 36:35 statement -- presumably
 * `uncompacted |=` -- and return statements) appear to be missing from this
 * copy.  The return value is therefore not described.
 */
1148 set_3src_control_index(const struct gen_device_info
*devinfo
,
1149 brw_compact_inst
*dst
, const brw_inst
*src
)
1151 assert(devinfo
->gen
>= 8);
1153 if (devinfo
->gen
>= 12) {
1154 uint64_t uncompacted
= /* 36b/TGL+ */
1155 (brw_inst_bits(src
, 95, 92) << 32) | /* 4b */
1156 (brw_inst_bits(src
, 90, 88) << 29) | /* 3b */
1157 (brw_inst_bits(src
, 82, 80) << 26) | /* 3b */
1158 (brw_inst_bits(src
, 50, 50) << 25) | /* 1b */
1159 (brw_inst_bits(src
, 48, 48) << 24) | /* 1b */
1160 (brw_inst_bits(src
, 42, 40) << 21) | /* 3b */
1161 (brw_inst_bits(src
, 39, 39) << 20) | /* 1b */
1162 (brw_inst_bits(src
, 38, 36) << 17) | /* 3b */
1163 (brw_inst_bits(src
, 34, 34) << 16) | /* 1b */
1164 (brw_inst_bits(src
, 33, 33) << 15) | /* 1b */
1165 (brw_inst_bits(src
, 32, 32) << 14) | /* 1b */
1166 (brw_inst_bits(src
, 31, 31) << 13) | /* 1b */
1167 (brw_inst_bits(src
, 28, 28) << 12) | /* 1b */
1168 (brw_inst_bits(src
, 27, 24) << 8) | /* 4b */
1169 (brw_inst_bits(src
, 23, 23) << 7) | /* 1b */
1170 (brw_inst_bits(src
, 22, 22) << 6) | /* 1b */
1171 (brw_inst_bits(src
, 21, 19) << 3) | /* 3b */
1172 (brw_inst_bits(src
, 18, 16)); /* 3b */
1174 for (unsigned i
= 0; i
< ARRAY_SIZE(gen12_3src_control_index_table
); i
++) {
1175 if (gen12_3src_control_index_table
[i
] == uncompacted
) {
1176 brw_compact_inst_set_3src_control_index(devinfo
, dst
, i
);
1181 uint32_t uncompacted
= /* 24b/BDW; 26b/CHV/SKL+ */
1182 (brw_inst_bits(src
, 34, 32) << 21) | /* 3b */
1183 (brw_inst_bits(src
, 28, 8)); /* 21b */
1185 if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
) {
1187 brw_inst_bits(src
, 36, 35) << 24; /* 2b */
1190 for (unsigned i
= 0; i
< ARRAY_SIZE(gen8_3src_control_index_table
); i
++) {
1191 if (gen8_3src_control_index_table
[i
] == uncompacted
) {
1192 brw_compact_inst_set_3src_control_index(devinfo
, dst
, i
);
/**
 * Find the source index for a compacted three-source instruction.
 *
 * Asserts gen >= 8, then builds a key from \p src and scans a fixed table for
 * an equal value.  On a hit the position is written to \p dst through
 * brw_compact_inst_set_3src_source_index().
 *
 *  - gen >= 12: 21-bit key, most-significant field first: 114, 113:112, 98,
 *    97:96, 91, 87:86, 85:84, 83, 66, 65:64, 47, 46, 45:44, 43, 35; searched
 *    in gen12_3src_source_index_table.
 *  - remaining case: base key is 83 (at bit 43), 114:107, 93:86, 72:65,
 *    55:37.  Two further groups follow: for gen >= 9 or Cherryview, 126:125
 *    (at 47), 105:104 (at 45) and 84 (at 44); otherwise 125 (at 45) and 104
 *    (at 44).  Searched in gen8_3src_source_index_table.
 *
 * \param devinfo  device description; gen and is_cherryview are read here
 * \param dst      compacted instruction that receives the index
 * \param src      uncompacted instruction supplying the bits
 *
 * NOTE(review): the pre-Gen12 key shifts fields by up to 47; this assumes
 * brw_inst_bits() yields a 64-bit value -- confirm.
 * NOTE(review): the embedded line numbers skip, so short lines (return type,
 * braces, `else`, the left-hand sides of the two extra groups -- presumably
 * `uncompacted |=` -- and return statements) appear to be missing from this
 * copy.  The return value is therefore not described.
 */
1202 set_3src_source_index(const struct gen_device_info
*devinfo
,
1203 brw_compact_inst
*dst
, const brw_inst
*src
)
1205 assert(devinfo
->gen
>= 8);
1207 if (devinfo
->gen
>= 12) {
1208 uint32_t uncompacted
= /* 21b/TGL+ */
1209 (brw_inst_bits(src
, 114, 114) << 20) | /* 1b */
1210 (brw_inst_bits(src
, 113, 112) << 18) | /* 2b */
1211 (brw_inst_bits(src
, 98, 98) << 17) | /* 1b */
1212 (brw_inst_bits(src
, 97, 96) << 15) | /* 2b */
1213 (brw_inst_bits(src
, 91, 91) << 14) | /* 1b */
1214 (brw_inst_bits(src
, 87, 86) << 12) | /* 2b */
1215 (brw_inst_bits(src
, 85, 84) << 10) | /* 2b */
1216 (brw_inst_bits(src
, 83, 83) << 9) | /* 1b */
1217 (brw_inst_bits(src
, 66, 66) << 8) | /* 1b */
1218 (brw_inst_bits(src
, 65, 64) << 6) | /* 2b */
1219 (brw_inst_bits(src
, 47, 47) << 5) | /* 1b */
1220 (brw_inst_bits(src
, 46, 46) << 4) | /* 1b */
1221 (brw_inst_bits(src
, 45, 44) << 2) | /* 2b */
1222 (brw_inst_bits(src
, 43, 43) << 1) | /* 1b */
1223 (brw_inst_bits(src
, 35, 35)); /* 1b */
1225 for (unsigned i
= 0; i
< ARRAY_SIZE(gen12_3src_source_index_table
); i
++) {
1226 if (gen12_3src_source_index_table
[i
] == uncompacted
) {
1227 brw_compact_inst_set_3src_source_index(devinfo
, dst
, i
);
1232 uint64_t uncompacted
= /* 46b/BDW; 49b/CHV/SKL+ */
1233 (brw_inst_bits(src
, 83, 83) << 43) | /* 1b */
1234 (brw_inst_bits(src
, 114, 107) << 35) | /* 8b */
1235 (brw_inst_bits(src
, 93, 86) << 27) | /* 8b */
1236 (brw_inst_bits(src
, 72, 65) << 19) | /* 8b */
1237 (brw_inst_bits(src
, 55, 37)); /* 19b */
1239 if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
) {
1241 (brw_inst_bits(src
, 126, 125) << 47) | /* 2b */
1242 (brw_inst_bits(src
, 105, 104) << 45) | /* 2b */
1243 (brw_inst_bits(src
, 84, 84) << 44); /* 1b */
1246 (brw_inst_bits(src
, 125, 125) << 45) | /* 1b */
1247 (brw_inst_bits(src
, 104, 104) << 44); /* 1b */
1250 for (unsigned i
= 0; i
< ARRAY_SIZE(gen8_3src_source_index_table
); i
++) {
1251 if (gen8_3src_source_index_table
[i
] == uncompacted
) {
1252 brw_compact_inst_set_3src_source_index(devinfo
, dst
, i
);
1262 set_3src_subreg_index(const struct gen_device_info
*devinfo
,
1263 brw_compact_inst
*dst
, const brw_inst
*src
)
1265 assert(devinfo
->gen
>= 12);
1267 uint32_t uncompacted
= /* 20b/TGL+ */
1268 (brw_inst_bits(src
, 119, 115) << 15) | /* 5b */
1269 (brw_inst_bits(src
, 103, 99) << 10) | /* 5b */
1270 (brw_inst_bits(src
, 71, 67) << 5) | /* 5b */
1271 (brw_inst_bits(src
, 55, 51)); /* 5b */
1273 for (unsigned i
= 0; i
< ARRAY_SIZE(gen12_3src_subreg_table
); i
++) {
1274 if (gen12_3src_subreg_table
[i
] == uncompacted
) {
1275 brw_compact_inst_set_3src_subreg_index(devinfo
, dst
, i
);
1284 has_unmapped_bits(const struct gen_device_info
*devinfo
, const brw_inst
*src
)
1286 /* EOT can only be mapped on a send if the src1 is an immediate */
1287 if ((brw_inst_opcode(devinfo
, src
) == BRW_OPCODE_SENDC
||
1288 brw_inst_opcode(devinfo
, src
) == BRW_OPCODE_SEND
) &&
1289 brw_inst_eot(devinfo
, src
))
1292 /* Check for instruction bits that don't map to any of the fields of the
1293 * compacted instruction. The instruction cannot be compacted if any of
1294 * them are set. They overlap with:
1295 * - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
1296 * - Dst.AddrImm[9] (bit 47 on Gen8)
1297 * - Src0.AddrImm[9] (bit 95 on Gen8)
1298 * - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
1299 * - UIP[31] (bit 95 on Gen8)
1301 if (devinfo
->gen
>= 12) {
1302 assert(!brw_inst_bits(src
, 7, 7));
1304 } else if (devinfo
->gen
>= 8) {
1305 assert(!brw_inst_bits(src
, 7, 7));
1306 return brw_inst_bits(src
, 95, 95) ||
1307 brw_inst_bits(src
, 47, 47) ||
1308 brw_inst_bits(src
, 11, 11);
1310 assert(!brw_inst_bits(src
, 7, 7) &&
1311 !(devinfo
->gen
< 7 && brw_inst_bits(src
, 90, 90)));
1312 return brw_inst_bits(src
, 95, 91) ||
1313 brw_inst_bits(src
, 47, 47);
1318 has_3src_unmapped_bits(const struct gen_device_info
*devinfo
,
1319 const brw_inst
*src
)
1321 /* Check for three-source instruction bits that don't map to any of the
1322 * fields of the compacted instruction. All of them seem to be reserved
1325 if (devinfo
->gen
>= 12) {
1326 assert(!brw_inst_bits(src
, 7, 7));
1327 } else if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
) {
1328 assert(!brw_inst_bits(src
, 127, 127) &&
1329 !brw_inst_bits(src
, 7, 7));
1331 assert(devinfo
->gen
>= 8);
1332 assert(!brw_inst_bits(src
, 127, 126) &&
1333 !brw_inst_bits(src
, 105, 105) &&
1334 !brw_inst_bits(src
, 84, 84) &&
1335 !brw_inst_bits(src
, 7, 7));
1337 /* Src1Type and Src2Type, used for mixed-precision floating point */
1338 if (brw_inst_bits(src
, 36, 35))
1346 brw_try_compact_3src_instruction(const struct gen_device_info
*devinfo
,
1347 brw_compact_inst
*dst
, const brw_inst
*src
)
1349 assert(devinfo
->gen
>= 8);
1351 if (has_3src_unmapped_bits(devinfo
, src
))
1354 #define compact(field) \
1355 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1356 #define compact_a16(field) \
1357 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1361 if (!set_3src_control_index(devinfo
, dst
, src
))
1364 if (!set_3src_source_index(devinfo
, dst
, src
))
1367 if (devinfo
->gen
>= 12) {
1368 if (!set_3src_subreg_index(devinfo
, dst
, src
))
1372 compact(debug_control
);
1373 compact(dst_reg_nr
);
1374 compact(src0_reg_nr
);
1375 compact(src1_reg_nr
);
1376 compact(src2_reg_nr
);
1378 compact(dst_reg_nr
);
1379 compact_a16(src0_rep_ctrl
);
1380 compact(debug_control
);
1382 compact_a16(src1_rep_ctrl
);
1383 compact_a16(src2_rep_ctrl
);
1384 compact(src0_reg_nr
);
1385 compact(src1_reg_nr
);
1386 compact(src2_reg_nr
);
1387 compact_a16(src0_subreg_nr
);
1388 compact_a16(src1_subreg_nr
);
1389 compact_a16(src2_subreg_nr
);
1391 brw_compact_inst_set_3src_cmpt_control(devinfo
, dst
, true);
1399 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1400 * sources, and a 13th bit that's replicated through the high 20 bits.
1402 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1403 * of packed vectors as compactable immediates.
1405 * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1406 * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1407 * while for unsigned integers it is not.
1409 * Returns the compacted immediate, or -1 if immediate cannot be compacted
1412 compact_immediate(const struct gen_device_info
*devinfo
,
1413 enum brw_reg_type type
, unsigned imm
)
1415 if (devinfo
->gen
>= 12) {
1416 /* 16-bit immediates need to be replicated through the 32-bit immediate
1420 case BRW_REGISTER_TYPE_W
:
1421 case BRW_REGISTER_TYPE_UW
:
1422 case BRW_REGISTER_TYPE_HF
:
1423 if ((imm
>> 16) != (imm
& 0xffff))
1431 case BRW_REGISTER_TYPE_F
:
1432 /* We get the high 12-bits as-is; rest must be zero */
1433 if ((imm
& 0xfffff) == 0)
1434 return (imm
>> 20) & 0xfff;
1436 case BRW_REGISTER_TYPE_HF
:
1437 /* We get the high 12-bits as-is; rest must be zero */
1438 if ((imm
& 0xf) == 0)
1439 return (imm
>> 4) & 0xfff;
1441 case BRW_REGISTER_TYPE_UD
:
1442 case BRW_REGISTER_TYPE_VF
:
1443 case BRW_REGISTER_TYPE_UV
:
1444 case BRW_REGISTER_TYPE_V
:
1445 /* We get the low 12-bits as-is; rest must be zero */
1446 if ((imm
& 0xfffff000) == 0)
1449 case BRW_REGISTER_TYPE_UW
:
1450 /* We get the low 12-bits as-is; rest must be zero */
1451 if ((imm
& 0xf000) == 0)
1454 case BRW_REGISTER_TYPE_D
:
1455 /* We get the low 11-bits as-is; 12th is replicated */
1456 if (((int)imm
>> 11) == 0 || ((int)imm
>> 11) == -1)
1459 case BRW_REGISTER_TYPE_W
:
1460 /* We get the low 11-bits as-is; 12th is replicated */
1461 if (((short)imm
>> 11) == 0 || ((short)imm
>> 11) == -1)
1464 case BRW_REGISTER_TYPE_NF
:
1465 case BRW_REGISTER_TYPE_DF
:
1466 case BRW_REGISTER_TYPE_Q
:
1467 case BRW_REGISTER_TYPE_UQ
:
1468 case BRW_REGISTER_TYPE_B
:
1469 case BRW_REGISTER_TYPE_UB
:
1473 /* We get the low 12 bits as-is; 13th is replicated */
1474 if (((int)imm
>> 12) == 0 || ((int)imm
>> 12 == -1)) {
1475 return imm
& 0x1fff;
1483 uncompact_immediate(const struct gen_device_info
*devinfo
,
1484 enum brw_reg_type type
, unsigned compact_imm
)
1486 if (devinfo
->gen
>= 12) {
1488 case BRW_REGISTER_TYPE_F
:
1489 return compact_imm
<< 20;
1490 case BRW_REGISTER_TYPE_HF
:
1491 return (compact_imm
<< 20) | (compact_imm
<< 4);
1492 case BRW_REGISTER_TYPE_UD
:
1493 case BRW_REGISTER_TYPE_VF
:
1494 case BRW_REGISTER_TYPE_UV
:
1495 case BRW_REGISTER_TYPE_V
:
1497 case BRW_REGISTER_TYPE_UW
:
1499 return compact_imm
<< 16 | compact_imm
;
1500 case BRW_REGISTER_TYPE_D
:
1501 /* Extend the 12th bit into the high 20 bits */
1502 return (int)(compact_imm
<< 20) >> 20;
1503 case BRW_REGISTER_TYPE_W
:
1504 /* Extend the 12th bit into the high 4 bits and replicate */
1505 return ( (int)(compact_imm
<< 20) >> 4) |
1506 ((short)(compact_imm
<< 4) >> 4);
1507 case BRW_REGISTER_TYPE_NF
:
1508 case BRW_REGISTER_TYPE_DF
:
1509 case BRW_REGISTER_TYPE_Q
:
1510 case BRW_REGISTER_TYPE_UQ
:
1511 case BRW_REGISTER_TYPE_B
:
1512 case BRW_REGISTER_TYPE_UB
:
1513 unreachable("not reached");
1516 /* Replicate the 13th bit into the high 19 bits */
1517 return (int)(compact_imm
<< 19) >> 19;
1520 unreachable("not reached");
1524 has_immediate(const struct gen_device_info
*devinfo
, const brw_inst
*inst
,
1525 enum brw_reg_type
*type
)
1527 if (brw_inst_src0_reg_file(devinfo
, inst
) == BRW_IMMEDIATE_VALUE
) {
1528 *type
= brw_inst_src0_type(devinfo
, inst
);
1529 return *type
!= INVALID_REG_TYPE
;
1530 } else if (brw_inst_src1_reg_file(devinfo
, inst
) == BRW_IMMEDIATE_VALUE
) {
1531 *type
= brw_inst_src1_type(devinfo
, inst
);
1532 return *type
!= INVALID_REG_TYPE
;
1539 * Applies some small changes to instruction types to increase chances of
1543 precompact(const struct gen_device_info
*devinfo
, brw_inst inst
)
1545 if (brw_inst_src0_reg_file(devinfo
, &inst
) != BRW_IMMEDIATE_VALUE
)
1548 /* The Bspec's section titled "Non-present Operands" claims that if src0
1549 * is an immediate that src1's type must be the same as that of src0.
1551 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1552 * that do not follow this rule. E.g., from the IVB/HSW table:
1554 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1555 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
1557 * And from the SNB table:
1559 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1560 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
1562 * Neither of these cause warnings from the simulator when used,
1563 * compacted or otherwise. In fact, all compaction mappings that have an
1564 * immediate in src0 use a:ud for src1.
1566 * The GM45 instruction compaction tables do not contain mapped meanings
1567 * so it's not clear whether it has the restriction. We'll assume it was
1568 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1570 * Don't do any of this for 64-bit immediates, since the src1 fields
1571 * overlap with the immediate and setting them would overwrite the
1574 if (devinfo
->gen
>= 6 &&
1575 !(devinfo
->is_haswell
&&
1576 brw_inst_opcode(devinfo
, &inst
) == BRW_OPCODE_DIM
) &&
1577 !(devinfo
->gen
>= 8 &&
1578 (brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_DF
||
1579 brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_UQ
||
1580 brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_Q
))) {
1581 brw_inst_set_src1_reg_hw_type(devinfo
, &inst
, 0);
1584 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1585 * for immediate values. Presumably the hardware engineers realized
1586 * that the only useful floating-point value that could be represented
1587 * in this format is 0.0, which can also be represented as a VF-typed
1588 * immediate, so they gave us the previously mentioned mapping on IVB+.
1590 * Strangely, we do have a mapping for imm:f in src1, so we don't need
1593 * If we see a 0.0:F, change the type to VF so that it can be compacted.
1595 * Compaction of floating-point immediates is improved on Gen12, thus
1596 * removing the need for this.
1598 if (devinfo
->gen
< 12 &&
1599 brw_inst_imm_ud(devinfo
, &inst
) == 0x0 &&
1600 brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_F
&&
1601 brw_inst_dst_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_F
&&
1602 brw_inst_dst_hstride(devinfo
, &inst
) == BRW_HORIZONTAL_STRIDE_1
) {
1603 enum brw_reg_file file
= brw_inst_src0_reg_file(devinfo
, &inst
);
1604 brw_inst_set_src0_file_type(devinfo
, &inst
, file
, BRW_REGISTER_TYPE_VF
);
1607 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1608 * set the types to :UD so the instruction can be compacted.
1610 * FINISHME: Use dst:f | imm:f on Gen12
1612 if (devinfo
->gen
< 12 &&
1613 compact_immediate(devinfo
, BRW_REGISTER_TYPE_D
,
1614 brw_inst_imm_ud(devinfo
, &inst
)) != -1 &&
1615 brw_inst_cond_modifier(devinfo
, &inst
) == BRW_CONDITIONAL_NONE
&&
1616 brw_inst_src0_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_D
&&
1617 brw_inst_dst_type(devinfo
, &inst
) == BRW_REGISTER_TYPE_D
) {
1618 enum brw_reg_file src_file
= brw_inst_src0_reg_file(devinfo
, &inst
);
1619 enum brw_reg_file dst_file
= brw_inst_dst_reg_file(devinfo
, &inst
);
1621 brw_inst_set_src0_file_type(devinfo
, &inst
, src_file
, BRW_REGISTER_TYPE_UD
);
1622 brw_inst_set_dst_file_type(devinfo
, &inst
, dst_file
, BRW_REGISTER_TYPE_UD
);
1629 * Tries to compact instruction src into dst.
1631 * It doesn't modify dst unless src is compactable, which is relied on by
1632 * brw_compact_instructions().
1635 brw_try_compact_instruction(const struct gen_device_info
*devinfo
,
1636 brw_compact_inst
*dst
, const brw_inst
*src
)
1638 brw_compact_inst temp
;
1640 assert(brw_inst_cmpt_control(devinfo
, src
) == 0);
1642 if (is_3src(devinfo
, brw_inst_opcode(devinfo
, src
))) {
1643 if (devinfo
->gen
>= 8) {
1644 memset(&temp
, 0, sizeof(temp
));
1645 if (brw_try_compact_3src_instruction(devinfo
, &temp
, src
)) {
1656 enum brw_reg_type type
;
1657 bool is_immediate
= has_immediate(devinfo
, src
, &type
);
1659 unsigned compacted_imm
= 0;
1662 /* Instructions with immediates cannot be compacted on Gen < 6 */
1663 if (devinfo
->gen
< 6)
1666 compacted_imm
= compact_immediate(devinfo
, type
,
1667 brw_inst_imm_ud(devinfo
, src
));
1668 if (compacted_imm
== -1)
1672 if (has_unmapped_bits(devinfo
, src
))
1675 memset(&temp
, 0, sizeof(temp
));
1677 #define compact(field) \
1678 brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1679 #define compact_reg(field) \
1680 brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1681 brw_inst_##field##_da_reg_nr(devinfo, src))
1684 compact(debug_control
);
1686 if (!set_control_index(devinfo
, &temp
, src
))
1688 if (!set_datatype_index(devinfo
, &temp
, src
, is_immediate
))
1690 if (!set_subreg_index(devinfo
, &temp
, src
, is_immediate
))
1692 if (!set_src0_index(devinfo
, &temp
, src
))
1694 if (!set_src1_index(devinfo
, &temp
, src
, is_immediate
, compacted_imm
))
1697 if (devinfo
->gen
>= 12) {
1703 /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1704 brw_compact_inst_set_src1_reg_nr(devinfo
, &temp
, compacted_imm
>> 4);
1709 if (devinfo
->gen
>= 6) {
1710 compact(acc_wr_control
);
1712 compact(mask_control_ex
);
1715 if (devinfo
->gen
<= 6)
1716 compact(flag_subreg_nr
);
1718 compact(cond_modifier
);
1724 /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1725 brw_compact_inst_set_src1_reg_nr(devinfo
, &temp
, compacted_imm
& 0xff);
1730 brw_compact_inst_set_cmpt_control(devinfo
, &temp
, true);
1741 set_uncompacted_control(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1742 brw_compact_inst
*src
)
1744 uint32_t uncompacted
=
1745 control_index_table
[brw_compact_inst_control_index(devinfo
, src
)];
1747 if (devinfo
->gen
>= 12) {
1748 brw_inst_set_bits(dst
, 95, 92, (uncompacted
>> 17));
1749 brw_inst_set_bits(dst
, 34, 34, (uncompacted
>> 16) & 0x1);
1750 brw_inst_set_bits(dst
, 33, 33, (uncompacted
>> 15) & 0x1);
1751 brw_inst_set_bits(dst
, 32, 32, (uncompacted
>> 14) & 0x1);
1752 brw_inst_set_bits(dst
, 31, 31, (uncompacted
>> 13) & 0x1);
1753 brw_inst_set_bits(dst
, 28, 28, (uncompacted
>> 12) & 0x1);
1754 brw_inst_set_bits(dst
, 27, 24, (uncompacted
>> 8) & 0xf);
1755 brw_inst_set_bits(dst
, 23, 22, (uncompacted
>> 6) & 0x3);
1756 brw_inst_set_bits(dst
, 21, 19, (uncompacted
>> 3) & 0x7);
1757 brw_inst_set_bits(dst
, 18, 16, (uncompacted
>> 0) & 0x7);
1758 } else if (devinfo
->gen
>= 8) {
1759 brw_inst_set_bits(dst
, 33, 31, (uncompacted
>> 16));
1760 brw_inst_set_bits(dst
, 23, 12, (uncompacted
>> 4) & 0xfff);
1761 brw_inst_set_bits(dst
, 10, 9, (uncompacted
>> 2) & 0x3);
1762 brw_inst_set_bits(dst
, 34, 34, (uncompacted
>> 1) & 0x1);
1763 brw_inst_set_bits(dst
, 8, 8, (uncompacted
>> 0) & 0x1);
1765 brw_inst_set_bits(dst
, 31, 31, (uncompacted
>> 16) & 0x1);
1766 brw_inst_set_bits(dst
, 23, 8, (uncompacted
& 0xffff));
1768 if (devinfo
->gen
== 7)
1769 brw_inst_set_bits(dst
, 90, 89, uncompacted
>> 17);
1774 set_uncompacted_datatype(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1775 brw_compact_inst
*src
)
1777 uint32_t uncompacted
=
1778 datatype_table
[brw_compact_inst_datatype_index(devinfo
, src
)];
1780 if (devinfo
->gen
>= 12) {
1781 brw_inst_set_bits(dst
, 98, 98, (uncompacted
>> 19));
1782 brw_inst_set_bits(dst
, 91, 88, (uncompacted
>> 15) & 0xf);
1783 brw_inst_set_bits(dst
, 66, 66, (uncompacted
>> 14) & 0x1);
1784 brw_inst_set_bits(dst
, 50, 50, (uncompacted
>> 13) & 0x1);
1785 brw_inst_set_bits(dst
, 49, 48, (uncompacted
>> 11) & 0x3);
1786 brw_inst_set_bits(dst
, 47, 47, (uncompacted
>> 10) & 0x1);
1787 brw_inst_set_bits(dst
, 46, 46, (uncompacted
>> 9) & 0x1);
1788 brw_inst_set_bits(dst
, 43, 40, (uncompacted
>> 5) & 0xf);
1789 brw_inst_set_bits(dst
, 39, 36, (uncompacted
>> 1) & 0xf);
1790 brw_inst_set_bits(dst
, 35, 35, (uncompacted
>> 0) & 0x1);
1791 } else if (devinfo
->gen
>= 8) {
1792 brw_inst_set_bits(dst
, 63, 61, (uncompacted
>> 18));
1793 brw_inst_set_bits(dst
, 94, 89, (uncompacted
>> 12) & 0x3f);
1794 brw_inst_set_bits(dst
, 46, 35, (uncompacted
>> 0) & 0xfff);
1796 brw_inst_set_bits(dst
, 63, 61, (uncompacted
>> 15));
1797 brw_inst_set_bits(dst
, 46, 32, (uncompacted
& 0x7fff));
1802 set_uncompacted_subreg(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1803 brw_compact_inst
*src
)
1805 uint16_t uncompacted
=
1806 subreg_table
[brw_compact_inst_subreg_index(devinfo
, src
)];
1808 if (devinfo
->gen
>= 12) {
1809 brw_inst_set_bits(dst
, 103, 99, (uncompacted
>> 10));
1810 brw_inst_set_bits(dst
, 71, 67, (uncompacted
>> 5) & 0x1f);
1811 brw_inst_set_bits(dst
, 55, 51, (uncompacted
>> 0) & 0x1f);
1813 brw_inst_set_bits(dst
, 100, 96, (uncompacted
>> 10));
1814 brw_inst_set_bits(dst
, 68, 64, (uncompacted
>> 5) & 0x1f);
1815 brw_inst_set_bits(dst
, 52, 48, (uncompacted
>> 0) & 0x1f);
1820 set_uncompacted_src0(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1821 brw_compact_inst
*src
)
1823 uint32_t compacted
= brw_compact_inst_src0_index(devinfo
, src
);
1824 uint16_t uncompacted
= src0_index_table
[compacted
];
1826 if (devinfo
->gen
>= 12) {
1827 brw_inst_set_bits(dst
, 87, 84, (uncompacted
>> 8));
1828 brw_inst_set_bits(dst
, 83, 81, (uncompacted
>> 5) & 0x7);
1829 brw_inst_set_bits(dst
, 80, 80, (uncompacted
>> 4) & 0x1);
1830 brw_inst_set_bits(dst
, 65, 64, (uncompacted
>> 2) & 0x3);
1831 brw_inst_set_bits(dst
, 45, 44, (uncompacted
>> 0) & 0x3);
1833 brw_inst_set_bits(dst
, 88, 77, uncompacted
);
1838 set_uncompacted_src1(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
1839 brw_compact_inst
*src
)
1841 uint16_t uncompacted
=
1842 src1_index_table
[brw_compact_inst_src1_index(devinfo
, src
)];
1844 if (devinfo
->gen
>= 12) {
1845 brw_inst_set_bits(dst
, 121, 120, (uncompacted
>> 10));
1846 brw_inst_set_bits(dst
, 119, 116, (uncompacted
>> 6) & 0xf);
1847 brw_inst_set_bits(dst
, 115, 113, (uncompacted
>> 3) & 0x7);
1848 brw_inst_set_bits(dst
, 112, 112, (uncompacted
>> 2) & 0x1);
1849 brw_inst_set_bits(dst
, 97, 96, (uncompacted
>> 0) & 0x3);
1851 brw_inst_set_bits(dst
, 120, 109, uncompacted
);
1856 set_uncompacted_3src_control_index(const struct gen_device_info
*devinfo
,
1857 brw_inst
*dst
, brw_compact_inst
*src
)
1859 assert(devinfo
->gen
>= 8);
1861 if (devinfo
->gen
>= 12) {
1862 uint64_t compacted
= brw_compact_inst_3src_control_index(devinfo
, src
);
1863 uint64_t uncompacted
= gen12_3src_control_index_table
[compacted
];
1865 brw_inst_set_bits(dst
, 95, 92, (uncompacted
>> 32));
1866 brw_inst_set_bits(dst
, 90, 88, (uncompacted
>> 29) & 0x7);
1867 brw_inst_set_bits(dst
, 82, 80, (uncompacted
>> 26) & 0x7);
1868 brw_inst_set_bits(dst
, 50, 50, (uncompacted
>> 25) & 0x1);
1869 brw_inst_set_bits(dst
, 48, 48, (uncompacted
>> 24) & 0x1);
1870 brw_inst_set_bits(dst
, 42, 40, (uncompacted
>> 21) & 0x7);
1871 brw_inst_set_bits(dst
, 39, 39, (uncompacted
>> 20) & 0x1);
1872 brw_inst_set_bits(dst
, 38, 36, (uncompacted
>> 17) & 0x7);
1873 brw_inst_set_bits(dst
, 34, 34, (uncompacted
>> 16) & 0x1);
1874 brw_inst_set_bits(dst
, 33, 33, (uncompacted
>> 15) & 0x1);
1875 brw_inst_set_bits(dst
, 32, 32, (uncompacted
>> 14) & 0x1);
1876 brw_inst_set_bits(dst
, 31, 31, (uncompacted
>> 13) & 0x1);
1877 brw_inst_set_bits(dst
, 28, 28, (uncompacted
>> 12) & 0x1);
1878 brw_inst_set_bits(dst
, 27, 24, (uncompacted
>> 8) & 0xf);
1879 brw_inst_set_bits(dst
, 23, 23, (uncompacted
>> 7) & 0x1);
1880 brw_inst_set_bits(dst
, 22, 22, (uncompacted
>> 6) & 0x1);
1881 brw_inst_set_bits(dst
, 21, 19, (uncompacted
>> 3) & 0x7);
1882 brw_inst_set_bits(dst
, 18, 16, (uncompacted
>> 0) & 0x7);
1884 uint32_t compacted
= brw_compact_inst_3src_control_index(devinfo
, src
);
1885 uint32_t uncompacted
= gen8_3src_control_index_table
[compacted
];
1887 brw_inst_set_bits(dst
, 34, 32, (uncompacted
>> 21) & 0x7);
1888 brw_inst_set_bits(dst
, 28, 8, (uncompacted
>> 0) & 0x1fffff);
1890 if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
)
1891 brw_inst_set_bits(dst
, 36, 35, (uncompacted
>> 24) & 0x3);
1896 set_uncompacted_3src_source_index(const struct gen_device_info
*devinfo
,
1897 brw_inst
*dst
, brw_compact_inst
*src
)
1899 assert(devinfo
->gen
>= 8);
1901 uint32_t compacted
= brw_compact_inst_3src_source_index(devinfo
, src
);
1903 if (devinfo
->gen
>= 12) {
1904 uint32_t uncompacted
= gen12_3src_source_index_table
[compacted
];
1906 brw_inst_set_bits(dst
, 114, 114, (uncompacted
>> 20));
1907 brw_inst_set_bits(dst
, 113, 112, (uncompacted
>> 18) & 0x3);
1908 brw_inst_set_bits(dst
, 98, 98, (uncompacted
>> 17) & 0x1);
1909 brw_inst_set_bits(dst
, 97, 96, (uncompacted
>> 15) & 0x3);
1910 brw_inst_set_bits(dst
, 91, 91, (uncompacted
>> 14) & 0x1);
1911 brw_inst_set_bits(dst
, 87, 86, (uncompacted
>> 12) & 0x3);
1912 brw_inst_set_bits(dst
, 85, 84, (uncompacted
>> 10) & 0x3);
1913 brw_inst_set_bits(dst
, 83, 83, (uncompacted
>> 9) & 0x1);
1914 brw_inst_set_bits(dst
, 66, 66, (uncompacted
>> 8) & 0x1);
1915 brw_inst_set_bits(dst
, 65, 64, (uncompacted
>> 6) & 0x3);
1916 brw_inst_set_bits(dst
, 47, 47, (uncompacted
>> 5) & 0x1);
1917 brw_inst_set_bits(dst
, 46, 46, (uncompacted
>> 4) & 0x1);
1918 brw_inst_set_bits(dst
, 45, 44, (uncompacted
>> 2) & 0x3);
1919 brw_inst_set_bits(dst
, 43, 43, (uncompacted
>> 1) & 0x1);
1920 brw_inst_set_bits(dst
, 35, 35, (uncompacted
>> 0) & 0x1);
1922 uint64_t uncompacted
= gen8_3src_source_index_table
[compacted
];
1924 brw_inst_set_bits(dst
, 83, 83, (uncompacted
>> 43) & 0x1);
1925 brw_inst_set_bits(dst
, 114, 107, (uncompacted
>> 35) & 0xff);
1926 brw_inst_set_bits(dst
, 93, 86, (uncompacted
>> 27) & 0xff);
1927 brw_inst_set_bits(dst
, 72, 65, (uncompacted
>> 19) & 0xff);
1928 brw_inst_set_bits(dst
, 55, 37, (uncompacted
>> 0) & 0x7ffff);
1930 if (devinfo
->gen
>= 9 || devinfo
->is_cherryview
) {
1931 brw_inst_set_bits(dst
, 126, 125, (uncompacted
>> 47) & 0x3);
1932 brw_inst_set_bits(dst
, 105, 104, (uncompacted
>> 45) & 0x3);
1933 brw_inst_set_bits(dst
, 84, 84, (uncompacted
>> 44) & 0x1);
1935 brw_inst_set_bits(dst
, 125, 125, (uncompacted
>> 45) & 0x1);
1936 brw_inst_set_bits(dst
, 104, 104, (uncompacted
>> 44) & 0x1);
1942 set_uncompacted_3src_subreg_index(const struct gen_device_info
*devinfo
,
1943 brw_inst
*dst
, brw_compact_inst
*src
)
1945 assert(devinfo
->gen
>= 12);
1947 uint32_t compacted
= brw_compact_inst_3src_subreg_index(devinfo
, src
);
1948 uint32_t uncompacted
= gen12_3src_subreg_table
[compacted
];
1950 brw_inst_set_bits(dst
, 119, 115, (uncompacted
>> 15));
1951 brw_inst_set_bits(dst
, 103, 99, (uncompacted
>> 10) & 0x1f);
1952 brw_inst_set_bits(dst
, 71, 67, (uncompacted
>> 5) & 0x1f);
1953 brw_inst_set_bits(dst
, 55, 51, (uncompacted
>> 0) & 0x1f);
1957 brw_uncompact_3src_instruction(const struct gen_device_info
*devinfo
,
1958 brw_inst
*dst
, brw_compact_inst
*src
)
1960 assert(devinfo
->gen
>= 8);
1962 #define uncompact(field) \
1963 brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1964 #define uncompact_a16(field) \
1965 brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1967 uncompact(hw_opcode
);
1969 if (devinfo
->gen
>= 12) {
1970 set_uncompacted_3src_control_index(devinfo
, dst
, src
);
1971 set_uncompacted_3src_source_index(devinfo
, dst
, src
);
1972 set_uncompacted_3src_subreg_index(devinfo
, dst
, src
);
1974 uncompact(debug_control
);
1976 uncompact(dst_reg_nr
);
1977 uncompact(src0_reg_nr
);
1978 uncompact(src1_reg_nr
);
1979 uncompact(src2_reg_nr
);
1981 set_uncompacted_3src_control_index(devinfo
, dst
, src
);
1982 set_uncompacted_3src_source_index(devinfo
, dst
, src
);
1984 uncompact(dst_reg_nr
);
1985 uncompact_a16(src0_rep_ctrl
);
1986 uncompact(debug_control
);
1987 uncompact(saturate
);
1988 uncompact_a16(src1_rep_ctrl
);
1989 uncompact_a16(src2_rep_ctrl
);
1990 uncompact(src0_reg_nr
);
1991 uncompact(src1_reg_nr
);
1992 uncompact(src2_reg_nr
);
1993 uncompact_a16(src0_subreg_nr
);
1994 uncompact_a16(src1_subreg_nr
);
1995 uncompact_a16(src2_subreg_nr
);
1997 brw_inst_set_3src_cmpt_control(devinfo
, dst
, false);
2000 #undef uncompact_a16
2004 brw_uncompact_instruction(const struct gen_device_info
*devinfo
, brw_inst
*dst
,
2005 brw_compact_inst
*src
)
2007 memset(dst
, 0, sizeof(*dst
));
2009 if (devinfo
->gen
>= 8 &&
2010 is_3src(devinfo
, brw_opcode_decode(
2011 devinfo
, brw_compact_inst_3src_hw_opcode(devinfo
, src
)))) {
2012 brw_uncompact_3src_instruction(devinfo
, dst
, src
);
2016 #define uncompact(field) \
2017 brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
2018 #define uncompact_reg(field) \
2019 brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
2020 brw_compact_inst_##field##_reg_nr(devinfo, src))
2022 uncompact(hw_opcode
);
2023 uncompact(debug_control
);
2025 set_uncompacted_control(devinfo
, dst
, src
);
2026 set_uncompacted_datatype(devinfo
, dst
, src
);
2027 set_uncompacted_subreg(devinfo
, dst
, src
);
2028 set_uncompacted_src0(devinfo
, dst
, src
);
2030 enum brw_reg_type type
;
2031 if (has_immediate(devinfo
, dst
, &type
)) {
2032 unsigned imm
= uncompact_immediate(devinfo
, type
,
2033 brw_compact_inst_imm(devinfo
, src
));
2034 brw_inst_set_imm_ud(devinfo
, dst
, imm
);
2036 set_uncompacted_src1(devinfo
, dst
, src
);
2037 uncompact_reg(src1
);
2040 if (devinfo
->gen
>= 12) {
2043 uncompact_reg(src0
);
2045 if (devinfo
->gen
>= 6) {
2046 uncompact(acc_wr_control
);
2048 uncompact(mask_control_ex
);
2051 uncompact(cond_modifier
);
2053 if (devinfo
->gen
<= 6)
2054 uncompact(flag_subreg_nr
);
2057 uncompact_reg(src0
);
2059 brw_inst_set_cmpt_control(devinfo
, dst
, false);
2062 #undef uncompact_reg
2065 void brw_debug_compact_uncompact(const struct gen_device_info
*devinfo
,
2067 brw_inst
*uncompacted
)
2069 fprintf(stderr
, "Instruction compact/uncompact changed (gen%d):\n",
2072 fprintf(stderr
, " before: ");
2073 brw_disassemble_inst(stderr
, devinfo
, orig
, true);
2075 fprintf(stderr
, " after: ");
2076 brw_disassemble_inst(stderr
, devinfo
, uncompacted
, false);
2078 uint32_t *before_bits
= (uint32_t *)orig
;
2079 uint32_t *after_bits
= (uint32_t *)uncompacted
;
2080 fprintf(stderr
, " changed bits:\n");
2081 for (int i
= 0; i
< 128; i
++) {
2082 uint32_t before
= before_bits
[i
/ 32] & (1 << (i
& 31));
2083 uint32_t after
= after_bits
[i
/ 32] & (1 << (i
& 31));
2085 if (before
!= after
) {
2086 fprintf(stderr
, " bit %d, %s to %s\n", i
,
2087 before
? "set" : "unset",
2088 after
? "set" : "unset");
/**
 * Returns how many more instructions had been compacted by the time
 * old_target_ip was reached than by the time old_ip was reached, i.e.
 * compacted_counts[old_target_ip] - compacted_counts[old_ip].
 *
 * The result is negative when the target's count is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   const int at_source = compacted_counts[old_ip];
   const int at_target = compacted_counts[old_target_ip];

   return at_target - at_source;
}
2102 update_uip_jip(const struct gen_device_info
*devinfo
, brw_inst
*insn
,
2103 int this_old_ip
, int *compacted_counts
)
2105 /* JIP and UIP are in units of:
2106 * - bytes on Gen8+; and
2107 * - compacted instructions on Gen6+.
2109 int shift
= devinfo
->gen
>= 8 ? 3 : 0;
2111 int32_t jip_compacted
= brw_inst_jip(devinfo
, insn
) >> shift
;
2112 jip_compacted
-= compacted_between(this_old_ip
,
2113 this_old_ip
+ (jip_compacted
/ 2),
2115 brw_inst_set_jip(devinfo
, insn
, jip_compacted
<< shift
);
2117 if (brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_ENDIF
||
2118 brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_WHILE
||
2119 (brw_inst_opcode(devinfo
, insn
) == BRW_OPCODE_ELSE
&& devinfo
->gen
<= 7))
2122 int32_t uip_compacted
= brw_inst_uip(devinfo
, insn
) >> shift
;
2123 uip_compacted
-= compacted_between(this_old_ip
,
2124 this_old_ip
+ (uip_compacted
/ 2),
2126 brw_inst_set_uip(devinfo
, insn
, uip_compacted
<< shift
);
2130 update_gen4_jump_count(const struct gen_device_info
*devinfo
, brw_inst
*insn
,
2131 int this_old_ip
, int *compacted_counts
)
2133 assert(devinfo
->gen
== 5 || devinfo
->is_g4x
);
2135 /* Jump Count is in units of:
2136 * - uncompacted instructions on G45; and
2137 * - compacted instructions on Gen5.
2139 int shift
= devinfo
->is_g4x
? 1 : 0;
2141 int jump_count_compacted
= brw_inst_gen4_jump_count(devinfo
, insn
) << shift
;
2143 int target_old_ip
= this_old_ip
+ (jump_count_compacted
/ 2);
2145 int this_compacted_count
= compacted_counts
[this_old_ip
];
2146 int target_compacted_count
= compacted_counts
[target_old_ip
];
2148 jump_count_compacted
-= (target_compacted_count
- this_compacted_count
);
2149 brw_inst_set_gen4_jump_count(devinfo
, insn
, jump_count_compacted
>> shift
);
2153 brw_init_compaction_tables(const struct gen_device_info
*devinfo
)
2155 assert(g45_control_index_table
[ARRAY_SIZE(g45_control_index_table
) - 1] != 0);
2156 assert(g45_datatype_table
[ARRAY_SIZE(g45_datatype_table
) - 1] != 0);
2157 assert(g45_subreg_table
[ARRAY_SIZE(g45_subreg_table
) - 1] != 0);
2158 assert(g45_src_index_table
[ARRAY_SIZE(g45_src_index_table
) - 1] != 0);
2159 assert(gen6_control_index_table
[ARRAY_SIZE(gen6_control_index_table
) - 1] != 0);
2160 assert(gen6_datatype_table
[ARRAY_SIZE(gen6_datatype_table
) - 1] != 0);
2161 assert(gen6_subreg_table
[ARRAY_SIZE(gen6_subreg_table
) - 1] != 0);
2162 assert(gen6_src_index_table
[ARRAY_SIZE(gen6_src_index_table
) - 1] != 0);
2163 assert(gen7_control_index_table
[ARRAY_SIZE(gen7_control_index_table
) - 1] != 0);
2164 assert(gen7_datatype_table
[ARRAY_SIZE(gen7_datatype_table
) - 1] != 0);
2165 assert(gen7_subreg_table
[ARRAY_SIZE(gen7_subreg_table
) - 1] != 0);
2166 assert(gen7_src_index_table
[ARRAY_SIZE(gen7_src_index_table
) - 1] != 0);
2167 assert(gen8_control_index_table
[ARRAY_SIZE(gen8_control_index_table
) - 1] != 0);
2168 assert(gen8_datatype_table
[ARRAY_SIZE(gen8_datatype_table
) - 1] != 0);
2169 assert(gen8_subreg_table
[ARRAY_SIZE(gen8_subreg_table
) - 1] != 0);
2170 assert(gen8_src_index_table
[ARRAY_SIZE(gen8_src_index_table
) - 1] != 0);
2171 assert(gen11_datatype_table
[ARRAY_SIZE(gen11_datatype_table
) - 1] != 0);
2172 assert(gen12_control_index_table
[ARRAY_SIZE(gen12_control_index_table
) - 1] != 0);
2173 assert(gen12_datatype_table
[ARRAY_SIZE(gen12_datatype_table
) - 1] != 0);
2174 assert(gen12_subreg_table
[ARRAY_SIZE(gen12_subreg_table
) - 1] != 0);
2175 assert(gen12_src0_index_table
[ARRAY_SIZE(gen12_src0_index_table
) - 1] != 0);
2176 assert(gen12_src1_index_table
[ARRAY_SIZE(gen12_src1_index_table
) - 1] != 0);
2178 switch (devinfo
->gen
) {
2180 control_index_table
= gen12_control_index_table
;;
2181 datatype_table
= gen12_datatype_table
;
2182 subreg_table
= gen12_subreg_table
;
2183 src0_index_table
= gen12_src0_index_table
;
2184 src1_index_table
= gen12_src1_index_table
;
2187 control_index_table
= gen8_control_index_table
;
2188 datatype_table
= gen11_datatype_table
;
2189 subreg_table
= gen8_subreg_table
;
2190 src0_index_table
= gen8_src_index_table
;
2191 src1_index_table
= gen8_src_index_table
;
2196 control_index_table
= gen8_control_index_table
;
2197 datatype_table
= gen8_datatype_table
;
2198 subreg_table
= gen8_subreg_table
;
2199 src0_index_table
= gen8_src_index_table
;
2200 src1_index_table
= gen8_src_index_table
;
2203 control_index_table
= gen7_control_index_table
;
2204 datatype_table
= gen7_datatype_table
;
2205 subreg_table
= gen7_subreg_table
;
2206 src0_index_table
= gen7_src_index_table
;
2207 src1_index_table
= gen7_src_index_table
;
2210 control_index_table
= gen6_control_index_table
;
2211 datatype_table
= gen6_datatype_table
;
2212 subreg_table
= gen6_subreg_table
;
2213 src0_index_table
= gen6_src_index_table
;
2214 src1_index_table
= gen6_src_index_table
;
2218 control_index_table
= g45_control_index_table
;
2219 datatype_table
= g45_datatype_table
;
2220 subreg_table
= g45_subreg_table
;
2221 src0_index_table
= g45_src_index_table
;
2222 src1_index_table
= g45_src_index_table
;
2225 unreachable("unknown generation");
/**
 * Runs instruction compaction over the instructions held in p->store,
 * starting at byte offset start_offset.
 *
 * The visible body (1) calls brw_try_compact_instruction() on each
 * instruction and packs the results downwards in the store, (2) rewrites the
 * jump distances of control-flow instructions for the new layout, (3) updates
 * p->next_insn_offset and p->nr_insn, and (4) rewrites the offsets of the
 * inst_group entries in disasm->group_list.
 *
 * @param p             code generator state; its store is modified in place
 * @param start_offset  byte offset into the store of the first instruction
 * @param disasm        disassembly info whose group offsets are updated
 */
2230 brw_compact_instructions(struct brw_codegen
*p
, int start_offset
,
2231 struct disasm_info
*disasm
)
/* Tests the DEBUG_NO_COMPACTION bit of INTEL_DEBUG. */
2233 if (unlikely(INTEL_DEBUG
& DEBUG_NO_COMPACTION
))
2236 const struct gen_device_info
*devinfo
= p
->devinfo
;
/* Start of the region being processed; offset and src_offset below are byte
 * offsets added to this pointer.
 * NOTE(review): the division by 16 suggests p->store is indexed in 16-byte
 * instructions -- confirm against the definition of struct brw_codegen.
 */
2237 void *store
= p
->store
+ start_offset
/ 16;
2238 /* For an instruction at byte offset 16*i before compaction, this is the
2239 * number of compacted instructions minus the number of padding NOP/NENOPs
2242 int compacted_counts
[(p
->next_insn_offset
- start_offset
) / sizeof(brw_inst
)];
2243 /* For an instruction at byte offset 8*i after compaction, this was its IP
2244 * (in 16-byte units) before compaction.
2246 int old_ip
[(p
->next_insn_offset
- start_offset
) / sizeof(brw_compact_inst
) + 1];
2248 if (devinfo
->gen
== 4 && !devinfo
->is_g4x
)
2252 int compacted_count
= 0;
2253 for (int src_offset
= 0; src_offset
< p
->next_insn_offset
- start_offset
;
2254 src_offset
+= sizeof(brw_inst
)) {
2255 brw_inst
*src
= store
+ src_offset
;
2256 void *dst
= store
+ offset
;
2258 old_ip
[offset
/ sizeof(brw_compact_inst
)] = src_offset
/ sizeof(brw_inst
);
2259 compacted_counts
[src_offset
/ sizeof(brw_inst
)] = compacted_count
;
2261 brw_inst inst
= precompact(devinfo
, *src
);
2262 brw_inst saved
= inst
;
2264 if (brw_try_compact_instruction(devinfo
, dst
, &inst
)) {
2268 brw_inst uncompacted
;
2269 brw_uncompact_instruction(devinfo
, &uncompacted
, dst
);
2270 if (memcmp(&saved
, &uncompacted
, sizeof(uncompacted
))) {
2271 brw_debug_compact_uncompact(devinfo
, &saved
, &uncompacted
);
2275 offset
+= sizeof(brw_compact_inst
);
2277 /* All uncompacted instructions need to be aligned on G45. */
2278 if ((offset
& sizeof(brw_compact_inst
)) != 0 && devinfo
->is_g4x
){
/* Emit one compacted NENOP as padding and step offset past it. */
2279 brw_compact_inst
*align
= store
+ offset
;
2280 memset(align
, 0, sizeof(*align
));
2281 brw_compact_inst_set_hw_opcode(
2282 devinfo
, align
, brw_opcode_encode(devinfo
, BRW_OPCODE_NENOP
));
2283 brw_compact_inst_set_cmpt_control(devinfo
, align
, true);
2284 offset
+= sizeof(brw_compact_inst
);
/* Refresh both bookkeeping arrays for this source instruction at the
 * post-padding offset.
 */
2286 compacted_counts
[src_offset
/ sizeof(brw_inst
)] = compacted_count
;
2287 old_ip
[offset
/ sizeof(brw_compact_inst
)] = src_offset
/ sizeof(brw_inst
);
/* Re-derive dst, since offset changed. */
2289 dst
= store
+ offset
;
2292 /* If we didn't compact this instruction, we need to move it down into
2295 if (offset
!= src_offset
) {
2296 memmove(dst
, src
, sizeof(brw_inst
));
2298 offset
+= sizeof(brw_inst
);
2302 /* Add an entry for the ending offset of the program. This greatly
2303 * simplifies the linked list walk at the end of the function.
2305 old_ip
[offset
/ sizeof(brw_compact_inst
)] =
2306 (p
->next_insn_offset
- start_offset
) / sizeof(brw_inst
);
2308 /* Fix up control flow offsets. */
/* offset is now the total size in bytes of the rewritten instruction
 * stream, so it gives the new end of the program.
 */
2309 p
->next_insn_offset
= start_offset
+ offset
;
/* Second pass over the rewritten stream, stepping with next_offset(). */
2310 for (offset
= 0; offset
< p
->next_insn_offset
- start_offset
;
2311 offset
= next_offset(devinfo
, store
, offset
)) {
2312 brw_inst
*insn
= store
+ offset
;
/* old_ip[] is indexed by post-compaction offset in brw_compact_inst units;
 * compacted_counts[] is indexed by pre-compaction IP.
 */
2313 int this_old_ip
= old_ip
[offset
/ sizeof(brw_compact_inst
)];
2314 int this_compacted_count
= compacted_counts
[this_old_ip
];
2316 switch (brw_inst_opcode(devinfo
, insn
)) {
2317 case BRW_OPCODE_BREAK
:
2318 case BRW_OPCODE_CONTINUE
:
2319 case BRW_OPCODE_HALT
:
/* Gen6 and later go through update_uip_jip(); the alternative path uses
 * update_gen4_jump_count().
 */
2320 if (devinfo
->gen
>= 6) {
2321 update_uip_jip(devinfo
, insn
, this_old_ip
, compacted_counts
);
2323 update_gen4_jump_count(devinfo
, insn
, this_old_ip
,
2329 case BRW_OPCODE_IFF
:
2330 case BRW_OPCODE_ELSE
:
2331 case BRW_OPCODE_ENDIF
:
2332 case BRW_OPCODE_WHILE
:
2333 if (devinfo
->gen
>= 7) {
/* The branch itself was compacted: expand it into a temporary, fix up the
 * temporary, then compact it back over insn. The assert requires that the
 * re-compaction succeeds.
 */
2334 if (brw_inst_cmpt_control(devinfo
, insn
)) {
2335 brw_inst uncompacted
;
2336 brw_uncompact_instruction(devinfo
, &uncompacted
,
2337 (brw_compact_inst
*)insn
);
2339 update_uip_jip(devinfo
, &uncompacted
, this_old_ip
,
2342 bool ret
= brw_try_compact_instruction(devinfo
,
2343 (brw_compact_inst
*)insn
,
2345 assert(ret
); (void)ret
;
/* Full-size instruction: fix it up in place. */
2347 update_uip_jip(devinfo
, insn
, this_old_ip
, compacted_counts
);
2349 } else if (devinfo
->gen
== 6) {
/* The Gen6 path asserts that these instructions are never compacted. */
2350 assert(!brw_inst_cmpt_control(devinfo
, insn
));
2352 /* Jump Count is in units of compacted instructions on Gen6. */
2353 int jump_count_compacted
= brw_inst_gen6_jump_count(devinfo
, insn
);
/* Halving the jump count gives the distance in pre-compaction
 * instructions, which locates the target's old IP.
 */
2355 int target_old_ip
= this_old_ip
+ (jump_count_compacted
/ 2);
2356 int target_compacted_count
= compacted_counts
[target_old_ip
];
/* Shorten the jump by the change in compacted_counts between this
 * instruction and its target.
 */
2357 jump_count_compacted
-= (target_compacted_count
- this_compacted_count
);
2358 brw_inst_set_gen6_jump_count(devinfo
, insn
, jump_count_compacted
);
2360 update_gen4_jump_count(devinfo
, insn
, this_old_ip
,
2365 case BRW_OPCODE_ADD
:
2366 /* Add instructions modifying the IP register use an immediate src1,
2367 * and Gens that use this cannot compact instructions with immediate
2370 if (brw_inst_cmpt_control(devinfo
, insn
))
2373 if (brw_inst_dst_reg_file(devinfo
, insn
) == BRW_ARCHITECTURE_REGISTER_FILE
&&
2374 brw_inst_dst_da_reg_nr(devinfo
, insn
) == BRW_ARF_IP
) {
2375 assert(brw_inst_src1_reg_file(devinfo
, insn
) == BRW_IMMEDIATE_VALUE
);
2378 int jump_compacted
= brw_inst_imm_d(devinfo
, insn
) >> shift
;
2380 int target_old_ip
= this_old_ip
+ (jump_compacted
/ 2);
2381 int target_compacted_count
= compacted_counts
[target_old_ip
];
2382 jump_compacted
-= (target_compacted_count
- this_compacted_count
);
2383 brw_inst_set_imm_ud(devinfo
, insn
, jump_compacted
<< shift
);
2392 /* p->nr_insn is counting the number of uncompacted instructions still, so
2393 * divide. We do want to be sure there's a valid instruction in any
2394 * alignment padding, so that the next compression pass (for the FS 8/16
2395 * compile passes) parses correctly.
2397 if (p
->next_insn_offset
& sizeof(brw_compact_inst
)) {
2398 brw_compact_inst
*align
= store
+ offset
;
2399 memset(align
, 0, sizeof(*align
));
2400 brw_compact_inst_set_hw_opcode(
2401 devinfo
, align
, brw_opcode_encode(devinfo
, BRW_OPCODE_NOP
));
2402 brw_compact_inst_set_cmpt_control(devinfo
, align
, true);
2403 p
->next_insn_offset
+= sizeof(brw_compact_inst
);
2405 p
->nr_insn
= p
->next_insn_offset
/ sizeof(brw_inst
);
2407 /* Update the instruction offsets for each group. */
2411 foreach_list_typed(struct inst_group
, group
, link
, &disasm
->group_list
) {
/* Advance through the rewritten stream until reaching the instruction whose
 * pre-compaction byte offset equals group->offset; the assert checks that
 * the walk has not already gone past it.
 */
2412 while (start_offset
+ old_ip
[offset
/ sizeof(brw_compact_inst
)] *
2413 sizeof(brw_inst
) != group
->offset
) {
2414 assert(start_offset
+ old_ip
[offset
/ sizeof(brw_compact_inst
)] *
2415 sizeof(brw_inst
) < group
->offset
);
2416 offset
= next_offset(devinfo
, store
, offset
);
/* Rewrite the group's offset to the instruction's post-compaction position. */
2419 group
->offset
= start_offset
+ offset
;
/* Step past this instruction before handling the next group. */
2421 offset
= next_offset(devinfo
, store
, offset
);