intel/eu: Add a mechanism for emitting relocatable constant MOVs
[mesa.git] / src / intel / compiler / brw_eu_compact.c
1 /*
2 * Copyright © 2012-2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gen5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gen6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gen7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gen6
62 * since the jump count field is not in DW3.
63 *
64 * break JIP/UIP
65 * cont JIP/UIP
66 * halt JIP/UIP
67 * if JIP/UIP
68 * else JIP (plus UIP on BDW+)
69 * endif JIP
70 * while JIP (must be negative)
71 *
72 * Gen8 adds support for compacting 3-src instructions.
73 *
74 * Gen12 reduces the number of bits that are available to compacted immediates from
75 * 13 to 12, but improves the compaction of floating-point immediates by
76 * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77 * three most significant bits of the mantissa), rather than the lowest bits of
78 * the mantissa.
79 */
80
81 #include "brw_eu.h"
82 #include "brw_shader.h"
83 #include "brw_disasm_info.h"
84 #include "dev/gen_debug.h"
85
86 static const uint32_t g45_control_index_table[32] = {
87 0b00000000000000000,
88 0b01000000000000000,
89 0b00110000000000000,
90 0b00000000000000010,
91 0b00100000000000000,
92 0b00010000000000000,
93 0b01000000000100000,
94 0b01000000100000000,
95 0b01010000000100000,
96 0b00000000100000010,
97 0b11000000000000000,
98 0b00001000100000010,
99 0b01001000100000000,
100 0b00000000100000000,
101 0b11000000000100000,
102 0b00001000100000000,
103 0b10110000000000000,
104 0b11010000000100000,
105 0b00110000100000000,
106 0b00100000100000000,
107 0b01000000000001000,
108 0b01000000000000100,
109 0b00111100000000000,
110 0b00101011000000000,
111 0b00110000000010000,
112 0b00010000100000000,
113 0b01000000000100100,
114 0b01000000000101000,
115 0b00110000000000110,
116 0b00000000000001010,
117 0b01010000000101000,
118 0b01010000000100100,
119 };
120
121 static const uint32_t g45_datatype_table[32] = {
122 0b001000000000100001,
123 0b001011010110101101,
124 0b001000001000110001,
125 0b001111011110111101,
126 0b001011010110101100,
127 0b001000000110101101,
128 0b001000000000100000,
129 0b010100010110110001,
130 0b001100011000101101,
131 0b001000000000100010,
132 0b001000001000110110,
133 0b010000001000110001,
134 0b001000001000110010,
135 0b011000001000110010,
136 0b001111011110111100,
137 0b001000000100101000,
138 0b010100011000110001,
139 0b001010010100101001,
140 0b001000001000101001,
141 0b010000001000110110,
142 0b101000001000110001,
143 0b001011011000101101,
144 0b001000000100001001,
145 0b001011011000101100,
146 0b110100011000110001,
147 0b001000001110111101,
148 0b110000001000110001,
149 0b011000000100101010,
150 0b101000001000101001,
151 0b001011010110001100,
152 0b001000000110100001,
153 0b001010010100001000,
154 };
155
156 static const uint16_t g45_subreg_table[32] = {
157 0b000000000000000,
158 0b000000010000000,
159 0b000001000000000,
160 0b000100000000000,
161 0b000000000100000,
162 0b100000000000000,
163 0b000000000010000,
164 0b001100000000000,
165 0b001010000000000,
166 0b000000100000000,
167 0b001000000000000,
168 0b000000000001000,
169 0b000000001000000,
170 0b000000000000001,
171 0b000010000000000,
172 0b000000010100000,
173 0b000000000000111,
174 0b000001000100000,
175 0b011000000000000,
176 0b000000110000000,
177 0b000000000000010,
178 0b000000000000100,
179 0b000000001100000,
180 0b000100000000010,
181 0b001110011000110,
182 0b001110100001000,
183 0b000110011000110,
184 0b000001000011000,
185 0b000110010000100,
186 0b001100000000110,
187 0b000000010000110,
188 0b000001000110000,
189 };
190
191 static const uint16_t g45_src_index_table[32] = {
192 0b000000000000,
193 0b010001101000,
194 0b010110001000,
195 0b011010010000,
196 0b001101001000,
197 0b010110001010,
198 0b010101110000,
199 0b011001111000,
200 0b001000101000,
201 0b000000101000,
202 0b010001010000,
203 0b111101101100,
204 0b010110001100,
205 0b010001101100,
206 0b011010010100,
207 0b010001001100,
208 0b001100101000,
209 0b000000000010,
210 0b111101001100,
211 0b011001101000,
212 0b010101001000,
213 0b000000000100,
214 0b000000101100,
215 0b010001101010,
216 0b000000111000,
217 0b010101011000,
218 0b000100100000,
219 0b010110000000,
220 0b010000000100,
221 0b010000111000,
222 0b000101100000,
223 0b111101110100,
224 };
225
226 static const uint32_t gen6_control_index_table[32] = {
227 0b00000000000000000,
228 0b01000000000000000,
229 0b00110000000000000,
230 0b00000000100000000,
231 0b00010000000000000,
232 0b00001000100000000,
233 0b00000000100000010,
234 0b00000000000000010,
235 0b01000000100000000,
236 0b01010000000000000,
237 0b10110000000000000,
238 0b00100000000000000,
239 0b11010000000000000,
240 0b11000000000000000,
241 0b01001000100000000,
242 0b01000000000001000,
243 0b01000000000000100,
244 0b00000000000001000,
245 0b00000000000000100,
246 0b00111000100000000,
247 0b00001000100000010,
248 0b00110000100000000,
249 0b00110000000000001,
250 0b00100000000000001,
251 0b00110000000000010,
252 0b00110000000000101,
253 0b00110000000001001,
254 0b00110000000010000,
255 0b00110000000000011,
256 0b00110000000000100,
257 0b00110000100001000,
258 0b00100000000001001,
259 };
260
261 static const uint32_t gen6_datatype_table[32] = {
262 0b001001110000000000,
263 0b001000110000100000,
264 0b001001110000000001,
265 0b001000000001100000,
266 0b001010110100101001,
267 0b001000000110101101,
268 0b001100011000101100,
269 0b001011110110101101,
270 0b001000000111101100,
271 0b001000000001100001,
272 0b001000110010100101,
273 0b001000000001000001,
274 0b001000001000110001,
275 0b001000001000101001,
276 0b001000000000100000,
277 0b001000001000110010,
278 0b001010010100101001,
279 0b001011010010100101,
280 0b001000000110100101,
281 0b001100011000101001,
282 0b001011011000101100,
283 0b001011010110100101,
284 0b001011110110100101,
285 0b001111011110111101,
286 0b001111011110111100,
287 0b001111011110111101,
288 0b001111011110011101,
289 0b001111011110111110,
290 0b001000000000100001,
291 0b001000000000100010,
292 0b001001111111011101,
293 0b001000001110111110,
294 };
295
296 static const uint16_t gen6_subreg_table[32] = {
297 0b000000000000000,
298 0b000000000000100,
299 0b000000110000000,
300 0b111000000000000,
301 0b011110000001000,
302 0b000010000000000,
303 0b000000000010000,
304 0b000110000001100,
305 0b001000000000000,
306 0b000001000000000,
307 0b000001010010100,
308 0b000000001010110,
309 0b010000000000000,
310 0b110000000000000,
311 0b000100000000000,
312 0b000000010000000,
313 0b000000000001000,
314 0b100000000000000,
315 0b000001010000000,
316 0b001010000000000,
317 0b001100000000000,
318 0b000000001010100,
319 0b101101010010100,
320 0b010100000000000,
321 0b000000010001111,
322 0b011000000000000,
323 0b111110000000000,
324 0b101000000000000,
325 0b000000000001111,
326 0b000100010001111,
327 0b001000010001111,
328 0b000110000000000,
329 };
330
331 static const uint16_t gen6_src_index_table[32] = {
332 0b000000000000,
333 0b010110001000,
334 0b010001101000,
335 0b001000101000,
336 0b011010010000,
337 0b000100100000,
338 0b010001101100,
339 0b010101110000,
340 0b011001111000,
341 0b001100101000,
342 0b010110001100,
343 0b001000100000,
344 0b010110001010,
345 0b000000000010,
346 0b010101010000,
347 0b010101101000,
348 0b111101001100,
349 0b111100101100,
350 0b011001110000,
351 0b010110001001,
352 0b010101011000,
353 0b001101001000,
354 0b010000101100,
355 0b010000000000,
356 0b001101110000,
357 0b001100010000,
358 0b001100000000,
359 0b010001101010,
360 0b001101111000,
361 0b000001110000,
362 0b001100100000,
363 0b001101010000,
364 };
365
366 static const uint32_t gen7_control_index_table[32] = {
367 0b0000000000000000010,
368 0b0000100000000000000,
369 0b0000100000000000001,
370 0b0000100000000000010,
371 0b0000100000000000011,
372 0b0000100000000000100,
373 0b0000100000000000101,
374 0b0000100000000000111,
375 0b0000100000000001000,
376 0b0000100000000001001,
377 0b0000100000000001101,
378 0b0000110000000000000,
379 0b0000110000000000001,
380 0b0000110000000000010,
381 0b0000110000000000011,
382 0b0000110000000000100,
383 0b0000110000000000101,
384 0b0000110000000000111,
385 0b0000110000000001001,
386 0b0000110000000001101,
387 0b0000110000000010000,
388 0b0000110000100000000,
389 0b0001000000000000000,
390 0b0001000000000000010,
391 0b0001000000000000100,
392 0b0001000000100000000,
393 0b0010110000000000000,
394 0b0010110000000010000,
395 0b0011000000000000000,
396 0b0011000000100000000,
397 0b0101000000000000000,
398 0b0101000000100000000,
399 };
400
401 static const uint32_t gen7_datatype_table[32] = {
402 0b001000000000000001,
403 0b001000000000100000,
404 0b001000000000100001,
405 0b001000000001100001,
406 0b001000000010111101,
407 0b001000001011111101,
408 0b001000001110100001,
409 0b001000001110100101,
410 0b001000001110111101,
411 0b001000010000100001,
412 0b001000110000100000,
413 0b001000110000100001,
414 0b001001010010100101,
415 0b001001110010100100,
416 0b001001110010100101,
417 0b001111001110111101,
418 0b001111011110011101,
419 0b001111011110111100,
420 0b001111011110111101,
421 0b001111111110111100,
422 0b000000001000001100,
423 0b001000000000111101,
424 0b001000000010100101,
425 0b001000010000100000,
426 0b001001010010100100,
427 0b001001110010000100,
428 0b001010010100001001,
429 0b001101111110111101,
430 0b001111111110111101,
431 0b001011110110101100,
432 0b001010010100101000,
433 0b001010110100101000,
434 };
435
436 static const uint16_t gen7_subreg_table[32] = {
437 0b000000000000000,
438 0b000000000000001,
439 0b000000000001000,
440 0b000000000001111,
441 0b000000000010000,
442 0b000000010000000,
443 0b000000100000000,
444 0b000000110000000,
445 0b000001000000000,
446 0b000001000010000,
447 0b000010100000000,
448 0b001000000000000,
449 0b001000000000001,
450 0b001000010000001,
451 0b001000010000010,
452 0b001000010000011,
453 0b001000010000100,
454 0b001000010000111,
455 0b001000010001000,
456 0b001000010001110,
457 0b001000010001111,
458 0b001000110000000,
459 0b001000111101000,
460 0b010000000000000,
461 0b010000110000000,
462 0b011000000000000,
463 0b011110010000111,
464 0b100000000000000,
465 0b101000000000000,
466 0b110000000000000,
467 0b111000000000000,
468 0b111000000011100,
469 };
470
471 static const uint16_t gen7_src_index_table[32] = {
472 0b000000000000,
473 0b000000000010,
474 0b000000010000,
475 0b000000010010,
476 0b000000011000,
477 0b000000100000,
478 0b000000101000,
479 0b000001001000,
480 0b000001010000,
481 0b000001110000,
482 0b000001111000,
483 0b001100000000,
484 0b001100000010,
485 0b001100001000,
486 0b001100010000,
487 0b001100010010,
488 0b001100100000,
489 0b001100101000,
490 0b001100111000,
491 0b001101000000,
492 0b001101000010,
493 0b001101001000,
494 0b001101010000,
495 0b001101100000,
496 0b001101101000,
497 0b001101110000,
498 0b001101110001,
499 0b001101111000,
500 0b010001101000,
501 0b010001101001,
502 0b010001101010,
503 0b010110001000,
504 };
505
506 static const uint32_t gen8_control_index_table[32] = {
507 0b0000000000000000010,
508 0b0000100000000000000,
509 0b0000100000000000001,
510 0b0000100000000000010,
511 0b0000100000000000011,
512 0b0000100000000000100,
513 0b0000100000000000101,
514 0b0000100000000000111,
515 0b0000100000000001000,
516 0b0000100000000001001,
517 0b0000100000000001101,
518 0b0000110000000000000,
519 0b0000110000000000001,
520 0b0000110000000000010,
521 0b0000110000000000011,
522 0b0000110000000000100,
523 0b0000110000000000101,
524 0b0000110000000000111,
525 0b0000110000000001001,
526 0b0000110000000001101,
527 0b0000110000000010000,
528 0b0000110000100000000,
529 0b0001000000000000000,
530 0b0001000000000000010,
531 0b0001000000000000100,
532 0b0001000000100000000,
533 0b0010110000000000000,
534 0b0010110000000010000,
535 0b0011000000000000000,
536 0b0011000000100000000,
537 0b0101000000000000000,
538 0b0101000000100000000,
539 };
540
541 static const uint32_t gen8_datatype_table[32] = {
542 0b001000000000000000001,
543 0b001000000000001000000,
544 0b001000000000001000001,
545 0b001000000000011000001,
546 0b001000000000101011101,
547 0b001000000010111011101,
548 0b001000000011101000001,
549 0b001000000011101000101,
550 0b001000000011101011101,
551 0b001000001000001000001,
552 0b001000011000001000000,
553 0b001000011000001000001,
554 0b001000101000101000101,
555 0b001000111000101000100,
556 0b001000111000101000101,
557 0b001011100011101011101,
558 0b001011101011100011101,
559 0b001011101011101011100,
560 0b001011101011101011101,
561 0b001011111011101011100,
562 0b000000000010000001100,
563 0b001000000000001011101,
564 0b001000000000101000101,
565 0b001000001000001000000,
566 0b001000101000101000100,
567 0b001000111000100000100,
568 0b001001001001000001001,
569 0b001010111011101011101,
570 0b001011111011101011101,
571 0b001001111001101001100,
572 0b001001001001001001000,
573 0b001001011001001001000,
574 };
575
576 static const uint16_t gen8_subreg_table[32] = {
577 0b000000000000000,
578 0b000000000000001,
579 0b000000000001000,
580 0b000000000001111,
581 0b000000000010000,
582 0b000000010000000,
583 0b000000100000000,
584 0b000000110000000,
585 0b000001000000000,
586 0b000001000010000,
587 0b000001010000000,
588 0b001000000000000,
589 0b001000000000001,
590 0b001000010000001,
591 0b001000010000010,
592 0b001000010000011,
593 0b001000010000100,
594 0b001000010000111,
595 0b001000010001000,
596 0b001000010001110,
597 0b001000010001111,
598 0b001000110000000,
599 0b001000111101000,
600 0b010000000000000,
601 0b010000110000000,
602 0b011000000000000,
603 0b011110010000111,
604 0b100000000000000,
605 0b101000000000000,
606 0b110000000000000,
607 0b111000000000000,
608 0b111000000011100,
609 };
610
611 static const uint16_t gen8_src_index_table[32] = {
612 0b000000000000,
613 0b000000000010,
614 0b000000010000,
615 0b000000010010,
616 0b000000011000,
617 0b000000100000,
618 0b000000101000,
619 0b000001001000,
620 0b000001010000,
621 0b000001110000,
622 0b000001111000,
623 0b001100000000,
624 0b001100000010,
625 0b001100001000,
626 0b001100010000,
627 0b001100010010,
628 0b001100100000,
629 0b001100101000,
630 0b001100111000,
631 0b001101000000,
632 0b001101000010,
633 0b001101001000,
634 0b001101010000,
635 0b001101100000,
636 0b001101101000,
637 0b001101110000,
638 0b001101110001,
639 0b001101111000,
640 0b010001101000,
641 0b010001101001,
642 0b010001101010,
643 0b010110001000,
644 };
645
646 static const uint32_t gen11_datatype_table[32] = {
647 0b001000000000000000001,
648 0b001000000000001000000,
649 0b001000000000001000001,
650 0b001000000000011000001,
651 0b001000000000101100101,
652 0b001000000101111100101,
653 0b001000000100101000001,
654 0b001000000100101000101,
655 0b001000000100101100101,
656 0b001000001000001000001,
657 0b001000011000001000000,
658 0b001000011000001000001,
659 0b001000101000101000101,
660 0b001000111000101000100,
661 0b001000111000101000101,
662 0b001100100100101100101,
663 0b001100101100100100101,
664 0b001100101100101100100,
665 0b001100101100101100101,
666 0b001100111100101100100,
667 0b000000000010000001100,
668 0b001000000000001100101,
669 0b001000000000101000101,
670 0b001000001000001000000,
671 0b001000101000101000100,
672 0b001000111000100000100,
673 0b001001001001000001001,
674 0b001101111100101100101,
675 0b001100111100101100101,
676 0b001001111001101001100,
677 0b001001001001001001000,
678 0b001001011001001001000,
679 };
680
681 static const uint32_t gen12_control_index_table[32] = {
682 0b000000000000000000100, /* (16|M0) */
683 0b000000000000000000011, /* (8|M0) */
684 0b000000010000000000000, /* (W) (1|M0) */
685 0b000000010000000000100, /* (W) (16|M0) */
686 0b000000010000000000011, /* (W) (8|M0) */
687 0b010000000000000000100, /* (16|M0) (ge)f0.0 */
688 0b000000000000000100100, /* (16|M16) */
689 0b010100000000000000100, /* (16|M0) (lt)f0.0 */
690 0b000000000000000000000, /* (1|M0) */
691 0b000010000000000000100, /* (16|M0) (sat) */
692 0b000000000000000010011, /* (8|M8) */
693 0b001100000000000000100, /* (16|M0) (gt)f0.0 */
694 0b000100000000000000100, /* (16|M0) (eq)f0.0 */
695 0b000100010000000000100, /* (W) (16|M0) (eq)f0.0 */
696 0b001000000000000000100, /* (16|M0) (ne)f0.0 */
697 0b000000000000100000100, /* (f0.0) (16|M0) */
698 0b010100000000000000011, /* (8|M0) (lt)f0.0 */
699 0b000000000000110000100, /* (f1.0) (16|M0) */
700 0b000000010000000000001, /* (W) (2|M0) */
701 0b000000000000101000100, /* (f0.1) (16|M0) */
702 0b000000000000111000100, /* (f1.1) (16|M0) */
703 0b010000010000000000100, /* (W) (16|M0) (ge)f0.0 */
704 0b000000000000000100011, /* (8|M16) */
705 0b000000000000000110011, /* (8|M24) */
706 0b010100010000000000100, /* (W) (16|M0) (lt)f0.0 */
707 0b010000000000000000011, /* (8|M0) (ge)f0.0 */
708 0b000100010000000000000, /* (W) (1|M0) (eq)f0.0 */
709 0b000010000000000000011, /* (8|M0) (sat) */
710 0b010100000000010000100, /* (16|M0) (lt)f1.0 */
711 0b000100000000000000011, /* (8|M0) (eq)f0.0 */
712 0b000001000000000000011, /* (8|M0) {AccWrEn} */
713 0b000000010000000100100, /* (W) (16|M16) */
714 };
715
716 static const uint32_t gen12_datatype_table[32] = {
717 0b11010110100101010100, /* grf<1>:f grf:f grf:f */
718 0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
719 0b00000010101101010100, /* grf<1>:f imm:f arf:ub */
720 0b01010110110101010100, /* grf<1>:f grf:f imm:f */
721 0b11010100100101010100, /* arf<1>:f grf:f grf:f */
722 0b11010010100101010100, /* grf<1>:f arf:f grf:f */
723 0b01010100110101010100, /* arf<1>:f grf:f imm:f */
724 0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
725 0b11010000100101010100, /* arf<1>:f arf:f grf:f */
726 0b00101110110011001100, /* grf<1>:d grf:d imm:w */
727 0b10110110100011001100, /* grf<1>:d grf:d grf:d */
728 0b01010010110101010100, /* grf<1>:f arf:f imm:f */
729 0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
730 0b01010000110101010100, /* arf<1>:f arf:f imm:f */
731 0b00110110110011001100, /* grf<1>:d grf:d imm:d */
732 0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
733 0b00000111000101010100, /* grf<2>:f grf:f arf:ub */
734 0b00101100110011001100, /* arf<1>:d grf:d imm:w */
735 0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
736 0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
737 0b00100110110000101010, /* grf<1>:w grf:uw imm:uv */
738 0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
739 0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
740 0b00000110100101001100, /* grf<1>:d grf:f arf:ub */
741 0b10001100100011001100, /* arf<1>:d grf:d grf:uw */
742 0b00000110100001010100, /* grf<1>:f grf:ud arf:ub */
743 0b00101110110001001100, /* grf<1>:d grf:ud imm:w */
744 0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
745 0b00000110100000110100, /* grf<1>:f grf:uw arf:ub */
746 0b00000110100000010100, /* grf<1>:f grf:ub arf:ub */
747 0b00000110100011010100, /* grf<1>:f grf:d arf:ub */
748 0b00000010100101010100, /* grf<1>:f arf:f arf:ub */
749 };
750
751 static const uint16_t gen12_subreg_table[32] = {
752 0b000000000000000, /* .0 .0 .0 */
753 0b100000000000000, /* .0 .0 .16 */
754 0b001000000000000, /* .0 .0 .4 */
755 0b011000000000000, /* .0 .0 .12 */
756 0b000000010000000, /* .0 .4 .0 */
757 0b010000000000000, /* .0 .0 .8 */
758 0b101000000000000, /* .0 .0 .20 */
759 0b000000000001000, /* .8 .0 .0 */
760 0b000000100000000, /* .0 .8 .0 */
761 0b110000000000000, /* .0 .0 .24 */
762 0b111000000000000, /* .0 .0 .28 */
763 0b000001000000000, /* .0 .16 .0 */
764 0b000000000000100, /* .4 .0 .0 */
765 0b000001100000000, /* .0 .24 .0 */
766 0b000001010000000, /* .0 .20 .0 */
767 0b000000110000000, /* .0 .12 .0 */
768 0b000001110000000, /* .0 .28 .0 */
769 0b000000000011100, /* .28 .0 .0 */
770 0b000000000010000, /* .16 .0 .0 */
771 0b000000000001100, /* .12 .0 .0 */
772 0b000000000011000, /* .24 .0 .0 */
773 0b000000000010100, /* .20 .0 .0 */
774 0b000000000000010, /* .2 .0 .0 */
775 0b000000101000000, /* .0 .10 .0 */
776 0b000000001000000, /* .0 .2 .0 */
777 0b000000010000100, /* .4 .4 .0 */
778 0b000000001011100, /* .28 .2 .0 */
779 0b000000001000010, /* .2 .2 .0 */
780 0b000000110001100, /* .12 .12 .0 */
781 0b000000000100000, /* .0 .1 .0 */
782 0b000000001100000, /* .0 .3 .0 */
783 0b110001100000000, /* .0 .24 .24 */
784 };
785
786 static const uint16_t gen12_src0_index_table[16] = {
787 0b010001100100, /* r<8;8,1> */
788 0b000000000000, /* r<0;1,0> */
789 0b010001100110, /* -r<8;8,1> */
790 0b010001100101, /* (abs)r<8;8,1> */
791 0b000000000010, /* -r<0;1,0> */
792 0b001000000000, /* r<2;1,0> */
793 0b001001000000, /* r<2;4,0> */
794 0b001101000000, /* r<4;4,0> */
795 0b001000100100, /* r<2;2,1> */
796 0b001100000000, /* r<4;1,0> */
797 0b001000100110, /* -r<2;2,1> */
798 0b001101000100, /* r<4;4,1> */
799 0b010001100111, /* -(abs)r<8;8,1> */
800 0b000100000000, /* r<1;1,0> */
801 0b000000000001, /* (abs)r<0;1,0> */
802 0b111100010000, /* r[a]<1,0> */
803 };
804
805 static const uint16_t gen12_src1_index_table[16] = {
806 0b000100011001, /* r<8;8,1> */
807 0b000000000000, /* r<0;1,0> */
808 0b100100011001, /* -r<8;8,1> */
809 0b100000000000, /* -r<0;1,0> */
810 0b010100011001, /* (abs)r<8;8,1> */
811 0b100011010000, /* -r<4;4,0> */
812 0b000010000000, /* r<2;1,0> */
813 0b000010001001, /* r<2;2,1> */
814 0b100010001001, /* -r<2;2,1> */
815 0b000011010000, /* r<4;4,0> */
816 0b000011010001, /* r<4;4,1> */
817 0b000011000000, /* r<4;1,0> */
818 0b110100011001, /* -(abs)r<8;8,1> */
819 0b010000000000, /* (abs)r<0;1,0> */
820 0b110000000000, /* -(abs)r<0;1,0> */
821 0b100011010001, /* -r<4;4,1> */
822 };
823
824 /* This is actually the control index table for Cherryview (26 bits), but the
825 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
826 * the start.
827 *
828 * The low 24 bits have the same mappings on both hardware.
829 */
830 static const uint32_t gen8_3src_control_index_table[4] = {
831 0b00100000000110000000000001,
832 0b00000000000110000000000001,
833 0b00000000001000000000000001,
834 0b00000000001000000000100001,
835 };
836
837 /* This is actually the control index table for Cherryview (49 bits), but the
838 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
839 * at the start.
840 *
841 * The low 44 bits have the same mappings on both hardware, and since the high
842 * three bits on Broadwell are zero, we can reuse Cherryview's table.
843 */
844 static const uint64_t gen8_3src_source_index_table[4] = {
845 0b0000001110010011100100111001000001111000000000000,
846 0b0000001110010011100100111001000001111000000000010,
847 0b0000001110010011100100111001000001111000000001000,
848 0b0000001110010011100100111001000001111000000100000,
849 };
850
851 static const uint64_t gen12_3src_control_index_table[32] = {
852 0b000001001010010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
853 0b000001001010010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
854 0b000001001000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
855 0b000001001010010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
856 0b000001001000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
857 0b000001001000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
858 0b000001001010010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
859 0b000001001000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
860 0b000001001010010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
861 0b000001001010010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
862 0b000001001000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
863 0b000001001010010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
864 0b000001001010010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
865 0b000001001000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
866 0b000001001010010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
867 0b000001001010010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
868 0b000001001000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
869 0b000001001000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
870 0b000001001010010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
871 0b000001001010010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
872 0b000001001000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
873 0b000001001000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
874 0b000001001010010101000000000000100011, /* (8|M16) grf<1>:f :f :f :f */
875 0b000001001010010101000000000000110011, /* (8|M24) grf<1>:f :f :f :f */
876 0b000001001000010101010000000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
877 0b000001001010010101010010000000000100, /* (W) (16|M0) (sat)grf<1>:f :f :f :f */
878 0b000001001010010101000010000000100100, /* (W) (16|M16) grf<1>:f :f :f :f */
879 0b000001001010010001000010000000000000, /* (W) (1|M0) grf<1>:ud :ud :ud :ud */
880 0b000001001000010101000000000000100100, /* (16|M16) arf<1>:f :f :f :f */
881 0b000001001010010101010000000000100100, /* (16|M16) (sat)grf<1>:f :f :f :f */
882 0b000001001010010101000010000000000010, /* (W) (4|M0) grf<1>:f :f :f :f */
883 0b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */
884 };
885
886 static const uint32_t gen12_3src_source_index_table[32] = {
887 0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */
888 0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */
889 0b101101100001101000011, /* grf<8;1> grf<8;1> grf<1> */
890 0b100101100001101000011, /* grf<8;1> grf<8;1> grf<0> */
891 0b101100000000101000011, /* grf<8;1> grf<0;0> grf<1> */
892 0b101101100001101001011, /* -grf<8;1> grf<8;1> grf<1> */
893 0b101001100001101000011, /* grf<8;1> arf<8;1> grf<1> */
894 0b100001100001100000000, /* grf<0;0> arf<8;1> grf<0> */
895 0b101101100001100000000, /* grf<0;0> grf<8;1> grf<1> */
896 0b101101100101101000011, /* grf<8;1> grf<8;1> -grf<1> */
897 0b101101110001101000011, /* grf<8;1> -grf<8;1> grf<1> */
898 0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */
899 0b100001100001101000011, /* grf<8;1> arf<8;1> grf<0> */
900 0b100101110001100000000, /* grf<0;0> -grf<8;1> grf<0> */
901 0b100101110001101000011, /* grf<8;1> -grf<8;1> grf<0> */
902 0b100101100001101001011, /* -grf<8;1> grf<8;1> grf<0> */
903 0b100100000000101000011, /* grf<8;1> grf<0;0> grf<0> */
904 0b100101100001100001000, /* -grf<0;0> grf<8;1> grf<0> */
905 0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> */
906 0b101101110001100000000, /* grf<0;0> -grf<8;1> grf<1> */
907 0b100101100101100000000, /* grf<0;0> grf<8;1> -grf<0> */
908 0b101001100001100000000, /* grf<0;0> arf<8;1> grf<1> */
909 0b100101100101101000011, /* grf<8;1> grf<8;1> -grf<0> */
910 0b101101100101101001011, /* -grf<8;1> grf<8;1> -grf<1> */
911 0b101001100001101001011, /* -grf<8;1> arf<8;1> grf<1> */
912 0b101101110001101001011, /* -grf<8;1> -grf<8;1> grf<1> */
913 0b101100010000101000011, /* grf<8;1> -grf<0;0> grf<1> */
914 0b101100000100101000011, /* grf<8;1> grf<0;0> -grf<1> */
915 0b101101100001100001000, /* -grf<0;0> grf<8;1> grf<1> */
916 0b101101100101100000000, /* grf<0;0> grf<8;1> -grf<1> */
917 0b100100000100101000011, /* grf<8;1> grf<0;0> -grf<0> */
918 0b101001100101101000011, /* grf<8;1> arf<8;1> -grf<1> */
919 };
920
921 static const uint32_t gen12_3src_subreg_table[32] = {
922 0b00000000000000000000, /* .0 .0 .0 .0 */
923 0b00100000000000000000, /* .0 .0 .0 .4 */
924 0b00000000000110000000, /* .0 .12 .0 .0 */
925 0b10100000000000000000, /* .0 .0 .0 .20 */
926 0b10000000001110000000, /* .0 .28 .0 .16 */
927 0b01100000000000000000, /* .0 .0 .0 .12 */
928 0b01000000000000000000, /* .0 .0 .0 .8 */
929 0b00000010000000000000, /* .0 .0 .8 .0 */
930 0b00000001000000000000, /* .0 .0 .4 .0 */
931 0b11000000000000000000, /* .0 .0 .0 .24 */
932 0b10000000000000000000, /* .0 .0 .0 .16 */
933 0b11100000000000000000, /* .0 .0 .0 .28 */
934 0b00000110000000000000, /* .0 .0 .24 .0 */
935 0b00000000000010000000, /* .0 .4 .0 .0 */
936 0b00000100000000000000, /* .0 .0 .16 .0 */
937 0b00000011000000000000, /* .0 .0 .12 .0 */
938 0b00000101000000000000, /* .0 .0 .20 .0 */
939 0b00000111000000000000, /* .0 .0 .28 .0 */
940 0b00000000000100000000, /* .0 .8 .0 .0 */
941 0b00000000001000000000, /* .0 .16 .0 .0 */
942 0b00000000001100000000, /* .0 .24 .0 .0 */
943 0b00000000001010000000, /* .0 .20 .0 .0 */
944 0b00000000001110000000, /* .0 .28 .0 .0 */
945 0b11000000001110000000, /* .0 .28 .0 .24 */
946 0b00100000000100000000, /* .0 .8 .0 .4 */
947 0b00100000000110000000, /* .0 .12 .0 .4 */
948 0b01000000000110000000, /* .0 .12 .0 .8 */
949 0b10000000001100000000, /* .0 .24 .0 .16 */
950 0b10000000001010000000, /* .0 .20 .0 .16 */
951 0b01100000000010000000, /* .0 .4 .0 .12 */
952 0b10100000001110000000, /* .0 .28 .0 .20 */
953 0b01000000000010000000, /* .0 .4 .0 .8 */
954 };
955
/**
 * Per-device lookup context shared by the compaction and uncompaction
 * helpers: the device description plus the index tables searched when
 * mapping uncompacted instruction bits to compact indices and back.
 */
struct compaction_state {
   /** Device the tables below were selected for. */
   const struct gen_device_info *devinfo;

   /** Table searched by set_control_index(). */
   const uint32_t *control_index_table;

   /** Table searched by set_datatype_index(). */
   const uint32_t *datatype_table;

   /** Table searched by set_subreg_index(). */
   const uint16_t *subreg_table;

   /** Table searched by set_src0_index(). */
   const uint16_t *src0_index_table;

   /** Table searched by set_src1_index() for non-immediate sources. */
   const uint16_t *src1_index_table;
};
964
965 static void compaction_state_init(struct compaction_state *c,
966 const struct gen_device_info *devinfo);
967
968 static bool
969 set_control_index(const struct compaction_state *c,
970 brw_compact_inst *dst, const brw_inst *src)
971 {
972 const struct gen_device_info *devinfo = c->devinfo;
973 uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
974
975 if (devinfo->gen >= 12) {
976 uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /* 4b */
977 (brw_inst_bits(src, 34, 34) << 16) | /* 1b */
978 (brw_inst_bits(src, 33, 33) << 15) | /* 1b */
979 (brw_inst_bits(src, 32, 32) << 14) | /* 1b */
980 (brw_inst_bits(src, 31, 31) << 13) | /* 1b */
981 (brw_inst_bits(src, 28, 28) << 12) | /* 1b */
982 (brw_inst_bits(src, 27, 24) << 8) | /* 4b */
983 (brw_inst_bits(src, 23, 22) << 6) | /* 2b */
984 (brw_inst_bits(src, 21, 19) << 3) | /* 3b */
985 (brw_inst_bits(src, 18, 16)); /* 3b */
986 } else if (devinfo->gen >= 8) {
987 uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /* 3b */
988 (brw_inst_bits(src, 23, 12) << 4) | /* 12b */
989 (brw_inst_bits(src, 10, 9) << 2) | /* 2b */
990 (brw_inst_bits(src, 34, 34) << 1) | /* 1b */
991 (brw_inst_bits(src, 8, 8)); /* 1b */
992 } else {
993 uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
994 (brw_inst_bits(src, 23, 8)); /* 16b */
995
996 /* On gen7, the flag register and subregister numbers are integrated into
997 * the control index.
998 */
999 if (devinfo->gen == 7)
1000 uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
1001 }
1002
1003 for (int i = 0; i < 32; i++) {
1004 if (c->control_index_table[i] == uncompacted) {
1005 brw_compact_inst_set_control_index(devinfo, dst, i);
1006 return true;
1007 }
1008 }
1009
1010 return false;
1011 }
1012
1013 static bool
1014 set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
1015 const brw_inst *src, bool is_immediate)
1016 {
1017 const struct gen_device_info *devinfo = c->devinfo;
1018 uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1019
1020 if (devinfo->gen >= 12) {
1021 uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /* 4b */
1022 (brw_inst_bits(src, 66, 66) << 14) | /* 1b */
1023 (brw_inst_bits(src, 50, 50) << 13) | /* 1b */
1024 (brw_inst_bits(src, 49, 48) << 11) | /* 2b */
1025 (brw_inst_bits(src, 47, 47) << 10) | /* 1b */
1026 (brw_inst_bits(src, 46, 46) << 9) | /* 1b */
1027 (brw_inst_bits(src, 43, 40) << 5) | /* 4b */
1028 (brw_inst_bits(src, 39, 36) << 1) | /* 4b */
1029 (brw_inst_bits(src, 35, 35)); /* 1b */
1030
1031 /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1032 * is present
1033 */
1034 if (!is_immediate) {
1035 uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
1036 }
1037 } else if (devinfo->gen >= 8) {
1038 uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /* 3b */
1039 (brw_inst_bits(src, 94, 89) << 12) | /* 6b */
1040 (brw_inst_bits(src, 46, 35)); /* 12b */
1041 } else {
1042 uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /* 3b */
1043 (brw_inst_bits(src, 46, 32)); /* 15b */
1044 }
1045
1046 for (int i = 0; i < 32; i++) {
1047 if (c->datatype_table[i] == uncompacted) {
1048 brw_compact_inst_set_datatype_index(devinfo, dst, i);
1049 return true;
1050 }
1051 }
1052
1053 return false;
1054 }
1055
1056 static bool
1057 set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1058 const brw_inst *src, bool is_immediate)
1059 {
1060 const struct gen_device_info *devinfo = c->devinfo;
1061 uint16_t uncompacted; /* 15b */
1062
1063 if (devinfo->gen >= 12) {
1064 uncompacted = (brw_inst_bits(src, 55, 51) << 0) | /* 5b */
1065 (brw_inst_bits(src, 71, 67) << 5); /* 5b */
1066
1067 if (!is_immediate)
1068 uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1069 } else {
1070 uncompacted = (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
1071 (brw_inst_bits(src, 68, 64) << 5); /* 5b */
1072
1073 if (!is_immediate)
1074 uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1075 }
1076
1077 for (int i = 0; i < 32; i++) {
1078 if (c->subreg_table[i] == uncompacted) {
1079 brw_compact_inst_set_subreg_index(devinfo, dst, i);
1080 return true;
1081 }
1082 }
1083
1084 return false;
1085 }
1086
1087 static bool
1088 set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1089 const brw_inst *src)
1090 {
1091 const struct gen_device_info *devinfo = c->devinfo;
1092 uint16_t uncompacted; /* 12b */
1093 int table_len;
1094
1095 if (devinfo->gen >= 12) {
1096 table_len = ARRAY_SIZE(gen12_src0_index_table);
1097 uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /* 4b */
1098 (brw_inst_bits(src, 83, 81) << 5) | /* 3b */
1099 (brw_inst_bits(src, 80, 80) << 4) | /* 1b */
1100 (brw_inst_bits(src, 65, 64) << 2) | /* 2b */
1101 (brw_inst_bits(src, 45, 44)); /* 2b */
1102 } else {
1103 table_len = ARRAY_SIZE(gen8_src_index_table);
1104 uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
1105 }
1106
1107 for (int i = 0; i < table_len; i++) {
1108 if (c->src0_index_table[i] == uncompacted) {
1109 brw_compact_inst_set_src0_index(devinfo, dst, i);
1110 return true;
1111 }
1112 }
1113
1114 return false;
1115 }
1116
1117 static bool
1118 set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1119 const brw_inst *src, bool is_immediate, unsigned imm)
1120 {
1121 const struct gen_device_info *devinfo = c->devinfo;
1122 if (is_immediate) {
1123 if (devinfo->gen >= 12) {
1124 /* src1 index takes the low 4 bits of the 12-bit compacted value */
1125 brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1126 } else {
1127 /* src1 index takes the high 5 bits of the 13-bit compacted value */
1128 brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1129 }
1130 return true;
1131 } else {
1132 uint16_t uncompacted; /* 12b */
1133 int table_len;
1134
1135 if (devinfo->gen >= 12) {
1136 table_len = ARRAY_SIZE(gen12_src0_index_table);
1137 uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /* 2b */
1138 (brw_inst_bits(src, 119, 116) << 6) | /* 4b */
1139 (brw_inst_bits(src, 115, 113) << 3) | /* 3b */
1140 (brw_inst_bits(src, 112, 112) << 2) | /* 1b */
1141 (brw_inst_bits(src, 97, 96)); /* 2b */
1142 } else {
1143 table_len = ARRAY_SIZE(gen8_src_index_table);
1144 uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
1145 }
1146
1147 for (int i = 0; i < table_len; i++) {
1148 if (c->src1_index_table[i] == uncompacted) {
1149 brw_compact_inst_set_src1_index(devinfo, dst, i);
1150 return true;
1151 }
1152 }
1153 }
1154
1155 return false;
1156 }
1157
1158 static bool
1159 set_3src_control_index(const struct gen_device_info *devinfo,
1160 brw_compact_inst *dst, const brw_inst *src)
1161 {
1162 assert(devinfo->gen >= 8);
1163
1164 if (devinfo->gen >= 12) {
1165 uint64_t uncompacted = /* 36b/TGL+ */
1166 (brw_inst_bits(src, 95, 92) << 32) | /* 4b */
1167 (brw_inst_bits(src, 90, 88) << 29) | /* 3b */
1168 (brw_inst_bits(src, 82, 80) << 26) | /* 3b */
1169 (brw_inst_bits(src, 50, 50) << 25) | /* 1b */
1170 (brw_inst_bits(src, 48, 48) << 24) | /* 1b */
1171 (brw_inst_bits(src, 42, 40) << 21) | /* 3b */
1172 (brw_inst_bits(src, 39, 39) << 20) | /* 1b */
1173 (brw_inst_bits(src, 38, 36) << 17) | /* 3b */
1174 (brw_inst_bits(src, 34, 34) << 16) | /* 1b */
1175 (brw_inst_bits(src, 33, 33) << 15) | /* 1b */
1176 (brw_inst_bits(src, 32, 32) << 14) | /* 1b */
1177 (brw_inst_bits(src, 31, 31) << 13) | /* 1b */
1178 (brw_inst_bits(src, 28, 28) << 12) | /* 1b */
1179 (brw_inst_bits(src, 27, 24) << 8) | /* 4b */
1180 (brw_inst_bits(src, 23, 23) << 7) | /* 1b */
1181 (brw_inst_bits(src, 22, 22) << 6) | /* 1b */
1182 (brw_inst_bits(src, 21, 19) << 3) | /* 3b */
1183 (brw_inst_bits(src, 18, 16)); /* 3b */
1184
1185 for (unsigned i = 0; i < ARRAY_SIZE(gen12_3src_control_index_table); i++) {
1186 if (gen12_3src_control_index_table[i] == uncompacted) {
1187 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1188 return true;
1189 }
1190 }
1191 } else {
1192 uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1193 (brw_inst_bits(src, 34, 32) << 21) | /* 3b */
1194 (brw_inst_bits(src, 28, 8)); /* 21b */
1195
1196 if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1197 uncompacted |=
1198 brw_inst_bits(src, 36, 35) << 24; /* 2b */
1199 }
1200
1201 for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
1202 if (gen8_3src_control_index_table[i] == uncompacted) {
1203 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1204 return true;
1205 }
1206 }
1207 }
1208
1209 return false;
1210 }
1211
1212 static bool
1213 set_3src_source_index(const struct gen_device_info *devinfo,
1214 brw_compact_inst *dst, const brw_inst *src)
1215 {
1216 assert(devinfo->gen >= 8);
1217
1218 if (devinfo->gen >= 12) {
1219 uint32_t uncompacted = /* 21b/TGL+ */
1220 (brw_inst_bits(src, 114, 114) << 20) | /* 1b */
1221 (brw_inst_bits(src, 113, 112) << 18) | /* 2b */
1222 (brw_inst_bits(src, 98, 98) << 17) | /* 1b */
1223 (brw_inst_bits(src, 97, 96) << 15) | /* 2b */
1224 (brw_inst_bits(src, 91, 91) << 14) | /* 1b */
1225 (brw_inst_bits(src, 87, 86) << 12) | /* 2b */
1226 (brw_inst_bits(src, 85, 84) << 10) | /* 2b */
1227 (brw_inst_bits(src, 83, 83) << 9) | /* 1b */
1228 (brw_inst_bits(src, 66, 66) << 8) | /* 1b */
1229 (brw_inst_bits(src, 65, 64) << 6) | /* 2b */
1230 (brw_inst_bits(src, 47, 47) << 5) | /* 1b */
1231 (brw_inst_bits(src, 46, 46) << 4) | /* 1b */
1232 (brw_inst_bits(src, 45, 44) << 2) | /* 2b */
1233 (brw_inst_bits(src, 43, 43) << 1) | /* 1b */
1234 (brw_inst_bits(src, 35, 35)); /* 1b */
1235
1236 for (unsigned i = 0; i < ARRAY_SIZE(gen12_3src_source_index_table); i++) {
1237 if (gen12_3src_source_index_table[i] == uncompacted) {
1238 brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1239 return true;
1240 }
1241 }
1242 } else {
1243 uint64_t uncompacted = /* 46b/BDW; 49b/CHV/SKL+ */
1244 (brw_inst_bits(src, 83, 83) << 43) | /* 1b */
1245 (brw_inst_bits(src, 114, 107) << 35) | /* 8b */
1246 (brw_inst_bits(src, 93, 86) << 27) | /* 8b */
1247 (brw_inst_bits(src, 72, 65) << 19) | /* 8b */
1248 (brw_inst_bits(src, 55, 37)); /* 19b */
1249
1250 if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1251 uncompacted |=
1252 (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
1253 (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
1254 (brw_inst_bits(src, 84, 84) << 44); /* 1b */
1255 } else {
1256 uncompacted |=
1257 (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
1258 (brw_inst_bits(src, 104, 104) << 44); /* 1b */
1259 }
1260
1261 for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
1262 if (gen8_3src_source_index_table[i] == uncompacted) {
1263 brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1264 return true;
1265 }
1266 }
1267 }
1268
1269 return false;
1270 }
1271
1272 static bool
1273 set_3src_subreg_index(const struct gen_device_info *devinfo,
1274 brw_compact_inst *dst, const brw_inst *src)
1275 {
1276 assert(devinfo->gen >= 12);
1277
1278 uint32_t uncompacted = /* 20b/TGL+ */
1279 (brw_inst_bits(src, 119, 115) << 15) | /* 5b */
1280 (brw_inst_bits(src, 103, 99) << 10) | /* 5b */
1281 (brw_inst_bits(src, 71, 67) << 5) | /* 5b */
1282 (brw_inst_bits(src, 55, 51)); /* 5b */
1283
1284 for (unsigned i = 0; i < ARRAY_SIZE(gen12_3src_subreg_table); i++) {
1285 if (gen12_3src_subreg_table[i] == uncompacted) {
1286 brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1287 return true;
1288 }
1289 }
1290
1291 return false;
1292 }
1293
1294 static bool
1295 has_unmapped_bits(const struct gen_device_info *devinfo, const brw_inst *src)
1296 {
1297 /* EOT can only be mapped on a send if the src1 is an immediate */
1298 if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
1299 brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
1300 brw_inst_eot(devinfo, src))
1301 return true;
1302
1303 /* Check for instruction bits that don't map to any of the fields of the
1304 * compacted instruction. The instruction cannot be compacted if any of
1305 * them are set. They overlap with:
1306 * - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
1307 * - Dst.AddrImm[9] (bit 47 on Gen8)
1308 * - Src0.AddrImm[9] (bit 95 on Gen8)
1309 * - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
1310 * - UIP[31] (bit 95 on Gen8)
1311 */
1312 if (devinfo->gen >= 12) {
1313 assert(!brw_inst_bits(src, 7, 7));
1314 return false;
1315 } else if (devinfo->gen >= 8) {
1316 assert(!brw_inst_bits(src, 7, 7));
1317 return brw_inst_bits(src, 95, 95) ||
1318 brw_inst_bits(src, 47, 47) ||
1319 brw_inst_bits(src, 11, 11);
1320 } else {
1321 assert(!brw_inst_bits(src, 7, 7) &&
1322 !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90)));
1323 return brw_inst_bits(src, 95, 91) ||
1324 brw_inst_bits(src, 47, 47);
1325 }
1326 }
1327
1328 static bool
1329 has_3src_unmapped_bits(const struct gen_device_info *devinfo,
1330 const brw_inst *src)
1331 {
1332 /* Check for three-source instruction bits that don't map to any of the
1333 * fields of the compacted instruction. All of them seem to be reserved
1334 * bits currently.
1335 */
1336 if (devinfo->gen >= 12) {
1337 assert(!brw_inst_bits(src, 7, 7));
1338 } else if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1339 assert(!brw_inst_bits(src, 127, 127) &&
1340 !brw_inst_bits(src, 7, 7));
1341 } else {
1342 assert(devinfo->gen >= 8);
1343 assert(!brw_inst_bits(src, 127, 126) &&
1344 !brw_inst_bits(src, 105, 105) &&
1345 !brw_inst_bits(src, 84, 84) &&
1346 !brw_inst_bits(src, 7, 7));
1347
1348 /* Src1Type and Src2Type, used for mixed-precision floating point */
1349 if (brw_inst_bits(src, 36, 35))
1350 return true;
1351 }
1352
1353 return false;
1354 }
1355
1356 static bool
1357 brw_try_compact_3src_instruction(const struct gen_device_info *devinfo,
1358 brw_compact_inst *dst, const brw_inst *src)
1359 {
1360 assert(devinfo->gen >= 8);
1361
1362 if (has_3src_unmapped_bits(devinfo, src))
1363 return false;
1364
1365 #define compact(field) \
1366 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1367 #define compact_a16(field) \
1368 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1369
1370 compact(hw_opcode);
1371
1372 if (!set_3src_control_index(devinfo, dst, src))
1373 return false;
1374
1375 if (!set_3src_source_index(devinfo, dst, src))
1376 return false;
1377
1378 if (devinfo->gen >= 12) {
1379 if (!set_3src_subreg_index(devinfo, dst, src))
1380 return false;
1381
1382 compact(swsb);
1383 compact(debug_control);
1384 compact(dst_reg_nr);
1385 compact(src0_reg_nr);
1386 compact(src1_reg_nr);
1387 compact(src2_reg_nr);
1388 } else {
1389 compact(dst_reg_nr);
1390 compact_a16(src0_rep_ctrl);
1391 compact(debug_control);
1392 compact(saturate);
1393 compact_a16(src1_rep_ctrl);
1394 compact_a16(src2_rep_ctrl);
1395 compact(src0_reg_nr);
1396 compact(src1_reg_nr);
1397 compact(src2_reg_nr);
1398 compact_a16(src0_subreg_nr);
1399 compact_a16(src1_subreg_nr);
1400 compact_a16(src2_subreg_nr);
1401 }
1402 brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1403
1404 #undef compact
1405 #undef compact_a16
1406
1407 return true;
1408 }
1409
1410 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1411 * sources, and a 13th bit that's replicated through the high 20 bits.
1412 *
1413 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1414 * of packed vectors as compactable immediates.
1415 *
1416 * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1417 * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1418 * while for unsigned integers it is not.
1419 *
1420 * Returns the compacted immediate, or -1 if immediate cannot be compacted
1421 */
1422 static int
1423 compact_immediate(const struct gen_device_info *devinfo,
1424 enum brw_reg_type type, unsigned imm)
1425 {
1426 if (devinfo->gen >= 12) {
1427 /* 16-bit immediates need to be replicated through the 32-bit immediate
1428 * field
1429 */
1430 switch (type) {
1431 case BRW_REGISTER_TYPE_W:
1432 case BRW_REGISTER_TYPE_UW:
1433 case BRW_REGISTER_TYPE_HF:
1434 if ((imm >> 16) != (imm & 0xffff))
1435 return -1;
1436 break;
1437 default:
1438 break;
1439 }
1440
1441 switch (type) {
1442 case BRW_REGISTER_TYPE_F:
1443 /* We get the high 12-bits as-is; rest must be zero */
1444 if ((imm & 0xfffff) == 0)
1445 return (imm >> 20) & 0xfff;
1446 break;
1447 case BRW_REGISTER_TYPE_HF:
1448 /* We get the high 12-bits as-is; rest must be zero */
1449 if ((imm & 0xf) == 0)
1450 return (imm >> 4) & 0xfff;
1451 break;
1452 case BRW_REGISTER_TYPE_UD:
1453 case BRW_REGISTER_TYPE_VF:
1454 case BRW_REGISTER_TYPE_UV:
1455 case BRW_REGISTER_TYPE_V:
1456 /* We get the low 12-bits as-is; rest must be zero */
1457 if ((imm & 0xfffff000) == 0)
1458 return imm & 0xfff;
1459 break;
1460 case BRW_REGISTER_TYPE_UW:
1461 /* We get the low 12-bits as-is; rest must be zero */
1462 if ((imm & 0xf000) == 0)
1463 return imm & 0xfff;
1464 break;
1465 case BRW_REGISTER_TYPE_D:
1466 /* We get the low 11-bits as-is; 12th is replicated */
1467 if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1468 return imm & 0xfff;
1469 break;
1470 case BRW_REGISTER_TYPE_W:
1471 /* We get the low 11-bits as-is; 12th is replicated */
1472 if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1473 return imm & 0xfff;
1474 break;
1475 case BRW_REGISTER_TYPE_NF:
1476 case BRW_REGISTER_TYPE_DF:
1477 case BRW_REGISTER_TYPE_Q:
1478 case BRW_REGISTER_TYPE_UQ:
1479 case BRW_REGISTER_TYPE_B:
1480 case BRW_REGISTER_TYPE_UB:
1481 return -1;
1482 }
1483 } else {
1484 /* We get the low 12 bits as-is; 13th is replicated */
1485 if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1486 return imm & 0x1fff;
1487 }
1488 }
1489
1490 return -1;
1491 }
1492
1493 static int
1494 uncompact_immediate(const struct gen_device_info *devinfo,
1495 enum brw_reg_type type, unsigned compact_imm)
1496 {
1497 if (devinfo->gen >= 12) {
1498 switch (type) {
1499 case BRW_REGISTER_TYPE_F:
1500 return compact_imm << 20;
1501 case BRW_REGISTER_TYPE_HF:
1502 return (compact_imm << 20) | (compact_imm << 4);
1503 case BRW_REGISTER_TYPE_UD:
1504 case BRW_REGISTER_TYPE_VF:
1505 case BRW_REGISTER_TYPE_UV:
1506 case BRW_REGISTER_TYPE_V:
1507 return compact_imm;
1508 case BRW_REGISTER_TYPE_UW:
1509 /* Replicate */
1510 return compact_imm << 16 | compact_imm;
1511 case BRW_REGISTER_TYPE_D:
1512 /* Extend the 12th bit into the high 20 bits */
1513 return (int)(compact_imm << 20) >> 20;
1514 case BRW_REGISTER_TYPE_W:
1515 /* Extend the 12th bit into the high 4 bits and replicate */
1516 return ( (int)(compact_imm << 20) >> 4) |
1517 ((short)(compact_imm << 4) >> 4);
1518 case BRW_REGISTER_TYPE_NF:
1519 case BRW_REGISTER_TYPE_DF:
1520 case BRW_REGISTER_TYPE_Q:
1521 case BRW_REGISTER_TYPE_UQ:
1522 case BRW_REGISTER_TYPE_B:
1523 case BRW_REGISTER_TYPE_UB:
1524 unreachable("not reached");
1525 }
1526 } else {
1527 /* Replicate the 13th bit into the high 19 bits */
1528 return (int)(compact_imm << 19) >> 19;
1529 }
1530
1531 unreachable("not reached");
1532 }
1533
1534 static bool
1535 has_immediate(const struct gen_device_info *devinfo, const brw_inst *inst,
1536 enum brw_reg_type *type)
1537 {
1538 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1539 *type = brw_inst_src0_type(devinfo, inst);
1540 return *type != INVALID_REG_TYPE;
1541 } else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1542 *type = brw_inst_src1_type(devinfo, inst);
1543 return *type != INVALID_REG_TYPE;
1544 }
1545
1546 return false;
1547 }
1548
1549 /**
1550 * Applies some small changes to instruction types to increase chances of
1551 * compaction.
1552 */
1553 static brw_inst
1554 precompact(const struct gen_device_info *devinfo, brw_inst inst)
1555 {
1556 if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1557 return inst;
1558
1559 /* The Bspec's section titled "Non-present Operands" claims that if src0
1560 * is an immediate that src1's type must be the same as that of src0.
1561 *
1562 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1563 * that do not follow this rule. E.g., from the IVB/HSW table:
1564 *
1565 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1566 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
1567 *
1568 * And from the SNB table:
1569 *
1570 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1571 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
1572 *
1573 * Neither of these cause warnings from the simulator when used,
1574 * compacted or otherwise. In fact, all compaction mappings that have an
1575 * immediate in src0 use a:ud for src1.
1576 *
1577 * The GM45 instruction compaction tables do not contain mapped meanings
1578 * so it's not clear whether it has the restriction. We'll assume it was
1579 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1580 *
1581 * Don't do any of this for 64-bit immediates, since the src1 fields
1582 * overlap with the immediate and setting them would overwrite the
1583 * immediate we set.
1584 */
1585 if (devinfo->gen >= 6 &&
1586 !(devinfo->is_haswell &&
1587 brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&
1588 !(devinfo->gen >= 8 &&
1589 (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1590 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1591 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1592 brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1593 }
1594
1595 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1596 * for immediate values. Presumably the hardware engineers realized
1597 * that the only useful floating-point value that could be represented
1598 * in this format is 0.0, which can also be represented as a VF-typed
1599 * immediate, so they gave us the previously mentioned mapping on IVB+.
1600 *
1601 * Strangely, we do have a mapping for imm:f in src1, so we don't need
1602 * to do this there.
1603 *
1604 * If we see a 0.0:F, change the type to VF so that it can be compacted.
1605 *
1606 * Compaction of floating-point immediates is improved on Gen12, thus
1607 * removing the need for this.
1608 */
1609 if (devinfo->gen < 12 &&
1610 brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1611 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1612 brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1613 brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1614 enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1615 brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1616 }
1617
1618 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1619 * set the types to :UD so the instruction can be compacted.
1620 *
1621 * FINISHME: Use dst:f | imm:f on Gen12
1622 */
1623 if (devinfo->gen < 12 &&
1624 compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
1625 brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1626 brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1627 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1628 brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1629 enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1630 enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1631
1632 brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1633 brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1634 }
1635
1636 return inst;
1637 }
1638
1639 /**
1640 * Tries to compact instruction src into dst.
1641 *
1642 * It doesn't modify dst unless src is compactable, which is relied on by
1643 * brw_compact_instructions().
1644 */
1645 static bool
1646 try_compact_instruction(const struct compaction_state *c,
1647 brw_compact_inst *dst, const brw_inst *src)
1648 {
1649 const struct gen_device_info *devinfo = c->devinfo;
1650 brw_compact_inst temp;
1651
1652 assert(brw_inst_cmpt_control(devinfo, src) == 0);
1653
1654 if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) {
1655 if (devinfo->gen >= 8) {
1656 memset(&temp, 0, sizeof(temp));
1657 if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1658 *dst = temp;
1659 return true;
1660 } else {
1661 return false;
1662 }
1663 } else {
1664 return false;
1665 }
1666 }
1667
1668 enum brw_reg_type type;
1669 bool is_immediate = has_immediate(devinfo, src, &type);
1670
1671 unsigned compacted_imm = 0;
1672
1673 if (is_immediate) {
1674 /* Instructions with immediates cannot be compacted on Gen < 6 */
1675 if (devinfo->gen < 6)
1676 return false;
1677
1678 compacted_imm = compact_immediate(devinfo, type,
1679 brw_inst_imm_ud(devinfo, src));
1680 if (compacted_imm == -1)
1681 return false;
1682 }
1683
1684 if (has_unmapped_bits(devinfo, src))
1685 return false;
1686
1687 memset(&temp, 0, sizeof(temp));
1688
1689 #define compact(field) \
1690 brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1691 #define compact_reg(field) \
1692 brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1693 brw_inst_##field##_da_reg_nr(devinfo, src))
1694
1695 compact(hw_opcode);
1696 compact(debug_control);
1697
1698 if (!set_control_index(c, &temp, src))
1699 return false;
1700 if (!set_datatype_index(c, &temp, src, is_immediate))
1701 return false;
1702 if (!set_subreg_index(c, &temp, src, is_immediate))
1703 return false;
1704 if (!set_src0_index(c, &temp, src))
1705 return false;
1706 if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1707 return false;
1708
1709 if (devinfo->gen >= 12) {
1710 compact(swsb);
1711 compact_reg(dst);
1712 compact_reg(src0);
1713
1714 if (is_immediate) {
1715 /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1716 brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1717 } else {
1718 compact_reg(src1);
1719 }
1720 } else {
1721 if (devinfo->gen >= 6) {
1722 compact(acc_wr_control);
1723 } else {
1724 compact(mask_control_ex);
1725 }
1726
1727 if (devinfo->gen <= 6)
1728 compact(flag_subreg_nr);
1729
1730 compact(cond_modifier);
1731
1732 compact_reg(dst);
1733 compact_reg(src0);
1734
1735 if (is_immediate) {
1736 /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1737 brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1738 } else {
1739 compact_reg(src1);
1740 }
1741 }
1742 brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1743
1744 #undef compact
1745 #undef compact_reg
1746
1747 *dst = temp;
1748
1749 return true;
1750 }
1751
1752 bool
1753 brw_try_compact_instruction(const struct gen_device_info *devinfo,
1754 brw_compact_inst *dst, const brw_inst *src)
1755 {
1756 struct compaction_state c;
1757 compaction_state_init(&c, devinfo);
1758 return try_compact_instruction(&c, dst, src);
1759 }
1760
1761 static void
1762 set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1763 brw_compact_inst *src)
1764 {
1765 const struct gen_device_info *devinfo = c->devinfo;
1766 uint32_t uncompacted =
1767 c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1768
1769 if (devinfo->gen >= 12) {
1770 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1771 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1772 brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1773 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1774 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1775 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1776 brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
1777 brw_inst_set_bits(dst, 23, 22, (uncompacted >> 6) & 0x3);
1778 brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
1779 brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
1780 } else if (devinfo->gen >= 8) {
1781 brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1782 brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
1783 brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
1784 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
1785 brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
1786 } else {
1787 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1788 brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
1789
1790 if (devinfo->gen == 7)
1791 brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1792 }
1793 }
1794
1795 static void
1796 set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1797 brw_compact_inst *src)
1798 {
1799 const struct gen_device_info *devinfo = c->devinfo;
1800 uint32_t uncompacted =
1801 c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1802
1803 if (devinfo->gen >= 12) {
1804 brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1805 brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1806 brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1807 brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1808 brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1809 brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1810 brw_inst_set_bits(dst, 46, 46, (uncompacted >> 9) & 0x1);
1811 brw_inst_set_bits(dst, 43, 40, (uncompacted >> 5) & 0xf);
1812 brw_inst_set_bits(dst, 39, 36, (uncompacted >> 1) & 0xf);
1813 brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
1814 } else if (devinfo->gen >= 8) {
1815 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1816 brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1817 brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
1818 } else {
1819 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1820 brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1821 }
1822 }
1823
1824 static void
1825 set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1826 brw_compact_inst *src)
1827 {
1828 const struct gen_device_info *devinfo = c->devinfo;
1829 uint16_t uncompacted =
1830 c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1831
1832 if (devinfo->gen >= 12) {
1833 brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
1834 brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
1835 brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
1836 } else {
1837 brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1838 brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
1839 brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
1840 }
1841 }
1842
1843 static void
1844 set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
1845 brw_compact_inst *src)
1846 {
1847 const struct gen_device_info *devinfo = c->devinfo;
1848 uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1849 uint16_t uncompacted = c->src0_index_table[compacted];
1850
1851 if (devinfo->gen >= 12) {
1852 brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));
1853 brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
1854 brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
1855 brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
1856 brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
1857 } else {
1858 brw_inst_set_bits(dst, 88, 77, uncompacted);
1859 }
1860 }
1861
1862 static void
1863 set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
1864 brw_compact_inst *src)
1865 {
1866 const struct gen_device_info *devinfo = c->devinfo;
1867 uint16_t uncompacted =
1868 c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
1869
1870 if (devinfo->gen >= 12) {
1871 brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
1872 brw_inst_set_bits(dst, 119, 116, (uncompacted >> 6) & 0xf);
1873 brw_inst_set_bits(dst, 115, 113, (uncompacted >> 3) & 0x7);
1874 brw_inst_set_bits(dst, 112, 112, (uncompacted >> 2) & 0x1);
1875 brw_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3);
1876 } else {
1877 brw_inst_set_bits(dst, 120, 109, uncompacted);
1878 }
1879 }
1880
1881 static void
1882 set_uncompacted_3src_control_index(const struct compaction_state *c,
1883 brw_inst *dst, brw_compact_inst *src)
1884 {
1885 const struct gen_device_info *devinfo = c->devinfo;
1886 assert(devinfo->gen >= 8);
1887
1888 if (devinfo->gen >= 12) {
1889 uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1890 uint64_t uncompacted = gen12_3src_control_index_table[compacted];
1891
1892 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
1893 brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
1894 brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
1895 brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
1896 brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
1897 brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
1898 brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
1899 brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
1900 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1901 brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1902 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1903 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1904 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1905 brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
1906 brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
1907 brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
1908 brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
1909 brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
1910 } else {
1911 uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1912 uint32_t uncompacted = gen8_3src_control_index_table[compacted];
1913
1914 brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1915 brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
1916
1917 if (devinfo->gen >= 9 || devinfo->is_cherryview)
1918 brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1919 }
1920 }
1921
1922 static void
1923 set_uncompacted_3src_source_index(const struct gen_device_info *devinfo,
1924 brw_inst *dst, brw_compact_inst *src)
1925 {
1926 assert(devinfo->gen >= 8);
1927
1928 uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
1929
1930 if (devinfo->gen >= 12) {
1931 uint32_t uncompacted = gen12_3src_source_index_table[compacted];
1932
1933 brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
1934 brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
1935 brw_inst_set_bits(dst, 98, 98, (uncompacted >> 17) & 0x1);
1936 brw_inst_set_bits(dst, 97, 96, (uncompacted >> 15) & 0x3);
1937 brw_inst_set_bits(dst, 91, 91, (uncompacted >> 14) & 0x1);
1938 brw_inst_set_bits(dst, 87, 86, (uncompacted >> 12) & 0x3);
1939 brw_inst_set_bits(dst, 85, 84, (uncompacted >> 10) & 0x3);
1940 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 9) & 0x1);
1941 brw_inst_set_bits(dst, 66, 66, (uncompacted >> 8) & 0x1);
1942 brw_inst_set_bits(dst, 65, 64, (uncompacted >> 6) & 0x3);
1943 brw_inst_set_bits(dst, 47, 47, (uncompacted >> 5) & 0x1);
1944 brw_inst_set_bits(dst, 46, 46, (uncompacted >> 4) & 0x1);
1945 brw_inst_set_bits(dst, 45, 44, (uncompacted >> 2) & 0x3);
1946 brw_inst_set_bits(dst, 43, 43, (uncompacted >> 1) & 0x1);
1947 brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
1948 } else {
1949 uint64_t uncompacted = gen8_3src_source_index_table[compacted];
1950
1951 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
1952 brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1953 brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
1954 brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
1955 brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
1956
1957 if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1958 brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1959 brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1960 brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
1961 } else {
1962 brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1963 brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1964 }
1965 }
1966 }
1967
1968 static void
1969 set_uncompacted_3src_subreg_index(const struct gen_device_info *devinfo,
1970 brw_inst *dst, brw_compact_inst *src)
1971 {
1972 assert(devinfo->gen >= 12);
1973
1974 uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
1975 uint32_t uncompacted = gen12_3src_subreg_table[compacted];
1976
1977 brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
1978 brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10) & 0x1f);
1979 brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
1980 brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
1981 }
1982
1983 static void
1984 brw_uncompact_3src_instruction(const struct compaction_state *c,
1985 brw_inst *dst, brw_compact_inst *src)
1986 {
1987 const struct gen_device_info *devinfo = c->devinfo;
1988 assert(devinfo->gen >= 8);
1989
1990 #define uncompact(field) \
1991 brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1992 #define uncompact_a16(field) \
1993 brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1994
1995 uncompact(hw_opcode);
1996
1997 if (devinfo->gen >= 12) {
1998 set_uncompacted_3src_control_index(c, dst, src);
1999 set_uncompacted_3src_source_index(devinfo, dst, src);
2000 set_uncompacted_3src_subreg_index(devinfo, dst, src);
2001
2002 uncompact(debug_control);
2003 uncompact(swsb);
2004 uncompact(dst_reg_nr);
2005 uncompact(src0_reg_nr);
2006 uncompact(src1_reg_nr);
2007 uncompact(src2_reg_nr);
2008 } else {
2009 set_uncompacted_3src_control_index(c, dst, src);
2010 set_uncompacted_3src_source_index(devinfo, dst, src);
2011
2012 uncompact(dst_reg_nr);
2013 uncompact_a16(src0_rep_ctrl);
2014 uncompact(debug_control);
2015 uncompact(saturate);
2016 uncompact_a16(src1_rep_ctrl);
2017 uncompact_a16(src2_rep_ctrl);
2018 uncompact(src0_reg_nr);
2019 uncompact(src1_reg_nr);
2020 uncompact(src2_reg_nr);
2021 uncompact_a16(src0_subreg_nr);
2022 uncompact_a16(src1_subreg_nr);
2023 uncompact_a16(src2_subreg_nr);
2024 }
2025 brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2026
2027 #undef uncompact
2028 #undef uncompact_a16
2029 }
2030
2031 static void
2032 uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
2033 brw_compact_inst *src)
2034 {
2035 const struct gen_device_info *devinfo = c->devinfo;
2036 memset(dst, 0, sizeof(*dst));
2037
2038 if (devinfo->gen >= 8 &&
2039 is_3src(devinfo, brw_opcode_decode(
2040 devinfo, brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
2041 brw_uncompact_3src_instruction(c, dst, src);
2042 return;
2043 }
2044
2045 #define uncompact(field) \
2046 brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
2047 #define uncompact_reg(field) \
2048 brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
2049 brw_compact_inst_##field##_reg_nr(devinfo, src))
2050
2051 uncompact(hw_opcode);
2052 uncompact(debug_control);
2053
2054 set_uncompacted_control(c, dst, src);
2055 set_uncompacted_datatype(c, dst, src);
2056 set_uncompacted_subreg(c, dst, src);
2057 set_uncompacted_src0(c, dst, src);
2058
2059 enum brw_reg_type type;
2060 if (has_immediate(devinfo, dst, &type)) {
2061 unsigned imm = uncompact_immediate(devinfo, type,
2062 brw_compact_inst_imm(devinfo, src));
2063 brw_inst_set_imm_ud(devinfo, dst, imm);
2064 } else {
2065 set_uncompacted_src1(c, dst, src);
2066 uncompact_reg(src1);
2067 }
2068
2069 if (devinfo->gen >= 12) {
2070 uncompact(swsb);
2071 uncompact_reg(dst);
2072 uncompact_reg(src0);
2073 } else {
2074 if (devinfo->gen >= 6) {
2075 uncompact(acc_wr_control);
2076 } else {
2077 uncompact(mask_control_ex);
2078 }
2079
2080 uncompact(cond_modifier);
2081
2082 if (devinfo->gen <= 6)
2083 uncompact(flag_subreg_nr);
2084
2085 uncompact_reg(dst);
2086 uncompact_reg(src0);
2087 }
2088 brw_inst_set_cmpt_control(devinfo, dst, false);
2089
2090 #undef uncompact
2091 #undef uncompact_reg
2092 }
2093
2094 void
2095 brw_uncompact_instruction(const struct gen_device_info *devinfo, brw_inst *dst,
2096 brw_compact_inst *src)
2097 {
2098 struct compaction_state c;
2099 compaction_state_init(&c, devinfo);
2100 uncompact_instruction(&c, dst, src);
2101 }
2102
2103 void brw_debug_compact_uncompact(const struct gen_device_info *devinfo,
2104 brw_inst *orig,
2105 brw_inst *uncompacted)
2106 {
2107 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2108 devinfo->gen);
2109
2110 fprintf(stderr, " before: ");
2111 brw_disassemble_inst(stderr, devinfo, orig, true, 0, NULL);
2112
2113 fprintf(stderr, " after: ");
2114 brw_disassemble_inst(stderr, devinfo, uncompacted, false, 0, NULL);
2115
2116 uint32_t *before_bits = (uint32_t *)orig;
2117 uint32_t *after_bits = (uint32_t *)uncompacted;
2118 fprintf(stderr, " changed bits:\n");
2119 for (int i = 0; i < 128; i++) {
2120 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2121 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2122
2123 if (before != after) {
2124 fprintf(stderr, " bit %d, %s to %s\n", i,
2125 before ? "set" : "unset",
2126 after ? "set" : "unset");
2127 }
2128 }
2129 }
2130
/**
 * Difference between the compacted_counts entries of two pre-compaction
 * instruction indices.
 *
 * \return compacted_counts[old_target_ip] - compacted_counts[old_ip]; the
 *         result is negative when the target's entry is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2138
2139 static void
2140 update_uip_jip(const struct gen_device_info *devinfo, brw_inst *insn,
2141 int this_old_ip, int *compacted_counts)
2142 {
2143 /* JIP and UIP are in units of:
2144 * - bytes on Gen8+; and
2145 * - compacted instructions on Gen6+.
2146 */
2147 int shift = devinfo->gen >= 8 ? 3 : 0;
2148
2149 int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2150 jip_compacted -= compacted_between(this_old_ip,
2151 this_old_ip + (jip_compacted / 2),
2152 compacted_counts);
2153 brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
2154
2155 if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
2156 brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
2157 (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7))
2158 return;
2159
2160 int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2161 uip_compacted -= compacted_between(this_old_ip,
2162 this_old_ip + (uip_compacted / 2),
2163 compacted_counts);
2164 brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
2165 }
2166
2167 static void
2168 update_gen4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
2169 int this_old_ip, int *compacted_counts)
2170 {
2171 assert(devinfo->gen == 5 || devinfo->is_g4x);
2172
2173 /* Jump Count is in units of:
2174 * - uncompacted instructions on G45; and
2175 * - compacted instructions on Gen5.
2176 */
2177 int shift = devinfo->is_g4x ? 1 : 0;
2178
2179 int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift;
2180
2181 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2182
2183 int this_compacted_count = compacted_counts[this_old_ip];
2184 int target_compacted_count = compacted_counts[target_old_ip];
2185
2186 jump_count_compacted -= (target_compacted_count - this_compacted_count);
2187 brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2188 }
2189
2190 static void
2191 compaction_state_init(struct compaction_state *c,
2192 const struct gen_device_info *devinfo)
2193 {
2194 assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2195 assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2196 assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2197 assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2198 assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
2199 assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
2200 assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
2201 assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
2202 assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
2203 assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
2204 assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
2205 assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
2206 assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
2207 assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
2208 assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
2209 assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
2210 assert(gen11_datatype_table[ARRAY_SIZE(gen11_datatype_table) - 1] != 0);
2211 assert(gen12_control_index_table[ARRAY_SIZE(gen12_control_index_table) - 1] != 0);
2212 assert(gen12_datatype_table[ARRAY_SIZE(gen12_datatype_table) - 1] != 0);
2213 assert(gen12_subreg_table[ARRAY_SIZE(gen12_subreg_table) - 1] != 0);
2214 assert(gen12_src0_index_table[ARRAY_SIZE(gen12_src0_index_table) - 1] != 0);
2215 assert(gen12_src1_index_table[ARRAY_SIZE(gen12_src1_index_table) - 1] != 0);
2216
2217 c->devinfo = devinfo;
2218 switch (devinfo->gen) {
2219 case 12:
2220 c->control_index_table = gen12_control_index_table;;
2221 c->datatype_table = gen12_datatype_table;
2222 c->subreg_table = gen12_subreg_table;
2223 c->src0_index_table = gen12_src0_index_table;
2224 c->src1_index_table = gen12_src1_index_table;
2225 break;
2226 case 11:
2227 c->control_index_table = gen8_control_index_table;
2228 c->datatype_table = gen11_datatype_table;
2229 c->subreg_table = gen8_subreg_table;
2230 c->src0_index_table = gen8_src_index_table;
2231 c->src1_index_table = gen8_src_index_table;
2232 break;
2233 case 10:
2234 case 9:
2235 case 8:
2236 c->control_index_table = gen8_control_index_table;
2237 c->datatype_table = gen8_datatype_table;
2238 c->subreg_table = gen8_subreg_table;
2239 c->src0_index_table = gen8_src_index_table;
2240 c->src1_index_table = gen8_src_index_table;
2241 break;
2242 case 7:
2243 c->control_index_table = gen7_control_index_table;
2244 c->datatype_table = gen7_datatype_table;
2245 c->subreg_table = gen7_subreg_table;
2246 c->src0_index_table = gen7_src_index_table;
2247 c->src1_index_table = gen7_src_index_table;
2248 break;
2249 case 6:
2250 c->control_index_table = gen6_control_index_table;
2251 c->datatype_table = gen6_datatype_table;
2252 c->subreg_table = gen6_subreg_table;
2253 c->src0_index_table = gen6_src_index_table;
2254 c->src1_index_table = gen6_src_index_table;
2255 break;
2256 case 5:
2257 case 4:
2258 c->control_index_table = g45_control_index_table;
2259 c->datatype_table = g45_datatype_table;
2260 c->subreg_table = g45_subreg_table;
2261 c->src0_index_table = g45_src_index_table;
2262 c->src1_index_table = g45_src_index_table;
2263 break;
2264 default:
2265 unreachable("unknown generation");
2266 }
2267 }
2268
2269 void
2270 brw_compact_instructions(struct brw_codegen *p, int start_offset,
2271 struct disasm_info *disasm)
2272 {
2273 if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION))
2274 return;
2275
2276 const struct gen_device_info *devinfo = p->devinfo;
2277 void *store = p->store + start_offset / 16;
2278 /* For an instruction at byte offset 16*i before compaction, this is the
2279 * number of compacted instructions minus the number of padding NOP/NENOPs
2280 * that preceded it.
2281 */
2282 int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
2283 /* For an instruction at byte offset 8*i after compaction, this was its IP
2284 * (in 16-byte units) before compaction.
2285 */
2286 int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];
2287
2288 if (devinfo->gen == 4 && !devinfo->is_g4x)
2289 return;
2290
2291 struct compaction_state c;
2292 compaction_state_init(&c, devinfo);
2293
2294 int offset = 0;
2295 int compacted_count = 0;
2296 for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2297 src_offset += sizeof(brw_inst)) {
2298 brw_inst *src = store + src_offset;
2299 void *dst = store + offset;
2300
2301 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2302 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2303
2304 brw_inst inst = precompact(devinfo, *src);
2305 brw_inst saved = inst;
2306
2307 if (try_compact_instruction(&c, dst, &inst)) {
2308 compacted_count++;
2309
2310 if (INTEL_DEBUG) {
2311 brw_inst uncompacted;
2312 uncompact_instruction(&c, &uncompacted, dst);
2313 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2314 brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
2315 }
2316 }
2317
2318 offset += sizeof(brw_compact_inst);
2319 } else {
2320 /* All uncompacted instructions need to be aligned on G45. */
2321 if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
2322 brw_compact_inst *align = store + offset;
2323 memset(align, 0, sizeof(*align));
2324 brw_compact_inst_set_hw_opcode(
2325 devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NENOP));
2326 brw_compact_inst_set_cmpt_control(devinfo, align, true);
2327 offset += sizeof(brw_compact_inst);
2328 compacted_count--;
2329 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2330 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2331
2332 dst = store + offset;
2333 }
2334
2335 /* If we didn't compact this intruction, we need to move it down into
2336 * place.
2337 */
2338 if (offset != src_offset) {
2339 memmove(dst, src, sizeof(brw_inst));
2340 }
2341 offset += sizeof(brw_inst);
2342 }
2343 }
2344
2345 /* Add an entry for the ending offset of the program. This greatly
2346 * simplifies the linked list walk at the end of the function.
2347 */
2348 old_ip[offset / sizeof(brw_compact_inst)] =
2349 (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2350
2351 /* Fix up control flow offsets. */
2352 p->next_insn_offset = start_offset + offset;
2353 for (offset = 0; offset < p->next_insn_offset - start_offset;
2354 offset = next_offset(devinfo, store, offset)) {
2355 brw_inst *insn = store + offset;
2356 int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2357 int this_compacted_count = compacted_counts[this_old_ip];
2358
2359 switch (brw_inst_opcode(devinfo, insn)) {
2360 case BRW_OPCODE_BREAK:
2361 case BRW_OPCODE_CONTINUE:
2362 case BRW_OPCODE_HALT:
2363 if (devinfo->gen >= 6) {
2364 update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2365 } else {
2366 update_gen4_jump_count(devinfo, insn, this_old_ip,
2367 compacted_counts);
2368 }
2369 break;
2370
2371 case BRW_OPCODE_IF:
2372 case BRW_OPCODE_IFF:
2373 case BRW_OPCODE_ELSE:
2374 case BRW_OPCODE_ENDIF:
2375 case BRW_OPCODE_WHILE:
2376 if (devinfo->gen >= 7) {
2377 if (brw_inst_cmpt_control(devinfo, insn)) {
2378 brw_inst uncompacted;
2379 uncompact_instruction(&c, &uncompacted,
2380 (brw_compact_inst *)insn);
2381
2382 update_uip_jip(devinfo, &uncompacted, this_old_ip,
2383 compacted_counts);
2384
2385 bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2386 &uncompacted);
2387 assert(ret); (void)ret;
2388 } else {
2389 update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2390 }
2391 } else if (devinfo->gen == 6) {
2392 assert(!brw_inst_cmpt_control(devinfo, insn));
2393
2394 /* Jump Count is in units of compacted instructions on Gen6. */
2395 int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);
2396
2397 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2398 int target_compacted_count = compacted_counts[target_old_ip];
2399 jump_count_compacted -= (target_compacted_count - this_compacted_count);
2400 brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
2401 } else {
2402 update_gen4_jump_count(devinfo, insn, this_old_ip,
2403 compacted_counts);
2404 }
2405 break;
2406
2407 case BRW_OPCODE_ADD:
2408 /* Add instructions modifying the IP register use an immediate src1,
2409 * and Gens that use this cannot compact instructions with immediate
2410 * operands.
2411 */
2412 if (brw_inst_cmpt_control(devinfo, insn))
2413 break;
2414
2415 if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
2416 brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2417 assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
2418
2419 int shift = 3;
2420 int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2421
2422 int target_old_ip = this_old_ip + (jump_compacted / 2);
2423 int target_compacted_count = compacted_counts[target_old_ip];
2424 jump_compacted -= (target_compacted_count - this_compacted_count);
2425 brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2426 }
2427 break;
2428
2429 default:
2430 break;
2431 }
2432 }
2433
2434 /* p->nr_insn is counting the number of uncompacted instructions still, so
2435 * divide. We do want to be sure there's a valid instruction in any
2436 * alignment padding, so that the next compression pass (for the FS 8/16
2437 * compile passes) parses correctly.
2438 */
2439 if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2440 brw_compact_inst *align = store + offset;
2441 memset(align, 0, sizeof(*align));
2442 brw_compact_inst_set_hw_opcode(
2443 devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NOP));
2444 brw_compact_inst_set_cmpt_control(devinfo, align, true);
2445 p->next_insn_offset += sizeof(brw_compact_inst);
2446 }
2447 p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2448
2449 for (int i = 0; i < p->num_relocs; i++) {
2450 if (p->relocs[i].offset < (uint32_t)start_offset)
2451 continue;
2452
2453 assert(p->relocs[i].offset % 16 == 0);
2454 unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2455 p->relocs[i].offset -= compacted_counts[idx] * 8;
2456 }
2457
2458 /* Update the instruction offsets for each group. */
2459 if (disasm) {
2460 int offset = 0;
2461
2462 foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2463 while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2464 sizeof(brw_inst) != group->offset) {
2465 assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2466 sizeof(brw_inst) < group->offset);
2467 offset = next_offset(devinfo, store, offset);
2468 }
2469
2470 group->offset = start_offset + offset;
2471
2472 offset = next_offset(devinfo, store, offset);
2473 }
2474 }
2475 }