intel/compiler: Make separate src0/src1 index tables
[mesa.git] / src / intel / compiler / brw_eu_compact.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gen5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gen6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gen7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gen6
62 * since the jump count field is not in DW3.
63 *
64 * break JIP/UIP
65 * cont JIP/UIP
66 * halt JIP/UIP
67 * if JIP/UIP
68 * else JIP (plus UIP on BDW+)
69 * endif JIP
70 * while JIP (must be negative)
71 *
72 * Gen 8 adds support for compacting 3-src instructions.
73 */
74
75 #include "brw_eu.h"
76 #include "brw_shader.h"
77 #include "brw_disasm_info.h"
78 #include "dev/gen_debug.h"
79
/* G45-named control-index lookup table: 32 entries, each written as a 17-bit
 * pattern.  set_control_index() compares the control bits gathered from an
 * uncompacted instruction against the entries of the active
 * control_index_table and stores the position of the matching entry.
 * NOTE(review): the code that points control_index_table at this table is not
 * part of this view.
 */
80 static const uint32_t g45_control_index_table[32] = {
81 0b00000000000000000,
82 0b01000000000000000,
83 0b00110000000000000,
84 0b00000000000000010,
85 0b00100000000000000,
86 0b00010000000000000,
87 0b01000000000100000,
88 0b01000000100000000,
89 0b01010000000100000,
90 0b00000000100000010,
91 0b11000000000000000,
92 0b00001000100000010,
93 0b01001000100000000,
94 0b00000000100000000,
95 0b11000000000100000,
96 0b00001000100000000,
97 0b10110000000000000,
98 0b11010000000100000,
99 0b00110000100000000,
100 0b00100000100000000,
101 0b01000000000001000,
102 0b01000000000000100,
103 0b00111100000000000,
104 0b00101011000000000,
105 0b00110000000010000,
106 0b00010000100000000,
107 0b01000000000100100,
108 0b01000000000101000,
109 0b00110000000000110,
110 0b00000000000001010,
111 0b01010000000101000,
112 0b01010000000100100,
113 };
114
/* G45-named data-type lookup table: 32 entries, each written as an 18-bit
 * pattern.  set_datatype_index() searches the active datatype_table for the
 * data-type bits of an uncompacted instruction.
 * NOTE(review): the code that selects this table is not part of this view.
 */
115 static const uint32_t g45_datatype_table[32] = {
116 0b001000000000100001,
117 0b001011010110101101,
118 0b001000001000110001,
119 0b001111011110111101,
120 0b001011010110101100,
121 0b001000000110101101,
122 0b001000000000100000,
123 0b010100010110110001,
124 0b001100011000101101,
125 0b001000000000100010,
126 0b001000001000110110,
127 0b010000001000110001,
128 0b001000001000110010,
129 0b011000001000110010,
130 0b001111011110111100,
131 0b001000000100101000,
132 0b010100011000110001,
133 0b001010010100101001,
134 0b001000001000101001,
135 0b010000001000110110,
136 0b101000001000110001,
137 0b001011011000101101,
138 0b001000000100001001,
139 0b001011011000101100,
140 0b110100011000110001,
141 0b001000001110111101,
142 0b110000001000110001,
143 0b011000000100101010,
144 0b101000001000101001,
145 0b001011010110001100,
146 0b001000000110100001,
147 0b001010010100001000,
148 };
149
/* G45-named subregister lookup table: 32 entries, each written as a 15-bit
 * pattern.  set_subreg_index() builds its 15-bit key from three 5-bit
 * instruction fields and searches the active subreg_table for it.
 * NOTE(review): the code that selects this table is not part of this view.
 */
150 static const uint16_t g45_subreg_table[32] = {
151 0b000000000000000,
152 0b000000010000000,
153 0b000001000000000,
154 0b000100000000000,
155 0b000000000100000,
156 0b100000000000000,
157 0b000000000010000,
158 0b001100000000000,
159 0b001010000000000,
160 0b000000100000000,
161 0b001000000000000,
162 0b000000000001000,
163 0b000000001000000,
164 0b000000000000001,
165 0b000010000000000,
166 0b000000010100000,
167 0b000000000000111,
168 0b000001000100000,
169 0b011000000000000,
170 0b000000110000000,
171 0b000000000000010,
172 0b000000000000100,
173 0b000000001100000,
174 0b000100000000010,
175 0b001110011000110,
176 0b001110100001000,
177 0b000110011000110,
178 0b000001000011000,
179 0b000110010000100,
180 0b001100000000110,
181 0b000000010000110,
182 0b000001000110000,
183 };
184
/* G45-named source-index lookup table: 32 entries, each written as a 12-bit
 * pattern.  set_src0_index() and set_src1_index() search the active
 * src0_index_table / src1_index_table for a 12-bit source descriptor.
 * NOTE(review): the code that selects this table is not part of this view.
 */
185 static const uint16_t g45_src_index_table[32] = {
186 0b000000000000,
187 0b010001101000,
188 0b010110001000,
189 0b011010010000,
190 0b001101001000,
191 0b010110001010,
192 0b010101110000,
193 0b011001111000,
194 0b001000101000,
195 0b000000101000,
196 0b010001010000,
197 0b111101101100,
198 0b010110001100,
199 0b010001101100,
200 0b011010010100,
201 0b010001001100,
202 0b001100101000,
203 0b000000000010,
204 0b111101001100,
205 0b011001101000,
206 0b010101001000,
207 0b000000000100,
208 0b000000101100,
209 0b010001101010,
210 0b000000111000,
211 0b010101011000,
212 0b000100100000,
213 0b010110000000,
214 0b010000000100,
215 0b010000111000,
216 0b000101100000,
217 0b111101110100,
218 };
219
/* Gen6-named control-index lookup table: 32 entries, each written as a
 * 17-bit pattern, for use as the control_index_table searched by
 * set_control_index().
 * NOTE(review): the code that selects this table is not part of this view.
 */
220 static const uint32_t gen6_control_index_table[32] = {
221 0b00000000000000000,
222 0b01000000000000000,
223 0b00110000000000000,
224 0b00000000100000000,
225 0b00010000000000000,
226 0b00001000100000000,
227 0b00000000100000010,
228 0b00000000000000010,
229 0b01000000100000000,
230 0b01010000000000000,
231 0b10110000000000000,
232 0b00100000000000000,
233 0b11010000000000000,
234 0b11000000000000000,
235 0b01001000100000000,
236 0b01000000000001000,
237 0b01000000000000100,
238 0b00000000000001000,
239 0b00000000000000100,
240 0b00111000100000000,
241 0b00001000100000010,
242 0b00110000100000000,
243 0b00110000000000001,
244 0b00100000000000001,
245 0b00110000000000010,
246 0b00110000000000101,
247 0b00110000000001001,
248 0b00110000000010000,
249 0b00110000000000011,
250 0b00110000000000100,
251 0b00110000100001000,
252 0b00100000000001001,
253 };
254
/* Gen6-named data-type lookup table: 32 entries, each written as an 18-bit
 * pattern, for use as the datatype_table searched by set_datatype_index().
 * The value 0b001111011110111101 appears twice (positions 23 and 25); the
 * linear search in set_datatype_index() stops at the first match, so only
 * position 23 can ever be produced for it.
 * NOTE(review): the code that selects this table is not part of this view.
 */
255 static const uint32_t gen6_datatype_table[32] = {
256 0b001001110000000000,
257 0b001000110000100000,
258 0b001001110000000001,
259 0b001000000001100000,
260 0b001010110100101001,
261 0b001000000110101101,
262 0b001100011000101100,
263 0b001011110110101101,
264 0b001000000111101100,
265 0b001000000001100001,
266 0b001000110010100101,
267 0b001000000001000001,
268 0b001000001000110001,
269 0b001000001000101001,
270 0b001000000000100000,
271 0b001000001000110010,
272 0b001010010100101001,
273 0b001011010010100101,
274 0b001000000110100101,
275 0b001100011000101001,
276 0b001011011000101100,
277 0b001011010110100101,
278 0b001011110110100101,
279 0b001111011110111101,
280 0b001111011110111100,
281 0b001111011110111101,
282 0b001111011110011101,
283 0b001111011110111110,
284 0b001000000000100001,
285 0b001000000000100010,
286 0b001001111111011101,
287 0b001000001110111110,
288 };
289
/* Gen6-named subregister lookup table: 32 entries, each written as a 15-bit
 * pattern, for use as the subreg_table searched by set_subreg_index().
 * NOTE(review): the code that selects this table is not part of this view.
 */
290 static const uint16_t gen6_subreg_table[32] = {
291 0b000000000000000,
292 0b000000000000100,
293 0b000000110000000,
294 0b111000000000000,
295 0b011110000001000,
296 0b000010000000000,
297 0b000000000010000,
298 0b000110000001100,
299 0b001000000000000,
300 0b000001000000000,
301 0b000001010010100,
302 0b000000001010110,
303 0b010000000000000,
304 0b110000000000000,
305 0b000100000000000,
306 0b000000010000000,
307 0b000000000001000,
308 0b100000000000000,
309 0b000001010000000,
310 0b001010000000000,
311 0b001100000000000,
312 0b000000001010100,
313 0b101101010010100,
314 0b010100000000000,
315 0b000000010001111,
316 0b011000000000000,
317 0b111110000000000,
318 0b101000000000000,
319 0b000000000001111,
320 0b000100010001111,
321 0b001000010001111,
322 0b000110000000000,
323 };
324
/* Gen6-named source-index lookup table: 32 entries, each written as a 12-bit
 * pattern, for use as the src0_index_table / src1_index_table searched by
 * set_src0_index() and set_src1_index().
 * NOTE(review): the code that selects this table is not part of this view.
 */
325 static const uint16_t gen6_src_index_table[32] = {
326 0b000000000000,
327 0b010110001000,
328 0b010001101000,
329 0b001000101000,
330 0b011010010000,
331 0b000100100000,
332 0b010001101100,
333 0b010101110000,
334 0b011001111000,
335 0b001100101000,
336 0b010110001100,
337 0b001000100000,
338 0b010110001010,
339 0b000000000010,
340 0b010101010000,
341 0b010101101000,
342 0b111101001100,
343 0b111100101100,
344 0b011001110000,
345 0b010110001001,
346 0b010101011000,
347 0b001101001000,
348 0b010000101100,
349 0b010000000000,
350 0b001101110000,
351 0b001100010000,
352 0b001100000000,
353 0b010001101010,
354 0b001101111000,
355 0b000001110000,
356 0b001100100000,
357 0b001101010000,
358 };
359
/* Gen7-named control-index lookup table: 32 entries, each written as a
 * 19-bit pattern.  On Gen7, set_control_index() places two extra instruction
 * bits (90:89) above the 17-bit key used on earlier generations, which
 * accounts for the wider entries.
 * NOTE(review): the code that selects this table is not part of this view.
 */
360 static const uint32_t gen7_control_index_table[32] = {
361 0b0000000000000000010,
362 0b0000100000000000000,
363 0b0000100000000000001,
364 0b0000100000000000010,
365 0b0000100000000000011,
366 0b0000100000000000100,
367 0b0000100000000000101,
368 0b0000100000000000111,
369 0b0000100000000001000,
370 0b0000100000000001001,
371 0b0000100000000001101,
372 0b0000110000000000000,
373 0b0000110000000000001,
374 0b0000110000000000010,
375 0b0000110000000000011,
376 0b0000110000000000100,
377 0b0000110000000000101,
378 0b0000110000000000111,
379 0b0000110000000001001,
380 0b0000110000000001101,
381 0b0000110000000010000,
382 0b0000110000100000000,
383 0b0001000000000000000,
384 0b0001000000000000010,
385 0b0001000000000000100,
386 0b0001000000100000000,
387 0b0010110000000000000,
388 0b0010110000000010000,
389 0b0011000000000000000,
390 0b0011000000100000000,
391 0b0101000000000000000,
392 0b0101000000100000000,
393 };
394
/* Gen7-named data-type lookup table: 32 entries, each written as an 18-bit
 * pattern, for use as the datatype_table searched by set_datatype_index().
 * NOTE(review): the code that selects this table is not part of this view.
 */
395 static const uint32_t gen7_datatype_table[32] = {
396 0b001000000000000001,
397 0b001000000000100000,
398 0b001000000000100001,
399 0b001000000001100001,
400 0b001000000010111101,
401 0b001000001011111101,
402 0b001000001110100001,
403 0b001000001110100101,
404 0b001000001110111101,
405 0b001000010000100001,
406 0b001000110000100000,
407 0b001000110000100001,
408 0b001001010010100101,
409 0b001001110010100100,
410 0b001001110010100101,
411 0b001111001110111101,
412 0b001111011110011101,
413 0b001111011110111100,
414 0b001111011110111101,
415 0b001111111110111100,
416 0b000000001000001100,
417 0b001000000000111101,
418 0b001000000010100101,
419 0b001000010000100000,
420 0b001001010010100100,
421 0b001001110010000100,
422 0b001010010100001001,
423 0b001101111110111101,
424 0b001111111110111101,
425 0b001011110110101100,
426 0b001010010100101000,
427 0b001010110100101000,
428 };
429
/* Gen7-named subregister lookup table: 32 entries, each written as a 15-bit
 * pattern, for use as the subreg_table searched by set_subreg_index().
 * NOTE(review): the code that selects this table is not part of this view.
 */
430 static const uint16_t gen7_subreg_table[32] = {
431 0b000000000000000,
432 0b000000000000001,
433 0b000000000001000,
434 0b000000000001111,
435 0b000000000010000,
436 0b000000010000000,
437 0b000000100000000,
438 0b000000110000000,
439 0b000001000000000,
440 0b000001000010000,
441 0b000010100000000,
442 0b001000000000000,
443 0b001000000000001,
444 0b001000010000001,
445 0b001000010000010,
446 0b001000010000011,
447 0b001000010000100,
448 0b001000010000111,
449 0b001000010001000,
450 0b001000010001110,
451 0b001000010001111,
452 0b001000110000000,
453 0b001000111101000,
454 0b010000000000000,
455 0b010000110000000,
456 0b011000000000000,
457 0b011110010000111,
458 0b100000000000000,
459 0b101000000000000,
460 0b110000000000000,
461 0b111000000000000,
462 0b111000000011100,
463 };
464
/* Gen7-named source-index lookup table: 32 entries, each written as a 12-bit
 * pattern, for use as the src0_index_table / src1_index_table searched by
 * set_src0_index() and set_src1_index().
 * NOTE(review): the code that selects this table is not part of this view.
 */
465 static const uint16_t gen7_src_index_table[32] = {
466 0b000000000000,
467 0b000000000010,
468 0b000000010000,
469 0b000000010010,
470 0b000000011000,
471 0b000000100000,
472 0b000000101000,
473 0b000001001000,
474 0b000001010000,
475 0b000001110000,
476 0b000001111000,
477 0b001100000000,
478 0b001100000010,
479 0b001100001000,
480 0b001100010000,
481 0b001100010010,
482 0b001100100000,
483 0b001100101000,
484 0b001100111000,
485 0b001101000000,
486 0b001101000010,
487 0b001101001000,
488 0b001101010000,
489 0b001101100000,
490 0b001101101000,
491 0b001101110000,
492 0b001101110001,
493 0b001101111000,
494 0b010001101000,
495 0b010001101001,
496 0b010001101010,
497 0b010110001000,
498 };
499
/* Gen8-named control-index lookup table: 32 entries, each written as a
 * 19-bit pattern.  The values are entry-for-entry the same as
 * gen7_control_index_table, but set_control_index() assembles the key from
 * different instruction bits on Gen8+, so the same pattern describes a
 * different set of instruction bits.
 * NOTE(review): the code that selects this table is not part of this view.
 */
500 static const uint32_t gen8_control_index_table[32] = {
501 0b0000000000000000010,
502 0b0000100000000000000,
503 0b0000100000000000001,
504 0b0000100000000000010,
505 0b0000100000000000011,
506 0b0000100000000000100,
507 0b0000100000000000101,
508 0b0000100000000000111,
509 0b0000100000000001000,
510 0b0000100000000001001,
511 0b0000100000000001101,
512 0b0000110000000000000,
513 0b0000110000000000001,
514 0b0000110000000000010,
515 0b0000110000000000011,
516 0b0000110000000000100,
517 0b0000110000000000101,
518 0b0000110000000000111,
519 0b0000110000000001001,
520 0b0000110000000001101,
521 0b0000110000000010000,
522 0b0000110000100000000,
523 0b0001000000000000000,
524 0b0001000000000000010,
525 0b0001000000000000100,
526 0b0001000000100000000,
527 0b0010110000000000000,
528 0b0010110000000010000,
529 0b0011000000000000000,
530 0b0011000000100000000,
531 0b0101000000000000000,
532 0b0101000000100000000,
533 };
534
/* Gen8-named data-type lookup table: 32 entries, each written as a 21-bit
 * pattern, matching the 3b + 6b + 12b key that set_datatype_index() builds
 * when devinfo->gen >= 8.
 * NOTE(review): the code that selects this table is not part of this view.
 */
535 static const uint32_t gen8_datatype_table[32] = {
536 0b001000000000000000001,
537 0b001000000000001000000,
538 0b001000000000001000001,
539 0b001000000000011000001,
540 0b001000000000101011101,
541 0b001000000010111011101,
542 0b001000000011101000001,
543 0b001000000011101000101,
544 0b001000000011101011101,
545 0b001000001000001000001,
546 0b001000011000001000000,
547 0b001000011000001000001,
548 0b001000101000101000101,
549 0b001000111000101000100,
550 0b001000111000101000101,
551 0b001011100011101011101,
552 0b001011101011100011101,
553 0b001011101011101011100,
554 0b001011101011101011101,
555 0b001011111011101011100,
556 0b000000000010000001100,
557 0b001000000000001011101,
558 0b001000000000101000101,
559 0b001000001000001000000,
560 0b001000101000101000100,
561 0b001000111000100000100,
562 0b001001001001000001001,
563 0b001010111011101011101,
564 0b001011111011101011101,
565 0b001001111001101001100,
566 0b001001001001001001000,
567 0b001001011001001001000,
568 };
569
/* Gen8-named subregister lookup table: 32 entries, each written as a 15-bit
 * pattern, for use as the subreg_table searched by set_subreg_index().
 * NOTE(review): the code that selects this table is not part of this view.
 */
570 static const uint16_t gen8_subreg_table[32] = {
571 0b000000000000000,
572 0b000000000000001,
573 0b000000000001000,
574 0b000000000001111,
575 0b000000000010000,
576 0b000000010000000,
577 0b000000100000000,
578 0b000000110000000,
579 0b000001000000000,
580 0b000001000010000,
581 0b000001010000000,
582 0b001000000000000,
583 0b001000000000001,
584 0b001000010000001,
585 0b001000010000010,
586 0b001000010000011,
587 0b001000010000100,
588 0b001000010000111,
589 0b001000010001000,
590 0b001000010001110,
591 0b001000010001111,
592 0b001000110000000,
593 0b001000111101000,
594 0b010000000000000,
595 0b010000110000000,
596 0b011000000000000,
597 0b011110010000111,
598 0b100000000000000,
599 0b101000000000000,
600 0b110000000000000,
601 0b111000000000000,
602 0b111000000011100,
603 };
604
/* Gen8-named source-index lookup table: 32 entries, each written as a 12-bit
 * pattern, for use as the src0_index_table / src1_index_table searched by
 * set_src0_index() and set_src1_index().  The values are entry-for-entry the
 * same as gen7_src_index_table.
 * NOTE(review): the code that selects this table is not part of this view.
 */
605 static const uint16_t gen8_src_index_table[32] = {
606 0b000000000000,
607 0b000000000010,
608 0b000000010000,
609 0b000000010010,
610 0b000000011000,
611 0b000000100000,
612 0b000000101000,
613 0b000001001000,
614 0b000001010000,
615 0b000001110000,
616 0b000001111000,
617 0b001100000000,
618 0b001100000010,
619 0b001100001000,
620 0b001100010000,
621 0b001100010010,
622 0b001100100000,
623 0b001100101000,
624 0b001100111000,
625 0b001101000000,
626 0b001101000010,
627 0b001101001000,
628 0b001101010000,
629 0b001101100000,
630 0b001101101000,
631 0b001101110000,
632 0b001101110001,
633 0b001101111000,
634 0b010001101000,
635 0b010001101001,
636 0b010001101010,
637 0b010110001000,
638 };
639
/* Gen11-named data-type lookup table: 32 entries, each written as a 21-bit
 * pattern (the same width as gen8_datatype_table), for use as the
 * datatype_table searched by set_datatype_index().
 * NOTE(review): the code that selects this table is not part of this view.
 */
640 static const uint32_t gen11_datatype_table[32] = {
641 0b001000000000000000001,
642 0b001000000000001000000,
643 0b001000000000001000001,
644 0b001000000000011000001,
645 0b001000000000101100101,
646 0b001000000101111100101,
647 0b001000000100101000001,
648 0b001000000100101000101,
649 0b001000000100101100101,
650 0b001000001000001000001,
651 0b001000011000001000000,
652 0b001000011000001000001,
653 0b001000101000101000101,
654 0b001000111000101000100,
655 0b001000111000101000101,
656 0b001100100100101100101,
657 0b001100101100100100101,
658 0b001100101100101100100,
659 0b001100101100101100101,
660 0b001100111100101100100,
661 0b000000000010000001100,
662 0b001000000000001100101,
663 0b001000000000101000101,
664 0b001000001000001000000,
665 0b001000101000101000100,
666 0b001000111000100000100,
667 0b001001001001000001001,
668 0b001101111100101100101,
669 0b001100111100101100101,
670 0b001001111001101001100,
671 0b001001001001001001000,
672 0b001001011001001001000,
673 };
674
/* Four-entry table searched by set_3src_control_index() when compacting
 * three-source instructions.
 */
675 /* This is actually the control index table for Cherryview (26 bits), but the
676 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
677 * the start.
678 *
679 * The low 24 bits have the same mappings on both hardware.
680 */
681 static const uint32_t gen8_3src_control_index_table[4] = {
682 0b00100000000110000000000001,
683 0b00000000000110000000000001,
684 0b00000000001000000000000001,
685 0b00000000001000000000100001,
686 };
687
/* Four-entry table searched by set_3src_source_index() when compacting
 * three-source instructions.
 */
688 /* This is actually the source index table for Cherryview (49 bits), but the
689 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
690 * at the start.
691 *
692 * The low 44 bits have the same mappings on both hardware, and since the high
693 * three bits on Broadwell are zero, we can reuse Cherryview's table.
694 */
695 static const uint64_t gen8_3src_source_index_table[4] = {
696 0b0000001110010011100100111001000001111000000000000,
697 0b0000001110010011100100111001000001111000000000010,
698 0b0000001110010011100100111001000001111000000001000,
699 0b0000001110010011100100111001000001111000000100000,
700 };
701
/* Currently active lookup tables.  The set_*_index() helpers below read the
 * first 32 entries of whichever table each pointer refers to.  Nothing in
 * this view assigns them; presumably an initialisation routine elsewhere in
 * the file points them at the per-generation tables above before any
 * compaction is attempted -- TODO confirm.
 */
702 static const uint32_t *control_index_table;
703 static const uint32_t *datatype_table;
704 static const uint16_t *subreg_table;
705 static const uint16_t *src0_index_table;
706 static const uint16_t *src1_index_table;
707
708 static bool
709 set_control_index(const struct gen_device_info *devinfo,
710 brw_compact_inst *dst, const brw_inst *src)
711 {
712 uint32_t uncompacted; /* 17b/G45; 19b/IVB+ */
713
714 if (devinfo->gen >= 8) {
715 uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /* 3b */
716 (brw_inst_bits(src, 23, 12) << 4) | /* 12b */
717 (brw_inst_bits(src, 10, 9) << 2) | /* 2b */
718 (brw_inst_bits(src, 34, 34) << 1) | /* 1b */
719 (brw_inst_bits(src, 8, 8)); /* 1b */
720 } else {
721 uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
722 (brw_inst_bits(src, 23, 8)); /* 16b */
723
724 /* On gen7, the flag register and subregister numbers are integrated into
725 * the control index.
726 */
727 if (devinfo->gen == 7)
728 uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
729 }
730
731 for (int i = 0; i < 32; i++) {
732 if (control_index_table[i] == uncompacted) {
733 brw_compact_inst_set_control_index(devinfo, dst, i);
734 return true;
735 }
736 }
737
738 return false;
739 }
740
741 static bool
742 set_datatype_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
743 const brw_inst *src)
744 {
745 uint32_t uncompacted; /* 18b/G45+; 21b/BDW+ */
746
747 if (devinfo->gen >= 8) {
748 uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /* 3b */
749 (brw_inst_bits(src, 94, 89) << 12) | /* 6b */
750 (brw_inst_bits(src, 46, 35)); /* 12b */
751 } else {
752 uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /* 3b */
753 (brw_inst_bits(src, 46, 32)); /* 15b */
754 }
755
756 for (int i = 0; i < 32; i++) {
757 if (datatype_table[i] == uncompacted) {
758 brw_compact_inst_set_datatype_index(devinfo, dst, i);
759 return true;
760 }
761 }
762
763 return false;
764 }
765
766 static bool
767 set_subreg_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
768 const brw_inst *src, bool is_immediate)
769 {
770 uint16_t uncompacted = /* 15b */
771 (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
772 (brw_inst_bits(src, 68, 64) << 5); /* 5b */
773
774 if (!is_immediate)
775 uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
776
777 for (int i = 0; i < 32; i++) {
778 if (subreg_table[i] == uncompacted) {
779 brw_compact_inst_set_subreg_index(devinfo, dst, i);
780 return true;
781 }
782 }
783
784 return false;
785 }
786
787 static bool
788 set_src0_index(const struct gen_device_info *devinfo,
789 brw_compact_inst *dst, const brw_inst *src)
790 {
791 uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
792
793 for (int i = 0; i < 32; i++) {
794 if (src0_index_table[i] == uncompacted) {
795 brw_compact_inst_set_src0_index(devinfo, dst, i);
796 return true;
797 }
798 }
799
800 return false;
801 }
802
803 static bool
804 set_src1_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
805 const brw_inst *src, bool is_immediate)
806 {
807 if (is_immediate) {
808 uint16_t imm = (brw_inst_imm_ud(devinfo, src) >> 8) & 0x1f;
809 brw_compact_inst_set_src1_index(devinfo, dst, imm);
810 return true;
811 } else {
812 uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
813
814 for (int i = 0; i < 32; i++) {
815 if (src1_index_table[i] == uncompacted) {
816 brw_compact_inst_set_src1_index(devinfo, dst, i);
817 return true;
818 }
819 }
820 }
821
822 return false;
823 }
824
825 static bool
826 set_3src_control_index(const struct gen_device_info *devinfo,
827 brw_compact_inst *dst, const brw_inst *src)
828 {
829 assert(devinfo->gen >= 8);
830
831 uint32_t uncompacted = /* 24b/BDW; 26b/CHV */
832 (brw_inst_bits(src, 34, 32) << 21) | /* 3b */
833 (brw_inst_bits(src, 28, 8)); /* 21b */
834
835 if (devinfo->gen >= 9 || devinfo->is_cherryview)
836 uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
837
838 for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
839 if (gen8_3src_control_index_table[i] == uncompacted) {
840 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
841 return true;
842 }
843 }
844
845 return false;
846 }
847
848 static bool
849 set_3src_source_index(const struct gen_device_info *devinfo,
850 brw_compact_inst *dst, const brw_inst *src)
851 {
852 assert(devinfo->gen >= 8);
853
854 uint64_t uncompacted = /* 46b/BDW; 49b/CHV */
855 (brw_inst_bits(src, 83, 83) << 43) | /* 1b */
856 (brw_inst_bits(src, 114, 107) << 35) | /* 8b */
857 (brw_inst_bits(src, 93, 86) << 27) | /* 8b */
858 (brw_inst_bits(src, 72, 65) << 19) | /* 8b */
859 (brw_inst_bits(src, 55, 37)); /* 19b */
860
861 if (devinfo->gen >= 9 || devinfo->is_cherryview) {
862 uncompacted |=
863 (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
864 (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
865 (brw_inst_bits(src, 84, 84) << 44); /* 1b */
866 } else {
867 uncompacted |=
868 (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
869 (brw_inst_bits(src, 104, 104) << 44); /* 1b */
870 }
871
872 for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
873 if (gen8_3src_source_index_table[i] == uncompacted) {
874 brw_compact_inst_set_3src_source_index(devinfo, dst, i);
875 return true;
876 }
877 }
878
879 return false;
880 }
881
882 static bool
883 has_unmapped_bits(const struct gen_device_info *devinfo, const brw_inst *src)
884 {
885 /* EOT can only be mapped on a send if the src1 is an immediate */
886 if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
887 brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
888 brw_inst_eot(devinfo, src))
889 return true;
890
891 /* Check for instruction bits that don't map to any of the fields of the
892 * compacted instruction. The instruction cannot be compacted if any of
893 * them are set. They overlap with:
894 * - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
895 * - Dst.AddrImm[9] (bit 47 on Gen8)
896 * - Src0.AddrImm[9] (bit 95 on Gen8)
897 * - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
898 * - UIP[31] (bit 95 on Gen8)
899 */
900 if (devinfo->gen >= 8) {
901 assert(!brw_inst_bits(src, 7, 7));
902 return brw_inst_bits(src, 95, 95) ||
903 brw_inst_bits(src, 47, 47) ||
904 brw_inst_bits(src, 11, 11);
905 } else {
906 assert(!brw_inst_bits(src, 7, 7) &&
907 !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90)));
908 return brw_inst_bits(src, 95, 91) ||
909 brw_inst_bits(src, 47, 47);
910 }
911 }
912
913 static bool
914 has_3src_unmapped_bits(const struct gen_device_info *devinfo,
915 const brw_inst *src)
916 {
917 /* Check for three-source instruction bits that don't map to any of the
918 * fields of the compacted instruction. All of them seem to be reserved
919 * bits currently.
920 */
921 if (devinfo->gen >= 9 || devinfo->is_cherryview) {
922 assert(!brw_inst_bits(src, 127, 127) &&
923 !brw_inst_bits(src, 7, 7));
924 } else {
925 assert(devinfo->gen >= 8);
926 assert(!brw_inst_bits(src, 127, 126) &&
927 !brw_inst_bits(src, 105, 105) &&
928 !brw_inst_bits(src, 84, 84) &&
929 !brw_inst_bits(src, 7, 7));
930
931 /* Src1Type and Src2Type, used for mixed-precision floating point */
932 if (brw_inst_bits(src, 36, 35))
933 return true;
934 }
935
936 return false;
937 }
938
939 static bool
940 brw_try_compact_3src_instruction(const struct gen_device_info *devinfo,
941 brw_compact_inst *dst, const brw_inst *src)
942 {
943 assert(devinfo->gen >= 8);
944
945 if (has_3src_unmapped_bits(devinfo, src))
946 return false;
947
948 #define compact(field) \
949 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
950 #define compact_a16(field) \
951 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
952
953 compact(hw_opcode);
954
955 if (!set_3src_control_index(devinfo, dst, src))
956 return false;
957
958 if (!set_3src_source_index(devinfo, dst, src))
959 return false;
960
961 compact(dst_reg_nr);
962 compact_a16(src0_rep_ctrl);
963 brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
964 compact(debug_control);
965 compact(saturate);
966 compact_a16(src1_rep_ctrl);
967 compact_a16(src2_rep_ctrl);
968 compact(src0_reg_nr);
969 compact(src1_reg_nr);
970 compact(src2_reg_nr);
971 compact_a16(src0_subreg_nr);
972 compact_a16(src1_subreg_nr);
973 compact_a16(src2_subreg_nr);
974
975 #undef compact
976 #undef compact_a16
977
978 return true;
979 }
980
/* A compacted instruction stores the low 12 bits of an immediate verbatim
 * plus a 13th bit that is replicated through the upper 20 bits.
 *
 * In practice this admits 12-bit integers, 0.0f, and a limited set of packed
 * vector immediates.
 *
 * Returns true when the upper 20 bits of imm are either all zero or all one.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* The low 12 bits are always representable, so only the rest matters. */
   const unsigned upper = imm & ~0xfffu;

   if (upper == 0)
      return true;

   /* Otherwise the replicated bit must be set everywhere. */
   return upper == 0xfffff000;
}
996
997 /**
998 * Applies some small changes to instruction types to increase chances of
999 * compaction.
1000 */
1001 static brw_inst
1002 precompact(const struct gen_device_info *devinfo, brw_inst inst)
1003 {
1004 if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1005 return inst;
1006
1007 /* The Bspec's section titled "Non-present Operands" claims that if src0
1008 * is an immediate that src1's type must be the same as that of src0.
1009 *
1010 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1011 * that do not follow this rule. E.g., from the IVB/HSW table:
1012 *
1013 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1014 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
1015 *
1016 * And from the SNB table:
1017 *
1018 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1019 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
1020 *
1021 * Neither of these cause warnings from the simulator when used,
1022 * compacted or otherwise. In fact, all compaction mappings that have an
1023 * immediate in src0 use a:ud for src1.
1024 *
1025 * The GM45 instruction compaction tables do not contain mapped meanings
1026 * so it's not clear whether it has the restriction. We'll assume it was
1027 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1028 *
1029 * Don't do any of this for 64-bit immediates, since the src1 fields
1030 * overlap with the immediate and setting them would overwrite the
1031 * immediate we set.
1032 */
1033 if (devinfo->gen >= 6 &&
1034 !(devinfo->is_haswell &&
1035 brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&
1036 !(devinfo->gen >= 8 &&
1037 (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1038 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1039 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1040 enum brw_reg_file file = brw_inst_src1_reg_file(devinfo, &inst);
1041 brw_inst_set_src1_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_UD);
1042 }
1043
1044 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1045 * for immediate values. Presumably the hardware engineers realized
1046 * that the only useful floating-point value that could be represented
1047 * in this format is 0.0, which can also be represented as a VF-typed
1048 * immediate, so they gave us the previously mentioned mapping on IVB+.
1049 *
1050 * Strangely, we do have a mapping for imm:f in src1, so we don't need
1051 * to do this there.
1052 *
1053 * If we see a 0.0:F, change the type to VF so that it can be compacted.
1054 */
1055 if (brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1056 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1057 brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1058 brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1059 enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1060 brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1061 }
1062
1063 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1064 * set the types to :UD so the instruction can be compacted.
1065 */
1066 if (is_compactable_immediate(brw_inst_imm_ud(devinfo, &inst)) &&
1067 brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1068 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1069 brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1070 enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1071 enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1072
1073 brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1074 brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1075 }
1076
1077 return inst;
1078 }
1079
1080 /**
1081 * Tries to compact instruction src into dst.
1082 *
1083 * It doesn't modify dst unless src is compactable, which is relied on by
1084 * brw_compact_instructions().
1085 */
1086 bool
1087 brw_try_compact_instruction(const struct gen_device_info *devinfo,
1088 brw_compact_inst *dst, const brw_inst *src)
1089 {
1090 brw_compact_inst temp;
1091
1092 assert(brw_inst_cmpt_control(devinfo, src) == 0);
1093
1094 if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) {
1095 if (devinfo->gen >= 8) {
1096 memset(&temp, 0, sizeof(temp));
1097 if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1098 *dst = temp;
1099 return true;
1100 } else {
1101 return false;
1102 }
1103 } else {
1104 return false;
1105 }
1106 }
1107
1108 bool is_immediate =
1109 brw_inst_src0_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE ||
1110 brw_inst_src1_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE;
1111 if (is_immediate &&
1112 (devinfo->gen < 6 ||
1113 !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) {
1114 return false;
1115 }
1116
1117 if (has_unmapped_bits(devinfo, src))
1118 return false;
1119
1120 memset(&temp, 0, sizeof(temp));
1121
1122 #define compact(field) \
1123 brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1124
1125 compact(hw_opcode);
1126 compact(debug_control);
1127
1128 if (!set_control_index(devinfo, &temp, src))
1129 return false;
1130 if (!set_datatype_index(devinfo, &temp, src))
1131 return false;
1132 if (!set_subreg_index(devinfo, &temp, src, is_immediate))
1133 return false;
1134
1135 if (devinfo->gen >= 6) {
1136 compact(acc_wr_control);
1137 } else {
1138 compact(mask_control_ex);
1139 }
1140
1141 compact(cond_modifier);
1142
1143 if (devinfo->gen <= 6)
1144 compact(flag_subreg_nr);
1145
1146 brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1147
1148 if (!set_src0_index(devinfo, &temp, src))
1149 return false;
1150 if (!set_src1_index(devinfo, &temp, src, is_immediate))
1151 return false;
1152
1153 brw_compact_inst_set_dst_reg_nr(devinfo, &temp,
1154 brw_inst_dst_da_reg_nr(devinfo, src));
1155 brw_compact_inst_set_src0_reg_nr(devinfo, &temp,
1156 brw_inst_src0_da_reg_nr(devinfo, src));
1157
1158 if (is_immediate) {
1159 brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
1160 brw_inst_imm_ud(devinfo, src) & 0xff);
1161 } else {
1162 brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
1163 brw_inst_src1_da_reg_nr(devinfo, src));
1164 }
1165
1166 #undef compact
1167
1168 *dst = temp;
1169
1170 return true;
1171 }
1172
1173 static void
1174 set_uncompacted_control(const struct gen_device_info *devinfo, brw_inst *dst,
1175 brw_compact_inst *src)
1176 {
1177 uint32_t uncompacted =
1178 control_index_table[brw_compact_inst_control_index(devinfo, src)];
1179
1180 if (devinfo->gen >= 8) {
1181 brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1182 brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
1183 brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
1184 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
1185 brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
1186 } else {
1187 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1188 brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
1189
1190 if (devinfo->gen == 7)
1191 brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1192 }
1193 }
1194
1195 static void
1196 set_uncompacted_datatype(const struct gen_device_info *devinfo, brw_inst *dst,
1197 brw_compact_inst *src)
1198 {
1199 uint32_t uncompacted =
1200 datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1201
1202 if (devinfo->gen >= 8) {
1203 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1204 brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1205 brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
1206 } else {
1207 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1208 brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1209 }
1210 }
1211
1212 static void
1213 set_uncompacted_subreg(const struct gen_device_info *devinfo, brw_inst *dst,
1214 brw_compact_inst *src)
1215 {
1216 uint16_t uncompacted =
1217 subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1218
1219 brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1220 brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
1221 brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
1222 }
1223
1224 static void
1225 set_uncompacted_src0(const struct gen_device_info *devinfo, brw_inst *dst,
1226 brw_compact_inst *src)
1227 {
1228 uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1229 uint16_t uncompacted = src0_index_table[compacted];
1230
1231 brw_inst_set_bits(dst, 88, 77, uncompacted);
1232 }
1233
1234 static void
1235 set_uncompacted_src1(const struct gen_device_info *devinfo, brw_inst *dst,
1236 brw_compact_inst *src, bool is_immediate)
1237 {
1238 if (is_immediate) {
1239 signed high5 = brw_compact_inst_src1_index(devinfo, src);
1240 /* Replicate top bit of src1_index into high 20 bits of the immediate. */
1241 brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19);
1242 } else {
1243 uint16_t uncompacted =
1244 src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
1245
1246 brw_inst_set_bits(dst, 120, 109, uncompacted);
1247 }
1248 }
1249
1250 static void
1251 set_uncompacted_3src_control_index(const struct gen_device_info *devinfo,
1252 brw_inst *dst, brw_compact_inst *src)
1253 {
1254 assert(devinfo->gen >= 8);
1255
1256 uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1257 uint32_t uncompacted = gen8_3src_control_index_table[compacted];
1258
1259 brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1260 brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
1261
1262 if (devinfo->gen >= 9 || devinfo->is_cherryview)
1263 brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1264 }
1265
1266 static void
1267 set_uncompacted_3src_source_index(const struct gen_device_info *devinfo,
1268 brw_inst *dst, brw_compact_inst *src)
1269 {
1270 assert(devinfo->gen >= 8);
1271
1272 uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
1273 uint64_t uncompacted = gen8_3src_source_index_table[compacted];
1274
1275 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
1276 brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1277 brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
1278 brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
1279 brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
1280
1281 if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1282 brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1283 brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1284 brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
1285 } else {
1286 brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1287 brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1288 }
1289 }
1290
1291 static void
1292 brw_uncompact_3src_instruction(const struct gen_device_info *devinfo,
1293 brw_inst *dst, brw_compact_inst *src)
1294 {
1295 assert(devinfo->gen >= 8);
1296
1297 #define uncompact(field) \
1298 brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1299 #define uncompact_a16(field) \
1300 brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1301
1302 uncompact(hw_opcode);
1303
1304 set_uncompacted_3src_control_index(devinfo, dst, src);
1305 set_uncompacted_3src_source_index(devinfo, dst, src);
1306
1307 uncompact(dst_reg_nr);
1308 uncompact_a16(src0_rep_ctrl);
1309 brw_inst_set_3src_cmpt_control(devinfo, dst, false);
1310 uncompact(debug_control);
1311 uncompact(saturate);
1312 uncompact_a16(src1_rep_ctrl);
1313 uncompact_a16(src2_rep_ctrl);
1314 uncompact(src0_reg_nr);
1315 uncompact(src1_reg_nr);
1316 uncompact(src2_reg_nr);
1317 uncompact_a16(src0_subreg_nr);
1318 uncompact_a16(src1_subreg_nr);
1319 uncompact_a16(src2_subreg_nr);
1320
1321 #undef uncompact
1322 #undef uncompact_a16
1323 }
1324
1325 void
1326 brw_uncompact_instruction(const struct gen_device_info *devinfo, brw_inst *dst,
1327 brw_compact_inst *src)
1328 {
1329 memset(dst, 0, sizeof(*dst));
1330
1331 if (devinfo->gen >= 8 &&
1332 is_3src(devinfo, brw_opcode_decode(
1333 devinfo, brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
1334 brw_uncompact_3src_instruction(devinfo, dst, src);
1335 return;
1336 }
1337
1338 #define uncompact(field) \
1339 brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
1340
1341 uncompact(hw_opcode);
1342 uncompact(debug_control);
1343
1344 set_uncompacted_control(devinfo, dst, src);
1345 set_uncompacted_datatype(devinfo, dst, src);
1346
1347 /* src0/1 register file fields are in the datatype table. */
1348 bool is_immediate = brw_inst_src0_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE ||
1349 brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE;
1350
1351 set_uncompacted_subreg(devinfo, dst, src);
1352
1353 if (devinfo->gen >= 6) {
1354 uncompact(acc_wr_control);
1355 } else {
1356 uncompact(mask_control_ex);
1357 }
1358
1359 uncompact(cond_modifier);
1360
1361 if (devinfo->gen <= 6)
1362 uncompact(flag_subreg_nr);
1363
1364 set_uncompacted_src0(devinfo, dst, src);
1365 set_uncompacted_src1(devinfo, dst, src, is_immediate);
1366
1367 brw_inst_set_dst_da_reg_nr(devinfo, dst,
1368 brw_compact_inst_dst_reg_nr(devinfo, src));
1369 brw_inst_set_src0_da_reg_nr(devinfo, dst,
1370 brw_compact_inst_src0_reg_nr(devinfo, src));
1371
1372 if (is_immediate) {
1373 brw_inst_set_imm_ud(devinfo, dst,
1374 brw_inst_imm_ud(devinfo, dst) |
1375 brw_compact_inst_src1_reg_nr(devinfo, src));
1376 } else {
1377 brw_inst_set_src1_da_reg_nr(devinfo, dst,
1378 brw_compact_inst_src1_reg_nr(devinfo, src));
1379 }
1380
1381 #undef uncompact
1382 }
1383
1384 void brw_debug_compact_uncompact(const struct gen_device_info *devinfo,
1385 brw_inst *orig,
1386 brw_inst *uncompacted)
1387 {
1388 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
1389 devinfo->gen);
1390
1391 fprintf(stderr, " before: ");
1392 brw_disassemble_inst(stderr, devinfo, orig, true);
1393
1394 fprintf(stderr, " after: ");
1395 brw_disassemble_inst(stderr, devinfo, uncompacted, false);
1396
1397 uint32_t *before_bits = (uint32_t *)orig;
1398 uint32_t *after_bits = (uint32_t *)uncompacted;
1399 fprintf(stderr, " changed bits:\n");
1400 for (int i = 0; i < 128; i++) {
1401 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
1402 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
1403
1404 if (before != after) {
1405 fprintf(stderr, " bit %d, %s to %s\n", i,
1406 before ? "set" : "unset",
1407 after ? "set" : "unset");
1408 }
1409 }
1410 }
1411
/**
 * Returns compacted_counts[old_target_ip] - compacted_counts[old_ip], i.e.
 * the change in the running compacted-instruction count between the two
 * pre-compaction instruction indices.  The result is negative when the
 * target's count is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1419
1420 static void
1421 update_uip_jip(const struct gen_device_info *devinfo, brw_inst *insn,
1422 int this_old_ip, int *compacted_counts)
1423 {
1424 /* JIP and UIP are in units of:
1425 * - bytes on Gen8+; and
1426 * - compacted instructions on Gen6+.
1427 */
1428 int shift = devinfo->gen >= 8 ? 3 : 0;
1429
1430 int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
1431 jip_compacted -= compacted_between(this_old_ip,
1432 this_old_ip + (jip_compacted / 2),
1433 compacted_counts);
1434 brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
1435
1436 if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
1437 brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
1438 (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7))
1439 return;
1440
1441 int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
1442 uip_compacted -= compacted_between(this_old_ip,
1443 this_old_ip + (uip_compacted / 2),
1444 compacted_counts);
1445 brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
1446 }
1447
1448 static void
1449 update_gen4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
1450 int this_old_ip, int *compacted_counts)
1451 {
1452 assert(devinfo->gen == 5 || devinfo->is_g4x);
1453
1454 /* Jump Count is in units of:
1455 * - uncompacted instructions on G45; and
1456 * - compacted instructions on Gen5.
1457 */
1458 int shift = devinfo->is_g4x ? 1 : 0;
1459
1460 int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift;
1461
1462 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1463
1464 int this_compacted_count = compacted_counts[this_old_ip];
1465 int target_compacted_count = compacted_counts[target_old_ip];
1466
1467 jump_count_compacted -= (target_compacted_count - this_compacted_count);
1468 brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift);
1469 }
1470
1471 void
1472 brw_init_compaction_tables(const struct gen_device_info *devinfo)
1473 {
1474 assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
1475 assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
1476 assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
1477 assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
1478 assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
1479 assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
1480 assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
1481 assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
1482 assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
1483 assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
1484 assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
1485 assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
1486 assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
1487 assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
1488 assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
1489 assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
1490 assert(gen11_datatype_table[ARRAY_SIZE(gen11_datatype_table) - 1] != 0);
1491
1492 switch (devinfo->gen) {
1493 case 12:
1494 control_index_table = NULL;
1495 datatype_table = NULL;
1496 subreg_table = NULL;
1497 src0_index_table = NULL;
1498 src1_index_table = NULL;
1499 break;
1500 case 11:
1501 control_index_table = gen8_control_index_table;
1502 datatype_table = gen11_datatype_table;
1503 subreg_table = gen8_subreg_table;
1504 src0_index_table = gen8_src_index_table;
1505 src1_index_table = gen8_src_index_table;
1506 break;
1507 case 10:
1508 case 9:
1509 case 8:
1510 control_index_table = gen8_control_index_table;
1511 datatype_table = gen8_datatype_table;
1512 subreg_table = gen8_subreg_table;
1513 src0_index_table = gen8_src_index_table;
1514 src1_index_table = gen8_src_index_table;
1515 break;
1516 case 7:
1517 control_index_table = gen7_control_index_table;
1518 datatype_table = gen7_datatype_table;
1519 subreg_table = gen7_subreg_table;
1520 src0_index_table = gen7_src_index_table;
1521 src1_index_table = gen7_src_index_table;
1522 break;
1523 case 6:
1524 control_index_table = gen6_control_index_table;
1525 datatype_table = gen6_datatype_table;
1526 subreg_table = gen6_subreg_table;
1527 src0_index_table = gen6_src_index_table;
1528 src1_index_table = gen6_src_index_table;
1529 break;
1530 case 5:
1531 case 4:
1532 control_index_table = g45_control_index_table;
1533 datatype_table = g45_datatype_table;
1534 subreg_table = g45_subreg_table;
1535 src0_index_table = g45_src_index_table;
1536 src1_index_table = g45_src_index_table;
1537 break;
1538 default:
1539 unreachable("unknown generation");
1540 }
1541 }
1542
1543 void
1544 brw_compact_instructions(struct brw_codegen *p, int start_offset,
1545 struct disasm_info *disasm)
1546 {
1547 if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION) || p->devinfo->gen > 11)
1548 return;
1549
1550 const struct gen_device_info *devinfo = p->devinfo;
1551 void *store = p->store + start_offset / 16;
1552 /* For an instruction at byte offset 16*i before compaction, this is the
1553 * number of compacted instructions minus the number of padding NOP/NENOPs
1554 * that preceded it.
1555 */
1556 int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
1557 /* For an instruction at byte offset 8*i after compaction, this was its IP
1558 * (in 16-byte units) before compaction.
1559 */
1560 int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];
1561
1562 if (devinfo->gen == 4 && !devinfo->is_g4x)
1563 return;
1564
1565 int offset = 0;
1566 int compacted_count = 0;
1567 for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
1568 src_offset += sizeof(brw_inst)) {
1569 brw_inst *src = store + src_offset;
1570 void *dst = store + offset;
1571
1572 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1573 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1574
1575 brw_inst inst = precompact(devinfo, *src);
1576 brw_inst saved = inst;
1577
1578 if (brw_try_compact_instruction(devinfo, dst, &inst)) {
1579 compacted_count++;
1580
1581 if (INTEL_DEBUG) {
1582 brw_inst uncompacted;
1583 brw_uncompact_instruction(devinfo, &uncompacted, dst);
1584 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
1585 brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
1586 }
1587 }
1588
1589 offset += sizeof(brw_compact_inst);
1590 } else {
1591 /* All uncompacted instructions need to be aligned on G45. */
1592 if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
1593 brw_compact_inst *align = store + offset;
1594 memset(align, 0, sizeof(*align));
1595 brw_compact_inst_set_hw_opcode(
1596 devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NENOP));
1597 brw_compact_inst_set_cmpt_control(devinfo, align, true);
1598 offset += sizeof(brw_compact_inst);
1599 compacted_count--;
1600 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1601 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1602
1603 dst = store + offset;
1604 }
1605
1606 /* If we didn't compact this intruction, we need to move it down into
1607 * place.
1608 */
1609 if (offset != src_offset) {
1610 memmove(dst, src, sizeof(brw_inst));
1611 }
1612 offset += sizeof(brw_inst);
1613 }
1614 }
1615
1616 /* Add an entry for the ending offset of the program. This greatly
1617 * simplifies the linked list walk at the end of the function.
1618 */
1619 old_ip[offset / sizeof(brw_compact_inst)] =
1620 (p->next_insn_offset - start_offset) / sizeof(brw_inst);
1621
1622 /* Fix up control flow offsets. */
1623 p->next_insn_offset = start_offset + offset;
1624 for (offset = 0; offset < p->next_insn_offset - start_offset;
1625 offset = next_offset(devinfo, store, offset)) {
1626 brw_inst *insn = store + offset;
1627 int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
1628 int this_compacted_count = compacted_counts[this_old_ip];
1629
1630 switch (brw_inst_opcode(devinfo, insn)) {
1631 case BRW_OPCODE_BREAK:
1632 case BRW_OPCODE_CONTINUE:
1633 case BRW_OPCODE_HALT:
1634 if (devinfo->gen >= 6) {
1635 update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
1636 } else {
1637 update_gen4_jump_count(devinfo, insn, this_old_ip,
1638 compacted_counts);
1639 }
1640 break;
1641
1642 case BRW_OPCODE_IF:
1643 case BRW_OPCODE_IFF:
1644 case BRW_OPCODE_ELSE:
1645 case BRW_OPCODE_ENDIF:
1646 case BRW_OPCODE_WHILE:
1647 if (devinfo->gen >= 7) {
1648 if (brw_inst_cmpt_control(devinfo, insn)) {
1649 brw_inst uncompacted;
1650 brw_uncompact_instruction(devinfo, &uncompacted,
1651 (brw_compact_inst *)insn);
1652
1653 update_uip_jip(devinfo, &uncompacted, this_old_ip,
1654 compacted_counts);
1655
1656 bool ret = brw_try_compact_instruction(devinfo,
1657 (brw_compact_inst *)insn,
1658 &uncompacted);
1659 assert(ret); (void)ret;
1660 } else {
1661 update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
1662 }
1663 } else if (devinfo->gen == 6) {
1664 assert(!brw_inst_cmpt_control(devinfo, insn));
1665
1666 /* Jump Count is in units of compacted instructions on Gen6. */
1667 int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);
1668
1669 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1670 int target_compacted_count = compacted_counts[target_old_ip];
1671 jump_count_compacted -= (target_compacted_count - this_compacted_count);
1672 brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
1673 } else {
1674 update_gen4_jump_count(devinfo, insn, this_old_ip,
1675 compacted_counts);
1676 }
1677 break;
1678
1679 case BRW_OPCODE_ADD:
1680 /* Add instructions modifying the IP register use an immediate src1,
1681 * and Gens that use this cannot compact instructions with immediate
1682 * operands.
1683 */
1684 if (brw_inst_cmpt_control(devinfo, insn))
1685 break;
1686
1687 if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
1688 brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
1689 assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
1690
1691 int shift = 3;
1692 int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
1693
1694 int target_old_ip = this_old_ip + (jump_compacted / 2);
1695 int target_compacted_count = compacted_counts[target_old_ip];
1696 jump_compacted -= (target_compacted_count - this_compacted_count);
1697 brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
1698 }
1699 break;
1700
1701 default:
1702 break;
1703 }
1704 }
1705
1706 /* p->nr_insn is counting the number of uncompacted instructions still, so
1707 * divide. We do want to be sure there's a valid instruction in any
1708 * alignment padding, so that the next compression pass (for the FS 8/16
1709 * compile passes) parses correctly.
1710 */
1711 if (p->next_insn_offset & sizeof(brw_compact_inst)) {
1712 brw_compact_inst *align = store + offset;
1713 memset(align, 0, sizeof(*align));
1714 brw_compact_inst_set_hw_opcode(
1715 devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NOP));
1716 brw_compact_inst_set_cmpt_control(devinfo, align, true);
1717 p->next_insn_offset += sizeof(brw_compact_inst);
1718 }
1719 p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
1720
1721 /* Update the instruction offsets for each group. */
1722 if (disasm) {
1723 int offset = 0;
1724
1725 foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
1726 while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1727 sizeof(brw_inst) != group->offset) {
1728 assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1729 sizeof(brw_inst) < group->offset);
1730 offset = next_offset(devinfo, store, offset);
1731 }
1732
1733 group->offset = start_offset + offset;
1734
1735 offset = next_offset(devinfo, store, offset);
1736 }
1737 }
1738 }