23aac0a988f9a375f3c6082af4dbadfbaa3a68d3
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_compact.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gen5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gen6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gen7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gen6
62 * since the jump count field is not in DW3.
63 *
64 * break JIP/UIP
65 * cont JIP/UIP
66 * halt JIP/UIP
67 * if JIP/UIP
68 * else JIP (plus UIP on BDW+)
69 * endif JIP
70 * while JIP (must be negative)
71 *
72 * Gen 8 adds support for compacting 3-src instructions.
73 */
74
75 #include "brw_context.h"
76 #include "brw_eu.h"
77 #include "intel_asm_annotation.h"
78 #include "util/u_atomic.h" /* for p_atomic_cmpxchg */
79
/* G45 control index table: 32 entries, each written as a 17-bit pattern.
 * A compacted instruction stores a position in a table of this kind instead
 * of the control bits themselves (see set_control_index()).
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100
};
114
/* G45 datatype table: 32 entries, each written as an 18-bit pattern.
 * Searched through datatype_table by set_datatype_index() when this table is
 * the active one.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000
};
149
/* G45 subregister table: 32 entries, each written as a 15-bit pattern
 * (three 5-bit fields, see set_subreg_index()).
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000
};
184
/* G45 source index table: 32 entries, each written as a 12-bit pattern
 * (the width of the source fields matched by get_src_index()).
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100
};
219
/* Gen6 control index table: 32 entries, each written as a 17-bit pattern.
 * Same role as the G45 table, with a different set of patterns.
 */
static const uint32_t gen6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001
};
254
/* Gen6 datatype table: 32 entries, each written as an 18-bit pattern.
 * Note that one pattern (0b001111011110111101) appears twice; the lookup in
 * set_datatype_index() returns the first match.
 */
static const uint32_t gen6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
289
/* Gen6 subregister table: 32 entries, each written as a 15-bit pattern
 * (three 5-bit fields, see set_subreg_index()).
 */
static const uint16_t gen6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
324
/* Gen6 source index table: 32 entries, each written as a 12-bit pattern
 * (the width of the source fields matched by get_src_index()).
 */
static const uint16_t gen6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
359
/* Gen7 control index table: 32 entries, each written as a 19-bit pattern.
 * The two extra high bits relative to the older tables correspond to the
 * flag register/subregister bits that set_control_index() folds in on gen7.
 */
static const uint32_t gen7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
394
/* Gen7 datatype table: 32 entries, each written as an 18-bit pattern. */
static const uint32_t gen7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000
};
429
/* Gen7 subregister table: 32 entries, each written as a 15-bit pattern
 * (three 5-bit fields, see set_subreg_index()).
 */
static const uint16_t gen7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
464
/* Gen7 source index table: 32 entries, each written as a 12-bit pattern
 * (the width of the source fields matched by get_src_index()).
 */
static const uint16_t gen7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
499
/* Gen8 control index table: 32 entries, each written as a 19-bit pattern. */
static const uint32_t gen8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
534
/* Gen8 datatype table: 32 entries, each written as a 21-bit pattern
 * (the BDW+ width noted in set_datatype_index()).
 */
static const uint32_t gen8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000
};
569
/* Gen8 subregister table: 32 entries, each written as a 15-bit pattern
 * (three 5-bit fields, see set_subreg_index()).
 */
static const uint16_t gen8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
604
/* Gen8 source index table: 32 entries, each written as a 12-bit pattern
 * (the width of the source fields matched by get_src_index()).
 */
static const uint16_t gen8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
639
640 /* This is actually the control index table for Cherryview (26 bits), but the
641 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
642 * the start.
643 *
644 * The low 24 bits have the same mappings on both hardware.
645 */
/* Four 26-bit patterns, searched by set_3src_control_index() and expanded by
 * set_uncompacted_3src_control_index().
 */
static const uint32_t gen8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001
};
652
/* This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 */
659 */
/* Four 49-bit patterns, searched by set_3src_source_index() and expanded by
 * set_uncompacted_3src_source_index().  The entries differ only in their low
 * six bits.
 */
static const uint64_t gen8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000
};
666
/* Tables currently in effect.  The two-source compaction and uncompaction
 * helpers in this file read through these pointers and index them with
 * values in the range 0..31.  They are not assigned anywhere in this part of
 * the file; presumably setup code elsewhere points them at the tables for the
 * running hardware generation -- verify against the rest of the file.
 */
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint16_t *subreg_table;
static const uint16_t *src_index_table;
671
672 static bool
673 set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
674 {
675 uint32_t uncompacted = brw->gen >= 8 /* 17b/G45; 19b/IVB+ */
676 ? (brw_inst_bits(src, 33, 31) << 16) | /* 3b */
677 (brw_inst_bits(src, 23, 12) << 4) | /* 12b */
678 (brw_inst_bits(src, 10, 9) << 2) | /* 2b */
679 (brw_inst_bits(src, 34, 34) << 1) | /* 1b */
680 (brw_inst_bits(src, 8, 8)) /* 1b */
681 : (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
682 (brw_inst_bits(src, 23, 8)); /* 16b */
683
684 /* On gen7, the flag register and subregister numbers are integrated into
685 * the control index.
686 */
687 if (brw->gen == 7)
688 uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
689
690 for (int i = 0; i < 32; i++) {
691 if (control_index_table[i] == uncompacted) {
692 brw_compact_inst_set_control_index(dst, i);
693 return true;
694 }
695 }
696
697 return false;
698 }
699
700 static bool
701 set_datatype_index(struct brw_context *brw, brw_compact_inst *dst,
702 brw_inst *src)
703 {
704 uint32_t uncompacted = brw->gen >= 8 /* 18b/G45+; 21b/BDW+ */
705 ? (brw_inst_bits(src, 63, 61) << 18) | /* 3b */
706 (brw_inst_bits(src, 94, 89) << 12) | /* 6b */
707 (brw_inst_bits(src, 46, 35)) /* 12b */
708 : (brw_inst_bits(src, 63, 61) << 15) | /* 3b */
709 (brw_inst_bits(src, 46, 32)); /* 15b */
710
711 for (int i = 0; i < 32; i++) {
712 if (datatype_table[i] == uncompacted) {
713 brw_compact_inst_set_datatype_index(dst, i);
714 return true;
715 }
716 }
717
718 return false;
719 }
720
721 static bool
722 set_subreg_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
723 bool is_immediate)
724 {
725 uint16_t uncompacted = /* 15b */
726 (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
727 (brw_inst_bits(src, 68, 64) << 5); /* 5b */
728
729 if (!is_immediate)
730 uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
731
732 for (int i = 0; i < 32; i++) {
733 if (subreg_table[i] == uncompacted) {
734 brw_compact_inst_set_subreg_index(dst, i);
735 return true;
736 }
737 }
738
739 return false;
740 }
741
742 static bool
743 get_src_index(uint16_t uncompacted,
744 uint16_t *compacted)
745 {
746 for (int i = 0; i < 32; i++) {
747 if (src_index_table[i] == uncompacted) {
748 *compacted = i;
749 return true;
750 }
751 }
752
753 return false;
754 }
755
756 static bool
757 set_src0_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
758 {
759 uint16_t compacted;
760 uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
761
762 if (!get_src_index(uncompacted, &compacted))
763 return false;
764
765 brw_compact_inst_set_src0_index(dst, compacted);
766
767 return true;
768 }
769
770 static bool
771 set_src1_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
772 bool is_immediate)
773 {
774 uint16_t compacted;
775
776 if (is_immediate) {
777 compacted = (brw_inst_imm_ud(brw->intelScreen->devinfo, src) >> 8) & 0x1f;
778 } else {
779 uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
780
781 if (!get_src_index(uncompacted, &compacted))
782 return false;
783 }
784
785 brw_compact_inst_set_src1_index(dst, compacted);
786
787 return true;
788 }
789
790 static bool
791 set_3src_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
792 {
793 assert(brw->gen >= 8);
794
795 uint32_t uncompacted = /* 24b/BDW; 26b/CHV */
796 (brw_inst_bits(src, 34, 32) << 21) | /* 3b */
797 (brw_inst_bits(src, 28, 8)); /* 21b */
798
799 if (brw->gen >= 9 || brw->is_cherryview)
800 uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
801
802 for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
803 if (gen8_3src_control_index_table[i] == uncompacted) {
804 brw_compact_inst_set_3src_control_index(dst, i);
805 return true;
806 }
807 }
808
809 return false;
810 }
811
812 static bool
813 set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
814 {
815 assert(brw->gen >= 8);
816
817 uint64_t uncompacted = /* 46b/BDW; 49b/CHV */
818 (brw_inst_bits(src, 83, 83) << 43) | /* 1b */
819 (brw_inst_bits(src, 114, 107) << 35) | /* 8b */
820 (brw_inst_bits(src, 93, 86) << 27) | /* 8b */
821 (brw_inst_bits(src, 72, 65) << 19) | /* 8b */
822 (brw_inst_bits(src, 55, 37)); /* 19b */
823
824 if (brw->gen >= 9 || brw->is_cherryview) {
825 uncompacted |=
826 (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
827 (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
828 (brw_inst_bits(src, 84, 84) << 44); /* 1b */
829 } else {
830 uncompacted |=
831 (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
832 (brw_inst_bits(src, 104, 104) << 44); /* 1b */
833 }
834
835 for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
836 if (gen8_3src_source_index_table[i] == uncompacted) {
837 brw_compact_inst_set_3src_source_index(dst, i);
838 return true;
839 }
840 }
841
842 return false;
843 }
844
845 static bool
846 has_unmapped_bits(struct brw_context *brw, brw_inst *src)
847 {
848 /* Check for instruction bits that don't map to any of the fields of the
849 * compacted instruction. The instruction cannot be compacted if any of
850 * them are set. They overlap with:
851 * - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
852 * - Dst.AddrImm[9] (bit 47 on Gen8)
853 * - Src0.AddrImm[9] (bit 95 on Gen8)
854 * - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
855 * - UIP[31] (bit 95 on Gen8)
856 */
857 if (brw->gen >= 8) {
858 assert(!brw_inst_bits(src, 7, 7));
859 return brw_inst_bits(src, 95, 95) ||
860 brw_inst_bits(src, 47, 47) ||
861 brw_inst_bits(src, 11, 11);
862 } else {
863 assert(!brw_inst_bits(src, 7, 7) &&
864 !(brw->gen < 7 && brw_inst_bits(src, 90, 90)));
865 return brw_inst_bits(src, 95, 91) ||
866 brw_inst_bits(src, 47, 47);
867 }
868 }
869
870 static bool
871 has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src)
872 {
873 /* Check for three-source instruction bits that don't map to any of the
874 * fields of the compacted instruction. All of them seem to be reserved
875 * bits currently.
876 */
877 if (brw->gen >= 9 || brw->is_cherryview) {
878 assert(!brw_inst_bits(src, 127, 127) &&
879 !brw_inst_bits(src, 7, 7));
880 } else {
881 assert(brw->gen >= 8);
882 assert(!brw_inst_bits(src, 127, 126) &&
883 !brw_inst_bits(src, 105, 105) &&
884 !brw_inst_bits(src, 84, 84) &&
885 !brw_inst_bits(src, 36, 35) &&
886 !brw_inst_bits(src, 7, 7));
887 }
888
889 return false;
890 }
891
892 static bool
893 brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst *dst,
894 brw_inst *src)
895 {
896 assert(brw->gen >= 8);
897
898 if (has_3src_unmapped_bits(brw, src))
899 return false;
900
901 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
902 #define compact(field) \
903 brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(devinfo, src))
904
905 compact(opcode);
906
907 if (!set_3src_control_index(brw, dst, src))
908 return false;
909
910 if (!set_3src_source_index(brw, dst, src))
911 return false;
912
913 compact(dst_reg_nr);
914 compact(src0_rep_ctrl);
915 brw_compact_inst_set_3src_cmpt_control(dst, true);
916 compact(debug_control);
917 compact(saturate);
918 compact(src1_rep_ctrl);
919 compact(src2_rep_ctrl);
920 compact(src0_reg_nr);
921 compact(src1_reg_nr);
922 compact(src2_reg_nr);
923 compact(src0_subreg_nr);
924 compact(src1_subreg_nr);
925 compact(src2_subreg_nr);
926
927 #undef compact
928
929 return true;
930 }
931
/* A compacted instruction carries the low 12 bits of an immediate verbatim,
 * plus a 13th bit that is replicated through the remaining high 20 bits.
 *
 * In practice that covers 12-bit integers, 0.0f, and a limited set of packed
 * vectors.
 *
 * Returns true when the high 20 bits of imm are either all zero or all one.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* The low 12 bits are always representable; only the rest matters. */
   const unsigned high20 = imm & ~0xfffu;

   /* The single replicated bit makes the high 20 bits all-0 or all-1. */
   return high20 == 0 || high20 == 0xfffff000;
}
947
948 /* Returns whether an opcode takes three sources. */
949 static bool
950 is_3src(uint32_t op)
951 {
952 return opcode_descs[op].nsrc == 3;
953 }
954
955 /**
956 * Tries to compact instruction src into dst.
957 *
958 * It doesn't modify dst unless src is compactable, which is relied on by
959 * brw_compact_instructions().
960 */
961 bool
962 brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst,
963 brw_inst *src)
964 {
965 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
966 brw_compact_inst temp;
967
968 assert(brw_inst_cmpt_control(devinfo, src) == 0);
969
970 if (is_3src(brw_inst_opcode(devinfo, src))) {
971 if (brw->gen >= 8) {
972 memset(&temp, 0, sizeof(temp));
973 if (brw_try_compact_3src_instruction(brw, &temp, src)) {
974 *dst = temp;
975 return true;
976 } else {
977 return false;
978 }
979 } else {
980 return false;
981 }
982 }
983
984 bool is_immediate =
985 brw_inst_src0_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE ||
986 brw_inst_src1_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE;
987 if (is_immediate &&
988 (brw->gen < 6 || !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) {
989 return false;
990 }
991
992 if (has_unmapped_bits(brw, src))
993 return false;
994
995 memset(&temp, 0, sizeof(temp));
996
997 brw_compact_inst_set_opcode(&temp, brw_inst_opcode(devinfo, src));
998 brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(devinfo, src));
999 if (!set_control_index(brw, &temp, src))
1000 return false;
1001 if (!set_datatype_index(brw, &temp, src))
1002 return false;
1003 if (!set_subreg_index(brw, &temp, src, is_immediate))
1004 return false;
1005 brw_compact_inst_set_acc_wr_control(&temp,
1006 brw_inst_acc_wr_control(devinfo, src));
1007 brw_compact_inst_set_cond_modifier(&temp,
1008 brw_inst_cond_modifier(devinfo, src));
1009 if (brw->gen <= 6)
1010 brw_compact_inst_set_flag_subreg_nr(&temp,
1011 brw_inst_flag_subreg_nr(devinfo, src));
1012 brw_compact_inst_set_cmpt_control(&temp, true);
1013 if (!set_src0_index(brw, &temp, src))
1014 return false;
1015 if (!set_src1_index(brw, &temp, src, is_immediate))
1016 return false;
1017 brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(devinfo, src));
1018 brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(devinfo, src));
1019 if (is_immediate) {
1020 brw_compact_inst_set_src1_reg_nr(&temp,
1021 brw_inst_imm_ud(devinfo, src) & 0xff);
1022 } else {
1023 brw_compact_inst_set_src1_reg_nr(&temp,
1024 brw_inst_src1_da_reg_nr(devinfo, src));
1025 }
1026
1027 *dst = temp;
1028
1029 return true;
1030 }
1031
1032 static void
1033 set_uncompacted_control(struct brw_context *brw, brw_inst *dst,
1034 brw_compact_inst *src)
1035 {
1036 uint32_t uncompacted =
1037 control_index_table[brw_compact_inst_control_index(src)];
1038
1039 if (brw->gen >= 8) {
1040 brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1041 brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
1042 brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
1043 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
1044 brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
1045 } else {
1046 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1047 brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
1048
1049 if (brw->gen == 7)
1050 brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1051 }
1052 }
1053
1054 static void
1055 set_uncompacted_datatype(struct brw_context *brw, brw_inst *dst,
1056 brw_compact_inst *src)
1057 {
1058 uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
1059
1060 if (brw->gen >= 8) {
1061 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1062 brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1063 brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
1064 } else {
1065 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1066 brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1067 }
1068 }
1069
1070 static void
1071 set_uncompacted_subreg(struct brw_context *brw, brw_inst *dst,
1072 brw_compact_inst *src)
1073 {
1074 uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
1075
1076 brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1077 brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
1078 brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
1079 }
1080
1081 static void
1082 set_uncompacted_src0(struct brw_context *brw, brw_inst *dst,
1083 brw_compact_inst *src)
1084 {
1085 uint32_t compacted = brw_compact_inst_src0_index(src);
1086 uint16_t uncompacted = src_index_table[compacted];
1087
1088 brw_inst_set_bits(dst, 88, 77, uncompacted);
1089 }
1090
1091 static void
1092 set_uncompacted_src1(struct brw_context *brw, brw_inst *dst,
1093 brw_compact_inst *src, bool is_immediate)
1094 {
1095 if (is_immediate) {
1096 signed high5 = brw_compact_inst_src1_index(src);
1097 /* Replicate top bit of src1_index into high 20 bits of the immediate. */
1098 brw_inst_set_imm_ud(brw->intelScreen->devinfo, dst, (high5 << 27) >> 19);
1099 } else {
1100 uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
1101
1102 brw_inst_set_bits(dst, 120, 109, uncompacted);
1103 }
1104 }
1105
1106 static void
1107 set_uncompacted_3src_control_index(struct brw_context *brw, brw_inst *dst,
1108 brw_compact_inst *src)
1109 {
1110 assert(brw->gen >= 8);
1111
1112 uint32_t compacted = brw_compact_inst_3src_control_index(src);
1113 uint32_t uncompacted = gen8_3src_control_index_table[compacted];
1114
1115 brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1116 brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
1117
1118 if (brw->gen >= 9 || brw->is_cherryview)
1119 brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1120 }
1121
1122 static void
1123 set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst,
1124 brw_compact_inst *src)
1125 {
1126 assert(brw->gen >= 8);
1127
1128 uint32_t compacted = brw_compact_inst_3src_source_index(src);
1129 uint64_t uncompacted = gen8_3src_source_index_table[compacted];
1130
1131 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
1132 brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1133 brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
1134 brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
1135 brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
1136
1137 if (brw->gen >= 9 || brw->is_cherryview) {
1138 brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1139 brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1140 brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
1141 } else {
1142 brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1143 brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1144 }
1145 }
1146
1147 static void
1148 brw_uncompact_3src_instruction(struct brw_context *brw, brw_inst *dst,
1149 brw_compact_inst *src)
1150 {
1151 assert(brw->gen >= 8);
1152
1153 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1154 #define uncompact(field) \
1155 brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(src))
1156
1157 uncompact(opcode);
1158
1159 set_uncompacted_3src_control_index(brw, dst, src);
1160 set_uncompacted_3src_source_index(brw, dst, src);
1161
1162 uncompact(dst_reg_nr);
1163 uncompact(src0_rep_ctrl);
1164 brw_inst_set_3src_cmpt_control(devinfo, dst, false);
1165 uncompact(debug_control);
1166 uncompact(saturate);
1167 uncompact(src1_rep_ctrl);
1168 uncompact(src2_rep_ctrl);
1169 uncompact(src0_reg_nr);
1170 uncompact(src1_reg_nr);
1171 uncompact(src2_reg_nr);
1172 uncompact(src0_subreg_nr);
1173 uncompact(src1_subreg_nr);
1174 uncompact(src2_subreg_nr);
1175
1176 #undef uncompact
1177 }
1178
1179 void
1180 brw_uncompact_instruction(struct brw_context *brw, brw_inst *dst,
1181 brw_compact_inst *src)
1182 {
1183 memset(dst, 0, sizeof(*dst));
1184
1185 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1186 if (brw->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) {
1187 brw_uncompact_3src_instruction(brw, dst, src);
1188 return;
1189 }
1190
1191 brw_inst_set_opcode(devinfo, dst, brw_compact_inst_opcode(src));
1192 brw_inst_set_debug_control(devinfo, dst, brw_compact_inst_debug_control(src));
1193
1194 set_uncompacted_control(brw, dst, src);
1195 set_uncompacted_datatype(brw, dst, src);
1196
1197 /* src0/1 register file fields are in the datatype table. */
1198 bool is_immediate = brw_inst_src0_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE ||
1199 brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE;
1200
1201 set_uncompacted_subreg(brw, dst, src);
1202 brw_inst_set_acc_wr_control(devinfo, dst, brw_compact_inst_acc_wr_control(src));
1203 brw_inst_set_cond_modifier(devinfo, dst, brw_compact_inst_cond_modifier(src));
1204 if (brw->gen <= 6)
1205 brw_inst_set_flag_subreg_nr(devinfo, dst,
1206 brw_compact_inst_flag_subreg_nr(src));
1207 set_uncompacted_src0(brw, dst, src);
1208 set_uncompacted_src1(brw, dst, src, is_immediate);
1209 brw_inst_set_dst_da_reg_nr(devinfo, dst, brw_compact_inst_dst_reg_nr(src));
1210 brw_inst_set_src0_da_reg_nr(devinfo, dst, brw_compact_inst_src0_reg_nr(src));
1211 if (is_immediate) {
1212 brw_inst_set_imm_ud(devinfo, dst,
1213 brw_inst_imm_ud(devinfo, dst) |
1214 brw_compact_inst_src1_reg_nr(src));
1215 } else {
1216 brw_inst_set_src1_da_reg_nr(devinfo, dst, brw_compact_inst_src1_reg_nr(src));
1217 }
1218 }
1219
1220 void brw_debug_compact_uncompact(struct brw_context *brw,
1221 brw_inst *orig,
1222 brw_inst *uncompacted)
1223 {
1224 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
1225 brw->gen);
1226
1227 fprintf(stderr, " before: ");
1228 brw_disassemble_inst(stderr, brw, orig, true);
1229
1230 fprintf(stderr, " after: ");
1231 brw_disassemble_inst(stderr, brw, uncompacted, false);
1232
1233 uint32_t *before_bits = (uint32_t *)orig;
1234 uint32_t *after_bits = (uint32_t *)uncompacted;
1235 fprintf(stderr, " changed bits:\n");
1236 for (int i = 0; i < 128; i++) {
1237 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
1238 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
1239
1240 if (before != after) {
1241 fprintf(stderr, " bit %d, %s to %s\n", i,
1242 before ? "set" : "unset",
1243 after ? "set" : "unset");
1244 }
1245 }
1246 }
1247
/**
 * Return the difference between the compacted-instruction counts recorded
 * for `old_target_ip` and for `old_ip`.
 *
 * Both arguments index `compacted_counts`.  The result is negative when the
 * count recorded for the target is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1255
1256 static void
1257 update_uip_jip(struct brw_context *brw, brw_inst *insn,
1258 int this_old_ip, int *compacted_counts)
1259 {
1260 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1261 /* JIP and UIP are in units of:
1262 * - bytes on Gen8+; and
1263 * - compacted instructions on Gen6+.
1264 */
1265 int shift = brw->gen >= 8 ? 3 : 0;
1266
1267 int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
1268 jip_compacted -= compacted_between(this_old_ip,
1269 this_old_ip + (jip_compacted / 2),
1270 compacted_counts);
1271 brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
1272
1273 if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
1274 brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
1275 (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && brw->gen <= 7))
1276 return;
1277
1278 int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
1279 uip_compacted -= compacted_between(this_old_ip,
1280 this_old_ip + (uip_compacted / 2),
1281 compacted_counts);
1282 brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
1283 }
1284
1285 static void
1286 update_gen4_jump_count(struct brw_context *brw, brw_inst *insn,
1287 int this_old_ip, int *compacted_counts)
1288 {
1289 assert(brw->gen == 5 || brw->is_g4x);
1290
1291 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1292 /* Jump Count is in units of:
1293 * - uncompacted instructions on G45; and
1294 * - compacted instructions on Gen5.
1295 */
1296 int shift = brw->is_g4x ? 1 : 0;
1297
1298 int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift;
1299
1300 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1301
1302 int this_compacted_count = compacted_counts[this_old_ip];
1303 int target_compacted_count = compacted_counts[target_old_ip];
1304
1305 jump_count_compacted -= (target_compacted_count - this_compacted_count);
1306 brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift);
1307 }
1308
1309 void
1310 brw_init_compaction_tables(struct brw_context *brw)
1311 {
1312 static bool initialized;
1313 if (initialized || p_atomic_cmpxchg(&initialized, false, true) != false)
1314 return;
1315
1316 assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
1317 assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
1318 assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
1319 assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
1320 assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
1321 assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
1322 assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
1323 assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
1324 assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
1325 assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
1326 assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
1327 assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
1328 assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
1329 assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
1330 assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
1331 assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
1332
1333 switch (brw->gen) {
1334 case 9:
1335 case 8:
1336 control_index_table = gen8_control_index_table;
1337 datatype_table = gen8_datatype_table;
1338 subreg_table = gen8_subreg_table;
1339 src_index_table = gen8_src_index_table;
1340 break;
1341 case 7:
1342 control_index_table = gen7_control_index_table;
1343 datatype_table = gen7_datatype_table;
1344 subreg_table = gen7_subreg_table;
1345 src_index_table = gen7_src_index_table;
1346 break;
1347 case 6:
1348 control_index_table = gen6_control_index_table;
1349 datatype_table = gen6_datatype_table;
1350 subreg_table = gen6_subreg_table;
1351 src_index_table = gen6_src_index_table;
1352 break;
1353 case 5:
1354 case 4:
1355 control_index_table = g45_control_index_table;
1356 datatype_table = g45_datatype_table;
1357 subreg_table = g45_subreg_table;
1358 src_index_table = g45_src_index_table;
1359 break;
1360 default:
1361 unreachable("unknown generation");
1362 }
1363 }
1364
1365 void
1366 brw_compact_instructions(struct brw_compile *p, int start_offset,
1367 int num_annotations, struct annotation *annotation)
1368 {
1369 struct brw_context *brw = p->brw;
1370 const struct brw_device_info *devinfo = p->devinfo;
1371 void *store = p->store + start_offset / 16;
1372 /* For an instruction at byte offset 16*i before compaction, this is the
1373 * number of compacted instructions minus the number of padding NOP/NENOPs
1374 * that preceded it.
1375 */
1376 int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
1377 /* For an instruction at byte offset 8*i after compaction, this was its IP
1378 * (in 16-byte units) before compaction.
1379 */
1380 int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst)];
1381
1382 if (brw->gen == 4 && !brw->is_g4x)
1383 return;
1384
1385 int offset = 0;
1386 int compacted_count = 0;
1387 for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
1388 src_offset += sizeof(brw_inst)) {
1389 brw_inst *src = store + src_offset;
1390 void *dst = store + offset;
1391
1392 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1393 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1394
1395 brw_inst saved = *src;
1396
1397 if (brw_try_compact_instruction(brw, dst, src)) {
1398 compacted_count++;
1399
1400 if (INTEL_DEBUG) {
1401 brw_inst uncompacted;
1402 brw_uncompact_instruction(brw, &uncompacted, dst);
1403 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
1404 brw_debug_compact_uncompact(brw, &saved, &uncompacted);
1405 }
1406 }
1407
1408 offset += sizeof(brw_compact_inst);
1409 } else {
1410 /* It appears that the end of thread SEND instruction needs to be
1411 * aligned, or the GPU hangs. All uncompacted instructions need to be
1412 * aligned on G45.
1413 */
1414 if ((offset & sizeof(brw_compact_inst)) != 0 &&
1415 (((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND ||
1416 brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC) &&
1417 brw_inst_eot(devinfo, src)) ||
1418 brw->is_g4x)) {
1419 brw_compact_inst *align = store + offset;
1420 memset(align, 0, sizeof(*align));
1421 brw_compact_inst_set_opcode(align, brw->is_g4x ? BRW_OPCODE_NENOP :
1422 BRW_OPCODE_NOP);
1423 brw_compact_inst_set_cmpt_control(align, true);
1424 offset += sizeof(brw_compact_inst);
1425 compacted_count--;
1426 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1427 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1428
1429 dst = store + offset;
1430 }
1431
1432 /* If we didn't compact this intruction, we need to move it down into
1433 * place.
1434 */
1435 if (offset != src_offset) {
1436 memmove(dst, src, sizeof(brw_inst));
1437 }
1438 offset += sizeof(brw_inst);
1439 }
1440 }
1441
1442 /* Fix up control flow offsets. */
1443 p->next_insn_offset = start_offset + offset;
1444 for (offset = 0; offset < p->next_insn_offset - start_offset;
1445 offset = next_offset(devinfo, store, offset)) {
1446 brw_inst *insn = store + offset;
1447 int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
1448 int this_compacted_count = compacted_counts[this_old_ip];
1449
1450 switch (brw_inst_opcode(devinfo, insn)) {
1451 case BRW_OPCODE_BREAK:
1452 case BRW_OPCODE_CONTINUE:
1453 case BRW_OPCODE_HALT:
1454 if (brw->gen >= 6) {
1455 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
1456 } else {
1457 update_gen4_jump_count(brw, insn, this_old_ip, compacted_counts);
1458 }
1459 break;
1460
1461 case BRW_OPCODE_IF:
1462 case BRW_OPCODE_IFF:
1463 case BRW_OPCODE_ELSE:
1464 case BRW_OPCODE_ENDIF:
1465 case BRW_OPCODE_WHILE:
1466 if (brw->gen >= 7) {
1467 if (brw_inst_cmpt_control(devinfo, insn)) {
1468 brw_inst uncompacted;
1469 brw_uncompact_instruction(brw, &uncompacted,
1470 (brw_compact_inst *)insn);
1471
1472 update_uip_jip(brw, &uncompacted, this_old_ip, compacted_counts);
1473
1474 bool ret = brw_try_compact_instruction(brw,
1475 (brw_compact_inst *)insn,
1476 &uncompacted);
1477 assert(ret); (void)ret;
1478 } else {
1479 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
1480 }
1481 } else if (brw->gen == 6) {
1482 assert(!brw_inst_cmpt_control(devinfo, insn));
1483
1484 /* Jump Count is in units of compacted instructions on Gen6. */
1485 int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);
1486
1487 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1488 int target_compacted_count = compacted_counts[target_old_ip];
1489 jump_count_compacted -= (target_compacted_count - this_compacted_count);
1490 brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
1491 } else {
1492 update_gen4_jump_count(brw, insn, this_old_ip, compacted_counts);
1493 }
1494 break;
1495
1496 case BRW_OPCODE_ADD:
1497 /* Add instructions modifying the IP register use an immediate src1,
1498 * and Gens that use this cannot compact instructions with immediate
1499 * operands.
1500 */
1501 if (brw_inst_cmpt_control(devinfo, insn))
1502 break;
1503
1504 if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
1505 brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
1506 assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
1507
1508 int shift = 3;
1509 int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
1510
1511 int target_old_ip = this_old_ip + (jump_compacted / 2);
1512 int target_compacted_count = compacted_counts[target_old_ip];
1513 jump_compacted -= (target_compacted_count - this_compacted_count);
1514 brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
1515 }
1516 break;
1517 }
1518 }
1519
1520 /* p->nr_insn is counting the number of uncompacted instructions still, so
1521 * divide. We do want to be sure there's a valid instruction in any
1522 * alignment padding, so that the next compression pass (for the FS 8/16
1523 * compile passes) parses correctly.
1524 */
1525 if (p->next_insn_offset & sizeof(brw_compact_inst)) {
1526 brw_compact_inst *align = store + offset;
1527 memset(align, 0, sizeof(*align));
1528 brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
1529 brw_compact_inst_set_cmpt_control(align, true);
1530 p->next_insn_offset += sizeof(brw_compact_inst);
1531 }
1532 p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
1533
1534 /* Update the instruction offsets for each annotation. */
1535 if (annotation) {
1536 for (int offset = 0, i = 0; i < num_annotations; i++) {
1537 while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1538 sizeof(brw_inst) != annotation[i].offset) {
1539 assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1540 sizeof(brw_inst) < annotation[i].offset);
1541 offset = next_offset(devinfo, store, offset);
1542 }
1543
1544 annotation[i].offset = start_offset + offset;
1545
1546 offset = next_offset(devinfo, store, offset);
1547 }
1548
1549 annotation[num_annotations].offset = p->next_insn_offset;
1550 }
1551 }