i965/gen8: Add instruction compaction tables.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_compact.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of gm45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 */
39
40 #include "brw_context.h"
41 #include "brw_eu.h"
42 #include "intel_asm_annotation.h"
43
/**
 * Gen6 control-index lookup table.
 *
 * The array position is the 5-bit control index stored in a compacted
 * instruction; the value is the 17-bit pattern it stands for, laid out the
 * way set_control_index() builds it:
 *
 *    bit  16    = instruction bit  31
 *    bits 15:0  = instruction bits 23:8
 */
static const uint32_t gen6_control_index_table[32] = {
   [ 0] = 0b00000000000000000,
   [ 1] = 0b01000000000000000,
   [ 2] = 0b00110000000000000,
   [ 3] = 0b00000000100000000,
   [ 4] = 0b00010000000000000,
   [ 5] = 0b00001000100000000,
   [ 6] = 0b00000000100000010,
   [ 7] = 0b00000000000000010,
   [ 8] = 0b01000000100000000,
   [ 9] = 0b01010000000000000,
   [10] = 0b10110000000000000,
   [11] = 0b00100000000000000,
   [12] = 0b11010000000000000,
   [13] = 0b11000000000000000,
   [14] = 0b01001000100000000,
   [15] = 0b01000000000001000,
   [16] = 0b01000000000000100,
   [17] = 0b00000000000001000,
   [18] = 0b00000000000000100,
   [19] = 0b00111000100000000,
   [20] = 0b00001000100000010,
   [21] = 0b00110000100000000,
   [22] = 0b00110000000000001,
   [23] = 0b00100000000000001,
   [24] = 0b00110000000000010,
   [25] = 0b00110000000000101,
   [26] = 0b00110000000001001,
   [27] = 0b00110000000010000,
   [28] = 0b00110000000000011,
   [29] = 0b00110000000000100,
   [30] = 0b00110000100001000,
   [31] = 0b00100000000001001,
};
78
/**
 * Gen6 datatype-index lookup table.
 *
 * The array position is the 5-bit datatype index stored in a compacted
 * instruction; the value is the 18-bit pattern it stands for, laid out the
 * way set_datatype_index() builds it:
 *
 *    bits 17:15 = instruction bits 63:61
 *    bits 14:0  = instruction bits 46:32
 *
 * Entries 23 and 25 hold the same value; a forward lookup therefore always
 * yields index 23 for that pattern.
 */
static const uint32_t gen6_datatype_table[32] = {
   [ 0] = 0b001001110000000000,
   [ 1] = 0b001000110000100000,
   [ 2] = 0b001001110000000001,
   [ 3] = 0b001000000001100000,
   [ 4] = 0b001010110100101001,
   [ 5] = 0b001000000110101101,
   [ 6] = 0b001100011000101100,
   [ 7] = 0b001011110110101101,
   [ 8] = 0b001000000111101100,
   [ 9] = 0b001000000001100001,
   [10] = 0b001000110010100101,
   [11] = 0b001000000001000001,
   [12] = 0b001000001000110001,
   [13] = 0b001000001000101001,
   [14] = 0b001000000000100000,
   [15] = 0b001000001000110010,
   [16] = 0b001010010100101001,
   [17] = 0b001011010010100101,
   [18] = 0b001000000110100101,
   [19] = 0b001100011000101001,
   [20] = 0b001011011000101100,
   [21] = 0b001011010110100101,
   [22] = 0b001011110110100101,
   [23] = 0b001111011110111101,
   [24] = 0b001111011110111100,
   [25] = 0b001111011110111101,
   [26] = 0b001111011110011101,
   [27] = 0b001111011110111110,
   [28] = 0b001000000000100001,
   [29] = 0b001000000000100010,
   [30] = 0b001001111111011101,
   [31] = 0b001000001110111110,
};
113
/**
 * Gen6 subregister-index lookup table.
 *
 * The array position is the 5-bit subreg index stored in a compacted
 * instruction; the value is the 15-bit pattern it stands for, laid out the
 * way set_subreg_index() builds it:
 *
 *    bits 14:10 = instruction bits 100:96 (left zero for immediates)
 *    bits  9:5  = instruction bits  68:64
 *    bits  4:0  = instruction bits  52:48
 */
static const uint16_t gen6_subreg_table[32] = {
   [ 0] = 0b000000000000000,
   [ 1] = 0b000000000000100,
   [ 2] = 0b000000110000000,
   [ 3] = 0b111000000000000,
   [ 4] = 0b011110000001000,
   [ 5] = 0b000010000000000,
   [ 6] = 0b000000000010000,
   [ 7] = 0b000110000001100,
   [ 8] = 0b001000000000000,
   [ 9] = 0b000001000000000,
   [10] = 0b000001010010100,
   [11] = 0b000000001010110,
   [12] = 0b010000000000000,
   [13] = 0b110000000000000,
   [14] = 0b000100000000000,
   [15] = 0b000000010000000,
   [16] = 0b000000000001000,
   [17] = 0b100000000000000,
   [18] = 0b000001010000000,
   [19] = 0b001010000000000,
   [20] = 0b001100000000000,
   [21] = 0b000000001010100,
   [22] = 0b101101010010100,
   [23] = 0b010100000000000,
   [24] = 0b000000010001111,
   [25] = 0b011000000000000,
   [26] = 0b111110000000000,
   [27] = 0b101000000000000,
   [28] = 0b000000000001111,
   [29] = 0b000100010001111,
   [30] = 0b001000010001111,
   [31] = 0b000110000000000,
};
148
/**
 * Gen6 source-index lookup table, shared by src0 and src1.
 *
 * The array position is the 5-bit source index stored in a compacted
 * instruction; the value is the 12-bit pattern it stands for, taken from
 * instruction bits 88:77 for src0 and bits 120:109 for src1.
 */
static const uint16_t gen6_src_index_table[32] = {
   [ 0] = 0b000000000000,
   [ 1] = 0b010110001000,
   [ 2] = 0b010001101000,
   [ 3] = 0b001000101000,
   [ 4] = 0b011010010000,
   [ 5] = 0b000100100000,
   [ 6] = 0b010001101100,
   [ 7] = 0b010101110000,
   [ 8] = 0b011001111000,
   [ 9] = 0b001100101000,
   [10] = 0b010110001100,
   [11] = 0b001000100000,
   [12] = 0b010110001010,
   [13] = 0b000000000010,
   [14] = 0b010101010000,
   [15] = 0b010101101000,
   [16] = 0b111101001100,
   [17] = 0b111100101100,
   [18] = 0b011001110000,
   [19] = 0b010110001001,
   [20] = 0b010101011000,
   [21] = 0b001101001000,
   [22] = 0b010000101100,
   [23] = 0b010000000000,
   [24] = 0b001101110000,
   [25] = 0b001100010000,
   [26] = 0b001100000000,
   [27] = 0b010001101010,
   [28] = 0b001101111000,
   [29] = 0b000001110000,
   [30] = 0b001100100000,
   [31] = 0b001101010000,
};
183
/**
 * Gen7 control-index lookup table.
 *
 * The array position is the 5-bit control index stored in a compacted
 * instruction; the value is the 19-bit pattern it stands for, laid out the
 * way set_control_index() builds it:
 *
 *    bits 18:17 = instruction bits 90:89 (flag register/subregister)
 *    bit  16    = instruction bit  31
 *    bits 15:0  = instruction bits 23:8
 */
static const uint32_t gen7_control_index_table[32] = {
   [ 0] = 0b0000000000000000010,
   [ 1] = 0b0000100000000000000,
   [ 2] = 0b0000100000000000001,
   [ 3] = 0b0000100000000000010,
   [ 4] = 0b0000100000000000011,
   [ 5] = 0b0000100000000000100,
   [ 6] = 0b0000100000000000101,
   [ 7] = 0b0000100000000000111,
   [ 8] = 0b0000100000000001000,
   [ 9] = 0b0000100000000001001,
   [10] = 0b0000100000000001101,
   [11] = 0b0000110000000000000,
   [12] = 0b0000110000000000001,
   [13] = 0b0000110000000000010,
   [14] = 0b0000110000000000011,
   [15] = 0b0000110000000000100,
   [16] = 0b0000110000000000101,
   [17] = 0b0000110000000000111,
   [18] = 0b0000110000000001001,
   [19] = 0b0000110000000001101,
   [20] = 0b0000110000000010000,
   [21] = 0b0000110000100000000,
   [22] = 0b0001000000000000000,
   [23] = 0b0001000000000000010,
   [24] = 0b0001000000000000100,
   [25] = 0b0001000000100000000,
   [26] = 0b0010110000000000000,
   [27] = 0b0010110000000010000,
   [28] = 0b0011000000000000000,
   [29] = 0b0011000000100000000,
   [30] = 0b0101000000000000000,
   [31] = 0b0101000000100000000,
};
218
/**
 * Gen7 datatype-index lookup table.
 *
 * The array position is the 5-bit datatype index stored in a compacted
 * instruction; the value is the 18-bit pattern it stands for, laid out the
 * way set_datatype_index() builds it:
 *
 *    bits 17:15 = instruction bits 63:61
 *    bits 14:0  = instruction bits 46:32
 */
static const uint32_t gen7_datatype_table[32] = {
   [ 0] = 0b001000000000000001,
   [ 1] = 0b001000000000100000,
   [ 2] = 0b001000000000100001,
   [ 3] = 0b001000000001100001,
   [ 4] = 0b001000000010111101,
   [ 5] = 0b001000001011111101,
   [ 6] = 0b001000001110100001,
   [ 7] = 0b001000001110100101,
   [ 8] = 0b001000001110111101,
   [ 9] = 0b001000010000100001,
   [10] = 0b001000110000100000,
   [11] = 0b001000110000100001,
   [12] = 0b001001010010100101,
   [13] = 0b001001110010100100,
   [14] = 0b001001110010100101,
   [15] = 0b001111001110111101,
   [16] = 0b001111011110011101,
   [17] = 0b001111011110111100,
   [18] = 0b001111011110111101,
   [19] = 0b001111111110111100,
   [20] = 0b000000001000001100,
   [21] = 0b001000000000111101,
   [22] = 0b001000000010100101,
   [23] = 0b001000010000100000,
   [24] = 0b001001010010100100,
   [25] = 0b001001110010000100,
   [26] = 0b001010010100001001,
   [27] = 0b001101111110111101,
   [28] = 0b001111111110111101,
   [29] = 0b001011110110101100,
   [30] = 0b001010010100101000,
   [31] = 0b001010110100101000,
};
253
/**
 * Gen7 subregister-index lookup table.
 *
 * The array position is the 5-bit subreg index stored in a compacted
 * instruction; the value is the 15-bit pattern it stands for, laid out the
 * way set_subreg_index() builds it:
 *
 *    bits 14:10 = instruction bits 100:96 (left zero for immediates)
 *    bits  9:5  = instruction bits  68:64
 *    bits  4:0  = instruction bits  52:48
 */
static const uint16_t gen7_subreg_table[32] = {
   [ 0] = 0b000000000000000,
   [ 1] = 0b000000000000001,
   [ 2] = 0b000000000001000,
   [ 3] = 0b000000000001111,
   [ 4] = 0b000000000010000,
   [ 5] = 0b000000010000000,
   [ 6] = 0b000000100000000,
   [ 7] = 0b000000110000000,
   [ 8] = 0b000001000000000,
   [ 9] = 0b000001000010000,
   [10] = 0b000010100000000,
   [11] = 0b001000000000000,
   [12] = 0b001000000000001,
   [13] = 0b001000010000001,
   [14] = 0b001000010000010,
   [15] = 0b001000010000011,
   [16] = 0b001000010000100,
   [17] = 0b001000010000111,
   [18] = 0b001000010001000,
   [19] = 0b001000010001110,
   [20] = 0b001000010001111,
   [21] = 0b001000110000000,
   [22] = 0b001000111101000,
   [23] = 0b010000000000000,
   [24] = 0b010000110000000,
   [25] = 0b011000000000000,
   [26] = 0b011110010000111,
   [27] = 0b100000000000000,
   [28] = 0b101000000000000,
   [29] = 0b110000000000000,
   [30] = 0b111000000000000,
   [31] = 0b111000000011100,
};
288
/**
 * Gen7 source-index lookup table, shared by src0 and src1.
 *
 * The array position is the 5-bit source index stored in a compacted
 * instruction; the value is the 12-bit pattern it stands for, taken from
 * instruction bits 88:77 for src0 and bits 120:109 for src1.
 */
static const uint16_t gen7_src_index_table[32] = {
   [ 0] = 0b000000000000,
   [ 1] = 0b000000000010,
   [ 2] = 0b000000010000,
   [ 3] = 0b000000010010,
   [ 4] = 0b000000011000,
   [ 5] = 0b000000100000,
   [ 6] = 0b000000101000,
   [ 7] = 0b000001001000,
   [ 8] = 0b000001010000,
   [ 9] = 0b000001110000,
   [10] = 0b000001111000,
   [11] = 0b001100000000,
   [12] = 0b001100000010,
   [13] = 0b001100001000,
   [14] = 0b001100010000,
   [15] = 0b001100010010,
   [16] = 0b001100100000,
   [17] = 0b001100101000,
   [18] = 0b001100111000,
   [19] = 0b001101000000,
   [20] = 0b001101000010,
   [21] = 0b001101001000,
   [22] = 0b001101010000,
   [23] = 0b001101100000,
   [24] = 0b001101101000,
   [25] = 0b001101110000,
   [26] = 0b001101110001,
   [27] = 0b001101111000,
   [28] = 0b010001101000,
   [29] = 0b010001101001,
   [30] = 0b010001101010,
   [31] = 0b010110001000,
};
323
/**
 * Gen8 control-index lookup table.
 *
 * The array position is the 5-bit control index stored in a compacted
 * instruction; the value is the 19-bit pattern it stands for, laid out the
 * way set_control_index() builds it for gen7 and later:
 *
 *    bits 18:17 = instruction bits 90:89
 *    bit  16    = instruction bit  31
 *    bits 15:0  = instruction bits 23:8
 */
static const uint32_t gen8_control_index_table[32] = {
   [ 0] = 0b0000000000000000010,
   [ 1] = 0b0000100000000000000,
   [ 2] = 0b0000100000000000001,
   [ 3] = 0b0000100000000000010,
   [ 4] = 0b0000100000000000011,
   [ 5] = 0b0000100000000000100,
   [ 6] = 0b0000100000000000101,
   [ 7] = 0b0000100000000000111,
   [ 8] = 0b0000100000000001000,
   [ 9] = 0b0000100000000001001,
   [10] = 0b0000100000000001101,
   [11] = 0b0000110000000000000,
   [12] = 0b0000110000000000001,
   [13] = 0b0000110000000000010,
   [14] = 0b0000110000000000011,
   [15] = 0b0000110000000000100,
   [16] = 0b0000110000000000101,
   [17] = 0b0000110000000000111,
   [18] = 0b0000110000000001001,
   [19] = 0b0000110000000001101,
   [20] = 0b0000110000000010000,
   [21] = 0b0000110000100000000,
   [22] = 0b0001000000000000000,
   [23] = 0b0001000000000000010,
   [24] = 0b0001000000000000100,
   [25] = 0b0001000000100000000,
   [26] = 0b0010110000000000000,
   [27] = 0b0010110000000010000,
   [28] = 0b0011000000000000000,
   [29] = 0b0011000000100000000,
   [30] = 0b0101000000000000000,
   [31] = 0b0101000000100000000,
};
358
/**
 * Gen8 datatype-index lookup table.
 *
 * The array position is the 5-bit datatype index stored in a compacted
 * instruction; the value is the uncompacted bit pattern it stands for.
 *
 * NOTE(review): these entries are 21 bits wide, whereas
 * set_datatype_index()/set_uncompacted_datatype() only handle an 18-bit
 * pattern.  Presumably gen8 needs a different key layout before this table
 * can be used for lookups -- confirm before enabling gen8 compaction.
 */
static const uint32_t gen8_datatype_table[32] = {
   [ 0] = 0b001000000000000000001,
   [ 1] = 0b001000000000001000000,
   [ 2] = 0b001000000000001000001,
   [ 3] = 0b001000000000011000001,
   [ 4] = 0b001000000000101011101,
   [ 5] = 0b001000000010111011101,
   [ 6] = 0b001000000011101000001,
   [ 7] = 0b001000000011101000101,
   [ 8] = 0b001000000011101011101,
   [ 9] = 0b001000001000001000001,
   [10] = 0b001000011000001000000,
   [11] = 0b001000011000001000001,
   [12] = 0b001000101000101000101,
   [13] = 0b001000111000101000100,
   [14] = 0b001000111000101000101,
   [15] = 0b001011100011101011101,
   [16] = 0b001011101011100011101,
   [17] = 0b001011101011101011100,
   [18] = 0b001011101011101011101,
   [19] = 0b001011111011101011100,
   [20] = 0b000000000010000001100,
   [21] = 0b001000000000001011101,
   [22] = 0b001000000000101000101,
   [23] = 0b001000001000001000000,
   [24] = 0b001000101000101000100,
   [25] = 0b001000111000100000100,
   [26] = 0b001001001001000001001,
   [27] = 0b001010111011101011101,
   [28] = 0b001011111011101011101,
   [29] = 0b001001111001101001100,
   [30] = 0b001001001001001001000,
   [31] = 0b001001011001001001000,
};
393
/**
 * Gen8 subregister-index lookup table.
 *
 * The array position is the 5-bit subreg index stored in a compacted
 * instruction; the value is the 15-bit pattern it stands for, laid out the
 * way set_subreg_index() builds it:
 *
 *    bits 14:10 = instruction bits 100:96 (left zero for immediates)
 *    bits  9:5  = instruction bits  68:64
 *    bits  4:0  = instruction bits  52:48
 */
static const uint16_t gen8_subreg_table[32] = {
   [ 0] = 0b000000000000000,
   [ 1] = 0b000000000000001,
   [ 2] = 0b000000000001000,
   [ 3] = 0b000000000001111,
   [ 4] = 0b000000000010000,
   [ 5] = 0b000000010000000,
   [ 6] = 0b000000100000000,
   [ 7] = 0b000000110000000,
   [ 8] = 0b000001000000000,
   [ 9] = 0b000001000010000,
   [10] = 0b000001010000000,
   [11] = 0b001000000000000,
   [12] = 0b001000000000001,
   [13] = 0b001000010000001,
   [14] = 0b001000010000010,
   [15] = 0b001000010000011,
   [16] = 0b001000010000100,
   [17] = 0b001000010000111,
   [18] = 0b001000010001000,
   [19] = 0b001000010001110,
   [20] = 0b001000010001111,
   [21] = 0b001000110000000,
   [22] = 0b001000111101000,
   [23] = 0b010000000000000,
   [24] = 0b010000110000000,
   [25] = 0b011000000000000,
   [26] = 0b011110010000111,
   [27] = 0b100000000000000,
   [28] = 0b101000000000000,
   [29] = 0b110000000000000,
   [30] = 0b111000000000000,
   [31] = 0b111000000011100,
};
428
/**
 * Gen8 source-index lookup table, shared by src0 and src1.
 *
 * The array position is the 5-bit source index stored in a compacted
 * instruction; the value is the 12-bit pattern it stands for, taken from
 * instruction bits 88:77 for src0 and bits 120:109 for src1.
 */
static const uint16_t gen8_src_index_table[32] = {
   [ 0] = 0b000000000000,
   [ 1] = 0b000000000010,
   [ 2] = 0b000000010000,
   [ 3] = 0b000000010010,
   [ 4] = 0b000000011000,
   [ 5] = 0b000000100000,
   [ 6] = 0b000000101000,
   [ 7] = 0b000001001000,
   [ 8] = 0b000001010000,
   [ 9] = 0b000001110000,
   [10] = 0b000001111000,
   [11] = 0b001100000000,
   [12] = 0b001100000010,
   [13] = 0b001100001000,
   [14] = 0b001100010000,
   [15] = 0b001100010010,
   [16] = 0b001100100000,
   [17] = 0b001100101000,
   [18] = 0b001100111000,
   [19] = 0b001101000000,
   [20] = 0b001101000010,
   [21] = 0b001101001000,
   [22] = 0b001101010000,
   [23] = 0b001101100000,
   [24] = 0b001101101000,
   [25] = 0b001101110000,
   [26] = 0b001101110001,
   [27] = 0b001101111000,
   [28] = 0b010001101000,
   [29] = 0b010001101001,
   [30] = 0b010001101010,
   [31] = 0b010110001000,
};
463
/* Lookup tables currently in effect.  They start out NULL and are pointed at
 * the gen6/gen7/gen8 tables by brw_init_compaction_tables(); every lookup
 * helper in this file reads through them.
 */
static const uint32_t *control_index_table, *datatype_table;
static const uint16_t *subreg_table, *src_index_table;
468
469 static bool
470 set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
471 {
472 uint32_t uncompacted = /* 17b/SNB; 19b/IVB+ */
473 (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
474 (brw_inst_bits(src, 23, 8)); /* 16b */
475
476 /* On gen7, the flag register and subregister numbers are integrated into
477 * the control index.
478 */
479 if (brw->gen >= 7)
480 uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
481
482 for (int i = 0; i < 32; i++) {
483 if (control_index_table[i] == uncompacted) {
484 brw_compact_inst_set_control_index(dst, i);
485 return true;
486 }
487 }
488
489 return false;
490 }
491
492 static bool
493 set_datatype_index(struct brw_context *brw, brw_compact_inst *dst,
494 brw_inst *src)
495 {
496 uint32_t uncompacted = /* 18b */
497 (brw_inst_bits(src, 63, 61) << 15) | /* 3b */
498 (brw_inst_bits(src, 46, 32)); /* 15b */
499
500 for (int i = 0; i < 32; i++) {
501 if (datatype_table[i] == uncompacted) {
502 brw_compact_inst_set_datatype_index(dst, i);
503 return true;
504 }
505 }
506
507 return false;
508 }
509
510 static bool
511 set_subreg_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
512 bool is_immediate)
513 {
514 uint16_t uncompacted = /* 15b */
515 (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
516 (brw_inst_bits(src, 68, 64) << 5); /* 5b */
517
518 if (!is_immediate)
519 uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
520
521 for (int i = 0; i < 32; i++) {
522 if (subreg_table[i] == uncompacted) {
523 brw_compact_inst_set_subreg_index(dst, i);
524 return true;
525 }
526 }
527
528 return false;
529 }
530
531 static bool
532 get_src_index(uint16_t uncompacted,
533 uint16_t *compacted)
534 {
535 for (int i = 0; i < 32; i++) {
536 if (src_index_table[i] == uncompacted) {
537 *compacted = i;
538 return true;
539 }
540 }
541
542 return false;
543 }
544
545 static bool
546 set_src0_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
547 {
548 uint16_t compacted;
549 uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
550
551 if (!get_src_index(uncompacted, &compacted))
552 return false;
553
554 brw_compact_inst_set_src0_index(dst, compacted);
555
556 return true;
557 }
558
559 static bool
560 set_src1_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
561 bool is_immediate)
562 {
563 uint16_t compacted;
564
565 if (is_immediate) {
566 compacted = (brw_inst_imm_ud(brw, src) >> 8) & 0x1f;
567 } else {
568 uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
569
570 if (!get_src_index(uncompacted, &compacted))
571 return false;
572 }
573
574 brw_compact_inst_set_src1_index(dst, compacted);
575
576 return true;
577 }
578
/* A compacted instruction stores the low 12 bits of an immediate verbatim,
 * plus one more bit that is copied into every one of the upper 20 bits.
 *
 * In practice that covers 12-bit integers, 0.0f, and a limited set of packed
 * vector immediates.
 *
 * Returns true if imm can be represented that way, i.e. if its upper 20 bits
 * are either all clear or all set.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* The low 12 bits are unconstrained; only the upper 20 matter. */
   const unsigned upper = imm >> 12;

   return upper == 0 || upper == 0xfffff;
}
594
595 /**
596 * Tries to compact instruction src into dst.
597 *
598 * It doesn't modify dst unless src is compactable, which is relied on by
599 * brw_compact_instructions().
600 */
601 bool
602 brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst,
603 brw_inst *src)
604 {
605 brw_compact_inst temp;
606
607 if (brw_inst_opcode(brw, src) == BRW_OPCODE_IF ||
608 brw_inst_opcode(brw, src) == BRW_OPCODE_ELSE ||
609 brw_inst_opcode(brw, src) == BRW_OPCODE_ENDIF ||
610 brw_inst_opcode(brw, src) == BRW_OPCODE_HALT ||
611 brw_inst_opcode(brw, src) == BRW_OPCODE_DO ||
612 brw_inst_opcode(brw, src) == BRW_OPCODE_WHILE) {
613 /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, needs
614 * to be able to handle compacted flow control instructions..
615 */
616 return false;
617 }
618
619 bool is_immediate =
620 brw_inst_src0_reg_file(brw, src) == BRW_IMMEDIATE_VALUE ||
621 brw_inst_src1_reg_file(brw, src) == BRW_IMMEDIATE_VALUE;
622 if (is_immediate && !is_compactable_immediate(brw_inst_imm_ud(brw, src))) {
623 return false;
624 }
625
626 memset(&temp, 0, sizeof(temp));
627
628 brw_compact_inst_set_opcode(&temp, brw_inst_opcode(brw, src));
629 brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(brw, src));
630 if (!set_control_index(brw, &temp, src))
631 return false;
632 if (!set_datatype_index(brw, &temp, src))
633 return false;
634 if (!set_subreg_index(brw, &temp, src, is_immediate))
635 return false;
636 brw_compact_inst_set_acc_wr_control(&temp,
637 brw_inst_acc_wr_control(brw, src));
638 brw_compact_inst_set_cond_modifier(&temp, brw_inst_cond_modifier(brw, src));
639 if (brw->gen <= 6)
640 brw_compact_inst_set_flag_subreg_nr(&temp,
641 brw_inst_flag_subreg_nr(brw, src));
642 brw_compact_inst_set_cmpt_control(&temp, true);
643 if (!set_src0_index(brw, &temp, src))
644 return false;
645 if (!set_src1_index(brw, &temp, src, is_immediate))
646 return false;
647 brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(brw, src));
648 brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(brw, src));
649 if (is_immediate) {
650 brw_compact_inst_set_src1_reg_nr(&temp, brw_inst_imm_ud(brw, src) & 0xff);
651 } else {
652 brw_compact_inst_set_src1_reg_nr(&temp,
653 brw_inst_src1_da_reg_nr(brw, src));
654 }
655
656 *dst = temp;
657
658 return true;
659 }
660
661 static void
662 set_uncompacted_control(struct brw_context *brw, brw_inst *dst,
663 brw_compact_inst *src)
664 {
665 uint32_t uncompacted =
666 control_index_table[brw_compact_inst_control_index(src)];
667
668 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
669 brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
670
671 if (brw->gen >= 7)
672 brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
673 }
674
675 static void
676 set_uncompacted_datatype(struct brw_context *brw, brw_inst *dst,
677 brw_compact_inst *src)
678 {
679 uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
680
681 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
682 brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
683 }
684
685 static void
686 set_uncompacted_subreg(struct brw_context *brw, brw_inst *dst,
687 brw_compact_inst *src)
688 {
689 uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
690
691 brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
692 brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
693 brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
694 }
695
696 static void
697 set_uncompacted_src0(struct brw_context *brw, brw_inst *dst,
698 brw_compact_inst *src)
699 {
700 uint32_t compacted = brw_compact_inst_src0_index(src);
701 uint16_t uncompacted = src_index_table[compacted];
702
703 brw_inst_set_bits(dst, 88, 77, uncompacted);
704 }
705
706 static void
707 set_uncompacted_src1(struct brw_context *brw, brw_inst *dst,
708 brw_compact_inst *src, bool is_immediate)
709 {
710 if (is_immediate) {
711 signed high5 = brw_compact_inst_src1_index(src);
712 /* Replicate top bit of src1_index into high 20 bits of the immediate. */
713 brw_inst_set_imm_ud(brw, dst, (high5 << 27) >> 19);
714 } else {
715 uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
716
717 brw_inst_set_bits(dst, 120, 109, uncompacted);
718 }
719 }
720
721 void
722 brw_uncompact_instruction(struct brw_context *brw, brw_inst *dst,
723 brw_compact_inst *src)
724 {
725 memset(dst, 0, sizeof(*dst));
726
727 brw_inst_set_opcode(brw, dst, brw_compact_inst_opcode(src));
728 brw_inst_set_debug_control(brw, dst, brw_compact_inst_debug_control(src));
729
730 set_uncompacted_control(brw, dst, src);
731 set_uncompacted_datatype(brw, dst, src);
732
733 /* src0/1 register file fields are in the datatype table. */
734 bool is_immediate = brw_inst_src0_reg_file(brw, dst) == BRW_IMMEDIATE_VALUE ||
735 brw_inst_src1_reg_file(brw, dst) == BRW_IMMEDIATE_VALUE;
736
737 set_uncompacted_subreg(brw, dst, src);
738 brw_inst_set_acc_wr_control(brw, dst, brw_compact_inst_acc_wr_control(src));
739 brw_inst_set_cond_modifier(brw, dst, brw_compact_inst_cond_modifier(src));
740 if (brw->gen <= 6)
741 brw_inst_set_flag_subreg_nr(brw, dst,
742 brw_compact_inst_flag_subreg_nr(src));
743 set_uncompacted_src0(brw, dst, src);
744 set_uncompacted_src1(brw, dst, src, is_immediate);
745 brw_inst_set_dst_da_reg_nr(brw, dst, brw_compact_inst_dst_reg_nr(src));
746 brw_inst_set_src0_da_reg_nr(brw, dst, brw_compact_inst_src0_reg_nr(src));
747 if (is_immediate) {
748 brw_inst_set_imm_ud(brw, dst,
749 brw_inst_imm_ud(brw, dst) |
750 brw_compact_inst_src1_reg_nr(src));
751 } else {
752 brw_inst_set_src1_da_reg_nr(brw, dst, brw_compact_inst_src1_reg_nr(src));
753 }
754 }
755
756 void brw_debug_compact_uncompact(struct brw_context *brw,
757 brw_inst *orig,
758 brw_inst *uncompacted)
759 {
760 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
761 brw->gen);
762
763 fprintf(stderr, " before: ");
764 brw_disassemble_inst(stderr, brw, orig, true);
765
766 fprintf(stderr, " after: ");
767 brw_disassemble_inst(stderr, brw, uncompacted, false);
768
769 uint32_t *before_bits = (uint32_t *)orig;
770 uint32_t *after_bits = (uint32_t *)uncompacted;
771 fprintf(stderr, " changed bits:\n");
772 for (int i = 0; i < 128; i++) {
773 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
774 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
775
776 if (before != after) {
777 fprintf(stderr, " bit %d, %s to %s\n", i,
778 before ? "set" : "unset",
779 after ? "set" : "unset");
780 }
781 }
782 }
783
/**
 * Returns the change in the compacted-instruction count between the
 * instruction at old_ip and the one at old_target_ip.
 *
 * Both arguments index compacted_counts.  The result is negative when the
 * target has the smaller count.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
791
792 static void
793 update_uip_jip(struct brw_context *brw, brw_inst *insn,
794 int this_old_ip, int *compacted_counts)
795 {
796 int scale = brw->gen >= 8 ? sizeof(brw_compact_inst) : 1;
797
798 int32_t jip = brw_inst_jip(brw, insn);
799 jip -= scale *
800 compacted_between(this_old_ip, this_old_ip + jip, compacted_counts);
801 brw_inst_set_jip(brw, insn, jip);
802
803 if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF ||
804 brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE)
805 return;
806
807 int32_t uip = brw_inst_uip(brw, insn);
808 uip -= scale *
809 compacted_between(this_old_ip, this_old_ip + uip, compacted_counts);
810 brw_inst_set_uip(brw, insn, uip);
811 }
812
813 void
814 brw_init_compaction_tables(struct brw_context *brw)
815 {
816 assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
817 assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
818 assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
819 assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
820 assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
821 assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
822 assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
823 assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
824 assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
825 assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
826 assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
827 assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
828
829 switch (brw->gen) {
830 case 8:
831 control_index_table = gen8_control_index_table;
832 datatype_table = gen8_datatype_table;
833 subreg_table = gen8_subreg_table;
834 src_index_table = gen8_src_index_table;
835 break;
836 case 7:
837 control_index_table = gen7_control_index_table;
838 datatype_table = gen7_datatype_table;
839 subreg_table = gen7_subreg_table;
840 src_index_table = gen7_src_index_table;
841 break;
842 case 6:
843 control_index_table = gen6_control_index_table;
844 datatype_table = gen6_datatype_table;
845 subreg_table = gen6_subreg_table;
846 src_index_table = gen6_src_index_table;
847 break;
848 default:
849 return;
850 }
851 }
852
853 void
854 brw_compact_instructions(struct brw_compile *p, int start_offset,
855 int num_annotations, struct annotation *annotation)
856 {
857 struct brw_context *brw = p->brw;
858 void *store = p->store + start_offset / 16;
859 /* For an instruction at byte offset 8*i before compaction, this is the number
860 * of compacted instructions that preceded it.
861 */
862 int compacted_counts[(p->next_insn_offset - start_offset) / 8];
863 /* For an instruction at byte offset 8*i after compaction, this is the
864 * 8-byte offset it was at before compaction.
865 */
866 int old_ip[(p->next_insn_offset - start_offset) / 8];
867
868 if (brw->gen < 6 || brw->gen >= 8)
869 return;
870
871 int src_offset;
872 int offset = 0;
873 int compacted_count = 0;
874 for (src_offset = 0; src_offset < p->next_insn_offset - start_offset;) {
875 brw_inst *src = store + src_offset;
876 void *dst = store + offset;
877
878 old_ip[offset / 8] = src_offset / 8;
879 compacted_counts[src_offset / 8] = compacted_count;
880
881 brw_inst saved = *src;
882
883 if (!brw_inst_cmpt_control(brw, src) &&
884 brw_try_compact_instruction(brw, dst, src)) {
885 compacted_count++;
886
887 if (INTEL_DEBUG) {
888 brw_inst uncompacted;
889 brw_uncompact_instruction(brw, &uncompacted, dst);
890 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
891 brw_debug_compact_uncompact(brw, &saved, &uncompacted);
892 }
893 }
894
895 offset += 8;
896 src_offset += 16;
897 } else {
898 int size = brw_inst_cmpt_control(brw, src) ? 8 : 16;
899
900 /* It appears that the end of thread SEND instruction needs to be
901 * aligned, or the GPU hangs.
902 */
903 if ((brw_inst_opcode(brw, src) == BRW_OPCODE_SEND ||
904 brw_inst_opcode(brw, src) == BRW_OPCODE_SENDC) &&
905 brw_inst_eot(brw, src) &&
906 (offset & 8) != 0) {
907 brw_compact_inst *align = store + offset;
908 memset(align, 0, sizeof(*align));
909 brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
910 brw_compact_inst_set_cmpt_control(align, true);
911 offset += 8;
912 old_ip[offset / 8] = src_offset / 8;
913 dst = store + offset;
914 }
915
916 /* If we didn't compact this intruction, we need to move it down into
917 * place.
918 */
919 if (offset != src_offset) {
920 memmove(dst, src, size);
921 }
922 offset += size;
923 src_offset += size;
924 }
925 }
926
927 /* Fix up control flow offsets. */
928 p->next_insn_offset = start_offset + offset;
929 for (offset = 0; offset < p->next_insn_offset - start_offset;) {
930 brw_inst *insn = store + offset;
931 int this_old_ip = old_ip[offset / 8];
932 int this_compacted_count = compacted_counts[this_old_ip];
933 int target_old_ip, target_compacted_count;
934
935 switch (brw_inst_opcode(brw, insn)) {
936 case BRW_OPCODE_BREAK:
937 case BRW_OPCODE_CONTINUE:
938 case BRW_OPCODE_HALT:
939 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
940 break;
941
942 case BRW_OPCODE_IF:
943 case BRW_OPCODE_ELSE:
944 case BRW_OPCODE_ENDIF:
945 case BRW_OPCODE_WHILE:
946 if (brw->gen >= 7) {
947 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
948 } else if (brw->gen == 6) {
949 int gen6_jump_count = brw_inst_gen6_jump_count(brw, insn);
950 target_old_ip = this_old_ip + gen6_jump_count;
951 target_compacted_count = compacted_counts[target_old_ip];
952 gen6_jump_count -= (target_compacted_count - this_compacted_count);
953 brw_inst_set_gen6_jump_count(brw, insn, gen6_jump_count);
954 }
955 break;
956 }
957
958 offset = next_offset(brw, store, offset);
959 }
960
961 /* p->nr_insn is counting the number of uncompacted instructions still, so
962 * divide. We do want to be sure there's a valid instruction in any
963 * alignment padding, so that the next compression pass (for the FS 8/16
964 * compile passes) parses correctly.
965 */
966 if (p->next_insn_offset & 8) {
967 brw_compact_inst *align = store + offset;
968 memset(align, 0, sizeof(*align));
969 brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
970 brw_compact_inst_set_cmpt_control(align, true);
971 p->next_insn_offset += 8;
972 }
973 p->nr_insn = p->next_insn_offset / 16;
974
975 /* Update the instruction offsets for each annotation. */
976 if (annotation) {
977 for (int offset = 0, i = 0; i < num_annotations; i++) {
978 while (start_offset + old_ip[offset / 8] * 8 != annotation[i].offset) {
979 assert(start_offset + old_ip[offset / 8] * 8 <
980 annotation[i].offset);
981 offset = next_offset(brw, store, offset);
982 }
983
984 annotation[i].offset = start_offset + offset;
985
986 offset = next_offset(brw, store, offset);
987 }
988
989 annotation[num_annotations].offset = p->next_insn_offset;
990 }
991 }