114d18f4ab2c10d5b37c75e65f991c783a89362b
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_compact.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gen5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gen6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gen7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gen6
62 * since the jump count field is not in DW3.
63 *
64 * break JIP/UIP
65 * cont JIP/UIP
66 * halt JIP/UIP
67 * if JIP/UIP
68 * else JIP (plus UIP on BDW+)
69 * endif JIP
70 * while JIP (must be negative)
71 *
72 * Gen 8 adds support for compacting 3-src instructions.
73 */
74
75 #include "brw_context.h"
76 #include "brw_eu.h"
77 #include "intel_asm_annotation.h"
78
/**
 * Control-index lookup table; the name suggests G45 (the code that points
 * control_index_table at it is outside this view).
 *
 * Each entry is a 17-bit pattern of control bits.  set_control_index()
 * searches the active table for the pattern gathered from a full instruction
 * and stores the position of the match in the compacted encoding;
 * set_uncompacted_control() maps that position back to the bits.
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100
};
113
/**
 * Datatype-index lookup table; the name suggests G45 (the code that points
 * datatype_table at it is outside this view).
 *
 * Each entry is an 18-bit pattern.  set_datatype_index() stores the position
 * of the matching entry in the compacted instruction and
 * set_uncompacted_datatype() expands the position back to the bits.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000
};
148
/**
 * Subregister-index lookup table; the name suggests G45 (the code that
 * points subreg_table at it is outside this view).
 *
 * Each entry is a 15-bit pattern made of three 5-bit fields, matched by
 * set_subreg_index() and expanded again by set_uncompacted_subreg().
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000
};
183
/**
 * Source-index lookup table; the name suggests G45 (the code that points
 * src_index_table at it is outside this view).
 *
 * Each entry is a 12-bit pattern.  The same table serves both src0 and src1:
 * get_src_index() finds the position of a pattern, and
 * set_uncompacted_src0()/set_uncompacted_src1() expand positions back.
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100
};
218
/**
 * Control-index lookup table; the name suggests Gen6 (the code that points
 * control_index_table at it is outside this view).
 *
 * Each entry is a 17-bit pattern of control bits, matched by
 * set_control_index() and expanded again by set_uncompacted_control().
 */
static const uint32_t gen6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001
};
253
/**
 * Datatype-index lookup table; the name suggests Gen6 (the code that points
 * datatype_table at it is outside this view).
 *
 * Each entry is an 18-bit pattern, matched by set_datatype_index() and
 * expanded again by set_uncompacted_datatype().
 *
 * NOTE(review): entries 23 and 25 hold the same value, so the linear search
 * in set_datatype_index() can only ever return 23 for that pattern.  Confirm
 * against the hardware documentation before changing either entry.
 */
static const uint32_t gen6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
288
/**
 * Subregister-index lookup table; the name suggests Gen6 (the code that
 * points subreg_table at it is outside this view).
 *
 * Each entry is a 15-bit pattern made of three 5-bit fields, matched by
 * set_subreg_index() and expanded again by set_uncompacted_subreg().
 */
static const uint16_t gen6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
323
/**
 * Source-index lookup table; the name suggests Gen6 (the code that points
 * src_index_table at it is outside this view).
 *
 * Each entry is a 12-bit pattern shared by src0 and src1: get_src_index()
 * finds the position of a pattern, and
 * set_uncompacted_src0()/set_uncompacted_src1() expand positions back.
 */
static const uint16_t gen6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
358
/**
 * Control-index lookup table; the name suggests Gen7 (the code that points
 * control_index_table at it is outside this view).
 *
 * Each entry is a 19-bit pattern.  For brw->gen == 7, set_control_index()
 * places instruction bits 90:89 above the 17 control bits taken from
 * instruction bit 31 and bits 23:8, and set_uncompacted_control() reverses
 * that packing.
 */
static const uint32_t gen7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
393
/**
 * Datatype-index lookup table; the name suggests Gen7 (the code that points
 * datatype_table at it is outside this view).
 *
 * Each entry is an 18-bit pattern, matched by set_datatype_index() and
 * expanded again by set_uncompacted_datatype().
 */
static const uint32_t gen7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000
};
428
/**
 * Subregister-index lookup table; the name suggests Gen7 (the code that
 * points subreg_table at it is outside this view).
 *
 * Each entry is a 15-bit pattern made of three 5-bit fields, matched by
 * set_subreg_index() and expanded again by set_uncompacted_subreg().
 */
static const uint16_t gen7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
463
/**
 * Source-index lookup table; the name suggests Gen7 (the code that points
 * src_index_table at it is outside this view).
 *
 * Each entry is a 12-bit pattern shared by src0 and src1: get_src_index()
 * finds the position of a pattern, and
 * set_uncompacted_src0()/set_uncompacted_src1() expand positions back.
 */
static const uint16_t gen7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
498
/**
 * Control-index lookup table; the name suggests Gen8 (the code that points
 * control_index_table at it is outside this view).
 *
 * Each entry is a 19-bit pattern.  For brw->gen >= 8, set_control_index()
 * packs, from the most significant end, instruction bits 33:31, 23:12, 10:9,
 * 34 and 8; set_uncompacted_control() reverses that packing.
 */
static const uint32_t gen8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
533
/**
 * Datatype-index lookup table; the name suggests Gen8 (the code that points
 * datatype_table at it is outside this view).
 *
 * Each entry is a 21-bit pattern.  For brw->gen >= 8, set_datatype_index()
 * packs, from the most significant end, instruction bits 63:61, 94:89 and
 * 46:35; set_uncompacted_datatype() reverses that packing.
 */
static const uint32_t gen8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000
};
568
/**
 * Subregister-index lookup table; the name suggests Gen8 (the code that
 * points subreg_table at it is outside this view).
 *
 * Each entry is a 15-bit pattern made of three 5-bit fields, matched by
 * set_subreg_index() and expanded again by set_uncompacted_subreg().
 */
static const uint16_t gen8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
603
/**
 * Source-index lookup table; the name suggests Gen8 (the code that points
 * src_index_table at it is outside this view).
 *
 * Each entry is a 12-bit pattern shared by src0 and src1: get_src_index()
 * finds the position of a pattern, and
 * set_uncompacted_src0()/set_uncompacted_src1() expand positions back.
 */
static const uint16_t gen8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
638
/* This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware.
 *
 * set_3src_control_index() searches this table when compacting a 3-source
 * instruction and set_uncompacted_3src_control_index() expands the stored
 * position again.
 */
static const uint32_t gen8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001
};
651
/* This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * set_3src_source_index() searches this table when compacting a 3-source
 * instruction and set_uncompacted_3src_source_index() expands the stored
 * position again.
 */
static const uint64_t gen8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000
};
665
/* Lookup tables currently in effect.  Every set_*_index() and
 * set_uncompacted_*() helper below reads 32 entries through these pointers;
 * they are assigned outside this view (presumably to the per-generation
 * tables above during initialization -- TODO confirm).
 */
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint16_t *subreg_table;
static const uint16_t *src_index_table;
670
671 static bool
672 set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
673 {
674 uint32_t uncompacted = brw->gen >= 8 /* 17b/G45; 19b/IVB+ */
675 ? (brw_inst_bits(src, 33, 31) << 16) | /* 3b */
676 (brw_inst_bits(src, 23, 12) << 4) | /* 12b */
677 (brw_inst_bits(src, 10, 9) << 2) | /* 2b */
678 (brw_inst_bits(src, 34, 34) << 1) | /* 1b */
679 (brw_inst_bits(src, 8, 8)) /* 1b */
680 : (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
681 (brw_inst_bits(src, 23, 8)); /* 16b */
682
683 /* On gen7, the flag register and subregister numbers are integrated into
684 * the control index.
685 */
686 if (brw->gen == 7)
687 uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
688
689 for (int i = 0; i < 32; i++) {
690 if (control_index_table[i] == uncompacted) {
691 brw_compact_inst_set_control_index(dst, i);
692 return true;
693 }
694 }
695
696 return false;
697 }
698
699 static bool
700 set_datatype_index(struct brw_context *brw, brw_compact_inst *dst,
701 brw_inst *src)
702 {
703 uint32_t uncompacted = brw->gen >= 8 /* 18b/G45+; 21b/BDW+ */
704 ? (brw_inst_bits(src, 63, 61) << 18) | /* 3b */
705 (brw_inst_bits(src, 94, 89) << 12) | /* 6b */
706 (brw_inst_bits(src, 46, 35)) /* 12b */
707 : (brw_inst_bits(src, 63, 61) << 15) | /* 3b */
708 (brw_inst_bits(src, 46, 32)); /* 15b */
709
710 for (int i = 0; i < 32; i++) {
711 if (datatype_table[i] == uncompacted) {
712 brw_compact_inst_set_datatype_index(dst, i);
713 return true;
714 }
715 }
716
717 return false;
718 }
719
720 static bool
721 set_subreg_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
722 bool is_immediate)
723 {
724 uint16_t uncompacted = /* 15b */
725 (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
726 (brw_inst_bits(src, 68, 64) << 5); /* 5b */
727
728 if (!is_immediate)
729 uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
730
731 for (int i = 0; i < 32; i++) {
732 if (subreg_table[i] == uncompacted) {
733 brw_compact_inst_set_subreg_index(dst, i);
734 return true;
735 }
736 }
737
738 return false;
739 }
740
741 static bool
742 get_src_index(uint16_t uncompacted,
743 uint16_t *compacted)
744 {
745 for (int i = 0; i < 32; i++) {
746 if (src_index_table[i] == uncompacted) {
747 *compacted = i;
748 return true;
749 }
750 }
751
752 return false;
753 }
754
755 static bool
756 set_src0_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
757 {
758 uint16_t compacted;
759 uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
760
761 if (!get_src_index(uncompacted, &compacted))
762 return false;
763
764 brw_compact_inst_set_src0_index(dst, compacted);
765
766 return true;
767 }
768
769 static bool
770 set_src1_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
771 bool is_immediate)
772 {
773 uint16_t compacted;
774
775 if (is_immediate) {
776 compacted = (brw_inst_imm_ud(brw, src) >> 8) & 0x1f;
777 } else {
778 uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
779
780 if (!get_src_index(uncompacted, &compacted))
781 return false;
782 }
783
784 brw_compact_inst_set_src1_index(dst, compacted);
785
786 return true;
787 }
788
789 static bool
790 set_3src_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
791 {
792 assert(brw->gen >= 8);
793
794 uint32_t uncompacted = /* 24b/BDW; 26b/CHV */
795 (brw_inst_bits(src, 34, 32) << 21) | /* 3b */
796 (brw_inst_bits(src, 28, 8)); /* 21b */
797
798 if (brw->is_cherryview)
799 uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
800
801 for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
802 if (gen8_3src_control_index_table[i] == uncompacted) {
803 brw_compact_inst_set_3src_control_index(dst, i);
804 return true;
805 }
806 }
807
808 return false;
809 }
810
811 static bool
812 set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
813 {
814 assert(brw->gen >= 8);
815
816 uint64_t uncompacted = /* 46b/BDW; 49b/CHV */
817 (brw_inst_bits(src, 83, 83) << 43) | /* 1b */
818 (brw_inst_bits(src, 114, 107) << 35) | /* 8b */
819 (brw_inst_bits(src, 93, 86) << 27) | /* 8b */
820 (brw_inst_bits(src, 72, 65) << 19) | /* 8b */
821 (brw_inst_bits(src, 55, 37)); /* 19b */
822
823 if (brw->is_cherryview) {
824 uncompacted |=
825 (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
826 (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
827 (brw_inst_bits(src, 84, 84) << 44); /* 1b */
828 } else {
829 uncompacted |=
830 (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
831 (brw_inst_bits(src, 104, 104) << 44); /* 1b */
832 }
833
834 for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
835 if (gen8_3src_source_index_table[i] == uncompacted) {
836 brw_compact_inst_set_3src_source_index(dst, i);
837 return true;
838 }
839 }
840
841 return false;
842 }
843
844 static bool
845 brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst *dst,
846 brw_inst *src)
847 {
848 assert(brw->gen >= 8);
849
850 #define compact(field) \
851 brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(brw, src))
852
853 compact(opcode);
854
855 if (!set_3src_control_index(brw, dst, src))
856 return false;
857
858 if (!set_3src_source_index(brw, dst, src))
859 return false;
860
861 compact(dst_reg_nr);
862 compact(src0_rep_ctrl);
863 brw_compact_inst_set_3src_cmpt_control(dst, true);
864 compact(debug_control);
865 compact(saturate);
866 compact(src1_rep_ctrl);
867 compact(src2_rep_ctrl);
868 compact(src0_reg_nr);
869 compact(src1_reg_nr);
870 compact(src2_reg_nr);
871 compact(src0_subreg_nr);
872 compact(src1_subreg_nr);
873 compact(src2_subreg_nr);
874
875 #undef compact
876
877 return true;
878 }
879
/* A compacted instruction stores the low 12 bits of an immediate verbatim
 * plus one further bit that is replicated through the high 20 bits.
 *
 * An immediate is therefore representable exactly when its high 20 bits are
 * all zeros or all ones; in practice that covers 12-bit integers, 0.0f, and
 * some limited uses of packed vectors.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* Discard the 12 low bits, which are always representable. */
   const unsigned high20 = imm & 0xfffff000u;

   return high20 == 0 || high20 == 0xfffff000u;
}
895
896 /* Returns whether an opcode takes three sources. */
897 static bool
898 is_3src(uint32_t op)
899 {
900 return opcode_descs[op].nsrc == 3;
901 }
902
903 /**
904 * Tries to compact instruction src into dst.
905 *
906 * It doesn't modify dst unless src is compactable, which is relied on by
907 * brw_compact_instructions().
908 */
909 bool
910 brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst,
911 brw_inst *src)
912 {
913 brw_compact_inst temp;
914
915 assert(brw_inst_cmpt_control(brw, src) == 0);
916
917 if (is_3src(brw_inst_opcode(brw, src))) {
918 if (brw->gen >= 8) {
919 memset(&temp, 0, sizeof(temp));
920 if (brw_try_compact_3src_instruction(brw, &temp, src)) {
921 *dst = temp;
922 return true;
923 } else {
924 return false;
925 }
926 } else {
927 return false;
928 }
929 }
930
931 bool is_immediate =
932 brw_inst_src0_reg_file(brw, src) == BRW_IMMEDIATE_VALUE ||
933 brw_inst_src1_reg_file(brw, src) == BRW_IMMEDIATE_VALUE;
934 if (is_immediate &&
935 (brw->gen < 6 || !is_compactable_immediate(brw_inst_imm_ud(brw, src)))) {
936 return false;
937 }
938
939 memset(&temp, 0, sizeof(temp));
940
941 brw_compact_inst_set_opcode(&temp, brw_inst_opcode(brw, src));
942 brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(brw, src));
943 if (!set_control_index(brw, &temp, src))
944 return false;
945 if (!set_datatype_index(brw, &temp, src))
946 return false;
947 if (!set_subreg_index(brw, &temp, src, is_immediate))
948 return false;
949 brw_compact_inst_set_acc_wr_control(&temp,
950 brw_inst_acc_wr_control(brw, src));
951 brw_compact_inst_set_cond_modifier(&temp, brw_inst_cond_modifier(brw, src));
952 if (brw->gen <= 6)
953 brw_compact_inst_set_flag_subreg_nr(&temp,
954 brw_inst_flag_subreg_nr(brw, src));
955 brw_compact_inst_set_cmpt_control(&temp, true);
956 if (!set_src0_index(brw, &temp, src))
957 return false;
958 if (!set_src1_index(brw, &temp, src, is_immediate))
959 return false;
960 brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(brw, src));
961 brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(brw, src));
962 if (is_immediate) {
963 brw_compact_inst_set_src1_reg_nr(&temp, brw_inst_imm_ud(brw, src) & 0xff);
964 } else {
965 brw_compact_inst_set_src1_reg_nr(&temp,
966 brw_inst_src1_da_reg_nr(brw, src));
967 }
968
969 *dst = temp;
970
971 return true;
972 }
973
974 static void
975 set_uncompacted_control(struct brw_context *brw, brw_inst *dst,
976 brw_compact_inst *src)
977 {
978 uint32_t uncompacted =
979 control_index_table[brw_compact_inst_control_index(src)];
980
981 if (brw->gen >= 8) {
982 brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
983 brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
984 brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
985 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
986 brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
987 } else {
988 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
989 brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
990
991 if (brw->gen == 7)
992 brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
993 }
994 }
995
996 static void
997 set_uncompacted_datatype(struct brw_context *brw, brw_inst *dst,
998 brw_compact_inst *src)
999 {
1000 uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
1001
1002 if (brw->gen >= 8) {
1003 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1004 brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1005 brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
1006 } else {
1007 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1008 brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1009 }
1010 }
1011
1012 static void
1013 set_uncompacted_subreg(struct brw_context *brw, brw_inst *dst,
1014 brw_compact_inst *src)
1015 {
1016 uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
1017
1018 brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1019 brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
1020 brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
1021 }
1022
1023 static void
1024 set_uncompacted_src0(struct brw_context *brw, brw_inst *dst,
1025 brw_compact_inst *src)
1026 {
1027 uint32_t compacted = brw_compact_inst_src0_index(src);
1028 uint16_t uncompacted = src_index_table[compacted];
1029
1030 brw_inst_set_bits(dst, 88, 77, uncompacted);
1031 }
1032
1033 static void
1034 set_uncompacted_src1(struct brw_context *brw, brw_inst *dst,
1035 brw_compact_inst *src, bool is_immediate)
1036 {
1037 if (is_immediate) {
1038 signed high5 = brw_compact_inst_src1_index(src);
1039 /* Replicate top bit of src1_index into high 20 bits of the immediate. */
1040 brw_inst_set_imm_ud(brw, dst, (high5 << 27) >> 19);
1041 } else {
1042 uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
1043
1044 brw_inst_set_bits(dst, 120, 109, uncompacted);
1045 }
1046 }
1047
1048 static void
1049 set_uncompacted_3src_control_index(struct brw_context *brw, brw_inst *dst,
1050 brw_compact_inst *src)
1051 {
1052 assert(brw->gen >= 8);
1053
1054 uint32_t compacted = brw_compact_inst_3src_control_index(src);
1055 uint32_t uncompacted = gen8_3src_control_index_table[compacted];
1056
1057 brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1058 brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
1059
1060 if (brw->is_cherryview)
1061 brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1062 }
1063
1064 static void
1065 set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst,
1066 brw_compact_inst *src)
1067 {
1068 assert(brw->gen >= 8);
1069
1070 uint32_t compacted = brw_compact_inst_3src_source_index(src);
1071 uint64_t uncompacted = gen8_3src_source_index_table[compacted];
1072
1073 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
1074 brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1075 brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
1076 brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
1077 brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
1078
1079 if (brw->is_cherryview) {
1080 brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1081 brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1082 brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
1083 } else {
1084 brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1085 brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1086 }
1087 }
1088
1089 static void
1090 brw_uncompact_3src_instruction(struct brw_context *brw, brw_inst *dst,
1091 brw_compact_inst *src)
1092 {
1093 assert(brw->gen >= 8);
1094
1095 #define uncompact(field) \
1096 brw_inst_set_3src_##field(brw, dst, brw_compact_inst_3src_##field(src))
1097
1098 uncompact(opcode);
1099
1100 set_uncompacted_3src_control_index(brw, dst, src);
1101 set_uncompacted_3src_source_index(brw, dst, src);
1102
1103 uncompact(dst_reg_nr);
1104 uncompact(src0_rep_ctrl);
1105 brw_inst_set_3src_cmpt_control(brw, dst, false);
1106 uncompact(debug_control);
1107 uncompact(saturate);
1108 uncompact(src1_rep_ctrl);
1109 uncompact(src2_rep_ctrl);
1110 uncompact(src0_reg_nr);
1111 uncompact(src1_reg_nr);
1112 uncompact(src2_reg_nr);
1113 uncompact(src0_subreg_nr);
1114 uncompact(src1_subreg_nr);
1115 uncompact(src2_subreg_nr);
1116
1117 #undef uncompact
1118 }
1119
1120 void
1121 brw_uncompact_instruction(struct brw_context *brw, brw_inst *dst,
1122 brw_compact_inst *src)
1123 {
1124 memset(dst, 0, sizeof(*dst));
1125
1126 if (brw->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) {
1127 brw_uncompact_3src_instruction(brw, dst, src);
1128 return;
1129 }
1130
1131 brw_inst_set_opcode(brw, dst, brw_compact_inst_opcode(src));
1132 brw_inst_set_debug_control(brw, dst, brw_compact_inst_debug_control(src));
1133
1134 set_uncompacted_control(brw, dst, src);
1135 set_uncompacted_datatype(brw, dst, src);
1136
1137 /* src0/1 register file fields are in the datatype table. */
1138 bool is_immediate = brw_inst_src0_reg_file(brw, dst) == BRW_IMMEDIATE_VALUE ||
1139 brw_inst_src1_reg_file(brw, dst) == BRW_IMMEDIATE_VALUE;
1140
1141 set_uncompacted_subreg(brw, dst, src);
1142 brw_inst_set_acc_wr_control(brw, dst, brw_compact_inst_acc_wr_control(src));
1143 brw_inst_set_cond_modifier(brw, dst, brw_compact_inst_cond_modifier(src));
1144 if (brw->gen <= 6)
1145 brw_inst_set_flag_subreg_nr(brw, dst,
1146 brw_compact_inst_flag_subreg_nr(src));
1147 set_uncompacted_src0(brw, dst, src);
1148 set_uncompacted_src1(brw, dst, src, is_immediate);
1149 brw_inst_set_dst_da_reg_nr(brw, dst, brw_compact_inst_dst_reg_nr(src));
1150 brw_inst_set_src0_da_reg_nr(brw, dst, brw_compact_inst_src0_reg_nr(src));
1151 if (is_immediate) {
1152 brw_inst_set_imm_ud(brw, dst,
1153 brw_inst_imm_ud(brw, dst) |
1154 brw_compact_inst_src1_reg_nr(src));
1155 } else {
1156 brw_inst_set_src1_da_reg_nr(brw, dst, brw_compact_inst_src1_reg_nr(src));
1157 }
1158 }
1159
1160 void brw_debug_compact_uncompact(struct brw_context *brw,
1161 brw_inst *orig,
1162 brw_inst *uncompacted)
1163 {
1164 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
1165 brw->gen);
1166
1167 fprintf(stderr, " before: ");
1168 brw_disassemble_inst(stderr, brw, orig, true);
1169
1170 fprintf(stderr, " after: ");
1171 brw_disassemble_inst(stderr, brw, uncompacted, false);
1172
1173 uint32_t *before_bits = (uint32_t *)orig;
1174 uint32_t *after_bits = (uint32_t *)uncompacted;
1175 fprintf(stderr, " changed bits:\n");
1176 for (int i = 0; i < 128; i++) {
1177 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
1178 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
1179
1180 if (before != after) {
1181 fprintf(stderr, " bit %d, %s to %s\n", i,
1182 before ? "set" : "unset",
1183 after ? "set" : "unset");
1184 }
1185 }
1186 }
1187
/**
 * Difference between the running compaction counts recorded for two
 * pre-compaction instruction indices.
 *
 * Returns compacted_counts[old_target_ip] - compacted_counts[old_ip]; the
 * result is negative when the target's count is the smaller of the two.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1195
1196 static void
1197 update_uip_jip(struct brw_context *brw, brw_inst *insn,
1198 int this_old_ip, int *compacted_counts)
1199 {
1200 /* JIP and UIP are in units of:
1201 * - bytes on Gen8+; and
1202 * - compacted instructions on Gen6+.
1203 */
1204 int32_t jip = brw_inst_jip(brw, insn);
1205 int32_t jip_compacted = jip / (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1);
1206 int32_t jip_uncompacted = jip / (brw->gen >= 8 ? sizeof(brw_inst) : 2);
1207 jip_compacted -= compacted_between(this_old_ip,
1208 this_old_ip + jip_uncompacted,
1209 compacted_counts);
1210 brw_inst_set_jip(brw, insn,
1211 jip_compacted * (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1));
1212
1213 if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF ||
1214 brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE ||
1215 (brw_inst_opcode(brw, insn) == BRW_OPCODE_ELSE && brw->gen <= 7))
1216 return;
1217
1218 int32_t uip = brw_inst_uip(brw, insn);
1219 int32_t uip_compacted = uip / (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1);
1220 int32_t uip_uncompacted = uip / (brw->gen >= 8 ? sizeof(brw_inst) : 2);
1221 uip_compacted -= compacted_between(this_old_ip,
1222 this_old_ip + uip_uncompacted,
1223 compacted_counts);
1224 brw_inst_set_uip(brw, insn,
1225 uip_compacted * (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1));
1226 }
1227
1228 static void
1229 update_gen4_jump_count(struct brw_context *brw, brw_inst *insn,
1230 int this_old_ip, int *compacted_counts)
1231 {
1232 assert(brw->gen == 5 || brw->is_g4x);
1233
1234 /* Jump Count is in units of:
1235 * - uncompacted instructions on G45; and
1236 * - compacted instructions on Gen5.
1237 */
1238 int jump_count = brw_inst_gen4_jump_count(brw, insn);
1239 int jump_count_compacted = jump_count * (brw->is_g4x ? 2 : 1);
1240 int jump_count_uncompacted = jump_count / (brw->is_g4x ? 1 : 2);
1241
1242 int target_old_ip = this_old_ip + jump_count_uncompacted;
1243
1244 int this_compacted_count = compacted_counts[this_old_ip];
1245 int target_compacted_count = compacted_counts[target_old_ip];
1246
1247 jump_count_compacted -= (target_compacted_count - this_compacted_count);
1248 brw_inst_set_gen4_jump_count(brw, insn, jump_count_compacted /
1249 (brw->is_g4x ? 2 : 1));
1250 }
1251
1252 void
1253 brw_init_compaction_tables(struct brw_context *brw)
1254 {
1255 assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
1256 assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
1257 assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
1258 assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
1259 assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
1260 assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
1261 assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
1262 assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
1263 assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
1264 assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
1265 assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
1266 assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
1267 assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
1268 assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
1269 assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
1270 assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
1271
1272 switch (brw->gen) {
1273 case 8:
1274 control_index_table = gen8_control_index_table;
1275 datatype_table = gen8_datatype_table;
1276 subreg_table = gen8_subreg_table;
1277 src_index_table = gen8_src_index_table;
1278 break;
1279 case 7:
1280 control_index_table = gen7_control_index_table;
1281 datatype_table = gen7_datatype_table;
1282 subreg_table = gen7_subreg_table;
1283 src_index_table = gen7_src_index_table;
1284 break;
1285 case 6:
1286 control_index_table = gen6_control_index_table;
1287 datatype_table = gen6_datatype_table;
1288 subreg_table = gen6_subreg_table;
1289 src_index_table = gen6_src_index_table;
1290 break;
1291 case 5:
1292 case 4:
1293 control_index_table = g45_control_index_table;
1294 datatype_table = g45_datatype_table;
1295 subreg_table = g45_subreg_table;
1296 src_index_table = g45_src_index_table;
1297 break;
1298 default:
1299 unreachable("unknown generation");
1300 }
1301 }
1302
1303 void
1304 brw_compact_instructions(struct brw_compile *p, int start_offset,
1305 int num_annotations, struct annotation *annotation)
1306 {
1307 struct brw_context *brw = p->brw;
1308 void *store = p->store + start_offset / 16;
1309 /* For an instruction at byte offset 16*i before compaction, this is the
1310 * number of compacted instructions minus the number of padding NOP/NENOPs
1311 * that preceded it.
1312 */
1313 int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
1314 /* For an instruction at byte offset 8*i after compaction, this was its IP
1315 * (in 16-byte units) before compaction.
1316 */
1317 int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst)];
1318
1319 if (brw->gen == 4 && !brw->is_g4x)
1320 return;
1321
1322 int offset = 0;
1323 int compacted_count = 0;
1324 for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
1325 src_offset += sizeof(brw_inst)) {
1326 brw_inst *src = store + src_offset;
1327 void *dst = store + offset;
1328
1329 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1330 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1331
1332 brw_inst saved = *src;
1333
1334 if (brw_try_compact_instruction(brw, dst, src)) {
1335 compacted_count++;
1336
1337 if (INTEL_DEBUG) {
1338 brw_inst uncompacted;
1339 brw_uncompact_instruction(brw, &uncompacted, dst);
1340 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
1341 brw_debug_compact_uncompact(brw, &saved, &uncompacted);
1342 }
1343 }
1344
1345 offset += sizeof(brw_compact_inst);
1346 } else {
1347 /* It appears that the end of thread SEND instruction needs to be
1348 * aligned, or the GPU hangs. All uncompacted instructions need to be
1349 * aligned on G45.
1350 */
1351 if ((offset & sizeof(brw_compact_inst)) != 0 &&
1352 (((brw_inst_opcode(brw, src) == BRW_OPCODE_SEND ||
1353 brw_inst_opcode(brw, src) == BRW_OPCODE_SENDC) &&
1354 brw_inst_eot(brw, src)) ||
1355 brw->is_g4x)) {
1356 brw_compact_inst *align = store + offset;
1357 memset(align, 0, sizeof(*align));
1358 brw_compact_inst_set_opcode(align, brw->is_g4x ? BRW_OPCODE_NENOP :
1359 BRW_OPCODE_NOP);
1360 brw_compact_inst_set_cmpt_control(align, true);
1361 offset += sizeof(brw_compact_inst);
1362 compacted_count--;
1363 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1364 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1365
1366 dst = store + offset;
1367 }
1368
1369 /* If we didn't compact this intruction, we need to move it down into
1370 * place.
1371 */
1372 if (offset != src_offset) {
1373 memmove(dst, src, sizeof(brw_inst));
1374 }
1375 offset += sizeof(brw_inst);
1376 }
1377 }
1378
1379 /* Fix up control flow offsets. */
1380 p->next_insn_offset = start_offset + offset;
1381 for (offset = 0; offset < p->next_insn_offset - start_offset;
1382 offset = next_offset(brw, store, offset)) {
1383 brw_inst *insn = store + offset;
1384 int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
1385 int this_compacted_count = compacted_counts[this_old_ip];
1386 int target_old_ip, target_compacted_count;
1387
1388 switch (brw_inst_opcode(brw, insn)) {
1389 case BRW_OPCODE_BREAK:
1390 case BRW_OPCODE_CONTINUE:
1391 case BRW_OPCODE_HALT:
1392 if (brw->gen >= 6) {
1393 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
1394 } else {
1395 update_gen4_jump_count(brw, insn, this_old_ip, compacted_counts);
1396 }
1397 break;
1398
1399 case BRW_OPCODE_IF:
1400 case BRW_OPCODE_IFF:
1401 case BRW_OPCODE_ELSE:
1402 case BRW_OPCODE_ENDIF:
1403 case BRW_OPCODE_WHILE:
1404 if (brw->gen >= 7) {
1405 if (brw_inst_cmpt_control(brw, insn)) {
1406 brw_inst uncompacted;
1407 brw_uncompact_instruction(brw, &uncompacted,
1408 (brw_compact_inst *)insn);
1409
1410 update_uip_jip(brw, &uncompacted, this_old_ip, compacted_counts);
1411
1412 bool ret = brw_try_compact_instruction(brw,
1413 (brw_compact_inst *)insn,
1414 &uncompacted);
1415 assert(ret); (void)ret;
1416 } else {
1417 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
1418 }
1419 } else if (brw->gen == 6) {
1420 assert(!brw_inst_cmpt_control(brw, insn));
1421
1422 /* Jump Count is in units of compacted instructions on Gen6. */
1423 int jump_count_compacted = brw_inst_gen6_jump_count(brw, insn);
1424 int jump_count_uncompacted = jump_count_compacted / 2;
1425
1426 target_old_ip = this_old_ip + jump_count_uncompacted;
1427 target_compacted_count = compacted_counts[target_old_ip];
1428 jump_count_compacted -= (target_compacted_count - this_compacted_count);
1429 brw_inst_set_gen6_jump_count(brw, insn, jump_count_compacted);
1430 } else {
1431 update_gen4_jump_count(brw, insn, this_old_ip, compacted_counts);
1432 }
1433 break;
1434
1435 case BRW_OPCODE_ADD:
1436 /* Add instructions modifying the IP register use an immediate src1,
1437 * and Gens that use this cannot compact instructions with immediate
1438 * operands.
1439 */
1440 if (brw_inst_cmpt_control(brw, insn))
1441 break;
1442
1443 if (brw_inst_dst_reg_file(brw, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
1444 brw_inst_dst_da_reg_nr(brw, insn) == BRW_ARF_IP) {
1445 assert(brw_inst_src1_reg_file(brw, insn) == BRW_IMMEDIATE_VALUE);
1446
1447 int jump = brw_inst_imm_d(brw, insn);
1448 int jump_compacted = jump / sizeof(brw_compact_inst);
1449 int jump_uncompacted = jump / sizeof(brw_inst);
1450
1451 target_old_ip = this_old_ip + jump_uncompacted;
1452 target_compacted_count = compacted_counts[target_old_ip];
1453 jump_compacted -= (target_compacted_count - this_compacted_count);
1454 brw_inst_set_imm_ud(brw, insn, jump_compacted *
1455 sizeof(brw_compact_inst));
1456 }
1457 break;
1458 }
1459 }
1460
1461 /* p->nr_insn is counting the number of uncompacted instructions still, so
1462 * divide. We do want to be sure there's a valid instruction in any
1463 * alignment padding, so that the next compression pass (for the FS 8/16
1464 * compile passes) parses correctly.
1465 */
1466 if (p->next_insn_offset & sizeof(brw_compact_inst)) {
1467 brw_compact_inst *align = store + offset;
1468 memset(align, 0, sizeof(*align));
1469 brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
1470 brw_compact_inst_set_cmpt_control(align, true);
1471 p->next_insn_offset += sizeof(brw_compact_inst);
1472 }
1473 p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
1474
1475 /* Update the instruction offsets for each annotation. */
1476 if (annotation) {
1477 for (int offset = 0, i = 0; i < num_annotations; i++) {
1478 while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1479 sizeof(brw_inst) != annotation[i].offset) {
1480 assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1481 sizeof(brw_inst) < annotation[i].offset);
1482 offset = next_offset(brw, store, offset);
1483 }
1484
1485 annotation[i].offset = start_offset + offset;
1486
1487 offset = next_offset(brw, store, offset);
1488 }
1489
1490 annotation[num_annotations].offset = p->next_insn_offset;
1491 }
1492 }