i965/compaction: Add support for Gen5.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_compact.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of gm45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 */
39
40 #include "brw_context.h"
41 #include "brw_eu.h"
42 #include "intel_asm_annotation.h"
43
/* Control-index lookup table carrying the "g45" prefix: 32 bit patterns,
 * each written as 17 binary digits.  set_control_index() scans whichever
 * table control_index_table points at for an exact match and stores the
 * matching position in the compacted instruction; set_uncompacted_control()
 * maps a position back to its pattern.
 * NOTE(review): presumably installed as control_index_table for G45/Gen5 by
 * setup code outside this view -- confirm.
 */
44 static const uint32_t g45_control_index_table[32] = {
45 0b00000000000000000,
46 0b01000000000000000,
47 0b00110000000000000,
48 0b00000000000000010,
49 0b00100000000000000,
50 0b00010000000000000,
51 0b01000000000100000,
52 0b01000000100000000,
53 0b01010000000100000,
54 0b00000000100000010,
55 0b11000000000000000,
56 0b00001000100000010,
57 0b01001000100000000,
58 0b00000000100000000,
59 0b11000000000100000,
60 0b00001000100000000,
61 0b10110000000000000,
62 0b11010000000100000,
63 0b00110000100000000,
64 0b00100000100000000,
65 0b01000000000001000,
66 0b01000000000000100,
67 0b00111100000000000,
68 0b00101011000000000,
69 0b00110000000010000,
70 0b00010000100000000,
71 0b01000000000100100,
72 0b01000000000101000,
73 0b00110000000000110,
74 0b00000000000001010,
75 0b01010000000101000,
76 0b01010000000100100
77 };
78
/* Datatype lookup table carrying the "g45" prefix: 32 bit patterns, each
 * written as 18 binary digits.  set_datatype_index() scans the table that
 * datatype_table points at for an exact match; set_uncompacted_datatype()
 * expands a stored position back into instruction bits.
 * NOTE(review): presumably installed as datatype_table for G45/Gen5 by
 * setup code outside this view -- confirm.
 */
79 static const uint32_t g45_datatype_table[32] = {
80 0b001000000000100001,
81 0b001011010110101101,
82 0b001000001000110001,
83 0b001111011110111101,
84 0b001011010110101100,
85 0b001000000110101101,
86 0b001000000000100000,
87 0b010100010110110001,
88 0b001100011000101101,
89 0b001000000000100010,
90 0b001000001000110110,
91 0b010000001000110001,
92 0b001000001000110010,
93 0b011000001000110010,
94 0b001111011110111100,
95 0b001000000100101000,
96 0b010100011000110001,
97 0b001010010100101001,
98 0b001000001000101001,
99 0b010000001000110110,
100 0b101000001000110001,
101 0b001011011000101101,
102 0b001000000100001001,
103 0b001011011000101100,
104 0b110100011000110001,
105 0b001000001110111101,
106 0b110000001000110001,
107 0b011000000100101010,
108 0b101000001000101001,
109 0b001011010110001100,
110 0b001000000110100001,
111 0b001010010100001000
112 };
113
/* Subregister lookup table carrying the "g45" prefix: 32 bit patterns, each
 * written as 15 binary digits (three 5-bit groups, as assembled by
 * set_subreg_index() and split again by set_uncompacted_subreg()).
 * NOTE(review): presumably installed as subreg_table for G45/Gen5 by setup
 * code outside this view -- confirm.
 */
114 static const uint16_t g45_subreg_table[32] = {
115 0b000000000000000,
116 0b000000010000000,
117 0b000001000000000,
118 0b000100000000000,
119 0b000000000100000,
120 0b100000000000000,
121 0b000000000010000,
122 0b001100000000000,
123 0b001010000000000,
124 0b000000100000000,
125 0b001000000000000,
126 0b000000000001000,
127 0b000000001000000,
128 0b000000000000001,
129 0b000010000000000,
130 0b000000010100000,
131 0b000000000000111,
132 0b000001000100000,
133 0b011000000000000,
134 0b000000110000000,
135 0b000000000000010,
136 0b000000000000100,
137 0b000000001100000,
138 0b000100000000010,
139 0b001110011000110,
140 0b001110100001000,
141 0b000110011000110,
142 0b000001000011000,
143 0b000110010000100,
144 0b001100000000110,
145 0b000000010000110,
146 0b000001000110000
147 };
148
/* Source-index lookup table carrying the "g45" prefix: 32 bit patterns, each
 * written as 12 binary digits.  get_src_index() scans the table that
 * src_index_table points at on behalf of both src0 and src1;
 * set_uncompacted_src0()/set_uncompacted_src1() perform the reverse mapping.
 * NOTE(review): presumably installed as src_index_table for G45/Gen5 by
 * setup code outside this view -- confirm.
 */
149 static const uint16_t g45_src_index_table[32] = {
150 0b000000000000,
151 0b010001101000,
152 0b010110001000,
153 0b011010010000,
154 0b001101001000,
155 0b010110001010,
156 0b010101110000,
157 0b011001111000,
158 0b001000101000,
159 0b000000101000,
160 0b010001010000,
161 0b111101101100,
162 0b010110001100,
163 0b010001101100,
164 0b011010010100,
165 0b010001001100,
166 0b001100101000,
167 0b000000000010,
168 0b111101001100,
169 0b011001101000,
170 0b010101001000,
171 0b000000000100,
172 0b000000101100,
173 0b010001101010,
174 0b000000111000,
175 0b010101011000,
176 0b000100100000,
177 0b010110000000,
178 0b010000000100,
179 0b010000111000,
180 0b000101100000,
181 0b111101110100
182 };
183
/* Control-index lookup table carrying the "gen6" prefix: 32 bit patterns,
 * each written as 17 binary digits.  Used through control_index_table by
 * set_control_index() (pattern -> position) and set_uncompacted_control()
 * (position -> pattern).
 * NOTE(review): presumably installed as control_index_table for Gen6 by
 * setup code outside this view -- confirm.
 */
184 static const uint32_t gen6_control_index_table[32] = {
185 0b00000000000000000,
186 0b01000000000000000,
187 0b00110000000000000,
188 0b00000000100000000,
189 0b00010000000000000,
190 0b00001000100000000,
191 0b00000000100000010,
192 0b00000000000000010,
193 0b01000000100000000,
194 0b01010000000000000,
195 0b10110000000000000,
196 0b00100000000000000,
197 0b11010000000000000,
198 0b11000000000000000,
199 0b01001000100000000,
200 0b01000000000001000,
201 0b01000000000000100,
202 0b00000000000001000,
203 0b00000000000000100,
204 0b00111000100000000,
205 0b00001000100000010,
206 0b00110000100000000,
207 0b00110000000000001,
208 0b00100000000000001,
209 0b00110000000000010,
210 0b00110000000000101,
211 0b00110000000001001,
212 0b00110000000010000,
213 0b00110000000000011,
214 0b00110000000000100,
215 0b00110000100001000,
216 0b00100000000001001
217 };
218
/* Datatype lookup table carrying the "gen6" prefix: 32 bit patterns, each
 * written as 18 binary digits.  Used through datatype_table by
 * set_datatype_index() and set_uncompacted_datatype().
 * Note that the pattern 0b001111011110111101 appears twice in this table;
 * the linear search in set_datatype_index() returns on the first match, so
 * the later duplicate is never chosen when compacting.
 * NOTE(review): presumably installed as datatype_table for Gen6 by setup
 * code outside this view -- confirm.
 */
219 static const uint32_t gen6_datatype_table[32] = {
220 0b001001110000000000,
221 0b001000110000100000,
222 0b001001110000000001,
223 0b001000000001100000,
224 0b001010110100101001,
225 0b001000000110101101,
226 0b001100011000101100,
227 0b001011110110101101,
228 0b001000000111101100,
229 0b001000000001100001,
230 0b001000110010100101,
231 0b001000000001000001,
232 0b001000001000110001,
233 0b001000001000101001,
234 0b001000000000100000,
235 0b001000001000110010,
236 0b001010010100101001,
237 0b001011010010100101,
238 0b001000000110100101,
239 0b001100011000101001,
240 0b001011011000101100,
241 0b001011010110100101,
242 0b001011110110100101,
243 0b001111011110111101,
244 0b001111011110111100,
245 0b001111011110111101,
246 0b001111011110011101,
247 0b001111011110111110,
248 0b001000000000100001,
249 0b001000000000100010,
250 0b001001111111011101,
251 0b001000001110111110,
252 };
253
/* Subregister lookup table carrying the "gen6" prefix: 32 bit patterns, each
 * written as 15 binary digits (three 5-bit groups, as assembled by
 * set_subreg_index() and split again by set_uncompacted_subreg()).
 * NOTE(review): presumably installed as subreg_table for Gen6 by setup code
 * outside this view -- confirm.
 */
254 static const uint16_t gen6_subreg_table[32] = {
255 0b000000000000000,
256 0b000000000000100,
257 0b000000110000000,
258 0b111000000000000,
259 0b011110000001000,
260 0b000010000000000,
261 0b000000000010000,
262 0b000110000001100,
263 0b001000000000000,
264 0b000001000000000,
265 0b000001010010100,
266 0b000000001010110,
267 0b010000000000000,
268 0b110000000000000,
269 0b000100000000000,
270 0b000000010000000,
271 0b000000000001000,
272 0b100000000000000,
273 0b000001010000000,
274 0b001010000000000,
275 0b001100000000000,
276 0b000000001010100,
277 0b101101010010100,
278 0b010100000000000,
279 0b000000010001111,
280 0b011000000000000,
281 0b111110000000000,
282 0b101000000000000,
283 0b000000000001111,
284 0b000100010001111,
285 0b001000010001111,
286 0b000110000000000,
287 };
288
/* Source-index lookup table carrying the "gen6" prefix: 32 bit patterns,
 * each written as 12 binary digits.  Used through src_index_table by
 * get_src_index() and by set_uncompacted_src0()/set_uncompacted_src1().
 * NOTE(review): presumably installed as src_index_table for Gen6 by setup
 * code outside this view -- confirm.
 */
289 static const uint16_t gen6_src_index_table[32] = {
290 0b000000000000,
291 0b010110001000,
292 0b010001101000,
293 0b001000101000,
294 0b011010010000,
295 0b000100100000,
296 0b010001101100,
297 0b010101110000,
298 0b011001111000,
299 0b001100101000,
300 0b010110001100,
301 0b001000100000,
302 0b010110001010,
303 0b000000000010,
304 0b010101010000,
305 0b010101101000,
306 0b111101001100,
307 0b111100101100,
308 0b011001110000,
309 0b010110001001,
310 0b010101011000,
311 0b001101001000,
312 0b010000101100,
313 0b010000000000,
314 0b001101110000,
315 0b001100010000,
316 0b001100000000,
317 0b010001101010,
318 0b001101111000,
319 0b000001110000,
320 0b001100100000,
321 0b001101010000,
322 };
323
/* Control-index lookup table carrying the "gen7" prefix: 32 bit patterns,
 * each written as 19 binary digits.  On Gen7, set_control_index() places two
 * extra bits (instruction bits 90:89) above the 17-bit pattern, which is why
 * these entries are wider than the g45/gen6 ones.
 * NOTE(review): presumably installed as control_index_table for Gen7 by
 * setup code outside this view -- confirm.
 */
324 static const uint32_t gen7_control_index_table[32] = {
325 0b0000000000000000010,
326 0b0000100000000000000,
327 0b0000100000000000001,
328 0b0000100000000000010,
329 0b0000100000000000011,
330 0b0000100000000000100,
331 0b0000100000000000101,
332 0b0000100000000000111,
333 0b0000100000000001000,
334 0b0000100000000001001,
335 0b0000100000000001101,
336 0b0000110000000000000,
337 0b0000110000000000001,
338 0b0000110000000000010,
339 0b0000110000000000011,
340 0b0000110000000000100,
341 0b0000110000000000101,
342 0b0000110000000000111,
343 0b0000110000000001001,
344 0b0000110000000001101,
345 0b0000110000000010000,
346 0b0000110000100000000,
347 0b0001000000000000000,
348 0b0001000000000000010,
349 0b0001000000000000100,
350 0b0001000000100000000,
351 0b0010110000000000000,
352 0b0010110000000010000,
353 0b0011000000000000000,
354 0b0011000000100000000,
355 0b0101000000000000000,
356 0b0101000000100000000
357 };
358
/* Datatype lookup table carrying the "gen7" prefix: 32 bit patterns, each
 * written as 18 binary digits.  Used through datatype_table by
 * set_datatype_index() and set_uncompacted_datatype().
 * NOTE(review): presumably installed as datatype_table for Gen7 by setup
 * code outside this view -- confirm.
 */
359 static const uint32_t gen7_datatype_table[32] = {
360 0b001000000000000001,
361 0b001000000000100000,
362 0b001000000000100001,
363 0b001000000001100001,
364 0b001000000010111101,
365 0b001000001011111101,
366 0b001000001110100001,
367 0b001000001110100101,
368 0b001000001110111101,
369 0b001000010000100001,
370 0b001000110000100000,
371 0b001000110000100001,
372 0b001001010010100101,
373 0b001001110010100100,
374 0b001001110010100101,
375 0b001111001110111101,
376 0b001111011110011101,
377 0b001111011110111100,
378 0b001111011110111101,
379 0b001111111110111100,
380 0b000000001000001100,
381 0b001000000000111101,
382 0b001000000010100101,
383 0b001000010000100000,
384 0b001001010010100100,
385 0b001001110010000100,
386 0b001010010100001001,
387 0b001101111110111101,
388 0b001111111110111101,
389 0b001011110110101100,
390 0b001010010100101000,
391 0b001010110100101000
392 };
393
/* Subregister lookup table carrying the "gen7" prefix: 32 bit patterns, each
 * written as 15 binary digits (three 5-bit groups, as assembled by
 * set_subreg_index() and split again by set_uncompacted_subreg()).
 * NOTE(review): presumably installed as subreg_table for Gen7 by setup code
 * outside this view -- confirm.
 */
394 static const uint16_t gen7_subreg_table[32] = {
395 0b000000000000000,
396 0b000000000000001,
397 0b000000000001000,
398 0b000000000001111,
399 0b000000000010000,
400 0b000000010000000,
401 0b000000100000000,
402 0b000000110000000,
403 0b000001000000000,
404 0b000001000010000,
405 0b000010100000000,
406 0b001000000000000,
407 0b001000000000001,
408 0b001000010000001,
409 0b001000010000010,
410 0b001000010000011,
411 0b001000010000100,
412 0b001000010000111,
413 0b001000010001000,
414 0b001000010001110,
415 0b001000010001111,
416 0b001000110000000,
417 0b001000111101000,
418 0b010000000000000,
419 0b010000110000000,
420 0b011000000000000,
421 0b011110010000111,
422 0b100000000000000,
423 0b101000000000000,
424 0b110000000000000,
425 0b111000000000000,
426 0b111000000011100
427 };
428
/* Source-index lookup table carrying the "gen7" prefix: 32 bit patterns,
 * each written as 12 binary digits.  Used through src_index_table by
 * get_src_index() and by set_uncompacted_src0()/set_uncompacted_src1().
 * NOTE(review): presumably installed as src_index_table for Gen7 by setup
 * code outside this view -- confirm.
 */
429 static const uint16_t gen7_src_index_table[32] = {
430 0b000000000000,
431 0b000000000010,
432 0b000000010000,
433 0b000000010010,
434 0b000000011000,
435 0b000000100000,
436 0b000000101000,
437 0b000001001000,
438 0b000001010000,
439 0b000001110000,
440 0b000001111000,
441 0b001100000000,
442 0b001100000010,
443 0b001100001000,
444 0b001100010000,
445 0b001100010010,
446 0b001100100000,
447 0b001100101000,
448 0b001100111000,
449 0b001101000000,
450 0b001101000010,
451 0b001101001000,
452 0b001101010000,
453 0b001101100000,
454 0b001101101000,
455 0b001101110000,
456 0b001101110001,
457 0b001101111000,
458 0b010001101000,
459 0b010001101001,
460 0b010001101010,
461 0b010110001000
462 };
463
/* Control-index lookup table carrying the "gen8" prefix: 32 bit patterns,
 * each written as 19 binary digits, matching the 3+12+2+1+1 bit key that
 * set_control_index() assembles when brw->gen >= 8.
 * NOTE(review): presumably installed as control_index_table for Gen8+ by
 * setup code outside this view -- confirm.
 */
464 static const uint32_t gen8_control_index_table[32] = {
465 0b0000000000000000010,
466 0b0000100000000000000,
467 0b0000100000000000001,
468 0b0000100000000000010,
469 0b0000100000000000011,
470 0b0000100000000000100,
471 0b0000100000000000101,
472 0b0000100000000000111,
473 0b0000100000000001000,
474 0b0000100000000001001,
475 0b0000100000000001101,
476 0b0000110000000000000,
477 0b0000110000000000001,
478 0b0000110000000000010,
479 0b0000110000000000011,
480 0b0000110000000000100,
481 0b0000110000000000101,
482 0b0000110000000000111,
483 0b0000110000000001001,
484 0b0000110000000001101,
485 0b0000110000000010000,
486 0b0000110000100000000,
487 0b0001000000000000000,
488 0b0001000000000000010,
489 0b0001000000000000100,
490 0b0001000000100000000,
491 0b0010110000000000000,
492 0b0010110000000010000,
493 0b0011000000000000000,
494 0b0011000000100000000,
495 0b0101000000000000000,
496 0b0101000000100000000
497 };
498
/* Datatype lookup table carrying the "gen8" prefix: 32 bit patterns, each
 * written as 21 binary digits, matching the 3+6+12 bit key that
 * set_datatype_index() assembles when brw->gen >= 8.
 * NOTE(review): presumably installed as datatype_table for Gen8+ by setup
 * code outside this view -- confirm.
 */
499 static const uint32_t gen8_datatype_table[32] = {
500 0b001000000000000000001,
501 0b001000000000001000000,
502 0b001000000000001000001,
503 0b001000000000011000001,
504 0b001000000000101011101,
505 0b001000000010111011101,
506 0b001000000011101000001,
507 0b001000000011101000101,
508 0b001000000011101011101,
509 0b001000001000001000001,
510 0b001000011000001000000,
511 0b001000011000001000001,
512 0b001000101000101000101,
513 0b001000111000101000100,
514 0b001000111000101000101,
515 0b001011100011101011101,
516 0b001011101011100011101,
517 0b001011101011101011100,
518 0b001011101011101011101,
519 0b001011111011101011100,
520 0b000000000010000001100,
521 0b001000000000001011101,
522 0b001000000000101000101,
523 0b001000001000001000000,
524 0b001000101000101000100,
525 0b001000111000100000100,
526 0b001001001001000001001,
527 0b001010111011101011101,
528 0b001011111011101011101,
529 0b001001111001101001100,
530 0b001001001001001001000,
531 0b001001011001001001000
532 };
533
/* Subregister lookup table carrying the "gen8" prefix: 32 bit patterns, each
 * written as 15 binary digits (three 5-bit groups, as assembled by
 * set_subreg_index() and split again by set_uncompacted_subreg()).
 * NOTE(review): presumably installed as subreg_table for Gen8+ by setup code
 * outside this view -- confirm.
 */
534 static const uint16_t gen8_subreg_table[32] = {
535 0b000000000000000,
536 0b000000000000001,
537 0b000000000001000,
538 0b000000000001111,
539 0b000000000010000,
540 0b000000010000000,
541 0b000000100000000,
542 0b000000110000000,
543 0b000001000000000,
544 0b000001000010000,
545 0b000001010000000,
546 0b001000000000000,
547 0b001000000000001,
548 0b001000010000001,
549 0b001000010000010,
550 0b001000010000011,
551 0b001000010000100,
552 0b001000010000111,
553 0b001000010001000,
554 0b001000010001110,
555 0b001000010001111,
556 0b001000110000000,
557 0b001000111101000,
558 0b010000000000000,
559 0b010000110000000,
560 0b011000000000000,
561 0b011110010000111,
562 0b100000000000000,
563 0b101000000000000,
564 0b110000000000000,
565 0b111000000000000,
566 0b111000000011100
567 };
568
/* Source-index lookup table carrying the "gen8" prefix: 32 bit patterns,
 * each written as 12 binary digits.  Used through src_index_table by
 * get_src_index() and by set_uncompacted_src0()/set_uncompacted_src1().
 * NOTE(review): presumably installed as src_index_table for Gen8+ by setup
 * code outside this view -- confirm.
 */
569 static const uint16_t gen8_src_index_table[32] = {
570 0b000000000000,
571 0b000000000010,
572 0b000000010000,
573 0b000000010010,
574 0b000000011000,
575 0b000000100000,
576 0b000000101000,
577 0b000001001000,
578 0b000001010000,
579 0b000001110000,
580 0b000001111000,
581 0b001100000000,
582 0b001100000010,
583 0b001100001000,
584 0b001100010000,
585 0b001100010010,
586 0b001100100000,
587 0b001100101000,
588 0b001100111000,
589 0b001101000000,
590 0b001101000010,
591 0b001101001000,
592 0b001101010000,
593 0b001101100000,
594 0b001101101000,
595 0b001101110000,
596 0b001101110001,
597 0b001101111000,
598 0b010001101000,
599 0b010001101001,
600 0b010001101010,
601 0b010110001000
602 };
603
604 /* This is actually the control index table for Cherryview (26 bits), but the
605 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
606 * the start.
607 *
608 * The low 24 bits have the same mappings on both hardware.
609 *
 * The table has four entries.  set_3src_control_index() searches it for an
 * exact match of the control bits of a three-source instruction, and
 * set_uncompacted_3src_control_index() expands a stored position again.
 */
610 static const uint32_t gen8_3src_control_index_table[4] = {
611 0b00100000000110000000000001,
612 0b00000000000110000000000001,
613 0b00000000001000000000000001,
614 0b00000000001000000000100001
615 };
616
617 /* This is actually the source index table for Cherryview (49 bits), but the
618 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
619 * at the start.
620 *
621 * The low 44 bits have the same mappings on both hardware, and since the high
622 * three bits on Broadwell are zero, we can reuse Cherryview's table.
623 *
 * The table has four entries.  set_3src_source_index() searches it for an
 * exact match of the source bits of a three-source instruction, and
 * set_uncompacted_3src_source_index() expands a stored position again.
 */
624 static const uint64_t gen8_3src_source_index_table[4] = {
625 0b0000001110010011100100111001000001111000000000000,
626 0b0000001110010011100100111001000001111000000000010,
627 0b0000001110010011100100111001000001111000000001000,
628 0b0000001110010011100100111001000001111000000100000
629 };
630
/* Currently selected lookup tables.  Every set_*_index() and
 * set_uncompacted_*() helper for one- and two-source instructions reads
 * through these pointers rather than naming a per-generation table directly.
 * They have no initializer here, so they start out NULL.
 * NOTE(review): they are presumably pointed at the g45/gen6/gen7/gen8 tables
 * by initialization code outside this view -- confirm they are always set
 * before any compaction or uncompaction call.
 */
631 static const uint32_t *control_index_table;
632 static const uint32_t *datatype_table;
633 static const uint16_t *subreg_table;
634 static const uint16_t *src_index_table;
635
636 static bool
637 set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
638 {
639 uint32_t uncompacted = brw->gen >= 8 /* 17b/G45; 19b/IVB+ */
640 ? (brw_inst_bits(src, 33, 31) << 16) | /* 3b */
641 (brw_inst_bits(src, 23, 12) << 4) | /* 12b */
642 (brw_inst_bits(src, 10, 9) << 2) | /* 2b */
643 (brw_inst_bits(src, 34, 34) << 1) | /* 1b */
644 (brw_inst_bits(src, 8, 8)) /* 1b */
645 : (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
646 (brw_inst_bits(src, 23, 8)); /* 16b */
647
648 /* On gen7, the flag register and subregister numbers are integrated into
649 * the control index.
650 */
651 if (brw->gen == 7)
652 uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
653
654 for (int i = 0; i < 32; i++) {
655 if (control_index_table[i] == uncompacted) {
656 brw_compact_inst_set_control_index(dst, i);
657 return true;
658 }
659 }
660
661 return false;
662 }
663
664 static bool
665 set_datatype_index(struct brw_context *brw, brw_compact_inst *dst,
666 brw_inst *src)
667 {
668 uint32_t uncompacted = brw->gen >= 8 /* 18b/G45+; 21b/BDW+ */
669 ? (brw_inst_bits(src, 63, 61) << 18) | /* 3b */
670 (brw_inst_bits(src, 94, 89) << 12) | /* 6b */
671 (brw_inst_bits(src, 46, 35)) /* 12b */
672 : (brw_inst_bits(src, 63, 61) << 15) | /* 3b */
673 (brw_inst_bits(src, 46, 32)); /* 15b */
674
675 for (int i = 0; i < 32; i++) {
676 if (datatype_table[i] == uncompacted) {
677 brw_compact_inst_set_datatype_index(dst, i);
678 return true;
679 }
680 }
681
682 return false;
683 }
684
685 static bool
686 set_subreg_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
687 bool is_immediate)
688 {
689 uint16_t uncompacted = /* 15b */
690 (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
691 (brw_inst_bits(src, 68, 64) << 5); /* 5b */
692
693 if (!is_immediate)
694 uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
695
696 for (int i = 0; i < 32; i++) {
697 if (subreg_table[i] == uncompacted) {
698 brw_compact_inst_set_subreg_index(dst, i);
699 return true;
700 }
701 }
702
703 return false;
704 }
705
706 static bool
707 get_src_index(uint16_t uncompacted,
708 uint16_t *compacted)
709 {
710 for (int i = 0; i < 32; i++) {
711 if (src_index_table[i] == uncompacted) {
712 *compacted = i;
713 return true;
714 }
715 }
716
717 return false;
718 }
719
720 static bool
721 set_src0_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
722 {
723 uint16_t compacted;
724 uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
725
726 if (!get_src_index(uncompacted, &compacted))
727 return false;
728
729 brw_compact_inst_set_src0_index(dst, compacted);
730
731 return true;
732 }
733
734 static bool
735 set_src1_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
736 bool is_immediate)
737 {
738 uint16_t compacted;
739
740 if (is_immediate) {
741 compacted = (brw_inst_imm_ud(brw, src) >> 8) & 0x1f;
742 } else {
743 uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
744
745 if (!get_src_index(uncompacted, &compacted))
746 return false;
747 }
748
749 brw_compact_inst_set_src1_index(dst, compacted);
750
751 return true;
752 }
753
754 static bool
755 set_3src_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
756 {
757 assert(brw->gen >= 8);
758
759 uint32_t uncompacted = /* 24b/BDW; 26b/CHV */
760 (brw_inst_bits(src, 34, 32) << 21) | /* 3b */
761 (brw_inst_bits(src, 28, 8)); /* 21b */
762
763 if (brw->is_cherryview)
764 uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
765
766 for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
767 if (gen8_3src_control_index_table[i] == uncompacted) {
768 brw_compact_inst_set_3src_control_index(dst, i);
769 return true;
770 }
771 }
772
773 return false;
774 }
775
776 static bool
777 set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
778 {
779 assert(brw->gen >= 8);
780
781 uint64_t uncompacted = /* 46b/BDW; 49b/CHV */
782 (brw_inst_bits(src, 83, 83) << 43) | /* 1b */
783 (brw_inst_bits(src, 114, 107) << 35) | /* 8b */
784 (brw_inst_bits(src, 93, 86) << 27) | /* 8b */
785 (brw_inst_bits(src, 72, 65) << 19) | /* 8b */
786 (brw_inst_bits(src, 55, 37)); /* 19b */
787
788 if (brw->is_cherryview) {
789 uncompacted |=
790 (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
791 (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
792 (brw_inst_bits(src, 84, 84) << 44); /* 1b */
793 } else {
794 uncompacted |=
795 (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
796 (brw_inst_bits(src, 104, 104) << 44); /* 1b */
797 }
798
799 for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
800 if (gen8_3src_source_index_table[i] == uncompacted) {
801 brw_compact_inst_set_3src_source_index(dst, i);
802 return true;
803 }
804 }
805
806 return false;
807 }
808
809 static bool
810 brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst *dst,
811 brw_inst *src)
812 {
813 assert(brw->gen >= 8);
814
815 #define compact(field) \
816 brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(brw, src))
817
818 compact(opcode);
819
820 if (!set_3src_control_index(brw, dst, src))
821 return false;
822
823 if (!set_3src_source_index(brw, dst, src))
824 return false;
825
826 compact(dst_reg_nr);
827 compact(src0_rep_ctrl);
828 brw_compact_inst_set_3src_cmpt_control(dst, true);
829 compact(debug_control);
830 compact(saturate);
831 compact(src1_rep_ctrl);
832 compact(src2_rep_ctrl);
833 compact(src0_reg_nr);
834 compact(src1_reg_nr);
835 compact(src2_reg_nr);
836 compact(src0_subreg_nr);
837 compact(src1_subreg_nr);
838 compact(src2_subreg_nr);
839
840 #undef compact
841
842 return true;
843 }
844
/* A compacted instruction stores the low 12 bits of an immediate verbatim,
 * plus a 13th bit that is replicated through the remaining high 20 bits.
 *
 * An immediate is therefore representable exactly when its upper 20 bits
 * are all zeros or all ones: 12-bit integers, 0.0f, and a limited set of
 * packed-vector immediates.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* Drop the 12 bits that are stored as-is and classify what is left. */
   switch (imm & ~0xfffu) {
   case 0:
   case 0xfffff000:
      return true;
   default:
      return false;
   }
}
860
861 /* Returns whether an opcode takes three sources. */
862 static bool
863 is_3src(uint32_t op)
864 {
865 return opcode_descs[op].nsrc == 3;
866 }
867
868 /**
869 * Tries to compact instruction src into dst.
870 *
871 * It doesn't modify dst unless src is compactable, which is relied on by
872 * brw_compact_instructions().
873 */
874 bool
875 brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst,
876 brw_inst *src)
877 {
878 brw_compact_inst temp;
879
880 assert(brw_inst_cmpt_control(brw, src) == 0);
881
882 if (brw_inst_opcode(brw, src) == BRW_OPCODE_IF ||
883 brw_inst_opcode(brw, src) == BRW_OPCODE_IFF ||
884 brw_inst_opcode(brw, src) == BRW_OPCODE_ELSE ||
885 brw_inst_opcode(brw, src) == BRW_OPCODE_ENDIF ||
886 brw_inst_opcode(brw, src) == BRW_OPCODE_HALT ||
887 brw_inst_opcode(brw, src) == BRW_OPCODE_DO ||
888 brw_inst_opcode(brw, src) == BRW_OPCODE_WHILE) {
889 /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, needs
890 * to be able to handle compacted flow control instructions..
891 */
892 return false;
893 }
894
895 if (is_3src(brw_inst_opcode(brw, src))) {
896 if (brw->gen >= 8) {
897 memset(&temp, 0, sizeof(temp));
898 if (brw_try_compact_3src_instruction(brw, &temp, src)) {
899 *dst = temp;
900 return true;
901 } else {
902 return false;
903 }
904 } else {
905 return false;
906 }
907 }
908
909 bool is_immediate =
910 brw_inst_src0_reg_file(brw, src) == BRW_IMMEDIATE_VALUE ||
911 brw_inst_src1_reg_file(brw, src) == BRW_IMMEDIATE_VALUE;
912 if (is_immediate &&
913 (brw->gen < 6 || !is_compactable_immediate(brw_inst_imm_ud(brw, src)))) {
914 return false;
915 }
916
917 memset(&temp, 0, sizeof(temp));
918
919 brw_compact_inst_set_opcode(&temp, brw_inst_opcode(brw, src));
920 brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(brw, src));
921 if (!set_control_index(brw, &temp, src))
922 return false;
923 if (!set_datatype_index(brw, &temp, src))
924 return false;
925 if (!set_subreg_index(brw, &temp, src, is_immediate))
926 return false;
927 brw_compact_inst_set_acc_wr_control(&temp,
928 brw_inst_acc_wr_control(brw, src));
929 brw_compact_inst_set_cond_modifier(&temp, brw_inst_cond_modifier(brw, src));
930 if (brw->gen <= 6)
931 brw_compact_inst_set_flag_subreg_nr(&temp,
932 brw_inst_flag_subreg_nr(brw, src));
933 brw_compact_inst_set_cmpt_control(&temp, true);
934 if (!set_src0_index(brw, &temp, src))
935 return false;
936 if (!set_src1_index(brw, &temp, src, is_immediate))
937 return false;
938 brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(brw, src));
939 brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(brw, src));
940 if (is_immediate) {
941 brw_compact_inst_set_src1_reg_nr(&temp, brw_inst_imm_ud(brw, src) & 0xff);
942 } else {
943 brw_compact_inst_set_src1_reg_nr(&temp,
944 brw_inst_src1_da_reg_nr(brw, src));
945 }
946
947 *dst = temp;
948
949 return true;
950 }
951
952 static void
953 set_uncompacted_control(struct brw_context *brw, brw_inst *dst,
954 brw_compact_inst *src)
955 {
956 uint32_t uncompacted =
957 control_index_table[brw_compact_inst_control_index(src)];
958
959 if (brw->gen >= 8) {
960 brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
961 brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
962 brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
963 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
964 brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
965 } else {
966 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
967 brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
968
969 if (brw->gen == 7)
970 brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
971 }
972 }
973
974 static void
975 set_uncompacted_datatype(struct brw_context *brw, brw_inst *dst,
976 brw_compact_inst *src)
977 {
978 uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
979
980 if (brw->gen >= 8) {
981 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
982 brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
983 brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
984 } else {
985 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
986 brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
987 }
988 }
989
990 static void
991 set_uncompacted_subreg(struct brw_context *brw, brw_inst *dst,
992 brw_compact_inst *src)
993 {
994 uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
995
996 brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
997 brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
998 brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
999 }
1000
1001 static void
1002 set_uncompacted_src0(struct brw_context *brw, brw_inst *dst,
1003 brw_compact_inst *src)
1004 {
1005 uint32_t compacted = brw_compact_inst_src0_index(src);
1006 uint16_t uncompacted = src_index_table[compacted];
1007
1008 brw_inst_set_bits(dst, 88, 77, uncompacted);
1009 }
1010
1011 static void
1012 set_uncompacted_src1(struct brw_context *brw, brw_inst *dst,
1013 brw_compact_inst *src, bool is_immediate)
1014 {
1015 if (is_immediate) {
1016 signed high5 = brw_compact_inst_src1_index(src);
1017 /* Replicate top bit of src1_index into high 20 bits of the immediate. */
1018 brw_inst_set_imm_ud(brw, dst, (high5 << 27) >> 19);
1019 } else {
1020 uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
1021
1022 brw_inst_set_bits(dst, 120, 109, uncompacted);
1023 }
1024 }
1025
1026 static void
1027 set_uncompacted_3src_control_index(struct brw_context *brw, brw_inst *dst,
1028 brw_compact_inst *src)
1029 {
1030 assert(brw->gen >= 8);
1031
1032 uint32_t compacted = brw_compact_inst_3src_control_index(src);
1033 uint32_t uncompacted = gen8_3src_control_index_table[compacted];
1034
1035 brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1036 brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
1037
1038 if (brw->is_cherryview)
1039 brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1040 }
1041
1042 static void
1043 set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst,
1044 brw_compact_inst *src)
1045 {
1046 assert(brw->gen >= 8);
1047
1048 uint32_t compacted = brw_compact_inst_3src_source_index(src);
1049 uint64_t uncompacted = gen8_3src_source_index_table[compacted];
1050
1051 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
1052 brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1053 brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
1054 brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
1055 brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
1056
1057 if (brw->is_cherryview) {
1058 brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1059 brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1060 brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
1061 } else {
1062 brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1063 brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1064 }
1065 }
1066
1067 static void
1068 brw_uncompact_3src_instruction(struct brw_context *brw, brw_inst *dst,
1069 brw_compact_inst *src)
1070 {
1071 assert(brw->gen >= 8);
1072
1073 #define uncompact(field) \
1074 brw_inst_set_3src_##field(brw, dst, brw_compact_inst_3src_##field(src))
1075
1076 uncompact(opcode);
1077
1078 set_uncompacted_3src_control_index(brw, dst, src);
1079 set_uncompacted_3src_source_index(brw, dst, src);
1080
1081 uncompact(dst_reg_nr);
1082 uncompact(src0_rep_ctrl);
1083 brw_inst_set_3src_cmpt_control(brw, dst, false);
1084 uncompact(debug_control);
1085 uncompact(saturate);
1086 uncompact(src1_rep_ctrl);
1087 uncompact(src2_rep_ctrl);
1088 uncompact(src0_reg_nr);
1089 uncompact(src1_reg_nr);
1090 uncompact(src2_reg_nr);
1091 uncompact(src0_subreg_nr);
1092 uncompact(src1_subreg_nr);
1093 uncompact(src2_subreg_nr);
1094
1095 #undef uncompact
1096 }
1097
1098 void
1099 brw_uncompact_instruction(struct brw_context *brw, brw_inst *dst,
1100 brw_compact_inst *src)
1101 {
1102 memset(dst, 0, sizeof(*dst));
1103
1104 if (brw->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) {
1105 brw_uncompact_3src_instruction(brw, dst, src);
1106 return;
1107 }
1108
1109 brw_inst_set_opcode(brw, dst, brw_compact_inst_opcode(src));
1110 brw_inst_set_debug_control(brw, dst, brw_compact_inst_debug_control(src));
1111
1112 set_uncompacted_control(brw, dst, src);
1113 set_uncompacted_datatype(brw, dst, src);
1114
1115 /* src0/1 register file fields are in the datatype table. */
1116 bool is_immediate = brw_inst_src0_reg_file(brw, dst) == BRW_IMMEDIATE_VALUE ||
1117 brw_inst_src1_reg_file(brw, dst) == BRW_IMMEDIATE_VALUE;
1118
1119 set_uncompacted_subreg(brw, dst, src);
1120 brw_inst_set_acc_wr_control(brw, dst, brw_compact_inst_acc_wr_control(src));
1121 brw_inst_set_cond_modifier(brw, dst, brw_compact_inst_cond_modifier(src));
1122 if (brw->gen <= 6)
1123 brw_inst_set_flag_subreg_nr(brw, dst,
1124 brw_compact_inst_flag_subreg_nr(src));
1125 set_uncompacted_src0(brw, dst, src);
1126 set_uncompacted_src1(brw, dst, src, is_immediate);
1127 brw_inst_set_dst_da_reg_nr(brw, dst, brw_compact_inst_dst_reg_nr(src));
1128 brw_inst_set_src0_da_reg_nr(brw, dst, brw_compact_inst_src0_reg_nr(src));
1129 if (is_immediate) {
1130 brw_inst_set_imm_ud(brw, dst,
1131 brw_inst_imm_ud(brw, dst) |
1132 brw_compact_inst_src1_reg_nr(src));
1133 } else {
1134 brw_inst_set_src1_da_reg_nr(brw, dst, brw_compact_inst_src1_reg_nr(src));
1135 }
1136 }
1137
1138 void brw_debug_compact_uncompact(struct brw_context *brw,
1139 brw_inst *orig,
1140 brw_inst *uncompacted)
1141 {
1142 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
1143 brw->gen);
1144
1145 fprintf(stderr, " before: ");
1146 brw_disassemble_inst(stderr, brw, orig, true);
1147
1148 fprintf(stderr, " after: ");
1149 brw_disassemble_inst(stderr, brw, uncompacted, false);
1150
1151 uint32_t *before_bits = (uint32_t *)orig;
1152 uint32_t *after_bits = (uint32_t *)uncompacted;
1153 fprintf(stderr, " changed bits:\n");
1154 for (int i = 0; i < 128; i++) {
1155 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
1156 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
1157
1158 if (before != after) {
1159 fprintf(stderr, " bit %d, %s to %s\n", i,
1160 before ? "set" : "unset",
1161 after ? "set" : "unset");
1162 }
1163 }
1164 }
1165
/**
 * Returns compacted_counts[old_target_ip] - compacted_counts[old_ip].
 *
 * Both indices are pre-compaction instruction indices into
 * \p compacted_counts.  The result is negative when the target's count is
 * smaller than the count at \p old_ip.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1173
1174 static void
1175 update_uip_jip(struct brw_context *brw, brw_inst *insn,
1176 int this_old_ip, int *compacted_counts)
1177 {
1178 /* JIP and UIP are in units of:
1179 * - bytes on Gen8+; and
1180 * - compacted instructions on Gen6+.
1181 */
1182 int32_t jip = brw_inst_jip(brw, insn);
1183 int32_t jip_compacted = jip / (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1);
1184 int32_t jip_uncompacted = jip / (brw->gen >= 8 ? sizeof(brw_inst) : 2);
1185 jip_compacted -= compacted_between(this_old_ip,
1186 this_old_ip + jip_uncompacted,
1187 compacted_counts);
1188 brw_inst_set_jip(brw, insn,
1189 jip_compacted * (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1));
1190
1191 if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF ||
1192 brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE ||
1193 (brw_inst_opcode(brw, insn) == BRW_OPCODE_ELSE && brw->gen <= 7))
1194 return;
1195
1196 int32_t uip = brw_inst_uip(brw, insn);
1197 int32_t uip_compacted = uip / (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1);
1198 int32_t uip_uncompacted = uip / (brw->gen >= 8 ? sizeof(brw_inst) : 2);
1199 uip_compacted -= compacted_between(this_old_ip,
1200 this_old_ip + uip_uncompacted,
1201 compacted_counts);
1202 brw_inst_set_uip(brw, insn,
1203 uip_compacted * (brw->gen >= 8 ? sizeof(brw_compact_inst) : 1));
1204 }
1205
1206 static void
1207 update_gen4_jump_count(struct brw_context *brw, brw_inst *insn,
1208 int this_old_ip, int *compacted_counts)
1209 {
1210 assert(brw->gen == 5);
1211
1212 /* Jump Count is in units of:
1213 * - compacted instructions on Gen5.
1214 */
1215 int jump_count = brw_inst_gen4_jump_count(brw, insn);
1216 int jump_count_compacted = jump_count;
1217 int jump_count_uncompacted = jump_count / 2;
1218
1219 int target_old_ip = this_old_ip + jump_count_uncompacted;
1220
1221 int this_compacted_count = compacted_counts[this_old_ip];
1222 int target_compacted_count = compacted_counts[target_old_ip];
1223
1224 jump_count_compacted -= (target_compacted_count - this_compacted_count);
1225 brw_inst_set_gen4_jump_count(brw, insn, jump_count_compacted);
1226 }
1227
1228 void
1229 brw_init_compaction_tables(struct brw_context *brw)
1230 {
1231 assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
1232 assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
1233 assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
1234 assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
1235 assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
1236 assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
1237 assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
1238 assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
1239 assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
1240 assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
1241 assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
1242 assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
1243 assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
1244 assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
1245 assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
1246 assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
1247
1248 switch (brw->gen) {
1249 case 8:
1250 control_index_table = gen8_control_index_table;
1251 datatype_table = gen8_datatype_table;
1252 subreg_table = gen8_subreg_table;
1253 src_index_table = gen8_src_index_table;
1254 break;
1255 case 7:
1256 control_index_table = gen7_control_index_table;
1257 datatype_table = gen7_datatype_table;
1258 subreg_table = gen7_subreg_table;
1259 src_index_table = gen7_src_index_table;
1260 break;
1261 case 6:
1262 control_index_table = gen6_control_index_table;
1263 datatype_table = gen6_datatype_table;
1264 subreg_table = gen6_subreg_table;
1265 src_index_table = gen6_src_index_table;
1266 break;
1267 case 5:
1268 control_index_table = g45_control_index_table;
1269 datatype_table = g45_datatype_table;
1270 subreg_table = g45_subreg_table;
1271 src_index_table = g45_src_index_table;
1272 break;
1273 default:
1274 return;
1275 }
1276 }
1277
1278 void
1279 brw_compact_instructions(struct brw_compile *p, int start_offset,
1280 int num_annotations, struct annotation *annotation)
1281 {
1282 struct brw_context *brw = p->brw;
1283 void *store = p->store + start_offset / 16;
1284 /* For an instruction at byte offset 16*i before compaction, this is the
1285 * number of compacted instructions that preceded it.
1286 */
1287 int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
1288 /* For an instruction at byte offset 8*i after compaction, this was its IP
1289 * (in 16-byte units) before compaction.
1290 */
1291 int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst)];
1292
1293 if (brw->gen == 4)
1294 return;
1295
1296 int offset = 0;
1297 int compacted_count = 0;
1298 for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
1299 src_offset += sizeof(brw_inst)) {
1300 brw_inst *src = store + src_offset;
1301 void *dst = store + offset;
1302
1303 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1304 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1305
1306 brw_inst saved = *src;
1307
1308 if (brw_try_compact_instruction(brw, dst, src)) {
1309 compacted_count++;
1310
1311 if (INTEL_DEBUG) {
1312 brw_inst uncompacted;
1313 brw_uncompact_instruction(brw, &uncompacted, dst);
1314 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
1315 brw_debug_compact_uncompact(brw, &saved, &uncompacted);
1316 }
1317 }
1318
1319 offset += sizeof(brw_compact_inst);
1320 } else {
1321 /* It appears that the end of thread SEND instruction needs to be
1322 * aligned, or the GPU hangs.
1323 */
1324 if ((brw_inst_opcode(brw, src) == BRW_OPCODE_SEND ||
1325 brw_inst_opcode(brw, src) == BRW_OPCODE_SENDC) &&
1326 brw_inst_eot(brw, src) &&
1327 (offset & sizeof(brw_compact_inst)) != 0) {
1328 brw_compact_inst *align = store + offset;
1329 memset(align, 0, sizeof(*align));
1330 brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
1331 brw_compact_inst_set_cmpt_control(align, true);
1332 offset += sizeof(brw_compact_inst);
1333 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1334
1335 dst = store + offset;
1336 }
1337
1338 /* If we didn't compact this intruction, we need to move it down into
1339 * place.
1340 */
1341 if (offset != src_offset) {
1342 memmove(dst, src, sizeof(brw_inst));
1343 }
1344 offset += sizeof(brw_inst);
1345 }
1346 }
1347
1348 /* Fix up control flow offsets. */
1349 p->next_insn_offset = start_offset + offset;
1350 for (offset = 0; offset < p->next_insn_offset - start_offset;
1351 offset = next_offset(brw, store, offset)) {
1352 brw_inst *insn = store + offset;
1353 int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
1354 int this_compacted_count = compacted_counts[this_old_ip];
1355 int target_old_ip, target_compacted_count;
1356
1357 switch (brw_inst_opcode(brw, insn)) {
1358 case BRW_OPCODE_BREAK:
1359 case BRW_OPCODE_CONTINUE:
1360 case BRW_OPCODE_HALT:
1361 if (brw->gen >= 6) {
1362 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
1363 } else {
1364 update_gen4_jump_count(brw, insn, this_old_ip, compacted_counts);
1365 }
1366 break;
1367
1368 case BRW_OPCODE_IF:
1369 case BRW_OPCODE_IFF:
1370 case BRW_OPCODE_ELSE:
1371 case BRW_OPCODE_ENDIF:
1372 case BRW_OPCODE_WHILE:
1373 if (brw->gen >= 7) {
1374 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
1375 } else if (brw->gen == 6) {
1376 /* Jump Count is in units of compacted instructions on Gen6. */
1377 int jump_count_compacted = brw_inst_gen6_jump_count(brw, insn);
1378 int jump_count_uncompacted = jump_count_compacted / 2;
1379
1380 target_old_ip = this_old_ip + jump_count_uncompacted;
1381 target_compacted_count = compacted_counts[target_old_ip];
1382 jump_count_compacted -= (target_compacted_count - this_compacted_count);
1383 brw_inst_set_gen6_jump_count(brw, insn, jump_count_compacted);
1384 } else {
1385 update_gen4_jump_count(brw, insn, this_old_ip, compacted_counts);
1386 }
1387 break;
1388
1389 case BRW_OPCODE_ADD:
1390 /* Add instructions modifying the IP register use an immediate src1,
1391 * and Gens that use this cannot compact instructions with immediate
1392 * operands.
1393 */
1394 if (brw_inst_cmpt_control(brw, insn))
1395 break;
1396
1397 if (brw_inst_dst_reg_file(brw, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
1398 brw_inst_dst_da_reg_nr(brw, insn) == BRW_ARF_IP) {
1399 assert(brw_inst_src1_reg_file(brw, insn) == BRW_IMMEDIATE_VALUE);
1400
1401 int jump = brw_inst_imm_d(brw, insn);
1402 int jump_compacted = jump / sizeof(brw_compact_inst);
1403 int jump_uncompacted = jump / sizeof(brw_inst);
1404
1405 target_old_ip = this_old_ip + jump_uncompacted;
1406 target_compacted_count = compacted_counts[target_old_ip];
1407 jump_compacted -= (target_compacted_count - this_compacted_count);
1408 brw_inst_set_imm_ud(brw, insn, jump_compacted *
1409 sizeof(brw_compact_inst));
1410 }
1411 break;
1412 }
1413 }
1414
1415 /* p->nr_insn is counting the number of uncompacted instructions still, so
1416 * divide. We do want to be sure there's a valid instruction in any
1417 * alignment padding, so that the next compression pass (for the FS 8/16
1418 * compile passes) parses correctly.
1419 */
1420 if (p->next_insn_offset & sizeof(brw_compact_inst)) {
1421 brw_compact_inst *align = store + offset;
1422 memset(align, 0, sizeof(*align));
1423 brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
1424 brw_compact_inst_set_cmpt_control(align, true);
1425 p->next_insn_offset += sizeof(brw_compact_inst);
1426 }
1427 p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
1428
1429 /* Update the instruction offsets for each annotation. */
1430 if (annotation) {
1431 for (int offset = 0, i = 0; i < num_annotations; i++) {
1432 while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1433 sizeof(brw_inst) != annotation[i].offset) {
1434 assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1435 sizeof(brw_inst) < annotation[i].offset);
1436 offset = next_offset(brw, store, offset);
1437 }
1438
1439 annotation[i].offset = start_offset + offset;
1440
1441 offset = next_offset(brw, store, offset);
1442 }
1443
1444 annotation[num_annotations].offset = p->next_insn_offset;
1445 }
1446 }