i965/vec4: Only zero out unused message components when there are any.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_compact.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of gm45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 */
39
40 #include "brw_context.h"
41 #include "brw_eu.h"
42
/**
 * Gen6 control index table.
 *
 * Entry i is the uncompacted bit pattern selected by compacted control
 * index i.  As assembled by set_control_index(), bits 0-15 of the pattern
 * are bits 8-23 of instruction dword 0 and bit 16 is bit 31 of dword 0.
 */
static const uint32_t gen6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001
};
77
/**
 * Gen6 datatype index table.
 *
 * Entry i is the uncompacted bit pattern selected by compacted datatype
 * index i.  As assembled by set_datatype_index(), bits 0-14 of the pattern
 * are the low 15 bits of instruction dword 1 and bits 15-17 are bits 29-31
 * of dword 1.
 *
 * NOTE(review): entries 23 and 25 hold the same value, so the first-match
 * search in set_datatype_index() can only ever produce index 23 for it.
 */
static const uint32_t gen6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
112
/**
 * Gen6 sub-register index table.
 *
 * Entry i is the uncompacted bit pattern selected by compacted sub-register
 * index i.  As assembled by set_subreg_index(), the pattern packs the
 * destination sub-register number at bit 0, the src0 sub-register number at
 * bit 5 and the src1 sub-register number at bit 10.
 */
static const uint32_t gen6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
147
/**
 * Gen6 source index table, shared by src0 and src1.
 *
 * Entry i is the uncompacted 12-bit pattern selected by compacted source
 * index i.  set_src0_index() and set_src1_index() take the pattern from
 * bits 13-24 of instruction dword 2 and dword 3 respectively.
 */
static const uint32_t gen6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
182
/**
 * Gen7 control index table.
 *
 * Entry i is the uncompacted bit pattern selected by compacted control
 * index i.  As assembled by set_control_index(), bits 0-15 of the pattern
 * are bits 8-23 of instruction dword 0, bit 16 is bit 31 of dword 0, and
 * (gen7 and later only) bits 17-18 are bits 25-26 of dword 2, which carry
 * the flag register number.
 */
static const uint32_t gen7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
217
/**
 * Gen7 datatype index table.
 *
 * Entry i is the uncompacted bit pattern selected by compacted datatype
 * index i.  As assembled by set_datatype_index(), bits 0-14 of the pattern
 * are the low 15 bits of instruction dword 1 and bits 15-17 are bits 29-31
 * of dword 1.
 */
static const uint32_t gen7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000
};
252
/**
 * Gen7 sub-register index table.
 *
 * Entry i is the uncompacted bit pattern selected by compacted sub-register
 * index i.  As assembled by set_subreg_index(), the pattern packs the
 * destination sub-register number at bit 0, the src0 sub-register number at
 * bit 5 and the src1 sub-register number at bit 10.
 */
static const uint32_t gen7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
287
/**
 * Gen7 source index table, shared by src0 and src1.
 *
 * Entry i is the uncompacted 12-bit pattern selected by compacted source
 * index i.  set_src0_index() and set_src1_index() take the pattern from
 * bits 13-24 of instruction dword 2 and dword 3 respectively.
 */
static const uint32_t gen7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
322
/* Tables currently in effect.  brw_init_compaction_tables() points these at
 * the gen6 or gen7 tables according to brw->gen and leaves them untouched
 * for any other generation, so they must be initialized before any of the
 * lookup helpers below run.
 */
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint32_t *subreg_table;
static const uint32_t *src_index_table;
327
328 static bool
329 set_control_index(struct brw_context *brw,
330 struct brw_compact_instruction *dst,
331 struct brw_instruction *src)
332 {
333 uint32_t *src_u32 = (uint32_t *)src;
334 uint32_t uncompacted = 0;
335
336 uncompacted |= ((src_u32[0] >> 8) & 0xffff) << 0;
337 uncompacted |= ((src_u32[0] >> 31) & 0x1) << 16;
338 /* On gen7, the flag register number gets integrated into the control
339 * index.
340 */
341 if (brw->gen >= 7)
342 uncompacted |= ((src_u32[2] >> 25) & 0x3) << 17;
343
344 for (int i = 0; i < 32; i++) {
345 if (control_index_table[i] == uncompacted) {
346 dst->dw0.control_index = i;
347 return true;
348 }
349 }
350
351 return false;
352 }
353
354 static bool
355 set_datatype_index(struct brw_compact_instruction *dst,
356 struct brw_instruction *src)
357 {
358 uint32_t uncompacted = 0;
359
360 uncompacted |= src->bits1.ud & 0x7fff;
361 uncompacted |= (src->bits1.ud >> 29) << 15;
362
363 for (int i = 0; i < 32; i++) {
364 if (datatype_table[i] == uncompacted) {
365 dst->dw0.data_type_index = i;
366 return true;
367 }
368 }
369
370 return false;
371 }
372
373 static bool
374 set_subreg_index(struct brw_compact_instruction *dst,
375 struct brw_instruction *src)
376 {
377 uint32_t uncompacted = 0;
378
379 uncompacted |= src->bits1.da1.dest_subreg_nr << 0;
380 uncompacted |= src->bits2.da1.src0_subreg_nr << 5;
381 uncompacted |= src->bits3.da1.src1_subreg_nr << 10;
382
383 for (int i = 0; i < 32; i++) {
384 if (subreg_table[i] == uncompacted) {
385 dst->dw0.sub_reg_index = i;
386 return true;
387 }
388 }
389
390 return false;
391 }
392
393 static bool
394 get_src_index(uint32_t uncompacted,
395 uint32_t *compacted)
396 {
397 for (int i = 0; i < 32; i++) {
398 if (src_index_table[i] == uncompacted) {
399 *compacted = i;
400 return true;
401 }
402 }
403
404 return false;
405 }
406
407 static bool
408 set_src0_index(struct brw_compact_instruction *dst,
409 struct brw_instruction *src)
410 {
411 uint32_t compacted, uncompacted = 0;
412
413 uncompacted |= (src->bits2.ud >> 13) & 0xfff;
414
415 if (!get_src_index(uncompacted, &compacted))
416 return false;
417
418 dst->dw0.src0_index = compacted & 0x3;
419 dst->dw1.src0_index = compacted >> 2;
420
421 return true;
422 }
423
424 static bool
425 set_src1_index(struct brw_compact_instruction *dst,
426 struct brw_instruction *src)
427 {
428 uint32_t compacted, uncompacted = 0;
429
430 uncompacted |= (src->bits3.ud >> 13) & 0xfff;
431
432 if (!get_src_index(uncompacted, &compacted))
433 return false;
434
435 dst->dw1.src1_index = compacted;
436
437 return true;
438 }
439
440 /**
441 * Tries to compact instruction src into dst.
442 *
443 * It doesn't modify dst unless src is compactable, which is relied on by
444 * brw_compact_instructions().
445 */
446 bool
447 brw_try_compact_instruction(struct brw_compile *p,
448 struct brw_compact_instruction *dst,
449 struct brw_instruction *src)
450 {
451 struct brw_context *brw = p->brw;
452 struct brw_compact_instruction temp;
453
454 if (src->header.opcode == BRW_OPCODE_IF ||
455 src->header.opcode == BRW_OPCODE_ELSE ||
456 src->header.opcode == BRW_OPCODE_ENDIF ||
457 src->header.opcode == BRW_OPCODE_HALT ||
458 src->header.opcode == BRW_OPCODE_DO ||
459 src->header.opcode == BRW_OPCODE_WHILE) {
460 /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, needs
461 * to be able to handle compacted flow control instructions..
462 */
463 return false;
464 }
465
466 /* FINISHME: immediates */
467 if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
468 src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
469 return false;
470
471 memset(&temp, 0, sizeof(temp));
472
473 temp.dw0.opcode = src->header.opcode;
474 temp.dw0.debug_control = src->header.debug_control;
475 if (!set_control_index(brw, &temp, src))
476 return false;
477 if (!set_datatype_index(&temp, src))
478 return false;
479 if (!set_subreg_index(&temp, src))
480 return false;
481 temp.dw0.acc_wr_control = src->header.acc_wr_control;
482 temp.dw0.conditionalmod = src->header.destreg__conditionalmod;
483 if (brw->gen <= 6)
484 temp.dw0.flag_subreg_nr = src->bits2.da1.flag_subreg_nr;
485 temp.dw0.cmpt_ctrl = 1;
486 if (!set_src0_index(&temp, src))
487 return false;
488 if (!set_src1_index(&temp, src))
489 return false;
490 temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr;
491 temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr;
492 temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
493
494 *dst = temp;
495
496 return true;
497 }
498
499 static void
500 set_uncompacted_control(struct brw_context *brw,
501 struct brw_instruction *dst,
502 struct brw_compact_instruction *src)
503 {
504 uint32_t *dst_u32 = (uint32_t *)dst;
505 uint32_t uncompacted = control_index_table[src->dw0.control_index];
506
507 dst_u32[0] |= ((uncompacted >> 0) & 0xffff) << 8;
508 dst_u32[0] |= ((uncompacted >> 16) & 0x1) << 31;
509
510 if (brw->gen >= 7)
511 dst_u32[2] |= ((uncompacted >> 17) & 0x3) << 25;
512 }
513
514 static void
515 set_uncompacted_datatype(struct brw_instruction *dst,
516 struct brw_compact_instruction *src)
517 {
518 uint32_t uncompacted = datatype_table[src->dw0.data_type_index];
519
520 dst->bits1.ud &= ~(0x7 << 29);
521 dst->bits1.ud |= ((uncompacted >> 15) & 0x7) << 29;
522 dst->bits1.ud &= ~0x7fff;
523 dst->bits1.ud |= uncompacted & 0x7fff;
524 }
525
526 static void
527 set_uncompacted_subreg(struct brw_instruction *dst,
528 struct brw_compact_instruction *src)
529 {
530 uint32_t uncompacted = subreg_table[src->dw0.sub_reg_index];
531
532 dst->bits1.da1.dest_subreg_nr = (uncompacted >> 0) & 0x1f;
533 dst->bits2.da1.src0_subreg_nr = (uncompacted >> 5) & 0x1f;
534 dst->bits3.da1.src1_subreg_nr = (uncompacted >> 10) & 0x1f;
535 }
536
537 static void
538 set_uncompacted_src0(struct brw_instruction *dst,
539 struct brw_compact_instruction *src)
540 {
541 uint32_t compacted = src->dw0.src0_index | src->dw1.src0_index << 2;
542 uint32_t uncompacted = src_index_table[compacted];
543
544 dst->bits2.ud |= uncompacted << 13;
545 }
546
547 static void
548 set_uncompacted_src1(struct brw_instruction *dst,
549 struct brw_compact_instruction *src)
550 {
551 uint32_t uncompacted = src_index_table[src->dw1.src1_index];
552
553 dst->bits3.ud |= uncompacted << 13;
554 }
555
556 void
557 brw_uncompact_instruction(struct brw_context *brw,
558 struct brw_instruction *dst,
559 struct brw_compact_instruction *src)
560 {
561 memset(dst, 0, sizeof(*dst));
562
563 dst->header.opcode = src->dw0.opcode;
564 dst->header.debug_control = src->dw0.debug_control;
565
566 set_uncompacted_control(brw, dst, src);
567 set_uncompacted_datatype(dst, src);
568 set_uncompacted_subreg(dst, src);
569 dst->header.acc_wr_control = src->dw0.acc_wr_control;
570 dst->header.destreg__conditionalmod = src->dw0.conditionalmod;
571 if (brw->gen <= 6)
572 dst->bits2.da1.flag_subreg_nr = src->dw0.flag_subreg_nr;
573 set_uncompacted_src0(dst, src);
574 set_uncompacted_src1(dst, src);
575 dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr;
576 dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr;
577 dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
578 }
579
580 void brw_debug_compact_uncompact(struct brw_context *brw,
581 struct brw_instruction *orig,
582 struct brw_instruction *uncompacted)
583 {
584 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
585 brw->gen);
586
587 fprintf(stderr, " before: ");
588 brw_disasm(stderr, orig, brw->gen);
589
590 fprintf(stderr, " after: ");
591 brw_disasm(stderr, uncompacted, brw->gen);
592
593 uint32_t *before_bits = (uint32_t *)orig;
594 uint32_t *after_bits = (uint32_t *)uncompacted;
595 printf(" changed bits:\n");
596 for (int i = 0; i < 128; i++) {
597 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
598 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
599
600 if (before != after) {
601 printf(" bit %d, %s to %s\n", i,
602 before ? "set" : "unset",
603 after ? "set" : "unset");
604 }
605 }
606 }
607
/**
 * Returns how many more instructions had been compacted before
 * old_target_ip than before old_ip, using the running totals in
 * compacted_counts.  The result is negative for a backward target that has
 * fewer compacted instructions ahead of it.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
615
616 static void
617 update_uip_jip(struct brw_instruction *insn, int this_old_ip,
618 int *compacted_counts)
619 {
620 int target_old_ip;
621
622 target_old_ip = this_old_ip + insn->bits3.break_cont.jip;
623 insn->bits3.break_cont.jip -= compacted_between(this_old_ip,
624 target_old_ip,
625 compacted_counts);
626
627 target_old_ip = this_old_ip + insn->bits3.break_cont.uip;
628 insn->bits3.break_cont.uip -= compacted_between(this_old_ip,
629 target_old_ip,
630 compacted_counts);
631 }
632
633 void
634 brw_init_compaction_tables(struct brw_context *brw)
635 {
636 assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
637 assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
638 assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
639 assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
640 assert(gen7_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
641 assert(gen7_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
642 assert(gen7_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
643 assert(gen7_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
644
645 switch (brw->gen) {
646 case 7:
647 control_index_table = gen7_control_index_table;
648 datatype_table = gen7_datatype_table;
649 subreg_table = gen7_subreg_table;
650 src_index_table = gen7_src_index_table;
651 break;
652 case 6:
653 control_index_table = gen6_control_index_table;
654 datatype_table = gen6_datatype_table;
655 subreg_table = gen6_subreg_table;
656 src_index_table = gen6_src_index_table;
657 break;
658 default:
659 return;
660 }
661 }
662
/**
 * Compacts the program held in p->store in place.
 *
 * Each not-yet-compacted instruction is offered to
 * brw_try_compact_instruction(); instructions that cannot be compacted are
 * moved down to close the gaps.  Afterwards the jump offsets of control flow
 * instructions are adjusted for the instructions that shrank, the program is
 * padded to a 16-byte multiple with a compacted NOP if needed, and
 * p->next_insn_offset and p->nr_insn are updated.
 *
 * Does nothing on hardware older than gen6.
 */
void
brw_compact_instructions(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   void *store = p->store;
   /* For an instruction at byte offset 8*i before compaction, this is the number
    * of compacted instructions that preceded it.
    */
   /* NOTE(review): both arrays below are variable-length arrays sized before
    * the gen check; confirm p->next_insn_offset is never 0 here.
    */
   int compacted_counts[p->next_insn_offset / 8];
   /* For an instruction at byte offset 8*i after compaction, this is the
    * 8-byte offset it was at before compaction.
    */
   int old_ip[p->next_insn_offset / 8];

   if (brw->gen < 6)
      return;

   /* src_offset walks the original layout, offset the compacted one; both
    * are byte offsets into store.
    */
   int src_offset;
   int offset = 0;
   int compacted_count = 0;
   for (src_offset = 0; src_offset < p->nr_insn * 16;) {
      struct brw_instruction *src = store + src_offset;
      void *dst = store + offset;

      old_ip[offset / 8] = src_offset / 8;
      compacted_counts[src_offset / 8] = compacted_count;

      /* dst can alias src (offset == src_offset), so keep a copy of the
       * original for the round-trip check below.
       */
      struct brw_instruction saved = *src;

      if (!src->header.cmpt_control &&
          brw_try_compact_instruction(p, dst, src)) {
         compacted_count++;

         /* Debug check: expanding the compacted form must give back the
          * original instruction bit for bit.
          */
         if (INTEL_DEBUG) {
            struct brw_instruction uncompacted;
            brw_uncompact_instruction(brw, &uncompacted, dst);
            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
               brw_debug_compact_uncompact(brw, &saved, &uncompacted);
            }
         }

         offset += 8;
         src_offset += 16;
      } else {
         /* Either already compacted (8 bytes) or not compactable (16). */
         int size = src->header.cmpt_control ? 8 : 16;

         /* It appears that the end of thread SEND instruction needs to be
          * aligned, or the GPU hangs.
          */
         if ((src->header.opcode == BRW_OPCODE_SEND ||
              src->header.opcode == BRW_OPCODE_SENDC) &&
             src->bits3.generic.end_of_thread &&
             (offset & 8) != 0) {
            /* Insert a compacted NOP so the SEND lands on a 16-byte
             * boundary.
             */
            struct brw_compact_instruction *align = store + offset;
            memset(align, 0, sizeof(*align));
            align->dw0.opcode = BRW_OPCODE_NOP;
            align->dw0.cmpt_ctrl = 1;
            offset += 8;
            old_ip[offset / 8] = src_offset / 8;
            dst = store + offset;
         }

         /* If we didn't compact this instruction, we need to move it down into
          * place.
          */
         if (offset != src_offset) {
            memmove(dst, src, size);
         }
         offset += size;
         src_offset += size;
      }
   }

   /* Fix up control flow offsets. */
   p->next_insn_offset = offset;
   for (offset = 0; offset < p->next_insn_offset;) {
      struct brw_instruction *insn = store + offset;
      int this_old_ip = old_ip[offset / 8];
      int this_compacted_count = compacted_counts[this_old_ip];
      int target_old_ip, target_compacted_count;

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE:
      case BRW_OPCODE_HALT:
         update_uip_jip(insn, this_old_ip, compacted_counts);
         break;

      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_WHILE:
         if (brw->gen == 6) {
            /* Gen6 keeps a single jump count in dword 1 rather than the
             * JIP/UIP pair.
             */
            target_old_ip = this_old_ip + insn->bits1.branch_gen6.jump_count;
            target_compacted_count = compacted_counts[target_old_ip];
            insn->bits1.branch_gen6.jump_count -= (target_compacted_count -
                                                   this_compacted_count);
         } else {
            update_uip_jip(insn, this_old_ip, compacted_counts);
         }
         break;
      }

      /* Step by the size of the instruction just visited. */
      if (insn->header.cmpt_control) {
         offset += 8;
      } else {
         offset += 16;
      }
   }

   /* p->nr_insn is counting the number of uncompacted instructions still, so
    * divide.  We do want to be sure there's a valid instruction in any
    * alignment padding, so that the next compression pass (for the FS 8/16
    * compile passes) parses correctly.
    */
   if (p->next_insn_offset & 8) {
      struct brw_compact_instruction *align = store + offset;
      memset(align, 0, sizeof(*align));
      align->dw0.opcode = BRW_OPCODE_NOP;
      align->dw0.cmpt_ctrl = 1;
      p->next_insn_offset += 8;
   }
   p->nr_insn = p->next_insn_offset / 16;

   /* Disabled debugging aid: dumps the compacted program and the space
    * saved.  Flip the condition to enable it.
    */
   if (0) {
      fprintf(stdout, "dumping compacted program\n");
      brw_dump_compile(p, stdout, 0, p->next_insn_offset);

      int cmp = 0;
      for (offset = 0; offset < p->next_insn_offset;) {
         struct brw_instruction *insn = store + offset;

         if (insn->header.cmpt_control) {
            offset += 8;
            cmp++;
         } else {
            offset += 16;
         }
      }
      fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
              cmp * 8 * 100 / (offset + cmp * 8));
   }
}