i915: Remove most of the code under gen >= 4 checks.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_compact.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of gm45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 */
39
40 #include "brw_context.h"
41 #include "brw_eu.h"
42
/**
 * Gen6 control-index lookup table.
 *
 * Entry N is the uncompacted control bit pattern that compacted control
 * index N stands for.  The pattern is laid out the way set_control_index()
 * builds its search key: bits 0..15 are DW0 bits 8..23 of the full
 * instruction and bit 16 is DW0 bit 31.
 */
static const uint32_t gen6_control_index_table[32] = {
   /*  0 -  7 */
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   /*  8 - 15 */
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   /* 16 - 23 */
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   /* 24 - 31 */
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001,
};
77
/**
 * Gen6 datatype-index lookup table.
 *
 * Entry N is the uncompacted bit pattern for compacted data_type_index N.
 * It matches the key built by set_datatype_index(): bits 0..14 are the low
 * 15 bits of DW1 and bits 15..17 are DW1 bits 29..31.
 *
 * Entries 23 and 25 hold the same value; the lookup returns the first
 * match, so the duplicate is harmless.
 */
static const uint32_t gen6_datatype_table[32] = {
   /*  0 -  7 */
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   /*  8 - 15 */
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   /* 16 - 23 */
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   /* 24 - 31 */
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
112
/**
 * Gen6 subregister-index lookup table.
 *
 * Entry N is the uncompacted pattern for compacted sub_reg_index N, using
 * the layout from set_subreg_index(): bits 0..4 are the destination
 * subregister number, bits 5..9 the src0 subregister number and bits
 * 10..14 the src1 subregister number.
 */
static const uint32_t gen6_subreg_table[32] = {
   /*  0 -  7 */
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   /*  8 - 15 */
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   /* 16 - 23 */
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   /* 24 - 31 */
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
147
/**
 * Gen6 source-index lookup table, shared by src0 and src1.
 *
 * Entry N is the 12-bit uncompacted pattern for compacted source index N.
 * set_src0_index()/set_src1_index() take that pattern from bits 13..24 of
 * DW2 and DW3 respectively.
 */
static const uint32_t gen6_src_index_table[32] = {
   /*  0 -  7 */
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   /*  8 - 15 */
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   /* 16 - 23 */
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   /* 24 - 31 */
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
182
/**
 * Gen7 control-index lookup table.
 *
 * Same layout as the gen6 table for bits 0..16 (DW0 bits 8..23 and DW0
 * bit 31).  On gen7 set_control_index() additionally folds DW2 bits 25..26
 * into bits 17..18 of the key, so these entries are 19 bits wide.
 */
static const uint32_t gen7_control_index_table[32] = {
   /*  0 -  7 */
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   /*  8 - 15 */
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   /* 16 - 23 */
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   /* 24 - 31 */
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
217
/**
 * Gen7 datatype-index lookup table.
 *
 * Entry N is the uncompacted bit pattern for compacted data_type_index N,
 * in the layout produced by set_datatype_index(): bits 0..14 are the low
 * 15 bits of DW1 and bits 15..17 are DW1 bits 29..31.
 */
static const uint32_t gen7_datatype_table[32] = {
   /*  0 -  7 */
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   /*  8 - 15 */
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   /* 16 - 23 */
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   /* 24 - 31 */
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
252
/**
 * Gen7 subregister-index lookup table.
 *
 * Entry N is the uncompacted pattern for compacted sub_reg_index N, using
 * the layout from set_subreg_index(): bits 0..4 are the destination
 * subregister number, bits 5..9 the src0 subregister number and bits
 * 10..14 the src1 subregister number.
 */
static const uint32_t gen7_subreg_table[32] = {
   /*  0 -  7 */
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   /*  8 - 15 */
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   /* 16 - 23 */
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   /* 24 - 31 */
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
287
/**
 * Gen7 source-index lookup table, shared by src0 and src1.
 *
 * Entry N is the 12-bit uncompacted pattern for compacted source index N.
 * set_src0_index()/set_src1_index() take that pattern from bits 13..24 of
 * DW2 and DW3 respectively.
 */
static const uint32_t gen7_src_index_table[32] = {
   /*  0 -  7 */
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   /*  8 - 15 */
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   /* 16 - 23 */
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   /* 24 - 31 */
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
322
/* Lookup tables currently in effect.  They start out NULL and are pointed
 * at the gen6 or gen7 tables by brw_init_compaction_tables(); every
 * compaction/uncompaction helper in this file reads through them.
 */
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint32_t *subreg_table;
static const uint32_t *src_index_table;
327
328 static bool
329 set_control_index(struct intel_context *intel,
330 struct brw_compact_instruction *dst,
331 struct brw_instruction *src)
332 {
333 uint32_t *src_u32 = (uint32_t *)src;
334 uint32_t uncompacted = 0;
335
336 uncompacted |= ((src_u32[0] >> 8) & 0xffff) << 0;
337 uncompacted |= ((src_u32[0] >> 31) & 0x1) << 16;
338 /* On gen7, the flag register number gets integrated into the control
339 * index.
340 */
341 if (intel->gen >= 7)
342 uncompacted |= ((src_u32[2] >> 25) & 0x3) << 17;
343
344 for (int i = 0; i < 32; i++) {
345 if (control_index_table[i] == uncompacted) {
346 dst->dw0.control_index = i;
347 return true;
348 }
349 }
350
351 return false;
352 }
353
354 static bool
355 set_datatype_index(struct brw_compact_instruction *dst,
356 struct brw_instruction *src)
357 {
358 uint32_t uncompacted = 0;
359
360 uncompacted |= src->bits1.ud & 0x7fff;
361 uncompacted |= (src->bits1.ud >> 29) << 15;
362
363 for (int i = 0; i < 32; i++) {
364 if (datatype_table[i] == uncompacted) {
365 dst->dw0.data_type_index = i;
366 return true;
367 }
368 }
369
370 return false;
371 }
372
373 static bool
374 set_subreg_index(struct brw_compact_instruction *dst,
375 struct brw_instruction *src)
376 {
377 uint32_t uncompacted = 0;
378
379 uncompacted |= src->bits1.da1.dest_subreg_nr << 0;
380 uncompacted |= src->bits2.da1.src0_subreg_nr << 5;
381 uncompacted |= src->bits3.da1.src1_subreg_nr << 10;
382
383 for (int i = 0; i < 32; i++) {
384 if (subreg_table[i] == uncompacted) {
385 dst->dw0.sub_reg_index = i;
386 return true;
387 }
388 }
389
390 return false;
391 }
392
393 static bool
394 get_src_index(uint32_t uncompacted,
395 uint32_t *compacted)
396 {
397 for (int i = 0; i < 32; i++) {
398 if (src_index_table[i] == uncompacted) {
399 *compacted = i;
400 return true;
401 }
402 }
403
404 return false;
405 }
406
407 static bool
408 set_src0_index(struct brw_compact_instruction *dst,
409 struct brw_instruction *src)
410 {
411 uint32_t compacted, uncompacted = 0;
412
413 uncompacted |= (src->bits2.ud >> 13) & 0xfff;
414
415 if (!get_src_index(uncompacted, &compacted))
416 return false;
417
418 dst->dw0.src0_index = compacted & 0x3;
419 dst->dw1.src0_index = compacted >> 2;
420
421 return true;
422 }
423
424 static bool
425 set_src1_index(struct brw_compact_instruction *dst,
426 struct brw_instruction *src)
427 {
428 uint32_t compacted, uncompacted = 0;
429
430 uncompacted |= (src->bits3.ud >> 13) & 0xfff;
431
432 if (!get_src_index(uncompacted, &compacted))
433 return false;
434
435 dst->dw1.src1_index = compacted;
436
437 return true;
438 }
439
440 /**
441 * Tries to compact instruction src into dst.
442 *
443 * It doesn't modify dst unless src is compactable, which is relied on by
444 * brw_compact_instructions().
445 */
446 bool
447 brw_try_compact_instruction(struct brw_compile *p,
448 struct brw_compact_instruction *dst,
449 struct brw_instruction *src)
450 {
451 struct brw_context *brw = p->brw;
452 struct intel_context *intel = &brw->intel;
453 struct brw_compact_instruction temp;
454
455 if (src->header.opcode == BRW_OPCODE_IF ||
456 src->header.opcode == BRW_OPCODE_ELSE ||
457 src->header.opcode == BRW_OPCODE_ENDIF ||
458 src->header.opcode == BRW_OPCODE_HALT ||
459 src->header.opcode == BRW_OPCODE_DO ||
460 src->header.opcode == BRW_OPCODE_WHILE) {
461 /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, needs
462 * to be able to handle compacted flow control instructions..
463 */
464 return false;
465 }
466
467 /* FINISHME: immediates */
468 if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
469 src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
470 return false;
471
472 memset(&temp, 0, sizeof(temp));
473
474 temp.dw0.opcode = src->header.opcode;
475 temp.dw0.debug_control = src->header.debug_control;
476 if (!set_control_index(intel, &temp, src))
477 return false;
478 if (!set_datatype_index(&temp, src))
479 return false;
480 if (!set_subreg_index(&temp, src))
481 return false;
482 temp.dw0.acc_wr_control = src->header.acc_wr_control;
483 temp.dw0.conditionalmod = src->header.destreg__conditionalmod;
484 if (intel->gen <= 6)
485 temp.dw0.flag_subreg_nr = src->bits2.da1.flag_subreg_nr;
486 temp.dw0.cmpt_ctrl = 1;
487 if (!set_src0_index(&temp, src))
488 return false;
489 if (!set_src1_index(&temp, src))
490 return false;
491 temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr;
492 temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr;
493 temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
494
495 *dst = temp;
496
497 return true;
498 }
499
500 static void
501 set_uncompacted_control(struct intel_context *intel,
502 struct brw_instruction *dst,
503 struct brw_compact_instruction *src)
504 {
505 uint32_t *dst_u32 = (uint32_t *)dst;
506 uint32_t uncompacted = control_index_table[src->dw0.control_index];
507
508 dst_u32[0] |= ((uncompacted >> 0) & 0xffff) << 8;
509 dst_u32[0] |= ((uncompacted >> 16) & 0x1) << 31;
510
511 if (intel->gen >= 7)
512 dst_u32[2] |= ((uncompacted >> 17) & 0x3) << 25;
513 }
514
515 static void
516 set_uncompacted_datatype(struct brw_instruction *dst,
517 struct brw_compact_instruction *src)
518 {
519 uint32_t uncompacted = datatype_table[src->dw0.data_type_index];
520
521 dst->bits1.ud &= ~(0x7 << 29);
522 dst->bits1.ud |= ((uncompacted >> 15) & 0x7) << 29;
523 dst->bits1.ud &= ~0x7fff;
524 dst->bits1.ud |= uncompacted & 0x7fff;
525 }
526
527 static void
528 set_uncompacted_subreg(struct brw_instruction *dst,
529 struct brw_compact_instruction *src)
530 {
531 uint32_t uncompacted = subreg_table[src->dw0.sub_reg_index];
532
533 dst->bits1.da1.dest_subreg_nr = (uncompacted >> 0) & 0x1f;
534 dst->bits2.da1.src0_subreg_nr = (uncompacted >> 5) & 0x1f;
535 dst->bits3.da1.src1_subreg_nr = (uncompacted >> 10) & 0x1f;
536 }
537
538 static void
539 set_uncompacted_src0(struct brw_instruction *dst,
540 struct brw_compact_instruction *src)
541 {
542 uint32_t compacted = src->dw0.src0_index | src->dw1.src0_index << 2;
543 uint32_t uncompacted = src_index_table[compacted];
544
545 dst->bits2.ud |= uncompacted << 13;
546 }
547
548 static void
549 set_uncompacted_src1(struct brw_instruction *dst,
550 struct brw_compact_instruction *src)
551 {
552 uint32_t uncompacted = src_index_table[src->dw1.src1_index];
553
554 dst->bits3.ud |= uncompacted << 13;
555 }
556
557 void
558 brw_uncompact_instruction(struct intel_context *intel,
559 struct brw_instruction *dst,
560 struct brw_compact_instruction *src)
561 {
562 memset(dst, 0, sizeof(*dst));
563
564 dst->header.opcode = src->dw0.opcode;
565 dst->header.debug_control = src->dw0.debug_control;
566
567 set_uncompacted_control(intel, dst, src);
568 set_uncompacted_datatype(dst, src);
569 set_uncompacted_subreg(dst, src);
570 dst->header.acc_wr_control = src->dw0.acc_wr_control;
571 dst->header.destreg__conditionalmod = src->dw0.conditionalmod;
572 if (intel->gen <= 6)
573 dst->bits2.da1.flag_subreg_nr = src->dw0.flag_subreg_nr;
574 set_uncompacted_src0(dst, src);
575 set_uncompacted_src1(dst, src);
576 dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr;
577 dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr;
578 dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
579 }
580
581 void brw_debug_compact_uncompact(struct intel_context *intel,
582 struct brw_instruction *orig,
583 struct brw_instruction *uncompacted)
584 {
585 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
586 intel->gen);
587
588 fprintf(stderr, " before: ");
589 brw_disasm(stderr, orig, intel->gen);
590
591 fprintf(stderr, " after: ");
592 brw_disasm(stderr, uncompacted, intel->gen);
593
594 uint32_t *before_bits = (uint32_t *)orig;
595 uint32_t *after_bits = (uint32_t *)uncompacted;
596 printf(" changed bits:\n");
597 for (int i = 0; i < 128; i++) {
598 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
599 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
600
601 if (before != after) {
602 printf(" bit %d, %s to %s\n", i,
603 before ? "set" : "unset",
604 after ? "set" : "unset");
605 }
606 }
607 }
608
/**
 * Returns how many more instructions had been compacted before
 * \p old_target_ip than before \p old_ip.
 *
 * Both IPs index \p compacted_counts, which holds a running count of
 * compacted instructions.  The result is negative when the target precedes
 * \p old_ip and instructions between them were compacted.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
616
617 static void
618 update_uip_jip(struct brw_instruction *insn, int this_old_ip,
619 int *compacted_counts)
620 {
621 int target_old_ip;
622
623 target_old_ip = this_old_ip + insn->bits3.break_cont.jip;
624 insn->bits3.break_cont.jip -= compacted_between(this_old_ip,
625 target_old_ip,
626 compacted_counts);
627
628 target_old_ip = this_old_ip + insn->bits3.break_cont.uip;
629 insn->bits3.break_cont.uip -= compacted_between(this_old_ip,
630 target_old_ip,
631 compacted_counts);
632 }
633
634 void
635 brw_init_compaction_tables(struct intel_context *intel)
636 {
637 assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
638 assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
639 assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
640 assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
641 assert(gen7_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
642 assert(gen7_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
643 assert(gen7_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
644 assert(gen7_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
645
646 switch (intel->gen) {
647 case 7:
648 control_index_table = gen7_control_index_table;
649 datatype_table = gen7_datatype_table;
650 subreg_table = gen7_subreg_table;
651 src_index_table = gen7_src_index_table;
652 break;
653 case 6:
654 control_index_table = gen6_control_index_table;
655 datatype_table = gen6_datatype_table;
656 subreg_table = gen6_subreg_table;
657 src_index_table = gen6_src_index_table;
658 break;
659 default:
660 return;
661 }
662 }
663
664 void
665 brw_compact_instructions(struct brw_compile *p)
666 {
667 struct brw_context *brw = p->brw;
668 struct intel_context *intel = &brw->intel;
669 void *store = p->store;
670 /* For an instruction at byte offset 8*i before compaction, this is the number
671 * of compacted instructions that preceded it.
672 */
673 int compacted_counts[p->next_insn_offset / 8];
674 /* For an instruction at byte offset 8*i after compaction, this is the
675 * 8-byte offset it was at before compaction.
676 */
677 int old_ip[p->next_insn_offset / 8];
678
679 if (intel->gen < 6)
680 return;
681
682 int src_offset;
683 int offset = 0;
684 int compacted_count = 0;
685 for (src_offset = 0; src_offset < p->nr_insn * 16;) {
686 struct brw_instruction *src = store + src_offset;
687 void *dst = store + offset;
688
689 old_ip[offset / 8] = src_offset / 8;
690 compacted_counts[src_offset / 8] = compacted_count;
691
692 struct brw_instruction saved = *src;
693
694 if (!src->header.cmpt_control &&
695 brw_try_compact_instruction(p, dst, src)) {
696 compacted_count++;
697
698 if (INTEL_DEBUG) {
699 struct brw_instruction uncompacted;
700 brw_uncompact_instruction(intel, &uncompacted, dst);
701 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
702 brw_debug_compact_uncompact(intel, &saved, &uncompacted);
703 }
704 }
705
706 offset += 8;
707 src_offset += 16;
708 } else {
709 int size = src->header.cmpt_control ? 8 : 16;
710
711 /* It appears that the end of thread SEND instruction needs to be
712 * aligned, or the GPU hangs.
713 */
714 if ((src->header.opcode == BRW_OPCODE_SEND ||
715 src->header.opcode == BRW_OPCODE_SENDC) &&
716 src->bits3.generic.end_of_thread &&
717 (offset & 8) != 0) {
718 struct brw_compact_instruction *align = store + offset;
719 memset(align, 0, sizeof(*align));
720 align->dw0.opcode = BRW_OPCODE_NOP;
721 align->dw0.cmpt_ctrl = 1;
722 offset += 8;
723 old_ip[offset / 8] = src_offset / 8;
724 dst = store + offset;
725 }
726
727 /* If we didn't compact this intruction, we need to move it down into
728 * place.
729 */
730 if (offset != src_offset) {
731 memmove(dst, src, size);
732 }
733 offset += size;
734 src_offset += size;
735 }
736 }
737
738 /* Fix up control flow offsets. */
739 p->next_insn_offset = offset;
740 for (offset = 0; offset < p->next_insn_offset;) {
741 struct brw_instruction *insn = store + offset;
742 int this_old_ip = old_ip[offset / 8];
743 int this_compacted_count = compacted_counts[this_old_ip];
744 int target_old_ip, target_compacted_count;
745
746 switch (insn->header.opcode) {
747 case BRW_OPCODE_BREAK:
748 case BRW_OPCODE_CONTINUE:
749 case BRW_OPCODE_HALT:
750 update_uip_jip(insn, this_old_ip, compacted_counts);
751 break;
752
753 case BRW_OPCODE_IF:
754 case BRW_OPCODE_ELSE:
755 case BRW_OPCODE_ENDIF:
756 case BRW_OPCODE_WHILE:
757 if (intel->gen == 6) {
758 target_old_ip = this_old_ip + insn->bits1.branch_gen6.jump_count;
759 target_compacted_count = compacted_counts[target_old_ip];
760 insn->bits1.branch_gen6.jump_count -= (target_compacted_count -
761 this_compacted_count);
762 } else {
763 update_uip_jip(insn, this_old_ip, compacted_counts);
764 }
765 break;
766 }
767
768 if (insn->header.cmpt_control) {
769 offset += 8;
770 } else {
771 offset += 16;
772 }
773 }
774
775 /* p->nr_insn is counting the number of uncompacted instructions still, so
776 * divide. We do want to be sure there's a valid instruction in any
777 * alignment padding, so that the next compression pass (for the FS 8/16
778 * compile passes) parses correctly.
779 */
780 if (p->next_insn_offset & 8) {
781 struct brw_compact_instruction *align = store + offset;
782 memset(align, 0, sizeof(*align));
783 align->dw0.opcode = BRW_OPCODE_NOP;
784 align->dw0.cmpt_ctrl = 1;
785 p->next_insn_offset += 8;
786 }
787 p->nr_insn = p->next_insn_offset / 16;
788
789 if (0) {
790 fprintf(stdout, "dumping compacted program\n");
791 brw_dump_compile(p, stdout, 0, p->next_insn_offset);
792
793 int cmp = 0;
794 for (offset = 0; offset < p->next_insn_offset;) {
795 struct brw_instruction *insn = store + offset;
796
797 if (insn->header.cmpt_control) {
798 offset += 8;
799 cmp++;
800 } else {
801 offset += 16;
802 }
803 }
804 fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
805 cmp * 8 * 100 / (offset + cmp * 8));
806 }
807 }