25a96e7641c5e9a8a99c5d5c8c10f16773456d58
[mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_compact.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of gm45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 */
39
40 #include "brw_context.h"
41 #include "brw_eu.h"
42 #include "intel_asm_annotation.h"
43
44 static const uint32_t gen6_control_index_table[32] = {
45 0b00000000000000000,
46 0b01000000000000000,
47 0b00110000000000000,
48 0b00000000100000000,
49 0b00010000000000000,
50 0b00001000100000000,
51 0b00000000100000010,
52 0b00000000000000010,
53 0b01000000100000000,
54 0b01010000000000000,
55 0b10110000000000000,
56 0b00100000000000000,
57 0b11010000000000000,
58 0b11000000000000000,
59 0b01001000100000000,
60 0b01000000000001000,
61 0b01000000000000100,
62 0b00000000000001000,
63 0b00000000000000100,
64 0b00111000100000000,
65 0b00001000100000010,
66 0b00110000100000000,
67 0b00110000000000001,
68 0b00100000000000001,
69 0b00110000000000010,
70 0b00110000000000101,
71 0b00110000000001001,
72 0b00110000000010000,
73 0b00110000000000011,
74 0b00110000000000100,
75 0b00110000100001000,
76 0b00100000000001001
77 };
78
79 static const uint32_t gen6_datatype_table[32] = {
80 0b001001110000000000,
81 0b001000110000100000,
82 0b001001110000000001,
83 0b001000000001100000,
84 0b001010110100101001,
85 0b001000000110101101,
86 0b001100011000101100,
87 0b001011110110101101,
88 0b001000000111101100,
89 0b001000000001100001,
90 0b001000110010100101,
91 0b001000000001000001,
92 0b001000001000110001,
93 0b001000001000101001,
94 0b001000000000100000,
95 0b001000001000110010,
96 0b001010010100101001,
97 0b001011010010100101,
98 0b001000000110100101,
99 0b001100011000101001,
100 0b001011011000101100,
101 0b001011010110100101,
102 0b001011110110100101,
103 0b001111011110111101,
104 0b001111011110111100,
105 0b001111011110111101,
106 0b001111011110011101,
107 0b001111011110111110,
108 0b001000000000100001,
109 0b001000000000100010,
110 0b001001111111011101,
111 0b001000001110111110,
112 };
113
114 static const uint16_t gen6_subreg_table[32] = {
115 0b000000000000000,
116 0b000000000000100,
117 0b000000110000000,
118 0b111000000000000,
119 0b011110000001000,
120 0b000010000000000,
121 0b000000000010000,
122 0b000110000001100,
123 0b001000000000000,
124 0b000001000000000,
125 0b000001010010100,
126 0b000000001010110,
127 0b010000000000000,
128 0b110000000000000,
129 0b000100000000000,
130 0b000000010000000,
131 0b000000000001000,
132 0b100000000000000,
133 0b000001010000000,
134 0b001010000000000,
135 0b001100000000000,
136 0b000000001010100,
137 0b101101010010100,
138 0b010100000000000,
139 0b000000010001111,
140 0b011000000000000,
141 0b111110000000000,
142 0b101000000000000,
143 0b000000000001111,
144 0b000100010001111,
145 0b001000010001111,
146 0b000110000000000,
147 };
148
149 static const uint16_t gen6_src_index_table[32] = {
150 0b000000000000,
151 0b010110001000,
152 0b010001101000,
153 0b001000101000,
154 0b011010010000,
155 0b000100100000,
156 0b010001101100,
157 0b010101110000,
158 0b011001111000,
159 0b001100101000,
160 0b010110001100,
161 0b001000100000,
162 0b010110001010,
163 0b000000000010,
164 0b010101010000,
165 0b010101101000,
166 0b111101001100,
167 0b111100101100,
168 0b011001110000,
169 0b010110001001,
170 0b010101011000,
171 0b001101001000,
172 0b010000101100,
173 0b010000000000,
174 0b001101110000,
175 0b001100010000,
176 0b001100000000,
177 0b010001101010,
178 0b001101111000,
179 0b000001110000,
180 0b001100100000,
181 0b001101010000,
182 };
183
184 static const uint32_t gen7_control_index_table[32] = {
185 0b0000000000000000010,
186 0b0000100000000000000,
187 0b0000100000000000001,
188 0b0000100000000000010,
189 0b0000100000000000011,
190 0b0000100000000000100,
191 0b0000100000000000101,
192 0b0000100000000000111,
193 0b0000100000000001000,
194 0b0000100000000001001,
195 0b0000100000000001101,
196 0b0000110000000000000,
197 0b0000110000000000001,
198 0b0000110000000000010,
199 0b0000110000000000011,
200 0b0000110000000000100,
201 0b0000110000000000101,
202 0b0000110000000000111,
203 0b0000110000000001001,
204 0b0000110000000001101,
205 0b0000110000000010000,
206 0b0000110000100000000,
207 0b0001000000000000000,
208 0b0001000000000000010,
209 0b0001000000000000100,
210 0b0001000000100000000,
211 0b0010110000000000000,
212 0b0010110000000010000,
213 0b0011000000000000000,
214 0b0011000000100000000,
215 0b0101000000000000000,
216 0b0101000000100000000
217 };
218
219 static const uint32_t gen7_datatype_table[32] = {
220 0b001000000000000001,
221 0b001000000000100000,
222 0b001000000000100001,
223 0b001000000001100001,
224 0b001000000010111101,
225 0b001000001011111101,
226 0b001000001110100001,
227 0b001000001110100101,
228 0b001000001110111101,
229 0b001000010000100001,
230 0b001000110000100000,
231 0b001000110000100001,
232 0b001001010010100101,
233 0b001001110010100100,
234 0b001001110010100101,
235 0b001111001110111101,
236 0b001111011110011101,
237 0b001111011110111100,
238 0b001111011110111101,
239 0b001111111110111100,
240 0b000000001000001100,
241 0b001000000000111101,
242 0b001000000010100101,
243 0b001000010000100000,
244 0b001001010010100100,
245 0b001001110010000100,
246 0b001010010100001001,
247 0b001101111110111101,
248 0b001111111110111101,
249 0b001011110110101100,
250 0b001010010100101000,
251 0b001010110100101000
252 };
253
254 static const uint16_t gen7_subreg_table[32] = {
255 0b000000000000000,
256 0b000000000000001,
257 0b000000000001000,
258 0b000000000001111,
259 0b000000000010000,
260 0b000000010000000,
261 0b000000100000000,
262 0b000000110000000,
263 0b000001000000000,
264 0b000001000010000,
265 0b000010100000000,
266 0b001000000000000,
267 0b001000000000001,
268 0b001000010000001,
269 0b001000010000010,
270 0b001000010000011,
271 0b001000010000100,
272 0b001000010000111,
273 0b001000010001000,
274 0b001000010001110,
275 0b001000010001111,
276 0b001000110000000,
277 0b001000111101000,
278 0b010000000000000,
279 0b010000110000000,
280 0b011000000000000,
281 0b011110010000111,
282 0b100000000000000,
283 0b101000000000000,
284 0b110000000000000,
285 0b111000000000000,
286 0b111000000011100
287 };
288
289 static const uint16_t gen7_src_index_table[32] = {
290 0b000000000000,
291 0b000000000010,
292 0b000000010000,
293 0b000000010010,
294 0b000000011000,
295 0b000000100000,
296 0b000000101000,
297 0b000001001000,
298 0b000001010000,
299 0b000001110000,
300 0b000001111000,
301 0b001100000000,
302 0b001100000010,
303 0b001100001000,
304 0b001100010000,
305 0b001100010010,
306 0b001100100000,
307 0b001100101000,
308 0b001100111000,
309 0b001101000000,
310 0b001101000010,
311 0b001101001000,
312 0b001101010000,
313 0b001101100000,
314 0b001101101000,
315 0b001101110000,
316 0b001101110001,
317 0b001101111000,
318 0b010001101000,
319 0b010001101001,
320 0b010001101010,
321 0b010110001000
322 };
323
324 static const uint32_t *control_index_table;
325 static const uint32_t *datatype_table;
326 static const uint16_t *subreg_table;
327 static const uint16_t *src_index_table;
328
329 static bool
330 set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
331 {
332 uint32_t uncompacted = /* 17b/SNB; 19b/IVB+ */
333 (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
334 (brw_inst_bits(src, 23, 8)); /* 16b */
335
336 /* On gen7, the flag register and subregister numbers are integrated into
337 * the control index.
338 */
339 if (brw->gen >= 7)
340 uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
341
342 for (int i = 0; i < 32; i++) {
343 if (control_index_table[i] == uncompacted) {
344 brw_compact_inst_set_control_index(dst, i);
345 return true;
346 }
347 }
348
349 return false;
350 }
351
352 static bool
353 set_datatype_index(struct brw_context *brw, brw_compact_inst *dst,
354 brw_inst *src)
355 {
356 uint32_t uncompacted = /* 18b */
357 (brw_inst_bits(src, 63, 61) << 15) | /* 3b */
358 (brw_inst_bits(src, 46, 32)); /* 15b */
359
360 for (int i = 0; i < 32; i++) {
361 if (datatype_table[i] == uncompacted) {
362 brw_compact_inst_set_datatype_index(dst, i);
363 return true;
364 }
365 }
366
367 return false;
368 }
369
370 static bool
371 set_subreg_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
372 bool is_immediate)
373 {
374 uint16_t uncompacted = /* 15b */
375 (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
376 (brw_inst_bits(src, 68, 64) << 5); /* 5b */
377
378 if (!is_immediate)
379 uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
380
381 for (int i = 0; i < 32; i++) {
382 if (subreg_table[i] == uncompacted) {
383 brw_compact_inst_set_subreg_index(dst, i);
384 return true;
385 }
386 }
387
388 return false;
389 }
390
391 static bool
392 get_src_index(uint16_t uncompacted,
393 uint16_t *compacted)
394 {
395 for (int i = 0; i < 32; i++) {
396 if (src_index_table[i] == uncompacted) {
397 *compacted = i;
398 return true;
399 }
400 }
401
402 return false;
403 }
404
405 static bool
406 set_src0_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src)
407 {
408 uint16_t compacted;
409 uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
410
411 if (!get_src_index(uncompacted, &compacted))
412 return false;
413
414 brw_compact_inst_set_src0_index(dst, compacted);
415
416 return true;
417 }
418
419 static bool
420 set_src1_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src,
421 bool is_immediate)
422 {
423 uint16_t compacted;
424
425 if (is_immediate) {
426 compacted = (brw_inst_imm_ud(brw, src) >> 8) & 0x1f;
427 } else {
428 uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
429
430 if (!get_src_index(uncompacted, &compacted))
431 return false;
432 }
433
434 brw_compact_inst_set_src1_index(dst, compacted);
435
436 return true;
437 }
438
439 /* Compacted instructions have 12-bits for immediate sources, and a 13th bit
440 * that's replicated through the high 20 bits.
441 *
442 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
443 * of packed vectors as compactable immediates.
444 */
445 static bool
446 is_compactable_immediate(unsigned imm)
447 {
448 /* We get the low 12 bits as-is. */
449 imm &= ~0xfff;
450
451 /* We get one bit replicated through the top 20 bits. */
452 return imm == 0 || imm == 0xfffff000;
453 }
454
455 /**
456 * Tries to compact instruction src into dst.
457 *
458 * It doesn't modify dst unless src is compactable, which is relied on by
459 * brw_compact_instructions().
460 */
461 bool
462 brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst,
463 brw_inst *src)
464 {
465 brw_compact_inst temp;
466
467 if (brw_inst_opcode(brw, src) == BRW_OPCODE_IF ||
468 brw_inst_opcode(brw, src) == BRW_OPCODE_ELSE ||
469 brw_inst_opcode(brw, src) == BRW_OPCODE_ENDIF ||
470 brw_inst_opcode(brw, src) == BRW_OPCODE_HALT ||
471 brw_inst_opcode(brw, src) == BRW_OPCODE_DO ||
472 brw_inst_opcode(brw, src) == BRW_OPCODE_WHILE) {
473 /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, needs
474 * to be able to handle compacted flow control instructions..
475 */
476 return false;
477 }
478
479 bool is_immediate =
480 brw_inst_src0_reg_file(brw, src) == BRW_IMMEDIATE_VALUE ||
481 brw_inst_src1_reg_file(brw, src) == BRW_IMMEDIATE_VALUE;
482 if (is_immediate && !is_compactable_immediate(brw_inst_imm_ud(brw, src))) {
483 return false;
484 }
485
486 memset(&temp, 0, sizeof(temp));
487
488 brw_compact_inst_set_opcode(&temp, brw_inst_opcode(brw, src));
489 brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(brw, src));
490 if (!set_control_index(brw, &temp, src))
491 return false;
492 if (!set_datatype_index(brw, &temp, src))
493 return false;
494 if (!set_subreg_index(brw, &temp, src, is_immediate))
495 return false;
496 brw_compact_inst_set_acc_wr_control(&temp,
497 brw_inst_acc_wr_control(brw, src));
498 brw_compact_inst_set_cond_modifier(&temp, brw_inst_cond_modifier(brw, src));
499 if (brw->gen <= 6)
500 brw_compact_inst_set_flag_subreg_nr(&temp,
501 brw_inst_flag_subreg_nr(brw, src));
502 brw_compact_inst_set_cmpt_control(&temp, true);
503 if (!set_src0_index(brw, &temp, src))
504 return false;
505 if (!set_src1_index(brw, &temp, src, is_immediate))
506 return false;
507 brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(brw, src));
508 brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(brw, src));
509 if (is_immediate) {
510 brw_compact_inst_set_src1_reg_nr(&temp, brw_inst_imm_ud(brw, src) & 0xff);
511 } else {
512 brw_compact_inst_set_src1_reg_nr(&temp,
513 brw_inst_src1_da_reg_nr(brw, src));
514 }
515
516 *dst = temp;
517
518 return true;
519 }
520
521 static void
522 set_uncompacted_control(struct brw_context *brw, brw_inst *dst,
523 brw_compact_inst *src)
524 {
525 uint32_t uncompacted =
526 control_index_table[brw_compact_inst_control_index(src)];
527
528 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
529 brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
530
531 if (brw->gen >= 7)
532 brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
533 }
534
535 static void
536 set_uncompacted_datatype(struct brw_context *brw, brw_inst *dst,
537 brw_compact_inst *src)
538 {
539 uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
540
541 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
542 brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
543 }
544
545 static void
546 set_uncompacted_subreg(struct brw_context *brw, brw_inst *dst,
547 brw_compact_inst *src)
548 {
549 uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
550
551 brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
552 brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
553 brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
554 }
555
556 static void
557 set_uncompacted_src0(struct brw_context *brw, brw_inst *dst,
558 brw_compact_inst *src)
559 {
560 uint32_t compacted = brw_compact_inst_src0_index(src);
561 uint16_t uncompacted = src_index_table[compacted];
562
563 brw_inst_set_bits(dst, 88, 77, uncompacted);
564 }
565
566 static void
567 set_uncompacted_src1(struct brw_context *brw, brw_inst *dst,
568 brw_compact_inst *src, bool is_immediate)
569 {
570 if (is_immediate) {
571 signed high5 = brw_compact_inst_src1_index(src);
572 /* Replicate top bit of src1_index into high 20 bits of the immediate. */
573 brw_inst_set_imm_ud(brw, dst, (high5 << 27) >> 19);
574 } else {
575 uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
576
577 brw_inst_set_bits(dst, 120, 109, uncompacted);
578 }
579 }
580
581 void
582 brw_uncompact_instruction(struct brw_context *brw, brw_inst *dst,
583 brw_compact_inst *src)
584 {
585 memset(dst, 0, sizeof(*dst));
586
587 brw_inst_set_opcode(brw, dst, brw_compact_inst_opcode(src));
588 brw_inst_set_debug_control(brw, dst, brw_compact_inst_debug_control(src));
589
590 set_uncompacted_control(brw, dst, src);
591 set_uncompacted_datatype(brw, dst, src);
592
593 /* src0/1 register file fields are in the datatype table. */
594 bool is_immediate = brw_inst_src0_reg_file(brw, dst) == BRW_IMMEDIATE_VALUE ||
595 brw_inst_src1_reg_file(brw, dst) == BRW_IMMEDIATE_VALUE;
596
597 set_uncompacted_subreg(brw, dst, src);
598 brw_inst_set_acc_wr_control(brw, dst, brw_compact_inst_acc_wr_control(src));
599 brw_inst_set_cond_modifier(brw, dst, brw_compact_inst_cond_modifier(src));
600 if (brw->gen <= 6)
601 brw_inst_set_flag_subreg_nr(brw, dst,
602 brw_compact_inst_flag_subreg_nr(src));
603 set_uncompacted_src0(brw, dst, src);
604 set_uncompacted_src1(brw, dst, src, is_immediate);
605 brw_inst_set_dst_da_reg_nr(brw, dst, brw_compact_inst_dst_reg_nr(src));
606 brw_inst_set_src0_da_reg_nr(brw, dst, brw_compact_inst_src0_reg_nr(src));
607 if (is_immediate) {
608 brw_inst_set_imm_ud(brw, dst,
609 brw_inst_imm_ud(brw, dst) |
610 brw_compact_inst_src1_reg_nr(src));
611 } else {
612 brw_inst_set_src1_da_reg_nr(brw, dst, brw_compact_inst_src1_reg_nr(src));
613 }
614 }
615
616 void brw_debug_compact_uncompact(struct brw_context *brw,
617 brw_inst *orig,
618 brw_inst *uncompacted)
619 {
620 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
621 brw->gen);
622
623 fprintf(stderr, " before: ");
624 brw_disassemble_inst(stderr, brw, orig, true);
625
626 fprintf(stderr, " after: ");
627 brw_disassemble_inst(stderr, brw, uncompacted, false);
628
629 uint32_t *before_bits = (uint32_t *)orig;
630 uint32_t *after_bits = (uint32_t *)uncompacted;
631 fprintf(stderr, " changed bits:\n");
632 for (int i = 0; i < 128; i++) {
633 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
634 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
635
636 if (before != after) {
637 fprintf(stderr, " bit %d, %s to %s\n", i,
638 before ? "set" : "unset",
639 after ? "set" : "unset");
640 }
641 }
642 }
643
644 static int
645 compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
646 {
647 int this_compacted_count = compacted_counts[old_ip];
648 int target_compacted_count = compacted_counts[old_target_ip];
649 return target_compacted_count - this_compacted_count;
650 }
651
652 static void
653 update_uip_jip(struct brw_context *brw, brw_inst *insn,
654 int this_old_ip, int *compacted_counts)
655 {
656 int jip = brw_inst_jip(brw, insn);
657 jip -= compacted_between(this_old_ip, this_old_ip + jip, compacted_counts);
658 brw_inst_set_jip(brw, insn, jip);
659
660 if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF ||
661 brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE)
662 return;
663
664 int uip = brw_inst_uip(brw, insn);
665 uip -= compacted_between(this_old_ip, this_old_ip + uip, compacted_counts);
666 brw_inst_set_uip(brw, insn, uip);
667 }
668
669 void
670 brw_init_compaction_tables(struct brw_context *brw)
671 {
672 assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
673 assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
674 assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
675 assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
676 assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
677 assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
678 assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
679 assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
680
681 switch (brw->gen) {
682 case 7:
683 control_index_table = gen7_control_index_table;
684 datatype_table = gen7_datatype_table;
685 subreg_table = gen7_subreg_table;
686 src_index_table = gen7_src_index_table;
687 break;
688 case 6:
689 control_index_table = gen6_control_index_table;
690 datatype_table = gen6_datatype_table;
691 subreg_table = gen6_subreg_table;
692 src_index_table = gen6_src_index_table;
693 break;
694 default:
695 return;
696 }
697 }
698
699 void
700 brw_compact_instructions(struct brw_compile *p, int start_offset,
701 int num_annotations, struct annotation *annotation)
702 {
703 struct brw_context *brw = p->brw;
704 void *store = p->store + start_offset / 16;
705 /* For an instruction at byte offset 8*i before compaction, this is the number
706 * of compacted instructions that preceded it.
707 */
708 int compacted_counts[(p->next_insn_offset - start_offset) / 8];
709 /* For an instruction at byte offset 8*i after compaction, this is the
710 * 8-byte offset it was at before compaction.
711 */
712 int old_ip[(p->next_insn_offset - start_offset) / 8];
713
714 if (brw->gen < 6 || brw->gen >= 8)
715 return;
716
717 int src_offset;
718 int offset = 0;
719 int compacted_count = 0;
720 for (src_offset = 0; src_offset < p->next_insn_offset - start_offset;) {
721 brw_inst *src = store + src_offset;
722 void *dst = store + offset;
723
724 old_ip[offset / 8] = src_offset / 8;
725 compacted_counts[src_offset / 8] = compacted_count;
726
727 brw_inst saved = *src;
728
729 if (!brw_inst_cmpt_control(brw, src) &&
730 brw_try_compact_instruction(brw, dst, src)) {
731 compacted_count++;
732
733 if (INTEL_DEBUG) {
734 brw_inst uncompacted;
735 brw_uncompact_instruction(brw, &uncompacted, dst);
736 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
737 brw_debug_compact_uncompact(brw, &saved, &uncompacted);
738 }
739 }
740
741 offset += 8;
742 src_offset += 16;
743 } else {
744 int size = brw_inst_cmpt_control(brw, src) ? 8 : 16;
745
746 /* It appears that the end of thread SEND instruction needs to be
747 * aligned, or the GPU hangs.
748 */
749 if ((brw_inst_opcode(brw, src) == BRW_OPCODE_SEND ||
750 brw_inst_opcode(brw, src) == BRW_OPCODE_SENDC) &&
751 brw_inst_eot(brw, src) &&
752 (offset & 8) != 0) {
753 brw_compact_inst *align = store + offset;
754 memset(align, 0, sizeof(*align));
755 brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
756 brw_compact_inst_set_cmpt_control(align, true);
757 offset += 8;
758 old_ip[offset / 8] = src_offset / 8;
759 dst = store + offset;
760 }
761
762 /* If we didn't compact this intruction, we need to move it down into
763 * place.
764 */
765 if (offset != src_offset) {
766 memmove(dst, src, size);
767 }
768 offset += size;
769 src_offset += size;
770 }
771 }
772
773 /* Fix up control flow offsets. */
774 p->next_insn_offset = start_offset + offset;
775 for (offset = 0; offset < p->next_insn_offset - start_offset;) {
776 brw_inst *insn = store + offset;
777 int this_old_ip = old_ip[offset / 8];
778 int this_compacted_count = compacted_counts[this_old_ip];
779 int target_old_ip, target_compacted_count;
780
781 switch (brw_inst_opcode(brw, insn)) {
782 case BRW_OPCODE_BREAK:
783 case BRW_OPCODE_CONTINUE:
784 case BRW_OPCODE_HALT:
785 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
786 break;
787
788 case BRW_OPCODE_IF:
789 case BRW_OPCODE_ELSE:
790 case BRW_OPCODE_ENDIF:
791 case BRW_OPCODE_WHILE:
792 if (brw->gen >= 7) {
793 update_uip_jip(brw, insn, this_old_ip, compacted_counts);
794 } else if (brw->gen == 6) {
795 int gen6_jump_count = brw_inst_gen6_jump_count(brw, insn);
796 target_old_ip = this_old_ip + gen6_jump_count;
797 target_compacted_count = compacted_counts[target_old_ip];
798 gen6_jump_count -= (target_compacted_count - this_compacted_count);
799 brw_inst_set_gen6_jump_count(brw, insn, gen6_jump_count);
800 }
801 break;
802 }
803
804 offset = next_offset(brw, store, offset);
805 }
806
807 /* p->nr_insn is counting the number of uncompacted instructions still, so
808 * divide. We do want to be sure there's a valid instruction in any
809 * alignment padding, so that the next compression pass (for the FS 8/16
810 * compile passes) parses correctly.
811 */
812 if (p->next_insn_offset & 8) {
813 brw_compact_inst *align = store + offset;
814 memset(align, 0, sizeof(*align));
815 brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
816 brw_compact_inst_set_cmpt_control(align, true);
817 p->next_insn_offset += 8;
818 }
819 p->nr_insn = p->next_insn_offset / 16;
820
821 /* Update the instruction offsets for each annotation. */
822 if (annotation) {
823 for (int offset = 0, i = 0; i < num_annotations; i++) {
824 while (start_offset + old_ip[offset / 8] * 8 != annotation[i].offset) {
825 assert(start_offset + old_ip[offset / 8] * 8 <
826 annotation[i].offset);
827 offset = next_offset(brw, store, offset);
828 }
829
830 annotation[i].offset = start_offset + offset;
831
832 offset = next_offset(brw, store, offset);
833 }
834
835 annotation[num_annotations].offset = p->next_insn_offset;
836 }
837 }