2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 /** @file gen8_generator.cpp
26 * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
30 #include "main/compiler.h"
31 #include "main/macros.h"
32 #include "brw_context.h"
35 #include "glsl/ralloc.h"
38 #include "gen8_generator.h"
40 gen8_generator::gen8_generator(struct brw_context
*brw
,
41 struct gl_shader_program
*shader_prog
,
42 struct gl_program
*prog
,
44 : shader_prog(shader_prog
), prog(prog
), brw(brw
), mem_ctx(mem_ctx
)
48 memset(&default_state
, 0, sizeof(default_state
));
49 default_state
.mask_control
= BRW_MASK_ENABLE
;
52 store
= rzalloc_array(mem_ctx
, gen8_instruction
, store_size
);
56 /* Set up the control flow stacks. */
58 if_stack_array_size
= 16;
59 if_stack
= rzalloc_array(mem_ctx
, int, if_stack_array_size
);
62 loop_stack_array_size
= 16;
63 loop_stack
= rzalloc_array(mem_ctx
, int, loop_stack_array_size
);
66 gen8_generator::~gen8_generator()
71 gen8_generator::next_inst(unsigned opcode
)
73 gen8_instruction
*inst
;
75 if (nr_inst
+ 1 > unsigned(store_size
)) {
77 store
= reralloc(mem_ctx
, store
, gen8_instruction
, store_size
);
81 next_inst_offset
+= 16;
82 inst
= &store
[nr_inst
++];
84 memset(inst
, 0, sizeof(gen8_instruction
));
86 gen8_set_opcode(inst
, opcode
);
87 gen8_set_exec_size(inst
, default_state
.exec_size
);
88 gen8_set_access_mode(inst
, default_state
.access_mode
);
89 gen8_set_mask_control(inst
, default_state
.mask_control
);
90 gen8_set_cond_modifier(inst
, default_state
.conditional_mod
);
91 gen8_set_pred_control(inst
, default_state
.predicate
);
92 gen8_set_pred_inv(inst
, default_state
.predicate_inverse
);
93 gen8_set_saturate(inst
, default_state
.saturate
);
94 gen8_set_flag_subreg_nr(inst
, default_state
.flag_subreg_nr
);
100 gen8_generator::OP(struct brw_reg dst, struct brw_reg src) \
102 gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
103 gen8_set_dst(brw, inst, dst); \
104 gen8_set_src0(brw, inst, src); \
110 gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
112 gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \
113 gen8_set_dst(brw, inst, dst); \
114 gen8_set_src0(brw, inst, s0); \
115 gen8_set_src1(brw, inst, s1); \
/**
 * Defines gen8_generator::OP(dst, s0, s1): a two-source emitter for
 * BRW_OPCODE_<OP> that additionally turns on accumulator write control.
 */
#define ALU2_ACCUMULATE(OP)                                                  \
gen8_instruction *                                                           \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
{                                                                            \
   gen8_instruction *const insn = next_inst(BRW_OPCODE_##OP);                \
   gen8_set_dst(brw, insn, dst);                                             \
   gen8_set_src0(brw, insn, s0);                                             \
   gen8_set_src1(brw, insn, s1);                                             \
   gen8_set_acc_wr_control(insn, true);                                      \
   return insn;                                                              \
}
133 gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
134 struct brw_reg s1, struct brw_reg s2) \
136 return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
141 gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \
142 struct brw_reg s1, struct brw_reg s2) \
144 assert(dst.type == BRW_REGISTER_TYPE_F); \
145 assert(s0.type == BRW_REGISTER_TYPE_F); \
146 assert(s1.type == BRW_REGISTER_TYPE_F); \
147 assert(s2.type == BRW_REGISTER_TYPE_F); \
148 return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \
159 ALU2_ACCUMULATE(ADDC
)
160 ALU2_ACCUMULATE(SUBB
)
180 ALU2_ACCUMULATE(MACH
)
187 gen8_generator::CMP(struct brw_reg dst
, unsigned conditional
,
188 struct brw_reg src0
, struct brw_reg src1
)
190 gen8_instruction
*inst
= next_inst(BRW_OPCODE_CMP
);
191 gen8_set_cond_modifier(inst
, conditional
);
192 /* The CMP instruction appears to behave erratically for floating point
193 * sources unless the destination type is also float. Overriding it to
194 * match src0 makes it work in all cases.
196 dst
.type
= src0
.type
;
197 gen8_set_dst(brw
, inst
, dst
);
198 gen8_set_src0(brw
, inst
, src0
);
199 gen8_set_src1(brw
, inst
, src1
);
204 get_3src_subreg_nr(struct brw_reg reg
)
206 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
207 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
208 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
210 return reg
.subnr
/ 4;
215 gen8_generator::alu3(unsigned opcode
,
221 /* MRFs haven't existed since Gen7, so we better not be using them. */
222 if (dst
.file
== BRW_MESSAGE_REGISTER_FILE
) {
223 dst
.file
= BRW_GENERAL_REGISTER_FILE
;
224 dst
.nr
+= GEN7_MRF_HACK_START
;
227 gen8_instruction
*inst
= next_inst(opcode
);
228 assert(gen8_access_mode(inst
) == BRW_ALIGN_16
);
230 assert(dst
.file
== BRW_GENERAL_REGISTER_FILE
);
231 assert(dst
.nr
< 128);
232 assert(dst
.address_mode
== BRW_ADDRESS_DIRECT
);
233 assert(dst
.type
== BRW_REGISTER_TYPE_F
||
234 dst
.type
== BRW_REGISTER_TYPE_D
||
235 dst
.type
== BRW_REGISTER_TYPE_UD
);
236 gen8_set_dst_3src_reg_nr(inst
, dst
.nr
);
237 gen8_set_dst_3src_subreg_nr(inst
, dst
.subnr
/ 16);
238 gen8_set_dst_3src_writemask(inst
, dst
.dw1
.bits
.writemask
);
240 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
241 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
242 assert(src0
.nr
< 128);
243 gen8_set_src0_3src_swizzle(inst
, src0
.dw1
.bits
.swizzle
);
244 gen8_set_src0_3src_subreg_nr(inst
, get_3src_subreg_nr(src0
));
245 gen8_set_src0_3src_rep_ctrl(inst
, src0
.vstride
== BRW_VERTICAL_STRIDE_0
);
246 gen8_set_src0_3src_reg_nr(inst
, src0
.nr
);
247 gen8_set_src0_3src_abs(inst
, src0
.abs
);
248 gen8_set_src0_3src_negate(inst
, src0
.negate
);
250 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
251 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
252 assert(src1
.nr
< 128);
253 gen8_set_src1_3src_swizzle(inst
, src1
.dw1
.bits
.swizzle
);
254 gen8_set_src1_3src_subreg_nr(inst
, get_3src_subreg_nr(src1
));
255 gen8_set_src1_3src_rep_ctrl(inst
, src1
.vstride
== BRW_VERTICAL_STRIDE_0
);
256 gen8_set_src1_3src_reg_nr(inst
, src1
.nr
);
257 gen8_set_src1_3src_abs(inst
, src1
.abs
);
258 gen8_set_src1_3src_negate(inst
, src1
.negate
);
260 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
261 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
262 assert(src2
.nr
< 128);
263 gen8_set_src2_3src_swizzle(inst
, src2
.dw1
.bits
.swizzle
);
264 gen8_set_src2_3src_subreg_nr(inst
, get_3src_subreg_nr(src2
));
265 gen8_set_src2_3src_rep_ctrl(inst
, src2
.vstride
== BRW_VERTICAL_STRIDE_0
);
266 gen8_set_src2_3src_reg_nr(inst
, src2
.nr
);
267 gen8_set_src2_3src_abs(inst
, src2
.abs
);
268 gen8_set_src2_3src_negate(inst
, src2
.negate
);
270 /* Set both the source and destination types based on dst.type, ignoring
271 * the source register types. The MAD and LRP emitters both ensure that
272 * all register types are float. The BFE and BFI2 emitters, however, may
273 * send us mixed D and UD source types and want us to ignore that.
276 case BRW_REGISTER_TYPE_F
:
277 gen8_set_src_3src_type(inst
, BRW_3SRC_TYPE_F
);
278 gen8_set_dst_3src_type(inst
, BRW_3SRC_TYPE_F
);
280 case BRW_REGISTER_TYPE_D
:
281 gen8_set_src_3src_type(inst
, BRW_3SRC_TYPE_D
);
282 gen8_set_dst_3src_type(inst
, BRW_3SRC_TYPE_D
);
284 case BRW_REGISTER_TYPE_UD
:
285 gen8_set_src_3src_type(inst
, BRW_3SRC_TYPE_UD
);
286 gen8_set_dst_3src_type(inst
, BRW_3SRC_TYPE_UD
);
294 gen8_generator::math(unsigned math_function
,
298 gen8_instruction
*inst
= next_inst(BRW_OPCODE_MATH
);
300 assert(src0
.hstride
== 0 || src0
.hstride
== dst
.hstride
);
302 gen8_set_math_function(inst
, math_function
);
303 gen8_set_dst(brw
, inst
, dst
);
304 gen8_set_src0(brw
, inst
, src0
);
309 gen8_generator::MATH(unsigned math_function
,
313 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
314 gen8_instruction
*inst
= math(math_function
, dst
, src0
);
319 gen8_generator::MATH(unsigned math_function
,
325 math_function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
326 math_function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
327 math_function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
;
330 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
331 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
333 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
336 gen8_instruction
*inst
= math(math_function
, dst
, src0
);
337 gen8_set_src1(brw
, inst
, src1
);
342 gen8_generator::MOV_RAW(struct brw_reg dst
, struct brw_reg src0
)
344 gen8_instruction
*inst
= next_inst(BRW_OPCODE_MOV
);
345 gen8_set_dst(brw
, inst
, retype(dst
, BRW_REGISTER_TYPE_UD
));
346 gen8_set_src0(brw
, inst
, retype(src0
, BRW_REGISTER_TYPE_UD
));
347 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
354 gen8_generator::NOP()
356 return next_inst(BRW_OPCODE_NOP
);
360 gen8_generator::push_if_stack(gen8_instruction
*inst
)
362 if_stack
[if_stack_depth
] = inst
- store
;
365 if (if_stack_array_size
<= if_stack_depth
) {
366 if_stack_array_size
*= 2;
367 if_stack
= reralloc(mem_ctx
, if_stack
, int, if_stack_array_size
);
372 gen8_generator::pop_if_stack()
375 return &store
[if_stack
[if_stack_depth
]];
379 * Patch the IF and ELSE instructions to set the jump offsets (JIP and UIP.)
382 gen8_generator::patch_IF_ELSE(gen8_instruction
*if_inst
,
383 gen8_instruction
*else_inst
,
384 gen8_instruction
*endif_inst
)
386 assert(if_inst
!= NULL
&& gen8_opcode(if_inst
) == BRW_OPCODE_IF
);
387 assert(else_inst
== NULL
|| gen8_opcode(else_inst
) == BRW_OPCODE_ELSE
);
388 assert(endif_inst
!= NULL
&& gen8_opcode(endif_inst
) == BRW_OPCODE_ENDIF
);
390 gen8_set_exec_size(endif_inst
, gen8_exec_size(if_inst
));
392 if (else_inst
== NULL
) {
393 /* Patch IF -> ENDIF */
394 gen8_set_jip(if_inst
, 16 * (endif_inst
- if_inst
));
395 gen8_set_uip(if_inst
, 16 * (endif_inst
- if_inst
));
397 gen8_set_exec_size(else_inst
, gen8_exec_size(if_inst
));
399 /* Patch IF -> ELSE and ELSE -> ENDIF:
401 * The IF's JIP should point at the instruction after the ELSE.
402 * The IF's UIP should point to the ENDIF.
404 * Both are expressed in bytes, hence the multiply by 16...128-bits.
406 gen8_set_jip(if_inst
, 16 * (else_inst
- if_inst
+ 1));
407 gen8_set_uip(if_inst
, 16 * (endif_inst
- if_inst
));
409 /* Patch ELSE -> ENDIF:
411 * Since we don't set branch_ctrl, both JIP and UIP point to ENDIF.
413 gen8_set_jip(else_inst
, 16 * (endif_inst
- else_inst
));
414 gen8_set_uip(else_inst
, 16 * (endif_inst
- else_inst
));
416 gen8_set_jip(endif_inst
, 16);
420 gen8_generator::IF(unsigned predicate
)
422 gen8_instruction
*inst
= next_inst(BRW_OPCODE_IF
);
423 gen8_set_dst(brw
, inst
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
424 gen8_set_exec_size(inst
, default_state
.exec_size
);
425 gen8_set_pred_control(inst
, predicate
);
426 gen8_set_mask_control(inst
, BRW_MASK_ENABLE
);
433 gen8_generator::ELSE()
435 gen8_instruction
*inst
= next_inst(BRW_OPCODE_ELSE
);
436 gen8_set_dst(brw
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
437 gen8_set_mask_control(inst
, BRW_MASK_ENABLE
);
443 gen8_generator::ENDIF()
445 gen8_instruction
*if_inst
= NULL
;
446 gen8_instruction
*else_inst
= NULL
;
448 gen8_instruction
*tmp
= pop_if_stack();
449 if (gen8_opcode(tmp
) == BRW_OPCODE_ELSE
) {
451 tmp
= pop_if_stack();
453 assert(gen8_opcode(tmp
) == BRW_OPCODE_IF
);
456 gen8_instruction
*endif_inst
= next_inst(BRW_OPCODE_ENDIF
);
457 gen8_set_mask_control(endif_inst
, BRW_MASK_ENABLE
);
458 patch_IF_ELSE(if_inst
, else_inst
, endif_inst
);
464 gen8_generator::next_ip(unsigned ip
) const
470 gen8_generator::find_next_block_end(unsigned start
) const
472 for (unsigned ip
= next_ip(start
); ip
< next_inst_offset
; ip
= next_ip(ip
)) {
473 gen8_instruction
*inst
= &store
[ip
/ 16];
475 switch (gen8_opcode(inst
)) {
476 case BRW_OPCODE_ENDIF
:
477 case BRW_OPCODE_ELSE
:
478 case BRW_OPCODE_WHILE
:
479 case BRW_OPCODE_HALT
:
487 /* There is no DO instruction on Gen6+, so to find the end of the loop
488 * we have to see if the loop is jumping back before our start
492 gen8_generator::find_loop_end(unsigned start
) const
494 /* Always start after the instruction (such as a WHILE) we're trying to fix
497 for (unsigned ip
= next_ip(start
); ip
< next_inst_offset
; ip
= next_ip(ip
)) {
498 gen8_instruction
*inst
= &store
[ip
/ 16];
500 if (gen8_opcode(inst
) == BRW_OPCODE_WHILE
) {
501 if (ip
+ gen8_jip(inst
) <= start
)
505 assert(!"not reached");
509 /* After program generation, go back and update the UIP and JIP of
510 * BREAK, CONT, and HALT instructions to their correct locations.
513 gen8_generator::patch_jump_targets()
515 for (unsigned ip
= 0; ip
< next_inst_offset
; ip
= next_ip(ip
)) {
516 gen8_instruction
*inst
= &store
[ip
/ 16];
518 int block_end_ip
= find_next_block_end(ip
);
519 switch (gen8_opcode(inst
)) {
520 case BRW_OPCODE_BREAK
:
521 assert(block_end_ip
!= 0);
522 gen8_set_jip(inst
, block_end_ip
- ip
);
523 gen8_set_uip(inst
, find_loop_end(ip
) - ip
);
524 assert(gen8_uip(inst
) != 0);
525 assert(gen8_jip(inst
) != 0);
527 case BRW_OPCODE_CONTINUE
:
528 assert(block_end_ip
!= 0);
529 gen8_set_jip(inst
, block_end_ip
- ip
);
530 gen8_set_uip(inst
, find_loop_end(ip
) - ip
);
531 assert(gen8_uip(inst
) != 0);
532 assert(gen8_jip(inst
) != 0);
534 case BRW_OPCODE_ENDIF
:
535 if (block_end_ip
== 0)
536 gen8_set_jip(inst
, 16);
538 gen8_set_jip(inst
, block_end_ip
- ip
);
540 case BRW_OPCODE_HALT
:
541 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
543 * "In case of the halt instruction not inside any conditional
544 * code block, the value of <JIP> and <UIP> should be the
545 * same. In case of the halt instruction inside conditional code
546 * block, the <UIP> should be the end of the program, and the
547 * <JIP> should be end of the most inner conditional code block."
549 * The uip will have already been set by whoever set up the
552 if (block_end_ip
== 0) {
553 gen8_set_jip(inst
, gen8_uip(inst
));
555 gen8_set_jip(inst
, block_end_ip
- ip
);
557 assert(gen8_uip(inst
) != 0);
558 assert(gen8_jip(inst
) != 0);
567 if (loop_stack_array_size
< loop_stack_depth
) {
568 loop_stack_array_size
*= 2;
569 loop_stack
= reralloc(mem_ctx
, loop_stack
, int, loop_stack_array_size
);
571 loop_stack
[loop_stack_depth
++] = nr_inst
;
575 gen8_generator::BREAK()
577 gen8_instruction
*inst
= next_inst(BRW_OPCODE_BREAK
);
578 gen8_set_dst(brw
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
579 gen8_set_src0(brw
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
580 gen8_set_src1(brw
, inst
, brw_imm_d(0));
581 gen8_set_exec_size(inst
, default_state
.exec_size
);
586 gen8_generator::CONTINUE()
588 gen8_instruction
*inst
= next_inst(BRW_OPCODE_CONTINUE
);
589 gen8_set_dst(brw
, inst
, brw_ip_reg());
590 gen8_set_src0(brw
, inst
, brw_ip_reg());
591 gen8_set_src1(brw
, inst
, brw_imm_d(0));
592 gen8_set_exec_size(inst
, default_state
.exec_size
);
597 gen8_generator::WHILE()
599 gen8_instruction
*do_inst
= &store
[loop_stack
[--loop_stack_depth
]];
600 gen8_instruction
*while_inst
= next_inst(BRW_OPCODE_WHILE
);
602 gen8_set_dst(brw
, while_inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
603 gen8_set_src0(brw
, while_inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
604 gen8_set_src1(brw
, while_inst
, brw_imm_ud(0));
605 gen8_set_jip(while_inst
, 16 * (do_inst
- while_inst
));
606 gen8_set_exec_size(while_inst
, default_state
.exec_size
);
612 gen8_generator::HALT()
614 gen8_instruction
*inst
= next_inst(BRW_OPCODE_HALT
);
615 gen8_set_dst(brw
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
616 gen8_set_src0(brw
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
617 gen8_set_exec_size(inst
, default_state
.exec_size
);
618 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
623 gen8_generator::disassemble(FILE *out
, int start
, int end
)
625 bool dump_hex
= false;
627 for (int offset
= start
; offset
< end
; offset
+= 16) {
628 gen8_instruction
*inst
= &store
[offset
/ 16];
629 fprintf(stderr
, "0x%08x: ", offset
);
632 fprintf(stderr
, "0x%08x 0x%08x 0x%08x 0x%08x ",
633 ((uint32_t *) inst
)[3],
634 ((uint32_t *) inst
)[2],
635 ((uint32_t *) inst
)[1],
636 ((uint32_t *) inst
)[0]);
639 gen8_disassemble(stderr
, inst
, brw
->gen
);