/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/** @file gen8_generator.cpp
 *
 * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer.
 */
#include "main/compiler.h"
#include "main/macros.h"
#include "brw_context.h"
#include "glsl/ralloc.h"
#include "gen8_generator.h"
40 gen8_generator::gen8_generator(struct brw_context
*brw
,
41 struct gl_shader_program
*shader_prog
,
42 struct gl_program
*prog
,
44 : shader_prog(shader_prog
), prog(prog
), brw(brw
), mem_ctx(mem_ctx
)
48 memset(&default_state
, 0, sizeof(default_state
));
49 default_state
.mask_control
= BRW_MASK_ENABLE
;
52 store
= rzalloc_array(mem_ctx
, gen8_instruction
, store_size
);
56 /* Set up the control flow stacks. */
58 if_stack_array_size
= 16;
59 if_stack
= rzalloc_array(mem_ctx
, int, if_stack_array_size
);
62 loop_stack_array_size
= 16;
63 loop_stack
= rzalloc_array(mem_ctx
, int, loop_stack_array_size
);
66 gen8_generator::~gen8_generator()
71 gen8_generator::next_inst(unsigned opcode
)
73 gen8_instruction
*inst
;
75 if (nr_inst
+ 1 > unsigned(store_size
)) {
77 store
= reralloc(mem_ctx
, store
, gen8_instruction
, store_size
);
81 next_inst_offset
+= 16;
82 inst
= &store
[nr_inst
++];
84 memset(inst
, 0, sizeof(gen8_instruction
));
86 gen8_set_opcode(inst
, opcode
);
87 gen8_set_exec_size(inst
, default_state
.exec_size
);
88 gen8_set_access_mode(inst
, default_state
.access_mode
);
89 gen8_set_mask_control(inst
, default_state
.mask_control
);
90 gen8_set_qtr_control(inst
, default_state
.qtr_control
);
91 gen8_set_cond_modifier(inst
, default_state
.conditional_mod
);
92 gen8_set_pred_control(inst
, default_state
.predicate
);
93 gen8_set_pred_inv(inst
, default_state
.predicate_inverse
);
94 gen8_set_saturate(inst
, default_state
.saturate
);
95 gen8_set_flag_subreg_nr(inst
, default_state
.flag_subreg_nr
);
/* Define a one-source emitter gen8_generator::OP(dst, src) that appends a
 * BRW_OPCODE_<OP> instruction and sets its destination and source 0.
 */
#define ALU1(OP)                                                   \
gen8_instruction *                                                 \
gen8_generator::OP(struct brw_reg dst, struct brw_reg src)         \
{                                                                  \
   gen8_instruction *inst = next_inst(BRW_OPCODE_##OP);            \
   gen8_set_dst(brw, inst, dst);                                   \
   gen8_set_src0(brw, inst, src);                                  \
   return inst;                                                    \
}
/* Define a two-source emitter gen8_generator::OP(dst, s0, s1) that appends a
 * BRW_OPCODE_<OP> instruction and sets its destination and both sources.
 */
#define ALU2(OP)                                                   \
gen8_instruction *                                                 \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
{                                                                  \
   gen8_instruction *inst = next_inst(BRW_OPCODE_##OP);            \
   gen8_set_dst(brw, inst, dst);                                   \
   gen8_set_src0(brw, inst, s0);                                   \
   gen8_set_src1(brw, inst, s1);                                   \
   return inst;                                                    \
}
/* Same as a plain two-source emitter, but additionally turns on the
 * accumulator write control bit of the emitted instruction.
 */
#define ALU2_ACCUMULATE(OP)                                        \
gen8_instruction *                                                 \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \
{                                                                  \
   gen8_instruction *inst = next_inst(BRW_OPCODE_##OP);            \
   gen8_set_dst(brw, inst, dst);                                   \
   gen8_set_src0(brw, inst, s0);                                   \
   gen8_set_src1(brw, inst, s1);                                   \
   gen8_set_acc_wr_control(inst, true);                            \
   return inst;                                                    \
}
/* Define a three-source emitter gen8_generator::OP(dst, s0, s1, s2) that
 * forwards to alu3() with BRW_OPCODE_<OP>.
 */
#define ALU3(OP)                                                   \
gen8_instruction *                                                 \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0,          \
                   struct brw_reg s1, struct brw_reg s2)           \
{                                                                  \
   return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2);                  \
}
/* Three-source emitter restricted to float operands: asserts that the
 * destination and all three sources have type F, then forwards to alu3().
 */
#define ALU3F(OP)                                                  \
gen8_instruction *                                                 \
gen8_generator::OP(struct brw_reg dst, struct brw_reg s0,          \
                   struct brw_reg s1, struct brw_reg s2)           \
{                                                                  \
   assert(dst.type == BRW_REGISTER_TYPE_F);                        \
   assert(s0.type == BRW_REGISTER_TYPE_F);                         \
   assert(s1.type == BRW_REGISTER_TYPE_F);                         \
   assert(s2.type == BRW_REGISTER_TYPE_F);                         \
   return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2);                  \
}
160 ALU2_ACCUMULATE(ADDC
)
161 ALU2_ACCUMULATE(SUBB
)
181 ALU2_ACCUMULATE(MACH
)
188 gen8_generator::CMP(struct brw_reg dst
, unsigned conditional
,
189 struct brw_reg src0
, struct brw_reg src1
)
191 gen8_instruction
*inst
= next_inst(BRW_OPCODE_CMP
);
192 gen8_set_cond_modifier(inst
, conditional
);
193 /* The CMP instruction appears to behave erratically for floating point
194 * sources unless the destination type is also float. Overriding it to
195 * match src0 makes it work in all cases.
197 dst
.type
= src0
.type
;
198 gen8_set_dst(brw
, inst
, dst
);
199 gen8_set_src0(brw
, inst
, src0
);
200 gen8_set_src1(brw
, inst
, src1
);
205 get_3src_subreg_nr(struct brw_reg reg
)
207 if (reg
.vstride
== BRW_VERTICAL_STRIDE_0
) {
208 assert(brw_is_single_value_swizzle(reg
.dw1
.bits
.swizzle
));
209 return reg
.subnr
/ 4 + BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, 0);
211 return reg
.subnr
/ 4;
216 gen8_generator::alu3(unsigned opcode
,
222 /* MRFs haven't existed since Gen7, so we better not be using them. */
223 if (dst
.file
== BRW_MESSAGE_REGISTER_FILE
) {
224 dst
.file
= BRW_GENERAL_REGISTER_FILE
;
225 dst
.nr
+= GEN7_MRF_HACK_START
;
228 gen8_instruction
*inst
= next_inst(opcode
);
229 assert(gen8_access_mode(inst
) == BRW_ALIGN_16
);
231 assert(dst
.file
== BRW_GENERAL_REGISTER_FILE
);
232 assert(dst
.nr
< 128);
233 assert(dst
.address_mode
== BRW_ADDRESS_DIRECT
);
234 assert(dst
.type
== BRW_REGISTER_TYPE_F
||
235 dst
.type
== BRW_REGISTER_TYPE_D
||
236 dst
.type
== BRW_REGISTER_TYPE_UD
);
237 gen8_set_dst_3src_reg_nr(inst
, dst
.nr
);
238 gen8_set_dst_3src_subreg_nr(inst
, dst
.subnr
/ 16);
239 gen8_set_dst_3src_writemask(inst
, dst
.dw1
.bits
.writemask
);
241 assert(src0
.file
== BRW_GENERAL_REGISTER_FILE
);
242 assert(src0
.address_mode
== BRW_ADDRESS_DIRECT
);
243 assert(src0
.nr
< 128);
244 gen8_set_src0_3src_swizzle(inst
, src0
.dw1
.bits
.swizzle
);
245 gen8_set_src0_3src_subreg_nr(inst
, get_3src_subreg_nr(src0
));
246 gen8_set_src0_3src_rep_ctrl(inst
, src0
.vstride
== BRW_VERTICAL_STRIDE_0
);
247 gen8_set_src0_3src_reg_nr(inst
, src0
.nr
);
248 gen8_set_src0_3src_abs(inst
, src0
.abs
);
249 gen8_set_src0_3src_negate(inst
, src0
.negate
);
251 assert(src1
.file
== BRW_GENERAL_REGISTER_FILE
);
252 assert(src1
.address_mode
== BRW_ADDRESS_DIRECT
);
253 assert(src1
.nr
< 128);
254 gen8_set_src1_3src_swizzle(inst
, src1
.dw1
.bits
.swizzle
);
255 gen8_set_src1_3src_subreg_nr(inst
, get_3src_subreg_nr(src1
));
256 gen8_set_src1_3src_rep_ctrl(inst
, src1
.vstride
== BRW_VERTICAL_STRIDE_0
);
257 gen8_set_src1_3src_reg_nr(inst
, src1
.nr
);
258 gen8_set_src1_3src_abs(inst
, src1
.abs
);
259 gen8_set_src1_3src_negate(inst
, src1
.negate
);
261 assert(src2
.file
== BRW_GENERAL_REGISTER_FILE
);
262 assert(src2
.address_mode
== BRW_ADDRESS_DIRECT
);
263 assert(src2
.nr
< 128);
264 gen8_set_src2_3src_swizzle(inst
, src2
.dw1
.bits
.swizzle
);
265 gen8_set_src2_3src_subreg_nr(inst
, get_3src_subreg_nr(src2
));
266 gen8_set_src2_3src_rep_ctrl(inst
, src2
.vstride
== BRW_VERTICAL_STRIDE_0
);
267 gen8_set_src2_3src_reg_nr(inst
, src2
.nr
);
268 gen8_set_src2_3src_abs(inst
, src2
.abs
);
269 gen8_set_src2_3src_negate(inst
, src2
.negate
);
271 /* Set both the source and destination types based on dst.type, ignoring
272 * the source register types. The MAD and LRP emitters both ensure that
273 * all register types are float. The BFE and BFI2 emitters, however, may
274 * send us mixed D and UD source types and want us to ignore that.
277 case BRW_REGISTER_TYPE_F
:
278 gen8_set_src_3src_type(inst
, BRW_3SRC_TYPE_F
);
279 gen8_set_dst_3src_type(inst
, BRW_3SRC_TYPE_F
);
281 case BRW_REGISTER_TYPE_D
:
282 gen8_set_src_3src_type(inst
, BRW_3SRC_TYPE_D
);
283 gen8_set_dst_3src_type(inst
, BRW_3SRC_TYPE_D
);
285 case BRW_REGISTER_TYPE_UD
:
286 gen8_set_src_3src_type(inst
, BRW_3SRC_TYPE_UD
);
287 gen8_set_dst_3src_type(inst
, BRW_3SRC_TYPE_UD
);
295 gen8_generator::math(unsigned math_function
,
299 gen8_instruction
*inst
= next_inst(BRW_OPCODE_MATH
);
301 assert(src0
.hstride
== 0 || src0
.hstride
== dst
.hstride
);
303 gen8_set_math_function(inst
, math_function
);
304 gen8_set_dst(brw
, inst
, dst
);
305 gen8_set_src0(brw
, inst
, src0
);
310 gen8_generator::MATH(unsigned math_function
,
314 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
315 gen8_instruction
*inst
= math(math_function
, dst
, src0
);
320 gen8_generator::MATH(unsigned math_function
,
326 math_function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT
||
327 math_function
== BRW_MATH_FUNCTION_INT_DIV_REMAINDER
||
328 math_function
== BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER
;
331 assert(src0
.type
!= BRW_REGISTER_TYPE_F
);
332 assert(src1
.type
!= BRW_REGISTER_TYPE_F
);
334 assert(src0
.type
== BRW_REGISTER_TYPE_F
);
337 gen8_instruction
*inst
= math(math_function
, dst
, src0
);
338 gen8_set_src1(brw
, inst
, src1
);
343 gen8_generator::MOV_RAW(struct brw_reg dst
, struct brw_reg src0
)
345 gen8_instruction
*inst
= next_inst(BRW_OPCODE_MOV
);
346 gen8_set_dst(brw
, inst
, retype(dst
, BRW_REGISTER_TYPE_UD
));
347 gen8_set_src0(brw
, inst
, retype(src0
, BRW_REGISTER_TYPE_UD
));
348 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);
355 gen8_generator::NOP()
357 return next_inst(BRW_OPCODE_NOP
);
361 gen8_generator::push_if_stack(gen8_instruction
*inst
)
363 if_stack
[if_stack_depth
] = inst
- store
;
366 if (if_stack_array_size
<= if_stack_depth
) {
367 if_stack_array_size
*= 2;
368 if_stack
= reralloc(mem_ctx
, if_stack
, int, if_stack_array_size
);
373 gen8_generator::pop_if_stack()
376 return &store
[if_stack
[if_stack_depth
]];
380 * Patch the IF and ELSE instructions to set the jump offsets (JIP and UIP.)
383 gen8_generator::patch_IF_ELSE(gen8_instruction
*if_inst
,
384 gen8_instruction
*else_inst
,
385 gen8_instruction
*endif_inst
)
387 assert(if_inst
!= NULL
&& gen8_opcode(if_inst
) == BRW_OPCODE_IF
);
388 assert(else_inst
== NULL
|| gen8_opcode(else_inst
) == BRW_OPCODE_ELSE
);
389 assert(endif_inst
!= NULL
&& gen8_opcode(endif_inst
) == BRW_OPCODE_ENDIF
);
391 gen8_set_exec_size(endif_inst
, gen8_exec_size(if_inst
));
393 if (else_inst
== NULL
) {
394 /* Patch IF -> ENDIF */
395 gen8_set_jip(if_inst
, 16 * (endif_inst
- if_inst
));
396 gen8_set_uip(if_inst
, 16 * (endif_inst
- if_inst
));
398 gen8_set_exec_size(else_inst
, gen8_exec_size(if_inst
));
400 /* Patch IF -> ELSE and ELSE -> ENDIF:
402 * The IF's JIP should point at the instruction after the ELSE.
403 * The IF's UIP should point to the ENDIF.
405 * Both are expressed in bytes, hence the multiply by 16...128-bits.
407 gen8_set_jip(if_inst
, 16 * (else_inst
- if_inst
+ 1));
408 gen8_set_uip(if_inst
, 16 * (endif_inst
- if_inst
));
410 /* Patch ELSE -> ENDIF:
412 * Since we don't set branch_ctrl, both JIP and UIP point to ENDIF.
414 gen8_set_jip(else_inst
, 16 * (endif_inst
- else_inst
));
415 gen8_set_uip(else_inst
, 16 * (endif_inst
- else_inst
));
417 gen8_set_jip(endif_inst
, 16);
421 gen8_generator::IF(unsigned predicate
)
423 gen8_instruction
*inst
= next_inst(BRW_OPCODE_IF
);
424 gen8_set_dst(brw
, inst
, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D
)));
425 gen8_set_src0(brw
, inst
, brw_imm_d(0));
426 gen8_set_exec_size(inst
, default_state
.exec_size
);
427 gen8_set_pred_control(inst
, predicate
);
428 gen8_set_mask_control(inst
, BRW_MASK_ENABLE
);
435 gen8_generator::ELSE()
437 gen8_instruction
*inst
= next_inst(BRW_OPCODE_ELSE
);
438 gen8_set_dst(brw
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
439 gen8_set_src0(brw
, inst
, brw_imm_d(0));
440 gen8_set_mask_control(inst
, BRW_MASK_ENABLE
);
446 gen8_generator::ENDIF()
448 gen8_instruction
*if_inst
= NULL
;
449 gen8_instruction
*else_inst
= NULL
;
451 gen8_instruction
*tmp
= pop_if_stack();
452 if (gen8_opcode(tmp
) == BRW_OPCODE_ELSE
) {
454 tmp
= pop_if_stack();
456 assert(gen8_opcode(tmp
) == BRW_OPCODE_IF
);
459 gen8_instruction
*endif_inst
= next_inst(BRW_OPCODE_ENDIF
);
460 gen8_set_mask_control(endif_inst
, BRW_MASK_ENABLE
);
461 gen8_set_src0(brw
, endif_inst
, brw_imm_d(0));
462 patch_IF_ELSE(if_inst
, else_inst
, endif_inst
);
468 gen8_generator::next_ip(unsigned ip
) const
474 gen8_generator::find_next_block_end(unsigned start
) const
476 for (unsigned ip
= next_ip(start
); ip
< next_inst_offset
; ip
= next_ip(ip
)) {
477 gen8_instruction
*inst
= &store
[ip
/ 16];
479 switch (gen8_opcode(inst
)) {
480 case BRW_OPCODE_ENDIF
:
481 case BRW_OPCODE_ELSE
:
482 case BRW_OPCODE_WHILE
:
483 case BRW_OPCODE_HALT
:
491 /* There is no DO instruction on Gen6+, so to find the end of the loop
492 * we have to see if the loop is jumping back before our start
496 gen8_generator::find_loop_end(unsigned start
) const
498 /* Always start after the instruction (such as a WHILE) we're trying to fix
501 for (unsigned ip
= next_ip(start
); ip
< next_inst_offset
; ip
= next_ip(ip
)) {
502 gen8_instruction
*inst
= &store
[ip
/ 16];
504 if (gen8_opcode(inst
) == BRW_OPCODE_WHILE
) {
505 if (ip
+ gen8_jip(inst
) <= start
)
509 assert(!"not reached");
513 /* After program generation, go back and update the UIP and JIP of
514 * BREAK, CONT, and HALT instructions to their correct locations.
517 gen8_generator::patch_jump_targets()
519 for (unsigned ip
= 0; ip
< next_inst_offset
; ip
= next_ip(ip
)) {
520 gen8_instruction
*inst
= &store
[ip
/ 16];
522 int block_end_ip
= find_next_block_end(ip
);
523 switch (gen8_opcode(inst
)) {
524 case BRW_OPCODE_BREAK
:
525 assert(block_end_ip
!= 0);
526 gen8_set_jip(inst
, block_end_ip
- ip
);
527 gen8_set_uip(inst
, find_loop_end(ip
) - ip
);
528 assert(gen8_uip(inst
) != 0);
529 assert(gen8_jip(inst
) != 0);
531 case BRW_OPCODE_CONTINUE
:
532 assert(block_end_ip
!= 0);
533 gen8_set_jip(inst
, block_end_ip
- ip
);
534 gen8_set_uip(inst
, find_loop_end(ip
) - ip
);
535 assert(gen8_uip(inst
) != 0);
536 assert(gen8_jip(inst
) != 0);
538 case BRW_OPCODE_ENDIF
:
539 if (block_end_ip
== 0)
540 gen8_set_jip(inst
, 16);
542 gen8_set_jip(inst
, block_end_ip
- ip
);
544 case BRW_OPCODE_HALT
:
545 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
547 * "In case of the halt instruction not inside any conditional
548 * code block, the value of <JIP> and <UIP> should be the
549 * same. In case of the halt instruction inside conditional code
550 * block, the <UIP> should be the end of the program, and the
551 * <JIP> should be end of the most inner conditional code block."
553 * The uip will have already been set by whoever set up the
556 if (block_end_ip
== 0) {
557 gen8_set_jip(inst
, gen8_uip(inst
));
559 gen8_set_jip(inst
, block_end_ip
- ip
);
561 assert(gen8_uip(inst
) != 0);
562 assert(gen8_jip(inst
) != 0);
571 if (loop_stack_array_size
< loop_stack_depth
) {
572 loop_stack_array_size
*= 2;
573 loop_stack
= reralloc(mem_ctx
, loop_stack
, int, loop_stack_array_size
);
575 loop_stack
[loop_stack_depth
++] = nr_inst
;
579 gen8_generator::BREAK()
581 gen8_instruction
*inst
= next_inst(BRW_OPCODE_BREAK
);
582 gen8_set_dst(brw
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
583 gen8_set_src0(brw
, inst
, brw_imm_d(0));
584 gen8_set_exec_size(inst
, default_state
.exec_size
);
589 gen8_generator::CONTINUE()
591 gen8_instruction
*inst
= next_inst(BRW_OPCODE_CONTINUE
);
592 gen8_set_dst(brw
, inst
, brw_ip_reg());
593 gen8_set_src0(brw
, inst
, brw_imm_d(0));
594 gen8_set_exec_size(inst
, default_state
.exec_size
);
599 gen8_generator::WHILE()
601 gen8_instruction
*do_inst
= &store
[loop_stack
[--loop_stack_depth
]];
602 gen8_instruction
*while_inst
= next_inst(BRW_OPCODE_WHILE
);
604 gen8_set_dst(brw
, while_inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
605 gen8_set_src0(brw
, while_inst
, brw_imm_d(0));
606 gen8_set_jip(while_inst
, 16 * (do_inst
- while_inst
));
607 gen8_set_exec_size(while_inst
, default_state
.exec_size
);
613 gen8_generator::HALT()
615 gen8_instruction
*inst
= next_inst(BRW_OPCODE_HALT
);
616 gen8_set_dst(brw
, inst
, retype(brw_null_reg(), BRW_REGISTER_TYPE_D
));
617 gen8_set_src0(brw
, inst
, brw_imm_d(0));
618 gen8_set_exec_size(inst
, default_state
.exec_size
);
619 gen8_set_mask_control(inst
, BRW_MASK_DISABLE
);