/*
 * Copyright © 2010-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #ifndef BRW_VEC4_BUILDER_H
26 #define BRW_VEC4_BUILDER_H
28 #include "brw_ir_vec4.h"
29 #include "brw_ir_allocator.h"
/**
 * Toolbox to assemble a VEC4 IR program out of individual instructions.
 *
 * This object is meant to have an interface consistent with
 * brw::fs_builder.  They cannot be fully interchangeable because
 * brw::fs_builder generates scalar code while brw::vec4_builder generates
 * vec4 code.
 */
42 /** Type used in this IR to represent a source of an instruction. */
43 typedef brw::src_reg src_reg
;
45 /** Type used in this IR to represent the destination of an instruction. */
46 typedef brw::dst_reg dst_reg
;
48 /** Type used in this IR to represent an instruction. */
49 typedef vec4_instruction instruction
;
52 * Construct a vec4_builder that inserts instructions into \p shader.
54 vec4_builder(backend_shader
*shader
, unsigned dispatch_width
= 8) :
55 shader(shader
), block(NULL
), cursor(NULL
),
56 _dispatch_width(dispatch_width
), _group(0),
57 force_writemask_all(false),
63 * Construct a vec4_builder that inserts instructions into \p shader
64 * before instruction \p inst in basic block \p block. The default
65 * execution controls and debug annotation are initialized from the
66 * instruction passed as argument.
68 vec4_builder(backend_shader
*shader
, bblock_t
*block
, instruction
*inst
) :
69 shader(shader
), block(block
), cursor(inst
),
70 _dispatch_width(inst
->exec_size
), _group(inst
->group
),
71 force_writemask_all(inst
->force_writemask_all
)
73 annotation
.str
= inst
->annotation
;
74 annotation
.ir
= inst
->ir
;
78 * Construct a vec4_builder that inserts instructions before \p cursor
79 * in basic block \p block, inheriting other code generation parameters
83 at(bblock_t
*block
, exec_node
*cursor
) const
85 vec4_builder bld
= *this;
92 * Construct a vec4_builder appending instructions at the end of the
93 * instruction list of the shader, inheriting other code generation
94 * parameters from this.
99 return at(NULL
, (exec_node
*)&shader
->instructions
.tail_sentinel
);
103 * Construct a builder specifying the default SIMD width and group of
104 * channel enable signals, inheriting other code generation parameters
107 * \p n gives the default SIMD width, \p i gives the slot group used for
108 * predication and control flow masking in multiples of \p n channels.
111 group(unsigned n
, unsigned i
) const
113 assert(force_writemask_all
||
114 (n
<= dispatch_width() && i
< dispatch_width() / n
));
115 vec4_builder bld
= *this;
116 bld
._dispatch_width
= n
;
122 * Construct a builder with per-channel control flow execution masking
123 * disabled if \p b is true. If control flow execution masking is
124 * already disabled this has no effect.
127 exec_all(bool b
= true) const
129 vec4_builder bld
= *this;
131 bld
.force_writemask_all
= true;
136 * Construct a builder with the given debug annotation info.
139 annotate(const char *str
, const void *ir
= NULL
) const
141 vec4_builder bld
= *this;
142 bld
.annotation
.str
= str
;
143 bld
.annotation
.ir
= ir
;
148 * Get the SIMD width in use.
151 dispatch_width() const
153 return _dispatch_width
;
157 * Get the channel group in use.
166 * Allocate a virtual register of natural vector size (four for this IR)
167 * and SIMD width. \p n gives the amount of space to allocate in
168 * dispatch_width units (which is just enough space for four logical
169 * components in this IR).
172 vgrf(enum brw_reg_type type
, unsigned n
= 1) const
174 assert(dispatch_width() <= 32);
177 return retype(dst_reg(VGRF
, shader
->alloc
.allocate(
178 n
* DIV_ROUND_UP(type_sz(type
), 4))),
181 return retype(null_reg_ud(), type
);
185 * Create a null register of floating type.
190 return dst_reg(retype(brw_null_vec(dispatch_width()),
191 BRW_REGISTER_TYPE_F
));
195 * Create a null register of signed integer type.
200 return dst_reg(retype(brw_null_vec(dispatch_width()),
201 BRW_REGISTER_TYPE_D
));
205 * Create a null register of unsigned integer type.
210 return dst_reg(retype(brw_null_vec(dispatch_width()),
211 BRW_REGISTER_TYPE_UD
));
215 * Insert an instruction into the program.
218 emit(const instruction
&inst
) const
220 return emit(new(shader
->mem_ctx
) instruction(inst
));
224 * Create and insert a nullary control instruction into the program.
227 emit(enum opcode opcode
) const
229 return emit(instruction(opcode
));
233 * Create and insert a nullary instruction into the program.
236 emit(enum opcode opcode
, const dst_reg
&dst
) const
238 return emit(instruction(opcode
, dst
));
242 * Create and insert a unary instruction into the program.
245 emit(enum opcode opcode
, const dst_reg
&dst
, const src_reg
&src0
) const
248 case SHADER_OPCODE_RCP
:
249 case SHADER_OPCODE_RSQ
:
250 case SHADER_OPCODE_SQRT
:
251 case SHADER_OPCODE_EXP2
:
252 case SHADER_OPCODE_LOG2
:
253 case SHADER_OPCODE_SIN
:
254 case SHADER_OPCODE_COS
:
255 return fix_math_instruction(
256 emit(instruction(opcode
, dst
,
257 fix_math_operand(src0
))));
260 return emit(instruction(opcode
, dst
, src0
));
265 * Create and insert a binary instruction into the program.
268 emit(enum opcode opcode
, const dst_reg
&dst
, const src_reg
&src0
,
269 const src_reg
&src1
) const
272 case SHADER_OPCODE_POW
:
273 case SHADER_OPCODE_INT_QUOTIENT
:
274 case SHADER_OPCODE_INT_REMAINDER
:
275 return fix_math_instruction(
276 emit(instruction(opcode
, dst
,
277 fix_math_operand(src0
),
278 fix_math_operand(src1
))));
281 return emit(instruction(opcode
, dst
, src0
, src1
));
286 * Create and insert a ternary instruction into the program.
289 emit(enum opcode opcode
, const dst_reg
&dst
, const src_reg
&src0
,
290 const src_reg
&src1
, const src_reg
&src2
) const
294 case BRW_OPCODE_BFI2
:
297 return emit(instruction(opcode
, dst
,
298 fix_3src_operand(src0
),
299 fix_3src_operand(src1
),
300 fix_3src_operand(src2
)));
303 return emit(instruction(opcode
, dst
, src0
, src1
, src2
));
308 * Insert a preallocated instruction into the program.
311 emit(instruction
*inst
) const
313 inst
->exec_size
= dispatch_width();
314 inst
->group
= group();
315 inst
->force_writemask_all
= force_writemask_all
;
316 inst
->size_written
= inst
->exec_size
* type_sz(inst
->dst
.type
);
317 inst
->annotation
= annotation
.str
;
318 inst
->ir
= annotation
.ir
;
321 static_cast<instruction
*>(cursor
)->insert_before(block
, inst
);
323 cursor
->insert_before(inst
);
329 * Select \p src0 if the comparison of both sources with the given
330 * conditional mod evaluates to true, otherwise select \p src1.
332 * Generally useful to get the minimum or maximum of two values.
335 emit_minmax(const dst_reg
&dst
, const src_reg
&src0
,
336 const src_reg
&src1
, brw_conditional_mod mod
) const
338 assert(mod
== BRW_CONDITIONAL_GE
|| mod
== BRW_CONDITIONAL_L
);
340 return set_condmod(mod
, SEL(dst
, fix_unsigned_negate(src0
),
341 fix_unsigned_negate(src1
)));
345 * Copy any live channel from \p src to the first channel of the result.
348 emit_uniformize(const src_reg
&src
) const
350 const vec4_builder ubld
= exec_all();
351 const dst_reg chan_index
=
352 writemask(vgrf(BRW_REGISTER_TYPE_UD
), WRITEMASK_X
);
353 const dst_reg dst
= vgrf(src
.type
);
355 ubld
.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL
, chan_index
);
356 ubld
.emit(SHADER_OPCODE_BROADCAST
, dst
, src
, src_reg(chan_index
));
362 * Assorted arithmetic ops.
367 op(const dst_reg &dst, const src_reg &src0) const \
369 return emit(BRW_OPCODE_##op, dst, src0); \
374 op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
376 return emit(BRW_OPCODE_##op, dst, src0, src1); \
379 #define ALU2_ACC(op) \
381 op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
383 instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \
384 inst->writes_accumulator = true; \
390 op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \
391 const src_reg &src2) const \
393 return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \
447 * CMP: Sets the low bit of the destination channels with the result
448 * of the comparison, while the upper bits are undefined, and updates
449 * the flag register with the packed 16 bits of the result.
452 CMP(const dst_reg
&dst
, const src_reg
&src0
, const src_reg
&src1
,
453 brw_conditional_mod condition
) const
455 /* Take the instruction:
457 * CMP null<d> src0<f> src1<f>
459 * Original gen4 does type conversion to the destination type
460 * before comparison, producing garbage results for floating
463 * The destination type doesn't matter on newer generations,
464 * so we set the type to match src0 so we can compact the
467 return set_condmod(condition
,
468 emit(BRW_OPCODE_CMP
, retype(dst
, src0
.type
),
469 fix_unsigned_negate(src0
),
470 fix_unsigned_negate(src1
)));
474 * Gen4 predicated IF.
477 IF(brw_predicate predicate
) const
479 return set_predicate(predicate
, emit(BRW_OPCODE_IF
));
483 * Gen6 IF with embedded comparison.
486 IF(const src_reg
&src0
, const src_reg
&src1
,
487 brw_conditional_mod condition
) const
489 assert(shader
->devinfo
->gen
== 6);
490 return set_condmod(condition
,
493 fix_unsigned_negate(src0
),
494 fix_unsigned_negate(src1
)));
498 * Emit a linear interpolation instruction.
501 LRP(const dst_reg
&dst
, const src_reg
&x
, const src_reg
&y
,
502 const src_reg
&a
) const
504 if (shader
->devinfo
->gen
>= 6 && shader
->devinfo
->gen
<= 10) {
505 /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
506 * we need to reorder the operands.
508 return emit(BRW_OPCODE_LRP
, dst
, a
, y
, x
);
511 /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
512 const dst_reg y_times_a
= vgrf(dst
.type
);
513 const dst_reg one_minus_a
= vgrf(dst
.type
);
514 const dst_reg x_times_one_minus_a
= vgrf(dst
.type
);
516 MUL(y_times_a
, y
, a
);
517 ADD(one_minus_a
, negate(a
), brw_imm_f(1.0f
));
518 MUL(x_times_one_minus_a
, x
, src_reg(one_minus_a
));
519 return ADD(dst
, src_reg(x_times_one_minus_a
), src_reg(y_times_a
));
523 backend_shader
*shader
;
527 * Workaround for negation of UD registers. See comment in
528 * fs_generator::generate_code() for the details.
531 fix_unsigned_negate(const src_reg
&src
) const
533 if (src
.type
== BRW_REGISTER_TYPE_UD
&& src
.negate
) {
534 dst_reg temp
= vgrf(BRW_REGISTER_TYPE_UD
);
536 return src_reg(temp
);
543 * Workaround for register access modes not supported by the ternary
544 * instruction encoding.
547 fix_3src_operand(const src_reg
&src
) const
549 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
550 * able to use vertical stride of zero to replicate the vec4 uniform, like
552 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
554 * But you can't, since vertical stride is always four in three-source
555 * instructions. Instead, insert a MOV instruction to do the replication so
556 * that the three-source instruction can consume it.
559 /* The MOV is only needed if the source is a uniform or immediate. */
560 if (src
.file
!= UNIFORM
&& src
.file
!= IMM
)
563 if (src
.file
== UNIFORM
&& brw_is_single_value_swizzle(src
.swizzle
))
566 const dst_reg expanded
= vgrf(src
.type
);
567 emit(VEC4_OPCODE_UNPACK_UNIFORM
, expanded
, src
);
568 return src_reg(expanded
);
572 * Workaround for register access modes not supported by the math
576 fix_math_operand(const src_reg
&src
) const
578 /* The gen6 math instruction ignores the source modifiers --
579 * swizzle, abs, negate, and at least some parts of the register
580 * region description.
582 * Rather than trying to enumerate all these cases, *always* expand the
583 * operand to a temp GRF for gen6.
585 * For gen7, keep the operand as-is, except if immediate, which gen7 still
588 if (shader
->devinfo
->gen
== 6 ||
589 (shader
->devinfo
->gen
== 7 && src
.file
== IMM
)) {
590 const dst_reg tmp
= vgrf(src
.type
);
599 * Workaround other weirdness of the math instruction.
602 fix_math_instruction(instruction
*inst
) const
604 if (shader
->devinfo
->gen
== 6 &&
605 inst
->dst
.writemask
!= WRITEMASK_XYZW
) {
606 const dst_reg tmp
= vgrf(inst
->dst
.type
);
607 MOV(inst
->dst
, src_reg(tmp
));
610 } else if (shader
->devinfo
->gen
< 6) {
611 const unsigned sources
= (inst
->src
[1].file
== BAD_FILE
? 1 : 2);
613 inst
->mlen
= sources
;
622 unsigned _dispatch_width
;
624 bool force_writemask_all
;
626 /** Debug annotation info. */