3 * Copyright © 2010-2015 Intel Corporation
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #ifndef BRW_FS_BUILDER_H
26 #define BRW_FS_BUILDER_H
28 #include "brw_ir_fs.h"
29 #include "brw_shader.h"
30 #include "brw_context.h"
/**
 * Toolbox to assemble an FS IR program out of individual instructions.
 *
 * This object is meant to have an interface consistent with
 * brw::vec4_builder.  They cannot be fully interchangeable because
 * brw::fs_builder generates scalar code while brw::vec4_builder generates
 * vector code.
 */
43 /** Type used in this IR to represent a source of an instruction. */
44 typedef fs_reg src_reg
;
46 /** Type used in this IR to represent the destination of an instruction. */
47 typedef fs_reg dst_reg
;
49 /** Type used in this IR to represent an instruction. */
50 typedef fs_inst instruction
;
53 * Construct an fs_builder that inserts instructions into \p shader.
54 * \p dispatch_width gives the native execution width of the program.
56 fs_builder(backend_shader
*shader
,
57 unsigned dispatch_width
) :
58 shader(shader
), block(NULL
), cursor(NULL
),
59 _dispatch_width(dispatch_width
),
61 force_writemask_all(false),
67 * Construct an fs_builder that inserts instructions before \p cursor in
68 * basic block \p block, inheriting other code generation parameters
72 at(bblock_t
*block
, exec_node
*cursor
) const
74 fs_builder bld
= *this;
81 * Construct an fs_builder appending instructions at the end of the
82 * instruction list of the shader, inheriting other code generation
83 * parameters from this.
88 return at(NULL
, (exec_node
*)&shader
->instructions
.tail
);
92 * Construct a builder specifying the default SIMD width and group of
93 * channel enable signals, inheriting other code generation parameters
96 * \p n gives the default SIMD width, \p i gives the slot group used for
97 * predication and control flow masking in multiples of \p n channels.
100 group(unsigned n
, unsigned i
) const
102 assert(n
<= dispatch_width() &&
103 i
< dispatch_width() / n
);
104 fs_builder bld
= *this;
105 bld
._dispatch_width
= n
;
111 * Alias for group() with width equal to eight.
114 half(unsigned i
) const
120 * Construct a builder with per-channel control flow execution masking
121 * disabled if \p b is true. If control flow execution masking is
122 * already disabled this has no effect.
125 exec_all(bool b
= true) const
127 fs_builder bld
= *this;
129 bld
.force_writemask_all
= true;
134 * Construct a builder with the given debug annotation info.
137 annotate(const char *str
, const void *ir
= NULL
) const
139 fs_builder bld
= *this;
140 bld
.annotation
.str
= str
;
141 bld
.annotation
.ir
= ir
;
146 * Get the SIMD width in use.
149 dispatch_width() const
151 return _dispatch_width
;
155 * Allocate a virtual register of natural vector size (one for this IR)
156 * and SIMD width. \p n gives the amount of space to allocate in
157 * dispatch_width units (which is just enough space for one logical
158 * component in this IR).
161 vgrf(enum brw_reg_type type
, unsigned n
= 1) const
163 return dst_reg(GRF
, shader
->alloc
.allocate(
164 DIV_ROUND_UP(n
* type_sz(type
) * dispatch_width(),
166 type
, dispatch_width());
170 * Create a null register of floating type.
175 return dst_reg(retype(brw_null_vec(dispatch_width()),
176 BRW_REGISTER_TYPE_F
));
180 * Create a null register of signed integer type.
185 return dst_reg(retype(brw_null_vec(dispatch_width()),
186 BRW_REGISTER_TYPE_D
));
190 * Create a null register of unsigned integer type.
195 return dst_reg(retype(brw_null_vec(dispatch_width()),
196 BRW_REGISTER_TYPE_UD
));
200 * Get the mask of SIMD channels enabled by dispatch and not yet
201 * disabled by discard.
204 sample_mask_reg() const
206 const bool uses_kill
=
207 (shader
->stage
== MESA_SHADER_FRAGMENT
&&
208 ((brw_wm_prog_data
*)shader
->stage_prog_data
)->uses_kill
);
209 return (shader
->stage
!= MESA_SHADER_FRAGMENT
? src_reg(0xffff) :
210 uses_kill
? brw_flag_reg(0, 1) :
211 retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD
));
215 * Insert an instruction into the program.
218 emit(const instruction
&inst
) const
220 return emit(new(shader
->mem_ctx
) instruction(inst
));
224 * Create and insert a nullary control instruction into the program.
227 emit(enum opcode opcode
) const
229 return emit(instruction(opcode
, dispatch_width()));
233 * Create and insert a nullary instruction into the program.
236 emit(enum opcode opcode
, const dst_reg
&dst
) const
238 return emit(instruction(opcode
, dst
));
242 * Create and insert a unary instruction into the program.
245 emit(enum opcode opcode
, const dst_reg
&dst
, const src_reg
&src0
) const
248 case SHADER_OPCODE_RCP
:
249 case SHADER_OPCODE_RSQ
:
250 case SHADER_OPCODE_SQRT
:
251 case SHADER_OPCODE_EXP2
:
252 case SHADER_OPCODE_LOG2
:
253 case SHADER_OPCODE_SIN
:
254 case SHADER_OPCODE_COS
:
255 return fix_math_instruction(
256 emit(instruction(opcode
, dst
.width
, dst
,
257 fix_math_operand(src0
))));
260 return emit(instruction(opcode
, dst
.width
, dst
, src0
));
265 * Create and insert a binary instruction into the program.
268 emit(enum opcode opcode
, const dst_reg
&dst
, const src_reg
&src0
,
269 const src_reg
&src1
) const
272 case SHADER_OPCODE_POW
:
273 case SHADER_OPCODE_INT_QUOTIENT
:
274 case SHADER_OPCODE_INT_REMAINDER
:
275 return fix_math_instruction(
276 emit(instruction(opcode
, dst
.width
, dst
,
277 fix_math_operand(src0
),
278 fix_math_operand(src1
))));
281 return emit(instruction(opcode
, dst
.width
, dst
, src0
, src1
));
287 * Create and insert a ternary instruction into the program.
290 emit(enum opcode opcode
, const dst_reg
&dst
, const src_reg
&src0
,
291 const src_reg
&src1
, const src_reg
&src2
) const
295 case BRW_OPCODE_BFI2
:
298 return emit(instruction(opcode
, dst
.width
, dst
,
299 fix_3src_operand(src0
),
300 fix_3src_operand(src1
),
301 fix_3src_operand(src2
)));
304 return emit(instruction(opcode
, dst
.width
, dst
, src0
, src1
, src2
));
309 * Insert a preallocated instruction into the program.
312 emit(instruction
*inst
) const
314 assert(inst
->exec_size
== dispatch_width() ||
315 force_writemask_all
);
316 assert(_group
== 0 || _group
== 8);
318 inst
->force_sechalf
= (_group
== 8);
319 inst
->force_writemask_all
= force_writemask_all
;
320 inst
->annotation
= annotation
.str
;
321 inst
->ir
= annotation
.ir
;
324 static_cast<instruction
*>(cursor
)->insert_before(block
, inst
);
326 cursor
->insert_before(inst
);
332 * Select \p src0 if the comparison of both sources with the given
333 * conditional mod evaluates to true, otherwise select \p src1.
335 * Generally useful to get the minimum or maximum of two values.
338 emit_minmax(const dst_reg
&dst
, const src_reg
&src0
,
339 const src_reg
&src1
, brw_conditional_mod mod
) const
341 if (shader
->devinfo
->gen
>= 6) {
342 set_condmod(mod
, SEL(dst
, fix_unsigned_negate(src0
),
343 fix_unsigned_negate(src1
)));
345 CMP(null_reg_d(), src0
, src1
, mod
);
346 set_predicate(BRW_PREDICATE_NORMAL
,
347 SEL(dst
, src0
, src1
));
352 * Copy any live channel from \p src to the first channel of \p dst.
355 emit_uniformize(const dst_reg
&dst
, const src_reg
&src
) const
357 const fs_builder ubld
= exec_all();
358 const dst_reg chan_index
= vgrf(BRW_REGISTER_TYPE_UD
);
360 ubld
.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL
, component(chan_index
, 0));
361 ubld
.emit(SHADER_OPCODE_BROADCAST
, component(dst
, 0),
362 src
, component(chan_index
, 0));
/**
 * Assorted arithmetic ops.
 *
 * Each macro defines a member function named after the opcode that
 * forwards to emit(BRW_OPCODE_<op>, ...) with one, two or three sources.
 * ALU2_ACC additionally flags the emitted instruction as writing the
 * accumulator.
 *
 * NOTE(review): this copy of the file had lost the #define lines of
 * ALU1, ALU2 and ALU3, the return-type/brace continuation lines, and
 * everything after the ALU3 body (the macro invocations and any #undef
 * lines).  The macro definitions are restored here; the invocation list
 * is not present in this copy and is not reproduced -- restore it from
 * upstream.
 */
#define ALU1(op)                                                        \
instruction *                                                           \
op(const dst_reg &dst, const src_reg &src0) const                       \
{                                                                       \
   return emit(BRW_OPCODE_##op, dst, src0);                             \
}

#define ALU2(op)                                                        \
instruction *                                                           \
op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const  \
{                                                                       \
   return emit(BRW_OPCODE_##op, dst, src0, src1);                       \
}

#define ALU2_ACC(op)                                                    \
instruction *                                                           \
op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const  \
{                                                                       \
   instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);          \
   inst->writes_accumulator = true;                                     \
   return inst;                                                         \
}

#define ALU3(op)                                                        \
instruction *                                                           \
op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,        \
   const src_reg &src2) const                                           \
{                                                                       \
   return emit(BRW_OPCODE_##op, dst, src0, src1, src2);                 \
}
450 * CMP: Sets the low bit of the destination channels with the result
451 * of the comparison, while the upper bits are undefined, and updates
452 * the flag register with the packed 16 bits of the result.
455 CMP(const dst_reg
&dst
, const src_reg
&src0
, const src_reg
&src1
,
456 brw_conditional_mod condition
) const
458 /* Take the instruction:
460 * CMP null<d> src0<f> src1<f>
462 * Original gen4 does type conversion to the destination type
463 * before comparison, producing garbage results for floating
466 * The destination type doesn't matter on newer generations,
467 * so we set the type to match src0 so we can compact the
470 return set_condmod(condition
,
471 emit(BRW_OPCODE_CMP
, retype(dst
, src0
.type
),
472 fix_unsigned_negate(src0
),
473 fix_unsigned_negate(src1
)));
477 * Gen4 predicated IF.
480 IF(brw_predicate predicate
) const
482 return set_predicate(predicate
, emit(BRW_OPCODE_IF
));
486 * Emit a linear interpolation instruction.
489 LRP(const dst_reg
&dst
, const src_reg
&x
, const src_reg
&y
,
490 const src_reg
&a
) const
492 if (shader
->devinfo
->gen
>= 6) {
493 /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
494 * we need to reorder the operands.
496 return emit(BRW_OPCODE_LRP
, dst
, a
, y
, x
);
499 /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
500 const dst_reg y_times_a
= vgrf(dst
.type
);
501 const dst_reg one_minus_a
= vgrf(dst
.type
);
502 const dst_reg x_times_one_minus_a
= vgrf(dst
.type
);
504 MUL(y_times_a
, y
, a
);
505 ADD(one_minus_a
, negate(a
), src_reg(1.0f
));
506 MUL(x_times_one_minus_a
, x
, src_reg(one_minus_a
));
507 return ADD(dst
, src_reg(x_times_one_minus_a
), src_reg(y_times_a
));
512 * Collect a number of registers in a contiguous range of registers.
515 LOAD_PAYLOAD(const dst_reg
&dst
, const src_reg
*src
,
516 unsigned sources
, unsigned header_size
) const
518 assert(dst
.width
% 8 == 0);
519 instruction
*inst
= emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD
,
520 dst
.width
, dst
, src
, sources
));
521 inst
->header_size
= header_size
;
523 for (unsigned i
= 0; i
< header_size
; i
++)
524 assert(src
[i
].file
!= GRF
||
525 src
[i
].width
* type_sz(src
[i
].type
) == 32);
526 inst
->regs_written
= header_size
;
528 for (unsigned i
= header_size
; i
< sources
; ++i
)
529 assert(src
[i
].file
!= GRF
||
530 src
[i
].width
== dst
.width
);
531 inst
->regs_written
+= (sources
- header_size
) * (dst
.width
/ 8);
536 backend_shader
*shader
;
540 * Workaround for negation of UD registers. See comment in
541 * fs_generator::generate_code() for more details.
544 fix_unsigned_negate(const src_reg
&src
) const
546 if (src
.type
== BRW_REGISTER_TYPE_UD
&&
548 dst_reg temp
= vgrf(BRW_REGISTER_TYPE_UD
);
550 return src_reg(temp
);
557 * Workaround for source register modes not supported by the ternary
558 * instruction encoding.
561 fix_3src_operand(const src_reg
&src
) const
563 if (src
.file
== GRF
|| src
.file
== UNIFORM
|| src
.stride
> 1) {
566 dst_reg expanded
= vgrf(src
.type
);
573 * Workaround for source register modes not supported by the math
577 fix_math_operand(const src_reg
&src
) const
579 /* Can't do hstride == 0 args on gen6 math, so expand it out. We
580 * might be able to do better by doing execsize = 1 math and then
581 * expanding that result out, but we would need to be careful with
584 * Gen6 hardware ignores source modifiers (negate and abs) on math
585 * instructions, so we also move to a temp to set those up.
587 * Gen7 relaxes most of the above restrictions, but still can't use IMM
590 if ((shader
->devinfo
->gen
== 6 &&
591 (src
.file
== IMM
|| src
.file
== UNIFORM
||
592 src
.abs
|| src
.negate
)) ||
593 (shader
->devinfo
->gen
== 7 && src
.file
== IMM
)) {
594 const dst_reg tmp
= vgrf(src
.type
);
603 * Workaround other weirdness of the math instruction.
606 fix_math_instruction(instruction
*inst
) const
608 if (shader
->devinfo
->gen
< 6) {
610 inst
->mlen
= inst
->sources
* dispatch_width() / 8;
612 if (inst
->sources
> 1) {
613 /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
616 * "Operand0[7]. For the INT DIV functions, this operand is the
619 * "Operand1[7]. For the INT DIV functions, this operand is the
622 const bool is_int_div
= inst
->opcode
!= SHADER_OPCODE_POW
;
623 const fs_reg src0
= is_int_div
? inst
->src
[1] : inst
->src
[0];
624 const fs_reg src1
= is_int_div
? inst
->src
[0] : inst
->src
[1];
626 inst
->resize_sources(1);
629 at(block
, inst
).MOV(fs_reg(MRF
, inst
->base_mrf
+ 1, src1
.type
,
630 dispatch_width()), src1
);
640 unsigned _dispatch_width
;
642 bool force_writemask_all
;
644 /** Debug annotation info. */