cae41a02b8c36f67886724cb8aa89ef3bdd1e17f
3 * Copyright © 2010-2015 Intel Corporation
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
28 #include "brw_shader.h"
32 class fs_reg
: public backend_reg
{
34 DECLARE_RALLOC_CXX_OPERATORS(fs_reg
)
39 fs_reg(struct ::brw_reg reg
);
40 fs_reg(enum brw_reg_file file
, int nr
);
41 fs_reg(enum brw_reg_file file
, int nr
, enum brw_reg_type type
);
43 bool equals(const fs_reg
&r
) const;
44 bool is_contiguous() const;
47 * Return the size in bytes of a single logical component of the
48 * register assuming the given execution width.
50 unsigned component_size(unsigned width
) const;
52 /** Smear a channel of the reg to all channels. */
53 fs_reg
&set_smear(unsigned subreg
);
56 * Offset in bytes from the start of the register. Values up to a
57 * backend_reg::reg_offset unit are valid.
61 /** Register region horizontal stride */
68 assert(reg
.file
!= IMM
);
69 reg
.negate
= !reg
.negate
;
74 retype(fs_reg reg
, enum brw_reg_type type
)
81 byte_offset(fs_reg reg
, unsigned delta
)
89 const unsigned reg_size
= (reg
.file
== UNIFORM
? 4 : REG_SIZE
);
90 const unsigned suboffset
= reg
.subreg_offset
+ delta
;
91 reg
.reg_offset
+= suboffset
/ reg_size
;
92 reg
.subreg_offset
= suboffset
% reg_size
;
96 const unsigned suboffset
= reg
.subreg_offset
+ delta
;
97 reg
.nr
+= suboffset
/ REG_SIZE
;
98 reg
.subreg_offset
= suboffset
% REG_SIZE
;
103 const unsigned suboffset
= reg
.subnr
+ delta
;
104 reg
.nr
+= suboffset
/ REG_SIZE
;
105 reg
.subnr
= suboffset
% REG_SIZE
;
116 horiz_offset(fs_reg reg
, unsigned delta
)
122 /* These only have a single component that is implicitly splatted. A
123 * horizontal offset should be a harmless no-op.
129 return byte_offset(reg
, delta
* reg
.stride
* type_sz(reg
.type
));
138 * Get the scalar channel of \p reg given by \p idx and replicate it to all
139 * channels of the result.
142 component(fs_reg reg
, unsigned idx
)
144 reg
= horiz_offset(reg
, idx
);
150 * Return whether the given register region is n-periodic, i.e. whether the
151 * original region remains invariant after shifting it by \p n scalar
 * channels.
155 is_periodic(const fs_reg
®
, unsigned n
)
157 if (reg
.file
== BAD_FILE
|| reg
.is_null()) {
160 } else if (reg
.file
== IMM
) {
161 const unsigned period
= (reg
.type
== BRW_REGISTER_TYPE_UV
||
162 reg
.type
== BRW_REGISTER_TYPE_V
? 8 :
163 reg
.type
== BRW_REGISTER_TYPE_VF
? 4 :
165 return n
% period
== 0;
167 } else if (reg
.file
== ARF
|| reg
.file
== FIXED_GRF
) {
168 const unsigned period
= (reg
.hstride
== 0 && reg
.vstride
== 0 ? 1 :
169 reg
.vstride
== 0 ? 1 << reg
.width
:
171 return n
% period
== 0;
174 return reg
.stride
== 0;
179 is_uniform(const fs_reg
®
)
181 return is_periodic(reg
, 1);
185 * Get either of the 8-component halves of a 16-component register.
187 * Note: this also works if \c reg represents a SIMD16 pair of registers.
190 half(fs_reg reg
, unsigned idx
)
202 return horiz_offset(reg
, 8 * idx
);
207 unreachable("Cannot take half of this register type");
213 * Reinterpret each channel of register \p reg as a vector of values of the
214 * given smaller type and take the i-th subcomponent from each.
217 subscript(fs_reg reg
, brw_reg_type type
, unsigned i
)
219 assert((i
+ 1) * type_sz(type
) <= type_sz(reg
.type
));
221 if (reg
.file
== ARF
|| reg
.file
== FIXED_GRF
) {
222 /* The stride is encoded inconsistently for fixed GRF and ARF registers
223 * as the log2 of the actual vertical and horizontal strides.
225 const int delta
= _mesa_logbase2(type_sz(reg
.type
)) -
226 _mesa_logbase2(type_sz(type
));
227 reg
.hstride
+= (reg
.hstride
? delta
: 0);
228 reg
.vstride
+= (reg
.vstride
? delta
: 0);
230 } else if (reg
.file
== IMM
) {
231 assert(reg
.type
== type
);
234 reg
.stride
*= type_sz(reg
.type
) / type_sz(type
);
237 return byte_offset(retype(reg
, type
), i
* type_sz(type
));
240 static const fs_reg reg_undef
;
242 class fs_inst
: public backend_instruction
{
243 fs_inst
&operator=(const fs_inst
&);
245 void init(enum opcode opcode
, uint8_t exec_width
, const fs_reg
&dst
,
246 const fs_reg
*src
, unsigned sources
);
249 DECLARE_RALLOC_CXX_OPERATORS(fs_inst
)
252 fs_inst(enum opcode opcode
, uint8_t exec_size
);
253 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
);
254 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
256 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
257 const fs_reg
&src0
, const fs_reg
&src1
);
258 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
259 const fs_reg
&src0
, const fs_reg
&src1
, const fs_reg
&src2
);
260 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
261 const fs_reg src
[], unsigned sources
);
262 fs_inst(const fs_inst
&that
);
265 void resize_sources(uint8_t num_sources
);
267 bool equals(fs_inst
*inst
) const;
268 bool overwrites_reg(const fs_reg
®
) const;
269 bool is_send_from_grf() const;
270 bool is_partial_write() const;
271 bool is_copy_payload(const brw::simple_allocator
&grf_alloc
) const;
272 unsigned components_read(unsigned i
) const;
273 int regs_read(int arg
) const;
274 bool can_do_source_mods(const struct brw_device_info
*devinfo
);
275 bool can_change_types() const;
276 bool has_side_effects() const;
277 bool has_source_and_destination_hazard() const;
280 * Return the subset of flag registers read by the instruction as a bitset
281 * with byte granularity.
283 unsigned flags_read(const brw_device_info
*devinfo
) const;
286 * Return the subset of flag registers updated by the instruction (either
287 * partially or fully) as a bitset with byte granularity.
289 unsigned flags_written() const;
294 uint8_t sources
; /**< Number of fs_reg sources. */
297 * Execution size of the instruction. This is used by the generator to
298 * generate the correct binary for the given fs_inst. Current valid
299 * values are 1, 8, 16.
304 * Channel group from the hardware execution and predication mask that
305 * should be applied to the instruction. The subset of channel enable
306 * signals (calculated from the EU control flow and predication state)
307 * given by [group, group + exec_size) will be used to mask GRF writes and
308 * any other side effects of the instruction.
313 bool pi_noperspective
:1; /**< Pixel interpolator noperspective flag */
317 * Make the execution of \p inst dependent on the evaluation of a possibly
318 * inverted predicate.
320 static inline fs_inst
*
321 set_predicate_inv(enum brw_predicate pred
, bool inverse
,
324 inst
->predicate
= pred
;
325 inst
->predicate_inverse
= inverse
;
330 * Make the execution of \p inst dependent on the evaluation of a predicate.
332 static inline fs_inst
*
333 set_predicate(enum brw_predicate pred
, fs_inst
*inst
)
335 return set_predicate_inv(pred
, false, inst
);
339 * Write the result of evaluating the condition given by \p mod to a flag
342 static inline fs_inst
*
343 set_condmod(enum brw_conditional_mod mod
, fs_inst
*inst
)
345 inst
->conditional_mod
= mod
;
350 * Clamp the result of \p inst to the saturation range of its destination
353 static inline fs_inst
*
354 set_saturate(bool saturate
, fs_inst
*inst
)
356 inst
->saturate
= saturate
;