cad371248c49112a219d030798c31c33398cc23a
3 * Copyright © 2010-2015 Intel Corporation
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 #include "brw_shader.h"
32 class fs_reg
: public backend_reg
{
34 DECLARE_RALLOC_CXX_OPERATORS(fs_reg
)
39 fs_reg(struct ::brw_reg reg
);
40 fs_reg(enum brw_reg_file file
, int nr
);
41 fs_reg(enum brw_reg_file file
, int nr
, enum brw_reg_type type
);
43 bool equals(const fs_reg
&r
) const;
44 bool is_contiguous() const;
47 * Return the size in bytes of a single logical component of the
48 * register assuming the given execution width.
50 unsigned component_size(unsigned width
) const;
52 /** Register region horizontal stride */
59 assert(reg
.file
!= IMM
);
60 reg
.negate
= !reg
.negate
;
65 retype(fs_reg reg
, enum brw_reg_type type
)
72 byte_offset(fs_reg reg
, unsigned delta
)
83 const unsigned suboffset
= reg
.offset
+ delta
;
84 reg
.nr
+= suboffset
/ REG_SIZE
;
85 reg
.offset
= suboffset
% REG_SIZE
;
90 const unsigned suboffset
= reg
.subnr
+ delta
;
91 reg
.nr
+= suboffset
/ REG_SIZE
;
92 reg
.subnr
= suboffset
% REG_SIZE
;
103 horiz_offset(const fs_reg
®
, unsigned delta
)
109 /* These only have a single component that is implicitly splatted. A
110 * horizontal offset should be a harmless no-op.
111 * XXX - Handle vector immediates correctly.
117 return byte_offset(reg
, delta
* reg
.stride
* type_sz(reg
.type
));
123 const unsigned stride
= reg
.hstride
? 1 << (reg
.hstride
- 1) : 0;
124 return byte_offset(reg
, delta
* stride
* type_sz(reg
.type
));
127 unreachable("Invalid register file");
131 offset(fs_reg reg
, unsigned width
, unsigned delta
)
142 return byte_offset(reg
, delta
* reg
.component_size(width
));
150 * Get the scalar channel of \p reg given by \p idx and replicate it to all
151 * channels of the result.
154 component(fs_reg reg
, unsigned idx
)
156 reg
= horiz_offset(reg
, idx
);
162 * Return an integer identifying the discrete address space a register is
163 * contained in. A register is by definition fully contained in the single
164 * reg_space it belongs to, so two registers with different reg_space ids are
165 * guaranteed not to overlap. Most register files are a single reg_space of
166 * its own, only the VGRF file is composed of multiple discrete address
167 * spaces, one for each VGRF allocation.
169 static inline uint32_t
170 reg_space(const fs_reg
&r
)
172 return r
.file
<< 16 | (r
.file
== VGRF
? r
.nr
: 0);
176 * Return the base offset in bytes of a register relative to the start of its
179 static inline unsigned
180 reg_offset(const fs_reg
&r
)
182 return (r
.file
== VGRF
|| r
.file
== IMM
? 0 : r
.nr
) *
183 (r
.file
== UNIFORM
? 4 : REG_SIZE
) + r
.offset
+
184 (r
.file
== ARF
|| r
.file
== FIXED_GRF
? r
.subnr
: 0);
188 * Return the amount of padding in bytes left unused between individual
189 * components of register \p r due to a (horizontal) stride value greater than
190 * one, or zero if components are tightly packed in the register file.
192 static inline unsigned
193 reg_padding(const fs_reg
&r
)
195 const unsigned stride
= ((r
.file
!= ARF
&& r
.file
!= FIXED_GRF
) ? r
.stride
:
197 1 << (r
.hstride
- 1));
198 return (MAX2(1, stride
) - 1) * type_sz(r
.type
);
202 * Return whether the register region starting at \p r and spanning \p dr
203 * bytes could potentially overlap the register region starting at \p s and
204 * spanning \p ds bytes.
207 regions_overlap(const fs_reg
&r
, unsigned dr
, const fs_reg
&s
, unsigned ds
)
209 if (r
.file
== MRF
&& (r
.nr
& BRW_MRF_COMPR4
)) {
211 t
.nr
&= ~BRW_MRF_COMPR4
;
212 /* COMPR4 regions are translated by the hardware during decompression
213 * into two separate half-regions 4 MRFs apart from each other.
215 return regions_overlap(t
, dr
/ 2, s
, ds
) ||
216 regions_overlap(byte_offset(t
, 4 * REG_SIZE
), dr
/ 2, s
, ds
);
218 } else if (s
.file
== MRF
&& (s
.nr
& BRW_MRF_COMPR4
)) {
219 return regions_overlap(s
, ds
, r
, dr
);
222 return reg_space(r
) == reg_space(s
) &&
223 !(reg_offset(r
) + dr
<= reg_offset(s
) ||
224 reg_offset(s
) + ds
<= reg_offset(r
));
229 * Check that the register region given by r [r.offset, r.offset + dr[
230 * is fully contained inside the register region given by s
231 * [s.offset, s.offset + ds[.
234 region_contained_in(const fs_reg
&r
, unsigned dr
, const fs_reg
&s
, unsigned ds
)
236 return reg_space(r
) == reg_space(s
) &&
237 reg_offset(r
) >= reg_offset(s
) &&
238 reg_offset(r
) + dr
<= reg_offset(s
) + ds
;
242 * Return whether the given register region is n-periodic, i.e. whether the
243 * original region remains invariant after shifting it by \p n scalar
247 is_periodic(const fs_reg
®
, unsigned n
)
249 if (reg
.file
== BAD_FILE
|| reg
.is_null()) {
252 } else if (reg
.file
== IMM
) {
253 const unsigned period
= (reg
.type
== BRW_REGISTER_TYPE_UV
||
254 reg
.type
== BRW_REGISTER_TYPE_V
? 8 :
255 reg
.type
== BRW_REGISTER_TYPE_VF
? 4 :
257 return n
% period
== 0;
259 } else if (reg
.file
== ARF
|| reg
.file
== FIXED_GRF
) {
260 const unsigned period
= (reg
.hstride
== 0 && reg
.vstride
== 0 ? 1 :
261 reg
.vstride
== 0 ? 1 << reg
.width
:
263 return n
% period
== 0;
266 return reg
.stride
== 0;
271 is_uniform(const fs_reg
®
)
273 return is_periodic(reg
, 1);
277 * Get the specified 8-component quarter of a register.
278 * XXX - Maybe come up with a less misleading name for this (e.g. quarter())?
281 half(const fs_reg
®
, unsigned idx
)
284 return horiz_offset(reg
, 8 * idx
);
288 * Reinterpret each channel of register \p reg as a vector of values of the
289 * given smaller type and take the i-th subcomponent from each.
292 subscript(fs_reg reg
, brw_reg_type type
, unsigned i
)
294 assert((i
+ 1) * type_sz(type
) <= type_sz(reg
.type
));
296 if (reg
.file
== ARF
|| reg
.file
== FIXED_GRF
) {
297 /* The stride is encoded inconsistently for fixed GRF and ARF registers
298 * as the log2 of the actual vertical and horizontal strides.
300 const int delta
= _mesa_logbase2(type_sz(reg
.type
)) -
301 _mesa_logbase2(type_sz(type
));
302 reg
.hstride
+= (reg
.hstride
? delta
: 0);
303 reg
.vstride
+= (reg
.vstride
? delta
: 0);
305 } else if (reg
.file
== IMM
) {
306 assert(reg
.type
== type
);
309 reg
.stride
*= type_sz(reg
.type
) / type_sz(type
);
312 return byte_offset(retype(reg
, type
), i
* type_sz(type
));
315 static const fs_reg reg_undef
;
317 class fs_inst
: public backend_instruction
{
318 fs_inst
&operator=(const fs_inst
&);
320 void init(enum opcode opcode
, uint8_t exec_width
, const fs_reg
&dst
,
321 const fs_reg
*src
, unsigned sources
);
324 DECLARE_RALLOC_CXX_OPERATORS(fs_inst
)
327 fs_inst(enum opcode opcode
, uint8_t exec_size
);
328 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
);
329 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
331 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
332 const fs_reg
&src0
, const fs_reg
&src1
);
333 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
334 const fs_reg
&src0
, const fs_reg
&src1
, const fs_reg
&src2
);
335 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
336 const fs_reg src
[], unsigned sources
);
337 fs_inst(const fs_inst
&that
);
340 void resize_sources(uint8_t num_sources
);
342 bool equals(fs_inst
*inst
) const;
343 bool is_send_from_grf() const;
344 bool is_partial_write() const;
345 bool is_copy_payload(const brw::simple_allocator
&grf_alloc
) const;
346 unsigned components_read(unsigned i
) const;
347 unsigned size_read(int arg
) const;
348 bool can_do_source_mods(const struct gen_device_info
*devinfo
);
349 bool can_change_types() const;
350 bool has_side_effects() const;
351 bool has_source_and_destination_hazard() const;
354 * Return the subset of flag registers read by the instruction as a bitset
355 * with byte granularity.
357 unsigned flags_read(const gen_device_info
*devinfo
) const;
360 * Return the subset of flag registers updated by the instruction (either
361 * partially or fully) as a bitset with byte granularity.
363 unsigned flags_written() const;
368 uint8_t sources
; /**< Number of fs_reg sources. */
371 bool pi_noperspective
:1; /**< Pixel interpolator noperspective flag */
375 * Make the execution of \p inst dependent on the evaluation of a possibly
376 * inverted predicate.
378 static inline fs_inst
*
379 set_predicate_inv(enum brw_predicate pred
, bool inverse
,
382 inst
->predicate
= pred
;
383 inst
->predicate_inverse
= inverse
;
388 * Make the execution of \p inst dependent on the evaluation of a predicate.
390 static inline fs_inst
*
391 set_predicate(enum brw_predicate pred
, fs_inst
*inst
)
393 return set_predicate_inv(pred
, false, inst
);
397 * Write the result of evaluating the condition given by \p mod to a flag
400 static inline fs_inst
*
401 set_condmod(enum brw_conditional_mod mod
, fs_inst
*inst
)
403 inst
->conditional_mod
= mod
;
408 * Clamp the result of \p inst to the saturation range of its destination
411 static inline fs_inst
*
412 set_saturate(bool saturate
, fs_inst
*inst
)
414 inst
->saturate
= saturate
;
419 * Return the number of dataflow registers written by the instruction (either
420 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
421 * register_size)'. The somewhat arbitrary register size unit is 4B for the
422 * UNIFORM and IMM files and 32B for all other files.
425 regs_written(const fs_inst
*inst
)
427 assert(inst
->dst
.file
!= UNIFORM
&& inst
->dst
.file
!= IMM
);
428 return DIV_ROUND_UP(reg_offset(inst
->dst
) % REG_SIZE
+
430 MIN2(inst
->size_written
, reg_padding(inst
->dst
)),
435 * Return the number of dataflow registers read by the instruction (either
436 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
437 * register_size)'. The somewhat arbitrary register size unit is 4B for the
438 * UNIFORM and IMM files and 32B for all other files.
441 regs_read(const fs_inst
*inst
, unsigned i
)
443 const unsigned reg_size
=
444 inst
->src
[i
].file
== UNIFORM
|| inst
->src
[i
].file
== IMM
? 4 : REG_SIZE
;
445 return DIV_ROUND_UP(reg_offset(inst
->src
[i
]) % reg_size
+
447 MIN2(inst
->size_read(i
), reg_padding(inst
->src
[i
])),