3 * Copyright © 2010-2015 Intel Corporation
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 #include "brw_shader.h"
32 class fs_reg
: public backend_reg
{
34 DECLARE_RALLOC_CXX_OPERATORS(fs_reg
)
39 fs_reg(struct ::brw_reg reg
);
40 fs_reg(enum brw_reg_file file
, int nr
);
41 fs_reg(enum brw_reg_file file
, int nr
, enum brw_reg_type type
);
43 bool equals(const fs_reg
&r
) const;
44 bool negative_equals(const fs_reg
&r
) const;
45 bool is_contiguous() const;
48 * Return the size in bytes of a single logical component of the
49 * register assuming the given execution width.
51 unsigned component_size(unsigned width
) const;
53 /** Register region horizontal stride */
60 assert(reg
.file
!= IMM
);
61 reg
.negate
= !reg
.negate
;
66 retype(fs_reg reg
, enum brw_reg_type type
)
73 byte_offset(fs_reg reg
, unsigned delta
)
84 const unsigned suboffset
= reg
.offset
+ delta
;
85 reg
.nr
+= suboffset
/ REG_SIZE
;
86 reg
.offset
= suboffset
% REG_SIZE
;
91 const unsigned suboffset
= reg
.subnr
+ delta
;
92 reg
.nr
+= suboffset
/ REG_SIZE
;
93 reg
.subnr
= suboffset
% REG_SIZE
;
104 horiz_offset(const fs_reg
®
, unsigned delta
)
110 /* These only have a single component that is implicitly splatted. A
111 * horizontal offset should be a harmless no-op.
112 * XXX - Handle vector immediates correctly.
118 return byte_offset(reg
, delta
* reg
.stride
* type_sz(reg
.type
));
124 const unsigned stride
= reg
.hstride
? 1 << (reg
.hstride
- 1) : 0;
125 return byte_offset(reg
, delta
* stride
* type_sz(reg
.type
));
128 unreachable("Invalid register file");
132 offset(fs_reg reg
, unsigned width
, unsigned delta
)
143 return byte_offset(reg
, delta
* reg
.component_size(width
));
151 * Get the scalar channel of \p reg given by \p idx and replicate it to all
152 * channels of the result.
155 component(fs_reg reg
, unsigned idx
)
157 reg
= horiz_offset(reg
, idx
);
163 * Return an integer identifying the discrete address space a register is
164 * contained in. A register is by definition fully contained in the single
165 * reg_space it belongs to, so two registers with different reg_space ids are
166 * guaranteed not to overlap. Most register files are a single reg_space of
167 * its own, only the VGRF file is composed of multiple discrete address
168 * spaces, one for each VGRF allocation.
170 static inline uint32_t
171 reg_space(const fs_reg
&r
)
173 return r
.file
<< 16 | (r
.file
== VGRF
? r
.nr
: 0);
177 * Return the base offset in bytes of a register relative to the start of its
180 static inline unsigned
181 reg_offset(const fs_reg
&r
)
183 return (r
.file
== VGRF
|| r
.file
== IMM
? 0 : r
.nr
) *
184 (r
.file
== UNIFORM
? 4 : REG_SIZE
) + r
.offset
+
185 (r
.file
== ARF
|| r
.file
== FIXED_GRF
? r
.subnr
: 0);
189 * Return the amount of padding in bytes left unused between individual
190 * components of register \p r due to a (horizontal) stride value greater than
191 * one, or zero if components are tightly packed in the register file.
193 static inline unsigned
194 reg_padding(const fs_reg
&r
)
196 const unsigned stride
= ((r
.file
!= ARF
&& r
.file
!= FIXED_GRF
) ? r
.stride
:
198 1 << (r
.hstride
- 1));
199 return (MAX2(1, stride
) - 1) * type_sz(r
.type
);
203 * Return whether the register region starting at \p r and spanning \p dr
204 * bytes could potentially overlap the register region starting at \p s and
205 * spanning \p ds bytes.
208 regions_overlap(const fs_reg
&r
, unsigned dr
, const fs_reg
&s
, unsigned ds
)
210 if (r
.file
== MRF
&& (r
.nr
& BRW_MRF_COMPR4
)) {
212 t
.nr
&= ~BRW_MRF_COMPR4
;
213 /* COMPR4 regions are translated by the hardware during decompression
214 * into two separate half-regions 4 MRFs apart from each other.
216 return regions_overlap(t
, dr
/ 2, s
, ds
) ||
217 regions_overlap(byte_offset(t
, 4 * REG_SIZE
), dr
/ 2, s
, ds
);
219 } else if (s
.file
== MRF
&& (s
.nr
& BRW_MRF_COMPR4
)) {
220 return regions_overlap(s
, ds
, r
, dr
);
223 return reg_space(r
) == reg_space(s
) &&
224 !(reg_offset(r
) + dr
<= reg_offset(s
) ||
225 reg_offset(s
) + ds
<= reg_offset(r
));
230 * Check that the register region given by r [r.offset, r.offset + dr[
231 * is fully contained inside the register region given by s
232 * [s.offset, s.offset + ds[.
235 region_contained_in(const fs_reg
&r
, unsigned dr
, const fs_reg
&s
, unsigned ds
)
237 return reg_space(r
) == reg_space(s
) &&
238 reg_offset(r
) >= reg_offset(s
) &&
239 reg_offset(r
) + dr
<= reg_offset(s
) + ds
;
243 * Return whether the given register region is n-periodic, i.e. whether the
244 * original region remains invariant after shifting it by \p n scalar
248 is_periodic(const fs_reg
®
, unsigned n
)
250 if (reg
.file
== BAD_FILE
|| reg
.is_null()) {
253 } else if (reg
.file
== IMM
) {
254 const unsigned period
= (reg
.type
== BRW_REGISTER_TYPE_UV
||
255 reg
.type
== BRW_REGISTER_TYPE_V
? 8 :
256 reg
.type
== BRW_REGISTER_TYPE_VF
? 4 :
258 return n
% period
== 0;
260 } else if (reg
.file
== ARF
|| reg
.file
== FIXED_GRF
) {
261 const unsigned period
= (reg
.hstride
== 0 && reg
.vstride
== 0 ? 1 :
262 reg
.vstride
== 0 ? 1 << reg
.width
:
264 return n
% period
== 0;
267 return reg
.stride
== 0;
272 is_uniform(const fs_reg
®
)
274 return is_periodic(reg
, 1);
278 * Get the specified 8-component quarter of a register.
279 * XXX - Maybe come up with a less misleading name for this (e.g. quarter())?
282 half(const fs_reg
®
, unsigned idx
)
285 return horiz_offset(reg
, 8 * idx
);
289 * Reinterpret each channel of register \p reg as a vector of values of the
290 * given smaller type and take the i-th subcomponent from each.
293 subscript(fs_reg reg
, brw_reg_type type
, unsigned i
)
295 assert((i
+ 1) * type_sz(type
) <= type_sz(reg
.type
));
297 if (reg
.file
== ARF
|| reg
.file
== FIXED_GRF
) {
298 /* The stride is encoded inconsistently for fixed GRF and ARF registers
299 * as the log2 of the actual vertical and horizontal strides.
301 const int delta
= _mesa_logbase2(type_sz(reg
.type
)) -
302 _mesa_logbase2(type_sz(type
));
303 reg
.hstride
+= (reg
.hstride
? delta
: 0);
304 reg
.vstride
+= (reg
.vstride
? delta
: 0);
306 } else if (reg
.file
== IMM
) {
307 assert(reg
.type
== type
);
310 reg
.stride
*= type_sz(reg
.type
) / type_sz(type
);
313 return byte_offset(retype(reg
, type
), i
* type_sz(type
));
317 horiz_stride(fs_reg reg
, unsigned s
)
323 static const fs_reg reg_undef
;
325 class fs_inst
: public backend_instruction
{
326 fs_inst
&operator=(const fs_inst
&);
328 void init(enum opcode opcode
, uint8_t exec_width
, const fs_reg
&dst
,
329 const fs_reg
*src
, unsigned sources
);
332 DECLARE_RALLOC_CXX_OPERATORS(fs_inst
)
335 fs_inst(enum opcode opcode
, uint8_t exec_size
);
336 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
);
337 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
339 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
340 const fs_reg
&src0
, const fs_reg
&src1
);
341 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
342 const fs_reg
&src0
, const fs_reg
&src1
, const fs_reg
&src2
);
343 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
344 const fs_reg src
[], unsigned sources
);
345 fs_inst(const fs_inst
&that
);
348 void resize_sources(uint8_t num_sources
);
350 bool equals(fs_inst
*inst
) const;
351 bool is_send_from_grf() const;
352 bool is_partial_write() const;
353 bool is_copy_payload(const brw::simple_allocator
&grf_alloc
) const;
354 unsigned components_read(unsigned i
) const;
355 unsigned size_read(int arg
) const;
356 bool can_do_source_mods(const struct gen_device_info
*devinfo
) const;
358 bool can_change_types() const;
359 bool has_source_and_destination_hazard() const;
362 * Return the subset of flag registers read by the instruction as a bitset
363 * with byte granularity.
365 unsigned flags_read(const gen_device_info
*devinfo
) const;
368 * Return the subset of flag registers updated by the instruction (either
369 * partially or fully) as a bitset with byte granularity.
371 unsigned flags_written() const;
376 uint8_t sources
; /**< Number of fs_reg sources. */
379 bool pi_noperspective
:1; /**< Pixel interpolator noperspective flag */
383 * Make the execution of \p inst dependent on the evaluation of a possibly
384 * inverted predicate.
386 static inline fs_inst
*
387 set_predicate_inv(enum brw_predicate pred
, bool inverse
,
390 inst
->predicate
= pred
;
391 inst
->predicate_inverse
= inverse
;
396 * Make the execution of \p inst dependent on the evaluation of a predicate.
398 static inline fs_inst
*
399 set_predicate(enum brw_predicate pred
, fs_inst
*inst
)
401 return set_predicate_inv(pred
, false, inst
);
405 * Write the result of evaluating the condition given by \p mod to a flag
408 static inline fs_inst
*
409 set_condmod(enum brw_conditional_mod mod
, fs_inst
*inst
)
411 inst
->conditional_mod
= mod
;
416 * Clamp the result of \p inst to the saturation range of its destination
419 static inline fs_inst
*
420 set_saturate(bool saturate
, fs_inst
*inst
)
422 inst
->saturate
= saturate
;
427 * Return the number of dataflow registers written by the instruction (either
428 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
429 * register_size)'. The somewhat arbitrary register size unit is 4B for the
430 * UNIFORM and IMM files and 32B for all other files.
433 regs_written(const fs_inst
*inst
)
435 assert(inst
->dst
.file
!= UNIFORM
&& inst
->dst
.file
!= IMM
);
436 return DIV_ROUND_UP(reg_offset(inst
->dst
) % REG_SIZE
+
438 MIN2(inst
->size_written
, reg_padding(inst
->dst
)),
443 * Return the number of dataflow registers read by the instruction (either
444 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
445 * register_size)'. The somewhat arbitrary register size unit is 4B for the
446 * UNIFORM and IMM files and 32B for all other files.
449 regs_read(const fs_inst
*inst
, unsigned i
)
451 const unsigned reg_size
=
452 inst
->src
[i
].file
== UNIFORM
|| inst
->src
[i
].file
== IMM
? 4 : REG_SIZE
;
453 return DIV_ROUND_UP(reg_offset(inst
->src
[i
]) % reg_size
+
455 MIN2(inst
->size_read(i
), reg_padding(inst
->src
[i
])),
459 static inline enum brw_reg_type
460 get_exec_type(const fs_inst
*inst
)
462 brw_reg_type exec_type
= BRW_REGISTER_TYPE_B
;
464 for (int i
= 0; i
< inst
->sources
; i
++) {
465 if (inst
->src
[i
].file
!= BAD_FILE
) {
466 const brw_reg_type t
= get_exec_type(inst
->src
[i
].type
);
467 if (type_sz(t
) > type_sz(exec_type
))
469 else if (type_sz(t
) == type_sz(exec_type
) &&
470 brw_reg_type_is_floating_point(t
))
475 if (exec_type
== BRW_REGISTER_TYPE_B
)
476 exec_type
= inst
->dst
.type
;
478 assert(exec_type
!= BRW_REGISTER_TYPE_B
);
480 /* Promotion of the execution type to 32-bit for conversions from or to
481 * half-float seems to be consistent with the following text from the
482 * Cherryview PRM Vol. 7, "Execution Data Type":
484 * "When single precision and half precision floats are mixed between
485 * source operands or between source and destination operand [..] single
486 * precision float is the execution datatype."
488 * and from "Register Region Restrictions":
490 * "Conversion between Integer and HF (Half Float) must be DWord aligned
491 * and strided by a DWord on the destination."
493 if (type_sz(exec_type
) == 2 &&
494 inst
->dst
.type
!= exec_type
) {
495 if (exec_type
== BRW_REGISTER_TYPE_HF
)
496 exec_type
= BRW_REGISTER_TYPE_F
;
497 else if (inst
->dst
.type
== BRW_REGISTER_TYPE_HF
)
498 exec_type
= BRW_REGISTER_TYPE_D
;
504 static inline unsigned
505 get_exec_type_size(const fs_inst
*inst
)
507 return type_sz(get_exec_type(inst
));
511 * Return whether the instruction isn't an ALU instruction and cannot be
512 * assumed to complete in-order.
515 is_unordered(const fs_inst
*inst
)
517 return inst
->mlen
|| inst
->is_send_from_grf() || inst
->is_math();
521 * Return whether the following regioning restriction applies to the specified
522 * instruction. From the Cherryview PRM Vol 7. "Register Region
525 * "When source or destination datatype is 64b or operation is integer DWord
526 * multiply, regioning in Align1 must follow these rules:
528 * 1. Source and Destination horizontal stride must be aligned to the same qword.
529 * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
530 * 3. Source and Destination offset must be the same, except the case of
534 has_dst_aligned_region_restriction(const gen_device_info
*devinfo
,
537 const brw_reg_type exec_type
= get_exec_type(inst
);
538 const bool is_int_multiply
= !brw_reg_type_is_floating_point(exec_type
) &&
539 (inst
->opcode
== BRW_OPCODE_MUL
|| inst
->opcode
== BRW_OPCODE_MAD
);
541 if (type_sz(inst
->dst
.type
) > 4 || type_sz(exec_type
) > 4 ||
542 (type_sz(exec_type
) == 4 && is_int_multiply
))
543 return devinfo
->is_cherryview
|| gen_device_info_is_9lp(devinfo
);