3 * Copyright © 2011-2015 Intel Corporation
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
28 #include "brw_shader.h"
/* Register class src_reg, derived from backend_reg.
 *
 * The declarations visible here are: the ralloc operator macro, constructors
 * taking (file, nr, glsl_type), a brw_reg, a vec4_visitor plus glsl_type
 * (optionally with a size), an explicit converting constructor from dst_reg,
 * and the comparison helpers equals() / negative_equals().
 *
 * NOTE(review): the class braces, access specifier and any data members are
 * not visible in this view.  Other code in this file reads `reg.reladdr` and
 * default-constructs `src_reg()`, so further members presumably exist --
 * confirm against the full header.
 */
34 class src_reg
: public backend_reg
37 DECLARE_RALLOC_CXX_OPERATORS(src_reg
)
/* Construct from a register file, register number and GLSL type. */
41 src_reg(enum brw_reg_file file
, int nr
, const glsl_type
*type
);
/* Construct from a hardware register description. */
43 src_reg(struct ::brw_reg reg
);
/* Comparison helpers; both are const and take the other operand by
 * reference.
 */
45 bool equals(const src_reg
&r
) const;
46 bool negative_equals(const src_reg
&r
) const;
/* Constructors tied to a vec4_visitor, the second with an explicit size. */
48 src_reg(class vec4_visitor
*v
, const struct glsl_type
*type
);
49 src_reg(class vec4_visitor
*v
, const struct glsl_type
*type
, int size
);
/* Explicit conversion from a dst_reg.
 * NOTE(review): the parameter below shows up as the single character "®",
 * which looks like a mis-decoded "&reg" -- confirm and restore.
 */
51 explicit src_reg(const dst_reg
®
);
/* retype() overload for src_reg: receives the register by value together
 * with the brw_reg_type to apply.
 * NOTE(review): neither the return type nor the body is visible here;
 * presumably it hands back the copy with its type replaced -- confirm.
 */
57 retype(src_reg reg
, enum brw_reg_type type
)
/* Advance the register pointed to by \p reg by \p bytes, modifying it in
 * place.
 *
 * Three update forms are visible below.  Each ends by asserting that the
 * resulting sub-register position is a multiple of 16 bytes.
 *
 * NOTE(review): the dispatch that selects between these forms (and the
 * statement preceding the first assertion) is not visible in this view --
 * confirm which register files map to which form.
 */
66 add_byte_offset(backend_reg
*reg
, unsigned bytes
)
/* Form 1: only the alignment check on reg->offset is visible. */
75 assert(reg
->offset
% 16 == 0);
/* Form 2: add the bytes to reg->offset, carry whole REG_SIZE units into
 * reg->nr and keep the remainder in reg->offset.
 */
78 const unsigned suboffset
= reg
->offset
+ bytes
;
79 reg
->nr
+= suboffset
/ REG_SIZE
;
80 reg
->offset
= suboffset
% REG_SIZE
;
81 assert(reg
->offset
% 16 == 0);
/* Form 3: same carry scheme, but the sub-register position lives in
 * reg->subnr instead of reg->offset.
 */
86 const unsigned suboffset
= reg
->subnr
+ bytes
;
87 reg
->nr
+= suboffset
/ REG_SIZE
;
88 reg
->subnr
= suboffset
% REG_SIZE
;
89 assert(reg
->subnr
% 16 == 0);
97 } /* namespace detail */
100 byte_offset(src_reg reg
, unsigned bytes
)
102 detail::add_byte_offset(®
, bytes
);
106 static inline src_reg
107 offset(src_reg reg
, unsigned width
, unsigned delta
)
109 const unsigned stride
= (reg
.file
== UNIFORM
? 0 : 4);
110 const unsigned num_components
= MAX2(width
/ 4 * stride
, 4);
111 return byte_offset(reg
, num_components
* type_sz(reg
.type
) * delta
);
114 static inline src_reg
115 horiz_offset(src_reg reg
, unsigned delta
)
117 return byte_offset(reg
, delta
* type_sz(reg
.type
));
121 * Reswizzle a given source register.
/* Apply \p swizzle to a by-value copy of \p reg.
 *
 * NOTE(review): whatever selects between the two assignments below, and the
 * statement returning the result, are not visible in this view -- confirm
 * against the full header before relying on the exact control flow.
 */
124 static inline src_reg
125 swizzle(src_reg reg
, unsigned swizzle
)
/* Rewrite the 32-bit payload reg.ud through brw_swizzle_immediate(), passing
 * the register's type and the requested swizzle.
 */
128 reg
.ud
= brw_swizzle_immediate(reg
.type
, reg
.ud
, swizzle
);
/* Compose the requested swizzle with the one already stored in the
 * register.
 */
130 reg
.swizzle
= brw_compose_swizzle(swizzle
, reg
.swizzle
);
/* Helper returning a src_reg: asserts that the register is not in the IMM
 * file, then flips its negate flag.
 * NOTE(review): the line carrying the function name and parameter list, and
 * the return statement, are not visible in this view -- confirm.
 */
135 static inline src_reg
138 assert(reg
.file
!= IMM
);
139 reg
.negate
= !reg
.negate
;
144 is_uniform(const src_reg
®
)
146 return (reg
.file
== IMM
|| reg
.file
== UNIFORM
|| reg
.is_null()) &&
147 (!reg
.reladdr
|| is_uniform(*reg
.reladdr
));
/* Register class dst_reg, derived from backend_reg.
 *
 * Visible declarations: the ralloc operator macro, constructors taking
 * (file, nr), (file, nr, type, ...) in a glsl_type and a brw_reg_type
 * flavour, a brw_reg, a vec4_visitor plus glsl_type, an explicit converting
 * constructor from src_reg, and equals().
 *
 * NOTE(review): the class braces, access specifier and data members are not
 * visible in this view; writemask() elsewhere in this file reads
 * `reg.writemask`, so further members presumably exist -- confirm.
 */
150 class dst_reg
: public backend_reg
153 DECLARE_RALLOC_CXX_OPERATORS(dst_reg
)
/* Construct from a register file and register number. */
158 dst_reg(enum brw_reg_file file
, int nr
);
/* NOTE(review): the next two declarations end on a trailing comma; their
 * remaining parameter(s) are not visible here -- confirm against the full
 * header.
 */
159 dst_reg(enum brw_reg_file file
, int nr
, const glsl_type
*type
,
161 dst_reg(enum brw_reg_file file
, int nr
, brw_reg_type type
,
/* Construct from a hardware register description. */
163 dst_reg(struct ::brw_reg reg
);
/* Constructor tied to a vec4_visitor. */
164 dst_reg(class vec4_visitor
*v
, const struct glsl_type
*type
);
/* Explicit conversion from a src_reg.
 * NOTE(review): the parameter below shows up as the single character "®",
 * which looks like a mis-decoded "&reg" -- confirm and restore.
 */
166 explicit dst_reg(const src_reg
®
);
/* Const comparison against another dst_reg. */
168 bool equals(const dst_reg
&r
) const;
/* retype() overload for dst_reg: receives the register by value together
 * with the brw_reg_type to apply and returns a dst_reg.
 * NOTE(review): the body is not visible here; presumably it hands back the
 * copy with its type replaced -- confirm.
 */
173 static inline dst_reg
174 retype(dst_reg reg
, enum brw_reg_type type
)
180 static inline dst_reg
181 byte_offset(dst_reg reg
, unsigned bytes
)
183 detail::add_byte_offset(®
, bytes
);
187 static inline dst_reg
188 offset(dst_reg reg
, unsigned width
, unsigned delta
)
190 const unsigned stride
= (reg
.file
== UNIFORM
? 0 : 4);
191 const unsigned num_components
= MAX2(width
/ 4 * stride
, 4);
192 return byte_offset(reg
, num_components
* type_sz(reg
.type
) * delta
);
195 static inline dst_reg
196 horiz_offset(const dst_reg
®
, unsigned delta
)
198 if (is_uniform(src_reg(reg
)))
201 return byte_offset(reg
, delta
* type_sz(reg
.type
));
204 static inline dst_reg
205 writemask(dst_reg reg
, unsigned mask
)
207 assert(reg
.file
!= IMM
);
208 assert((reg
.writemask
& mask
) != 0);
209 reg
.writemask
&= mask
;
214 * Return an integer identifying the discrete address space a register is
215 * contained in. A register is by definition fully contained in the single
216 * reg_space it belongs to, so two registers with different reg_space ids are
217 * guaranteed not to overlap. Most register files are a single reg_space of
218 * its own, only the VGRF file is composed of multiple discrete address
219 * spaces, one for each VGRF allocation.
221 static inline uint32_t
222 reg_space(const backend_reg
&r
)
224 return r
.file
<< 16 | (r
.file
== VGRF
? r
.nr
: 0);
228 * Return the base offset in bytes of a register relative to the start of its
231 static inline unsigned
232 reg_offset(const backend_reg
&r
)
234 return (r
.file
== VGRF
|| r
.file
== IMM
? 0 : r
.nr
) *
235 (r
.file
== UNIFORM
? 16 : REG_SIZE
) + r
.offset
+
236 (r
.file
== ARF
|| r
.file
== FIXED_GRF
? r
.subnr
: 0);
240 * Return whether the register region starting at \p r and spanning \p dr
241 * bytes could potentially overlap the register region starting at \p s and
242 * spanning \p ds bytes.
245 regions_overlap(const backend_reg
&r
, unsigned dr
,
246 const backend_reg
&s
, unsigned ds
)
248 if (r
.file
== MRF
&& (r
.nr
& BRW_MRF_COMPR4
)) {
249 /* COMPR4 regions are translated by the hardware during decompression
250 * into two separate half-regions 4 MRFs apart from each other.
253 t0
.nr
&= ~BRW_MRF_COMPR4
;
255 t1
.offset
+= 4 * REG_SIZE
;
256 return regions_overlap(t0
, dr
/ 2, s
, ds
) ||
257 regions_overlap(t1
, dr
/ 2, s
, ds
);
259 } else if (s
.file
== MRF
&& (s
.nr
& BRW_MRF_COMPR4
)) {
260 return regions_overlap(s
, ds
, r
, dr
);
263 return reg_space(r
) == reg_space(s
) &&
264 !(reg_offset(r
) + dr
<= reg_offset(s
) ||
265 reg_offset(s
) + ds
<= reg_offset(r
));
/* Instruction class of the vec4 IR, derived from backend_instruction.
 *
 * NOTE(review): several inline member bodies below show only their
 * conditions or case labels; the values they return, the access specifiers
 * and the closing of the class are not visible in this view -- confirm
 * against the full header.
 */
269 class vec4_instruction
: public backend_instruction
{
271 DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction
)
/* Build an instruction from an opcode; the destination and the three
 * sources default to default-constructed registers.
 */
273 vec4_instruction(enum opcode opcode
,
274 const dst_reg
&dst
= dst_reg(),
275 const src_reg
&src0
= src_reg(),
276 const src_reg
&src1
= src_reg(),
277 const src_reg
&src2
= src_reg());
282 enum brw_urb_write_flags urb_write_flags
;
284 unsigned sol_binding
; /**< gen6: SOL binding table index */
285 bool sol_final_write
; /**< gen6: send commit message */
286 unsigned sol_vertex
; /**< gen6: used for setting dst index in SVB header */
/* Queries and transformations declared here and defined elsewhere. */
288 bool is_send_from_grf() const;
289 unsigned size_read(unsigned arg
) const;
290 bool can_reswizzle(const struct gen_device_info
*devinfo
, int dst_writemask
,
291 int swizzle
, int swizzle_mask
);
292 void reswizzle(int dst_writemask
, int swizzle
);
293 bool can_do_source_mods(const struct gen_device_info
*devinfo
);
295 bool can_do_writemask(const struct gen_device_info
*devinfo
);
296 bool can_change_types() const;
297 bool has_source_and_destination_hazard() const;
298 unsigned implied_mrf_writes() const;
/* True when the opcode is VEC4_OPCODE_SET_LOW_32BIT or
 * VEC4_OPCODE_SET_HIGH_32BIT.
 */
300 bool is_align1_partial_write()
302 return opcode
== VEC4_OPCODE_SET_LOW_32BIT
||
303 opcode
== VEC4_OPCODE_SET_HIGH_32BIT
;
/* True when the instruction is predicated or its opcode is
 * VS_OPCODE_UNPACK_FLAGS_SIMD4X2.
 */
306 bool reads_flag() const
308 return predicate
|| opcode
== VS_OPCODE_UNPACK_FLAGS_SIMD4X2
;
/* Per-channel variant taking a channel index \p c.  It first tests for
 * VS_OPCODE_UNPACK_FLAGS_SIMD4X2 and then distinguishes the predicate modes
 * listed below.
 * NOTE(review): the value produced for each case is not visible here.
 */
311 bool reads_flag(unsigned c
)
313 if (opcode
== VS_OPCODE_UNPACK_FLAGS_SIMD4X2
)
317 case BRW_PREDICATE_NONE
:
319 case BRW_PREDICATE_ALIGN16_REPLICATE_X
:
321 case BRW_PREDICATE_ALIGN16_REPLICATE_Y
:
323 case BRW_PREDICATE_ALIGN16_REPLICATE_Z
:
325 case BRW_PREDICATE_ALIGN16_REPLICATE_W
:
/* True when a conditional modifier is set and the opcode is none of SEL,
 * CSEL, IF or WHILE.
 */
332 bool writes_flag() const
334 return (conditional_mod
&& (opcode
!= BRW_OPCODE_SEL
&&
335 opcode
!= BRW_OPCODE_CSEL
&&
336 opcode
!= BRW_OPCODE_IF
&&
337 opcode
!= BRW_OPCODE_WHILE
));
/* Groups the opcodes listed below under a single outcome.
 * NOTE(review): the switch head and the returned values are not visible
 * here; presumably these are the opcodes for which the answer is true --
 * confirm.
 */
340 bool reads_g0_implicitly() const
343 case SHADER_OPCODE_TEX
:
344 case SHADER_OPCODE_TXL
:
345 case SHADER_OPCODE_TXD
:
346 case SHADER_OPCODE_TXF
:
347 case SHADER_OPCODE_TXF_CMS_W
:
348 case SHADER_OPCODE_TXF_CMS
:
349 case SHADER_OPCODE_TXF_MCS
:
350 case SHADER_OPCODE_TXS
:
351 case SHADER_OPCODE_TG4
:
352 case SHADER_OPCODE_TG4_OFFSET
:
353 case SHADER_OPCODE_SAMPLEINFO
:
354 case VS_OPCODE_PULL_CONSTANT_LOAD
:
355 case GS_OPCODE_SET_PRIMITIVE_ID
:
356 case GS_OPCODE_GET_INSTANCE_ID
:
357 case SHADER_OPCODE_GEN4_SCRATCH_READ
:
358 case SHADER_OPCODE_GEN4_SCRATCH_WRITE
:
367 * Make the execution of \p inst dependent on the evaluation of a possibly
368 * inverted predicate.
370 inline vec4_instruction
*
371 set_predicate_inv(enum brw_predicate pred
, bool inverse
,
372 vec4_instruction
*inst
)
374 inst
->predicate
= pred
;
375 inst
->predicate_inverse
= inverse
;
380 * Make the execution of \p inst dependent on the evaluation of a predicate.
382 inline vec4_instruction
*
383 set_predicate(enum brw_predicate pred
, vec4_instruction
*inst
)
385 return set_predicate_inv(pred
, false, inst
);
389 * Write the result of evaluating the condition given by \p mod to a flag
392 inline vec4_instruction
*
393 set_condmod(enum brw_conditional_mod mod
, vec4_instruction
*inst
)
395 inst
->conditional_mod
= mod
;
400 * Clamp the result of \p inst to the saturation range of its destination
403 inline vec4_instruction
*
404 set_saturate(bool saturate
, vec4_instruction
*inst
)
406 inst
->saturate
= saturate
;
411 * Return the number of dataflow registers written by the instruction (either
412 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
413 * register_size)'. The somewhat arbitrary register size unit is 16B for the
414 * UNIFORM and IMM files and 32B for all other files.
417 regs_written(const vec4_instruction
*inst
)
419 assert(inst
->dst
.file
!= UNIFORM
&& inst
->dst
.file
!= IMM
);
420 return DIV_ROUND_UP(reg_offset(inst
->dst
) % REG_SIZE
+ inst
->size_written
,
425 * Return the number of dataflow registers read by the instruction (either
426 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
427 * register_size)'. The somewhat arbitrary register size unit is 16B for the
428 * UNIFORM and IMM files and 32B for all other files.
431 regs_read(const vec4_instruction
*inst
, unsigned i
)
433 const unsigned reg_size
=
434 inst
->src
[i
].file
== UNIFORM
|| inst
->src
[i
].file
== IMM
? 16 : REG_SIZE
;
435 return DIV_ROUND_UP(reg_offset(inst
->src
[i
]) % reg_size
+ inst
->size_read(i
),
439 static inline enum brw_reg_type
440 get_exec_type(const vec4_instruction
*inst
)
442 enum brw_reg_type exec_type
= BRW_REGISTER_TYPE_B
;
444 for (int i
= 0; i
< 3; i
++) {
445 if (inst
->src
[i
].file
!= BAD_FILE
) {
446 const brw_reg_type t
= get_exec_type(brw_reg_type(inst
->src
[i
].type
));
447 if (type_sz(t
) > type_sz(exec_type
))
449 else if (type_sz(t
) == type_sz(exec_type
) &&
450 brw_reg_type_is_floating_point(t
))
455 if (exec_type
== BRW_REGISTER_TYPE_B
)
456 exec_type
= inst
->dst
.type
;
458 /* TODO: We need to handle half-float conversions. */
459 assert(exec_type
!= BRW_REGISTER_TYPE_HF
||
460 inst
->dst
.type
== BRW_REGISTER_TYPE_HF
);
461 assert(exec_type
!= BRW_REGISTER_TYPE_B
);
466 static inline unsigned
467 get_exec_type_size(const vec4_instruction
*inst
)
469 return type_sz(get_exec_type(inst
));
472 } /* namespace brw */