/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * \file brw_vec4_tes.cpp
 *
 * Tessellation evaluation shader specific code derived from the vec4_visitor
 * class.
 */
#include "brw_vec4_tes.h"
#include "dev/gen_debug.h"
36 vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler
*compiler
,
38 const struct brw_tes_prog_key
*key
,
39 struct brw_tes_prog_data
*prog_data
,
40 const nir_shader
*shader
,
42 int shader_time_index
)
43 : vec4_visitor(compiler
, log_data
, &key
->tex
, &prog_data
->base
,
44 shader
, mem_ctx
, false, shader_time_index
)
49 vec4_tes_visitor::setup_payload()
53 /* The payload always contains important data in r0 and r1, which contains
54 * the URB handles that are passed on to the URB write at the end
59 reg
= setup_uniforms(reg
);
61 foreach_block_and_inst(block
, vec4_instruction
, inst
, cfg
) {
62 for (int i
= 0; i
< 3; i
++) {
63 if (inst
->src
[i
].file
!= ATTR
)
66 bool is_64bit
= type_sz(inst
->src
[i
].type
) == 8;
68 unsigned slot
= inst
->src
[i
].nr
+ inst
->src
[i
].offset
/ 16;
69 struct brw_reg grf
= brw_vec4_grf(reg
+ slot
/ 2, 4 * (slot
% 2));
70 grf
= stride(grf
, 0, is_64bit
? 2 : 4, 1);
71 grf
.swizzle
= inst
->src
[i
].swizzle
;
72 grf
.type
= inst
->src
[i
].type
;
73 grf
.abs
= inst
->src
[i
].abs
;
74 grf
.negate
= inst
->src
[i
].negate
;
76 /* For 64-bit attributes we can end up with components XY in the
77 * second half of a register and components ZW in the first half
78 * of the next. Fix it up here.
80 if (is_64bit
&& grf
.subnr
> 0) {
81 /* We can't do swizzles that mix XY and ZW channels in this case.
82 * Such cases should have been handled by the scalarization pass.
84 assert((brw_mask_for_swizzle(grf
.swizzle
) & 0x3) ^
85 (brw_mask_for_swizzle(grf
.swizzle
) & 0xc));
86 if (brw_mask_for_swizzle(grf
.swizzle
) & 0xc) {
89 grf
.swizzle
-= BRW_SWIZZLE_ZZZZ
;
97 reg
+= 8 * prog_data
->urb_read_length
;
99 this->first_non_payload_grf
= reg
;
104 vec4_tes_visitor::emit_prolog()
106 input_read_header
= src_reg(this, glsl_type::uvec4_type
);
107 emit(TES_OPCODE_CREATE_INPUT_READ_HEADER
, dst_reg(input_read_header
));
109 this->current_annotation
= NULL
;
114 vec4_tes_visitor::emit_urb_write_header(int mrf
)
116 /* No need to do anything for DS; an implied write to this MRF will be
117 * performed by VS_OPCODE_URB_WRITE.
124 vec4_tes_visitor::emit_urb_write_opcode(bool complete
)
126 /* For DS, the URB writes end the thread. */
128 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
)
129 emit_shader_time_end();
132 vec4_instruction
*inst
= emit(VS_OPCODE_URB_WRITE
);
133 inst
->urb_write_flags
= complete
?
134 BRW_URB_WRITE_EOT_COMPLETE
: BRW_URB_WRITE_NO_FLAGS
;
140 vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr
*instr
)
142 const struct brw_tes_prog_data
*tes_prog_data
=
143 (const struct brw_tes_prog_data
*) prog_data
;
145 switch (instr
->intrinsic
) {
146 case nir_intrinsic_load_tess_coord
:
147 /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
148 emit(MOV(get_nir_dest(instr
->dest
, BRW_REGISTER_TYPE_F
),
149 src_reg(brw_vec8_grf(1, 0))));
151 case nir_intrinsic_load_tess_level_outer
:
152 if (tes_prog_data
->domain
== BRW_TESS_DOMAIN_ISOLINE
) {
153 emit(MOV(get_nir_dest(instr
->dest
, BRW_REGISTER_TYPE_F
),
154 swizzle(src_reg(ATTR
, 1, glsl_type::vec4_type
),
157 emit(MOV(get_nir_dest(instr
->dest
, BRW_REGISTER_TYPE_F
),
158 swizzle(src_reg(ATTR
, 1, glsl_type::vec4_type
),
162 case nir_intrinsic_load_tess_level_inner
:
163 if (tes_prog_data
->domain
== BRW_TESS_DOMAIN_QUAD
) {
164 emit(MOV(get_nir_dest(instr
->dest
, BRW_REGISTER_TYPE_F
),
165 swizzle(src_reg(ATTR
, 0, glsl_type::vec4_type
),
168 emit(MOV(get_nir_dest(instr
->dest
, BRW_REGISTER_TYPE_F
),
169 src_reg(ATTR
, 1, glsl_type::float_type
)));
172 case nir_intrinsic_load_primitive_id
:
173 emit(TES_OPCODE_GET_PRIMITIVE_ID
,
174 get_nir_dest(instr
->dest
, BRW_REGISTER_TYPE_UD
));
177 case nir_intrinsic_load_input
:
178 case nir_intrinsic_load_per_vertex_input
: {
179 src_reg indirect_offset
= get_indirect_offset(instr
);
180 unsigned imm_offset
= instr
->const_index
[0];
181 src_reg header
= input_read_header
;
182 bool is_64bit
= nir_dest_bit_size(instr
->dest
) == 64;
183 unsigned first_component
= nir_intrinsic_component(instr
);
185 first_component
/= 2;
187 if (indirect_offset
.file
!= BAD_FILE
) {
188 src_reg clamped_indirect_offset
= src_reg(this, glsl_type::uvec4_type
);
190 /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
191 * valid range of the offset is [0, 0FFFFFFFh].
193 emit_minmax(BRW_CONDITIONAL_L
,
194 dst_reg(clamped_indirect_offset
),
195 retype(indirect_offset
, BRW_REGISTER_TYPE_UD
),
196 brw_imm_ud(0x0fffffffu
));
198 header
= src_reg(this, glsl_type::uvec4_type
);
199 emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET
, dst_reg(header
),
200 input_read_header
, clamped_indirect_offset
);
202 /* Arbitrarily only push up to 24 vec4 slots worth of data,
203 * which is 12 registers (since each holds 2 vec4 slots).
205 const unsigned max_push_slots
= 24;
206 if (imm_offset
< max_push_slots
) {
207 const glsl_type
*src_glsl_type
=
208 is_64bit
? glsl_type::dvec4_type
: glsl_type::ivec4_type
;
209 src_reg src
= src_reg(ATTR
, imm_offset
, src_glsl_type
);
210 src
.swizzle
= BRW_SWZ_COMP_INPUT(first_component
);
212 const brw_reg_type dst_reg_type
=
213 is_64bit
? BRW_REGISTER_TYPE_DF
: BRW_REGISTER_TYPE_D
;
214 emit(MOV(get_nir_dest(instr
->dest
, dst_reg_type
), src
));
216 prog_data
->urb_read_length
=
217 MAX2(prog_data
->urb_read_length
,
218 DIV_ROUND_UP(imm_offset
+ (is_64bit
? 2 : 1), 2));
224 dst_reg
temp(this, glsl_type::ivec4_type
);
225 vec4_instruction
*read
=
226 emit(VEC4_OPCODE_URB_READ
, temp
, src_reg(header
));
227 read
->offset
= imm_offset
;
228 read
->urb_write_flags
= BRW_URB_WRITE_PER_SLOT_OFFSET
;
230 src_reg src
= src_reg(temp
);
231 src
.swizzle
= BRW_SWZ_COMP_INPUT(first_component
);
233 /* Copy to target. We might end up with some funky writemasks landing
234 * in here, but we really don't want them in the above pseudo-ops.
236 dst_reg dst
= get_nir_dest(instr
->dest
, BRW_REGISTER_TYPE_D
);
237 dst
.writemask
= brw_writemask_for_size(instr
->num_components
);
240 /* For 64-bit we need to load twice as many 32-bit components, and for
241 * dvec3/4 we need to emit 2 URB Read messages
243 dst_reg
temp(this, glsl_type::dvec4_type
);
244 dst_reg temp_d
= retype(temp
, BRW_REGISTER_TYPE_D
);
246 vec4_instruction
*read
=
247 emit(VEC4_OPCODE_URB_READ
, temp_d
, src_reg(header
));
248 read
->offset
= imm_offset
;
249 read
->urb_write_flags
= BRW_URB_WRITE_PER_SLOT_OFFSET
;
251 if (instr
->num_components
> 2) {
252 read
= emit(VEC4_OPCODE_URB_READ
, byte_offset(temp_d
, REG_SIZE
),
254 read
->offset
= imm_offset
+ 1;
255 read
->urb_write_flags
= BRW_URB_WRITE_PER_SLOT_OFFSET
;
258 src_reg temp_as_src
= src_reg(temp
);
259 temp_as_src
.swizzle
= BRW_SWZ_COMP_INPUT(first_component
);
261 dst_reg
shuffled(this, glsl_type::dvec4_type
);
262 shuffle_64bit_data(shuffled
, temp_as_src
, false);
264 dst_reg dst
= get_nir_dest(instr
->dest
, BRW_REGISTER_TYPE_DF
);
265 dst
.writemask
= brw_writemask_for_size(instr
->num_components
);
266 emit(MOV(dst
, src_reg(shuffled
)));
271 vec4_visitor::nir_emit_intrinsic(instr
);
277 vec4_tes_visitor::emit_thread_end()
279 /* For DS, we always end the thread by emitting a single vertex.
280 * emit_urb_write_opcode() will take care of setting the eot flag on the
286 } /* namespace brw */