i965: Move the back-end compiler to src/intel/compiler
[mesa.git] / src / intel / compiler / brw_vec4_tes.cpp
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file brw_vec4_tes.cpp
26 *
27 * Tessellaton evaluation shader specific code derived from the vec4_visitor class.
28 */
29
30 #include "brw_vec4_tes.h"
31 #include "brw_cfg.h"
32 #include "common/gen_debug.h"
33
34 namespace brw {
35
/**
 * Construct a tessellation evaluation shader (TES) visitor.
 *
 * All state lives in the base vec4_visitor; this constructor only forwards
 * the TES-specific key/prog_data (their embedded texture key and base
 * prog_data) to it.  The hard-coded `false` argument's meaning is defined
 * by vec4_visitor's constructor, which is not visible in this file.
 */
vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
                                  void *log_data,
                                  const struct brw_tes_prog_key *key,
                                  struct brw_tes_prog_data *prog_data,
                                  const nir_shader *shader,
                                  void *mem_ctx,
                                  int shader_time_index)
   : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
                  shader, mem_ctx, false, shader_time_index)
{
}
47
48
49 dst_reg *
50 vec4_tes_visitor::make_reg_for_system_value(int location)
51 {
52 return NULL;
53 }
54
55 void
56 vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
57 {
58 switch (instr->intrinsic) {
59 case nir_intrinsic_load_tess_level_outer:
60 case nir_intrinsic_load_tess_level_inner:
61 break;
62 default:
63 vec4_visitor::nir_setup_system_value_intrinsic(instr);
64 }
65 }
66
67
/**
 * Lay out the TES thread payload and rewrite ATTR sources to point at
 * the GRFs where the pushed input data actually lands.
 *
 * Payload layout built here: r0-r1 (fixed data including URB handles),
 * then push constants (setup_uniforms), then pushed URB input slots —
 * two vec4 slots per register.  first_non_payload_grf is set to the
 * first register past all of that.
 */
void
vec4_tes_visitor::setup_payload()
{
   int reg = 0;

   /* The payload always contains important data in r0 and r1, which contains
    * the URB handles that are passed on to the URB write at the end
    * of the thread.
    */
   reg += 2;

   /* Push constants come next; setup_uniforms returns the first free reg. */
   reg = setup_uniforms(reg);

   /* Rewrite every ATTR source into a direct GRF reference into the
    * pushed-input region of the payload.
    */
   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != ATTR)
            continue;

         bool is_64bit = type_sz(inst->src[i].type) == 8;

         /* Each vec4 slot is 16 bytes; two slots share one register, so
          * even slots start at subnr 0 and odd slots at subnr 4.
          */
         unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
         struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
         /* 64-bit elements are twice as wide, so only 2 fit per slot. */
         grf = stride(grf, 0, is_64bit ? 2 : 4, 1);
         grf.swizzle = inst->src[i].swizzle;
         grf.type = inst->src[i].type;
         grf.abs = inst->src[i].abs;
         grf.negate = inst->src[i].negate;

         /* For 64-bit attributes we can end up with components XY in the
          * second half of a register and components ZW in the first half
          * of the next. Fix it up here.
          */
         if (is_64bit && grf.subnr > 0) {
            /* We can't do swizzles that mix XY and ZW channels in this case.
             * Such cases should have been handled by the scalarization pass.
             */
            assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^
                   (brw_mask_for_swizzle(grf.swizzle) & 0xc));
            if (brw_mask_for_swizzle(grf.swizzle) & 0xc) {
               /* ZW live at the start of the next register; rebase the
                * swizzle so Z/W select from that register's first half.
                */
               grf.subnr = 0;
               grf.nr++;
               grf.swizzle -= BRW_SWIZZLE_ZZZZ;
            }
         }

         inst->src[i] = grf;
      }
   }

   /* urb_read_length is in pairs of vec4 slots, i.e. whole registers... 
    * NOTE(review): the 8x multiplier presumably accounts for SIMD width /
    * per-vertex replication defined elsewhere — confirm against the URB
    * setup code before changing.
    */
   reg += 8 * prog_data->urb_read_length;

   this->first_non_payload_grf = reg;
}
121
122
123 void
124 vec4_tes_visitor::emit_prolog()
125 {
126 input_read_header = src_reg(this, glsl_type::uvec4_type);
127 emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
128
129 this->current_annotation = NULL;
130 }
131
132
133 void
134 vec4_tes_visitor::emit_urb_write_header(int mrf)
135 {
136 /* No need to do anything for DS; an implied write to this MRF will be
137 * performed by VS_OPCODE_URB_WRITE.
138 */
139 (void) mrf;
140 }
141
142
143 vec4_instruction *
144 vec4_tes_visitor::emit_urb_write_opcode(bool complete)
145 {
146 /* For DS, the URB writes end the thread. */
147 if (complete) {
148 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
149 emit_shader_time_end();
150 }
151
152 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
153 inst->urb_write_flags = complete ?
154 BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
155
156 return inst;
157 }
158
/**
 * Emit vec4 IR for a NIR intrinsic, handling the TES-specific ones here
 * and deferring everything else to the base vec4_visitor.
 *
 * TES-specific cases: tess coordinates (read straight from the payload),
 * inner/outer tess levels (read from pushed patch-header attributes with
 * domain-dependent swizzles), primitive ID, and input loads — which are
 * either serviced from pushed attribute registers or turned into URB
 * read messages.
 */
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   const struct brw_tes_prog_data *tes_prog_data =
      (const struct brw_tes_prog_data *) prog_data;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_tess_coord:
      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
               src_reg(brw_vec8_grf(1, 0))));
      break;
   case nir_intrinsic_load_tess_level_outer:
      /* The patch header layout differs per domain, hence the different
       * attribute slots/swizzles below — presumably matching the layout
       * the TCS/fixed-function wrote; confirm against the URB layout docs.
       */
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_ZWZW)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      }
      break;
   case nir_intrinsic_load_tess_level_inner:
      /* Inner levels only exist for quads (vec2) and triangles (scalar). */
      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
                          BRW_SWIZZLE_WZYX)));
      } else {
         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
                  src_reg(ATTR, 1, glsl_type::float_type)));
      }
      break;
   case nir_intrinsic_load_primitive_id:
      emit(TES_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
      /* 64-bit components occupy two 32-bit component slots each. */
      if (is_64bit)
         first_component /= 2;

      if (indirect_offset.file != BAD_FILE) {
         /* Indirect addressing: fold the dynamic offset into a fresh copy
          * of the read header rather than clobbering the shared one.
          */
         header = src_reg(this, glsl_type::uvec4_type);
         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
              input_read_header, indirect_offset);
      } else {
         /* Arbitrarily only push up to 24 vec4 slots worth of data,
          * which is 12 registers (since each holds 2 vec4 slots).
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
            /* Input is already pushed into the payload — read it as an
             * ATTR and grow urb_read_length to cover this slot (two slots
             * per register; 64-bit loads span two slots).
             */
            const glsl_type *src_glsl_type =
               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

            const brw_reg_type dst_reg_type =
               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));

            prog_data->urb_read_length =
               MAX2(prog_data->urb_read_length,
                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
            break;
         }
      }

      if (!is_64bit) {
         /* Pull path: one URB read into a temp, then a writemasked MOV
          * into the real destination.
          */
         dst_reg temp(this, glsl_type::ivec4_type);
         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         src_reg src = src_reg(temp);
         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Copy to target. We might end up with some funky writemasks landing
          * in here, but we really don't want them in the above pseudo-ops.
          */
         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src));
      } else {
         /* For 64-bit we need to load twice as many 32-bit components, and for
          * dvec3/4 we need to emit 2 URB Read messages
          */
         dst_reg temp(this, glsl_type::dvec4_type);
         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);

         vec4_instruction *read =
            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
         read->offset = imm_offset;
         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;

         if (instr->num_components > 2) {
            /* Second read fills the upper register with the next slot. */
            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
                        src_reg(header));
            read->offset = imm_offset + 1;
            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
         }

         src_reg temp_as_src = src_reg(temp);
         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);

         /* Reassemble the hi/lo 32-bit halves into proper doubles. */
         dst_reg shuffled(this, glsl_type::dvec4_type);
         shuffle_64bit_data(shuffled, temp_as_src, false);

         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
         dst.writemask = brw_writemask_for_size(instr->num_components);
         emit(MOV(dst, src_reg(shuffled)));
      }
      break;
   }
   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
284
285
286 void
287 vec4_tes_visitor::emit_thread_end()
288 {
289 /* For DS, we always end the thread by emitting a single vertex.
290 * emit_urb_write_opcode() will take care of setting the eot flag on the
291 * SEND instruction.
292 */
293 emit_vertex();
294 }
295
296 } /* namespace brw */