20ab97142294e81bddfd431e77f9b4007506b8e7
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_nir_lower_tess_io.cpp
1 #include "sfn_nir.h"
2
3 bool r600_lower_tess_io_filter(const nir_instr *instr)
4 {
5 if (instr->type != nir_instr_type_intrinsic)
6 return false;
7
8 nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
9 switch (op->intrinsic) {
10 case nir_intrinsic_load_input:
11 case nir_intrinsic_store_output:
12 case nir_intrinsic_load_output:
13 case nir_intrinsic_load_per_vertex_input:
14 case nir_intrinsic_load_per_vertex_output:
15 case nir_intrinsic_store_per_vertex_output:
16 case nir_intrinsic_load_patch_vertices_in:
17 case nir_intrinsic_load_tess_level_outer:
18 case nir_intrinsic_load_tess_level_inner:
19 return true;
20 default:
21 ;
22 }
23 return false;
24 }
25
26 static nir_ssa_def *
27 emit_load_param_base(nir_builder *b, nir_intrinsic_op op)
28 {
29 nir_intrinsic_instr *result = nir_intrinsic_instr_create(b->shader, op);
30 nir_ssa_dest_init(&result->instr, &result->dest,
31 4, 32, NULL);
32 nir_builder_instr_insert(b, &result->instr);
33 return &result->dest.ssa;
34 }
35
36 static int get_tcs_varying_offset(exec_list *io, unsigned index)
37 {
38 nir_foreach_variable(var, io){
39 if (var->data.driver_location == index) {
40 switch (var->data.location) {
41 case VARYING_SLOT_POS:
42 return 0;
43 case VARYING_SLOT_PSIZ:
44 return 0x10;
45 case VARYING_SLOT_CLIP_DIST0:
46 return 0x20;
47 case VARYING_SLOT_CLIP_DIST1:
48 return 0x30;
49 case VARYING_SLOT_TESS_LEVEL_OUTER:
50 return 0;
51 case VARYING_SLOT_TESS_LEVEL_INNER:
52 return 0x10;
53 default:
54 if (var->data.location >= VARYING_SLOT_VAR0 &&
55 var->data.location <= VARYING_SLOT_VAR31)
56 return 0x10 * (var->data.location - VARYING_SLOT_VAR0) + 0x40;
57
58 if (var->data.location >= VARYING_SLOT_PATCH0) {
59 return 0x10 * (var->data.location - VARYING_SLOT_PATCH0) + 0x20;
60 }
61 }
62 /* TODO: PATCH is missing */
63 }
64 }
65 return 0;
66 }
67
/* Emit a 24-bit unsigned multiply-add: op1 * op2 + op3. */
static inline nir_ssa_def *
r600_umad_24(nir_builder *b, nir_ssa_def *op1, nir_ssa_def *op2, nir_ssa_def *op3)
{
   return nir_build_alu(b, nir_op_umad24, op1, op2, op3, NULL);
}
73
/* LDS base address of the per-patch TCS data:
 * param_base.x * rel_patch_id + param_base.w. */
static inline nir_ssa_def *
r600_tcs_base_address(nir_builder *b, nir_ssa_def *param_base, nir_ssa_def *rel_patch_id)
{
   return r600_umad_24(b, nir_channel(b, param_base, 0),
                       rel_patch_id,
                       nir_channel(b, param_base, 3));
}
81
82
83 static nir_ssa_def *
84 emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op)
85 {
86 nir_ssa_def *addr = nir_build_alu(b, nir_op_umul24,
87 nir_channel(b, base, 0),
88 patch_id, NULL, NULL);
89
90 auto idx1 = nir_src_as_const_value(op->src[0]);
91 if (!idx1 || idx1->u32 != 0)
92 addr = r600_umad_24(b, nir_channel(b, base, 1),
93 op->src[0].ssa, addr);
94
95 auto offset = nir_imm_int(b, get_tcs_varying_offset(&b->shader->inputs, nir_intrinsic_base(op)));
96
97 auto idx2 = nir_src_as_const_value(op->src[1]);
98 if (!idx2 || idx2->u32 != 0)
99 offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)));
100
101 return nir_iadd(b, addr, offset);
102 }
103
104 static nir_ssa_def *
105 emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, exec_list *io, int src_offset)
106 {
107
108 nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0),
109 patch_id,
110 nir_channel(b, base, 2));
111 nir_ssa_def *addr2 = r600_umad_24(b, nir_channel(b, base, 1),
112 op->src[src_offset].ssa, addr1);
113
114 int offset = get_tcs_varying_offset(io, nir_intrinsic_base(op));
115 return nir_iadd(b, nir_iadd(b, addr2,
116 nir_ishl(b, op->src[src_offset + 1].ssa, nir_imm_int(b,4))),
117 nir_imm_int(b, offset));
118 }
119
120 static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents)
121 {
122 switch (ncomponents) {
123 /* tess outer offsets */
124 case 1: return nir_imm_int(b, 0);
125 case 2: return nir_imm_ivec2(b, 0, 4);
126 case 3: return r600_imm_ivec3(b, 0, 4, 8);
127 case 4: return nir_imm_ivec4(b, 0, 4, 8, 12);
128 /* tess inner offsets */
129 case 5: return nir_imm_int(b, 16);
130 case 6: return nir_imm_ivec2(b, 16, 20);
131 default:
132 debug_printf("Got %d components\n", ncomponents);
133 unreachable("Unsupported component count");
134 }
135 }
136
137 static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
138 {
139 nir_intrinsic_instr *load_tcs_in = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
140 load_tcs_in->num_components = op->num_components;
141 nir_ssa_dest_init(&load_tcs_in->instr, &load_tcs_in->dest,
142 load_tcs_in->num_components, 32, NULL);
143
144 nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group(b, load_tcs_in->num_components));
145 load_tcs_in->src[0] = nir_src_for_ssa(addr_outer);
146 nir_intrinsic_set_component(load_tcs_in, nir_intrinsic_component(op));
147 nir_builder_instr_insert(b, &load_tcs_in->instr);
148 nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load_tcs_in->dest.ssa));
149 nir_instr_remove(&op->instr);
150
151 }
152
153 static nir_ssa_def *
154 r600_load_rel_patch_id(nir_builder *b)
155 {
156 auto patch_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_rel_patch_id_r600);
157 nir_ssa_dest_init(&patch_id->instr, &patch_id->dest,
158 1, 32, NULL);
159 nir_builder_instr_insert(b, &patch_id->instr);
160 return &patch_id->dest.ssa;
161 }
162
/* Lower an output store @op into store_local_shared_r600 intrinsics.
 * The LDS store handles at most two consecutive components, so the
 * four-bit write mask is processed as the xy pair (i == 0) and the
 * zw pair (i == 1); pairs with no bits set are skipped.
 * @addr is the LDS byte address of component x of the value. */
static void
emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
{
   for (int i = 0; i < 2; ++i) {
      /* Bits of the write mask that belong to this component pair. */
      unsigned test_mask = (0x3 << 2 * i);
      if (!(nir_intrinsic_write_mask(op) & test_mask))
         continue;

      auto store_tcs_out = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_local_shared_r600);
      unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
      nir_intrinsic_set_write_mask(store_tcs_out, writemask);
      /* NOTE(review): the full source vector is passed even when only the
       * high pair is stored — presumably the backend uses the write mask to
       * pick the components; confirm in the r600 shader backend. */
      store_tcs_out->src[0] = nir_src_for_ssa(op->src[0].ssa);
      store_tcs_out->num_components = store_tcs_out->src[0].ssa->num_components;
      /* Does the pair start on its even component?  If not, the store
       * address moves 4 bytes (one component) further in. */
      bool start_even = (writemask & (1u << (2 * i)));

      auto addr2 = nir_iadd(b, addr, nir_imm_int(b, 8 * i + (start_even ? 0 : 4)));
      store_tcs_out->src[1] = nir_src_for_ssa(addr2);

      nir_builder_instr_insert(b, &store_tcs_out->instr);
   }
}
184
185 static nir_ssa_def *
186 emil_tcs_io_offset(nir_builder *b, nir_ssa_def *addr, nir_intrinsic_instr *op, exec_list *io, int src_offset)
187 {
188
189 int offset = get_tcs_varying_offset(io, nir_intrinsic_base(op));
190 return nir_iadd(b, nir_iadd(b, addr,
191 nir_ishl(b, op->src[src_offset].ssa, nir_imm_int(b,4))),
192 nir_imm_int(b, offset));
193 }
194
195
196 inline unsigned
197 outer_tf_components(pipe_prim_type prim_type)
198 {
199 switch (prim_type) {
200 case PIPE_PRIM_LINES: return 2;
201 case PIPE_PRIM_TRIANGLES: return 3;
202 case PIPE_PRIM_QUADS: return 4;
203 default:
204 return 0;
205 }
206 }
207
208
209
210 static bool
211 r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum pipe_prim_type prim_type)
212 {
213 static nir_ssa_def *load_in_param_base = nullptr;
214 static nir_ssa_def *load_out_param_base = nullptr;
215
216 b->cursor = nir_before_instr(instr);
217 nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
218
219 if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
220 load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
221 load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
222 } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
223 load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
224 } else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
225 load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
226 }
227
228 auto rel_patch_id = r600_load_rel_patch_id(b);
229
230 unsigned tf_inner_address_offset = 0;
231 unsigned ncomps_correct = 0;
232
233 switch (op->intrinsic) {
234 case nir_intrinsic_load_patch_vertices_in: {
235 auto vertices_in = nir_channel(b, load_in_param_base, 2);
236 nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(vertices_in));
237 nir_instr_remove(&op->instr);
238 return true;
239 }
240 case nir_intrinsic_load_per_vertex_input: {
241 nir_ssa_def *addr =
242 b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
243 emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op) :
244 emil_lsd_out_addr(b, load_in_param_base, rel_patch_id, op, &b->shader->inputs, 0);
245 replace_load_instr(b, op, addr);
246 return true;
247 }
248 case nir_intrinsic_store_per_vertex_output: {
249 nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, &b->shader->outputs, 1);
250 emit_store_lds(b, op, addr);
251 nir_instr_remove(instr);
252 return true;
253 }
254 case nir_intrinsic_load_per_vertex_output: {
255 nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, &b->shader->outputs, 0);
256 replace_load_instr(b, op, addr);
257 return true;
258 }
259 case nir_intrinsic_store_output: {
260 nir_ssa_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL) ?
261 r600_tcs_base_address(b, load_out_param_base, rel_patch_id):
262 nir_build_alu(b, nir_op_umul24,
263 nir_channel(b, load_out_param_base, 1),
264 rel_patch_id, NULL, NULL);
265 addr = emil_tcs_io_offset(b, addr, op, &b->shader->outputs, 1);
266 emit_store_lds(b, op, addr);
267 nir_instr_remove(instr);
268 return true;
269 }
270 case nir_intrinsic_load_output: {
271 nir_ssa_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
272 addr = emil_tcs_io_offset(b, addr, op, &b->shader->outputs, 0);
273 replace_load_instr(b, op, addr);
274 return true;
275 }
276 case nir_intrinsic_load_input: {
277 nir_ssa_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
278 addr = emil_tcs_io_offset(b, addr, op, &b->shader->inputs, 0);
279 replace_load_instr(b, op, addr);
280 return true;
281 }
282 case nir_intrinsic_load_tess_level_inner:
283 tf_inner_address_offset = 4;
284 ncomps_correct = 2;
285 /* fallthrough */
286 case nir_intrinsic_load_tess_level_outer: {
287 auto ncomps = outer_tf_components(prim_type);
288 if (!ncomps)
289 return false;
290 ncomps -= ncomps_correct;
291 auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
292 auto rel_patch_id = r600_load_rel_patch_id(b);
293 nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
294 nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));
295
296 auto tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
297 tf->num_components = ncomps;
298 tf->src[0] = nir_src_for_ssa(addr_outer);
299 nir_ssa_dest_init(&tf->instr, &tf->dest,
300 tf->num_components, 32, NULL);
301 nir_intrinsic_set_component(tf, 0);
302 nir_builder_instr_insert(b, &tf->instr);
303
304 nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&tf->dest.ssa));
305 nir_instr_remove(instr);
306 return true;
307 }
308 default:
309 ;
310 }
311
312 return false;
313 }
314
315 bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type)
316 {
317 bool progress = false;
318 nir_foreach_function(function, shader) {
319 if (function->impl) {
320 nir_builder b;
321 nir_builder_init(&b, function->impl);
322
323 nir_foreach_block(block, function->impl) {
324 nir_foreach_instr_safe(instr, block) {
325 if (instr->type != nir_instr_type_intrinsic)
326 continue;
327
328 if (r600_lower_tess_io_filter(instr))
329 progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
330 }
331 }
332 }
333 }
334 return progress;
335 }
336
337 bool r600_emit_tf(nir_builder *b, nir_ssa_def *val)
338 {
339 nir_intrinsic_instr *store_tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);
340 store_tf->num_components = val->num_components;
341 store_tf->src[0] = nir_src_for_ssa(val);
342 nir_builder_instr_insert(b, &store_tf->instr);
343 return true;
344 }
345
/* Append the tessellation-factor (TF) write-out to the end of a TCS.
 *
 * Reads the outer (and, for triangles/quads, inner) tess levels back
 * from LDS and emits store_tf_r600 intrinsics carrying (address, value)
 * pairs for the TF buffer, all guarded by invocation_id == 0 so only one
 * invocation per patch performs the write.
 * Returns false when the stage is not TCS, a TF store already exists,
 * or the primitive type is unhandled; true after appending. */
bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type) {
   if (shader->info.stage != MESA_SHADER_TESS_CTRL)
      return false;

   /* Already emitted?  Then this pass must not run twice. */
   nir_foreach_function(function, shader) {
      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_store_tf_r600) {
               return false;
            }
         }
      }
   }
   nir_builder builder;
   nir_builder *b = &builder;

   assert(exec_list_length(&shader->functions) == 1);
   nir_function *f = (nir_function *)shader->functions.get_head();
   nir_builder_init(b, f->impl);

   auto outer_comps = outer_tf_components(prim_type);
   if (!outer_comps)
      return false;

   /* Lines: no inner factors; triangles: one; quads: two. */
   unsigned inner_comps = outer_comps - 2;
   /* Per-patch stride in the TF buffer, in bytes (4 per factor). */
   unsigned stride = (inner_comps + outer_comps) * 4;

   b->cursor = nir_after_cf_list(&f->impl->body);

   auto invocation_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_invocation_id);
   nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest,
                     1, 32, NULL);
   nir_builder_instr_insert(b, &invocation_id->instr);

   /* Only the first invocation of each patch writes the factors. */
   nir_push_if(b, nir_ieq(b, &invocation_id->dest.ssa, nir_imm_int(b, 0)));
   auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
   auto rel_patch_id = r600_load_rel_patch_id(b);

   nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);

   /* Read the outer factors back from LDS. */
   nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
   auto tf_outer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
   tf_outer->num_components = outer_comps;
   tf_outer->src[0] = nir_src_for_ssa(addr_outer);
   nir_ssa_dest_init(&tf_outer->instr, &tf_outer->dest,
                     tf_outer->num_components, 32, NULL);
   /* NOTE(review): component = 15 looks like a full component mask rather
    * than a single component index — confirm against the backend's
    * handling of load_local_shared_r600. */
   nir_intrinsic_set_component(tf_outer, 15);
   nir_builder_instr_insert(b, &tf_outer->instr);

   std::vector<nir_ssa_def *> tf_out;


   /* Base address of this patch's slot in the TF buffer. */
   auto tf_out_base = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_tess_factor_base_r600);
   nir_ssa_dest_init(&tf_out_base->instr, &tf_out_base->dest,
                     1, 32, NULL);
   nir_builder_instr_insert(b, &tf_out_base->instr);

   auto out_addr0 = nir_build_alu(b, nir_op_umad24,
                                  rel_patch_id,
                                  nir_imm_int(b, stride),
                                  &tf_out_base->dest.ssa,
                                  NULL);
   int chanx = 0;
   int chany = 1;

   /* For isolines the first two outer factors are written swapped. */
   if (prim_type == PIPE_PRIM_LINES)
      std::swap(chanx, chany);


   /* Each vec passed to store_tf_r600 is a list of (address, value)
    * pairs; v0 covers the first two outer factors. */
   auto v0 = nir_vec4(b, out_addr0, nir_channel(b, &tf_outer->dest.ssa, chanx),
                      nir_iadd(b, out_addr0, nir_imm_int(b, 4)),
                      nir_channel(b, &tf_outer->dest.ssa, chany));

   tf_out.push_back(v0);
   /* Third (and for quads, fourth) outer factor. */
   if (outer_comps > 2) {
      auto v1 = (outer_comps > 3) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
                                             nir_channel(b, &tf_outer->dest.ssa, 2),
                                             nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
                                             nir_channel(b, &tf_outer->dest.ssa, 3)) :
                                    nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
                                             nir_channel(b, &tf_outer->dest.ssa, 2));
      tf_out.push_back(v1);
   }

   if (inner_comps) {
      /* Inner factors live after the four outer slots in LDS;
       * load_offset_group(4 + inner_comps) yields the inner offsets. */
      nir_ssa_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
      auto tf_inner = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
      tf_inner->num_components = inner_comps;
      tf_inner->src[0] = nir_src_for_ssa(addr1);
      nir_ssa_dest_init(&tf_inner->instr, &tf_inner->dest,
                        tf_inner->num_components, 32, NULL);
      /* NOTE(review): component = 3 — same mask-vs-index question as for
       * tf_outer above; verify in the backend. */
      nir_intrinsic_set_component(tf_inner, 3);
      nir_builder_instr_insert(b, &tf_inner->instr);

      /* TF-buffer offsets: quads put the inner pair at 16/20; triangles
       * put their single inner factor right after the outer ones at 12. */
      auto v2 = (inner_comps > 1) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 16)),
                                             nir_channel(b, &tf_inner->dest.ssa, 0),
                                             nir_iadd(b, out_addr0, nir_imm_int(b, 20)),
                                             nir_channel(b, &tf_inner->dest.ssa, 1)):
                                    nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
                                             nir_channel(b, &tf_inner->dest.ssa, 0));
      tf_out.push_back(v2);
   }

   for (auto tf: tf_out)
      r600_emit_tf(b, tf);

   nir_pop_if(b, nullptr);

   nir_metadata_preserve(f->impl, nir_metadata_none);

   return true;
}