r600/sfn: Add tesselation shaders
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_nir_lower_tess_io.cpp
1 #include "sfn_nir.h"
2
3 bool r600_lower_tess_io_filter(const nir_instr *instr)
4 {
5 if (instr->type != nir_instr_type_intrinsic)
6 return false;
7
8 nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
9 switch (op->intrinsic) {
10 case nir_intrinsic_load_input:
11 case nir_intrinsic_store_output:
12 case nir_intrinsic_load_output:
13 case nir_intrinsic_load_per_vertex_input:
14 case nir_intrinsic_load_per_vertex_output:
15 case nir_intrinsic_store_per_vertex_output:
16 case nir_intrinsic_load_patch_vertices_in:
17 case nir_intrinsic_load_tess_level_outer:
18 case nir_intrinsic_load_tess_level_inner:
19 return true;
20 default:
21 ;
22 }
23 return false;
24 }
25
26 static nir_ssa_def *
27 emit_load_param_base(nir_builder *b, nir_intrinsic_op op)
28 {
29 nir_intrinsic_instr *result = nir_intrinsic_instr_create(b->shader, op);
30 result->num_components = 4;
31 nir_ssa_dest_init(&result->instr, &result->dest,
32 result->num_components, 32, NULL);
33 nir_builder_instr_insert(b, &result->instr);
34 return &result->dest.ssa;
35 }
36
37 static int get_tcs_varying_offset(exec_list *io, unsigned index)
38 {
39 nir_foreach_variable(var, io){
40 if (var->data.driver_location == index) {
41 switch (var->data.location) {
42 case VARYING_SLOT_POS:
43 return 0;
44 case VARYING_SLOT_PSIZ:
45 return 0x10;
46 case VARYING_SLOT_CLIP_DIST0:
47 return 0x20;
48 case VARYING_SLOT_CLIP_DIST1:
49 return 0x30;
50 case VARYING_SLOT_TESS_LEVEL_OUTER:
51 return 0;
52 case VARYING_SLOT_TESS_LEVEL_INNER:
53 return 0x10;
54 default:
55 if (var->data.location >= VARYING_SLOT_VAR0 &&
56 var->data.location <= VARYING_SLOT_VAR31)
57 return 0x10 * (var->data.location - VARYING_SLOT_VAR0) + 0x40;
58
59 if (var->data.location >= VARYING_SLOT_PATCH0) {
60 return 0x10 * (var->data.location - VARYING_SLOT_PATCH0) + 0x20;
61 }
62 }
63 /* TODO: PATCH is missing */
64 }
65 }
66 return 0;
67 }
68
69 static inline nir_ssa_def *
70 r600_umad_24(nir_builder *b, nir_ssa_def *op1, nir_ssa_def *op2, nir_ssa_def *op3)
71 {
72 return nir_build_alu(b, nir_op_umad24, op1, op2, op3, NULL);
73 }
74
75 static inline nir_ssa_def *
76 r600_tcs_base_address(nir_builder *b, nir_ssa_def *param_base, nir_ssa_def *rel_patch_id)
77 {
78 return r600_umad_24(b, nir_channel(b, param_base, 0),
79 rel_patch_id,
80 nir_channel(b, param_base, 3));
81 }
82
83
84 static nir_ssa_def *
85 emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op)
86 {
87 nir_ssa_def *addr = nir_build_alu(b, nir_op_umul24,
88 nir_channel(b, base, 0),
89 patch_id, NULL, NULL);
90
91 auto idx1 = nir_src_as_const_value(op->src[0]);
92 if (!idx1 || idx1->u32 != 0)
93 addr = r600_umad_24(b, nir_channel(b, base, 1),
94 op->src[0].ssa, addr);
95
96 auto offset = nir_imm_int(b, get_tcs_varying_offset(&b->shader->inputs, nir_intrinsic_base(op)));
97
98 auto idx2 = nir_src_as_const_value(op->src[1]);
99 if (!idx2 || idx2->u32 != 0)
100 offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)));
101
102 return nir_iadd(b, addr, offset);
103 }
104
105 static nir_ssa_def *
106 emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, exec_list *io, int src_offset)
107 {
108
109 nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0),
110 patch_id,
111 nir_channel(b, base, 2));
112 nir_ssa_def *addr2 = r600_umad_24(b, nir_channel(b, base, 1),
113 op->src[src_offset].ssa, addr1);
114
115 int offset = get_tcs_varying_offset(io, nir_intrinsic_base(op));
116 return nir_iadd(b, nir_iadd(b, addr2,
117 nir_ishl(b, op->src[src_offset + 1].ssa, nir_imm_int(b,4))),
118 nir_imm_int(b, offset));
119 }
120
121 static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents)
122 {
123 switch (ncomponents) {
124 /* tess outer offsets */
125 case 1: return nir_imm_int(b, 0);
126 case 2: return nir_imm_ivec2(b, 0, 4);
127 case 3: return r600_imm_ivec3(b, 0, 4, 8);
128 case 4: return nir_imm_ivec4(b, 0, 4, 8, 12);
129 /* tess inner offsets */
130 case 5: return nir_imm_int(b, 16);
131 case 6: return nir_imm_ivec2(b, 16, 20);
132 default:
133 debug_printf("Got %d components\n", ncomponents);
134 unreachable("Unsupported component count");
135 }
136 }
137
138 static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
139 {
140 nir_intrinsic_instr *load_tcs_in = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
141 load_tcs_in->num_components = op->num_components;
142 nir_ssa_dest_init(&load_tcs_in->instr, &load_tcs_in->dest,
143 load_tcs_in->num_components, 32, NULL);
144
145 nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group(b, load_tcs_in->num_components));
146 load_tcs_in->src[0] = nir_src_for_ssa(addr_outer);
147 nir_intrinsic_set_component(load_tcs_in, nir_intrinsic_component(op));
148 nir_builder_instr_insert(b, &load_tcs_in->instr);
149 nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load_tcs_in->dest.ssa));
150 nir_instr_remove(&op->instr);
151
152 }
153
154 static nir_ssa_def *
155 r600_load_rel_patch_id(nir_builder *b)
156 {
157 auto patch_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_rel_patch_id_r600);
158 patch_id->num_components = 1;
159 nir_ssa_dest_init(&patch_id->instr, &patch_id->dest,
160 patch_id->num_components, 32, NULL);
161 nir_builder_instr_insert(b, &patch_id->instr);
162 return &patch_id->dest.ssa;
163 }
164
165 static void
166 emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
167 {
168 for (int i = 0; i < 2; ++i) {
169 unsigned test_mask = (0x3 << 2 * i);
170 if (!(nir_intrinsic_write_mask(op) & test_mask))
171 continue;
172
173 auto store_tcs_out = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_local_shared_r600);
174 unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
175 nir_intrinsic_set_write_mask(store_tcs_out, writemask);
176 store_tcs_out->src[0] = nir_src_for_ssa(op->src[0].ssa);
177 store_tcs_out->num_components = store_tcs_out->src[0].ssa->num_components;
178 bool start_even = (writemask & (1u << (2 * i)));
179
180 auto addr2 = nir_iadd(b, addr, nir_imm_int(b, 8 * i + (start_even ? 0 : 4)));
181 store_tcs_out->src[1] = nir_src_for_ssa(addr2);
182
183 nir_builder_instr_insert(b, &store_tcs_out->instr);
184 }
185 }
186
187 static nir_ssa_def *
188 emil_tcs_io_offset(nir_builder *b, nir_ssa_def *addr, nir_intrinsic_instr *op, exec_list *io, int src_offset)
189 {
190
191 int offset = get_tcs_varying_offset(io, nir_intrinsic_base(op));
192 return nir_iadd(b, nir_iadd(b, addr,
193 nir_ishl(b, op->src[src_offset].ssa, nir_imm_int(b,4))),
194 nir_imm_int(b, offset));
195 }
196
197
198 inline unsigned
199 outer_tf_components(pipe_prim_type prim_type)
200 {
201 switch (prim_type) {
202 case PIPE_PRIM_LINES: return 2;
203 case PIPE_PRIM_TRIANGLES: return 3;
204 case PIPE_PRIM_QUADS: return 4;
205 default:
206 return 0;
207 }
208 }
209
210
211
212 static bool
213 r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum pipe_prim_type prim_type)
214 {
215 static nir_ssa_def *load_in_param_base = nullptr;
216 static nir_ssa_def *load_out_param_base = nullptr;
217
218 b->cursor = nir_before_instr(instr);
219 nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
220
221 if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
222 load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
223 load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
224 } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
225 load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
226 } else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
227 load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
228 }
229
230 auto rel_patch_id = r600_load_rel_patch_id(b);
231
232 unsigned tf_inner_address_offset = 0;
233 unsigned ncomps_correct = 0;
234
235 switch (op->intrinsic) {
236 case nir_intrinsic_load_patch_vertices_in: {
237 auto vertices_in = nir_channel(b, load_in_param_base, 2);
238 nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(vertices_in));
239 nir_instr_remove(&op->instr);
240 return true;
241 }
242 case nir_intrinsic_load_per_vertex_input: {
243 nir_ssa_def *addr =
244 b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
245 emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op) :
246 emil_lsd_out_addr(b, load_in_param_base, rel_patch_id, op, &b->shader->inputs, 0);
247 replace_load_instr(b, op, addr);
248 return true;
249 }
250 case nir_intrinsic_store_per_vertex_output: {
251 nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, &b->shader->outputs, 1);
252 emit_store_lds(b, op, addr);
253 nir_instr_remove(instr);
254 return true;
255 }
256 case nir_intrinsic_load_per_vertex_output: {
257 nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, &b->shader->outputs, 0);
258 replace_load_instr(b, op, addr);
259 return true;
260 }
261 case nir_intrinsic_store_output: {
262 nir_ssa_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL) ?
263 r600_tcs_base_address(b, load_out_param_base, rel_patch_id):
264 nir_build_alu(b, nir_op_umul24,
265 nir_channel(b, load_out_param_base, 1),
266 rel_patch_id, NULL, NULL);
267 addr = emil_tcs_io_offset(b, addr, op, &b->shader->outputs, 1);
268 emit_store_lds(b, op, addr);
269 nir_instr_remove(instr);
270 return true;
271 }
272 case nir_intrinsic_load_output: {
273 nir_ssa_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
274 addr = emil_tcs_io_offset(b, addr, op, &b->shader->outputs, 0);
275 replace_load_instr(b, op, addr);
276 return true;
277 }
278 case nir_intrinsic_load_input: {
279 nir_ssa_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
280 addr = emil_tcs_io_offset(b, addr, op, &b->shader->inputs, 0);
281 replace_load_instr(b, op, addr);
282 return true;
283 }
284 case nir_intrinsic_load_tess_level_inner:
285 tf_inner_address_offset = 4;
286 ncomps_correct = 2;
287 /* fallthrough */
288 case nir_intrinsic_load_tess_level_outer: {
289 auto ncomps = outer_tf_components(prim_type);
290 if (!ncomps)
291 return false;
292 ncomps -= ncomps_correct;
293 auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
294 auto rel_patch_id = r600_load_rel_patch_id(b);
295 nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
296 nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));
297
298 auto tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
299 tf->num_components = ncomps;
300 tf->src[0] = nir_src_for_ssa(addr_outer);
301 nir_ssa_dest_init(&tf->instr, &tf->dest,
302 tf->num_components, 32, NULL);
303 nir_intrinsic_set_component(tf, 0);
304 nir_builder_instr_insert(b, &tf->instr);
305
306 nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&tf->dest.ssa));
307 nir_instr_remove(instr);
308 return true;
309 }
310 default:
311 ;
312 }
313
314 return false;
315 }
316
317 bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type)
318 {
319 bool progress = false;
320 nir_foreach_function(function, shader) {
321 if (function->impl) {
322 nir_builder b;
323 nir_builder_init(&b, function->impl);
324
325 nir_foreach_block(block, function->impl) {
326 nir_foreach_instr_safe(instr, block) {
327 if (instr->type != nir_instr_type_intrinsic)
328 continue;
329
330 if (r600_lower_tess_io_filter(instr))
331 progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
332 }
333 }
334 }
335 }
336 return progress;
337 }
338
339 bool r600_emit_tf(nir_builder *b, nir_ssa_def *val)
340 {
341 nir_intrinsic_instr *store_tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);
342 store_tf->num_components = val->num_components;
343 store_tf->src[0] = nir_src_for_ssa(val);
344 nir_builder_instr_insert(b, &store_tf->instr);
345 return true;
346 }
347
348 bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type) {
349 if (shader->info.stage != MESA_SHADER_TESS_CTRL)
350 return false;
351
352 nir_foreach_function(function, shader) {
353 nir_foreach_block(block, function->impl) {
354 nir_foreach_instr_safe(instr, block) {
355 if (instr->type != nir_instr_type_intrinsic)
356 continue;
357 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
358 if (intr->intrinsic == nir_intrinsic_store_tf_r600) {
359 return false;
360 }
361 }
362 }
363 }
364 nir_builder builder;
365 nir_builder *b = &builder;
366
367 assert(exec_list_length(&shader->functions) == 1);
368 nir_function *f = (nir_function *)shader->functions.get_head();
369 nir_builder_init(b, f->impl);
370
371 auto outer_comps = outer_tf_components(prim_type);
372 if (!outer_comps)
373 return false;
374
375 unsigned inner_comps = outer_comps - 2;
376 unsigned stride = (inner_comps + outer_comps) * 4;
377
378 b->cursor = nir_after_cf_list(&f->impl->body);
379
380 auto invocation_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_invocation_id);
381 invocation_id->num_components = 1;
382 nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest,
383 invocation_id->num_components, 32, NULL);
384 nir_builder_instr_insert(b, &invocation_id->instr);
385
386 nir_push_if(b, nir_ieq(b, &invocation_id->dest.ssa, nir_imm_int(b, 0)));
387 auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
388 auto rel_patch_id = r600_load_rel_patch_id(b);
389
390 nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
391
392 nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
393 auto tf_outer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
394 tf_outer->num_components = outer_comps;
395 tf_outer->src[0] = nir_src_for_ssa(addr_outer);
396 nir_ssa_dest_init(&tf_outer->instr, &tf_outer->dest,
397 tf_outer->num_components, 32, NULL);
398 nir_intrinsic_set_component(tf_outer, 15);
399 nir_builder_instr_insert(b, &tf_outer->instr);
400
401 std::vector<nir_ssa_def *> tf_out;
402
403
404 auto tf_out_base = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_tess_factor_base_r600);
405 tf_out_base->num_components = 1;
406 nir_ssa_dest_init(&tf_out_base->instr, &tf_out_base->dest,
407 tf_out_base->num_components, 32, NULL);
408 nir_builder_instr_insert(b, &tf_out_base->instr);
409
410 auto out_addr0 = nir_build_alu(b, nir_op_umad24,
411 rel_patch_id,
412 nir_imm_int(b, stride),
413 &tf_out_base->dest.ssa,
414 NULL);
415 int chanx = 0;
416 int chany = 1;
417
418 if (prim_type == PIPE_PRIM_LINES)
419 std::swap(chanx, chany);
420
421
422 auto v0 = nir_vec4(b, out_addr0, nir_channel(b, &tf_outer->dest.ssa, chanx),
423 nir_iadd(b, out_addr0, nir_imm_int(b, 4)),
424 nir_channel(b, &tf_outer->dest.ssa, chany));
425
426 tf_out.push_back(v0);
427 if (outer_comps > 2) {
428 auto v1 = (outer_comps > 3) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
429 nir_channel(b, &tf_outer->dest.ssa, 2),
430 nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
431 nir_channel(b, &tf_outer->dest.ssa, 3)) :
432 nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
433 nir_channel(b, &tf_outer->dest.ssa, 2));
434 tf_out.push_back(v1);
435 }
436
437 if (inner_comps) {
438 nir_ssa_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
439 auto tf_inner = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
440 tf_inner->num_components = inner_comps;
441 tf_inner->src[0] = nir_src_for_ssa(addr1);
442 nir_ssa_dest_init(&tf_inner->instr, &tf_inner->dest,
443 tf_inner->num_components, 32, NULL);
444 nir_intrinsic_set_component(tf_inner, 3);
445 nir_builder_instr_insert(b, &tf_inner->instr);
446
447 auto v2 = (inner_comps > 1) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 16)),
448 nir_channel(b, &tf_inner->dest.ssa, 0),
449 nir_iadd(b, out_addr0, nir_imm_int(b, 20)),
450 nir_channel(b, &tf_inner->dest.ssa, 1)):
451 nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
452 nir_channel(b, &tf_inner->dest.ssa, 0));
453 tf_out.push_back(v2);
454 }
455
456 for (auto tf: tf_out)
457 r600_emit_tf(b, tf);
458
459 nir_pop_if(b, nullptr);
460
461 nir_metadata_preserve(f->impl, nir_metadata_none);
462
463 return true;
464 }