r600/sfn/lower_tess_io: Rework get_tcs_varying_offset
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_nir_lower_tess_io.cpp
#include "sfn_nir.h"

bool r600_lower_tess_io_filter(const nir_instr *instr)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   switch (op->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
      return true;
   default:
      ;
   }
   return false;
}

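/* Emit one of the r600-specific param-base intrinsics (tcs_in/tcs_out) and
 * return its four-component result; the channels are used below as the
 * per-patch/per-vertex strides and base offsets for LDS addressing. */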
static nir_ssa_def *
emit_load_param_base(nir_builder *b, nir_intrinsic_op op)
{
   nir_intrinsic_instr *result = nir_intrinsic_instr_create(b->shader, op);
   nir_ssa_dest_init(&result->instr, &result->dest,
                     4, 32, NULL);
   nir_builder_instr_insert(b, &result->instr);
   return &result->dest.ssa;
}

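/* Map the driver_location of a varying to its fixed byte offset within the
 * per-vertex (or per-patch) record in LDS: POS, PSIZ and the clip distances
 * occupy the first 16-byte slots, generic varyings are packed from 0x40,
 * while in the patch-constant area the tess levels take the first two slots
 * and PATCH varyings follow from 0x20. */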
static int get_tcs_varying_offset(nir_shader *nir, nir_variable_mode mode,
                                  unsigned index)
{
   nir_foreach_variable_with_modes(var, nir, mode) {
      if (var->data.driver_location == index) {
         switch (var->data.location) {
         case VARYING_SLOT_POS:
            return 0;
         case VARYING_SLOT_PSIZ:
            return 0x10;
         case VARYING_SLOT_CLIP_DIST0:
            return 0x20;
         case VARYING_SLOT_CLIP_DIST1:
            return 0x30;
         case VARYING_SLOT_TESS_LEVEL_OUTER:
            return 0;
         case VARYING_SLOT_TESS_LEVEL_INNER:
            return 0x10;
         default:
            if (var->data.location >= VARYING_SLOT_VAR0 &&
                var->data.location <= VARYING_SLOT_VAR31)
               return 0x10 * (var->data.location - VARYING_SLOT_VAR0) + 0x40;

            if (var->data.location >= VARYING_SLOT_PATCH0) {
               return 0x10 * (var->data.location - VARYING_SLOT_PATCH0) + 0x20;
            }
         }
         /* TODO: PATCH is missing */
      }
   }
   return 0;
}

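/* Convenience wrapper for the 24-bit multiply-add ALU op: op1 * op2 + op3. */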
static inline nir_ssa_def *
r600_umad_24(nir_builder *b, nir_ssa_def *op1, nir_ssa_def *op2, nir_ssa_def *op3)
{
   return nir_build_alu(b, nir_op_umad24, op1, op2, op3, NULL);
}

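/* Base LDS address of the current patch:
 * param_base.x * rel_patch_id + param_base.w. */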
static inline nir_ssa_def *
r600_tcs_base_address(nir_builder *b, nir_ssa_def *param_base, nir_ssa_def *rel_patch_id)
{
   return r600_umad_24(b, nir_channel(b, param_base, 0),
                       rel_patch_id,
                       nir_channel(b, param_base, 3));
}

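/* Compute the LDS address of a per-vertex TCS input: the patch base
 * (base.x * patch_id) plus base.y times the vertex index, plus the varying's
 * constant offset and any indirect element offset scaled to 16-byte units. */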
static nir_ssa_def *
emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op)
{
   nir_ssa_def *addr = nir_build_alu(b, nir_op_umul24,
                                     nir_channel(b, base, 0),
                                     patch_id, NULL, NULL);

   auto idx1 = nir_src_as_const_value(op->src[0]);
   if (!idx1 || idx1->u32 != 0)
      addr = r600_umad_24(b, nir_channel(b, base, 1),
                          op->src[0].ssa, addr);

   auto offset = nir_imm_int(b, get_tcs_varying_offset(b->shader, nir_var_shader_in, nir_intrinsic_base(op)));

   auto idx2 = nir_src_as_const_value(op->src[1]);
   if (!idx2 || idx2->u32 != 0)
      offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)));

   return nir_iadd(b, addr, offset);
}

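/* Compute the LDS address of a per-vertex TCS/TES output: base.x * patch_id
 * + base.z selects the patch's output block, base.y times the vertex index
 * selects the vertex, and the varying offset plus the indirect element offset
 * (<< 4) are added on top. src_offset gives the position of the vertex index
 * in the intrinsic's sources (0 for loads, 1 for stores). */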
static nir_ssa_def *
emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset)
{
   nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0),
                                     patch_id,
                                     nir_channel(b, base, 2));
   nir_ssa_def *addr2 = r600_umad_24(b, nir_channel(b, base, 1),
                                     op->src[src_offset].ssa, addr1);

   int offset = get_tcs_varying_offset(b->shader, mode, nir_intrinsic_base(op));
   return nir_iadd(b, nir_iadd(b, addr2,
                               nir_ishl(b, op->src[src_offset + 1].ssa, nir_imm_int(b, 4))),
                   nir_imm_int(b, offset));
}

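/* Offsets of the tess factor components in LDS: counts 1-4 address the outer
 * factors starting at byte 0, counts 5 and 6 address the inner factors
 * starting at byte 16. */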
static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents)
{
   switch (ncomponents) {
   /* tess outer offsets */
   case 1: return nir_imm_int(b, 0);
   case 2: return nir_imm_ivec2(b, 0, 4);
   case 3: return r600_imm_ivec3(b, 0, 4, 8);
   case 4: return nir_imm_ivec4(b, 0, 4, 8, 12);
   /* tess inner offsets */
   case 5: return nir_imm_int(b, 16);
   case 6: return nir_imm_ivec2(b, 16, 20);
   default:
      debug_printf("Got %d components\n", ncomponents);
      unreachable("Unsupported component count");
   }
}

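/* Replace a lowered load intrinsic with a load_local_shared_r600 at the given
 * base address (one address per loaded component) and rewrite all uses. */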
static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
{
   nir_intrinsic_instr *load_tcs_in = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
   load_tcs_in->num_components = op->num_components;
   nir_ssa_dest_init(&load_tcs_in->instr, &load_tcs_in->dest,
                     load_tcs_in->num_components, 32, NULL);

   nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group(b, load_tcs_in->num_components));
   load_tcs_in->src[0] = nir_src_for_ssa(addr_outer);
   nir_intrinsic_set_component(load_tcs_in, nir_intrinsic_component(op));
   nir_builder_instr_insert(b, &load_tcs_in->instr);
   nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load_tcs_in->dest.ssa));
   nir_instr_remove(&op->instr);
}

static nir_ssa_def *
r600_load_rel_patch_id(nir_builder *b)
{
   auto patch_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_rel_patch_id_r600);
   nir_ssa_dest_init(&patch_id->instr, &patch_id->dest,
                     1, 32, NULL);
   nir_builder_instr_insert(b, &patch_id->instr);
   return &patch_id->dest.ssa;
}

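/* Store up to four components to LDS as at most two store_local_shared_r600
 * operations, one for the xy pair and one for the zw pair, skipping pairs not
 * covered by the write mask. */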
static void
emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
{
   for (int i = 0; i < 2; ++i) {
      unsigned test_mask = (0x3 << 2 * i);
      if (!(nir_intrinsic_write_mask(op) & test_mask))
         continue;

      auto store_tcs_out = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_local_shared_r600);
      unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
      nir_intrinsic_set_write_mask(store_tcs_out, writemask);
      store_tcs_out->src[0] = nir_src_for_ssa(op->src[0].ssa);
      store_tcs_out->num_components = store_tcs_out->src[0].ssa->num_components;
      bool start_even = (writemask & (1u << (2 * i)));

      auto addr2 = nir_iadd(b, addr, nir_imm_int(b, 8 * i + (start_even ? 0 : 4)));
      store_tcs_out->src[1] = nir_src_for_ssa(addr2);

      nir_builder_instr_insert(b, &store_tcs_out->instr);
   }
}

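/* Add the varying's constant offset and the indirect element offset
 * (in 16-byte units) to an already computed base address. */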
static nir_ssa_def *
emil_tcs_io_offset(nir_builder *b, nir_ssa_def *addr, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset)
{
   int offset = get_tcs_varying_offset(b->shader, mode, nir_intrinsic_base(op));
   return nir_iadd(b, nir_iadd(b, addr,
                               nir_ishl(b, op->src[src_offset].ssa, nir_imm_int(b, 4))),
                   nir_imm_int(b, offset));
}

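/* Number of outer tessellation factors for the given primitive type. */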
inline unsigned
outer_tf_components(pipe_prim_type prim_type)
{
   switch (prim_type) {
   case PIPE_PRIM_LINES: return 2;
   case PIPE_PRIM_TRIANGLES: return 3;
   case PIPE_PRIM_QUADS: return 4;
   default:
      return 0;
   }
}

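/* Lower a single tessellation IO intrinsic to LDS access: loads become
 * load_local_shared_r600, stores become store_local_shared_r600, with the
 * addresses built from the tcs_in/tcs_out param-base values and the relative
 * patch id. */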
static bool
r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum pipe_prim_type prim_type)
{
   static nir_ssa_def *load_in_param_base = nullptr;
   static nir_ssa_def *load_out_param_base = nullptr;

   b->cursor = nir_before_instr(instr);
   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);

   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
      load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
   } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
   } else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
      load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
   }

   auto rel_patch_id = r600_load_rel_patch_id(b);

   unsigned tf_inner_address_offset = 0;
   unsigned ncomps_correct = 0;

   switch (op->intrinsic) {
   case nir_intrinsic_load_patch_vertices_in: {
      auto vertices_in = nir_channel(b, load_in_param_base, 2);
      nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(vertices_in));
      nir_instr_remove(&op->instr);
      return true;
   }
   case nir_intrinsic_load_per_vertex_input: {
      nir_ssa_def *addr =
            b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
               emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op) :
               emil_lsd_out_addr(b, load_in_param_base, rel_patch_id, op, nir_var_shader_in, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_per_vertex_output: {
      nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_per_vertex_output: {
      nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_output: {
      nir_ssa_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL) ?
            r600_tcs_base_address(b, load_out_param_base, rel_patch_id) :
            nir_build_alu(b, nir_op_umul24,
                          nir_channel(b, load_out_param_base, 1),
                          rel_patch_id, NULL, NULL);
      addr = emil_tcs_io_offset(b, addr, op, nir_var_shader_out, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_output: {
      nir_ssa_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
      addr = emil_tcs_io_offset(b, addr, op, nir_var_shader_out, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_input: {
      nir_ssa_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
      addr = emil_tcs_io_offset(b, addr, op, nir_var_shader_in, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_tess_level_inner:
      tf_inner_address_offset = 4;
      ncomps_correct = 2;
      /* fallthrough */
   case nir_intrinsic_load_tess_level_outer: {
      auto ncomps = outer_tf_components(prim_type);
      if (!ncomps)
         return false;
      ncomps -= ncomps_correct;
      auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
      auto rel_patch_id = r600_load_rel_patch_id(b);
      nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
      nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));

      auto tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
      tf->num_components = ncomps;
      tf->src[0] = nir_src_for_ssa(addr_outer);
      nir_ssa_dest_init(&tf->instr, &tf->dest,
                        tf->num_components, 32, NULL);
      nir_intrinsic_set_component(tf, 0);
      nir_builder_instr_insert(b, &tf->instr);

      nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&tf->dest.ssa));
      nir_instr_remove(instr);
      return true;
   }
   default:
      ;
   }

   return false;
}

bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);

         nir_foreach_block(block, function->impl) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               if (r600_lower_tess_io_filter(instr))
                  progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
            }
         }
      }
   }
   return progress;
}

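/* Emit a store_tf_r600 that writes the (address, value) pairs assembled by
 * the caller to the tess factor buffer. */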
bool r600_emit_tf(nir_builder *b, nir_ssa_def *val)
{
   nir_intrinsic_instr *store_tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);
   store_tf->num_components = val->num_components;
   store_tf->src[0] = nir_src_for_ssa(val);
   nir_builder_instr_insert(b, &store_tf->instr);
   return true;
}

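/* Append code at the end of the TCS that, for invocation zero only, reads the
 * tessellation factors back from LDS and writes them to the TF buffer via
 * store_tf_r600. Returns false if the shader already emits TF stores. */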
bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type)
{
   if (shader->info.stage != MESA_SHADER_TESS_CTRL)
      return false;

   nir_foreach_function(function, shader) {
      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_store_tf_r600) {
               return false;
            }
         }
      }
   }
   nir_builder builder;
   nir_builder *b = &builder;

   assert(exec_list_length(&shader->functions) == 1);
   nir_function *f = (nir_function *)shader->functions.get_head();
   nir_builder_init(b, f->impl);

   auto outer_comps = outer_tf_components(prim_type);
   if (!outer_comps)
      return false;

   unsigned inner_comps = outer_comps - 2;
   unsigned stride = (inner_comps + outer_comps) * 4;

   b->cursor = nir_after_cf_list(&f->impl->body);

   auto invocation_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_invocation_id);
   nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest,
                     1, 32, NULL);
   nir_builder_instr_insert(b, &invocation_id->instr);

   nir_push_if(b, nir_ieq(b, &invocation_id->dest.ssa, nir_imm_int(b, 0)));
   auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
   auto rel_patch_id = r600_load_rel_patch_id(b);

   nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);

   nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
   auto tf_outer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
   tf_outer->num_components = outer_comps;
   tf_outer->src[0] = nir_src_for_ssa(addr_outer);
   nir_ssa_dest_init(&tf_outer->instr, &tf_outer->dest,
                     tf_outer->num_components, 32, NULL);
   nir_intrinsic_set_component(tf_outer, 15);
   nir_builder_instr_insert(b, &tf_outer->instr);

   std::vector<nir_ssa_def *> tf_out;

   auto tf_out_base = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_tess_factor_base_r600);
   nir_ssa_dest_init(&tf_out_base->instr, &tf_out_base->dest,
                     1, 32, NULL);
   nir_builder_instr_insert(b, &tf_out_base->instr);

   auto out_addr0 = nir_build_alu(b, nir_op_umad24,
                                  rel_patch_id,
                                  nir_imm_int(b, stride),
                                  &tf_out_base->dest.ssa,
                                  NULL);
   int chanx = 0;
   int chany = 1;

   if (prim_type == PIPE_PRIM_LINES)
      std::swap(chanx, chany);

   auto v0 = nir_vec4(b, out_addr0, nir_channel(b, &tf_outer->dest.ssa, chanx),
                      nir_iadd(b, out_addr0, nir_imm_int(b, 4)),
                      nir_channel(b, &tf_outer->dest.ssa, chany));

   tf_out.push_back(v0);
   if (outer_comps > 2) {
      auto v1 = (outer_comps > 3) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
                                             nir_channel(b, &tf_outer->dest.ssa, 2),
                                             nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
                                             nir_channel(b, &tf_outer->dest.ssa, 3)) :
                                    nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
                                             nir_channel(b, &tf_outer->dest.ssa, 2));
      tf_out.push_back(v1);
   }

   if (inner_comps) {
      nir_ssa_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
      auto tf_inner = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
      tf_inner->num_components = inner_comps;
      tf_inner->src[0] = nir_src_for_ssa(addr1);
      nir_ssa_dest_init(&tf_inner->instr, &tf_inner->dest,
                        tf_inner->num_components, 32, NULL);
      nir_intrinsic_set_component(tf_inner, 3);
      nir_builder_instr_insert(b, &tf_inner->instr);

      auto v2 = (inner_comps > 1) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 16)),
                                             nir_channel(b, &tf_inner->dest.ssa, 0),
                                             nir_iadd(b, out_addr0, nir_imm_int(b, 20)),
                                             nir_channel(b, &tf_inner->dest.ssa, 1)) :
                                    nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
                                             nir_channel(b, &tf_inner->dest.ssa, 0));
      tf_out.push_back(v2);
   }

   for (auto tf : tf_out)
      r600_emit_tf(b, tf);

   nir_pop_if(b, nullptr);

   nir_metadata_preserve(f->impl, nir_metadata_none);

   return true;
}