radv: Gather info for deref instr based load/store.
[mesa.git] src/amd/vulkan/radv_shader_info.c
/*
 * Copyright © 2017 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "radv_private.h"
#include "radv_shader.h"
#include "nir/nir.h"
#include "nir/nir_deref.h"

static void mark_sampler_desc(const nir_variable *var,
			      struct radv_shader_info *info)
{
	info->desc_set_used_mask |= (1 << var->data.descriptor_set);
}

static void mark_ls_output(struct radv_shader_info *info,
			   uint32_t param, int num_slots)
{
	uint64_t mask = (1ull << num_slots) - 1ull;
	info->vs.ls_outputs_written |= (mask << param);
}

static void mark_tess_output(struct radv_shader_info *info,
			     bool is_patch, uint32_t param, int num_slots)
{
	uint64_t mask = (1ull << num_slots) - 1ull;
	if (is_patch)
		info->tcs.patch_outputs_written |= (mask << param);
	else
		info->tcs.outputs_written |= (mask << param);
}

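/* Walk a legacy nir_deref_var chain and return the constant part of the
 * offset, measured in attribute slots.  Only direct array indices and
 * struct field offsets are accumulated; any indirect (non-constant) part
 * of the access is ignored.  For example, with "out vec4 foo[4];" a
 * direct store to foo[2] yields a constant offset of 2 slots.
 */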
static void get_deref_offset(nir_deref_var *deref, unsigned *const_out)
{
	nir_deref *tail = &deref->deref;
	unsigned const_offset = 0;

	if (deref->var->data.compact) {
		assert(tail->child->deref_type == nir_deref_type_array);
		assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));

		nir_deref_array *deref_array = nir_deref_as_array(tail->child);
		/* We always lower indirect dereferences for "compact" array vars. */
		assert(deref_array->deref_array_type == nir_deref_array_type_direct);

		*const_out = deref_array->base_offset;
		return;
	}

	while (tail->child != NULL) {
		const struct glsl_type *parent_type = tail->type;
		tail = tail->child;

		if (tail->deref_type == nir_deref_type_array) {
			nir_deref_array *deref_array = nir_deref_as_array(tail);
			unsigned size = glsl_count_attribute_slots(tail->type, false);

			const_offset += size * deref_array->base_offset;
		} else if (tail->deref_type == nir_deref_type_struct) {
			nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

			for (unsigned i = 0; i < deref_struct->index; i++) {
				const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
				const_offset += glsl_count_attribute_slots(ft, false);
			}
		} else
			unreachable("unsupported deref type");
	}

	*const_out = const_offset;
}

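/* Equivalent of get_deref_offset() for deref instructions: walk the deref
 * path from the variable to the final access and sum the slot offsets of
 * all constant array indices and struct field selections.  Array indices
 * that are not compile-time constants contribute nothing, so only the
 * constant part of the offset is returned.
 */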
static void
get_deref_instr_offset(nir_deref_instr *instr,
		       unsigned *const_out)
{
	nir_variable *var = nir_deref_instr_get_variable(instr);
	nir_deref_path path;
	unsigned idx_lvl = 1;

	if (var->data.compact) {
		assert(instr->deref_type == nir_deref_type_array);
		nir_const_value *v = nir_src_as_const_value(instr->arr.index);
		assert(v);
		*const_out = v->u32[0];
		return;
	}

	nir_deref_path_init(&path, instr, NULL);

	uint32_t const_offset = 0;

	for (; path.path[idx_lvl]; ++idx_lvl) {
		const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
		if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
			unsigned index = path.path[idx_lvl]->strct.index;

			for (unsigned i = 0; i < index; i++) {
				const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
				const_offset += glsl_count_attribute_slots(ft, false);
			}
		} else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
			unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, false);
			nir_const_value *v = nir_src_as_const_value(path.path[idx_lvl]->arr.index);
			if (v)
				const_offset += v->u32[0] * size;
		} else
			unreachable("Unhandled deref type in get_deref_instr_offset");
	}

	*const_out = const_offset;

	nir_deref_path_finish(&path);
}

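/* Record which components of a vertex shader input are actually read, as a
 * per-attribute component mask.
 */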
static void
gather_intrinsic_load_var_info(const nir_shader *nir,
			       const nir_intrinsic_instr *instr,
			       struct radv_shader_info *info)
{
	switch (nir->info.stage) {
	case MESA_SHADER_VERTEX: {
		nir_variable *var = instr->intrinsic == nir_intrinsic_load_var ? instr->variables[0]->var :
			nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));

		if (var->data.mode == nir_var_shader_in) {
			unsigned idx = var->data.location;
			uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);

			info->vs.input_usage_mask[idx] |=
				mask << var->data.location_frac;
		}
		break;
	}
	default:
		break;
	}
}

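/* Record which output slots and components a shader writes.  VS/GS/TES
 * outputs are tracked as per-slot component write masks; TCS outputs are
 * tracked as written per-vertex/per-patch slot masks via
 * mark_tess_output().
 */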
static void
gather_intrinsic_store_var_info(const nir_shader *nir,
				const nir_intrinsic_instr *instr,
				struct radv_shader_info *info)
{
	nir_variable *var = instr->intrinsic == nir_intrinsic_store_var ? instr->variables[0]->var :
		nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));

	if (var->data.mode == nir_var_shader_out) {
		unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
		unsigned idx = var->data.location;
		unsigned comp = var->data.location_frac;
		unsigned const_offset = 0;

		if (instr->intrinsic == nir_intrinsic_store_var)
			get_deref_offset(instr->variables[0], &const_offset);
		else
			get_deref_instr_offset(nir_instr_as_deref(instr->src[0].ssa->parent_instr), &const_offset);

		switch (nir->info.stage) {
		case MESA_SHADER_VERTEX:
			for (unsigned i = 0; i < attrib_count; i++) {
				info->vs.output_usage_mask[idx + i + const_offset] |=
					instr->const_index[0] << comp;
			}
			break;
		case MESA_SHADER_GEOMETRY:
			for (unsigned i = 0; i < attrib_count; i++) {
				info->gs.output_usage_mask[idx + i + const_offset] |=
					instr->const_index[0] << comp;
			}
			break;
		case MESA_SHADER_TESS_EVAL:
			for (unsigned i = 0; i < attrib_count; i++) {
				info->tes.output_usage_mask[idx + i + const_offset] |=
					instr->const_index[0] << comp;
			}
			break;
		case MESA_SHADER_TESS_CTRL: {
			unsigned param = shader_io_get_unique_index(idx);
			const struct glsl_type *type = var->type;

			if (!var->data.patch)
				type = glsl_get_array_element(var->type);

			unsigned slots =
				var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
						  : glsl_count_attribute_slots(type, false);

			if (idx == VARYING_SLOT_CLIP_DIST0)
				slots = (nir->info.clip_distance_array_size +
					 nir->info.cull_distance_array_size > 4) ? 2 : 1;

			mark_tess_output(info, var->data.patch, param, slots);
			break;
		}
		default:
			break;
		}
	}
}

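/* Gather the per-intrinsic information: system values read, descriptor
 * sets referenced, and side effects such as a fragment shader writing
 * memory through images or SSBOs.
 */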
static void
gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
		      struct radv_shader_info *info)
{
	switch (instr->intrinsic) {
	case nir_intrinsic_interp_var_at_sample:
	case nir_intrinsic_interp_deref_at_sample:
		info->ps.needs_sample_positions = true;
		break;
	case nir_intrinsic_load_draw_id:
		info->vs.needs_draw_id = true;
		break;
	case nir_intrinsic_load_instance_id:
		info->vs.needs_instance_id = true;
		break;
	case nir_intrinsic_load_num_work_groups:
		info->cs.uses_grid_size = true;
		break;
	case nir_intrinsic_load_local_invocation_id:
	case nir_intrinsic_load_work_group_id: {
		unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
		while (mask) {
			unsigned i = u_bit_scan(&mask);

			if (instr->intrinsic == nir_intrinsic_load_work_group_id)
				info->cs.uses_block_id[i] = true;
			else
				info->cs.uses_thread_id[i] = true;
		}
		break;
	}
	case nir_intrinsic_load_local_invocation_index:
	case nir_intrinsic_load_subgroup_id:
	case nir_intrinsic_load_num_subgroups:
		info->cs.uses_local_invocation_idx = true;
		break;
	case nir_intrinsic_load_sample_id:
		info->ps.force_persample = true;
		break;
	case nir_intrinsic_load_sample_pos:
		info->ps.force_persample = true;
		break;
	case nir_intrinsic_load_view_index:
		info->needs_multiview_view_index = true;
		if (nir->info.stage == MESA_SHADER_FRAGMENT)
			info->ps.layer_input = true;
		break;
	case nir_intrinsic_load_invocation_id:
		info->uses_invocation_id = true;
		break;
	case nir_intrinsic_load_primitive_id:
		info->uses_prim_id = true;
		break;
	case nir_intrinsic_load_push_constant:
		info->loads_push_constants = true;
		break;
	case nir_intrinsic_vulkan_resource_index:
		info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr));
		break;
	case nir_intrinsic_image_var_load:
	case nir_intrinsic_image_var_store:
	case nir_intrinsic_image_var_atomic_add:
	case nir_intrinsic_image_var_atomic_min:
	case nir_intrinsic_image_var_atomic_max:
	case nir_intrinsic_image_var_atomic_and:
	case nir_intrinsic_image_var_atomic_or:
	case nir_intrinsic_image_var_atomic_xor:
	case nir_intrinsic_image_var_atomic_exchange:
	case nir_intrinsic_image_var_atomic_comp_swap:
	case nir_intrinsic_image_var_size: {
		const struct glsl_type *type = glsl_without_array(instr->variables[0]->var->type);

		enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
		if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
		    dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
			info->ps.layer_input = true;
			info->ps.uses_input_attachments = true;
		}
		mark_sampler_desc(instr->variables[0]->var, info);

		/* Everything except plain loads and size queries writes memory. */
		if (instr->intrinsic == nir_intrinsic_image_var_store ||
		    instr->intrinsic == nir_intrinsic_image_var_atomic_add ||
		    instr->intrinsic == nir_intrinsic_image_var_atomic_min ||
		    instr->intrinsic == nir_intrinsic_image_var_atomic_max ||
		    instr->intrinsic == nir_intrinsic_image_var_atomic_and ||
		    instr->intrinsic == nir_intrinsic_image_var_atomic_or ||
		    instr->intrinsic == nir_intrinsic_image_var_atomic_xor ||
		    instr->intrinsic == nir_intrinsic_image_var_atomic_exchange ||
		    instr->intrinsic == nir_intrinsic_image_var_atomic_comp_swap) {
			if (nir->info.stage == MESA_SHADER_FRAGMENT)
				info->ps.writes_memory = true;
		}
		break;
	}
	case nir_intrinsic_image_deref_load:
	case nir_intrinsic_image_deref_store:
	case nir_intrinsic_image_deref_atomic_add:
	case nir_intrinsic_image_deref_atomic_min:
	case nir_intrinsic_image_deref_atomic_max:
	case nir_intrinsic_image_deref_atomic_and:
	case nir_intrinsic_image_deref_atomic_or:
	case nir_intrinsic_image_deref_atomic_xor:
	case nir_intrinsic_image_deref_atomic_exchange:
	case nir_intrinsic_image_deref_atomic_comp_swap:
	case nir_intrinsic_image_deref_size: {
		nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
		const struct glsl_type *type = glsl_without_array(var->type);

		enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
		if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
		    dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
			info->ps.layer_input = true;
			info->ps.uses_input_attachments = true;
		}
		mark_sampler_desc(var, info);

		/* Everything except plain loads and size queries writes memory. */
		if (instr->intrinsic == nir_intrinsic_image_deref_store ||
		    instr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
		    instr->intrinsic == nir_intrinsic_image_deref_atomic_min ||
		    instr->intrinsic == nir_intrinsic_image_deref_atomic_max ||
		    instr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
		    instr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
		    instr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
		    instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
		    instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
			if (nir->info.stage == MESA_SHADER_FRAGMENT)
				info->ps.writes_memory = true;
		}
		break;
	}
	case nir_intrinsic_store_ssbo:
	case nir_intrinsic_ssbo_atomic_add:
	case nir_intrinsic_ssbo_atomic_imin:
	case nir_intrinsic_ssbo_atomic_umin:
	case nir_intrinsic_ssbo_atomic_imax:
	case nir_intrinsic_ssbo_atomic_umax:
	case nir_intrinsic_ssbo_atomic_and:
	case nir_intrinsic_ssbo_atomic_or:
	case nir_intrinsic_ssbo_atomic_xor:
	case nir_intrinsic_ssbo_atomic_exchange:
	case nir_intrinsic_ssbo_atomic_comp_swap:
		if (nir->info.stage == MESA_SHADER_FRAGMENT)
			info->ps.writes_memory = true;
		break;
	case nir_intrinsic_load_var:
	case nir_intrinsic_load_deref:
		gather_intrinsic_load_var_info(nir, instr, info);
		break;
	case nir_intrinsic_store_var:
	case nir_intrinsic_store_deref:
		gather_intrinsic_store_var_info(nir, instr, info);
		break;
	default:
		break;
	}
}

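/* Mark the descriptor sets referenced by the texture and sampler sources
 * of a texture instruction.  Both deref-based sources and the legacy
 * texture/sampler variable pointers are handled.
 */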
static void
gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr,
		struct radv_shader_info *info)
{
	for (unsigned i = 0; i < instr->num_srcs; i++) {
		switch (instr->src[i].src_type) {
		case nir_tex_src_texture_deref:
		case nir_tex_src_sampler_deref:
			mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
			break;
		default:
			break;
		}
	}

	if (instr->sampler)
		mark_sampler_desc(instr->sampler->var, info);
	if (instr->texture)
		mark_sampler_desc(instr->texture->var, info);
}

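/* Scan a basic block and dispatch each instruction to the matching
 * gatherer above.
 */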
static void
gather_info_block(const nir_shader *nir, const nir_block *block,
		  struct radv_shader_info *info)
{
	nir_foreach_instr(instr, block) {
		switch (instr->type) {
		case nir_instr_type_intrinsic:
			gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info);
			break;
		case nir_instr_type_tex:
			gather_tex_info(nir, nir_instr_as_tex(instr), info);
			break;
		default:
			break;
		}
	}
}

static void
gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var,
			  struct radv_shader_info *info)
{
	int idx = var->data.location;

	if (idx >= VERT_ATTRIB_GENERIC0 && idx <= VERT_ATTRIB_GENERIC15)
		info->vs.has_vertex_buffers = true;
}

static void
gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
			  struct radv_shader_info *info)
{
	const struct glsl_type *type = glsl_without_array(var->type);
	int idx = var->data.location;

	switch (idx) {
	case VARYING_SLOT_PNTC:
		info->ps.has_pcoord = true;
		break;
	case VARYING_SLOT_PRIMITIVE_ID:
		info->ps.prim_id_input = true;
		break;
	case VARYING_SLOT_LAYER:
		info->ps.layer_input = true;
		break;
	default:
		break;
	}

	if (glsl_get_base_type(type) == GLSL_TYPE_FLOAT) {
		if (var->data.sample)
			info->ps.force_persample = true;
	}
}

static void
gather_info_input_decl(const nir_shader *nir, const nir_variable *var,
		       struct radv_shader_info *info)
{
	switch (nir->info.stage) {
	case MESA_SHADER_VERTEX:
		gather_info_input_decl_vs(nir, var, info);
		break;
	case MESA_SHADER_FRAGMENT:
		gather_info_input_decl_ps(nir, var, info);
		break;
	default:
		break;
	}
}

static void
gather_info_output_decl_ls(const nir_shader *nir, const nir_variable *var,
			   struct radv_shader_info *info)
{
	int idx = var->data.location;
	unsigned param = shader_io_get_unique_index(idx);
	int num_slots = glsl_count_attribute_slots(var->type, false);
	if (idx == VARYING_SLOT_CLIP_DIST0)
		num_slots = (nir->info.clip_distance_array_size +
			     nir->info.cull_distance_array_size > 4) ? 2 : 1;
	mark_ls_output(info, param, num_slots);
}

static void
gather_info_output_decl_ps(const nir_shader *nir, const nir_variable *var,
			   struct radv_shader_info *info)
{
	int idx = var->data.location;

	switch (idx) {
	case FRAG_RESULT_DEPTH:
		info->ps.writes_z = true;
		break;
	case FRAG_RESULT_STENCIL:
		info->ps.writes_stencil = true;
		break;
	case FRAG_RESULT_SAMPLE_MASK:
		info->ps.writes_sample_mask = true;
		break;
	default:
		break;
	}
}

static void
gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
			struct radv_shader_info *info,
			const struct radv_nir_compiler_options *options)
{
	switch (nir->info.stage) {
	case MESA_SHADER_FRAGMENT:
		gather_info_output_decl_ps(nir, var, info);
		break;
	case MESA_SHADER_VERTEX:
		if (options->key.vs.as_ls)
			gather_info_output_decl_ls(nir, var, info);
		break;
	default:
		break;
	}
}

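/* Entry point of the info pass: walk the shader's input/output
 * declarations and the body of its entry point and fill *info.
 * Pipelines with dynamic descriptor offsets are assumed to read push
 * constants, since that is where radv passes the dynamic offsets.
 */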
void
radv_nir_shader_info_pass(const struct nir_shader *nir,
			  const struct radv_nir_compiler_options *options,
			  struct radv_shader_info *info)
{
	struct nir_function *func =
		(struct nir_function *)exec_list_get_head_const(&nir->functions);

	if (options->layout && options->layout->dynamic_offset_count)
		info->loads_push_constants = true;

	nir_foreach_variable(variable, &nir->inputs)
		gather_info_input_decl(nir, variable, info);

	nir_foreach_block(block, func->impl) {
		gather_info_block(nir, block, info);
	}

	nir_foreach_variable(variable, &nir->outputs)
		gather_info_output_decl(nir, variable, info, options);
}