anv/pipeline: Convert apply_pipeline_layout to deref instructions
[mesa.git] / src / intel / vulkan / anv_nir_apply_pipeline_layout.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_nir.h"
25 #include "program/prog_parameter.h"
26 #include "nir/nir_builder.h"
27
28 struct apply_pipeline_layout_state {
29 nir_shader *shader;
30 nir_builder builder;
31
32 struct anv_pipeline_layout *layout;
33 bool add_bounds_checks;
34
35 struct {
36 BITSET_WORD *used;
37 uint8_t *surface_offsets;
38 uint8_t *sampler_offsets;
39 uint8_t *image_offsets;
40 } set[MAX_SETS];
41 };
42
43 static void
44 add_binding(struct apply_pipeline_layout_state *state,
45 uint32_t set, uint32_t binding)
46 {
47 BITSET_SET(state->set[set].used, binding);
48 }
49
50 static void
51 add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var)
52 {
53 add_binding(state, var->data.descriptor_set, var->data.binding);
54 }
55
56 static void
57 add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
58 {
59 nir_deref_instr *deref = nir_src_as_deref(src);
60 add_var_binding(state, nir_deref_instr_get_variable(deref));
61 }
62
63 static void
64 add_tex_src_binding(struct apply_pipeline_layout_state *state,
65 nir_tex_instr *tex, nir_tex_src_type deref_src_type)
66 {
67 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
68 if (deref_src_idx < 0)
69 return;
70
71 add_deref_src_binding(state, tex->src[deref_src_idx].src);
72 }
73
74 static void
75 get_used_bindings_block(nir_block *block,
76 struct apply_pipeline_layout_state *state)
77 {
78 nir_foreach_instr_safe(instr, block) {
79 switch (instr->type) {
80 case nir_instr_type_intrinsic: {
81 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
82 switch (intrin->intrinsic) {
83 case nir_intrinsic_vulkan_resource_index:
84 add_binding(state, nir_intrinsic_desc_set(intrin),
85 nir_intrinsic_binding(intrin));
86 break;
87
88 case nir_intrinsic_image_deref_load:
89 case nir_intrinsic_image_deref_store:
90 case nir_intrinsic_image_deref_atomic_add:
91 case nir_intrinsic_image_deref_atomic_min:
92 case nir_intrinsic_image_deref_atomic_max:
93 case nir_intrinsic_image_deref_atomic_and:
94 case nir_intrinsic_image_deref_atomic_or:
95 case nir_intrinsic_image_deref_atomic_xor:
96 case nir_intrinsic_image_deref_atomic_exchange:
97 case nir_intrinsic_image_deref_atomic_comp_swap:
98 case nir_intrinsic_image_deref_size:
99 case nir_intrinsic_image_deref_samples:
100 add_deref_src_binding(state, intrin->src[0]);
101 break;
102
103 default:
104 break;
105 }
106 break;
107 }
108 case nir_instr_type_tex: {
109 nir_tex_instr *tex = nir_instr_as_tex(instr);
110 add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
111 add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
112 break;
113 }
114 default:
115 continue;
116 }
117 }
118 }
119
120 static void
121 lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
122 struct apply_pipeline_layout_state *state)
123 {
124 nir_builder *b = &state->builder;
125
126 b->cursor = nir_before_instr(&intrin->instr);
127
128 uint32_t set = nir_intrinsic_desc_set(intrin);
129 uint32_t binding = nir_intrinsic_binding(intrin);
130
131 uint32_t surface_index = state->set[set].surface_offsets[binding];
132 uint32_t array_size =
133 state->layout->set[set].layout->binding[binding].array_size;
134
135 nir_const_value *const_array_index = nir_src_as_const_value(intrin->src[0]);
136
137 nir_ssa_def *block_index;
138 if (const_array_index) {
139 unsigned array_index = const_array_index->u32[0];
140 array_index = MIN2(array_index, array_size - 1);
141 block_index = nir_imm_int(b, surface_index + array_index);
142 } else {
143 block_index = nir_ssa_for_src(b, intrin->src[0], 1);
144
145 if (state->add_bounds_checks)
146 block_index = nir_umin(b, block_index, nir_imm_int(b, array_size - 1));
147
148 block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index);
149 }
150
151 assert(intrin->dest.is_ssa);
152 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
153 nir_instr_remove(&intrin->instr);
154 }
155
156 static void
157 lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
158 struct apply_pipeline_layout_state *state)
159 {
160 nir_builder *b = &state->builder;
161
162 /* For us, the resource indices are just indices into the binding table and
163 * array elements are sequential. A resource_reindex just turns into an
164 * add of the two indices.
165 */
166 assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
167 nir_ssa_def *new_index = nir_iadd(b, intrin->src[0].ssa,
168 intrin->src[1].ssa);
169
170 assert(intrin->dest.is_ssa);
171 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index));
172 nir_instr_remove(&intrin->instr);
173 }
174
175 static void
176 lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
177 unsigned *base_index,
178 struct apply_pipeline_layout_state *state)
179 {
180 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
181 if (deref_src_idx < 0)
182 return;
183
184 nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
185 nir_variable *var = nir_deref_instr_get_variable(deref);
186
187 unsigned set = var->data.descriptor_set;
188 unsigned binding = var->data.binding;
189 unsigned array_size =
190 state->layout->set[set].layout->binding[binding].array_size;
191
192 nir_tex_src_type offset_src_type;
193 if (deref_src_type == nir_tex_src_texture_deref) {
194 offset_src_type = nir_tex_src_texture_offset;
195 *base_index = state->set[set].surface_offsets[binding];
196 } else {
197 assert(deref_src_type == nir_tex_src_sampler_deref);
198 offset_src_type = nir_tex_src_sampler_offset;
199 *base_index = state->set[set].sampler_offsets[binding];
200 }
201
202 nir_ssa_def *index = NULL;
203 if (deref->deref_type != nir_deref_type_var) {
204 assert(deref->deref_type == nir_deref_type_array);
205
206 nir_const_value *const_index = nir_src_as_const_value(deref->arr.index);
207 if (const_index) {
208 *base_index += MIN2(const_index->u32[0], array_size - 1);
209 } else {
210 nir_builder *b = &state->builder;
211
212 /* From VK_KHR_sampler_ycbcr_conversion:
213 *
214 * If sampler Y’CBCR conversion is enabled, the combined image
215 * sampler must be indexed only by constant integral expressions when
216 * aggregated into arrays in shader code, irrespective of the
217 * shaderSampledImageArrayDynamicIndexing feature.
218 */
219 assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);
220
221 index = nir_ssa_for_src(b, deref->arr.index, 1);
222
223 if (state->add_bounds_checks)
224 index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
225 }
226 }
227
228 if (index) {
229 nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src,
230 nir_src_for_ssa(index));
231 tex->src[deref_src_idx].src_type = offset_src_type;
232 } else {
233 nir_tex_instr_remove_src(tex, deref_src_idx);
234 }
235 }
236
237 static uint32_t
238 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
239 {
240 int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
241 if (plane_src_idx < 0)
242 return 0;
243
244 unsigned plane =
245 nir_src_as_const_value(tex->src[plane_src_idx].src)->u32[0];
246
247 nir_tex_instr_remove_src(tex, plane_src_idx);
248
249 return plane;
250 }
251
252 static void
253 lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
254 {
255 state->builder.cursor = nir_before_instr(&tex->instr);
256
257 unsigned plane = tex_instr_get_and_remove_plane_src(tex);
258
259 lower_tex_deref(tex, nir_tex_src_texture_deref,
260 &tex->texture_index, state);
261 tex->texture_index += plane;
262
263 lower_tex_deref(tex, nir_tex_src_sampler_deref,
264 &tex->sampler_index, state);
265 tex->sampler_index += plane;
266
267 /* The backend only ever uses this to mark used surfaces. We don't care
268 * about that little optimization so it just needs to be non-zero.
269 */
270 tex->texture_array_size = 1;
271 }
272
273 static void
274 apply_pipeline_layout_block(nir_block *block,
275 struct apply_pipeline_layout_state *state)
276 {
277 nir_foreach_instr_safe(instr, block) {
278 switch (instr->type) {
279 case nir_instr_type_intrinsic: {
280 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
281 switch (intrin->intrinsic) {
282 case nir_intrinsic_vulkan_resource_index:
283 lower_res_index_intrinsic(intrin, state);
284 break;
285 case nir_intrinsic_vulkan_resource_reindex:
286 lower_res_reindex_intrinsic(intrin, state);
287 break;
288 default:
289 break;
290 }
291 break;
292 }
293 case nir_instr_type_tex:
294 lower_tex(nir_instr_as_tex(instr), state);
295 break;
296 default:
297 continue;
298 }
299 }
300 }
301
302 static void
303 setup_vec4_uniform_value(uint32_t *params, uint32_t offset, unsigned n)
304 {
305 for (unsigned i = 0; i < n; ++i)
306 params[i] = ANV_PARAM_PUSH(offset + i * sizeof(uint32_t));
307
308 for (unsigned i = n; i < 4; ++i)
309 params[i] = BRW_PARAM_BUILTIN_ZERO;
310 }
311
312 void
313 anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
314 struct anv_pipeline_layout *layout,
315 nir_shader *shader,
316 struct brw_stage_prog_data *prog_data,
317 struct anv_pipeline_bind_map *map)
318 {
319 gl_shader_stage stage = shader->info.stage;
320
321 struct apply_pipeline_layout_state state = {
322 .shader = shader,
323 .layout = layout,
324 .add_bounds_checks = pipeline->device->robust_buffer_access,
325 };
326
327 void *mem_ctx = ralloc_context(NULL);
328
329 for (unsigned s = 0; s < layout->num_sets; s++) {
330 const unsigned count = layout->set[s].layout->binding_count;
331 const unsigned words = BITSET_WORDS(count);
332 state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
333 state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
334 state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
335 state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
336 }
337
338 nir_foreach_function(function, shader) {
339 if (!function->impl)
340 continue;
341
342 nir_foreach_block(block, function->impl)
343 get_used_bindings_block(block, &state);
344 }
345
346 for (uint32_t set = 0; set < layout->num_sets; set++) {
347 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
348
349 BITSET_WORD b, _tmp;
350 BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
351 set_layout->binding_count) {
352 if (set_layout->binding[b].stage[stage].surface_index >= 0) {
353 map->surface_count +=
354 anv_descriptor_set_binding_layout_get_hw_size(&set_layout->binding[b]);
355 }
356 if (set_layout->binding[b].stage[stage].sampler_index >= 0) {
357 map->sampler_count +=
358 anv_descriptor_set_binding_layout_get_hw_size(&set_layout->binding[b]);
359 }
360 if (set_layout->binding[b].stage[stage].image_index >= 0)
361 map->image_count += set_layout->binding[b].array_size;
362 }
363 }
364
365 unsigned surface = 0;
366 unsigned sampler = 0;
367 unsigned image = 0;
368 for (uint32_t set = 0; set < layout->num_sets; set++) {
369 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
370
371 BITSET_WORD b, _tmp;
372 BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
373 set_layout->binding_count) {
374 struct anv_descriptor_set_binding_layout *binding =
375 &set_layout->binding[b];
376
377 if (binding->stage[stage].surface_index >= 0) {
378 state.set[set].surface_offsets[b] = surface;
379 struct anv_sampler **samplers = binding->immutable_samplers;
380 for (unsigned i = 0; i < binding->array_size; i++) {
381 uint8_t planes = samplers ? samplers[i]->n_planes : 1;
382 for (uint8_t p = 0; p < planes; p++) {
383 map->surface_to_descriptor[surface].set = set;
384 map->surface_to_descriptor[surface].binding = b;
385 map->surface_to_descriptor[surface].index = i;
386 map->surface_to_descriptor[surface].plane = p;
387 surface++;
388 }
389 }
390 }
391
392 if (binding->stage[stage].sampler_index >= 0) {
393 state.set[set].sampler_offsets[b] = sampler;
394 struct anv_sampler **samplers = binding->immutable_samplers;
395 for (unsigned i = 0; i < binding->array_size; i++) {
396 uint8_t planes = samplers ? samplers[i]->n_planes : 1;
397 for (uint8_t p = 0; p < planes; p++) {
398 map->sampler_to_descriptor[sampler].set = set;
399 map->sampler_to_descriptor[sampler].binding = b;
400 map->sampler_to_descriptor[sampler].index = i;
401 map->sampler_to_descriptor[sampler].plane = p;
402 sampler++;
403 }
404 }
405 }
406
407 if (binding->stage[stage].image_index >= 0) {
408 state.set[set].image_offsets[b] = image;
409 image += binding->array_size;
410 }
411 }
412 }
413
414 nir_foreach_variable(var, &shader->uniforms) {
415 const struct glsl_type *glsl_type = glsl_without_array(var->type);
416
417 if (!glsl_type_is_image(glsl_type))
418 continue;
419
420 enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);
421
422 const uint32_t set = var->data.descriptor_set;
423 const uint32_t binding = var->data.binding;
424 const uint32_t array_size =
425 layout->set[set].layout->binding[binding].array_size;
426
427 if (!BITSET_TEST(state.set[set].used, binding))
428 continue;
429
430 struct anv_pipeline_binding *pipe_binding =
431 &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
432 for (unsigned i = 0; i < array_size; i++) {
433 assert(pipe_binding[i].set == set);
434 assert(pipe_binding[i].binding == binding);
435 assert(pipe_binding[i].index == i);
436
437 if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
438 dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
439 pipe_binding[i].input_attachment_index = var->data.index + i;
440
441 pipe_binding[i].write_only = var->data.image.write_only;
442 }
443 }
444
445 nir_foreach_function(function, shader) {
446 if (!function->impl)
447 continue;
448
449 nir_builder_init(&state.builder, function->impl);
450 nir_foreach_block(block, function->impl)
451 apply_pipeline_layout_block(block, &state);
452 nir_metadata_preserve(function->impl, nir_metadata_block_index |
453 nir_metadata_dominance);
454 }
455
456 if (map->image_count > 0) {
457 assert(map->image_count <= MAX_IMAGES);
458 nir_foreach_variable(var, &shader->uniforms) {
459 if (glsl_type_is_image(var->type) ||
460 (glsl_type_is_array(var->type) &&
461 glsl_type_is_image(glsl_get_array_element(var->type)))) {
462 /* Images are represented as uniform push constants and the actual
463 * information required for reading/writing to/from the image is
464 * storred in the uniform.
465 */
466 unsigned set = var->data.descriptor_set;
467 unsigned binding = var->data.binding;
468 unsigned image_index = state.set[set].image_offsets[binding];
469
470 var->data.driver_location = shader->num_uniforms +
471 image_index * BRW_IMAGE_PARAM_SIZE * 4;
472 }
473 }
474
475 uint32_t *param = brw_stage_prog_data_add_params(prog_data,
476 map->image_count *
477 BRW_IMAGE_PARAM_SIZE);
478 struct anv_push_constants *null_data = NULL;
479 const struct brw_image_param *image_param = null_data->images;
480 for (uint32_t i = 0; i < map->image_count; i++) {
481 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
482 (uintptr_t)&image_param->surface_idx, 1);
483 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
484 (uintptr_t)image_param->offset, 2);
485 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
486 (uintptr_t)image_param->size, 3);
487 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
488 (uintptr_t)image_param->stride, 4);
489 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
490 (uintptr_t)image_param->tiling, 3);
491 setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
492 (uintptr_t)image_param->swizzling, 2);
493
494 param += BRW_IMAGE_PARAM_SIZE;
495 image_param ++;
496 }
497 assert(param == prog_data->param + prog_data->nr_params);
498
499 shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
500 }
501
502 ralloc_free(mem_ctx);
503 }