/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
#include "compiler/brw_nir.h"
#include "util/set.h"

/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
#define MAX_SAMPLER_TABLE_SIZE 128
#define BINDLESS_OFFSET 255

struct apply_pipeline_layout_state {
   const struct anv_physical_device *pdevice;

   nir_shader *shader;
   nir_builder builder;

   struct anv_pipeline_layout *layout;
   bool add_bounds_checks;

   /* Place to flag lowered instructions so we don't lower them twice */
   struct set *lowered_instrs;

   bool uses_constants;
   uint8_t constants_offset;
   struct {
      bool desc_buffer_used;
      uint8_t desc_offset;

      uint8_t *use_count;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
   } set[MAX_SETS];
};

static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if (state->set[set].use_count[binding] < UINT8_MAX)
      state->set[set].use_count[binding]++;

   /* Only flag the descriptor buffer as used if there's actually data for
    * this binding. This lets us be lazy and call this function constantly
    * without worrying about unnecessarily enabling the buffer.
    */
   if (anv_descriptor_size(bind_layout))
      state->set[set].desc_buffer_used = true;
}

static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
{
   nir_deref_instr *deref = nir_src_as_deref(src);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   add_binding(state, var->data.descriptor_set, var->data.binding);
}

static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
                    nir_tex_instr *tex, nir_tex_src_type deref_src_type)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   add_deref_src_binding(state, tex->src[deref_src_idx].src);
}

static void
get_used_bindings_block(nir_block *block,
                        struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            add_binding(state, nir_intrinsic_desc_set(intrin),
                        nir_intrinsic_binding(intrin));
            break;

         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            add_deref_src_binding(state, intrin->src[0]);
            break;

         case nir_intrinsic_load_constant:
            state->uses_constants = true;
            break;

         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex: {
         nir_tex_instr *tex = nir_instr_as_tex(instr);
         add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
         add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
         break;
      }
      default:
         continue;
      }
   }
}

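/* Returns true if the resource index feeding this source can be traced back
 * to a vulkan_resource_index intrinsic, peeling off any chain of
 * vulkan_resource_reindex intrinsics along the way, i.e. the source
 * ultimately refers to a descriptor binding we know about.
 */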
static bool
find_descriptor_for_index_src(nir_src src,
                              struct apply_pipeline_layout_state *state)
{
   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);

   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
      intrin = nir_src_as_intrinsic(intrin->src[0]);

   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return false;

   return true;
}

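/* Walks the given deref chain back to its root cast and returns true if that
 * cast comes from a load_vulkan_descriptor whose index source can be traced
 * by find_descriptor_for_index_src, i.e. accesses through this deref can be
 * tied to a specific descriptor binding.
 */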
static bool
nir_deref_find_descriptor(nir_deref_instr *deref,
                          struct apply_pipeline_layout_state *state)
{
   while (1) {
      /* Nothing we will use this on has a variable */
      assert(deref->deref_type != nir_deref_type_var);

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (!parent)
         break;

      deref = parent;
   }
   assert(deref->deref_type == nir_deref_type_cast);

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return false;

   return find_descriptor_for_index_src(intrin->src[0], state);
}

static nir_ssa_def *
build_index_for_res_reindex(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      nir_ssa_def *bti =
         build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]), state);

      b->cursor = nir_before_instr(&intrin->instr);
      return nir_iadd(b, bti, nir_ssa_for_src(b, intrin->src[1], 1));
   }

   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   return nir_iadd_imm(b, array_index, surface_index);
}

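/* Builds a nir_address_format_32bit_index_offset address (a vec2 of binding
 * table index and byte offset) for the given deref. The root cast gets its
 * binding table index from the resource index/reindex chain and a zero
 * offset; array and struct derefs below it fold their offsets in via
 * nir_explicit_io_address_from_deref.
 */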
static nir_ssa_def *
build_index_offset_for_deref(nir_deref_instr *deref,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent) {
      nir_ssa_def *addr = build_index_offset_for_deref(parent, state);

      b->cursor = nir_before_instr(&deref->instr);
      return nir_explicit_io_address_from_deref(b, deref, addr,
                                                nir_address_format_32bit_index_offset);
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_ssa_def *index =
      build_index_for_res_reindex(nir_src_as_intrinsic(load_desc->src[0]), state);

   /* Return a 0 offset which will get picked up by the recursion */
   b->cursor = nir_before_instr(&deref->instr);
   return nir_vec2(b, index, nir_imm_int(b, 0));
}

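/* Tries to lower an SSBO load/store/atomic that still operates on a deref
 * directly to the binding-table (BTI) model: if the deref's descriptor can be
 * traced, we build a 32bit_index_offset address for it and let
 * nir_lower_explicit_io_instr rewrite the access. Returns false (leaving the
 * instruction for the generic A64 lowering later on) otherwise.
 */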
static bool
try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin,
                                  struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref->mode != nir_var_mem_ssbo)
      return false;

   if (!nir_deref_find_descriptor(deref, state))
      return false;

   nir_ssa_def *addr = build_index_offset_for_deref(deref, state);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_lower_explicit_io_instr(b, intrin, addr,
                               nir_address_format_32bit_index_offset);
   return true;
}

static void
lower_direct_buffer_access(nir_function_impl *impl,
                           struct apply_pipeline_layout_state *state)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
         case nir_intrinsic_deref_atomic_add:
         case nir_intrinsic_deref_atomic_imin:
         case nir_intrinsic_deref_atomic_umin:
         case nir_intrinsic_deref_atomic_imax:
         case nir_intrinsic_deref_atomic_umax:
         case nir_intrinsic_deref_atomic_and:
         case nir_intrinsic_deref_atomic_or:
         case nir_intrinsic_deref_atomic_xor:
         case nir_intrinsic_deref_atomic_exchange:
         case nir_intrinsic_deref_atomic_comp_swap:
         case nir_intrinsic_deref_atomic_fmin:
         case nir_intrinsic_deref_atomic_fmax:
         case nir_intrinsic_deref_atomic_fcomp_swap:
            try_lower_direct_buffer_intrinsic(intrin, state);
            break;

         case nir_intrinsic_get_buffer_size: {
            /* The get_buffer_size intrinsic always just takes an
             * index/reindex intrinsic.
             */
            if (!find_descriptor_for_index_src(intrin->src[0], state))
               break;

            nir_ssa_def *index =
               build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]),
                                           state);
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                                  nir_src_for_ssa(index));
            _mesa_set_add(state->lowered_instrs, intrin);
         }

         default:
            break;
         }
      }
   }
}

static void
lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   nir_ssa_def *index;
   if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
      /* This is an inline uniform block. Just reference the descriptor set
       * and use the descriptor offset as the base.
       */
      index = nir_imm_ivec2(b, state->set[set].desc_offset,
                            bind_layout->descriptor_offset);
   } else {
      /* We're using nir_address_format_32bit_index_offset */
      index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index),
                       nir_imm_int(b, 0));
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index));
   nir_instr_remove(&intrin->instr);
}

static void
lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   /* For us, the resource indices are just indices into the binding table and
    * array elements are sequential. A resource_reindex just turns into an
    * add of the two indices.
    */
   assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
   nir_ssa_def *old_index = intrin->src[0].ssa;
   nir_ssa_def *offset = intrin->src[1].ssa;

   nir_ssa_def *new_index =
      nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
                  nir_channel(b, old_index, 1));

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index));
   nir_instr_remove(&intrin->instr);
}

static void
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   /* We follow the nir_address_format_32bit_index_offset model */
   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index));
   nir_instr_remove(&intrin->instr);
}

static void
lower_get_buffer_size(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   if (_mesa_set_search(state->lowered_instrs, intrin))
      return;

   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   /* We're following the nir_address_format_32bit_index_offset model so the
    * binding table index is the first component of the address. The
    * back-end wants a scalar binding table index source.
    */
   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                         nir_src_for_ssa(nir_channel(b, index, 0)));
}

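/* Loads descriptor data for the given deref out of the descriptor set's
 * descriptor buffer. The load is emitted as a load_ubo whose first source is
 * the descriptor buffer's surface index and whose second source is the
 * binding's offset within that buffer plus the caller-provided offset and,
 * for arrayed bindings, arr_index * descriptor_size.
 */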
static nir_ssa_def *
build_descriptor_load(nir_deref_instr *deref, unsigned offset,
                      unsigned num_components, unsigned bit_size,
                      struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   nir_builder *b = &state->builder;

   nir_ssa_def *desc_buffer_index =
      nir_imm_int(b, state->set[set].desc_offset);

   nir_ssa_def *desc_offset =
      nir_imm_int(b, bind_layout->descriptor_offset + offset);
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      const unsigned descriptor_size = anv_descriptor_size(bind_layout);
      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      if (state->add_bounds_checks)
         arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1));

      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_size));
   }

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = num_components;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}

static void
lower_image_intrinsic(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&intrin->instr);

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      const unsigned param = nir_intrinsic_base(intrin);

      nir_ssa_def *desc =
         build_descriptor_load(deref, param * 16,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size, state);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   } else {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      unsigned set = var->data.descriptor_set;
      unsigned binding = var->data.binding;
      unsigned binding_offset = state->set[set].surface_offsets[binding];
      unsigned array_size =
         state->layout->set[set].layout->binding[binding].array_size;

      nir_ssa_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = nir_ssa_for_src(b, deref->arr.index, 1);
         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }
}

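/* load_constant intrinsics read from the shader's constant data block which
 * we expose as an extra UBO surface (state->constants_offset), so the
 * intrinsic becomes a load_ubo at base + offset within that surface.
 */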
static void
lower_load_constant(nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
   nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1),
                                  nir_imm_int(b, nir_intrinsic_base(intrin)));

   nir_intrinsic_instr *load_ubo =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   load_ubo->num_components = intrin->num_components;
   load_ubo->src[0] = nir_src_for_ssa(index);
   load_ubo->src[1] = nir_src_for_ssa(offset);
   nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
                     intrin->dest.ssa.num_components,
                     intrin->dest.ssa.bit_size, NULL);
   nir_builder_instr_insert(b, &load_ubo->instr);

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                            nir_src_for_ssa(&load_ubo->dest.ssa));
   nir_instr_remove(&intrin->instr);
}

static void
lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
                unsigned *base_index,
                struct apply_pipeline_layout_state *state)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   nir_tex_src_type offset_src_type;
   if (deref_src_type == nir_tex_src_texture_deref) {
      offset_src_type = nir_tex_src_texture_offset;
      *base_index = state->set[set].surface_offsets[binding];
   } else {
      assert(deref_src_type == nir_tex_src_sampler_deref);
      offset_src_type = nir_tex_src_sampler_offset;
      *base_index = state->set[set].sampler_offsets[binding];
   }

   nir_ssa_def *index = NULL;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      if (nir_src_is_const(deref->arr.index)) {
         unsigned arr_index = nir_src_as_uint(deref->arr.index);
         *base_index += MIN2(arr_index, array_size - 1);
      } else {
         nir_builder *b = &state->builder;

         /* From VK_KHR_sampler_ycbcr_conversion:
          *
          *    If sampler Y’CBCR conversion is enabled, the combined image
          *    sampler must be indexed only by constant integral expressions
          *    when aggregated into arrays in shader code, irrespective of
          *    the shaderSampledImageArrayDynamicIndexing feature.
          */
         assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);

         index = nir_ssa_for_src(b, deref->arr.index, 1);

         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      }
   }

   if (index) {
      nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src,
                            nir_src_for_ssa(index));
      tex->src[deref_src_idx].src_type = offset_src_type;
   } else {
      nir_tex_instr_remove_src(tex, deref_src_idx);
   }
}

static uint32_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);

   nir_tex_instr_remove_src(tex, plane_src_idx);

   return plane;
}

static void
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
{
   state->builder.cursor = nir_before_instr(&tex->instr);

   unsigned plane = tex_instr_get_and_remove_plane_src(tex);

   lower_tex_deref(tex, nir_tex_src_texture_deref,
                   &tex->texture_index, state);
   tex->texture_index += plane;

   lower_tex_deref(tex, nir_tex_src_sampler_deref,
                   &tex->sampler_index, state);
   tex->sampler_index += plane;

   /* The backend only ever uses this to mark used surfaces. We don't care
    * about that little optimization so it just needs to be non-zero.
    */
   tex->texture_array_size = 1;
}

static void
apply_pipeline_layout_block(nir_block *block,
                            struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            lower_res_index_intrinsic(intrin, state);
            break;
         case nir_intrinsic_vulkan_resource_reindex:
            lower_res_reindex_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_vulkan_descriptor:
            lower_load_vulkan_descriptor(intrin, state);
            break;
         case nir_intrinsic_get_buffer_size:
            lower_get_buffer_size(intrin, state);
            break;
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            lower_image_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_constant:
            lower_load_constant(intrin, state);
            break;
         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex:
         lower_tex(nir_instr_as_tex(instr), state);
         break;
      default:
         continue;
      }
   }
}

struct binding_info {
   uint32_t binding;
   uint8_t set;
   uint16_t score;
};

static int
compare_binding_infos(const void *_a, const void *_b)
{
   const struct binding_info *a = _a, *b = _b;
   if (a->score != b->score)
      return b->score - a->score;

   if (a->set != b->set)
      return a->set - b->set;

   return a->binding - b->binding;
}

void
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                              bool robust_buffer_access,
                              struct anv_pipeline_layout *layout,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data,
                              struct anv_pipeline_bind_map *map)
{
   void *mem_ctx = ralloc_context(NULL);

   struct apply_pipeline_layout_state state = {
      .pdevice = pdevice,
      .shader = shader,
      .layout = layout,
      .add_bounds_checks = robust_buffer_access,
      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
   };

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl)
         get_used_bindings_block(block, &state);
   }

   for (unsigned s = 0; s < layout->num_sets; s++) {
      if (state.set[s].desc_buffer_used) {
         map->surface_to_descriptor[map->surface_count] =
            (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
               .binding = s,
            };
         state.set[s].desc_offset = map->surface_count;
         map->surface_count++;
      }
   }

   if (state.uses_constants) {
      state.constants_offset = map->surface_count;
      map->surface_to_descriptor[map->surface_count].set =
         ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
      map->surface_count++;
   }

   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size. We shift by 7 instead
          * of 8 because we're going to use the top bit below to give
          * everything which does not support bindless a strictly higher
          * priority than things which do.
          */
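         /* For example, a binding referenced 16 times with array_size 4
          * scores (16 << 7) / 4 = 512, while a binding referenced once with
          * array_size 64 scores (1 << 7) / 64 = 2, so heavily used, small
          * bindings sort toward the front of the table.
          */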
         uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
                          binding->array_size;

         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score with highest scores first. If
    * scores are equal we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);

   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      struct anv_descriptor_set_binding_layout *binding =
         &layout->set[set].layout->binding[b];

      const uint32_t array_size = binding->array_size;

      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, false)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, false));
            state.set[set].surface_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].surface_offsets[b] = map->surface_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true)) {
            /* If this descriptor doesn't fit in the sampler table or if it
             * requires bindless for some reason, flag it as bindless.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

   nir_foreach_variable(var, &shader->uniforms) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);

      if (!glsl_type_is_image(glsl_type))
         continue;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);

      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const uint32_t array_size =
         layout->set[set].layout->binding[binding].array_size;

      if (state.set[set].use_count[binding] == 0)
         continue;

      if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].binding == binding);
         assert(pipe_binding[i].index == i);

         if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
             dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
            pipe_binding[i].input_attachment_index = var->data.index + i;

         pipe_binding[i].write_only =
            (var->data.image.access & ACCESS_NON_READABLE) != 0;
      }
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Before we do the normal lowering, we look for any SSBO operations
       * that we can lower to the BTI model and lower them up-front. The BTI
       * model can perform better than the A64 model for a couple reasons:
       *
       * 1. 48-bit address calculations are potentially expensive and using
       *    the BTI model lets us simply compute 32-bit offsets and the
       *    hardware adds the 64-bit surface base address.
       *
       * 2. The BTI messages, because they use surface states, do bounds
       *    checking for us. With the A64 model, we have to do our own
       *    bounds checking and this means wider pointers and extra
       *    calculations and branching in the shader.
       *
       * The solution to both of these is to convert things to the BTI model
       * opportunistically. We need to do this as a pre-pass for two
       * reasons:
       *
       * 1. The BTI model requires nir_address_format_32bit_index_offset
       *    pointers which are not the same type as the pointers needed for
       *    the A64 model. Because all our derefs are set up for the A64
       *    model (in case we have variable pointers), we have to crawl all
       *    the way back to the vulkan_resource_index intrinsic and build a
       *    completely fresh index+offset calculation.
       *
       * 2. Because the variable-pointers-capable lowering that we do as part
       *    of apply_pipeline_layout_block is destructive (it really has to
       *    be to handle variable pointers properly), we've lost the deref
       *    information by the time we get to the load/store/atomic
       *    intrinsics in that pass.
       */
      lower_direct_buffer_access(function->impl, &state);

      nir_builder_init(&state.builder, function->impl);
      nir_foreach_block(block, function->impl)
         apply_pipeline_layout_block(block, &state);
      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                            nir_metadata_dominance);
   }

   ralloc_free(mem_ctx);
}