/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
#include "compiler/brw_nir.h"

/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
#define MAX_SAMPLER_TABLE_SIZE 128
#define BINDLESS_OFFSET        255

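/* State shared by all of the lowering helpers below.  The per-set arrays are
 * allocated from the pass's ralloc context in anv_nir_apply_pipeline_layout().
 */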
struct apply_pipeline_layout_state {
   const struct anv_physical_device *pdevice;

   nir_shader *shader;
   nir_builder builder;

   struct anv_pipeline_layout *layout;
   bool add_bounds_checks;

   /* Place to flag lowered instructions so we don't lower them twice */
   struct set *lowered_instrs;

   bool uses_constants;
   uint8_t constants_offset;

   struct {
      bool desc_buffer_used;
      uint8_t desc_offset;

      uint8_t *use_count;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
   } set[MAX_SETS];
};

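/* Record a single use of the given (set, binding) pair.  Use counts are
 * saturated at UINT8_MAX and later drive binding-table slot assignment.
 */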
static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if (state->set[set].use_count[binding] < UINT8_MAX)
      state->set[set].use_count[binding]++;

   /* Only flag the descriptor buffer as used if there's actually data for
    * this binding.  This lets us be lazy and call this function constantly
    * without worrying about unnecessarily enabling the buffer.
    */
   if (anv_descriptor_size(bind_layout))
      state->set[set].desc_buffer_used = true;
}

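/* Record a binding use given a deref source, e.g. the image deref on an
 * image intrinsic.
 */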
static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
{
   nir_deref_instr *deref = nir_src_as_deref(src);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   add_binding(state, var->data.descriptor_set, var->data.binding);
}

static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
                    nir_tex_instr *tex, nir_tex_src_type deref_src_type)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   add_deref_src_binding(state, tex->src[deref_src_idx].src);
}

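/* First pass: walk a block and record every (set, binding) referenced by
 * resource-index intrinsics, image intrinsics, and texture instructions.
 */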
static void
get_used_bindings_block(nir_block *block,
                        struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            add_binding(state, nir_intrinsic_desc_set(intrin),
                        nir_intrinsic_binding(intrin));
            break;

         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            add_deref_src_binding(state, intrin->src[0]);
            break;

         case nir_intrinsic_load_constant:
            state->uses_constants = true;
            break;

         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex: {
         nir_tex_instr *tex = nir_instr_as_tex(instr);
         add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
         add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
         break;
      }
      default:
         continue;
      }
   }
}

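/* Returns true if the source chains back (through any number of
 * vulkan_resource_reindex intrinsics) to a vulkan_resource_index intrinsic.
 */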
static bool
find_descriptor_for_index_src(nir_src src,
                              struct apply_pipeline_layout_state *state)
{
   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);

   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
      intrin = nir_src_as_intrinsic(intrin->src[0]);

   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return false;

   return true;
}

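/* Walks up a deref chain to its root cast and returns true if that cast comes
 * from a load_vulkan_descriptor whose index traces back to a
 * vulkan_resource_index intrinsic.
 */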
static bool
nir_deref_find_descriptor(nir_deref_instr *deref,
                          struct apply_pipeline_layout_state *state)
{
   while (1) {
      /* Nothing we will use this on has a variable */
      assert(deref->deref_type != nir_deref_type_var);

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (!parent)
         break;
      deref = parent;
   }
   assert(deref->deref_type == nir_deref_type_cast);

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return false;

   return find_descriptor_for_index_src(intrin->src[0], state);
}

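/* Builds a flat binding-table index for a resource_index/resource_reindex
 * chain: the binding's base surface index plus the (clamped) array index.
 */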
static nir_ssa_def *
build_index_for_res_reindex(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      nir_ssa_def *bti =
         build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]), state);

      b->cursor = nir_before_instr(&intrin->instr);
      return nir_iadd(b, bti, nir_ssa_for_src(b, intrin->src[1], 1));
   }

   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   return nir_iadd_imm(b, array_index, surface_index);
}

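/* Recursively builds a nir_address_format_32bit_index_offset vec2 address for
 * an SSBO deref chain rooted at a load_vulkan_descriptor.
 */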
static nir_ssa_def *
build_index_offset_for_deref(nir_deref_instr *deref,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent) {
      nir_ssa_def *addr = build_index_offset_for_deref(parent, state);

      b->cursor = nir_before_instr(&deref->instr);
      return nir_explicit_io_address_from_deref(b, deref, addr,
                                                nir_address_format_32bit_index_offset);
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_ssa_def *index =
      build_index_for_res_reindex(nir_src_as_intrinsic(load_desc->src[0]), state);

   /* Return a 0 offset which will get picked up by the recursion */
   b->cursor = nir_before_instr(&deref->instr);
   return nir_vec2(b, index, nir_imm_int(b, 0));
}

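/* Attempts to lower an SSBO load/store/atomic directly to the BTI model.
 * Returns false when the deref doesn't clearly root in a descriptor, in which
 * case the instruction is left for the generic lowering later on.
 */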
static bool
try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin,
                                  struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref->mode != nir_var_mem_ssbo)
      return false;

   if (!nir_deref_find_descriptor(deref, state))
      return false;

   nir_ssa_def *addr = build_index_offset_for_deref(deref, state);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_lower_explicit_io_instr(b, intrin, addr,
                               nir_address_format_32bit_index_offset);
   return true;
}

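/* Pre-pass over the whole impl that opportunistically lowers direct SSBO
 * access and get_buffer_size to the BTI model before the destructive lowering
 * below runs.  See the long comment in anv_nir_apply_pipeline_layout() for
 * why this has to happen first.
 */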
static void
lower_direct_buffer_access(nir_function_impl *impl,
                           struct apply_pipeline_layout_state *state)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
         case nir_intrinsic_deref_atomic_add:
         case nir_intrinsic_deref_atomic_imin:
         case nir_intrinsic_deref_atomic_umin:
         case nir_intrinsic_deref_atomic_imax:
         case nir_intrinsic_deref_atomic_umax:
         case nir_intrinsic_deref_atomic_and:
         case nir_intrinsic_deref_atomic_or:
         case nir_intrinsic_deref_atomic_xor:
         case nir_intrinsic_deref_atomic_exchange:
         case nir_intrinsic_deref_atomic_comp_swap:
         case nir_intrinsic_deref_atomic_fmin:
         case nir_intrinsic_deref_atomic_fmax:
         case nir_intrinsic_deref_atomic_fcomp_swap:
            try_lower_direct_buffer_intrinsic(intrin, state);
            break;

         case nir_intrinsic_get_buffer_size: {
            /* The get_buffer_size intrinsic always just takes an
             * index/reindex intrinsic.
             */
            if (!find_descriptor_for_index_src(intrin->src[0], state))
               break;

            nir_ssa_def *index =
               build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]),
                                           state);
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                                  nir_src_for_ssa(index));
            _mesa_set_add(state->lowered_instrs, intrin);
            break;
         }

         default:
            break;
         }
      }
   }
}

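/* Lowers vulkan_resource_index to a (binding-table index, offset) vec2 in the
 * nir_address_format_32bit_index_offset model.
 */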
static void
lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   nir_ssa_def *index;
   if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
      /* This is an inline uniform block.  Just reference the descriptor set
       * and use the descriptor offset as the base.
       */
      index = nir_imm_ivec2(b, state->set[set].desc_offset,
                            bind_layout->descriptor_offset);
   } else {
      /* We're using nir_address_format_32bit_index_offset */
      index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index),
                       nir_imm_int(b, 0));
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index));
   nir_instr_remove(&intrin->instr);
}

static void
lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   /* For us, the resource indices are just indices into the binding table and
    * array elements are sequential.  A resource_reindex just turns into an
    * add of the two indices.
    */
   assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
   nir_ssa_def *old_index = intrin->src[0].ssa;
   nir_ssa_def *offset = intrin->src[1].ssa;

   nir_ssa_def *new_index =
      nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
               nir_channel(b, old_index, 1));

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index));
   nir_instr_remove(&intrin->instr);
}

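/* load_vulkan_descriptor is a no-op for us: the resource index is already the
 * (index, offset) address, so we just forward it to the uses.
 */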
static void
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   /* We follow the nir_address_format_32bit_index_offset model */
   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index));
   nir_instr_remove(&intrin->instr);
}

static void
lower_get_buffer_size(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   if (_mesa_set_search(state->lowered_instrs, intrin))
      return;

   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   /* We're following the nir_address_format_32bit_index_offset model so the
    * binding table index is the first component of the address.  The
    * back-end wants a scalar binding table index source.
    */
   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                         nir_src_for_ssa(nir_channel(b, index, 0)));
}

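/* Emits a load_ubo from the descriptor buffer for the given binding deref,
 * returning num_components x bit_size of descriptor data at the given offset.
 */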
static nir_ssa_def *
build_descriptor_load(nir_deref_instr *deref, unsigned offset,
                      unsigned num_components, unsigned bit_size,
                      struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   nir_builder *b = &state->builder;

   nir_ssa_def *desc_buffer_index =
      nir_imm_int(b, state->set[set].desc_offset);

   nir_ssa_def *desc_offset =
      nir_imm_int(b, bind_layout->descriptor_offset + offset);
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      const unsigned descriptor_size = anv_descriptor_size(bind_layout);
      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      if (state->add_bounds_checks)
         arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1));

      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_size));
   }

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = num_components;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}

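/* Lowers image intrinsics: image_deref_load_param_intel becomes a load from
 * the descriptor buffer; everything else has its deref source rewritten to a
 * flat binding-table index.
 */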
static void
lower_image_intrinsic(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&intrin->instr);

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      const unsigned param = nir_intrinsic_base(intrin);

      nir_ssa_def *desc =
         build_descriptor_load(deref, param * 16,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size, state);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   } else {
      nir_variable *var = nir_deref_instr_get_variable(deref);

      unsigned set = var->data.descriptor_set;
      unsigned binding = var->data.binding;
      unsigned binding_offset = state->set[set].surface_offsets[binding];
      unsigned array_size =
         state->layout->set[set].layout->binding[binding].array_size;

      nir_ssa_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = nir_ssa_for_src(b, deref->arr.index, 1);
         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }
}

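/* Lowers load_constant to a load_ubo from the shader constants surface set up
 * in anv_nir_apply_pipeline_layout().
 */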
static void
lower_load_constant(nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
   nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1),
                                  nir_imm_int(b, nir_intrinsic_base(intrin)));

   nir_intrinsic_instr *load_ubo =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   load_ubo->num_components = intrin->num_components;
   load_ubo->src[0] = nir_src_for_ssa(index);
   load_ubo->src[1] = nir_src_for_ssa(offset);
   nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
                     intrin->dest.ssa.num_components,
                     intrin->dest.ssa.bit_size, NULL);
   nir_builder_instr_insert(b, &load_ubo->instr);

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                            nir_src_for_ssa(&load_ubo->dest.ssa));
   nir_instr_remove(&intrin->instr);
}

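/* Replaces a texture/sampler deref source with a flat table offset.  A
 * constant array index is folded into *base_index; a dynamic one becomes a
 * texture/sampler offset source on the instruction.
 */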
static void
lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
                unsigned *base_index,
                struct apply_pipeline_layout_state *state)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   nir_tex_src_type offset_src_type;
   if (deref_src_type == nir_tex_src_texture_deref) {
      offset_src_type = nir_tex_src_texture_offset;
      *base_index = state->set[set].surface_offsets[binding];
   } else {
      assert(deref_src_type == nir_tex_src_sampler_deref);
      offset_src_type = nir_tex_src_sampler_offset;
      *base_index = state->set[set].sampler_offsets[binding];
   }

   nir_ssa_def *index = NULL;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      if (nir_src_is_const(deref->arr.index)) {
         unsigned arr_index = nir_src_as_uint(deref->arr.index);
         *base_index += MIN2(arr_index, array_size - 1);
      } else {
         nir_builder *b = &state->builder;

         /* From VK_KHR_sampler_ycbcr_conversion:
          *
          *    If sampler Y’CBCR conversion is enabled, the combined image
          *    sampler must be indexed only by constant integral expressions when
          *    aggregated into arrays in shader code, irrespective of the
          *    shaderSampledImageArrayDynamicIndexing feature.
          */
         assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);

         index = nir_ssa_for_src(b, deref->arr.index, 1);

         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      }
   }

   if (index) {
      nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src,
                            nir_src_for_ssa(index));
      tex->src[deref_src_idx].src_type = offset_src_type;
   } else {
      nir_tex_instr_remove_src(tex, deref_src_idx);
   }
}

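/* Removes the plane source from a texture instruction, returning its constant
 * value (0 if there is none).
 */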
static uint32_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
   nir_tex_instr_remove_src(tex, plane_src_idx);

   return plane;
}

static void
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
{
   state->builder.cursor = nir_before_instr(&tex->instr);

   unsigned plane = tex_instr_get_and_remove_plane_src(tex);

   lower_tex_deref(tex, nir_tex_src_texture_deref,
                   &tex->texture_index, state);
   tex->texture_index += plane;

   lower_tex_deref(tex, nir_tex_src_sampler_deref,
                   &tex->sampler_index, state);
   tex->sampler_index += plane;

   /* The backend only ever uses this to mark used surfaces.  We don't care
    * about that little optimization so it just needs to be non-zero.
    */
   tex->texture_array_size = 1;
}

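/* Second pass: dispatches each descriptor-related instruction in a block to
 * its lowering helper above.
 */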
static void
apply_pipeline_layout_block(nir_block *block,
                            struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            lower_res_index_intrinsic(intrin, state);
            break;
         case nir_intrinsic_vulkan_resource_reindex:
            lower_res_reindex_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_vulkan_descriptor:
            lower_load_vulkan_descriptor(intrin, state);
            break;
         case nir_intrinsic_get_buffer_size:
            lower_get_buffer_size(intrin, state);
            break;
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            lower_image_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_constant:
            lower_load_constant(intrin, state);
            break;
         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex:
         lower_tex(nir_instr_as_tex(instr), state);
         break;
      default:
         continue;
      }
   }
}

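/* Per-binding bookkeeping used to prioritize binding-table slot assignment. */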
struct binding_info {
   uint32_t binding;
   uint8_t set;
   uint16_t score;
};

static int
compare_binding_infos(const void *_a, const void *_b)
{
   const struct binding_info *a = _a, *b = _b;
   if (a->score != b->score)
      return b->score - a->score;

   if (a->set != b->set)
      return a->set - b->set;

   return a->binding - b->binding;
}

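/* Entry point.  Figures out which bindings the shader actually uses, assigns
 * binding-table and sampler-table slots (or BINDLESS_OFFSET when a binding
 * can't live in a table), and then lowers every descriptor-based access to
 * those slots.
 */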
void
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                              bool robust_buffer_access,
                              struct anv_pipeline_layout *layout,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data,
                              struct anv_pipeline_bind_map *map)
{
   void *mem_ctx = ralloc_context(NULL);

   struct apply_pipeline_layout_state state = {
      .pdevice = pdevice,
      .shader = shader,
      .layout = layout,
      .add_bounds_checks = robust_buffer_access,
      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
   };

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl)
         get_used_bindings_block(block, &state);
   }

   for (unsigned s = 0; s < layout->num_sets; s++) {
      if (state.set[s].desc_buffer_used) {
         map->surface_to_descriptor[map->surface_count] =
            (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
               .binding = s,
            };
         state.set[s].desc_offset = map->surface_count;
         map->surface_count++;
      }
   }

   if (state.uses_constants) {
      state.constants_offset = map->surface_count;
      map->surface_to_descriptor[map->surface_count].set =
         ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
      map->surface_count++;
   }

   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size.  We shift by 7 instead
          * of 8 because we're going to use the top bit below to give
          * everything which does not support bindless a higher priority
          * than things which do.
          */
         uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
                          binding->array_size;

         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score with highest scores first.  If
    * scores are equal we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);

   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      struct anv_descriptor_set_binding_layout *binding =
         &layout->set[set].layout->binding[b];

      const uint32_t array_size = binding->array_size;

      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, false)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, false));
            state.set[set].surface_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].surface_offsets[b] = map->surface_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

   nir_foreach_variable(var, &shader->uniforms) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);

      if (!glsl_type_is_image(glsl_type))
         continue;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);

      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const uint32_t array_size =
         layout->set[set].layout->binding[binding].array_size;

      if (state.set[set].use_count[binding] == 0)
         continue;

      if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].binding == binding);
         assert(pipe_binding[i].index == i);

         if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
             dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
            pipe_binding[i].input_attachment_index = var->data.index + i;

         pipe_binding[i].write_only =
            (var->data.image.access & ACCESS_NON_READABLE) != 0;
      }
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Before we do the normal lowering, we look for any SSBO operations
       * that we can lower to the BTI model and lower them up-front.  The BTI
       * model can perform better than the A64 model for a couple reasons:
       *
       *  1. 48-bit address calculations are potentially expensive and using
       *     the BTI model lets us simply compute 32-bit offsets and the
       *     hardware adds the 64-bit surface base address.
       *
       *  2. The BTI messages, because they use surface states, do bounds
       *     checking for us.  With the A64 model, we have to do our own
       *     bounds checking and this means wider pointers and extra
       *     calculations and branching in the shader.
       *
       * The solution to both of these is to convert things to the BTI model
       * opportunistically.  We need to do this as a pre-pass for two
       * reasons:
       *
       *  1. The BTI model requires nir_address_format_32bit_index_offset
       *     pointers which are not the same type as the pointers needed for
       *     the A64 model.  Because all our derefs are set up for the A64
       *     model (in case we have variable pointers), we have to crawl all
       *     the way back to the vulkan_resource_index intrinsic and build a
       *     completely fresh index+offset calculation.
       *
       *  2. Because the variable-pointers-capable lowering that we do as part
       *     of apply_pipeline_layout_block is destructive (it really has to
       *     be to handle variable pointers properly), we've lost the deref
       *     information by the time we get to the load/store/atomic
       *     intrinsics in that pass.
       */
      lower_direct_buffer_access(function->impl, &state);

      nir_builder_init(&state.builder, function->impl);
      nir_foreach_block(block, function->impl)
         apply_pipeline_layout_block(block, &state);
      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                            nir_metadata_dominance);
   }

   ralloc_free(mem_ctx);
}