/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "program/prog_parameter.h"
26 #include "nir/nir_builder.h"
27 #include "compiler/brw_nir.h"
28 #include "util/mesa-sha1.h"
/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
34 #define MAX_SAMPLER_TABLE_SIZE 128
35 #define BINDLESS_OFFSET 255
37 struct apply_pipeline_layout_state
{
38 const struct anv_physical_device
*pdevice
;
43 const struct anv_pipeline_layout
*layout
;
44 bool add_bounds_checks
;
45 nir_address_format ssbo_addr_format
;
47 /* Place to flag lowered instructions so we don't lower them twice */
48 struct set
*lowered_instrs
;
51 bool has_dynamic_buffers
;
52 uint8_t constants_offset
;
54 bool desc_buffer_used
;
58 uint8_t *surface_offsets
;
59 uint8_t *sampler_offsets
;
64 add_binding(struct apply_pipeline_layout_state
*state
,
65 uint32_t set
, uint32_t binding
)
67 const struct anv_descriptor_set_binding_layout
*bind_layout
=
68 &state
->layout
->set
[set
].layout
->binding
[binding
];
70 if (state
->set
[set
].use_count
[binding
] < UINT8_MAX
)
71 state
->set
[set
].use_count
[binding
]++;
73 /* Only flag the descriptor buffer as used if there's actually data for
74 * this binding. This lets us be lazy and call this function constantly
75 * without worrying about unnecessarily enabling the buffer.
77 if (anv_descriptor_size(bind_layout
))
78 state
->set
[set
].desc_buffer_used
= true;
82 add_deref_src_binding(struct apply_pipeline_layout_state
*state
, nir_src src
)
84 nir_deref_instr
*deref
= nir_src_as_deref(src
);
85 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
86 add_binding(state
, var
->data
.descriptor_set
, var
->data
.binding
);
90 add_tex_src_binding(struct apply_pipeline_layout_state
*state
,
91 nir_tex_instr
*tex
, nir_tex_src_type deref_src_type
)
93 int deref_src_idx
= nir_tex_instr_src_index(tex
, deref_src_type
);
94 if (deref_src_idx
< 0)
97 add_deref_src_binding(state
, tex
->src
[deref_src_idx
].src
);
101 get_used_bindings_block(nir_block
*block
,
102 struct apply_pipeline_layout_state
*state
)
104 nir_foreach_instr_safe(instr
, block
) {
105 switch (instr
->type
) {
106 case nir_instr_type_intrinsic
: {
107 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
108 switch (intrin
->intrinsic
) {
109 case nir_intrinsic_vulkan_resource_index
:
110 add_binding(state
, nir_intrinsic_desc_set(intrin
),
111 nir_intrinsic_binding(intrin
));
114 case nir_intrinsic_image_deref_load
:
115 case nir_intrinsic_image_deref_store
:
116 case nir_intrinsic_image_deref_atomic_add
:
117 case nir_intrinsic_image_deref_atomic_imin
:
118 case nir_intrinsic_image_deref_atomic_umin
:
119 case nir_intrinsic_image_deref_atomic_imax
:
120 case nir_intrinsic_image_deref_atomic_umax
:
121 case nir_intrinsic_image_deref_atomic_and
:
122 case nir_intrinsic_image_deref_atomic_or
:
123 case nir_intrinsic_image_deref_atomic_xor
:
124 case nir_intrinsic_image_deref_atomic_exchange
:
125 case nir_intrinsic_image_deref_atomic_comp_swap
:
126 case nir_intrinsic_image_deref_size
:
127 case nir_intrinsic_image_deref_samples
:
128 case nir_intrinsic_image_deref_load_param_intel
:
129 case nir_intrinsic_image_deref_load_raw_intel
:
130 case nir_intrinsic_image_deref_store_raw_intel
:
131 add_deref_src_binding(state
, intrin
->src
[0]);
134 case nir_intrinsic_load_constant
:
135 state
->uses_constants
= true;
143 case nir_instr_type_tex
: {
144 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
145 add_tex_src_binding(state
, tex
, nir_tex_src_texture_deref
);
146 add_tex_src_binding(state
, tex
, nir_tex_src_sampler_deref
);
156 find_descriptor_for_index_src(nir_src src
,
157 struct apply_pipeline_layout_state
*state
)
159 nir_intrinsic_instr
*intrin
= nir_src_as_intrinsic(src
);
161 while (intrin
&& intrin
->intrinsic
== nir_intrinsic_vulkan_resource_reindex
)
162 intrin
= nir_src_as_intrinsic(intrin
->src
[0]);
164 if (!intrin
|| intrin
->intrinsic
!= nir_intrinsic_vulkan_resource_index
)
167 uint32_t set
= nir_intrinsic_desc_set(intrin
);
168 uint32_t binding
= nir_intrinsic_binding(intrin
);
169 uint32_t surface_index
= state
->set
[set
].surface_offsets
[binding
];
171 /* Only lower to a BTI message if we have a valid binding table index. */
172 return surface_index
< MAX_BINDING_TABLE_SIZE
;
176 nir_deref_find_descriptor(nir_deref_instr
*deref
,
177 struct apply_pipeline_layout_state
*state
)
180 /* Nothing we will use this on has a variable */
181 assert(deref
->deref_type
!= nir_deref_type_var
);
183 nir_deref_instr
*parent
= nir_src_as_deref(deref
->parent
);
189 assert(deref
->deref_type
== nir_deref_type_cast
);
191 nir_intrinsic_instr
*intrin
= nir_src_as_intrinsic(deref
->parent
);
192 if (!intrin
|| intrin
->intrinsic
!= nir_intrinsic_load_vulkan_descriptor
)
195 return find_descriptor_for_index_src(intrin
->src
[0], state
);
199 build_index_for_res_reindex(nir_intrinsic_instr
*intrin
,
200 struct apply_pipeline_layout_state
*state
)
202 nir_builder
*b
= &state
->builder
;
204 if (intrin
->intrinsic
== nir_intrinsic_vulkan_resource_reindex
) {
206 build_index_for_res_reindex(nir_src_as_intrinsic(intrin
->src
[0]), state
);
208 b
->cursor
= nir_before_instr(&intrin
->instr
);
209 return nir_iadd(b
, bti
, nir_ssa_for_src(b
, intrin
->src
[1], 1));
212 assert(intrin
->intrinsic
== nir_intrinsic_vulkan_resource_index
);
214 uint32_t set
= nir_intrinsic_desc_set(intrin
);
215 uint32_t binding
= nir_intrinsic_binding(intrin
);
217 const struct anv_descriptor_set_binding_layout
*bind_layout
=
218 &state
->layout
->set
[set
].layout
->binding
[binding
];
220 uint32_t surface_index
= state
->set
[set
].surface_offsets
[binding
];
221 uint32_t array_size
= bind_layout
->array_size
;
223 b
->cursor
= nir_before_instr(&intrin
->instr
);
225 nir_ssa_def
*array_index
= nir_ssa_for_src(b
, intrin
->src
[0], 1);
226 if (nir_src_is_const(intrin
->src
[0]) || state
->add_bounds_checks
)
227 array_index
= nir_umin(b
, array_index
, nir_imm_int(b
, array_size
- 1));
229 return nir_iadd_imm(b
, array_index
, surface_index
);
233 build_index_offset_for_deref(nir_deref_instr
*deref
,
234 struct apply_pipeline_layout_state
*state
)
236 nir_builder
*b
= &state
->builder
;
238 nir_deref_instr
*parent
= nir_deref_instr_parent(deref
);
240 nir_ssa_def
*addr
= build_index_offset_for_deref(parent
, state
);
242 b
->cursor
= nir_before_instr(&deref
->instr
);
243 return nir_explicit_io_address_from_deref(b
, deref
, addr
,
244 nir_address_format_32bit_index_offset
);
247 nir_intrinsic_instr
*load_desc
= nir_src_as_intrinsic(deref
->parent
);
248 assert(load_desc
->intrinsic
== nir_intrinsic_load_vulkan_descriptor
);
251 build_index_for_res_reindex(nir_src_as_intrinsic(load_desc
->src
[0]), state
);
253 /* Return a 0 offset which will get picked up by the recursion */
254 b
->cursor
= nir_before_instr(&deref
->instr
);
255 return nir_vec2(b
, index
, nir_imm_int(b
, 0));
259 try_lower_direct_buffer_intrinsic(nir_intrinsic_instr
*intrin
, bool is_atomic
,
260 struct apply_pipeline_layout_state
*state
)
262 nir_builder
*b
= &state
->builder
;
264 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
265 if (deref
->mode
!= nir_var_mem_ssbo
)
268 /* 64-bit atomics only support A64 messages so we can't lower them to the
269 * index+offset model.
271 if (is_atomic
&& nir_dest_bit_size(intrin
->dest
) == 64)
274 /* Normal binding table-based messages can't handle non-uniform access so
275 * we have to fall back to A64.
277 if (nir_intrinsic_access(intrin
) & ACCESS_NON_UNIFORM
)
280 if (!nir_deref_find_descriptor(deref
, state
))
283 nir_ssa_def
*addr
= build_index_offset_for_deref(deref
, state
);
285 b
->cursor
= nir_before_instr(&intrin
->instr
);
286 nir_lower_explicit_io_instr(b
, intrin
, addr
,
287 nir_address_format_32bit_index_offset
);
292 lower_direct_buffer_access(nir_function_impl
*impl
,
293 struct apply_pipeline_layout_state
*state
)
295 nir_foreach_block(block
, impl
) {
296 nir_foreach_instr_safe(instr
, block
) {
297 if (instr
->type
!= nir_instr_type_intrinsic
)
300 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
301 switch (intrin
->intrinsic
) {
302 case nir_intrinsic_load_deref
:
303 case nir_intrinsic_store_deref
:
304 try_lower_direct_buffer_intrinsic(intrin
, false, state
);
306 case nir_intrinsic_deref_atomic_add
:
307 case nir_intrinsic_deref_atomic_imin
:
308 case nir_intrinsic_deref_atomic_umin
:
309 case nir_intrinsic_deref_atomic_imax
:
310 case nir_intrinsic_deref_atomic_umax
:
311 case nir_intrinsic_deref_atomic_and
:
312 case nir_intrinsic_deref_atomic_or
:
313 case nir_intrinsic_deref_atomic_xor
:
314 case nir_intrinsic_deref_atomic_exchange
:
315 case nir_intrinsic_deref_atomic_comp_swap
:
316 case nir_intrinsic_deref_atomic_fmin
:
317 case nir_intrinsic_deref_atomic_fmax
:
318 case nir_intrinsic_deref_atomic_fcomp_swap
:
319 try_lower_direct_buffer_intrinsic(intrin
, true, state
);
322 case nir_intrinsic_get_buffer_size
: {
323 /* The get_buffer_size intrinsic always just takes a
324 * index/reindex intrinsic.
326 if (!find_descriptor_for_index_src(intrin
->src
[0], state
))
330 build_index_for_res_reindex(nir_src_as_intrinsic(intrin
->src
[0]),
332 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[0],
333 nir_src_for_ssa(index
));
334 _mesa_set_add(state
->lowered_instrs
, intrin
);
344 static nir_address_format
345 desc_addr_format(VkDescriptorType desc_type
,
346 struct apply_pipeline_layout_state
*state
)
348 return (desc_type
== VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
||
349 desc_type
== VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
) ?
350 state
->ssbo_addr_format
: nir_address_format_32bit_index_offset
;
354 lower_res_index_intrinsic(nir_intrinsic_instr
*intrin
,
355 struct apply_pipeline_layout_state
*state
)
357 nir_builder
*b
= &state
->builder
;
359 b
->cursor
= nir_before_instr(&intrin
->instr
);
361 uint32_t set
= nir_intrinsic_desc_set(intrin
);
362 uint32_t binding
= nir_intrinsic_binding(intrin
);
363 const VkDescriptorType desc_type
= nir_intrinsic_desc_type(intrin
);
365 const struct anv_descriptor_set_binding_layout
*bind_layout
=
366 &state
->layout
->set
[set
].layout
->binding
[binding
];
368 uint32_t surface_index
= state
->set
[set
].surface_offsets
[binding
];
369 uint32_t array_size
= bind_layout
->array_size
;
371 nir_ssa_def
*array_index
= nir_ssa_for_src(b
, intrin
->src
[0], 1);
372 if (nir_src_is_const(intrin
->src
[0]) || state
->add_bounds_checks
)
373 array_index
= nir_umin(b
, array_index
, nir_imm_int(b
, array_size
- 1));
376 if (state
->pdevice
->has_a64_buffer_access
&&
377 (desc_type
== VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
||
378 desc_type
== VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
)) {
379 /* We store the descriptor offset as 16.8.8 where the top 16 bits are
380 * the offset into the descriptor set, the next 8 are the binding table
381 * index of the descriptor buffer, and the bottom 8 bits are the offset
382 * (in bytes) into the dynamic offset table.
384 assert(bind_layout
->dynamic_offset_index
< MAX_DYNAMIC_BUFFERS
);
385 uint32_t dynamic_offset_index
= 0xff; /* No dynamic offset */
386 if (bind_layout
->dynamic_offset_index
>= 0) {
387 dynamic_offset_index
=
388 state
->layout
->set
[set
].dynamic_offset_start
+
389 bind_layout
->dynamic_offset_index
;
392 const uint32_t desc_offset
=
393 bind_layout
->descriptor_offset
<< 16 |
394 (uint32_t)state
->set
[set
].desc_offset
<< 8 |
395 dynamic_offset_index
;
397 if (state
->add_bounds_checks
) {
398 assert(desc_addr_format(desc_type
, state
) ==
399 nir_address_format_64bit_bounded_global
);
400 assert(intrin
->dest
.ssa
.num_components
== 4);
401 assert(intrin
->dest
.ssa
.bit_size
== 32);
402 index
= nir_vec4(b
, nir_imm_int(b
, desc_offset
),
403 nir_ssa_for_src(b
, intrin
->src
[0], 1),
404 nir_imm_int(b
, array_size
- 1),
405 nir_ssa_undef(b
, 1, 32));
407 assert(desc_addr_format(desc_type
, state
) ==
408 nir_address_format_64bit_global
);
409 assert(intrin
->dest
.ssa
.num_components
== 1);
410 assert(intrin
->dest
.ssa
.bit_size
== 64);
411 index
= nir_pack_64_2x32_split(b
, nir_imm_int(b
, desc_offset
),
412 nir_ssa_for_src(b
, intrin
->src
[0], 1));
414 } else if (bind_layout
->data
& ANV_DESCRIPTOR_INLINE_UNIFORM
) {
415 /* This is an inline uniform block. Just reference the descriptor set
416 * and use the descriptor offset as the base.
418 assert(desc_addr_format(desc_type
, state
) ==
419 nir_address_format_32bit_index_offset
);
420 assert(intrin
->dest
.ssa
.num_components
== 2);
421 assert(intrin
->dest
.ssa
.bit_size
== 32);
422 index
= nir_imm_ivec2(b
, state
->set
[set
].desc_offset
,
423 bind_layout
->descriptor_offset
);
425 assert(desc_addr_format(desc_type
, state
) ==
426 nir_address_format_32bit_index_offset
);
427 assert(intrin
->dest
.ssa
.num_components
== 2);
428 assert(intrin
->dest
.ssa
.bit_size
== 32);
429 index
= nir_vec2(b
, nir_iadd_imm(b
, array_index
, surface_index
),
433 assert(intrin
->dest
.is_ssa
);
434 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
, nir_src_for_ssa(index
));
435 nir_instr_remove(&intrin
->instr
);
439 lower_res_reindex_intrinsic(nir_intrinsic_instr
*intrin
,
440 struct apply_pipeline_layout_state
*state
)
442 nir_builder
*b
= &state
->builder
;
444 b
->cursor
= nir_before_instr(&intrin
->instr
);
446 const VkDescriptorType desc_type
= nir_intrinsic_desc_type(intrin
);
448 /* For us, the resource indices are just indices into the binding table and
449 * array elements are sequential. A resource_reindex just turns into an
450 * add of the two indices.
452 assert(intrin
->src
[0].is_ssa
&& intrin
->src
[1].is_ssa
);
453 nir_ssa_def
*old_index
= intrin
->src
[0].ssa
;
454 nir_ssa_def
*offset
= intrin
->src
[1].ssa
;
456 nir_ssa_def
*new_index
;
457 switch (desc_addr_format(desc_type
, state
)) {
458 case nir_address_format_64bit_bounded_global
:
459 /* See also lower_res_index_intrinsic() */
460 assert(intrin
->dest
.ssa
.num_components
== 4);
461 assert(intrin
->dest
.ssa
.bit_size
== 32);
462 new_index
= nir_vec4(b
, nir_channel(b
, old_index
, 0),
463 nir_iadd(b
, nir_channel(b
, old_index
, 1),
465 nir_channel(b
, old_index
, 2),
466 nir_ssa_undef(b
, 1, 32));
469 case nir_address_format_64bit_global
: {
470 /* See also lower_res_index_intrinsic() */
471 assert(intrin
->dest
.ssa
.num_components
== 1);
472 assert(intrin
->dest
.ssa
.bit_size
== 64);
473 nir_ssa_def
*base
= nir_unpack_64_2x32_split_x(b
, old_index
);
474 nir_ssa_def
*arr_idx
= nir_unpack_64_2x32_split_y(b
, old_index
);
475 new_index
= nir_pack_64_2x32_split(b
, base
, nir_iadd(b
, arr_idx
, offset
));
479 case nir_address_format_32bit_index_offset
:
480 assert(intrin
->dest
.ssa
.num_components
== 2);
481 assert(intrin
->dest
.ssa
.bit_size
== 32);
482 new_index
= nir_vec2(b
, nir_iadd(b
, nir_channel(b
, old_index
, 0), offset
),
483 nir_channel(b
, old_index
, 1));
487 unreachable("Uhandled address format");
490 assert(intrin
->dest
.is_ssa
);
491 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
, nir_src_for_ssa(new_index
));
492 nir_instr_remove(&intrin
->instr
);
496 build_ssbo_descriptor_load(const VkDescriptorType desc_type
,
498 struct apply_pipeline_layout_state
*state
)
500 nir_builder
*b
= &state
->builder
;
502 nir_ssa_def
*desc_offset
, *array_index
;
503 switch (state
->ssbo_addr_format
) {
504 case nir_address_format_64bit_bounded_global
:
505 /* See also lower_res_index_intrinsic() */
506 desc_offset
= nir_channel(b
, index
, 0);
507 array_index
= nir_umin(b
, nir_channel(b
, index
, 1),
508 nir_channel(b
, index
, 2));
511 case nir_address_format_64bit_global
:
512 /* See also lower_res_index_intrinsic() */
513 desc_offset
= nir_unpack_64_2x32_split_x(b
, index
);
514 array_index
= nir_unpack_64_2x32_split_y(b
, index
);
518 unreachable("Unhandled address format for SSBO");
521 /* The desc_offset is actually 16.8.8 */
522 nir_ssa_def
*desc_buffer_index
=
523 nir_extract_u8(b
, desc_offset
, nir_imm_int(b
, 1));
524 nir_ssa_def
*desc_offset_base
=
525 nir_extract_u16(b
, desc_offset
, nir_imm_int(b
, 1));
527 /* Compute the actual descriptor offset */
528 const unsigned descriptor_size
=
529 anv_descriptor_type_size(state
->pdevice
, desc_type
);
530 desc_offset
= nir_iadd(b
, desc_offset_base
,
531 nir_imul_imm(b
, array_index
, descriptor_size
));
533 nir_intrinsic_instr
*desc_load
=
534 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_ubo
);
535 desc_load
->src
[0] = nir_src_for_ssa(desc_buffer_index
);
536 desc_load
->src
[1] = nir_src_for_ssa(desc_offset
);
537 nir_intrinsic_set_align(desc_load
, 8, 0);
538 desc_load
->num_components
= 4;
539 nir_ssa_dest_init(&desc_load
->instr
, &desc_load
->dest
, 4, 32, NULL
);
540 nir_builder_instr_insert(b
, &desc_load
->instr
);
542 return &desc_load
->dest
.ssa
;
546 lower_load_vulkan_descriptor(nir_intrinsic_instr
*intrin
,
547 struct apply_pipeline_layout_state
*state
)
549 nir_builder
*b
= &state
->builder
;
551 b
->cursor
= nir_before_instr(&intrin
->instr
);
553 const VkDescriptorType desc_type
= nir_intrinsic_desc_type(intrin
);
555 assert(intrin
->src
[0].is_ssa
);
556 nir_ssa_def
*index
= intrin
->src
[0].ssa
;
559 if (state
->pdevice
->has_a64_buffer_access
&&
560 (desc_type
== VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
||
561 desc_type
== VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
)) {
562 desc
= build_ssbo_descriptor_load(desc_type
, index
, state
);
564 /* We want nir_address_format_64bit_global */
565 if (!state
->add_bounds_checks
)
566 desc
= nir_pack_64_2x32(b
, nir_channels(b
, desc
, 0x3));
568 if (state
->has_dynamic_buffers
) {
569 /* This shader has dynamic offsets and we have no way of knowing
570 * (save from the dynamic offset base index) if this buffer has a
573 nir_ssa_def
*desc_offset
, *array_index
;
574 switch (state
->ssbo_addr_format
) {
575 case nir_address_format_64bit_bounded_global
:
576 /* See also lower_res_index_intrinsic() */
577 desc_offset
= nir_channel(b
, index
, 0);
578 array_index
= nir_umin(b
, nir_channel(b
, index
, 1),
579 nir_channel(b
, index
, 2));
582 case nir_address_format_64bit_global
:
583 /* See also lower_res_index_intrinsic() */
584 desc_offset
= nir_unpack_64_2x32_split_x(b
, index
);
585 array_index
= nir_unpack_64_2x32_split_y(b
, index
);
589 unreachable("Unhandled address format for SSBO");
592 nir_ssa_def
*dyn_offset_base
=
593 nir_extract_u8(b
, desc_offset
, nir_imm_int(b
, 0));
594 nir_ssa_def
*dyn_offset_idx
=
595 nir_iadd(b
, dyn_offset_base
, array_index
);
596 if (state
->add_bounds_checks
) {
597 dyn_offset_idx
= nir_umin(b
, dyn_offset_idx
,
598 nir_imm_int(b
, MAX_DYNAMIC_BUFFERS
));
601 nir_intrinsic_instr
*dyn_load
=
602 nir_intrinsic_instr_create(b
->shader
,
603 nir_intrinsic_load_push_constant
);
604 nir_intrinsic_set_base(dyn_load
, offsetof(struct anv_push_constants
,
606 nir_intrinsic_set_range(dyn_load
, MAX_DYNAMIC_BUFFERS
* 4);
607 dyn_load
->src
[0] = nir_src_for_ssa(nir_imul_imm(b
, dyn_offset_idx
, 4));
608 dyn_load
->num_components
= 1;
609 nir_ssa_dest_init(&dyn_load
->instr
, &dyn_load
->dest
, 1, 32, NULL
);
610 nir_builder_instr_insert(b
, &dyn_load
->instr
);
612 nir_ssa_def
*dynamic_offset
=
613 nir_bcsel(b
, nir_ieq(b
, dyn_offset_base
, nir_imm_int(b
, 0xff)),
614 nir_imm_int(b
, 0), &dyn_load
->dest
.ssa
);
616 switch (state
->ssbo_addr_format
) {
617 case nir_address_format_64bit_bounded_global
: {
618 /* The dynamic offset gets added to the base pointer so that we
619 * have a sliding window range.
621 nir_ssa_def
*base_ptr
=
622 nir_pack_64_2x32(b
, nir_channels(b
, desc
, 0x3));
623 base_ptr
= nir_iadd(b
, base_ptr
, nir_u2u64(b
, dynamic_offset
));
624 desc
= nir_vec4(b
, nir_unpack_64_2x32_split_x(b
, base_ptr
),
625 nir_unpack_64_2x32_split_y(b
, base_ptr
),
626 nir_channel(b
, desc
, 2),
627 nir_channel(b
, desc
, 3));
631 case nir_address_format_64bit_global
:
632 desc
= nir_iadd(b
, desc
, nir_u2u64(b
, dynamic_offset
));
636 unreachable("Unhandled address format for SSBO");
640 /* We follow the nir_address_format_32bit_index_offset model */
644 assert(intrin
->dest
.is_ssa
);
645 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
, nir_src_for_ssa(desc
));
646 nir_instr_remove(&intrin
->instr
);
650 lower_get_buffer_size(nir_intrinsic_instr
*intrin
,
651 struct apply_pipeline_layout_state
*state
)
653 if (_mesa_set_search(state
->lowered_instrs
, intrin
))
656 nir_builder
*b
= &state
->builder
;
658 b
->cursor
= nir_before_instr(&intrin
->instr
);
660 const VkDescriptorType desc_type
= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
;
662 assert(intrin
->src
[0].is_ssa
);
663 nir_ssa_def
*index
= intrin
->src
[0].ssa
;
665 if (state
->pdevice
->has_a64_buffer_access
) {
666 nir_ssa_def
*desc
= build_ssbo_descriptor_load(desc_type
, index
, state
);
667 nir_ssa_def
*size
= nir_channel(b
, desc
, 2);
668 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
, nir_src_for_ssa(size
));
669 nir_instr_remove(&intrin
->instr
);
671 /* We're following the nir_address_format_32bit_index_offset model so
672 * the binding table index is the first component of the address. The
673 * back-end wants a scalar binding table index source.
675 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[0],
676 nir_src_for_ssa(nir_channel(b
, index
, 0)));
681 build_descriptor_load(nir_deref_instr
*deref
, unsigned offset
,
682 unsigned num_components
, unsigned bit_size
,
683 struct apply_pipeline_layout_state
*state
)
685 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
687 unsigned set
= var
->data
.descriptor_set
;
688 unsigned binding
= var
->data
.binding
;
689 unsigned array_size
=
690 state
->layout
->set
[set
].layout
->binding
[binding
].array_size
;
692 const struct anv_descriptor_set_binding_layout
*bind_layout
=
693 &state
->layout
->set
[set
].layout
->binding
[binding
];
695 nir_builder
*b
= &state
->builder
;
697 nir_ssa_def
*desc_buffer_index
=
698 nir_imm_int(b
, state
->set
[set
].desc_offset
);
700 nir_ssa_def
*desc_offset
=
701 nir_imm_int(b
, bind_layout
->descriptor_offset
+ offset
);
702 if (deref
->deref_type
!= nir_deref_type_var
) {
703 assert(deref
->deref_type
== nir_deref_type_array
);
705 const unsigned descriptor_size
= anv_descriptor_size(bind_layout
);
706 nir_ssa_def
*arr_index
= nir_ssa_for_src(b
, deref
->arr
.index
, 1);
707 if (state
->add_bounds_checks
)
708 arr_index
= nir_umin(b
, arr_index
, nir_imm_int(b
, array_size
- 1));
710 desc_offset
= nir_iadd(b
, desc_offset
,
711 nir_imul_imm(b
, arr_index
, descriptor_size
));
714 nir_intrinsic_instr
*desc_load
=
715 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_ubo
);
716 desc_load
->src
[0] = nir_src_for_ssa(desc_buffer_index
);
717 desc_load
->src
[1] = nir_src_for_ssa(desc_offset
);
718 nir_intrinsic_set_align(desc_load
, 8, offset
% 8);
719 desc_load
->num_components
= num_components
;
720 nir_ssa_dest_init(&desc_load
->instr
, &desc_load
->dest
,
721 num_components
, bit_size
, NULL
);
722 nir_builder_instr_insert(b
, &desc_load
->instr
);
724 return &desc_load
->dest
.ssa
;
728 lower_image_intrinsic(nir_intrinsic_instr
*intrin
,
729 struct apply_pipeline_layout_state
*state
)
731 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
732 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
734 unsigned set
= var
->data
.descriptor_set
;
735 unsigned binding
= var
->data
.binding
;
736 unsigned binding_offset
= state
->set
[set
].surface_offsets
[binding
];
738 nir_builder
*b
= &state
->builder
;
739 b
->cursor
= nir_before_instr(&intrin
->instr
);
741 ASSERTED
const bool use_bindless
= state
->pdevice
->has_bindless_images
;
743 if (intrin
->intrinsic
== nir_intrinsic_image_deref_load_param_intel
) {
744 b
->cursor
= nir_instr_remove(&intrin
->instr
);
746 assert(!use_bindless
); /* Otherwise our offsets would be wrong */
747 const unsigned param
= nir_intrinsic_base(intrin
);
750 build_descriptor_load(deref
, param
* 16,
751 intrin
->dest
.ssa
.num_components
,
752 intrin
->dest
.ssa
.bit_size
, state
);
754 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
, nir_src_for_ssa(desc
));
755 } else if (binding_offset
> MAX_BINDING_TABLE_SIZE
) {
756 const bool write_only
=
757 (var
->data
.access
& ACCESS_NON_READABLE
) != 0;
759 build_descriptor_load(deref
, 0, 2, 32, state
);
760 nir_ssa_def
*handle
= nir_channel(b
, desc
, write_only
? 1 : 0);
761 nir_rewrite_image_intrinsic(intrin
, handle
, true);
763 unsigned array_size
=
764 state
->layout
->set
[set
].layout
->binding
[binding
].array_size
;
766 nir_ssa_def
*index
= NULL
;
767 if (deref
->deref_type
!= nir_deref_type_var
) {
768 assert(deref
->deref_type
== nir_deref_type_array
);
769 index
= nir_ssa_for_src(b
, deref
->arr
.index
, 1);
770 if (state
->add_bounds_checks
)
771 index
= nir_umin(b
, index
, nir_imm_int(b
, array_size
- 1));
773 index
= nir_imm_int(b
, 0);
776 index
= nir_iadd_imm(b
, index
, binding_offset
);
777 nir_rewrite_image_intrinsic(intrin
, index
, false);
782 lower_load_constant(nir_intrinsic_instr
*intrin
,
783 struct apply_pipeline_layout_state
*state
)
785 nir_builder
*b
= &state
->builder
;
787 b
->cursor
= nir_before_instr(&intrin
->instr
);
789 /* Any constant-offset load_constant instructions should have been removed
790 * by constant folding.
792 assert(!nir_src_is_const(intrin
->src
[0]));
794 nir_ssa_def
*index
= nir_imm_int(b
, state
->constants_offset
);
795 nir_ssa_def
*offset
= nir_iadd(b
, nir_ssa_for_src(b
, intrin
->src
[0], 1),
796 nir_imm_int(b
, nir_intrinsic_base(intrin
)));
798 nir_intrinsic_instr
*load_ubo
=
799 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_ubo
);
800 load_ubo
->num_components
= intrin
->num_components
;
801 load_ubo
->src
[0] = nir_src_for_ssa(index
);
802 load_ubo
->src
[1] = nir_src_for_ssa(offset
);
803 nir_intrinsic_set_align(load_ubo
, intrin
->dest
.ssa
.bit_size
/ 8, 0);
804 nir_ssa_dest_init(&load_ubo
->instr
, &load_ubo
->dest
,
805 intrin
->dest
.ssa
.num_components
,
806 intrin
->dest
.ssa
.bit_size
, NULL
);
807 nir_builder_instr_insert(b
, &load_ubo
->instr
);
809 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
,
810 nir_src_for_ssa(&load_ubo
->dest
.ssa
));
811 nir_instr_remove(&intrin
->instr
);
815 lower_tex_deref(nir_tex_instr
*tex
, nir_tex_src_type deref_src_type
,
816 unsigned *base_index
, unsigned plane
,
817 struct apply_pipeline_layout_state
*state
)
819 int deref_src_idx
= nir_tex_instr_src_index(tex
, deref_src_type
);
820 if (deref_src_idx
< 0)
823 nir_deref_instr
*deref
= nir_src_as_deref(tex
->src
[deref_src_idx
].src
);
824 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
826 unsigned set
= var
->data
.descriptor_set
;
827 unsigned binding
= var
->data
.binding
;
828 unsigned array_size
=
829 state
->layout
->set
[set
].layout
->binding
[binding
].array_size
;
831 unsigned binding_offset
;
832 if (deref_src_type
== nir_tex_src_texture_deref
) {
833 binding_offset
= state
->set
[set
].surface_offsets
[binding
];
835 assert(deref_src_type
== nir_tex_src_sampler_deref
);
836 binding_offset
= state
->set
[set
].sampler_offsets
[binding
];
839 nir_builder
*b
= &state
->builder
;
841 nir_tex_src_type offset_src_type
;
842 nir_ssa_def
*index
= NULL
;
843 if (binding_offset
> MAX_BINDING_TABLE_SIZE
) {
844 const unsigned plane_offset
=
845 plane
* sizeof(struct anv_sampled_image_descriptor
);
848 build_descriptor_load(deref
, plane_offset
, 2, 32, state
);
850 if (deref_src_type
== nir_tex_src_texture_deref
) {
851 offset_src_type
= nir_tex_src_texture_handle
;
852 index
= nir_channel(b
, desc
, 0);
854 assert(deref_src_type
== nir_tex_src_sampler_deref
);
855 offset_src_type
= nir_tex_src_sampler_handle
;
856 index
= nir_channel(b
, desc
, 1);
859 if (deref_src_type
== nir_tex_src_texture_deref
) {
860 offset_src_type
= nir_tex_src_texture_offset
;
862 assert(deref_src_type
== nir_tex_src_sampler_deref
);
863 offset_src_type
= nir_tex_src_sampler_offset
;
866 *base_index
= binding_offset
+ plane
;
868 if (deref
->deref_type
!= nir_deref_type_var
) {
869 assert(deref
->deref_type
== nir_deref_type_array
);
871 if (nir_src_is_const(deref
->arr
.index
)) {
872 unsigned arr_index
= MIN2(nir_src_as_uint(deref
->arr
.index
), array_size
- 1);
873 struct anv_sampler
**immutable_samplers
=
874 state
->layout
->set
[set
].layout
->binding
[binding
].immutable_samplers
;
875 if (immutable_samplers
) {
876 /* Array of YCbCr samplers are tightly packed in the binding
877 * tables, compute the offset of an element in the array by
878 * adding the number of planes of all preceding elements.
880 unsigned desc_arr_index
= 0;
881 for (int i
= 0; i
< arr_index
; i
++)
882 desc_arr_index
+= immutable_samplers
[i
]->n_planes
;
883 *base_index
+= desc_arr_index
;
885 *base_index
+= arr_index
;
888 /* From VK_KHR_sampler_ycbcr_conversion:
890 * If sampler Y’CBCR conversion is enabled, the combined image
891 * sampler must be indexed only by constant integral expressions
892 * when aggregated into arrays in shader code, irrespective of
893 * the shaderSampledImageArrayDynamicIndexing feature.
895 assert(nir_tex_instr_src_index(tex
, nir_tex_src_plane
) == -1);
897 index
= nir_ssa_for_src(b
, deref
->arr
.index
, 1);
899 if (state
->add_bounds_checks
)
900 index
= nir_umin(b
, index
, nir_imm_int(b
, array_size
- 1));
906 nir_instr_rewrite_src(&tex
->instr
, &tex
->src
[deref_src_idx
].src
,
907 nir_src_for_ssa(index
));
908 tex
->src
[deref_src_idx
].src_type
= offset_src_type
;
910 nir_tex_instr_remove_src(tex
, deref_src_idx
);
915 tex_instr_get_and_remove_plane_src(nir_tex_instr
*tex
)
917 int plane_src_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_plane
);
918 if (plane_src_idx
< 0)
921 unsigned plane
= nir_src_as_uint(tex
->src
[plane_src_idx
].src
);
923 nir_tex_instr_remove_src(tex
, plane_src_idx
);
929 build_def_array_select(nir_builder
*b
, nir_ssa_def
**srcs
, nir_ssa_def
*idx
,
930 unsigned start
, unsigned end
)
932 if (start
== end
- 1) {
935 unsigned mid
= start
+ (end
- start
) / 2;
936 return nir_bcsel(b
, nir_ilt(b
, idx
, nir_imm_int(b
, mid
)),
937 build_def_array_select(b
, srcs
, idx
, start
, mid
),
938 build_def_array_select(b
, srcs
, idx
, mid
, end
));
943 lower_gen7_tex_swizzle(nir_tex_instr
*tex
, unsigned plane
,
944 struct apply_pipeline_layout_state
*state
)
946 assert(state
->pdevice
->info
.gen
== 7 && !state
->pdevice
->info
.is_haswell
);
947 if (tex
->sampler_dim
== GLSL_SAMPLER_DIM_BUF
||
948 nir_tex_instr_is_query(tex
) ||
949 tex
->op
== nir_texop_tg4
|| /* We can't swizzle TG4 */
950 (tex
->is_shadow
&& tex
->is_new_style_shadow
))
953 int deref_src_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_texture_deref
);
954 assert(deref_src_idx
>= 0);
956 nir_deref_instr
*deref
= nir_src_as_deref(tex
->src
[deref_src_idx
].src
);
957 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
959 unsigned set
= var
->data
.descriptor_set
;
960 unsigned binding
= var
->data
.binding
;
961 const struct anv_descriptor_set_binding_layout
*bind_layout
=
962 &state
->layout
->set
[set
].layout
->binding
[binding
];
964 if ((bind_layout
->data
& ANV_DESCRIPTOR_TEXTURE_SWIZZLE
) == 0)
967 nir_builder
*b
= &state
->builder
;
968 b
->cursor
= nir_before_instr(&tex
->instr
);
970 const unsigned plane_offset
=
971 plane
* sizeof(struct anv_texture_swizzle_descriptor
);
973 build_descriptor_load(deref
, plane_offset
, 1, 32, state
);
975 b
->cursor
= nir_after_instr(&tex
->instr
);
977 assert(tex
->dest
.ssa
.bit_size
== 32);
978 assert(tex
->dest
.ssa
.num_components
== 4);
980 /* Initializing to undef is ok; nir_opt_undef will clean it up. */
981 nir_ssa_def
*undef
= nir_ssa_undef(b
, 1, 32);
982 nir_ssa_def
*comps
[8];
983 for (unsigned i
= 0; i
< ARRAY_SIZE(comps
); i
++)
986 comps
[ISL_CHANNEL_SELECT_ZERO
] = nir_imm_int(b
, 0);
987 if (nir_alu_type_get_base_type(tex
->dest_type
) == nir_type_float
)
988 comps
[ISL_CHANNEL_SELECT_ONE
] = nir_imm_float(b
, 1);
990 comps
[ISL_CHANNEL_SELECT_ONE
] = nir_imm_int(b
, 1);
991 comps
[ISL_CHANNEL_SELECT_RED
] = nir_channel(b
, &tex
->dest
.ssa
, 0);
992 comps
[ISL_CHANNEL_SELECT_GREEN
] = nir_channel(b
, &tex
->dest
.ssa
, 1);
993 comps
[ISL_CHANNEL_SELECT_BLUE
] = nir_channel(b
, &tex
->dest
.ssa
, 2);
994 comps
[ISL_CHANNEL_SELECT_ALPHA
] = nir_channel(b
, &tex
->dest
.ssa
, 3);
996 nir_ssa_def
*swiz_comps
[4];
997 for (unsigned i
= 0; i
< 4; i
++) {
998 nir_ssa_def
*comp_swiz
= nir_extract_u8(b
, swiz
, nir_imm_int(b
, i
));
999 swiz_comps
[i
] = build_def_array_select(b
, comps
, comp_swiz
, 0, 8);
1001 nir_ssa_def
*swiz_tex_res
= nir_vec(b
, swiz_comps
, 4);
1003 /* Rewrite uses before we insert so we don't rewrite this use */
1004 nir_ssa_def_rewrite_uses_after(&tex
->dest
.ssa
,
1005 nir_src_for_ssa(swiz_tex_res
),
1006 swiz_tex_res
->parent_instr
);
1010 lower_tex(nir_tex_instr
*tex
, struct apply_pipeline_layout_state
*state
)
1012 unsigned plane
= tex_instr_get_and_remove_plane_src(tex
);
1014 /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
1015 * before we lower the derefs away so we can still find the descriptor.
1017 if (state
->pdevice
->info
.gen
== 7 && !state
->pdevice
->info
.is_haswell
)
1018 lower_gen7_tex_swizzle(tex
, plane
, state
);
1020 state
->builder
.cursor
= nir_before_instr(&tex
->instr
);
1022 lower_tex_deref(tex
, nir_tex_src_texture_deref
,
1023 &tex
->texture_index
, plane
, state
);
1025 lower_tex_deref(tex
, nir_tex_src_sampler_deref
,
1026 &tex
->sampler_index
, plane
, state
);
1030 apply_pipeline_layout_block(nir_block
*block
,
1031 struct apply_pipeline_layout_state
*state
)
1033 nir_foreach_instr_safe(instr
, block
) {
1034 switch (instr
->type
) {
1035 case nir_instr_type_intrinsic
: {
1036 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
1037 switch (intrin
->intrinsic
) {
1038 case nir_intrinsic_vulkan_resource_index
:
1039 lower_res_index_intrinsic(intrin
, state
);
1041 case nir_intrinsic_vulkan_resource_reindex
:
1042 lower_res_reindex_intrinsic(intrin
, state
);
1044 case nir_intrinsic_load_vulkan_descriptor
:
1045 lower_load_vulkan_descriptor(intrin
, state
);
1047 case nir_intrinsic_get_buffer_size
:
1048 lower_get_buffer_size(intrin
, state
);
1050 case nir_intrinsic_image_deref_load
:
1051 case nir_intrinsic_image_deref_store
:
1052 case nir_intrinsic_image_deref_atomic_add
:
1053 case nir_intrinsic_image_deref_atomic_imin
:
1054 case nir_intrinsic_image_deref_atomic_umin
:
1055 case nir_intrinsic_image_deref_atomic_imax
:
1056 case nir_intrinsic_image_deref_atomic_umax
:
1057 case nir_intrinsic_image_deref_atomic_and
:
1058 case nir_intrinsic_image_deref_atomic_or
:
1059 case nir_intrinsic_image_deref_atomic_xor
:
1060 case nir_intrinsic_image_deref_atomic_exchange
:
1061 case nir_intrinsic_image_deref_atomic_comp_swap
:
1062 case nir_intrinsic_image_deref_size
:
1063 case nir_intrinsic_image_deref_samples
:
1064 case nir_intrinsic_image_deref_load_param_intel
:
1065 case nir_intrinsic_image_deref_load_raw_intel
:
1066 case nir_intrinsic_image_deref_store_raw_intel
:
1067 lower_image_intrinsic(intrin
, state
);
1069 case nir_intrinsic_load_constant
:
1070 lower_load_constant(intrin
, state
);
1077 case nir_instr_type_tex
:
1078 lower_tex(nir_instr_as_tex(instr
), state
);
/* Per-binding record used to rank descriptor-set bindings for binding-table
 * slot assignment.  Fields reconstructed from usage in compare_binding_infos
 * and the sorting loop: score is the 9-bit fixed-point usage score, set and
 * binding identify the descriptor binding.
 */
struct binding_info {
   uint32_t binding;  /* binding index within the set */
   uint16_t score;    /* higher score => earlier binding-table slot */
   uint8_t set;       /* descriptor set index */
};
1093 compare_binding_infos(const void *_a
, const void *_b
)
1095 const struct binding_info
*a
= _a
, *b
= _b
;
1096 if (a
->score
!= b
->score
)
1097 return b
->score
- a
->score
;
1099 if (a
->set
!= b
->set
)
1100 return a
->set
- b
->set
;
1102 return a
->binding
- b
->binding
;
1106 anv_nir_apply_pipeline_layout(const struct anv_physical_device
*pdevice
,
1107 bool robust_buffer_access
,
1108 const struct anv_pipeline_layout
*layout
,
1110 struct anv_pipeline_bind_map
*map
)
1112 void *mem_ctx
= ralloc_context(NULL
);
1114 struct apply_pipeline_layout_state state
= {
1118 .add_bounds_checks
= robust_buffer_access
,
1119 .ssbo_addr_format
= anv_nir_ssbo_addr_format(pdevice
, robust_buffer_access
),
1120 .lowered_instrs
= _mesa_pointer_set_create(mem_ctx
),
1123 for (unsigned s
= 0; s
< layout
->num_sets
; s
++) {
1124 const unsigned count
= layout
->set
[s
].layout
->binding_count
;
1125 state
.set
[s
].use_count
= rzalloc_array(mem_ctx
, uint8_t, count
);
1126 state
.set
[s
].surface_offsets
= rzalloc_array(mem_ctx
, uint8_t, count
);
1127 state
.set
[s
].sampler_offsets
= rzalloc_array(mem_ctx
, uint8_t, count
);
1130 nir_foreach_function(function
, shader
) {
1131 if (!function
->impl
)
1134 nir_foreach_block(block
, function
->impl
)
1135 get_used_bindings_block(block
, &state
);
1138 for (unsigned s
= 0; s
< layout
->num_sets
; s
++) {
1139 if (state
.set
[s
].desc_buffer_used
) {
1140 map
->surface_to_descriptor
[map
->surface_count
] =
1141 (struct anv_pipeline_binding
) {
1142 .set
= ANV_DESCRIPTOR_SET_DESCRIPTORS
,
1145 state
.set
[s
].desc_offset
= map
->surface_count
;
1146 map
->surface_count
++;
1150 if (state
.uses_constants
) {
1151 state
.constants_offset
= map
->surface_count
;
1152 map
->surface_to_descriptor
[map
->surface_count
].set
=
1153 ANV_DESCRIPTOR_SET_SHADER_CONSTANTS
;
1154 map
->surface_count
++;
1157 unsigned used_binding_count
= 0;
1158 for (uint32_t set
= 0; set
< layout
->num_sets
; set
++) {
1159 struct anv_descriptor_set_layout
*set_layout
= layout
->set
[set
].layout
;
1160 for (unsigned b
= 0; b
< set_layout
->binding_count
; b
++) {
1161 if (state
.set
[set
].use_count
[b
] == 0)
1164 used_binding_count
++;
1168 struct binding_info
*infos
=
1169 rzalloc_array(mem_ctx
, struct binding_info
, used_binding_count
);
1170 used_binding_count
= 0;
1171 for (uint32_t set
= 0; set
< layout
->num_sets
; set
++) {
1172 const struct anv_descriptor_set_layout
*set_layout
= layout
->set
[set
].layout
;
1173 for (unsigned b
= 0; b
< set_layout
->binding_count
; b
++) {
1174 if (state
.set
[set
].use_count
[b
] == 0)
1177 const struct anv_descriptor_set_binding_layout
*binding
=
1178 &layout
->set
[set
].layout
->binding
[b
];
1180 /* Do a fixed-point calculation to generate a score based on the
1181 * number of uses and the binding array size. We shift by 7 instead
1182 * of 8 because we're going to use the top bit below to make
1183 * everything which does not support bindless super higher priority
1184 * than things which do.
1186 uint16_t score
= ((uint16_t)state
.set
[set
].use_count
[b
] << 7) /
1187 binding
->array_size
;
1189 /* If the descriptor type doesn't support bindless then put it at the
1190 * beginning so we guarantee it gets a slot.
1192 if (!anv_descriptor_supports_bindless(pdevice
, binding
, true) ||
1193 !anv_descriptor_supports_bindless(pdevice
, binding
, false))
1196 infos
[used_binding_count
++] = (struct binding_info
) {
1204 /* Order the binding infos based on score with highest scores first. If
1205 * scores are equal we then order by set and binding.
1207 qsort(infos
, used_binding_count
, sizeof(struct binding_info
),
1208 compare_binding_infos
);
1210 for (unsigned i
= 0; i
< used_binding_count
; i
++) {
1211 unsigned set
= infos
[i
].set
, b
= infos
[i
].binding
;
1212 const struct anv_descriptor_set_binding_layout
*binding
=
1213 &layout
->set
[set
].layout
->binding
[b
];
1215 const uint32_t array_size
= binding
->array_size
;
1217 if (binding
->dynamic_offset_index
>= 0)
1218 state
.has_dynamic_buffers
= true;
1220 if (binding
->data
& ANV_DESCRIPTOR_SURFACE_STATE
) {
1221 if (map
->surface_count
+ array_size
> MAX_BINDING_TABLE_SIZE
||
1222 anv_descriptor_requires_bindless(pdevice
, binding
, false)) {
1223 /* If this descriptor doesn't fit in the binding table or if it
1224 * requires bindless for some reason, flag it as bindless.
1226 assert(anv_descriptor_supports_bindless(pdevice
, binding
, false));
1227 state
.set
[set
].surface_offsets
[b
] = BINDLESS_OFFSET
;
1229 state
.set
[set
].surface_offsets
[b
] = map
->surface_count
;
1230 if (binding
->dynamic_offset_index
< 0) {
1231 struct anv_sampler
**samplers
= binding
->immutable_samplers
;
1232 for (unsigned i
= 0; i
< binding
->array_size
; i
++) {
1233 uint8_t planes
= samplers
? samplers
[i
]->n_planes
: 1;
1234 for (uint8_t p
= 0; p
< planes
; p
++) {
1235 map
->surface_to_descriptor
[map
->surface_count
++] =
1236 (struct anv_pipeline_binding
) {
1238 .index
= binding
->descriptor_index
+ i
,
1244 for (unsigned i
= 0; i
< binding
->array_size
; i
++) {
1245 map
->surface_to_descriptor
[map
->surface_count
++] =
1246 (struct anv_pipeline_binding
) {
1248 .index
= binding
->descriptor_index
+ i
,
1249 .dynamic_offset_index
=
1250 layout
->set
[set
].dynamic_offset_start
+
1251 binding
->dynamic_offset_index
+ i
,
1256 assert(map
->surface_count
<= MAX_BINDING_TABLE_SIZE
);
1259 if (binding
->data
& ANV_DESCRIPTOR_SAMPLER_STATE
) {
1260 if (map
->sampler_count
+ array_size
> MAX_SAMPLER_TABLE_SIZE
||
1261 anv_descriptor_requires_bindless(pdevice
, binding
, true)) {
1262 /* If this descriptor doesn't fit in the binding table or if it
1263 * requires bindless for some reason, flag it as bindless.
1265 * We also make large sampler arrays bindless because we can avoid
1266 * using indirect sends thanks to bindless samplers being packed
1267 * less tightly than the sampler table.
1269 assert(anv_descriptor_supports_bindless(pdevice
, binding
, true));
1270 state
.set
[set
].sampler_offsets
[b
] = BINDLESS_OFFSET
;
1272 state
.set
[set
].sampler_offsets
[b
] = map
->sampler_count
;
1273 struct anv_sampler
**samplers
= binding
->immutable_samplers
;
1274 for (unsigned i
= 0; i
< binding
->array_size
; i
++) {
1275 uint8_t planes
= samplers
? samplers
[i
]->n_planes
: 1;
1276 for (uint8_t p
= 0; p
< planes
; p
++) {
1277 map
->sampler_to_descriptor
[map
->sampler_count
++] =
1278 (struct anv_pipeline_binding
) {
1280 .index
= binding
->descriptor_index
+ i
,
1289 nir_foreach_variable(var
, &shader
->uniforms
) {
1290 const struct glsl_type
*glsl_type
= glsl_without_array(var
->type
);
1292 if (!glsl_type_is_image(glsl_type
))
1295 enum glsl_sampler_dim dim
= glsl_get_sampler_dim(glsl_type
);
1297 const uint32_t set
= var
->data
.descriptor_set
;
1298 const uint32_t binding
= var
->data
.binding
;
1299 const struct anv_descriptor_set_binding_layout
*bind_layout
=
1300 &layout
->set
[set
].layout
->binding
[binding
];
1301 const uint32_t array_size
= bind_layout
->array_size
;
1303 if (state
.set
[set
].use_count
[binding
] == 0)
1306 if (state
.set
[set
].surface_offsets
[binding
] >= MAX_BINDING_TABLE_SIZE
)
1309 struct anv_pipeline_binding
*pipe_binding
=
1310 &map
->surface_to_descriptor
[state
.set
[set
].surface_offsets
[binding
]];
1311 for (unsigned i
= 0; i
< array_size
; i
++) {
1312 assert(pipe_binding
[i
].set
== set
);
1313 assert(pipe_binding
[i
].index
== bind_layout
->descriptor_index
+ i
);
1315 if (dim
== GLSL_SAMPLER_DIM_SUBPASS
||
1316 dim
== GLSL_SAMPLER_DIM_SUBPASS_MS
)
1317 pipe_binding
[i
].input_attachment_index
= var
->data
.index
+ i
;
1319 /* NOTE: This is a uint8_t so we really do need to != 0 here */
1320 pipe_binding
[i
].write_only
=
1321 (var
->data
.access
& ACCESS_NON_READABLE
) != 0;
1325 nir_foreach_function(function
, shader
) {
1326 if (!function
->impl
)
1329 nir_builder_init(&state
.builder
, function
->impl
);
1331 /* Before we do the normal lowering, we look for any SSBO operations
1332 * that we can lower to the BTI model and lower them up-front. The BTI
1333 * model can perform better than the A64 model for a couple reasons:
1335 * 1. 48-bit address calculations are potentially expensive and using
1336 * the BTI model lets us simply compute 32-bit offsets and the
1337 * hardware adds the 64-bit surface base address.
1339 * 2. The BTI messages, because they use surface states, do bounds
1340 * checking for us. With the A64 model, we have to do our own
1341 * bounds checking and this means wider pointers and extra
1342 * calculations and branching in the shader.
1344 * The solution to both of these is to convert things to the BTI model
1345 * opportunistically. The reason why we need to do this as a pre-pass
1346 * is for two reasons:
1348 * 1. The BTI model requires nir_address_format_32bit_index_offset
1349 * pointers which are not the same type as the pointers needed for
1350 * the A64 model. Because all our derefs are set up for the A64
1351 * model (in case we have variable pointers), we have to crawl all
1352 * the way back to the vulkan_resource_index intrinsic and build a
1353 * completely fresh index+offset calculation.
1355 * 2. Because the variable-pointers-capable lowering that we do as part
1356 * of apply_pipeline_layout_block is destructive (It really has to
1357 * be to handle variable pointers properly), we've lost the deref
1358 * information by the time we get to the load/store/atomic
1359 * intrinsics in that pass.
1361 lower_direct_buffer_access(function
->impl
, &state
);
1363 nir_foreach_block(block
, function
->impl
)
1364 apply_pipeline_layout_block(block
, &state
);
1365 nir_metadata_preserve(function
->impl
, nir_metadata_block_index
|
1366 nir_metadata_dominance
);
1369 ralloc_free(mem_ctx
);
1371 /* Now that we're done computing the surface and sampler portions of the
1372 * bind map, hash them. This lets us quickly determine if the actual
1373 * mapping has changed and not just a no-op pipeline change.
1375 _mesa_sha1_compute(map
->surface_to_descriptor
,
1376 map
->surface_count
* sizeof(struct anv_pipeline_binding
),
1378 _mesa_sha1_compute(map
->sampler_to_descriptor
,
1379 map
->sampler_count
* sizeof(struct anv_pipeline_binding
),