/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
#include "compiler/brw_nir.h"
/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
#define MAX_SAMPLER_TABLE_SIZE 128
#define BINDLESS_OFFSET        255
struct apply_pipeline_layout_state {
   const struct anv_physical_device *pdevice;

   nir_shader *shader;
   nir_builder builder;

   struct anv_pipeline_layout *layout;
   bool add_bounds_checks;
   nir_address_format ssbo_addr_format;

   /* Place to flag lowered instructions so we don't lower them twice */
   struct set *lowered_instrs;

   int dynamic_offset_uniform_start;

   bool uses_constants;
   uint8_t constants_offset;

   struct {
      bool desc_buffer_used;
      uint8_t desc_offset;

      uint8_t *use_count;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
   } set[MAX_SETS];
};
static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if (state->set[set].use_count[binding] < UINT8_MAX)
      state->set[set].use_count[binding]++;

   /* Only flag the descriptor buffer as used if there's actually data for
    * this binding.  This lets us be lazy and call this function constantly
    * without worrying about unnecessarily enabling the buffer.
    */
   if (anv_descriptor_size(bind_layout))
      state->set[set].desc_buffer_used = true;
}
static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
{
   nir_deref_instr *deref = nir_src_as_deref(src);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   add_binding(state, var->data.descriptor_set, var->data.binding);
}
static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
                    nir_tex_instr *tex, nir_tex_src_type deref_src_type)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   add_deref_src_binding(state, tex->src[deref_src_idx].src);
}
static void
get_used_bindings_block(nir_block *block,
                        struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            add_binding(state, nir_intrinsic_desc_set(intrin),
                        nir_intrinsic_binding(intrin));
            break;

         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            add_deref_src_binding(state, intrin->src[0]);
            break;

         case nir_intrinsic_load_constant:
            state->uses_constants = true;
            break;

         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex: {
         nir_tex_instr *tex = nir_instr_as_tex(instr);
         add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
         add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
         break;
      }
      default:
         continue;
      }
   }
}
static bool
find_descriptor_for_index_src(nir_src src,
                              struct apply_pipeline_layout_state *state)
{
   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);

   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
      intrin = nir_src_as_intrinsic(intrin->src[0]);

   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return false;

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   uint32_t surface_index = state->set[set].surface_offsets[binding];

   /* Only lower to a BTI message if we have a valid binding table index. */
   return surface_index < MAX_BINDING_TABLE_SIZE;
}
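
/* For instance (illustrative only), a source fed by a chain like
 *
 *    vulkan_resource_index -> vulkan_resource_reindex -> vulkan_resource_reindex
 *
 * is walked back to the initial vulkan_resource_index above, whose
 * set/binding then determine whether a binding table slot was assigned.
 */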
static bool
nir_deref_find_descriptor(nir_deref_instr *deref,
                          struct apply_pipeline_layout_state *state)
{
   while (1) {
      /* Nothing we will use this on has a variable */
      assert(deref->deref_type != nir_deref_type_var);

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (!parent)
         break;

      deref = parent;
   }
   assert(deref->deref_type == nir_deref_type_cast);

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return false;

   return find_descriptor_for_index_src(intrin->src[0], state);
}
static nir_ssa_def *
build_index_for_res_reindex(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      nir_ssa_def *bti =
         build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]), state);

      b->cursor = nir_before_instr(&intrin->instr);
      return nir_iadd(b, bti, nir_ssa_for_src(b, intrin->src[1], 1));
   }

   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   return nir_iadd_imm(b, array_index, surface_index);
}
static nir_ssa_def *
build_index_offset_for_deref(nir_deref_instr *deref,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent) {
      nir_ssa_def *addr = build_index_offset_for_deref(parent, state);

      b->cursor = nir_before_instr(&deref->instr);
      return nir_explicit_io_address_from_deref(b, deref, addr,
                                                nir_address_format_32bit_index_offset);
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_ssa_def *index =
      build_index_for_res_reindex(nir_src_as_intrinsic(load_desc->src[0]), state);

   /* Return a 0 offset which will get picked up by the recursion */
   b->cursor = nir_before_instr(&deref->instr);
   return nir_vec2(b, index, nir_imm_int(b, 0));
}
static bool
try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, bool is_atomic,
                                  struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref->mode != nir_var_mem_ssbo)
      return false;

   /* 64-bit atomics only support A64 messages so we can't lower them to the
    * index+offset model.
    */
   if (is_atomic && nir_dest_bit_size(intrin->dest) == 64)
      return false;

   /* Normal binding table-based messages can't handle non-uniform access so
    * we have to fall back to A64.
    */
   if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
      return false;

   if (!nir_deref_find_descriptor(deref, state))
      return false;

   nir_ssa_def *addr = build_index_offset_for_deref(deref, state);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_lower_explicit_io_instr(b, intrin, addr,
                               nir_address_format_32bit_index_offset);
   return true;
}
static void
lower_direct_buffer_access(nir_function_impl *impl,
                           struct apply_pipeline_layout_state *state)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
            try_lower_direct_buffer_intrinsic(intrin, false, state);
            break;

         case nir_intrinsic_deref_atomic_add:
         case nir_intrinsic_deref_atomic_imin:
         case nir_intrinsic_deref_atomic_umin:
         case nir_intrinsic_deref_atomic_imax:
         case nir_intrinsic_deref_atomic_umax:
         case nir_intrinsic_deref_atomic_and:
         case nir_intrinsic_deref_atomic_or:
         case nir_intrinsic_deref_atomic_xor:
         case nir_intrinsic_deref_atomic_exchange:
         case nir_intrinsic_deref_atomic_comp_swap:
         case nir_intrinsic_deref_atomic_fmin:
         case nir_intrinsic_deref_atomic_fmax:
         case nir_intrinsic_deref_atomic_fcomp_swap:
            try_lower_direct_buffer_intrinsic(intrin, true, state);
            break;

         case nir_intrinsic_get_buffer_size: {
            /* The get_buffer_size intrinsic always just takes an
             * index/reindex intrinsic.
             */
            if (!find_descriptor_for_index_src(intrin->src[0], state))
               break;

            nir_ssa_def *index =
               build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]),
                                           state);
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                                  nir_src_for_ssa(index));
            _mesa_set_add(state->lowered_instrs, intrin);
            break;
         }

         default:
            break;
         }
      }
   }
}
static nir_address_format
desc_addr_format(VkDescriptorType desc_type,
                 struct apply_pipeline_layout_state *state)
{
   return (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
           desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) ?
          state->ssbo_addr_format : nir_address_format_32bit_index_offset;
}
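
/* Worked example (hypothetical values) of the 16.8.8 descriptor-offset
 * packing used by lower_res_index_intrinsic() below: a binding at offset
 * 0x40 into its descriptor set, whose descriptor buffer sits at binding
 * table index 2, with dynamic offset table entry 5, packs as
 *
 *    0x0040 << 16 | 2 << 8 | 5 == 0x00400205
 */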
static void
lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   nir_ssa_def *index;
   if (state->pdevice->has_a64_buffer_access &&
       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
      /* We store the descriptor offset as 16.8.8 where the top 16 bits are
       * the offset into the descriptor set, the next 8 are the binding table
       * index of the descriptor buffer, and the bottom 8 bits are the offset
       * (in bytes) into the dynamic offset table.
       */
      assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
      uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
      if (bind_layout->dynamic_offset_index >= 0) {
         dynamic_offset_index =
            state->layout->set[set].dynamic_offset_start +
            bind_layout->dynamic_offset_index;
      }

      const uint32_t desc_offset =
         bind_layout->descriptor_offset << 16 |
         (uint32_t)state->set[set].desc_offset << 8 |
         dynamic_offset_index;

      if (state->add_bounds_checks) {
         assert(desc_addr_format(desc_type, state) ==
                nir_address_format_64bit_bounded_global);
         assert(intrin->dest.ssa.num_components == 4);
         assert(intrin->dest.ssa.bit_size == 32);
         index = nir_vec4(b, nir_imm_int(b, desc_offset),
                             nir_ssa_for_src(b, intrin->src[0], 1),
                             nir_imm_int(b, array_size - 1),
                             nir_ssa_undef(b, 1, 32));
      } else {
         assert(desc_addr_format(desc_type, state) ==
                nir_address_format_64bit_global);
         assert(intrin->dest.ssa.num_components == 1);
         assert(intrin->dest.ssa.bit_size == 64);
         index = nir_pack_64_2x32_split(b, nir_imm_int(b, desc_offset),
                                           nir_ssa_for_src(b, intrin->src[0], 1));
      }
   } else if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
      /* This is an inline uniform block.  Just reference the descriptor set
       * and use the descriptor offset as the base.
       */
      assert(desc_addr_format(desc_type, state) ==
             nir_address_format_32bit_index_offset);
      assert(intrin->dest.ssa.num_components == 2);
      assert(intrin->dest.ssa.bit_size == 32);
      index = nir_imm_ivec2(b, state->set[set].desc_offset,
                               bind_layout->descriptor_offset);
   } else {
      assert(desc_addr_format(desc_type, state) ==
             nir_address_format_32bit_index_offset);
      assert(intrin->dest.ssa.num_components == 2);
      assert(intrin->dest.ssa.bit_size == 32);
      index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index),
                          nir_imm_int(b, 0));
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index));
   nir_instr_remove(&intrin->instr);
}
static void
lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   /* For us, the resource indices are just indices into the binding table and
    * array elements are sequential.  A resource_reindex just turns into an
    * add of the two indices.
    */
   assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
   nir_ssa_def *old_index = intrin->src[0].ssa;
   nir_ssa_def *offset = intrin->src[1].ssa;

   nir_ssa_def *new_index;
   switch (desc_addr_format(desc_type, state)) {
   case nir_address_format_64bit_bounded_global:
      /* See also lower_res_index_intrinsic() */
      assert(intrin->dest.ssa.num_components == 4);
      assert(intrin->dest.ssa.bit_size == 32);
      new_index = nir_vec4(b, nir_channel(b, old_index, 0),
                              nir_iadd(b, nir_channel(b, old_index, 1),
                                          offset),
                              nir_channel(b, old_index, 2),
                              nir_ssa_undef(b, 1, 32));
      break;

   case nir_address_format_64bit_global: {
      /* See also lower_res_index_intrinsic() */
      assert(intrin->dest.ssa.num_components == 1);
      assert(intrin->dest.ssa.bit_size == 64);
      nir_ssa_def *base = nir_unpack_64_2x32_split_x(b, old_index);
      nir_ssa_def *arr_idx = nir_unpack_64_2x32_split_y(b, old_index);
      new_index = nir_pack_64_2x32_split(b, base, nir_iadd(b, arr_idx, offset));
      break;
   }

   case nir_address_format_32bit_index_offset:
      assert(intrin->dest.ssa.num_components == 2);
      assert(intrin->dest.ssa.bit_size == 32);
      new_index = nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
                              nir_channel(b, old_index, 1));
      break;

   default:
      unreachable("Unhandled address format");
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index));
   nir_instr_remove(&intrin->instr);
}
static nir_ssa_def *
build_ssbo_descriptor_load(const VkDescriptorType desc_type,
                           nir_ssa_def *index,
                           struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_ssa_def *desc_offset, *array_index;
   switch (state->ssbo_addr_format) {
   case nir_address_format_64bit_bounded_global:
      /* See also lower_res_index_intrinsic() */
      desc_offset = nir_channel(b, index, 0);
      array_index = nir_umin(b, nir_channel(b, index, 1),
                                nir_channel(b, index, 2));
      break;

   case nir_address_format_64bit_global:
      /* See also lower_res_index_intrinsic() */
      desc_offset = nir_unpack_64_2x32_split_x(b, index);
      array_index = nir_unpack_64_2x32_split_y(b, index);
      break;

   default:
      unreachable("Unhandled address format for SSBO");
   }

   /* The desc_offset is actually 16.8.8 */
   nir_ssa_def *desc_buffer_index =
      nir_extract_u8(b, desc_offset, nir_imm_int(b, 1));
   nir_ssa_def *desc_offset_base =
      nir_extract_u16(b, desc_offset, nir_imm_int(b, 1));

   /* Compute the actual descriptor offset */
   const unsigned descriptor_size =
      anv_descriptor_type_size(state->pdevice, desc_type);
   desc_offset = nir_iadd(b, desc_offset_base,
                             nir_imul_imm(b, array_index, descriptor_size));

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = 4;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}
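
/* Continuing the hypothetical 16.8.8 example above: from a packed value of
 * 0x00400205, nir_extract_u16(.., 1) recovers the set-relative offset 0x0040
 * and nir_extract_u8(.., 1) recovers the descriptor buffer's binding table
 * index 2; byte 0 (the dynamic offset index, 5) is ignored here and consumed
 * in lower_load_vulkan_descriptor() instead.
 */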
static void
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   nir_ssa_def *desc;
   if (state->pdevice->has_a64_buffer_access &&
       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
      desc = build_ssbo_descriptor_load(desc_type, index, state);

      /* We want nir_address_format_64bit_global */
      if (!state->add_bounds_checks)
         desc = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));

      if (state->dynamic_offset_uniform_start >= 0) {
         /* This shader has dynamic offsets and we have no way of knowing
          * (save from the dynamic offset base index) if this buffer has a
          * dynamic offset.
          */
         nir_ssa_def *desc_offset, *array_index;
         switch (state->ssbo_addr_format) {
         case nir_address_format_64bit_bounded_global:
            /* See also lower_res_index_intrinsic() */
            desc_offset = nir_channel(b, index, 0);
            array_index = nir_umin(b, nir_channel(b, index, 1),
                                      nir_channel(b, index, 2));
            break;

         case nir_address_format_64bit_global:
            /* See also lower_res_index_intrinsic() */
            desc_offset = nir_unpack_64_2x32_split_x(b, index);
            array_index = nir_unpack_64_2x32_split_y(b, index);
            break;

         default:
            unreachable("Unhandled address format for SSBO");
         }

         nir_ssa_def *dyn_offset_base =
            nir_extract_u8(b, desc_offset, nir_imm_int(b, 0));
         nir_ssa_def *dyn_offset_idx =
            nir_iadd(b, dyn_offset_base, array_index);
         if (state->add_bounds_checks) {
            dyn_offset_idx = nir_umin(b, dyn_offset_idx,
                                         nir_imm_int(b, MAX_DYNAMIC_BUFFERS));
         }

         nir_intrinsic_instr *dyn_load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
         nir_intrinsic_set_base(dyn_load, state->dynamic_offset_uniform_start);
         nir_intrinsic_set_range(dyn_load, MAX_DYNAMIC_BUFFERS * 4);
         dyn_load->src[0] = nir_src_for_ssa(nir_imul_imm(b, dyn_offset_idx, 4));
         dyn_load->num_components = 1;
         nir_ssa_dest_init(&dyn_load->instr, &dyn_load->dest, 1, 32, NULL);
         nir_builder_instr_insert(b, &dyn_load->instr);

         nir_ssa_def *dynamic_offset =
            nir_bcsel(b, nir_ieq(b, dyn_offset_base, nir_imm_int(b, 0xff)),
                         nir_imm_int(b, 0), &dyn_load->dest.ssa);

         switch (state->ssbo_addr_format) {
         case nir_address_format_64bit_bounded_global: {
            /* The dynamic offset gets added to the base pointer so that we
             * have a sliding window range.
             */
            nir_ssa_def *base_ptr =
               nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
            base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
            desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
                               nir_unpack_64_2x32_split_y(b, base_ptr),
                               nir_channel(b, desc, 2),
                               nir_channel(b, desc, 3));
            break;
         }

         case nir_address_format_64bit_global:
            desc = nir_iadd(b, desc, nir_u2u64(b, dynamic_offset));
            break;

         default:
            unreachable("Unhandled address format for SSBO");
         }
      }
   } else {
      /* We follow the nir_address_format_32bit_index_offset model */
      desc = index;
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   nir_instr_remove(&intrin->instr);
}
static void
lower_get_buffer_size(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   if (_mesa_set_search(state->lowered_instrs, intrin))
      return;

   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   if (state->pdevice->has_a64_buffer_access) {
      nir_ssa_def *desc = build_ssbo_descriptor_load(desc_type, index, state);
      nir_ssa_def *size = nir_channel(b, desc, 2);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(size));
      nir_instr_remove(&intrin->instr);
   } else {
      /* We're following the nir_address_format_32bit_index_offset model so
       * the binding table index is the first component of the address.  The
       * back-end wants a scalar binding table index source.
       */
      nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                            nir_src_for_ssa(nir_channel(b, index, 0)));
   }
}
static nir_ssa_def *
build_descriptor_load(nir_deref_instr *deref, unsigned offset,
                      unsigned num_components, unsigned bit_size,
                      struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   nir_builder *b = &state->builder;

   nir_ssa_def *desc_buffer_index =
      nir_imm_int(b, state->set[set].desc_offset);

   nir_ssa_def *desc_offset =
      nir_imm_int(b, bind_layout->descriptor_offset + offset);
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      const unsigned descriptor_size = anv_descriptor_size(bind_layout);
      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      if (state->add_bounds_checks)
         arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1));

      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_size));
   }

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = num_components;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}
static void
lower_image_intrinsic(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned binding_offset = state->set[set].surface_offsets[binding];

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&intrin->instr);

   ASSERTED const bool use_bindless = state->pdevice->has_bindless_images;

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      assert(!use_bindless); /* Otherwise our offsets would be wrong */
      const unsigned param = nir_intrinsic_base(intrin);

      nir_ssa_def *desc =
         build_descriptor_load(deref, param * 16,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size, state);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   } else if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const bool write_only =
         (var->data.image.access & ACCESS_NON_READABLE) != 0;
      nir_ssa_def *desc =
         build_descriptor_load(deref, 0, 2, 32, state);
      nir_ssa_def *handle = nir_channel(b, desc, write_only ? 1 : 0);
      nir_rewrite_image_intrinsic(intrin, handle, true);
   } else {
      unsigned array_size =
         state->layout->set[set].layout->binding[binding].array_size;

      nir_ssa_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = nir_ssa_for_src(b, deref->arr.index, 1);
         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }
}
static void
lower_load_constant(nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
   nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1),
                                  nir_imm_int(b, nir_intrinsic_base(intrin)));

   nir_intrinsic_instr *load_ubo =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   load_ubo->num_components = intrin->num_components;
   load_ubo->src[0] = nir_src_for_ssa(index);
   load_ubo->src[1] = nir_src_for_ssa(offset);
   nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
                     intrin->dest.ssa.num_components,
                     intrin->dest.ssa.bit_size, NULL);
   nir_builder_instr_insert(b, &load_ubo->instr);

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                            nir_src_for_ssa(&load_ubo->dest.ssa));
   nir_instr_remove(&intrin->instr);
}
static void
lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
                unsigned *base_index, unsigned plane,
                struct apply_pipeline_layout_state *state)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   unsigned binding_offset;
   if (deref_src_type == nir_tex_src_texture_deref) {
      binding_offset = state->set[set].surface_offsets[binding];
   } else {
      assert(deref_src_type == nir_tex_src_sampler_deref);
      binding_offset = state->set[set].sampler_offsets[binding];
   }

   nir_builder *b = &state->builder;

   nir_tex_src_type offset_src_type;
   nir_ssa_def *index = NULL;
   if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const unsigned plane_offset =
         plane * sizeof(struct anv_sampled_image_descriptor);

      nir_ssa_def *desc =
         build_descriptor_load(deref, plane_offset, 2, 32, state);

      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_handle;
         index = nir_channel(b, desc, 0);
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_handle;
         index = nir_channel(b, desc, 1);
      }
   } else {
      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_offset;
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_offset;
      }

      *base_index = binding_offset + plane;

      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);

         if (nir_src_is_const(deref->arr.index)) {
            unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
            struct anv_sampler **immutable_samplers =
               state->layout->set[set].layout->binding[binding].immutable_samplers;
            if (immutable_samplers) {
               /* Arrays of YCbCr samplers are tightly packed in the binding
                * tables, so compute the offset of an element in the array by
                * adding the number of planes of all preceding elements.
                */
               unsigned desc_arr_index = 0;
               for (int i = 0; i < arr_index; i++)
                  desc_arr_index += immutable_samplers[i]->n_planes;
               *base_index += desc_arr_index;
            } else {
               *base_index += arr_index;
            }
         } else {
            /* From VK_KHR_sampler_ycbcr_conversion:
             *
             *    If sampler Y'CBCR conversion is enabled, the combined image
             *    sampler must be indexed only by constant integral expressions
             *    when aggregated into arrays in shader code, irrespective of
             *    the shaderSampledImageArrayDynamicIndexing feature.
             */
            assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);

            index = nir_ssa_for_src(b, deref->arr.index, 1);

            if (state->add_bounds_checks)
               index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
         }
      }
   }

   if (index) {
      nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src,
                            nir_src_for_ssa(index));
      tex->src[deref_src_idx].src_type = offset_src_type;
   } else {
      nir_tex_instr_remove_src(tex, deref_src_idx);
   }
}
static uint32_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
   nir_tex_instr_remove_src(tex, plane_src_idx);

   return plane;
}
static nir_ssa_def *
build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
                       unsigned start, unsigned end)
{
   if (start == end - 1) {
      return srcs[start];
   } else {
      unsigned mid = start + (end - start) / 2;
      return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)),
                          build_def_array_select(b, srcs, idx, start, mid),
                          build_def_array_select(b, srcs, idx, mid, end));
   }
}
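
/* For example (illustrative only), selecting among four defs s0..s3 with a
 * dynamic index idx expands to a two-level bcsel tree:
 *
 *    bcsel(idx < 2, bcsel(idx < 1, s0, s1),
 *                   bcsel(idx < 3, s2, s3))
 */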
static void
lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane,
                       struct apply_pipeline_layout_state *state)
{
   assert(state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell);
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
       nir_tex_instr_is_query(tex) ||
       tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
       (tex->is_shadow && tex->is_new_style_shadow))
      return;

   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   assert(deref_src_idx >= 0);

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
      return;

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&tex->instr);

   const unsigned plane_offset =
      plane * sizeof(struct anv_texture_swizzle_descriptor);
   nir_ssa_def *swiz =
      build_descriptor_load(deref, plane_offset, 1, 32, state);

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->dest.ssa.bit_size == 32);
   assert(tex->dest.ssa.num_components == 4);

   /* Initializing to undef is ok; nir_opt_undef will clean it up. */
   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
   nir_ssa_def *comps[8];
   for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
      comps[i] = undef;

   comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
   if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
   else
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
   comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
   comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
   comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
   comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);

   nir_ssa_def *swiz_comps[4];
   for (unsigned i = 0; i < 4; i++) {
      nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
      swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
   }
   nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);

   /* Rewrite uses before we insert so we don't rewrite this use */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
                                  nir_src_for_ssa(swiz_tex_res),
                                  swiz_tex_res->parent_instr);
}
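
/* As a sketch (assuming ISL's channel-select encoding: ZERO=0, ONE=1,
 * RED=4, GREEN=5, BLUE=6, ALPHA=7), a "luminance" swizzle of
 * (RED, RED, RED, ONE) would arrive packed as swiz == 0x01040404, and each
 * nir_extract_u8() above picks one selector byte per destination component.
 */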
static void
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
{
   unsigned plane = tex_instr_get_and_remove_plane_src(tex);

   /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader.  Do this
    * before we lower the derefs away so we can still find the descriptor.
    */
   if (state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell)
      lower_gen7_tex_swizzle(tex, plane, state);

   state->builder.cursor = nir_before_instr(&tex->instr);

   lower_tex_deref(tex, nir_tex_src_texture_deref,
                   &tex->texture_index, plane, state);

   lower_tex_deref(tex, nir_tex_src_sampler_deref,
                   &tex->sampler_index, plane, state);

   /* The backend only ever uses this to mark used surfaces.  We don't care
    * about that little optimization so it just needs to be non-zero.
    */
   tex->texture_array_size = 1;
}
static void
apply_pipeline_layout_block(nir_block *block,
                            struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            lower_res_index_intrinsic(intrin, state);
            break;
         case nir_intrinsic_vulkan_resource_reindex:
            lower_res_reindex_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_vulkan_descriptor:
            lower_load_vulkan_descriptor(intrin, state);
            break;
         case nir_intrinsic_get_buffer_size:
            lower_get_buffer_size(intrin, state);
            break;
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            lower_image_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_constant:
            lower_load_constant(intrin, state);
            break;
         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex:
         lower_tex(nir_instr_as_tex(instr), state);
         break;
      default:
         continue;
      }
   }
}
struct binding_info {
   uint32_t binding;
   uint8_t set;
   uint16_t score;
};

static int
compare_binding_infos(const void *_a, const void *_b)
{
   const struct binding_info *a = _a, *b = _b;
   if (a->score != b->score)
      return b->score - a->score;

   if (a->set != b->set)
      return a->set - b->set;

   return a->binding - b->binding;
}
void
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                              bool robust_buffer_access,
                              struct anv_pipeline_layout *layout,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data,
                              struct anv_pipeline_bind_map *map)
{
   void *mem_ctx = ralloc_context(NULL);

   struct apply_pipeline_layout_state state = {
      .pdevice = pdevice,
      .shader = shader,
      .layout = layout,
      .add_bounds_checks = robust_buffer_access,
      .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
      .dynamic_offset_uniform_start = -1,
   };

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl)
         get_used_bindings_block(block, &state);
   }
   for (unsigned s = 0; s < layout->num_sets; s++) {
      if (state.set[s].desc_buffer_used) {
         map->surface_to_descriptor[map->surface_count] =
            (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
               .binding = s,
            };
         state.set[s].desc_offset = map->surface_count;
         map->surface_count++;
      }
   }

   if (state.uses_constants) {
      state.constants_offset = map->surface_count;
      map->surface_to_descriptor[map->surface_count].set =
         ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
      map->surface_count++;
   }
   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size.  We shift by 7 instead
          * of 8 because we're going to use the top bit below to give
          * everything which does not support bindless a higher priority
          * than things which do.
          */
         uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
                          binding->array_size;

         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score with highest scores first.  If
    * scores are equal we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);
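
   /* Worked example (hypothetical numbers): a binding used 10 times with an
    * array size of 4 scores (10 << 7) / 4 == 320, while any binding that
    * cannot be bindless also gets bit 15 set (0x8000 | score) and therefore
    * sorts ahead of every bindless-capable binding.
    */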
   bool have_dynamic_buffers = false;

   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      struct anv_descriptor_set_binding_layout *binding =
         &layout->set[set].layout->binding[b];

      if (binding->dynamic_offset_index >= 0)
         have_dynamic_buffers = true;

      const uint32_t array_size = binding->array_size;

      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, false)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, false));
            state.set[set].surface_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].surface_offsets[b] = map->surface_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             *
             * We also make large sampler arrays bindless because we can avoid
             * using indirect sends thanks to bindless samplers being packed
             * less tightly than the sampler table.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

   if (have_dynamic_buffers) {
      state.dynamic_offset_uniform_start = shader->num_uniforms;
      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
                                                       MAX_DYNAMIC_BUFFERS);
      for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
         param[i] = ANV_PARAM_DYN_OFFSET(i);
      shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 4;
      assert(shader->num_uniforms == prog_data->nr_params * 4);
   }
, &shader
->uniforms
) {
1288 const struct glsl_type
*glsl_type
= glsl_without_array(var
->type
);
1290 if (!glsl_type_is_image(glsl_type
))
1293 enum glsl_sampler_dim dim
= glsl_get_sampler_dim(glsl_type
);
1295 const uint32_t set
= var
->data
.descriptor_set
;
1296 const uint32_t binding
= var
->data
.binding
;
1297 const uint32_t array_size
=
1298 layout
->set
[set
].layout
->binding
[binding
].array_size
;
1300 if (state
.set
[set
].use_count
[binding
] == 0)
1303 if (state
.set
[set
].surface_offsets
[binding
] >= MAX_BINDING_TABLE_SIZE
)
1306 struct anv_pipeline_binding
*pipe_binding
=
1307 &map
->surface_to_descriptor
[state
.set
[set
].surface_offsets
[binding
]];
1308 for (unsigned i
= 0; i
< array_size
; i
++) {
1309 assert(pipe_binding
[i
].set
== set
);
1310 assert(pipe_binding
[i
].binding
== binding
);
1311 assert(pipe_binding
[i
].index
== i
);
1313 if (dim
== GLSL_SAMPLER_DIM_SUBPASS
||
1314 dim
== GLSL_SAMPLER_DIM_SUBPASS_MS
)
1315 pipe_binding
[i
].input_attachment_index
= var
->data
.index
+ i
;
1317 pipe_binding
[i
].write_only
=
1318 (var
->data
.image
.access
& ACCESS_NON_READABLE
) != 0;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Before we do the normal lowering, we look for any SSBO operations
       * that we can lower to the BTI model and lower them up-front.  The BTI
       * model can perform better than the A64 model for a couple reasons:
       *
       *  1. 48-bit address calculations are potentially expensive and using
       *     the BTI model lets us simply compute 32-bit offsets and the
       *     hardware adds the 64-bit surface base address.
       *
       *  2. The BTI messages, because they use surface states, do bounds
       *     checking for us.  With the A64 model, we have to do our own
       *     bounds checking and this means wider pointers and extra
       *     calculations and branching in the shader.
       *
       * The solution to both of these is to convert things to the BTI model
       * opportunistically.  We need to do this as a pre-pass for two
       * reasons:
       *
       *  1. The BTI model requires nir_address_format_32bit_index_offset
       *     pointers which are not the same type as the pointers needed for
       *     the A64 model.  Because all our derefs are set up for the A64
       *     model (in case we have variable pointers), we have to crawl all
       *     the way back to the vulkan_resource_index intrinsic and build a
       *     completely fresh index+offset calculation.
       *
       *  2. Because the variable-pointers-capable lowering that we do as part
       *     of apply_pipeline_layout_block is destructive (it really has to
       *     be to handle variable pointers properly), we've lost the deref
       *     information by the time we get to the load/store/atomic
       *     intrinsics in that pass.
       */
      lower_direct_buffer_access(function->impl, &state);

      nir_builder_init(&state.builder, function->impl);
      nir_foreach_block(block, function->impl)
         apply_pipeline_layout_block(block, &state);
      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                            nir_metadata_dominance);
   }

   ralloc_free(mem_ctx);
}