/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
#include "compiler/brw_nir.h"

/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
#define MAX_SAMPLER_TABLE_SIZE 128
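/* Sentinel stored in the per-binding surface/sampler offset tables to mark
 * bindings that are accessed through bindless handles instead of a binding
 * table slot.
 */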
#define BINDLESS_OFFSET        255

struct apply_pipeline_layout_state {
   const struct anv_physical_device *pdevice;

   nir_shader *shader;
   nir_builder builder;

   struct anv_pipeline_layout *layout;
   bool add_bounds_checks;
   nir_address_format ssbo_addr_format;

   /* Place to flag lowered instructions so we don't lower them twice */
   struct set *lowered_instrs;

   int dynamic_offset_uniform_start;

   bool uses_constants;
   uint8_t constants_offset;

   struct {
      bool desc_buffer_used;
      uint8_t desc_offset;

      uint8_t *use_count;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
   } set[MAX_SETS];
};

static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if (state->set[set].use_count[binding] < UINT8_MAX)
      state->set[set].use_count[binding]++;

   /* Only flag the descriptor buffer as used if there's actually data for
    * this binding.  This lets us be lazy and call this function constantly
    * without worrying about unnecessarily enabling the buffer.
    */
   if (anv_descriptor_size(bind_layout))
      state->set[set].desc_buffer_used = true;
}

static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
{
   nir_deref_instr *deref = nir_src_as_deref(src);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   add_binding(state, var->data.descriptor_set, var->data.binding);
}

static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
                    nir_tex_instr *tex, nir_tex_src_type deref_src_type)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   add_deref_src_binding(state, tex->src[deref_src_idx].src);
}

static void
get_used_bindings_block(nir_block *block,
                        struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            add_binding(state, nir_intrinsic_desc_set(intrin),
                        nir_intrinsic_binding(intrin));
            break;

         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            add_deref_src_binding(state, intrin->src[0]);
            break;

         case nir_intrinsic_load_constant:
            state->uses_constants = true;
            break;

         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex: {
         nir_tex_instr *tex = nir_instr_as_tex(instr);
         add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
         add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
         break;
      }
      default:
         continue;
      }
   }
}

static bool
find_descriptor_for_index_src(nir_src src,
                              struct apply_pipeline_layout_state *state)
{
   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);

   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
      intrin = nir_src_as_intrinsic(intrin->src[0]);

   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return false;

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   uint32_t surface_index = state->set[set].surface_offsets[binding];

   /* Only lower to a BTI message if we have a valid binding table index. */
   return surface_index < MAX_BINDING_TABLE_SIZE;
}

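/* Walks a deref chain back to the load_vulkan_descriptor it came from and
 * reports whether that descriptor was assigned a binding table slot (and can
 * therefore be reached with a BTI message).
 */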
static bool
nir_deref_find_descriptor(nir_deref_instr *deref,
                          struct apply_pipeline_layout_state *state)
{
   while (1) {
      /* Nothing we will use this on has a variable */
      assert(deref->deref_type != nir_deref_type_var);

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (!parent)
         break;

      deref = parent;
   }
   assert(deref->deref_type == nir_deref_type_cast);

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return false;

   return find_descriptor_for_index_src(intrin->src[0], state);
}

static nir_ssa_def *
build_index_for_res_reindex(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      nir_ssa_def *bti =
         build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]), state);

      b->cursor = nir_before_instr(&intrin->instr);
      return nir_iadd(b, bti, nir_ssa_for_src(b, intrin->src[1], 1));
   }

   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   return nir_iadd_imm(b, array_index, surface_index);
}

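/* Recursively rebuilds a nir_address_format_32bit_index_offset address (a
 * vec2 of binding table index and byte offset) for an SSBO deref chain.
 */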
static nir_ssa_def *
build_index_offset_for_deref(nir_deref_instr *deref,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent) {
      nir_ssa_def *addr = build_index_offset_for_deref(parent, state);

      b->cursor = nir_before_instr(&deref->instr);
      return nir_explicit_io_address_from_deref(b, deref, addr,
                                                nir_address_format_32bit_index_offset);
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_ssa_def *index =
      build_index_for_res_reindex(nir_src_as_intrinsic(load_desc->src[0]), state);

   /* Return a 0 offset which will get picked up by the recursion */
   b->cursor = nir_before_instr(&deref->instr);
   return nir_vec2(b, index, nir_imm_int(b, 0));
}

static void
try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, bool is_atomic,
                                  struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref->mode != nir_var_mem_ssbo)
      return;

   /* 64-bit atomics only support A64 messages so we can't lower them to the
    * index+offset model.
    */
   if (is_atomic && nir_dest_bit_size(intrin->dest) == 64)
      return;

   /* Normal binding table-based messages can't handle non-uniform access so
    * we have to fall back to A64.
    */
   if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
      return;

   if (!nir_deref_find_descriptor(deref, state))
      return;

   nir_ssa_def *addr = build_index_offset_for_deref(deref, state);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_lower_explicit_io_instr(b, intrin, addr,
                               nir_address_format_32bit_index_offset);
}

static void
lower_direct_buffer_access(nir_function_impl *impl,
                           struct apply_pipeline_layout_state *state)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
            try_lower_direct_buffer_intrinsic(intrin, false, state);
            break;

         case nir_intrinsic_deref_atomic_add:
         case nir_intrinsic_deref_atomic_imin:
         case nir_intrinsic_deref_atomic_umin:
         case nir_intrinsic_deref_atomic_imax:
         case nir_intrinsic_deref_atomic_umax:
         case nir_intrinsic_deref_atomic_and:
         case nir_intrinsic_deref_atomic_or:
         case nir_intrinsic_deref_atomic_xor:
         case nir_intrinsic_deref_atomic_exchange:
         case nir_intrinsic_deref_atomic_comp_swap:
         case nir_intrinsic_deref_atomic_fmin:
         case nir_intrinsic_deref_atomic_fmax:
         case nir_intrinsic_deref_atomic_fcomp_swap:
            try_lower_direct_buffer_intrinsic(intrin, true, state);
            break;

         case nir_intrinsic_get_buffer_size: {
            /* The get_buffer_size intrinsic always just takes an
             * index/reindex intrinsic.
             */
            if (!find_descriptor_for_index_src(intrin->src[0], state))
               break;

            nir_ssa_def *index =
               build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]),
                                           state);
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                                  nir_src_for_ssa(index));
            _mesa_set_add(state->lowered_instrs, intrin);
            break;
         }

         default:
            break;
         }
      }
   }
}

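/* Storage buffers may use the physical device's A64 SSBO address format;
 * everything else uses the 32-bit index+offset (binding table) format.
 */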
static nir_address_format
desc_addr_format(VkDescriptorType desc_type,
                 struct apply_pipeline_layout_state *state)
{
   return (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
           desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) ?
          state->ssbo_addr_format : nir_address_format_32bit_index_offset;
}

static void
lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   nir_ssa_def *index;
   if (state->pdevice->has_a64_buffer_access &&
       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
      /* We store the descriptor offset as 16.8.8 where the top 16 bits are
       * the offset into the descriptor set, the next 8 are the binding table
       * index of the descriptor buffer, and the bottom 8 bits are the offset
       * (in bytes) into the dynamic offset table.
       */
      assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
      uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
      if (bind_layout->dynamic_offset_index >= 0) {
         dynamic_offset_index =
            state->layout->set[set].dynamic_offset_start +
            bind_layout->dynamic_offset_index;
      }

      const uint32_t desc_offset =
         bind_layout->descriptor_offset << 16 |
         (uint32_t)state->set[set].desc_offset << 8 |
         dynamic_offset_index;

      if (state->add_bounds_checks) {
         assert(desc_addr_format(desc_type, state) ==
                nir_address_format_64bit_bounded_global);
         assert(intrin->dest.ssa.num_components == 4);
         assert(intrin->dest.ssa.bit_size == 32);
         index = nir_vec4(b, nir_imm_int(b, desc_offset),
                             nir_ssa_for_src(b, intrin->src[0], 1),
                             nir_imm_int(b, array_size - 1),
                             nir_ssa_undef(b, 1, 32));
      } else {
         assert(desc_addr_format(desc_type, state) ==
                nir_address_format_64bit_global);
         assert(intrin->dest.ssa.num_components == 1);
         assert(intrin->dest.ssa.bit_size == 64);
         index = nir_pack_64_2x32_split(b, nir_imm_int(b, desc_offset),
                                           nir_ssa_for_src(b, intrin->src[0], 1));
      }
   } else if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
      /* This is an inline uniform block.  Just reference the descriptor set
       * and use the descriptor offset as the base.
       */
      assert(desc_addr_format(desc_type, state) ==
             nir_address_format_32bit_index_offset);
      assert(intrin->dest.ssa.num_components == 2);
      assert(intrin->dest.ssa.bit_size == 32);
      index = nir_imm_ivec2(b, state->set[set].desc_offset,
                               bind_layout->descriptor_offset);
   } else {
      assert(desc_addr_format(desc_type, state) ==
             nir_address_format_32bit_index_offset);
      assert(intrin->dest.ssa.num_components == 2);
      assert(intrin->dest.ssa.bit_size == 32);
      index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index),
                          nir_imm_int(b, 0));
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index));
   nir_instr_remove(&intrin->instr);
}

static void
lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   /* For us, the resource indices are just indices into the binding table and
    * array elements are sequential.  A resource_reindex just turns into an
    * add of the two indices.
    */
   assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
   nir_ssa_def *old_index = intrin->src[0].ssa;
   nir_ssa_def *offset = intrin->src[1].ssa;

   nir_ssa_def *new_index;
   switch (desc_addr_format(desc_type, state)) {
   case nir_address_format_64bit_bounded_global:
      /* See also lower_res_index_intrinsic() */
      assert(intrin->dest.ssa.num_components == 4);
      assert(intrin->dest.ssa.bit_size == 32);
      new_index = nir_vec4(b, nir_channel(b, old_index, 0),
                              nir_iadd(b, nir_channel(b, old_index, 1),
                                          offset),
                              nir_channel(b, old_index, 2),
                              nir_ssa_undef(b, 1, 32));
      break;

   case nir_address_format_64bit_global: {
      /* See also lower_res_index_intrinsic() */
      assert(intrin->dest.ssa.num_components == 1);
      assert(intrin->dest.ssa.bit_size == 64);
      nir_ssa_def *base = nir_unpack_64_2x32_split_x(b, old_index);
      nir_ssa_def *arr_idx = nir_unpack_64_2x32_split_y(b, old_index);
      new_index = nir_pack_64_2x32_split(b, base, nir_iadd(b, arr_idx, offset));
      break;
   }

   case nir_address_format_32bit_index_offset:
      assert(intrin->dest.ssa.num_components == 2);
      assert(intrin->dest.ssa.bit_size == 32);
      new_index = nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
                              nir_channel(b, old_index, 1));
      break;

   default:
      unreachable("Unhandled address format");
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index));
   nir_instr_remove(&intrin->instr);
}

static nir_ssa_def *
build_ssbo_descriptor_load(const VkDescriptorType desc_type,
                           nir_ssa_def *index,
                           struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_ssa_def *desc_offset, *array_index;
   switch (state->ssbo_addr_format) {
   case nir_address_format_64bit_bounded_global:
      /* See also lower_res_index_intrinsic() */
      desc_offset = nir_channel(b, index, 0);
      array_index = nir_umin(b, nir_channel(b, index, 1),
                                nir_channel(b, index, 2));
      break;

   case nir_address_format_64bit_global:
      /* See also lower_res_index_intrinsic() */
      desc_offset = nir_unpack_64_2x32_split_x(b, index);
      array_index = nir_unpack_64_2x32_split_y(b, index);
      break;

   default:
      unreachable("Unhandled address format for SSBO");
   }

   /* The desc_offset is actually 16.8.8 */
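   /* Byte 1 of that value is the binding table index of the descriptor
    * buffer and the top 16 bits are the offset of this binding's descriptors
    * within the set (see lower_res_index_intrinsic()).
    */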
   nir_ssa_def *desc_buffer_index =
      nir_extract_u8(b, desc_offset, nir_imm_int(b, 1));
   nir_ssa_def *desc_offset_base =
      nir_extract_u16(b, desc_offset, nir_imm_int(b, 1));

   /* Compute the actual descriptor offset */
   const unsigned descriptor_size =
      anv_descriptor_type_size(state->pdevice, desc_type);
   desc_offset = nir_iadd(b, desc_offset_base,
                             nir_imul_imm(b, array_index, descriptor_size));

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = 4;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}

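/* For A64 storage buffers, the descriptor itself is loaded from the
 * descriptor buffer and any dynamic offset is folded into the base address.
 * For everything else, the index+offset pair built by
 * lower_res_index_intrinsic() already is the descriptor and passes through
 * unchanged.
 */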
static void
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   nir_ssa_def *desc;
   if (state->pdevice->has_a64_buffer_access &&
       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
      desc = build_ssbo_descriptor_load(desc_type, index, state);

      /* We want nir_address_format_64bit_global */
      if (!state->add_bounds_checks)
         desc = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));

      if (state->dynamic_offset_uniform_start >= 0) {
         /* This shader has dynamic offsets and we have no way of knowing
          * (save from the dynamic offset base index) if this buffer has a
          * dynamic offset.
          */
         nir_ssa_def *desc_offset, *array_index;
         switch (state->ssbo_addr_format) {
         case nir_address_format_64bit_bounded_global:
            /* See also lower_res_index_intrinsic() */
            desc_offset = nir_channel(b, index, 0);
            array_index = nir_umin(b, nir_channel(b, index, 1),
                                      nir_channel(b, index, 2));
            break;

         case nir_address_format_64bit_global:
            /* See also lower_res_index_intrinsic() */
            desc_offset = nir_unpack_64_2x32_split_x(b, index);
            array_index = nir_unpack_64_2x32_split_y(b, index);
            break;

         default:
            unreachable("Unhandled address format for SSBO");
         }

         nir_ssa_def *dyn_offset_base =
            nir_extract_u8(b, desc_offset, nir_imm_int(b, 0));
         nir_ssa_def *dyn_offset_idx =
            nir_iadd(b, dyn_offset_base, array_index);
         if (state->add_bounds_checks) {
            dyn_offset_idx = nir_umin(b, dyn_offset_idx,
                                         nir_imm_int(b, MAX_DYNAMIC_BUFFERS));
         }

         nir_intrinsic_instr *dyn_load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
         nir_intrinsic_set_base(dyn_load, state->dynamic_offset_uniform_start);
         nir_intrinsic_set_range(dyn_load, MAX_DYNAMIC_BUFFERS * 4);
         dyn_load->src[0] = nir_src_for_ssa(nir_imul_imm(b, dyn_offset_idx, 4));
         dyn_load->num_components = 1;
         nir_ssa_dest_init(&dyn_load->instr, &dyn_load->dest, 1, 32, NULL);
         nir_builder_instr_insert(b, &dyn_load->instr);

         nir_ssa_def *dynamic_offset =
            nir_bcsel(b, nir_ieq(b, dyn_offset_base, nir_imm_int(b, 0xff)),
                         nir_imm_int(b, 0), &dyn_load->dest.ssa);

         switch (state->ssbo_addr_format) {
         case nir_address_format_64bit_bounded_global: {
            /* The dynamic offset gets added to the base pointer so that we
             * have a sliding window range.
             */
            nir_ssa_def *base_ptr =
               nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
            base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
            desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
                               nir_unpack_64_2x32_split_y(b, base_ptr),
                               nir_channel(b, desc, 2),
                               nir_channel(b, desc, 3));
            break;
         }

         case nir_address_format_64bit_global:
            desc = nir_iadd(b, desc, nir_u2u64(b, dynamic_offset));
            break;

         default:
            unreachable("Unhandled address format for SSBO");
         }
      }
   } else {
      /* We follow the nir_address_format_32bit_index_offset model */
      desc = index;
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   nir_instr_remove(&intrin->instr);
}

static void
lower_get_buffer_size(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   if (_mesa_set_search(state->lowered_instrs, intrin))
      return;

   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   if (state->pdevice->has_a64_buffer_access) {
      nir_ssa_def *desc = build_ssbo_descriptor_load(desc_type, index, state);
      nir_ssa_def *size = nir_channel(b, desc, 2);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(size));
      nir_instr_remove(&intrin->instr);
   } else {
      /* We're following the nir_address_format_32bit_index_offset model so
       * the binding table index is the first component of the address.  The
       * back-end wants a scalar binding table index source.
       */
      nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                            nir_src_for_ssa(nir_channel(b, index, 0)));
   }
}

static nir_ssa_def *
build_descriptor_load(nir_deref_instr *deref, unsigned offset,
                      unsigned num_components, unsigned bit_size,
                      struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   nir_builder *b = &state->builder;

   nir_ssa_def *desc_buffer_index =
      nir_imm_int(b, state->set[set].desc_offset);

   nir_ssa_def *desc_offset =
      nir_imm_int(b, bind_layout->descriptor_offset + offset);
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      const unsigned descriptor_size = anv_descriptor_size(bind_layout);
      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      if (state->add_bounds_checks)
         arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1));

      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_size));
   }

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = num_components;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}

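/* Image access takes one of three paths: image_deref_load_param_intel reads
 * the image parameters straight out of the descriptor buffer, bindless
 * images are rewritten to use a handle loaded from the descriptor buffer,
 * and everything else is rewritten to a binding table index.
 */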
static void
lower_image_intrinsic(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&intrin->instr);

   const bool use_bindless = state->pdevice->has_bindless_images;

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      assert(!use_bindless); /* Otherwise our offsets would be wrong */
      const unsigned param = nir_intrinsic_base(intrin);

      nir_ssa_def *desc =
         build_descriptor_load(deref, param * 16,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size, state);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   } else if (use_bindless) {
      const bool write_only =
         (var->data.image.access & ACCESS_NON_READABLE) != 0;
      nir_ssa_def *desc =
         build_descriptor_load(deref, 0, 2, 32, state);
      nir_ssa_def *handle = nir_channel(b, desc, write_only ? 1 : 0);
      nir_rewrite_image_intrinsic(intrin, handle, true);
   } else {
      unsigned set = var->data.descriptor_set;
      unsigned binding = var->data.binding;
      unsigned binding_offset = state->set[set].surface_offsets[binding];
      unsigned array_size =
         state->layout->set[set].layout->binding[binding].array_size;

      nir_ssa_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = nir_ssa_for_src(b, deref->arr.index, 1);
         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }
}

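/* Shader constants live in a UBO at the binding table slot reserved in
 * anv_nir_apply_pipeline_layout(), so load_constant becomes a load_ubo from
 * that surface at base + offset.
 */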
static void
lower_load_constant(nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
   nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1),
                                  nir_imm_int(b, nir_intrinsic_base(intrin)));

   nir_intrinsic_instr *load_ubo =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   load_ubo->num_components = intrin->num_components;
   load_ubo->src[0] = nir_src_for_ssa(index);
   load_ubo->src[1] = nir_src_for_ssa(offset);
   nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
                     intrin->dest.ssa.num_components,
                     intrin->dest.ssa.bit_size, NULL);
   nir_builder_instr_insert(b, &load_ubo->instr);

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                            nir_src_for_ssa(&load_ubo->dest.ssa));
   nir_instr_remove(&intrin->instr);
}

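/* Rewrites a texture or sampler deref source either to a bindless handle
 * loaded from the descriptor buffer or to a (possibly dynamic) offset from
 * the binding's binding table slot.
 */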
static void
lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
                unsigned *base_index, unsigned plane,
                struct apply_pipeline_layout_state *state)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   unsigned binding_offset;
   if (deref_src_type == nir_tex_src_texture_deref) {
      binding_offset = state->set[set].surface_offsets[binding];
   } else {
      assert(deref_src_type == nir_tex_src_sampler_deref);
      binding_offset = state->set[set].sampler_offsets[binding];
   }

   nir_builder *b = &state->builder;

   nir_tex_src_type offset_src_type;
   nir_ssa_def *index = NULL;
   if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const unsigned plane_offset =
         plane * sizeof(struct anv_sampled_image_descriptor);

      nir_ssa_def *desc =
         build_descriptor_load(deref, plane_offset, 2, 32, state);

      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_handle;
         index = nir_channel(b, desc, 0);
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_handle;
         index = nir_channel(b, desc, 1);
      }
   } else {
      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_offset;
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_offset;
      }

      *base_index = binding_offset + plane;

      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);

         if (nir_src_is_const(deref->arr.index)) {
            unsigned arr_index = nir_src_as_uint(deref->arr.index);
            *base_index += MIN2(arr_index, array_size - 1);
         } else {
            /* From VK_KHR_sampler_ycbcr_conversion:
             *
             *    If sampler Y’CBCR conversion is enabled, the combined image
             *    sampler must be indexed only by constant integral expressions
             *    when aggregated into arrays in shader code, irrespective of
             *    the shaderSampledImageArrayDynamicIndexing feature.
             */
            assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);

            index = nir_ssa_for_src(b, deref->arr.index, 1);

            if (state->add_bounds_checks)
               index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
         }
      }
   }

   if (index) {
      nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src,
                            nir_src_for_ssa(index));
      tex->src[deref_src_idx].src_type = offset_src_type;
   } else {
      nir_tex_instr_remove_src(tex, deref_src_idx);
   }
}

static unsigned
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
   nir_tex_instr_remove_src(tex, plane_src_idx);
   return plane;
}

static void
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
{
   state->builder.cursor = nir_before_instr(&tex->instr);

   unsigned plane = tex_instr_get_and_remove_plane_src(tex);

   lower_tex_deref(tex, nir_tex_src_texture_deref,
                   &tex->texture_index, plane, state);

   lower_tex_deref(tex, nir_tex_src_sampler_deref,
                   &tex->sampler_index, plane, state);

   /* The backend only ever uses this to mark used surfaces.  We don't care
    * about that little optimization so it just needs to be non-zero.
    */
   tex->texture_array_size = 1;
}

static void
apply_pipeline_layout_block(nir_block *block,
                            struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            lower_res_index_intrinsic(intrin, state);
            break;
         case nir_intrinsic_vulkan_resource_reindex:
            lower_res_reindex_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_vulkan_descriptor:
            lower_load_vulkan_descriptor(intrin, state);
            break;
         case nir_intrinsic_get_buffer_size:
            lower_get_buffer_size(intrin, state);
            break;
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            lower_image_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_constant:
            lower_load_constant(intrin, state);
            break;
         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex:
         lower_tex(nir_instr_as_tex(instr), state);
         break;
      default:
         continue;
      }
   }
}

struct binding_info {
   uint32_t binding;
   uint8_t set;
   uint16_t score;
};

static int
compare_binding_infos(const void *_a, const void *_b)
{
   const struct binding_info *a = _a, *b = _b;
   if (a->score != b->score)
      return b->score - a->score;

   if (a->set != b->set)
      return a->set - b->set;

   return a->binding - b->binding;
}

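/* This pass works in three steps: walk the shader to count how often each
 * (set, binding) pair is used, assign binding table and sampler table slots
 * in decreasing order of a usage-based score (bindings which cannot be
 * bindless are forced to the front so they are guaranteed a slot), and then
 * lower all descriptor-related intrinsics and derefs to those slots or to
 * bindless handles.
 */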
void
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                              bool robust_buffer_access,
                              struct anv_pipeline_layout *layout,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data,
                              struct anv_pipeline_bind_map *map)
{
   void *mem_ctx = ralloc_context(NULL);

   struct apply_pipeline_layout_state state = {
      .pdevice = pdevice,
      .shader = shader,
      .layout = layout,
      .add_bounds_checks = robust_buffer_access,
      .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
      .dynamic_offset_uniform_start = -1,
   };

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl)
         get_used_bindings_block(block, &state);
   }

   for (unsigned s = 0; s < layout->num_sets; s++) {
      if (state.set[s].desc_buffer_used) {
         map->surface_to_descriptor[map->surface_count] =
            (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
               .binding = s,
            };
         state.set[s].desc_offset = map->surface_count;
         map->surface_count++;
      }
   }

   if (state.uses_constants) {
      state.constants_offset = map->surface_count;
      map->surface_to_descriptor[map->surface_count].set =
         ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
      map->surface_count++;
   }

   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         struct anv_descriptor_set_binding_layout *binding =
               &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size.  We shift by 7 instead
          * of 8 because we're going to use the top bit below to give
          * everything which does not support bindless a much higher priority
          * than things which do.
          */
         uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
                          binding->array_size;

         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score with highest scores first.  If
    * scores are equal we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);

   bool have_dynamic_buffers = false;

   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

      if (binding->dynamic_offset_index >= 0)
         have_dynamic_buffers = true;

      const uint32_t array_size = binding->array_size;

      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, false)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, false));
            state.set[set].surface_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].surface_offsets[b] = map->surface_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             *
             * We also make large sampler arrays bindless because we can avoid
             * using indirect sends thanks to bindless samplers being packed
             * less tightly than the sampler table.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

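   /* Dynamic buffer offsets are handed to the shader as uniform params
    * (ANV_PARAM_DYN_OFFSET); lower_load_vulkan_descriptor() reads them back
    * with load_uniform at dynamic_offset_uniform_start.
    */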
   if (have_dynamic_buffers) {
      state.dynamic_offset_uniform_start = shader->num_uniforms;
      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
                                                       MAX_DYNAMIC_BUFFERS);
      for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
         param[i] = ANV_PARAM_DYN_OFFSET(i);
      shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 4;
      assert(shader->num_uniforms == prog_data->nr_params * 4);
   }

   nir_foreach_variable(var, &shader->uniforms) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);

      if (!glsl_type_is_image(glsl_type))
         continue;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);

      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const uint32_t array_size =
         layout->set[set].layout->binding[binding].array_size;

      if (state.set[set].use_count[binding] == 0)
         continue;

      if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].binding == binding);
         assert(pipe_binding[i].index == i);

         if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
             dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
            pipe_binding[i].input_attachment_index = var->data.index + i;

         pipe_binding[i].write_only =
            (var->data.image.access & ACCESS_NON_READABLE) != 0;
      }
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Before we do the normal lowering, we look for any SSBO operations
       * that we can lower to the BTI model and lower them up-front.  The BTI
       * model can perform better than the A64 model for a couple reasons:
       *
       *  1. 48-bit address calculations are potentially expensive and using
       *     the BTI model lets us simply compute 32-bit offsets and the
       *     hardware adds the 64-bit surface base address.
       *
       *  2. The BTI messages, because they use surface states, do bounds
       *     checking for us.  With the A64 model, we have to do our own
       *     bounds checking and this means wider pointers and extra
       *     calculations and branching in the shader.
       *
       * The solution to both of these is to convert things to the BTI model
       * opportunistically.  We need to do this as a pre-pass for two reasons:
       *
       *  1. The BTI model requires nir_address_format_32bit_index_offset
       *     pointers which are not the same type as the pointers needed for
       *     the A64 model.  Because all our derefs are set up for the A64
       *     model (in case we have variable pointers), we have to crawl all
       *     the way back to the vulkan_resource_index intrinsic and build a
       *     completely fresh index+offset calculation.
       *
       *  2. Because the variable-pointers-capable lowering that we do as part
       *     of apply_pipeline_layout_block is destructive (it really has to
       *     be to handle variable pointers properly), we've lost the deref
       *     information by the time we get to the load/store/atomic
       *     intrinsics in that pass.
       */
      lower_direct_buffer_access(function->impl, &state);

      nir_builder_init(&state.builder, function->impl);
      nir_foreach_block(block, function->impl)
         apply_pipeline_layout_block(block, &state);
      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                            nir_metadata_dominance);
   }

   ralloc_free(mem_ctx);
}