/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
#include "compiler/brw_nir.h"
/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
#define MAX_SAMPLER_TABLE_SIZE 128
#define BINDLESS_OFFSET        255
struct apply_pipeline_layout_state {
   const struct anv_physical_device *pdevice;

   nir_shader *shader;
   nir_builder builder;

   struct anv_pipeline_layout *layout;
   bool add_bounds_checks;

   /* Place to flag lowered instructions so we don't lower them twice */
   struct set *lowered_instrs;

   int dynamic_offset_uniform_start;

   bool uses_constants;
   uint8_t constants_offset;

   struct {
      bool desc_buffer_used;
      uint8_t desc_offset;

      uint8_t *use_count;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
   } set[MAX_SETS];
};
static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if (state->set[set].use_count[binding] < UINT8_MAX)
      state->set[set].use_count[binding]++;

   /* Only flag the descriptor buffer as used if there's actually data for
    * this binding.  This lets us be lazy and call this function constantly
    * without worrying about unnecessarily enabling the buffer.
    */
   if (anv_descriptor_size(bind_layout))
      state->set[set].desc_buffer_used = true;
}

static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
{
   nir_deref_instr *deref = nir_src_as_deref(src);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   add_binding(state, var->data.descriptor_set, var->data.binding);
}
static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
                    nir_tex_instr *tex, nir_tex_src_type deref_src_type)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   add_deref_src_binding(state, tex->src[deref_src_idx].src);
}
static void
get_used_bindings_block(nir_block *block,
                        struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            add_binding(state, nir_intrinsic_desc_set(intrin),
                        nir_intrinsic_binding(intrin));
            break;

         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            add_deref_src_binding(state, intrin->src[0]);
            break;

         case nir_intrinsic_load_constant:
            state->uses_constants = true;
            break;

         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex: {
         nir_tex_instr *tex = nir_instr_as_tex(instr);
         add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
         add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
         break;
      }
      default:
         continue;
      }
   }
}
static bool
find_descriptor_for_index_src(nir_src src,
                              struct apply_pipeline_layout_state *state)
{
   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);

   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
      intrin = nir_src_as_intrinsic(intrin->src[0]);

   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return false;

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   uint32_t surface_index = state->set[set].surface_offsets[binding];

   /* Only lower to a BTI message if we have a valid binding table index. */
   return surface_index < MAX_BINDING_TABLE_SIZE;
}
static bool
nir_deref_find_descriptor(nir_deref_instr *deref,
                          struct apply_pipeline_layout_state *state)
{
   while (1) {
      /* Nothing we will use this on has a variable */
      assert(deref->deref_type != nir_deref_type_var);

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (!parent)
         break;

      deref = parent;
   }
   assert(deref->deref_type == nir_deref_type_cast);

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return false;

   return find_descriptor_for_index_src(intrin->src[0], state);
}
static nir_ssa_def *
build_index_for_res_reindex(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      nir_ssa_def *bti =
         build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]), state);

      b->cursor = nir_before_instr(&intrin->instr);
      return nir_iadd(b, bti, nir_ssa_for_src(b, intrin->src[1], 1));
   }

   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   return nir_iadd_imm(b, array_index, surface_index);
}
static nir_ssa_def *
build_index_offset_for_deref(nir_deref_instr *deref,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent) {
      nir_ssa_def *addr = build_index_offset_for_deref(parent, state);

      b->cursor = nir_before_instr(&deref->instr);
      return nir_explicit_io_address_from_deref(b, deref, addr,
                                                nir_address_format_32bit_index_offset);
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_ssa_def *index =
      build_index_for_res_reindex(nir_src_as_intrinsic(load_desc->src[0]), state);

   /* Return a 0 offset which will get picked up by the recursion */
   b->cursor = nir_before_instr(&deref->instr);
   return nir_vec2(b, index, nir_imm_int(b, 0));
}
static bool
try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, bool is_atomic,
                                  struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref->mode != nir_var_mem_ssbo)
      return false;

   /* 64-bit atomics only support A64 messages so we can't lower them to the
    * index+offset model.
    */
   if (is_atomic && nir_dest_bit_size(intrin->dest) == 64)
      return false;

   /* Normal binding table-based messages can't handle non-uniform access so
    * we have to fall back to A64.
    */
   if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
      return false;

   if (!nir_deref_find_descriptor(deref, state))
      return false;

   nir_ssa_def *addr = build_index_offset_for_deref(deref, state);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_lower_explicit_io_instr(b, intrin, addr,
                               nir_address_format_32bit_index_offset);
   return true;
}
static void
lower_direct_buffer_access(nir_function_impl *impl,
                           struct apply_pipeline_layout_state *state)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
            try_lower_direct_buffer_intrinsic(intrin, false, state);
            break;
         case nir_intrinsic_deref_atomic_add:
         case nir_intrinsic_deref_atomic_imin:
         case nir_intrinsic_deref_atomic_umin:
         case nir_intrinsic_deref_atomic_imax:
         case nir_intrinsic_deref_atomic_umax:
         case nir_intrinsic_deref_atomic_and:
         case nir_intrinsic_deref_atomic_or:
         case nir_intrinsic_deref_atomic_xor:
         case nir_intrinsic_deref_atomic_exchange:
         case nir_intrinsic_deref_atomic_comp_swap:
         case nir_intrinsic_deref_atomic_fmin:
         case nir_intrinsic_deref_atomic_fmax:
         case nir_intrinsic_deref_atomic_fcomp_swap:
            try_lower_direct_buffer_intrinsic(intrin, true, state);
            break;

         case nir_intrinsic_get_buffer_size: {
            /* The get_buffer_size intrinsic always just takes an
             * index/reindex intrinsic.
             */
            if (!find_descriptor_for_index_src(intrin->src[0], state))
               break;

            nir_ssa_def *index =
               build_index_for_res_reindex(nir_src_as_intrinsic(intrin->src[0]),
                                           state);
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                                  nir_src_for_ssa(index));
            _mesa_set_add(state->lowered_instrs, intrin);
            break;
         }

         default:
            break;
         }
      }
   }
}
static void
lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index = state->set[set].surface_offsets[binding];
   uint32_t array_size = bind_layout->array_size;

   nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1);
   if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks)
      array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1));

   nir_ssa_def *index;
   if (state->pdevice->has_a64_buffer_access &&
       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
      /* We store the descriptor offset as 16.8.8 where the top 16 bits are
       * the offset into the descriptor set, the next 8 are the binding table
       * index of the descriptor buffer, and the bottom 8 bits are the offset
       * (in bytes) into the dynamic offset table.
       */
      assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
      uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
      if (bind_layout->dynamic_offset_index >= 0) {
         dynamic_offset_index =
            state->layout->set[set].dynamic_offset_start +
            bind_layout->dynamic_offset_index;
      }

      const uint32_t desc_offset =
         bind_layout->descriptor_offset << 16 |
         (uint32_t)state->set[set].desc_offset << 8 |
         dynamic_offset_index;
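
      /* Worked example (values invented for this comment, not taken from any
       * real layout): a binding at descriptor_offset 0x40, whose descriptor
       * buffer sits at binding table index 3, and whose dynamic offset lives
       * at slot 2 of the dynamic offset table packs as
       *
       *    (0x0040 << 16) | (3 << 8) | 2 = 0x00400302
       *
       * while a binding with no dynamic offset keeps 0xff in the low byte.
       */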

      if (state->add_bounds_checks) {
         /* We're using nir_address_format_64bit_bounded_global */
         assert(intrin->dest.ssa.num_components == 4);
         assert(intrin->dest.ssa.bit_size == 32);
         index = nir_vec4(b, nir_imm_int(b, desc_offset),
                             nir_ssa_for_src(b, intrin->src[0], 1),
                             nir_imm_int(b, array_size - 1),
                             nir_ssa_undef(b, 1, 32));
      } else {
         /* We're using nir_address_format_64bit_global */
         assert(intrin->dest.ssa.num_components == 1);
         assert(intrin->dest.ssa.bit_size == 64);
         index = nir_pack_64_2x32_split(b, nir_imm_int(b, desc_offset),
                                           nir_ssa_for_src(b, intrin->src[0], 1));
      }
   } else if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
      /* This is an inline uniform block.  Just reference the descriptor set
       * and use the descriptor offset as the base.  Inline uniforms always
       * use nir_address_format_32bit_index_offset.
       */
      assert(intrin->dest.ssa.num_components == 2);
      assert(intrin->dest.ssa.bit_size == 32);
      index = nir_imm_ivec2(b, state->set[set].desc_offset,
                               bind_layout->descriptor_offset);
   } else {
      /* We're using nir_address_format_32bit_index_offset */
      assert(intrin->dest.ssa.num_components == 2);
      assert(intrin->dest.ssa.bit_size == 32);
      index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index),
                          nir_imm_int(b, 0));
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index));
   nir_instr_remove(&intrin->instr);
}
static void
lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   /* For us, the resource indices are just indices into the binding table and
    * array elements are sequential.  A resource_reindex just turns into an
    * add of the two indices.
    */
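   /* Illustrative sketch (added commentary): in the 32bit_index_offset
    * model this means reindex(vec2(index, offset), delta) becomes
    * vec2(index + delta, offset); the A64 paths below perform the same
    * addition on the array-index component of their packed layouts.
    */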
   assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
   nir_ssa_def *old_index = intrin->src[0].ssa;
   nir_ssa_def *offset = intrin->src[1].ssa;

   nir_ssa_def *new_index;
   if (state->pdevice->has_a64_buffer_access &&
       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
      if (state->add_bounds_checks) {
         /* We're using nir_address_format_64bit_bounded_global */
         assert(intrin->dest.ssa.num_components == 4);
         assert(intrin->dest.ssa.bit_size == 32);
         new_index = nir_vec4(b, nir_channel(b, old_index, 0),
                                 nir_iadd(b, nir_channel(b, old_index, 1),
                                             offset),
                                 nir_channel(b, old_index, 2),
                                 nir_ssa_undef(b, 1, 32));
      } else {
         /* We're using nir_address_format_64bit_global */
         assert(intrin->dest.ssa.num_components == 1);
         assert(intrin->dest.ssa.bit_size == 64);
         nir_ssa_def *base = nir_unpack_64_2x32_split_x(b, old_index);
         nir_ssa_def *arr_idx = nir_unpack_64_2x32_split_y(b, old_index);
         new_index = nir_pack_64_2x32_split(b, base, nir_iadd(b, arr_idx, offset));
      }
   } else {
      /* We're using nir_address_format_32bit_index_offset */
      assert(intrin->dest.ssa.num_components == 2);
      assert(intrin->dest.ssa.bit_size == 32);
      new_index = nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
                              nir_channel(b, old_index, 1));
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index));
   nir_instr_remove(&intrin->instr);
}
static nir_ssa_def *
build_ssbo_descriptor_load(const VkDescriptorType desc_type,
                           nir_ssa_def *index,
                           struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   nir_ssa_def *desc_offset, *array_index;
   if (state->add_bounds_checks) {
      /* We're using nir_address_format_64bit_bounded_global */
      desc_offset = nir_channel(b, index, 0);
      array_index = nir_umin(b, nir_channel(b, index, 1),
                                nir_channel(b, index, 2));
   } else {
      desc_offset = nir_unpack_64_2x32_split_x(b, index);
      array_index = nir_unpack_64_2x32_split_y(b, index);
   }

   /* The desc_offset is actually 16.8.8 */
   nir_ssa_def *desc_buffer_index =
      nir_extract_u8(b, desc_offset, nir_imm_int(b, 1));
   nir_ssa_def *desc_offset_base =
      nir_extract_u16(b, desc_offset, nir_imm_int(b, 1));
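
   /* Continuing the invented 0x00400302 example from
    * lower_res_index_intrinsic: nir_extract_u8(x, 1) yields byte 1, the
    * descriptor buffer's binding table index (3), and nir_extract_u16(x, 1)
    * yields the top 16 bits, the 0x0040 base offset into the set.
    */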

   /* Compute the actual descriptor offset */
   const unsigned descriptor_size =
      anv_descriptor_type_size(state->pdevice, desc_type);
   desc_offset = nir_iadd(b, desc_offset_base,
                             nir_imul_imm(b, array_index, descriptor_size));

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = 4;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}
static void
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
                             struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   nir_ssa_def *desc;
   if (state->pdevice->has_a64_buffer_access &&
       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
      desc = build_ssbo_descriptor_load(desc_type, index, state);

      /* We want nir_address_format_64bit_global */
      if (!state->add_bounds_checks)
         desc = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));

      if (state->dynamic_offset_uniform_start >= 0) {
         /* This shader has dynamic offsets and we have no way of knowing
          * (save from the dynamic offset base index) if this buffer has a
          * dynamic offset.
          */
         nir_ssa_def *desc_offset, *array_index;
         if (state->add_bounds_checks) {
            /* We're using nir_address_format_64bit_bounded_global */
            desc_offset = nir_channel(b, index, 0);
            array_index = nir_umin(b, nir_channel(b, index, 1),
                                      nir_channel(b, index, 2));
         } else {
            desc_offset = nir_unpack_64_2x32_split_x(b, index);
            array_index = nir_unpack_64_2x32_split_y(b, index);
         }

         nir_ssa_def *dyn_offset_base =
            nir_extract_u8(b, desc_offset, nir_imm_int(b, 0));
         nir_ssa_def *dyn_offset_idx =
            nir_iadd(b, dyn_offset_base, array_index);
         if (state->add_bounds_checks) {
            dyn_offset_idx = nir_umin(b, dyn_offset_idx,
                                         nir_imm_int(b, MAX_DYNAMIC_BUFFERS));
         }

         nir_intrinsic_instr *dyn_load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
         nir_intrinsic_set_base(dyn_load, state->dynamic_offset_uniform_start);
         nir_intrinsic_set_range(dyn_load, MAX_DYNAMIC_BUFFERS * 4);
         dyn_load->src[0] = nir_src_for_ssa(nir_imul_imm(b, dyn_offset_idx, 4));
         dyn_load->num_components = 1;
         nir_ssa_dest_init(&dyn_load->instr, &dyn_load->dest, 1, 32, NULL);
         nir_builder_instr_insert(b, &dyn_load->instr);

         nir_ssa_def *dynamic_offset =
            nir_bcsel(b, nir_ieq(b, dyn_offset_base, nir_imm_int(b, 0xff)),
                         nir_imm_int(b, 0), &dyn_load->dest.ssa);
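
         /* Added note: 0xff in the low byte of the 16.8.8 packing is the
          * "no dynamic offset" sentinel written by lower_res_index_intrinsic,
          * so the bcsel above substitutes a zero offset for buffers that
          * have no dynamic offset.
          */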

         if (state->add_bounds_checks) {
            /* The dynamic offset gets added to the base pointer so that we
             * have a sliding window range.
             *
             * We're using nir_address_format_64bit_bounded_global.
             */
            nir_ssa_def *base_ptr =
               nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
            base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
            desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
                               nir_unpack_64_2x32_split_y(b, base_ptr),
                               nir_channel(b, desc, 2),
                               nir_channel(b, desc, 3));
         } else {
            /* We're using nir_address_format_64bit_global */
            desc = nir_iadd(b, desc, nir_u2u64(b, dynamic_offset));
         }
      }
   } else {
      /* We follow the nir_address_format_32bit_index_offset model */
      desc = index;
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   nir_instr_remove(&intrin->instr);
}
static void
lower_get_buffer_size(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   if (_mesa_set_search(state->lowered_instrs, intrin))
      return;

   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index = intrin->src[0].ssa;

   if (state->pdevice->has_a64_buffer_access) {
      nir_ssa_def *desc = build_ssbo_descriptor_load(desc_type, index, state);
      nir_ssa_def *size = nir_channel(b, desc, 2);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(size));
      nir_instr_remove(&intrin->instr);
   } else {
      /* We're following the nir_address_format_32bit_index_offset model so
       * the binding table index is the first component of the address.  The
       * back-end wants a scalar binding table index source.
       */
      nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                            nir_src_for_ssa(nir_channel(b, index, 0)));
   }
}
static nir_ssa_def *
build_descriptor_load(nir_deref_instr *deref, unsigned offset,
                      unsigned num_components, unsigned bit_size,
                      struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   nir_builder *b = &state->builder;

   nir_ssa_def *desc_buffer_index =
      nir_imm_int(b, state->set[set].desc_offset);

   nir_ssa_def *desc_offset =
      nir_imm_int(b, bind_layout->descriptor_offset + offset);
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      const unsigned descriptor_size = anv_descriptor_size(bind_layout);
      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      if (state->add_bounds_checks)
         arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1));

      desc_offset = nir_iadd(b, desc_offset,
                                nir_imul_imm(b, arr_index, descriptor_size));
   }

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = num_components;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}
static void
lower_image_intrinsic(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&intrin->instr);

   const bool use_bindless = state->pdevice->has_bindless_images;

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      assert(!use_bindless); /* Otherwise our offsets would be wrong */
      const unsigned param = nir_intrinsic_base(intrin);

      nir_ssa_def *desc =
         build_descriptor_load(deref, param * 16,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size, state);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   } else if (use_bindless) {
      const bool write_only =
         (var->data.image.access & ACCESS_NON_READABLE) != 0;
      nir_ssa_def *desc =
         build_descriptor_load(deref, 0, 2, 32, state);
      nir_ssa_def *handle = nir_channel(b, desc, write_only ? 1 : 0);
      nir_rewrite_image_intrinsic(intrin, handle, true);
   } else {
      unsigned set = var->data.descriptor_set;
      unsigned binding = var->data.binding;
      unsigned binding_offset = state->set[set].surface_offsets[binding];
      unsigned array_size =
         state->layout->set[set].layout->binding[binding].array_size;

      nir_ssa_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = nir_ssa_for_src(b, deref->arr.index, 1);
         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }
}
static void
lower_load_constant(nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
   nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1),
                                  nir_imm_int(b, nir_intrinsic_base(intrin)));

   nir_intrinsic_instr *load_ubo =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   load_ubo->num_components = intrin->num_components;
   load_ubo->src[0] = nir_src_for_ssa(index);
   load_ubo->src[1] = nir_src_for_ssa(offset);
   nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
                     intrin->dest.ssa.num_components,
                     intrin->dest.ssa.bit_size, NULL);
   nir_builder_instr_insert(b, &load_ubo->instr);

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                            nir_src_for_ssa(&load_ubo->dest.ssa));
   nir_instr_remove(&intrin->instr);
}
static void
lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
                unsigned *base_index, unsigned plane,
                struct apply_pipeline_layout_state *state)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   unsigned binding_offset;
   if (deref_src_type == nir_tex_src_texture_deref) {
      binding_offset = state->set[set].surface_offsets[binding];
   } else {
      assert(deref_src_type == nir_tex_src_sampler_deref);
      binding_offset = state->set[set].sampler_offsets[binding];
   }

   nir_builder *b = &state->builder;

   nir_tex_src_type offset_src_type;
   nir_ssa_def *index = NULL;
   if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const unsigned plane_offset =
         plane * sizeof(struct anv_sampled_image_descriptor);

      nir_ssa_def *desc =
         build_descriptor_load(deref, plane_offset, 2, 32, state);

      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_handle;
         index = nir_channel(b, desc, 0);
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_handle;
         index = nir_channel(b, desc, 1);
      }
   } else {
      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_offset;
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_offset;
      }

      *base_index = binding_offset + plane;

      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);

         if (nir_src_is_const(deref->arr.index)) {
            unsigned arr_index = nir_src_as_uint(deref->arr.index);
            *base_index += MIN2(arr_index, array_size - 1);
         } else {
            /* From VK_KHR_sampler_ycbcr_conversion:
             *
             *    If sampler Y’CBCR conversion is enabled, the combined image
             *    sampler must be indexed only by constant integral expressions
             *    when aggregated into arrays in shader code, irrespective of
             *    the shaderSampledImageArrayDynamicIndexing feature.
             */
            assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);

            index = nir_ssa_for_src(b, deref->arr.index, 1);

            if (state->add_bounds_checks)
               index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
         }
      }
   }

   if (index) {
      nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src,
                            nir_src_for_ssa(index));
      tex->src[deref_src_idx].src_type = offset_src_type;
   } else {
      nir_tex_instr_remove_src(tex, deref_src_idx);
   }
}
static unsigned
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);

   nir_tex_instr_remove_src(tex, plane_src_idx);

   return plane;
}
static void
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
{
   state->builder.cursor = nir_before_instr(&tex->instr);

   unsigned plane = tex_instr_get_and_remove_plane_src(tex);

   lower_tex_deref(tex, nir_tex_src_texture_deref,
                   &tex->texture_index, plane, state);

   lower_tex_deref(tex, nir_tex_src_sampler_deref,
                   &tex->sampler_index, plane, state);

   /* The backend only ever uses this to mark used surfaces.  We don't care
    * about that little optimization so it just needs to be non-zero.
    */
   tex->texture_array_size = 1;
}
static void
apply_pipeline_layout_block(nir_block *block,
                            struct apply_pipeline_layout_state *state)
{
   nir_foreach_instr_safe(instr, block) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            lower_res_index_intrinsic(intrin, state);
            break;
         case nir_intrinsic_vulkan_resource_reindex:
            lower_res_reindex_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_vulkan_descriptor:
            lower_load_vulkan_descriptor(intrin, state);
            break;
         case nir_intrinsic_get_buffer_size:
            lower_get_buffer_size(intrin, state);
            break;
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_param_intel:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel:
            lower_image_intrinsic(intrin, state);
            break;
         case nir_intrinsic_load_constant:
            lower_load_constant(intrin, state);
            break;
         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex:
         lower_tex(nir_instr_as_tex(instr), state);
         break;
      default:
         continue;
      }
   }
}
struct binding_info {
   uint32_t binding;
   uint8_t set;
   uint16_t score;
};

static int
compare_binding_infos(const void *_a, const void *_b)
{
   const struct binding_info *a = _a, *b = _b;
   if (a->score != b->score)
      return b->score - a->score;

   if (a->set != b->set)
      return a->set - b->set;

   return a->binding - b->binding;
}
void
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                              bool robust_buffer_access,
                              struct anv_pipeline_layout *layout,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data,
                              struct anv_pipeline_bind_map *map)
{
   void *mem_ctx = ralloc_context(NULL);

   struct apply_pipeline_layout_state state = {
      .pdevice = pdevice,
      .shader = shader,
      .layout = layout,
      .add_bounds_checks = robust_buffer_access,
      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
      .dynamic_offset_uniform_start = -1,
   };

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl)
         get_used_bindings_block(block, &state);
   }

   for (unsigned s = 0; s < layout->num_sets; s++) {
      if (state.set[s].desc_buffer_used) {
         map->surface_to_descriptor[map->surface_count] =
            (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
               .binding = s,
            };
         state.set[s].desc_offset = map->surface_count;
         map->surface_count++;
      }
   }

   if (state.uses_constants) {
      state.constants_offset = map->surface_count;
      map->surface_to_descriptor[map->surface_count].set =
         ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
      map->surface_count++;
   }

   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         struct anv_descriptor_set_binding_layout *binding =
               &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size.  We shift by 7 instead
          * of 8 because we're going to use the top bit below to give
          * everything which does not support bindless a higher priority
          * than things which do.
          */
         uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
                          binding->array_size;
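
         /* Worked example (invented numbers): a binding used 4 times with
          * array_size 2 scores (4 << 7) / 2 = 256, while one used once with
          * array_size 8 scores only 16, so heavily-used small arrays win
          * binding table slots first.
          */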

         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score with highest scores first.  If
    * scores are equal we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);

   bool have_dynamic_buffers = false;

   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

      if (binding->dynamic_offset_index >= 0)
         have_dynamic_buffers = true;

      const uint32_t array_size = binding->array_size;

      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, false)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, false));
            state.set[set].surface_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].surface_offsets[b] = map->surface_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             *
             * We also make large sampler arrays bindless because we can avoid
             * using indirect sends thanks to bindless samplers being packed
             * less tightly than the sampler table.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .binding = b,
                        .index = i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

   if (have_dynamic_buffers) {
      state.dynamic_offset_uniform_start = shader->num_uniforms;
      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
                                                       MAX_DYNAMIC_BUFFERS);
      for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
         param[i] = ANV_PARAM_DYN_OFFSET(i);
      shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 4;
      assert(shader->num_uniforms == prog_data->nr_params * 4);
   }

   nir_foreach_variable(var, &shader->uniforms) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);

      if (!glsl_type_is_image(glsl_type))
         continue;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);

      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const uint32_t array_size =
         layout->set[set].layout->binding[binding].array_size;

      if (state.set[set].use_count[binding] == 0)
         continue;

      if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].binding == binding);
         assert(pipe_binding[i].index == i);

         if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
             dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
            pipe_binding[i].input_attachment_index = var->data.index + i;

         pipe_binding[i].write_only =
            (var->data.image.access & ACCESS_NON_READABLE) != 0;
      }
   }

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Before we do the normal lowering, we look for any SSBO operations
       * that we can lower to the BTI model and lower them up-front.  The BTI
       * model can perform better than the A64 model for a couple reasons:
       *
       *  1. 48-bit address calculations are potentially expensive and using
       *     the BTI model lets us simply compute 32-bit offsets and the
       *     hardware adds the 64-bit surface base address.
       *
       *  2. The BTI messages, because they use surface states, do bounds
       *     checking for us.  With the A64 model, we have to do our own
       *     bounds checking and this means wider pointers and extra
       *     calculations and branching in the shader.
       *
       * The solution to both of these is to convert things to the BTI model
       * opportunistically.  We need to do this as a pre-pass for two
       * reasons:
       *
       *  1. The BTI model requires nir_address_format_32bit_index_offset
       *     pointers which are not the same type as the pointers needed for
       *     the A64 model.  Because all our derefs are set up for the A64
       *     model (in case we have variable pointers), we have to crawl all
       *     the way back to the vulkan_resource_index intrinsic and build a
       *     completely fresh index+offset calculation.
       *
       *  2. Because the variable-pointers-capable lowering that we do as part
       *     of apply_pipeline_layout_block is destructive (it really has to
       *     be to handle variable pointers properly), we've lost the deref
       *     information by the time we get to the load/store/atomic
       *     intrinsics in that pass.
       */
      lower_direct_buffer_access(function->impl, &state);

      nir_builder_init(&state.builder, function->impl);
      nir_foreach_block(block, function->impl)
         apply_pipeline_layout_block(block, &state);
      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                            nir_metadata_dominance);
   }

   ralloc_free(mem_ctx);
}