X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_nir_apply_pipeline_layout.c;h=1d13aa604924e68f888e066a3ecd9bf0608519cd;hb=f118ca20758c85da1aaf1792e61aadb298b32a47;hp=7abc27be1032b69b4c15dcc1558f05c472435763;hpb=3cf78ec2bdc22833082d026d59ccb28d79b07f6f;p=mesa.git diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 7abc27be103..1d13aa60492 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -41,10 +41,13 @@ struct apply_pipeline_layout_state { struct anv_pipeline_layout *layout; bool add_bounds_checks; + nir_address_format ssbo_addr_format; /* Place to flag lowered instructions so we don't lower them twice */ struct set *lowered_instrs; + int dynamic_offset_uniform_start; + bool uses_constants; uint8_t constants_offset; struct { @@ -111,8 +114,10 @@ get_used_bindings_block(nir_block *block, case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_min: - case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_imin: + case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_deref_atomic_imax: + case nir_intrinsic_image_deref_atomic_umax: case nir_intrinsic_image_deref_atomic_and: case nir_intrinsic_image_deref_atomic_or: case nir_intrinsic_image_deref_atomic_xor: @@ -159,7 +164,12 @@ find_descriptor_for_index_src(nir_src src, if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index) return false; - return true; + uint32_t set = nir_intrinsic_desc_set(intrin); + uint32_t binding = nir_intrinsic_binding(intrin); + uint32_t surface_index = state->set[set].surface_offsets[binding]; + + /* Only lower to a BTI message if we have a valid binding table index. */ + return surface_index < MAX_BINDING_TABLE_SIZE; } static bool @@ -246,7 +256,7 @@ build_index_offset_for_deref(nir_deref_instr *deref, } static bool -try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, +try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, bool is_atomic, struct apply_pipeline_layout_state *state) { nir_builder *b = &state->builder; @@ -255,6 +265,18 @@ try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, if (deref->mode != nir_var_mem_ssbo) return false; + /* 64-bit atomics only support A64 messages so we can't lower them to the + * index+offset model. + */ + if (is_atomic && nir_dest_bit_size(intrin->dest) == 64) + return false; + + /* Normal binding table-based messages can't handle non-uniform access so + * we have to fall back to A64. 
+ */ + if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM) + return false; + if (!nir_deref_find_descriptor(deref, state)) return false; @@ -279,6 +301,8 @@ lower_direct_buffer_access(nir_function_impl *impl, switch (intrin->intrinsic) { case nir_intrinsic_load_deref: case nir_intrinsic_store_deref: + try_lower_direct_buffer_intrinsic(intrin, false, state); + break; case nir_intrinsic_deref_atomic_add: case nir_intrinsic_deref_atomic_imin: case nir_intrinsic_deref_atomic_umin: @@ -292,7 +316,7 @@ lower_direct_buffer_access(nir_function_impl *impl, case nir_intrinsic_deref_atomic_fmin: case nir_intrinsic_deref_atomic_fmax: case nir_intrinsic_deref_atomic_fcomp_swap: - try_lower_direct_buffer_intrinsic(intrin, state); + try_lower_direct_buffer_intrinsic(intrin, true, state); break; case nir_intrinsic_get_buffer_size: { @@ -317,6 +341,15 @@ lower_direct_buffer_access(nir_function_impl *impl, } } +static nir_address_format +desc_addr_format(VkDescriptorType desc_type, + struct apply_pipeline_layout_state *state) +{ + return (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || + desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) ? + state->ssbo_addr_format : nir_address_format_32bit_index_offset; +} + static void lower_res_index_intrinsic(nir_intrinsic_instr *intrin, struct apply_pipeline_layout_state *state) @@ -327,6 +360,7 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin, uint32_t set = nir_intrinsic_desc_set(intrin); uint32_t binding = nir_intrinsic_binding(intrin); + const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); const struct anv_descriptor_set_binding_layout *bind_layout = &state->layout->set[set].layout->binding[binding]; @@ -339,14 +373,59 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin, array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1)); nir_ssa_def *index; - if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) { + if (state->pdevice->has_a64_buffer_access && + (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || + desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) { + /* We store the descriptor offset as 16.8.8 where the top 16 bits are + * the offset into the descriptor set, the next 8 are the binding table + * index of the descriptor buffer, and the bottom 8 bits are the offset + * (in bytes) into the dynamic offset table. + */ + assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS); + uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */ + if (bind_layout->dynamic_offset_index >= 0) { + dynamic_offset_index = + state->layout->set[set].dynamic_offset_start + + bind_layout->dynamic_offset_index; + } + + const uint32_t desc_offset = + bind_layout->descriptor_offset << 16 | + (uint32_t)state->set[set].desc_offset << 8 | + dynamic_offset_index; + + if (state->add_bounds_checks) { + assert(desc_addr_format(desc_type, state) == + nir_address_format_64bit_bounded_global); + assert(intrin->dest.ssa.num_components == 4); + assert(intrin->dest.ssa.bit_size == 32); + index = nir_vec4(b, nir_imm_int(b, desc_offset), + nir_ssa_for_src(b, intrin->src[0], 1), + nir_imm_int(b, array_size - 1), + nir_ssa_undef(b, 1, 32)); + } else { + assert(desc_addr_format(desc_type, state) == + nir_address_format_64bit_global); + assert(intrin->dest.ssa.num_components == 1); + assert(intrin->dest.ssa.bit_size == 64); + index = nir_pack_64_2x32_split(b, nir_imm_int(b, desc_offset), + nir_ssa_for_src(b, intrin->src[0], 1)); + } + } else if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) { /* This is an inline uniform block. 
Just reference the descriptor set * and use the descriptor offset as the base. */ + assert(desc_addr_format(desc_type, state) == + nir_address_format_32bit_index_offset); + assert(intrin->dest.ssa.num_components == 2); + assert(intrin->dest.ssa.bit_size == 32); index = nir_imm_ivec2(b, state->set[set].desc_offset, bind_layout->descriptor_offset); } else { - /* We're using nir_address_format_32bit_index_offset */ + assert(desc_addr_format(desc_type, state) == + nir_address_format_32bit_index_offset); + assert(intrin->dest.ssa.num_components == 2); + assert(intrin->dest.ssa.bit_size == 32); index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index), nir_imm_int(b, 0)); } @@ -364,6 +443,8 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin, b->cursor = nir_before_instr(&intrin->instr); + const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); + /* For us, the resource indices are just indices into the binding table and * array elements are sequential. A resource_reindex just turns into an * add of the two indices. @@ -372,15 +453,94 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *old_index = intrin->src[0].ssa; nir_ssa_def *offset = intrin->src[1].ssa; - nir_ssa_def *new_index = - nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset), - nir_channel(b, old_index, 1)); + nir_ssa_def *new_index; + switch (desc_addr_format(desc_type, state)) { + case nir_address_format_64bit_bounded_global: + /* See also lower_res_index_intrinsic() */ + assert(intrin->dest.ssa.num_components == 4); + assert(intrin->dest.ssa.bit_size == 32); + new_index = nir_vec4(b, nir_channel(b, old_index, 0), + nir_iadd(b, nir_channel(b, old_index, 1), + offset), + nir_channel(b, old_index, 2), + nir_ssa_undef(b, 1, 32)); + break; + + case nir_address_format_64bit_global: { + /* See also lower_res_index_intrinsic() */ + assert(intrin->dest.ssa.num_components == 1); + assert(intrin->dest.ssa.bit_size == 64); + nir_ssa_def *base = nir_unpack_64_2x32_split_x(b, old_index); + nir_ssa_def *arr_idx = nir_unpack_64_2x32_split_y(b, old_index); + new_index = nir_pack_64_2x32_split(b, base, nir_iadd(b, arr_idx, offset)); + break; + } + + case nir_address_format_32bit_index_offset: + assert(intrin->dest.ssa.num_components == 2); + assert(intrin->dest.ssa.bit_size == 32); + new_index = nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset), + nir_channel(b, old_index, 1)); + break; + + default: + unreachable("Uhandled address format"); + } assert(intrin->dest.is_ssa); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(new_index)); nir_instr_remove(&intrin->instr); } +static nir_ssa_def * +build_ssbo_descriptor_load(const VkDescriptorType desc_type, + nir_ssa_def *index, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + nir_ssa_def *desc_offset, *array_index; + switch (state->ssbo_addr_format) { + case nir_address_format_64bit_bounded_global: + /* See also lower_res_index_intrinsic() */ + desc_offset = nir_channel(b, index, 0); + array_index = nir_umin(b, nir_channel(b, index, 1), + nir_channel(b, index, 2)); + break; + + case nir_address_format_64bit_global: + /* See also lower_res_index_intrinsic() */ + desc_offset = nir_unpack_64_2x32_split_x(b, index); + array_index = nir_unpack_64_2x32_split_y(b, index); + break; + + default: + unreachable("Unhandled address format for SSBO"); + } + + /* The desc_offset is actually 16.8.8 */ + nir_ssa_def *desc_buffer_index = + nir_extract_u8(b, desc_offset, nir_imm_int(b, 1)); + nir_ssa_def 
*desc_offset_base = + nir_extract_u16(b, desc_offset, nir_imm_int(b, 1)); + + /* Compute the actual descriptor offset */ + const unsigned descriptor_size = + anv_descriptor_type_size(state->pdevice, desc_type); + desc_offset = nir_iadd(b, desc_offset_base, + nir_imul_imm(b, array_index, descriptor_size)); + + nir_intrinsic_instr *desc_load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo); + desc_load->src[0] = nir_src_for_ssa(desc_buffer_index); + desc_load->src[1] = nir_src_for_ssa(desc_offset); + desc_load->num_components = 4; + nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL); + nir_builder_instr_insert(b, &desc_load->instr); + + return &desc_load->dest.ssa; +} + static void lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin, struct apply_pipeline_layout_state *state) @@ -389,12 +549,97 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin, b->cursor = nir_before_instr(&intrin->instr); - /* We follow the nir_address_format_32bit_index_offset model */ + const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); + assert(intrin->src[0].is_ssa); nir_ssa_def *index = intrin->src[0].ssa; + nir_ssa_def *desc; + if (state->pdevice->has_a64_buffer_access && + (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || + desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) { + desc = build_ssbo_descriptor_load(desc_type, index, state); + + /* We want nir_address_format_64bit_global */ + if (!state->add_bounds_checks) + desc = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3)); + + if (state->dynamic_offset_uniform_start >= 0) { + /* This shader has dynamic offsets and we have no way of knowing + * (save from the dynamic offset base index) if this buffer has a + * dynamic offset. + */ + nir_ssa_def *desc_offset, *array_index; + switch (state->ssbo_addr_format) { + case nir_address_format_64bit_bounded_global: + /* See also lower_res_index_intrinsic() */ + desc_offset = nir_channel(b, index, 0); + array_index = nir_umin(b, nir_channel(b, index, 1), + nir_channel(b, index, 2)); + break; + + case nir_address_format_64bit_global: + /* See also lower_res_index_intrinsic() */ + desc_offset = nir_unpack_64_2x32_split_x(b, index); + array_index = nir_unpack_64_2x32_split_y(b, index); + break; + + default: + unreachable("Unhandled address format for SSBO"); + } + + nir_ssa_def *dyn_offset_base = + nir_extract_u8(b, desc_offset, nir_imm_int(b, 0)); + nir_ssa_def *dyn_offset_idx = + nir_iadd(b, dyn_offset_base, array_index); + if (state->add_bounds_checks) { + dyn_offset_idx = nir_umin(b, dyn_offset_idx, + nir_imm_int(b, MAX_DYNAMIC_BUFFERS)); + } + + nir_intrinsic_instr *dyn_load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform); + nir_intrinsic_set_base(dyn_load, state->dynamic_offset_uniform_start); + nir_intrinsic_set_range(dyn_load, MAX_DYNAMIC_BUFFERS * 4); + dyn_load->src[0] = nir_src_for_ssa(nir_imul_imm(b, dyn_offset_idx, 4)); + dyn_load->num_components = 1; + nir_ssa_dest_init(&dyn_load->instr, &dyn_load->dest, 1, 32, NULL); + nir_builder_instr_insert(b, &dyn_load->instr); + + nir_ssa_def *dynamic_offset = + nir_bcsel(b, nir_ieq(b, dyn_offset_base, nir_imm_int(b, 0xff)), + nir_imm_int(b, 0), &dyn_load->dest.ssa); + + switch (state->ssbo_addr_format) { + case nir_address_format_64bit_bounded_global: { + /* The dynamic offset gets added to the base pointer so that we + * have a sliding window range. 
+ */ + nir_ssa_def *base_ptr = + nir_pack_64_2x32(b, nir_channels(b, desc, 0x3)); + base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset)); + desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr), + nir_unpack_64_2x32_split_y(b, base_ptr), + nir_channel(b, desc, 2), + nir_channel(b, desc, 3)); + break; + } + + case nir_address_format_64bit_global: + desc = nir_iadd(b, desc, nir_u2u64(b, dynamic_offset)); + break; + + default: + unreachable("Unhandled address format for SSBO"); + } + } + } else { + /* We follow the nir_address_format_32bit_index_offset model */ + desc = index; + } + assert(intrin->dest.is_ssa); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(index)); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc)); nir_instr_remove(&intrin->instr); } @@ -409,15 +654,24 @@ lower_get_buffer_size(nir_intrinsic_instr *intrin, b->cursor = nir_before_instr(&intrin->instr); + const VkDescriptorType desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + assert(intrin->src[0].is_ssa); nir_ssa_def *index = intrin->src[0].ssa; - /* We're following the nir_address_format_32bit_index_offset model so the - * binding table index is the first component of the address. The - * back-end wants a scalar binding table index source. - */ - nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], - nir_src_for_ssa(nir_channel(b, index, 0))); + if (state->pdevice->has_a64_buffer_access) { + nir_ssa_def *desc = build_ssbo_descriptor_load(desc_type, index, state); + nir_ssa_def *size = nir_channel(b, desc, 2); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(size)); + nir_instr_remove(&intrin->instr); + } else { + /* We're following the nir_address_format_32bit_index_offset model so + * the binding table index is the first component of the address. The + * back-end wants a scalar binding table index source. + */ + nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], + nir_src_for_ssa(nir_channel(b, index, 0))); + } } static nir_ssa_def * @@ -471,13 +725,21 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin, struct apply_pipeline_layout_state *state) { nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + unsigned binding_offset = state->set[set].surface_offsets[binding]; nir_builder *b = &state->builder; b->cursor = nir_before_instr(&intrin->instr); + ASSERTED const bool use_bindless = state->pdevice->has_bindless_images; + if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) { b->cursor = nir_instr_remove(&intrin->instr); + assert(!use_bindless); /* Otherwise our offsets would be wrong */ const unsigned param = nir_intrinsic_base(intrin); nir_ssa_def *desc = @@ -486,12 +748,14 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin, intrin->dest.ssa.bit_size, state); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc)); + } else if (binding_offset > MAX_BINDING_TABLE_SIZE) { + const bool write_only = + (var->data.image.access & ACCESS_NON_READABLE) != 0; + nir_ssa_def *desc = + build_descriptor_load(deref, 0, 2, 32, state); + nir_ssa_def *handle = nir_channel(b, desc, write_only ? 
1 : 0); + nir_rewrite_image_intrinsic(intrin, handle, true); } else { - nir_variable *var = nir_deref_instr_get_variable(deref); - - unsigned set = var->data.descriptor_set; - unsigned binding = var->data.binding; - unsigned binding_offset = state->set[set].surface_offsets[binding]; unsigned array_size = state->layout->set[set].layout->binding[binding].array_size; @@ -539,7 +803,7 @@ lower_load_constant(nir_intrinsic_instr *intrin, static void lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type, - unsigned *base_index, + unsigned *base_index, unsigned plane, struct apply_pipeline_layout_state *state) { int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type); @@ -554,39 +818,77 @@ lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type, unsigned array_size = state->layout->set[set].layout->binding[binding].array_size; - nir_tex_src_type offset_src_type; + unsigned binding_offset; if (deref_src_type == nir_tex_src_texture_deref) { - offset_src_type = nir_tex_src_texture_offset; - *base_index = state->set[set].surface_offsets[binding]; + binding_offset = state->set[set].surface_offsets[binding]; } else { assert(deref_src_type == nir_tex_src_sampler_deref); - offset_src_type = nir_tex_src_sampler_offset; - *base_index = state->set[set].sampler_offsets[binding]; + binding_offset = state->set[set].sampler_offsets[binding]; } + nir_builder *b = &state->builder; + + nir_tex_src_type offset_src_type; nir_ssa_def *index = NULL; - if (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); + if (binding_offset > MAX_BINDING_TABLE_SIZE) { + const unsigned plane_offset = + plane * sizeof(struct anv_sampled_image_descriptor); + + nir_ssa_def *desc = + build_descriptor_load(deref, plane_offset, 2, 32, state); - if (nir_src_is_const(deref->arr.index)) { - unsigned arr_index = nir_src_as_uint(deref->arr.index); - *base_index += MIN2(arr_index, array_size - 1); + if (deref_src_type == nir_tex_src_texture_deref) { + offset_src_type = nir_tex_src_texture_handle; + index = nir_channel(b, desc, 0); } else { - nir_builder *b = &state->builder; - - /* From VK_KHR_sampler_ycbcr_conversion: - * - * If sampler Y’CBCR conversion is enabled, the combined image - * sampler must be indexed only by constant integral expressions when - * aggregated into arrays in shader code, irrespective of the - * shaderSampledImageArrayDynamicIndexing feature. 
- */ - assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1); + assert(deref_src_type == nir_tex_src_sampler_deref); + offset_src_type = nir_tex_src_sampler_handle; + index = nir_channel(b, desc, 1); + } + } else { + if (deref_src_type == nir_tex_src_texture_deref) { + offset_src_type = nir_tex_src_texture_offset; + } else { + assert(deref_src_type == nir_tex_src_sampler_deref); + offset_src_type = nir_tex_src_sampler_offset; + } - index = nir_ssa_for_src(b, deref->arr.index, 1); + *base_index = binding_offset + plane; - if (state->add_bounds_checks) - index = nir_umin(b, index, nir_imm_int(b, array_size - 1)); + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + + if (nir_src_is_const(deref->arr.index)) { + unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1); + struct anv_sampler **immutable_samplers = + state->layout->set[set].layout->binding[binding].immutable_samplers; + if (immutable_samplers) { + /* Array of YCbCr samplers are tightly packed in the binding + * tables, compute the offset of an element in the array by + * adding the number of planes of all preceding elements. + */ + unsigned desc_arr_index = 0; + for (int i = 0; i < arr_index; i++) + desc_arr_index += immutable_samplers[i]->n_planes; + *base_index += desc_arr_index; + } else { + *base_index += arr_index; + } + } else { + /* From VK_KHR_sampler_ycbcr_conversion: + * + * If sampler Y’CBCR conversion is enabled, the combined image + * sampler must be indexed only by constant integral expressions + * when aggregated into arrays in shader code, irrespective of + * the shaderSampledImageArrayDynamicIndexing feature. + */ + assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1); + + index = nir_ssa_for_src(b, deref->arr.index, 1); + + if (state->add_bounds_checks) + index = nir_umin(b, index, nir_imm_int(b, array_size - 1)); + } } } @@ -613,20 +915,105 @@ tex_instr_get_and_remove_plane_src(nir_tex_instr *tex) return plane; } +static nir_ssa_def * +build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx, + unsigned start, unsigned end) +{ + if (start == end - 1) { + return srcs[start]; + } else { + unsigned mid = start + (end - start) / 2; + return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)), + build_def_array_select(b, srcs, idx, start, mid), + build_def_array_select(b, srcs, idx, mid, end)); + } +} + static void -lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane, + struct apply_pipeline_layout_state *state) { - state->builder.cursor = nir_before_instr(&tex->instr); + assert(state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell); + if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF || + nir_tex_instr_is_query(tex) || + tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */ + (tex->is_shadow && tex->is_new_style_shadow)) + return; + + int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + assert(deref_src_idx >= 0); + + nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src); + nir_variable *var = nir_deref_instr_get_variable(deref); + + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + const struct anv_descriptor_set_binding_layout *bind_layout = + &state->layout->set[set].layout->binding[binding]; + if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0) + return; + + nir_builder *b = &state->builder; + b->cursor = nir_before_instr(&tex->instr); 
+ + const unsigned plane_offset = + plane * sizeof(struct anv_texture_swizzle_descriptor); + nir_ssa_def *swiz = + build_descriptor_load(deref, plane_offset, 1, 32, state); + + b->cursor = nir_after_instr(&tex->instr); + + assert(tex->dest.ssa.bit_size == 32); + assert(tex->dest.ssa.num_components == 4); + + /* Initializing to undef is ok; nir_opt_undef will clean it up. */ + nir_ssa_def *undef = nir_ssa_undef(b, 1, 32); + nir_ssa_def *comps[8]; + for (unsigned i = 0; i < ARRAY_SIZE(comps); i++) + comps[i] = undef; + + comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0); + if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float) + comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1); + else + comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1); + comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0); + comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1); + comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2); + comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3); + + nir_ssa_def *swiz_comps[4]; + for (unsigned i = 0; i < 4; i++) { + nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i)); + swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8); + } + nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4); + + /* Rewrite uses before we insert so we don't rewrite this use */ + nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, + nir_src_for_ssa(swiz_tex_res), + swiz_tex_res->parent_instr); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ unsigned plane = tex_instr_get_and_remove_plane_src(tex); + /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this + * before we lower the derefs away so we can still find the descriptor. + */ + if (state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell) + lower_gen7_tex_swizzle(tex, plane, state); + + state->builder.cursor = nir_before_instr(&tex->instr); + lower_tex_deref(tex, nir_tex_src_texture_deref, - &tex->texture_index, state); - tex->texture_index += plane; + &tex->texture_index, plane, state); lower_tex_deref(tex, nir_tex_src_sampler_deref, - &tex->sampler_index, state); - tex->sampler_index += plane; + &tex->sampler_index, plane, state); /* The backend only ever uses this to mark used surfaces. We don't care * about that little optimization so it just needs to be non-zero. 
@@ -658,8 +1045,10 @@ apply_pipeline_layout_block(nir_block *block, case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_min: - case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_imin: + case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_deref_atomic_imax: + case nir_intrinsic_image_deref_atomic_umax: case nir_intrinsic_image_deref_atomic_and: case nir_intrinsic_image_deref_atomic_or: case nir_intrinsic_image_deref_atomic_xor: @@ -723,7 +1112,9 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice, .shader = shader, .layout = layout, .add_bounds_checks = robust_buffer_access, + .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access), .lowered_instrs = _mesa_pointer_set_create(mem_ctx), + .dynamic_offset_uniform_start = -1, }; for (unsigned s = 0; s < layout->num_sets; s++) { @@ -813,11 +1204,16 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice, qsort(infos, used_binding_count, sizeof(struct binding_info), compare_binding_infos); + bool have_dynamic_buffers = false; + for (unsigned i = 0; i < used_binding_count; i++) { unsigned set = infos[i].set, b = infos[i].binding; struct anv_descriptor_set_binding_layout *binding = &layout->set[set].layout->binding[b]; + if (binding->dynamic_offset_index >= 0) + have_dynamic_buffers = true; + const uint32_t array_size = binding->array_size; if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) { @@ -852,6 +1248,10 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice, anv_descriptor_requires_bindless(pdevice, binding, true)) { /* If this descriptor doesn't fit in the binding table or if it * requires bindless for some reason, flag it as bindless. + * + * We also make large sampler arrays bindless because we can avoid + * using indirect sends thanks to bindless samplers being packed + * less tightly than the sampler table. */ assert(anv_descriptor_supports_bindless(pdevice, binding, true)); state.set[set].sampler_offsets[b] = BINDLESS_OFFSET; @@ -874,6 +1274,16 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice, } } + if (have_dynamic_buffers) { + state.dynamic_offset_uniform_start = shader->num_uniforms; + uint32_t *param = brw_stage_prog_data_add_params(prog_data, + MAX_DYNAMIC_BUFFERS); + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) + param[i] = ANV_PARAM_DYN_OFFSET(i); + shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 4; + assert(shader->num_uniforms == prog_data->nr_params * 4); + } + nir_foreach_variable(var, &shader->uniforms) { const struct glsl_type *glsl_type = glsl_without_array(var->type);
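
Three supplementary sketches follow; none of them are part of the patch, and all helper names in them are illustrative. The first concerns the A64 SSBO path in lower_res_index_intrinsic(), which encodes the resource index as a 16.8.8 bitfield: the top 16 bits are the offset of the binding within its descriptor set, the next 8 bits are the binding-table index of the descriptor buffer, and the bottom 8 bits are the slot in the dynamic-offset table (0xff meaning "no dynamic offset"). The sketch shows the same packing on the CPU side, plus the unpacking that build_ssbo_descriptor_load() and lower_load_vulkan_descriptor() perform in the shader with nir_extract_u16/nir_extract_u8.

   #include <assert.h>
   #include <stdint.h>

   /* Illustrative only: CPU-side equivalent of the 16.8.8 packing built in
    * lower_res_index_intrinsic() and decoded in build_ssbo_descriptor_load().
    */
   static inline uint32_t
   pack_desc_index_16_8_8(uint32_t binding_desc_offset, /* 16 bits: offset of the binding in its set */
                          uint32_t desc_buffer_bti,     /* 8 bits: binding table index of the descriptor buffer */
                          uint32_t dyn_offset_slot)     /* 8 bits: dynamic offset slot, 0xff = none */
   {
      assert(binding_desc_offset < (1u << 16));
      assert(desc_buffer_bti < (1u << 8));
      assert(dyn_offset_slot < (1u << 8));
      return binding_desc_offset << 16 | desc_buffer_bti << 8 | dyn_offset_slot;
   }

   /* Shader-side equivalents used by the pass:
    *   nir_extract_u16(desc, 1) -> offset of the binding in the set
    *   nir_extract_u8(desc, 1)  -> descriptor buffer binding table index
    *   nir_extract_u8(desc, 0)  -> dynamic offset slot (0xff = none)
    */
   static inline uint32_t desc_offset_base(uint32_t p)  { return p >> 16; }
   static inline uint32_t desc_buffer_index(uint32_t p) { return (p >> 8) & 0xff; }
   static inline uint32_t dyn_offset_slot(uint32_t p)   { return p & 0xff; }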
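
Second, when the pipeline layout contains dynamic buffers, the pass reserves MAX_DYNAMIC_BUFFERS extra 32-bit uniforms (ANV_PARAM_DYN_OFFSET(i)) starting at dynamic_offset_uniform_start, and lower_load_vulkan_descriptor() adds the selected dynamic offset to the 64-bit base address of the SSBO. The scalar sketch below assumes dyn_offsets[] stands in for those pushed uniforms; the real lowering loads the value with load_uniform and uses nir_bcsel rather than a branch, but the effect is the same.

   #include <stdint.h>

   /* Scalar sketch of the dynamic-offset handling in
    * lower_load_vulkan_descriptor(); dyn_offsets[] stands in for the
    * MAX_DYNAMIC_BUFFERS push constants added as ANV_PARAM_DYN_OFFSET(i).
    */
   static uint64_t
   apply_dynamic_offset(uint64_t base_addr, uint32_t packed_desc_index,
                        uint32_t array_index, const uint32_t *dyn_offsets)
   {
      uint32_t dyn_base = packed_desc_index & 0xff;
      if (dyn_base == 0xff)
         return base_addr;   /* this binding has no dynamic offset */

      /* The NIR version loads the uniform unconditionally and selects
       * between 0 and the loaded value with nir_bcsel; a branch is the
       * scalar analogue.
       */
      return base_addr + dyn_offsets[dyn_base + array_index];
   }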
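
Third, lower_gen7_tex_swizzle() selects each destination channel of the texture result from one of eight candidates (red, green, blue, alpha, zero, one, plus two undefs) according to the per-channel byte read from the anv_texture_swizzle_descriptor; build_def_array_select() emits that selection as a balanced tree of nir_bcsel instructions. A reference-only scalar version of the recursion, for comparison:

   /* Reference-only scalar version of build_def_array_select(): picks
    * srcs[idx] with O(log2(end - start)) comparisons, mirroring the tree of
    * nir_bcsel instructions the pass emits.
    */
   static int
   def_array_select(const int *srcs, int idx, int start, int end)
   {
      if (start == end - 1)
         return srcs[start];
      int mid = start + (end - start) / 2;
      return idx < mid ? def_array_select(srcs, idx, start, mid)
                       : def_array_select(srcs, idx, mid, end);
   }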