X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_nir_apply_pipeline_layout.c;h=1d13aa604924e68f888e066a3ecd9bf0608519cd;hb=f118ca20758c85da1aaf1792e61aadb298b32a47;hp=800ed2ef3e24b2d7739f2fa5e36e093b84c21bff;hpb=e6803f6b6f06e805fe162d76aad5e25d2510232a;p=mesa.git
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 800ed2ef3e2..1d13aa60492 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -41,6 +41,7 @@ struct apply_pipeline_layout_state {
    struct anv_pipeline_layout *layout;
    bool add_bounds_checks;
+   nir_address_format ssbo_addr_format;
 
    /* Place to flag lowered instructions so we don't lower them twice */
    struct set *lowered_instrs;
 
@@ -113,8 +114,10 @@ get_used_bindings_block(nir_block *block,
       case nir_intrinsic_image_deref_load:
       case nir_intrinsic_image_deref_store:
       case nir_intrinsic_image_deref_atomic_add:
-      case nir_intrinsic_image_deref_atomic_min:
-      case nir_intrinsic_image_deref_atomic_max:
+      case nir_intrinsic_image_deref_atomic_imin:
+      case nir_intrinsic_image_deref_atomic_umin:
+      case nir_intrinsic_image_deref_atomic_imax:
+      case nir_intrinsic_image_deref_atomic_umax:
       case nir_intrinsic_image_deref_atomic_and:
       case nir_intrinsic_image_deref_atomic_or:
       case nir_intrinsic_image_deref_atomic_xor:
@@ -268,6 +271,12 @@ try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, bool is_atomic,
    if (is_atomic && nir_dest_bit_size(intrin->dest) == 64)
       return false;
 
+   /* Normal binding table-based messages can't handle non-uniform access so
+    * we have to fall back to A64.
+    */
+   if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
+      return false;
+
    if (!nir_deref_find_descriptor(deref, state))
       return false;
 
@@ -332,6 +341,15 @@ lower_direct_buffer_access(nir_function_impl *impl,
    }
 }
 
+static nir_address_format
+desc_addr_format(VkDescriptorType desc_type,
+                 struct apply_pipeline_layout_state *state)
+{
+   return (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+           desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) ?
+          state->ssbo_addr_format : nir_address_format_32bit_index_offset;
+}
+
 static void
 lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                           struct apply_pipeline_layout_state *state)
@@ -377,7 +395,8 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
          dynamic_offset_index;
 
       if (state->add_bounds_checks) {
-         /* We're using nir_address_format_64bit_bounded_global */
+         assert(desc_addr_format(desc_type, state) ==
+                nir_address_format_64bit_bounded_global);
          assert(intrin->dest.ssa.num_components == 4);
         assert(intrin->dest.ssa.bit_size == 32);
         index = nir_vec4(b, nir_imm_int(b, desc_offset),
@@ -385,7 +404,8 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                           nir_imm_int(b, array_size - 1),
                           nir_ssa_undef(b, 1, 32));
       } else {
-         /* We're using nir_address_format_64bit_global */
+         assert(desc_addr_format(desc_type, state) ==
+                nir_address_format_64bit_global);
         assert(intrin->dest.ssa.num_components == 1);
         assert(intrin->dest.ssa.bit_size == 64);
         index = nir_pack_64_2x32_split(b, nir_imm_int(b, desc_offset),
@@ -393,15 +413,17 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
       }
    } else if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
       /* This is an inline uniform block.  Just reference the descriptor set
-       * and use the descriptor offset as the base.  Inline uniforms always
-       * use nir_address_format_32bit_index_offset
+       * and use the descriptor offset as the base.
        */
+      assert(desc_addr_format(desc_type, state) ==
+             nir_address_format_32bit_index_offset);
       assert(intrin->dest.ssa.num_components == 2);
       assert(intrin->dest.ssa.bit_size == 32);
       index = nir_imm_ivec2(b, state->set[set].desc_offset,
                             bind_layout->descriptor_offset);
    } else {
-      /* We're using nir_address_format_32bit_index_offset */
+      assert(desc_addr_format(desc_type, state) ==
+             nir_address_format_32bit_index_offset);
       assert(intrin->dest.ssa.num_components == 2);
       assert(intrin->dest.ssa.bit_size == 32);
       index = nir_vec2(b, nir_iadd_imm(b, array_index, surface_index),
@@ -432,32 +454,37 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
    nir_ssa_def *offset = intrin->src[1].ssa;
 
    nir_ssa_def *new_index;
-   if (state->pdevice->has_a64_buffer_access &&
-       (desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
-        desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
-      if (state->add_bounds_checks) {
-         /* We're using nir_address_format_64bit_bounded_global */
-         assert(intrin->dest.ssa.num_components == 4);
-         assert(intrin->dest.ssa.bit_size == 32);
-         new_index = nir_vec4(b, nir_channel(b, old_index, 0),
-                              nir_iadd(b, nir_channel(b, old_index, 1),
-                                       offset),
-                              nir_channel(b, old_index, 2),
-                              nir_ssa_undef(b, 1, 32));
-      } else {
-         /* We're using nir_address_format_64bit_global */
-         assert(intrin->dest.ssa.num_components == 1);
-         assert(intrin->dest.ssa.bit_size == 64);
-         nir_ssa_def *base = nir_unpack_64_2x32_split_x(b, old_index);
-         nir_ssa_def *arr_idx = nir_unpack_64_2x32_split_y(b, old_index);
-         new_index = nir_pack_64_2x32_split(b, base, nir_iadd(b, arr_idx, offset));
-      }
-   } else {
-      /* We're using nir_address_format_32bit_index_offset */
+   switch (desc_addr_format(desc_type, state)) {
+   case nir_address_format_64bit_bounded_global:
+      /* See also lower_res_index_intrinsic() */
+      assert(intrin->dest.ssa.num_components == 4);
+      assert(intrin->dest.ssa.bit_size == 32);
+      new_index = nir_vec4(b, nir_channel(b, old_index, 0),
+                           nir_iadd(b, nir_channel(b, old_index, 1),
+                                    offset),
+                           nir_channel(b, old_index, 2),
+                           nir_ssa_undef(b, 1, 32));
+      break;
+
+   case nir_address_format_64bit_global: {
+      /* See also lower_res_index_intrinsic() */
+      assert(intrin->dest.ssa.num_components == 1);
+      assert(intrin->dest.ssa.bit_size == 64);
+      nir_ssa_def *base = nir_unpack_64_2x32_split_x(b, old_index);
+      nir_ssa_def *arr_idx = nir_unpack_64_2x32_split_y(b, old_index);
+      new_index = nir_pack_64_2x32_split(b, base, nir_iadd(b, arr_idx, offset));
+      break;
+   }
+
+   case nir_address_format_32bit_index_offset:
       assert(intrin->dest.ssa.num_components == 2);
       assert(intrin->dest.ssa.bit_size == 32);
       new_index = nir_vec2(b, nir_iadd(b, nir_channel(b, old_index, 0), offset),
                            nir_channel(b, old_index, 1));
+      break;
+
+   default:
+      unreachable("Unhandled address format");
    }
 
    assert(intrin->dest.is_ssa);
@@ -473,14 +500,22 @@ build_ssbo_descriptor_load(const VkDescriptorType desc_type,
    nir_builder *b = &state->builder;
 
    nir_ssa_def *desc_offset, *array_index;
-   if (state->add_bounds_checks) {
-      /* We're using nir_address_format_64bit_bounded_global */
+   switch (state->ssbo_addr_format) {
+   case nir_address_format_64bit_bounded_global:
+      /* See also lower_res_index_intrinsic() */
       desc_offset = nir_channel(b, index, 0);
       array_index = nir_umin(b, nir_channel(b, index, 1),
                                 nir_channel(b, index, 2));
-   } else {
+      break;
+
+   case nir_address_format_64bit_global:
+      /* See also lower_res_index_intrinsic() */
       desc_offset = nir_unpack_64_2x32_split_x(b, index);
       array_index = nir_unpack_64_2x32_split_y(b, index);
+      break;
+
+   default:
+      unreachable("Unhandled address format for SSBO");
    }
 
    /* The desc_offset is actually 16.8.8 */
@@ -535,14 +570,22 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
           * dynamic offset.
           */
         nir_ssa_def *desc_offset, *array_index;
-         if (state->add_bounds_checks) {
-            /* We're using nir_address_format_64bit_bounded_global */
+         switch (state->ssbo_addr_format) {
+         case nir_address_format_64bit_bounded_global:
+            /* See also lower_res_index_intrinsic() */
            desc_offset = nir_channel(b, index, 0);
            array_index = nir_umin(b, nir_channel(b, index, 1),
                                      nir_channel(b, index, 2));
-         } else {
+            break;
+
+         case nir_address_format_64bit_global:
+            /* See also lower_res_index_intrinsic() */
            desc_offset = nir_unpack_64_2x32_split_x(b, index);
            array_index = nir_unpack_64_2x32_split_y(b, index);
+            break;
+
+         default:
+            unreachable("Unhandled address format for SSBO");
         }
 
         nir_ssa_def *dyn_offset_base =
@@ -567,11 +610,10 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
            nir_bcsel(b, nir_ieq(b, dyn_offset_base, nir_imm_int(b, 0xff)),
                      nir_imm_int(b, 0), &dyn_load->dest.ssa);
 
-         if (state->add_bounds_checks) {
+         switch (state->ssbo_addr_format) {
+         case nir_address_format_64bit_bounded_global: {
            /* The dynamic offset gets added to the base pointer so that we
             * have a sliding window range.
-             *
-             * We're using nir_address_format_64bit_bounded_global.
             */
            nir_ssa_def *base_ptr =
               nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
@@ -580,9 +622,15 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
                            nir_unpack_64_2x32_split_y(b, base_ptr),
                            nir_channel(b, desc, 2),
                            nir_channel(b, desc, 3));
-         } else {
-            /* We're using nir_address_format_64bit_global */
+            break;
+         }
+
+         case nir_address_format_64bit_global:
            desc = nir_iadd(b, desc, nir_u2u64(b, dynamic_offset));
+            break;
+
+         default:
+            unreachable("Unhandled address format for SSBO");
         }
       }
    } else {
@@ -677,13 +725,21 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin,
                       struct apply_pipeline_layout_state *state)
 {
    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   unsigned binding_offset = state->set[set].surface_offsets[binding];
 
    nir_builder *b = &state->builder;
    b->cursor = nir_before_instr(&intrin->instr);
 
+   ASSERTED const bool use_bindless = state->pdevice->has_bindless_images;
+
    if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
       b->cursor = nir_instr_remove(&intrin->instr);
+      assert(!use_bindless); /* Otherwise our offsets would be wrong */
 
       const unsigned param = nir_intrinsic_base(intrin);
 
       nir_ssa_def *desc =
@@ -692,12 +748,14 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin,
                                intrin->dest.ssa.bit_size, state);
 
       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
+   } else if (binding_offset > MAX_BINDING_TABLE_SIZE) {
+      const bool write_only =
+         (var->data.image.access & ACCESS_NON_READABLE) != 0;
+      nir_ssa_def *desc =
+         build_descriptor_load(deref, 0, 2, 32, state);
+      nir_ssa_def *handle = nir_channel(b, desc, write_only ? 1 : 0);
+      nir_rewrite_image_intrinsic(intrin, handle, true);
    } else {
-      nir_variable *var = nir_deref_instr_get_variable(deref);
-
-      unsigned set = var->data.descriptor_set;
-      unsigned binding = var->data.binding;
-      unsigned binding_offset = state->set[set].surface_offsets[binding];
       unsigned array_size =
          state->layout->set[set].layout->binding[binding].array_size;
 
@@ -801,8 +859,21 @@ lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
       assert(deref->deref_type == nir_deref_type_array);
 
       if (nir_src_is_const(deref->arr.index)) {
-         unsigned arr_index = nir_src_as_uint(deref->arr.index);
-         *base_index += MIN2(arr_index, array_size - 1);
+         unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
+         struct anv_sampler **immutable_samplers =
+            state->layout->set[set].layout->binding[binding].immutable_samplers;
+         if (immutable_samplers) {
+            /* Arrays of YCbCr samplers are tightly packed in the binding
+             * tables; compute the offset of an element in the array by
+             * adding the number of planes of all preceding elements.
+             */
+            unsigned desc_arr_index = 0;
+            for (int i = 0; i < arr_index; i++)
+               desc_arr_index += immutable_samplers[i]->n_planes;
+            *base_index += desc_arr_index;
+         } else {
+            *base_index += arr_index;
+         }
       } else {
          /* From VK_KHR_sampler_ycbcr_conversion:
           *
@@ -844,13 +915,100 @@ tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
    return plane;
 }
 
+static nir_ssa_def *
+build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
+                       unsigned start, unsigned end)
+{
+   if (start == end - 1) {
+      return srcs[start];
+   } else {
+      unsigned mid = start + (end - start) / 2;
+      return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)),
+                       build_def_array_select(b, srcs, idx, start, mid),
+                       build_def_array_select(b, srcs, idx, mid, end));
+   }
+}
+
 static void
-lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
+lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane,
+                       struct apply_pipeline_layout_state *state)
 {
-   state->builder.cursor = nir_before_instr(&tex->instr);
+   assert(state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell);
+   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
+       nir_tex_instr_is_query(tex) ||
+       tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
+       (tex->is_shadow && tex->is_new_style_shadow))
+      return;
+
+   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
+   assert(deref_src_idx >= 0);
+
+   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   const struct anv_descriptor_set_binding_layout *bind_layout =
+      &state->layout->set[set].layout->binding[binding];
+
+   if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
+      return;
+
+   nir_builder *b = &state->builder;
+   b->cursor = nir_before_instr(&tex->instr);
+
+   const unsigned plane_offset =
+      plane * sizeof(struct anv_texture_swizzle_descriptor);
+   nir_ssa_def *swiz =
+      build_descriptor_load(deref, plane_offset, 1, 32, state);
+
+   b->cursor = nir_after_instr(&tex->instr);
+
+   assert(tex->dest.ssa.bit_size == 32);
+   assert(tex->dest.ssa.num_components == 4);
+
+   /* Initializing to undef is ok; nir_opt_undef will clean it up. */
+   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
+   nir_ssa_def *comps[8];
+   for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
+      comps[i] = undef;
+
+   comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
+   if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
+      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
+   else
+      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
+   comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
+   comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
+   comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
+   comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);
+
+   nir_ssa_def *swiz_comps[4];
+   for (unsigned i = 0; i < 4; i++) {
+      nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
+      swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
+   }
+   nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);
+
+   /* Rewrite uses before we insert so we don't rewrite this use */
+   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
+                                  nir_src_for_ssa(swiz_tex_res),
+                                  swiz_tex_res->parent_instr);
+}
 
+static void
+lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
+{
    unsigned plane = tex_instr_get_and_remove_plane_src(tex);
 
+   /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader.  Do this
+    * before we lower the derefs away so we can still find the descriptor.
+    */
+   if (state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell)
+      lower_gen7_tex_swizzle(tex, plane, state);
+
+   state->builder.cursor = nir_before_instr(&tex->instr);
+
    lower_tex_deref(tex, nir_tex_src_texture_deref, &tex->texture_index,
                    plane, state);
 
@@ -887,8 +1045,10 @@ apply_pipeline_layout_block(nir_block *block,
       case nir_intrinsic_image_deref_load:
       case nir_intrinsic_image_deref_store:
       case nir_intrinsic_image_deref_atomic_add:
-      case nir_intrinsic_image_deref_atomic_min:
-      case nir_intrinsic_image_deref_atomic_max:
+      case nir_intrinsic_image_deref_atomic_imin:
+      case nir_intrinsic_image_deref_atomic_umin:
+      case nir_intrinsic_image_deref_atomic_imax:
+      case nir_intrinsic_image_deref_atomic_umax:
       case nir_intrinsic_image_deref_atomic_and:
       case nir_intrinsic_image_deref_atomic_or:
       case nir_intrinsic_image_deref_atomic_xor:
@@ -952,6 +1112,7 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
       .shader = shader,
       .layout = layout,
       .add_bounds_checks = robust_buffer_access,
+      .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
       .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
       .dynamic_offset_uniform_start = -1,
    };
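
The pattern this patch applies throughout, replacing scattered `if (state->add_bounds_checks)` tests with a single desc_addr_format() query, can be shown in a small self-contained sketch. The enums below are local stand-ins for the NIR and Vulkan names, and ssbo_addr_format() here is an assumed reconstruction of anv_nir_ssbo_addr_format() (whose body is not part of this diff), inferred from the `state->pdevice->has_a64_buffer_access` condition the patch deletes:

/* Standalone sketch only; real values live in NIR and Vulkan headers. */
#include <stdbool.h>
#include <stdio.h>

typedef enum {
   ADDR_FMT_32BIT_INDEX_OFFSET,   /* vec2(index, offset), 32 bits each */
   ADDR_FMT_64BIT_GLOBAL,         /* one flat 64-bit pointer */
   /* vec4(desc_offset, array_index, array_size - 1, undef), per the
    * nir_vec4 and asserts in lower_res_index_intrinsic() above */
   ADDR_FMT_64BIT_BOUNDED_GLOBAL,
} addr_fmt;

typedef enum {
   DESC_UNIFORM_BUFFER,
   DESC_STORAGE_BUFFER,
   DESC_STORAGE_BUFFER_DYNAMIC,
} desc_kind;

/* Assumed equivalent of anv_nir_ssbo_addr_format(): A64 access plus
 * robustBufferAccess selects the bounded format, A64 alone the flat
 * 64-bit format, otherwise the binding-table index+offset format. */
static addr_fmt
ssbo_addr_format(bool has_a64_buffer_access, bool robust_buffer_access)
{
   if (!has_a64_buffer_access)
      return ADDR_FMT_32BIT_INDEX_OFFSET;
   return robust_buffer_access ? ADDR_FMT_64BIT_BOUNDED_GLOBAL
                               : ADDR_FMT_64BIT_GLOBAL;
}

/* Mirrors the patch's desc_addr_format(): only (dynamic) storage buffers
 * ever take the A64 path; every other descriptor stays index+offset. */
static addr_fmt
desc_addr_format(desc_kind kind, addr_fmt ssbo_fmt)
{
   return (kind == DESC_STORAGE_BUFFER ||
           kind == DESC_STORAGE_BUFFER_DYNAMIC) ?
          ssbo_fmt : ADDR_FMT_32BIT_INDEX_OFFSET;
}

int main(void)
{
   addr_fmt ssbo = ssbo_addr_format(true, true);
   printf("ssbo format: %d, ubo format: %d\n",
          desc_addr_format(DESC_STORAGE_BUFFER, ssbo),
          desc_addr_format(DESC_UNIFORM_BUFFER, ssbo));
   return 0;
}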
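
The "sliding window range" comment in lower_load_vulkan_descriptor() above refers to folding the dynamic offset into the 64-bit base pointer of a bounded-global descriptor while leaving the size channel untouched, so the bounds-checked window [base, base + size) slides with the offset. A minimal sketch of that arithmetic, using plain integers and hypothetical names in place of NIR SSA values:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the bounded-global descriptor: the patch keeps
 * the base as two 32-bit channels that it packs and unpacks with
 * nir_pack_64_2x32()/nir_unpack_64_2x32_split_*(). */
struct bounded_addr {
   uint64_t base; /* packed from channels 0 and 1 */
   uint32_t size; /* bounds-check limit, channel 2, unchanged */
};

static struct bounded_addr
apply_dynamic_offset(struct bounded_addr a, uint32_t dyn_offset)
{
   a.base += dyn_offset; /* the nir_iadd on the unpacked base pointer */
   return a;             /* size is carried over untouched */
}

int main(void)
{
   struct bounded_addr a = { .base = 0x1000, .size = 256 };
   a = apply_dynamic_offset(a, 64);
   printf("window: [0x%llx, 0x%llx)\n",
          (unsigned long long)a.base,
          (unsigned long long)(a.base + a.size));
   return 0;
}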
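
build_def_array_select(), added above, picks one of end - start SSA defs by emitting a balanced tree of nir_ilt/nir_bcsel instructions, so a selection among n defs costs O(log n) bcsels rather than a linear chain; the patch uses it to choose among the eight ISL_CHANNEL_SELECT_* sources per destination channel. The same recursion sketched over plain integers (not Mesa code):

#include <stdio.h>

/* Same divide-and-conquer shape as build_def_array_select(): compare the
 * index against the midpoint and recurse into the matching half. */
static int
select_tree(const int *srcs, int idx, int start, int end)
{
   if (start == end - 1)
      return srcs[start];
   int mid = start + (end - start) / 2;
   /* In the patch this comparison is a nir_ilt feeding a nir_bcsel */
   return idx < mid ? select_tree(srcs, idx, start, mid)
                    : select_tree(srcs, idx, mid, end);
}

int main(void)
{
   /* Stands in for the 8-entry comps[] table of channel sources */
   int comps[8] = {10, 11, 12, 13, 14, 15, 16, 17};
   printf("%d\n", select_tree(comps, 5, 0, 8)); /* prints 15 */
   return 0;
}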