X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fvulkan%2Ftu_shader.c;h=48b686a732979770d824a5ed4a4046f6b4b8c72e;hb=e72895767bc5201215e27ac607bde24af114bb60;hp=a43280e6a17010a78190c25761dc92b62b7f5952;hpb=6fcbce3b9960a6066cf198d5d2fc25a7e4cf0e7b;p=mesa.git

diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index a43280e6a17..48b686a7329 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -42,9 +42,29 @@ tu_spirv_to_nir(struct ir3_compiler *compiler,
    /* TODO these are made-up */
    const struct spirv_to_nir_options spirv_options = {
       .frag_coord_is_sysval = true,
-      .lower_ubo_ssbo_access_to_offsets = true,
+      .lower_ubo_ssbo_access_to_offsets = false,
+
+      .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
+      .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
+
+      /* Accessed via stg/ldg */
+      .phys_ssbo_addr_format = nir_address_format_64bit_global,
+
+      /* Accessed via the const register file */
+      .push_const_addr_format = nir_address_format_logical,
+
+      /* Accessed via ldl/stl */
+      .shared_addr_format = nir_address_format_32bit_offset,
+
+      /* Accessed via stg/ldg (not used with Vulkan?) */
+      .global_addr_format = nir_address_format_64bit_global,
+
       .caps = {
-         .transform_feedback = compiler->gpu_id >= 600,
+         .transform_feedback = true,
+         .tessellation = true,
+         .draw_parameters = true,
+         .variable_pointers = true,
+         .stencil_export = true,
       },
    };
    const nir_shader_compiler_options *nir_options =
@@ -141,8 +161,7 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
       base = layout->set[set].dynamic_offset_start +
-         binding_layout->dynamic_offset_offset +
-         layout->input_attachment_count;
+         binding_layout->dynamic_offset_offset;
       set = MAX_SETS;
       break;
    default:
@@ -150,21 +169,105 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
       break;
    }
 
-   nir_intrinsic_instr *bindless =
-      nir_intrinsic_instr_create(b->shader,
-                                 nir_intrinsic_bindless_resource_ir3);
-   bindless->num_components = 1;
-   nir_ssa_dest_init(&bindless->instr, &bindless->dest,
-                     1, 32, NULL);
-   nir_intrinsic_set_desc_set(bindless, set);
-   bindless->src[0] = nir_src_for_ssa(nir_iadd(b, nir_imm_int(b, base), vulkan_idx));
-   nir_builder_instr_insert(b, &bindless->instr);
+   nir_ssa_def *def = nir_vec3(b, nir_imm_int(b, set),
+                               nir_iadd(b, nir_imm_int(b, base), vulkan_idx),
+                               nir_imm_int(b, 0));
 
-   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
-                            nir_src_for_ssa(&bindless->dest.ssa));
+   nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(def));
    nir_instr_remove(&instr->instr);
 }
 
+static void
+lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin)
+{
+   /* Loading the descriptor happens as part of the load/store instruction so
+    * this is a no-op.
+    */
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, intrin->src[0]);
+   nir_instr_remove(&intrin->instr);
+}
+
+static void
+lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
+
+   /* The bindless base is part of the instruction, which means that part of
+    * the "pointer" has to be constant. We solve this in the same way the blob
+    * does, by generating a bunch of if-statements. In the usual case where
+    * the descriptor set is constant this will get optimized out.
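+    *
+    * As a rough sketch (illustrative pseudo-NIR, not the exact output of
+    * the builder calls below), an access with a dynamic set index becomes:
+    *
+    *    if (set == 0)
+    *       res0 = intrin(bindless_resource_ir3(desc_set=0), descriptor_idx, ...)
+    *    else if (set == 1)
+    *       res1 = intrin(bindless_resource_ir3(desc_set=1), descriptor_idx, ...)
+    *    ...
+    *    result = phi(res0, res1, ..., undef)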
+ */ + + unsigned buffer_src; + if (intrin->intrinsic == nir_intrinsic_store_ssbo) { + /* This has the value first */ + buffer_src = 1; + } else { + buffer_src = 0; + } + + nir_ssa_def *base_idx = nir_channel(b, intrin->src[buffer_src].ssa, 0); + nir_ssa_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1); + + nir_ssa_def *results[MAX_SETS + 1] = { NULL }; + + for (unsigned i = 0; i < MAX_SETS + 1; i++) { + /* if (base_idx == i) { ... */ + nir_if *nif = nir_push_if(b, nir_ieq(b, base_idx, nir_imm_int(b, i))); + + nir_intrinsic_instr *bindless = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_bindless_resource_ir3); + bindless->num_components = 0; + nir_ssa_dest_init(&bindless->instr, &bindless->dest, + 1, 32, NULL); + nir_intrinsic_set_desc_set(bindless, i); + bindless->src[0] = nir_src_for_ssa(descriptor_idx); + nir_builder_instr_insert(b, &bindless->instr); + + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, intrin->intrinsic); + + copy->num_components = intrin->num_components; + + for (unsigned src = 0; src < info->num_srcs; src++) { + if (src == buffer_src) + copy->src[src] = nir_src_for_ssa(&bindless->dest.ssa); + else + copy->src[src] = nir_src_for_ssa(intrin->src[src].ssa); + } + + for (unsigned idx = 0; idx < info->num_indices; idx++) { + copy->const_index[idx] = intrin->const_index[idx]; + } + + if (info->has_dest) { + nir_ssa_dest_init(©->instr, ©->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size, + intrin->dest.ssa.name); + results[i] = ©->dest.ssa; + } + + nir_builder_instr_insert(b, ©->instr); + + /* } else { ... */ + nir_push_else(b, nif); + } + + nir_ssa_def *result = + nir_ssa_undef(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size); + for (int i = MAX_SETS; i >= 0; i--) { + nir_pop_if(b, NULL); + if (info->has_dest) + result = nir_if_phi(b, results[i], result); + } + + if (info->has_dest) + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(result)); + nir_instr_remove(&intrin->instr); +} + static nir_ssa_def * build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler, struct tu_shader *shader, @@ -177,31 +280,42 @@ build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler, const struct tu_descriptor_set_binding_layout *bind_layout = &layout->set[set].layout->binding[binding]; + /* input attachments use non bindless workaround */ + if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) { + const struct glsl_type *glsl_type = glsl_without_array(var->type); + uint32_t idx = var->data.index * 2; + + b->shader->info.textures_used |= + ((1ull << (bind_layout->array_size * 2)) - 1) << (idx * 2); + + /* D24S8 workaround: stencil of D24S8 will be sampled as uint */ + if (glsl_get_sampler_result_type(glsl_type) == GLSL_TYPE_UINT) + idx += 1; + + if (deref->deref_type == nir_deref_type_var) + return nir_imm_int(b, idx); + + nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1); + return nir_iadd(b, nir_imm_int(b, idx), + nir_imul_imm(b, arr_index, 2)); + } + shader->active_desc_sets |= 1u << set; nir_ssa_def *desc_offset; unsigned descriptor_stride; - if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) { - unsigned offset = - layout->set[set].input_attachment_start + - bind_layout->input_attachment_offset; - desc_offset = nir_imm_int(b, offset); - set = MAX_SETS; - descriptor_stride = 1; - } else { - unsigned offset = 0; - /* Samplers come second in combined image/sampler descriptors, see - * write_combined_image_sampler_descriptor(). 
- */ - if (is_sampler && bind_layout->type == - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - offset = 1; - } - desc_offset = - nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) + - offset); - descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS); + unsigned offset = 0; + /* Samplers come second in combined image/sampler descriptors, see + * write_combined_image_sampler_descriptor(). + */ + if (is_sampler && bind_layout->type == + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + offset = 1; } + desc_offset = + nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) + + offset); + descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS); if (deref->deref_type != nir_deref_type_var) { assert(deref->deref_type == nir_deref_type_array); @@ -214,7 +328,7 @@ build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler, nir_intrinsic_instr *bindless = nir_intrinsic_instr_create(b->shader, nir_intrinsic_bindless_resource_ir3); - bindless->num_components = 1; + bindless->num_components = 0; nir_ssa_dest_init(&bindless->instr, &bindless->dest, 1, 32, NULL); nir_intrinsic_set_desc_set(bindless, set); @@ -251,10 +365,35 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, lower_load_push_constant(b, instr, shader); return true; + case nir_intrinsic_load_vulkan_descriptor: + lower_load_vulkan_descriptor(instr); + return true; + case nir_intrinsic_vulkan_resource_index: lower_vulkan_resource_index(b, instr, shader, layout); return true; + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_store_ssbo: + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_fadd: + case nir_intrinsic_ssbo_atomic_fmin: + case nir_intrinsic_ssbo_atomic_fmax: + case nir_intrinsic_ssbo_atomic_fcomp_swap: + case nir_intrinsic_get_buffer_size: + lower_ssbo_ubo_intrinsic(b, instr); + return true; + case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: @@ -356,6 +495,10 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src, nir_src_for_ssa(bindless)); tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle; + + /* for the input attachment case: */ + if (bindless->parent_instr->type != nir_instr_type_intrinsic) + tex->src[tex_src_idx].src_type = nir_tex_src_texture_offset; } return true; @@ -385,6 +528,11 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader, } } + if (progress) + nir_metadata_preserve(impl, nir_metadata_none); + else + nir_metadata_preserve(impl, nir_metadata_all); + return progress; } @@ -435,38 +583,6 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader) align(max, 16) / 16 - tu_shader->push_consts.lo; } -/* Gather the InputAttachmentIndex for each input attachment from the NIR - * shader and organize the info in a way so that draw-time patching is easy. 
- */ -static void -gather_input_attachments(nir_shader *shader, struct tu_shader *tu_shader, - const struct tu_pipeline_layout *layout) -{ - nir_foreach_variable(var, &shader->uniforms) { - const struct glsl_type *glsl_type = glsl_without_array(var->type); - - if (!glsl_type_is_image(glsl_type)) - continue; - - enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type); - - const uint32_t set = var->data.descriptor_set; - const uint32_t binding = var->data.binding; - const struct tu_descriptor_set_binding_layout *bind_layout = - &layout->set[set].layout->binding[binding]; - const uint32_t array_size = bind_layout->array_size; - - if (dim == GLSL_SAMPLER_DIM_SUBPASS || - dim == GLSL_SAMPLER_DIM_SUBPASS_MS) { - unsigned offset = - layout->set[set].input_attachment_start + - bind_layout->input_attachment_offset; - for (unsigned i = 0; i < array_size; i++) - tu_shader->attachment_idx[offset + i] = var->data.index + i; - } - } -} - static bool tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, const struct tu_pipeline_layout *layout) @@ -474,7 +590,6 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, bool progress = false; gather_push_constants(shader, tu_shader); - gather_input_attachments(shader, tu_shader, layout); nir_foreach_function(function, shader) { if (function->impl) @@ -484,13 +599,30 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, /* Remove now-unused variables so that when we gather the shader info later * they won't be counted. */ - nir_remove_dead_variables(shader, - nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo, - NULL); + + if (progress) + nir_opt_dce(shader); + + progress |= + nir_remove_dead_variables(shader, + nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo, + NULL); return progress; } +static void +shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) +{ + assert(glsl_type_is_vector_or_scalar(type)); + + unsigned comp_size = + glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8; + unsigned length = glsl_get_vector_elements(type); + *size = comp_size * length; + *align = 4; +} + static void tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info) { @@ -542,7 +674,7 @@ tu_shader_create(struct tu_device *dev, struct tu_shader *shader; shader = vk_zalloc2( - &dev->alloc, alloc, + &dev->vk.alloc, alloc, sizeof(*shader), 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (!shader) @@ -568,7 +700,7 @@ tu_shader_create(struct tu_device *dev, } if (!nir) { - vk_free2(&dev->alloc, alloc, shader); + vk_free2(&dev->vk.alloc, alloc, shader); return NULL; } @@ -581,6 +713,7 @@ tu_shader_create(struct tu_device *dev, NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); NIR_PASS_V(nir, nir_lower_returns); NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_copy_prop); NIR_PASS_V(nir, nir_opt_deref); foreach_list_typed_safe(nir_function, func, node, &nir->functions) { if (!func->is_entrypoint) @@ -622,23 +755,46 @@ tu_shader_create(struct tu_device *dev, NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all); /* ir3 doesn't support indirect input/output */ + /* TODO: We shouldn't perform this lowering pass on gl_TessLevelInner + * and gl_TessLevelOuter. Since the tess levels are actually stored in + * a global BO, they can be directly accessed via stg and ldg. + * nir_lower_indirect_derefs will instead generate a big if-ladder which + * isn't *incorrect* but is much less efficient. 
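+    *
+    * As a rough illustration (pseudo-code, not actual ir3 instructions), a
+    * dynamic store such as
+    *
+    *    gl_TessLevelOuter[i] = x;
+    *
+    * could be a single stg at offset i into the tess-level BO, but the
+    * if-ladder instead expands it to:
+    *
+    *    if (i == 0) gl_TessLevelOuter[0] = x;
+    *    else if (i == 1) gl_TessLevelOuter[1] = x;
+    *    ...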
*/ NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out); NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); - nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs, stage); - nir_assign_io_var_locations(&nir->outputs, &nir->num_outputs, stage); + nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage); + nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage); NIR_PASS_V(nir, nir_lower_system_values); NIR_PASS_V(nir, nir_lower_frexp); - if (stage == MESA_SHADER_FRAGMENT) - NIR_PASS_V(nir, nir_lower_input_attachments, true); + if (stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_lower_input_attachments, + &(nir_input_attachment_options) { + .use_fragcoord_sysval = true, + }); + } + + NIR_PASS_V(nir, nir_lower_explicit_io, + nir_var_mem_ubo | nir_var_mem_ssbo, + nir_address_format_vec2_index_32bit_offset); + + if (nir->info.stage == MESA_SHADER_COMPUTE) { + NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, + nir_var_mem_shared, shared_type_info); + NIR_PASS_V(nir, nir_lower_explicit_io, + nir_var_mem_shared, + nir_address_format_32bit_offset); + } NIR_PASS_V(nir, tu_lower_io, shader, layout); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + ir3_finalize_nir(dev->compiler, nir); + shader->ir3_shader = ir3_shader_from_nir(dev->compiler, nir, align(shader->push_consts.count, 4), @@ -654,7 +810,7 @@ tu_shader_destroy(struct tu_device *dev, { ir3_shader_destroy(shader->ir3_shader); - vk_free2(&dev->alloc, alloc, shader); + vk_free2(&dev->vk.alloc, alloc, shader); } VkResult @@ -670,9 +826,9 @@ tu_CreateShaderModule(VkDevice _device, assert(pCreateInfo->flags == 0); assert(pCreateInfo->codeSize % 4 == 0); - module = vk_alloc2(&device->alloc, pAllocator, - sizeof(*module) + pCreateInfo->codeSize, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + module = vk_object_alloc(&device->vk, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, + VK_OBJECT_TYPE_SHADER_MODULE); if (module == NULL) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -697,5 +853,5 @@ tu_DestroyShaderModule(VkDevice _device, if (!module) return; - vk_free2(&device->alloc, pAllocator, module); + vk_object_free(&device->vk, pAllocator, module); }