X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fvulkan%2Ftu_shader.c;h=85bf6bbc50ffa269213f5342b8fea34f49c7bc8a;hb=0c05d46237db3095ef92d61452d886286806a827;hp=a05e819a55a91b0d52bd9b5955db74a8a044e614;hpb=349898a967f1bf28271454d88b3f1a36a4db2e06;p=mesa.git diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index a05e819a55a..85bf6bbc50f 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -25,6 +25,7 @@ #include "spirv/nir_spirv.h" #include "util/mesa-sha1.h" +#include "nir/nir_xfb_info.h" #include "ir3/ir3_nir.h" @@ -40,7 +41,9 @@ tu_spirv_to_nir(struct ir3_compiler *compiler, const struct spirv_to_nir_options spirv_options = { .frag_coord_is_sysval = true, .lower_ubo_ssbo_access_to_offsets = true, - .caps = { false }, + .caps = { + .transform_feedback = compiler->gpu_id >= 600, + }, }; const nir_shader_compiler_options *nir_options = ir3_get_compiler_options(compiler); @@ -80,141 +83,20 @@ tu_spirv_to_nir(struct ir3_compiler *compiler, return nir; } -static unsigned -map_add(struct tu_descriptor_map *map, int set, int binding, int value, - int array_size) -{ - unsigned index = 0; - for (unsigned i = 0; i < map->num; i++) { - if (set == map->set[i] && binding == map->binding[i]) { - assert(value == map->value[i]); - assert(array_size == map->array_size[i]); - return index; - } - index += map->array_size[i]; - } - - assert(index == map->num_desc); - - map->set[map->num] = set; - map->binding[map->num] = binding; - map->value[map->num] = value; - map->array_size[map->num] = array_size; - map->num++; - map->num_desc += array_size; - - return index; -} - -static void -lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, - struct tu_shader *shader, - const struct tu_pipeline_layout *layout) -{ - nir_ssa_def *index = NULL; - unsigned base_index = 0; - unsigned array_elements = 1; - nir_tex_src *src = &instr->src[src_idx]; - bool is_sampler = src->src_type == nir_tex_src_sampler_deref; - - /* We compute first the offsets */ - nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr); - while (deref->deref_type != nir_deref_type_var) { - assert(deref->parent.is_ssa); - nir_deref_instr *parent = - nir_instr_as_deref(deref->parent.ssa->parent_instr); - - assert(deref->deref_type == nir_deref_type_array); - - if (nir_src_is_const(deref->arr.index) && index == NULL) { - /* We're still building a direct index */ - base_index += nir_src_as_uint(deref->arr.index) * array_elements; - } else { - if (index == NULL) { - /* We used to be direct but not anymore */ - index = nir_imm_int(b, base_index); - base_index = 0; - } - - index = nir_iadd(b, index, - nir_imul(b, nir_imm_int(b, array_elements), - nir_ssa_for_src(b, deref->arr.index, 1))); - } - - array_elements *= glsl_get_length(parent->type); - - deref = parent; - } - - if (index) - index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); - - /* We have the offsets, we apply them, rewriting the source or removing - * instr if needed - */ - if (index) { - nir_instr_rewrite_src(&instr->instr, &src->src, - nir_src_for_ssa(index)); - - src->src_type = is_sampler ? 
- nir_tex_src_sampler_offset : - nir_tex_src_texture_offset; - } else { - nir_tex_instr_remove_src(instr, src_idx); - } - - uint32_t set = deref->var->data.descriptor_set; - uint32_t binding = deref->var->data.binding; - struct tu_descriptor_set_layout *set_layout = layout->set[set].layout; - struct tu_descriptor_set_binding_layout *binding_layout = - &set_layout->binding[binding]; - - int desc_index = map_add(is_sampler ? - &shader->sampler_map : &shader->texture_map, - deref->var->data.descriptor_set, - deref->var->data.binding, - deref->var->data.index, - binding_layout->array_size) + base_index; - if (is_sampler) - instr->sampler_index = desc_index; - else - instr->texture_index = desc_index; -} - -static bool -lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader, - const struct tu_pipeline_layout *layout) -{ - int texture_idx = - nir_tex_instr_src_index(instr, nir_tex_src_texture_deref); - - if (texture_idx >= 0) - lower_tex_src_to_offset(b, instr, texture_idx, shader, layout); - - int sampler_idx = - nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref); - - if (sampler_idx >= 0) - lower_tex_src_to_offset(b, instr, sampler_idx, shader, layout); - - if (texture_idx < 0 && sampler_idx < 0) - return false; - - return true; -} - static void lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr, struct tu_shader *shader) { - /* note: ir3 wants load_ubo, not load_uniform */ - assert(nir_intrinsic_base(instr) == 0); - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo); + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform); load->num_components = instr->num_components; - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - load->src[1] = instr->src[0]; + uint32_t base = nir_intrinsic_base(instr); + assert(base % 4 == 0); + assert(base >= shader->push_consts.lo * 16); + base -= shader->push_consts.lo * 16; + nir_intrinsic_set_base(load, base / 4); + load->src[0] = + nir_src_for_ssa(nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2))); nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, instr->dest.ssa.bit_size, instr->dest.ssa.name); @@ -229,66 +111,108 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr, struct tu_shader *shader, const struct tu_pipeline_layout *layout) { - nir_const_value *const_val = nir_src_as_const_value(instr->src[0]); + nir_ssa_def *vulkan_idx = instr->src[0].ssa; unsigned set = nir_intrinsic_desc_set(instr); unsigned binding = nir_intrinsic_binding(instr); struct tu_descriptor_set_layout *set_layout = layout->set[set].layout; struct tu_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding]; - unsigned index = 0; + uint32_t base; - switch (nir_intrinsic_desc_type(instr)) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + switch (binding_layout->type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - if (!const_val) - tu_finishme("non-constant vulkan_resource_index array index"); - /* skip index 0 which is used for push constants */ - index = map_add(&shader->ubo_map, set, binding, 0, - binding_layout->array_size) + 1; - index += const_val->u32; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - if (!const_val) - tu_finishme("non-constant vulkan_resource_index array index"); - index = map_add(&shader->ssbo_map, set, binding, 0, - binding_layout->array_size); - index += const_val->u32; + base = layout->set[set].dynamic_offset_start + + binding_layout->dynamic_offset_offset 
+ + layout->input_attachment_count; + set = MAX_SETS; break; default: - tu_finishme("unsupported desc_type for vulkan_resource_index"); + base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS); break; } + nir_intrinsic_instr *bindless = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_bindless_resource_ir3); + bindless->num_components = 1; + nir_ssa_dest_init(&bindless->instr, &bindless->dest, + 1, 32, NULL); + nir_intrinsic_set_desc_set(bindless, set); + bindless->src[0] = nir_src_for_ssa(nir_iadd(b, nir_imm_int(b, base), vulkan_idx)); + nir_builder_instr_insert(b, &bindless->instr); + nir_ssa_def_rewrite_uses(&instr->dest.ssa, - nir_src_for_ssa(nir_imm_int(b, index))); + nir_src_for_ssa(&bindless->dest.ssa)); nir_instr_remove(&instr->instr); } -static void -lower_image_deref(nir_builder *b, - nir_intrinsic_instr *instr, struct tu_shader *shader, - const struct tu_pipeline_layout *layout) +static nir_ssa_def * +build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler, + struct tu_shader *shader, + const struct tu_pipeline_layout *layout) { - nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); nir_variable *var = nir_deref_instr_get_variable(deref); - uint32_t set = var->data.descriptor_set; - uint32_t binding = var->data.binding; - struct tu_descriptor_set_layout *set_layout = layout->set[set].layout; - struct tu_descriptor_set_binding_layout *binding_layout = - &set_layout->binding[binding]; + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + const struct tu_descriptor_set_binding_layout *bind_layout = + &layout->set[set].layout->binding[binding]; + + nir_ssa_def *desc_offset; + unsigned descriptor_stride; + if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) { + unsigned offset = + layout->set[set].input_attachment_start + + bind_layout->input_attachment_offset; + desc_offset = nir_imm_int(b, offset); + set = MAX_SETS; + descriptor_stride = 1; + } else { + unsigned offset = 0; + /* Samplers come second in combined image/sampler descriptors, see + * write_combined_image_sampler_descriptor(). 
+ */ + if (is_sampler && bind_layout->type == + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + offset = 1; + } + desc_offset = + nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) + + offset); + descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS); + } - nir_ssa_def *index = nir_imm_int(b, - map_add(&shader->image_map, - set, binding, var->data.index, - binding_layout->array_size)); if (deref->deref_type != nir_deref_type_var) { assert(deref->deref_type == nir_deref_type_array); - index = nir_iadd(b, index, nir_ssa_for_src(b, deref->arr.index, 1)); + + nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1); + desc_offset = nir_iadd(b, desc_offset, + nir_imul_imm(b, arr_index, descriptor_stride)); } - nir_rewrite_image_intrinsic(instr, index, false); + + nir_intrinsic_instr *bindless = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_bindless_resource_ir3); + bindless->num_components = 1; + nir_ssa_dest_init(&bindless->instr, &bindless->dest, + 1, 32, NULL); + nir_intrinsic_set_desc_set(bindless, set); + bindless->src[0] = nir_src_for_ssa(desc_offset); + nir_builder_instr_insert(b, &bindless->instr); + + return &bindless->dest.ssa; +} + +static void +lower_image_deref(nir_builder *b, + nir_intrinsic_instr *instr, struct tu_shader *shader, + const struct tu_pipeline_layout *layout) +{ + nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); + nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout); + nir_rewrite_image_intrinsic(instr, bindless, true); } static bool @@ -326,9 +250,6 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, case nir_intrinsic_image_deref_atomic_comp_swap: case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: - case nir_intrinsic_image_deref_load_param_intel: - case nir_intrinsic_image_deref_load_raw_intel: - case nir_intrinsic_image_deref_store_raw_intel: lower_image_deref(b, instr, shader, layout); return true; @@ -337,6 +258,31 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, } } +static bool +lower_tex(nir_builder *b, nir_tex_instr *tex, + struct tu_shader *shader, const struct tu_pipeline_layout *layout) +{ + int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref); + if (sampler_src_idx >= 0) { + nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src); + nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout); + nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src, + nir_src_for_ssa(bindless)); + tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle; + } + + int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + if (tex_src_idx >= 0) { + nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src); + nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout); + nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src, + nir_src_for_ssa(bindless)); + tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle; + } + + return true; +} + static bool lower_impl(nir_function_impl *impl, struct tu_shader *shader, const struct tu_pipeline_layout *layout) @@ -350,7 +296,7 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader, b.cursor = nir_before_instr(instr); switch (instr->type) { case nir_instr_type_tex: - progress |= lower_sampler(&b, nir_instr_as_tex(instr), shader, layout); + progress |= lower_tex(&b, nir_instr_as_tex(instr), shader, layout); break; case nir_instr_type_intrinsic: progress |= lower_intrinsic(&b, 
nir_instr_as_intrinsic(instr), shader, layout); @@ -364,26 +310,147 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader, return progress; } + +/* Figure out the range of push constants that we're actually going to push to + * the shader, and tell the backend to reserve this range when pushing UBO + * constants. + */ + +static void +gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader) +{ + uint32_t min = UINT32_MAX, max = 0; + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_push_constant) + continue; + + uint32_t base = nir_intrinsic_base(intrin); + uint32_t range = nir_intrinsic_range(intrin); + min = MIN2(min, base); + max = MAX2(max, base + range); + break; + } + } + } + + if (min >= max) { + tu_shader->push_consts.lo = 0; + tu_shader->push_consts.count = 0; + tu_shader->ir3_shader.const_state.num_reserved_user_consts = 0; + return; + } + + /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords), + * however there's an alignment requirement of 4 on OFFSET. Expand the + * range and change units accordingly. + */ + tu_shader->push_consts.lo = (min / 16) / 4 * 4; + tu_shader->push_consts.count = + align(max, 16) / 16 - tu_shader->push_consts.lo; + tu_shader->ir3_shader.const_state.num_reserved_user_consts = + align(tu_shader->push_consts.count, 4); +} + +/* Gather the InputAttachmentIndex for each input attachment from the NIR + * shader and organize the info in a way so that draw-time patching is easy. + */ +static void +gather_input_attachments(nir_shader *shader, struct tu_shader *tu_shader, + const struct tu_pipeline_layout *layout) +{ + nir_foreach_variable(var, &shader->uniforms) { + const struct glsl_type *glsl_type = glsl_without_array(var->type); + + if (!glsl_type_is_image(glsl_type)) + continue; + + enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type); + + const uint32_t set = var->data.descriptor_set; + const uint32_t binding = var->data.binding; + const struct tu_descriptor_set_binding_layout *bind_layout = + &layout->set[set].layout->binding[binding]; + const uint32_t array_size = bind_layout->array_size; + + if (dim == GLSL_SAMPLER_DIM_SUBPASS || + dim == GLSL_SAMPLER_DIM_SUBPASS_MS) { + unsigned offset = + layout->set[set].input_attachment_start + + bind_layout->input_attachment_offset; + for (unsigned i = 0; i < array_size; i++) + tu_shader->attachment_idx[offset + i] = var->data.index + i; + } + } +} + static bool tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, const struct tu_pipeline_layout *layout) { bool progress = false; + gather_push_constants(shader, tu_shader); + gather_input_attachments(shader, tu_shader, layout); + nir_foreach_function(function, shader) { if (function->impl) progress |= lower_impl(function->impl, tu_shader, layout); } - /* spirv_to_nir produces num_ssbos equal to the number of SSBO-containing - * variables, while ir3 wants the number of descriptors (like the gallium - * path). 
- */ - shader->info.num_ssbos = tu_shader->ssbo_map.num_desc; - return progress; } +static void +tu_gather_xfb_info(nir_shader *nir, struct tu_shader *shader) +{ + struct ir3_stream_output_info *info = &shader->ir3_shader.stream_output; + nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL); + + if (!xfb) + return; + + /* creating a map from VARYING_SLOT_* enums to consecutive index */ + uint8_t num_outputs = 0; + uint64_t outputs_written = 0; + for (int i = 0; i < xfb->output_count; i++) + outputs_written |= BITFIELD64_BIT(xfb->outputs[i].location); + + uint8_t output_map[VARYING_SLOT_TESS_MAX]; + memset(output_map, 0, sizeof(output_map)); + + for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) { + if (outputs_written & BITFIELD64_BIT(attr)) + output_map[attr] = num_outputs++; + } + + assert(xfb->output_count < IR3_MAX_SO_OUTPUTS); + info->num_outputs = xfb->output_count; + + for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++) + info->stride[i] = xfb->buffers[i].stride / 4; + + for (int i = 0; i < xfb->output_count; i++) { + info->output[i].register_index = output_map[xfb->outputs[i].location]; + info->output[i].start_component = xfb->outputs[i].component_offset; + info->output[i].num_components = + util_bitcount(xfb->outputs[i].component_mask); + info->output[i].output_buffer = xfb->outputs[i].buffer; + info->output[i].dst_offset = xfb->outputs[i].offset / 4; + info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer]; + } + + ralloc_free(xfb); +} + struct tu_shader * tu_shader_create(struct tu_device *dev, gl_shader_stage stage, @@ -439,6 +506,16 @@ tu_shader_create(struct tu_device *dev, NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared); + /* Gather information for transform feedback. + * This should be called after nir_split_per_member_structs. + * Also needs to be called after nir_remove_dead_variables with varyings, + * so that we could align stream outputs correctly. + */ + if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL || + nir->info.stage == MESA_SHADER_GEOMETRY) + tu_gather_xfb_info(nir, shader); + NIR_PASS_V(nir, nir_propagate_invariant); NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true); @@ -518,9 +595,21 @@ tu_shader_compile_options_init( struct tu_shader_compile_options *options, const VkGraphicsPipelineCreateInfo *pipeline_info) { - *options = (struct tu_shader_compile_options) { - /* TODO ir3_key */ + bool has_gs = false; + if (pipeline_info) { + for (uint32_t i = 0; i < pipeline_info->stageCount; i++) { + if (pipeline_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) { + has_gs = true; + break; + } + } + } + *options = (struct tu_shader_compile_options) { + /* TODO: Populate the remaining fields of ir3_shader_key. 
*/ + .key = { + .has_gs = has_gs, + }, /* TODO: VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT * some optimizations need to happen otherwise shader might not compile */ @@ -574,6 +663,14 @@ tu_shader_compile(struct tu_device *dev, if (!shader->binary) return VK_ERROR_OUT_OF_HOST_MEMORY; + if (shader_debug_enabled(shader->ir3_shader.type)) { + fprintf(stdout, "Native code for unnamed %s shader %s:\n", + ir3_shader_stage(&shader->variants[0]), shader->ir3_shader.nir->info.name); + if (shader->ir3_shader.type == MESA_SHADER_FRAGMENT) + fprintf(stdout, "SIMD0\n"); + ir3_shader_disasm(&shader->variants[0], shader->binary, stdout); + } + /* compile another variant for the binning pass */ if (options->include_binning_pass && shader->ir3_shader.type == MESA_SHADER_VERTEX) { @@ -584,6 +681,12 @@ tu_shader_compile(struct tu_device *dev, return VK_ERROR_OUT_OF_HOST_MEMORY; shader->has_binning_pass = true; + + if (shader_debug_enabled(MESA_SHADER_VERTEX)) { + fprintf(stdout, "Native code for unnamed binning shader %s:\n", + shader->ir3_shader.nir->info.name); + ir3_shader_disasm(&shader->variants[1], shader->binary, stdout); + } } if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_IR3)) {
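
Note (not part of the commit above): the push-constant handling added by this patch reserves a vec4-aligned window of user consts and rewrites each load_push_constant into a load_uniform relative to that window. Below is a minimal standalone sketch of that arithmetic. The helper align_u32, the variable names, and the sample byte offsets are made up for illustration; only the /16, /4*4 and align(count, 4) math mirrors gather_push_constants() and lower_load_push_constant() in the diff.

/* Sketch of the push-constant window math from this patch (illustrative only). */
#include <stdint.h>
#include <stdio.h>

static uint32_t align_u32(uint32_t v, uint32_t a)
{
   return (v + a - 1) / a * a;
}

int main(void)
{
   /* Suppose the shader loads push constants at byte offsets [20, 72). */
   uint32_t min = 20, max = 72;

   /* CP_LOAD_STATE offsets and sizes are in vec4 (16-byte) units, and the
    * offset must be aligned to 4 vec4s, so the window is rounded outwards.
    */
   uint32_t lo       = (min / 16) / 4 * 4;           /* -> 0 vec4s  */
   uint32_t count    = align_u32(max, 16) / 16 - lo; /* -> 5 vec4s  */
   uint32_t reserved = align_u32(count, 4);          /* -> 8 vec4s reserved
                                                        as user consts */

   /* A load_push_constant at byte offset 32 is then rewritten into a
    * load_uniform whose base is (32 - lo * 16) / 4 = 8 dwords into the
    * reserved range, with any dynamic byte offset shifted right by 2 to
    * convert it to dwords.
    */
   printf("lo=%u count=%u reserved=%u base=%u\n",
          lo, count, reserved, (32 - lo * 16) / 4);
   return 0;
}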