From: Jason Ekstrand Date: Thu, 22 Nov 2018 00:26:27 +0000 (-0600) Subject: anv: Put image params in the descriptor set buffer on gen8 and earlier X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3b755b52e80acc7705775ccaf5f31cf213edb207;p=mesa.git anv: Put image params in the descriptor set buffer on gen8 and earlier This is really where they belong; not push constants. The one downside here is that we can't push them anymore for compute shaders. However, that's a general problem and we should figure out how to push descriptor sets for compute shaders. This lets us bump MAX_IMAGES to 64 on BDW and earlier platforms because we no longer have to worry about push constant overhead limits. Reviewed-by: Lionel Landwerlin Reviewed-by: Caio Marcelo de Oliveira Filho --- diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index a648105970d..fa1d042f792 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -90,7 +90,12 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device, static unsigned anv_descriptor_data_size(enum anv_descriptor_data data) { - return 0; + unsigned size = 0; + + if (data & ANV_DESCRIPTOR_IMAGE_PARAM) + size += BRW_IMAGE_PARAM_SIZE * 4; + + return size; } /** Returns the size in bytes of each descriptor with the given layout */ @@ -902,6 +907,24 @@ VkResult anv_FreeDescriptorSets( return VK_SUCCESS; } +static void +anv_descriptor_set_write_image_param(uint32_t *param_desc_map, + const struct brw_image_param *param) +{ +#define WRITE_PARAM_FIELD(field, FIELD) \ + for (unsigned i = 0; i < ARRAY_SIZE(param->field); i++) \ + param_desc_map[BRW_IMAGE_PARAM_##FIELD##_OFFSET + i] = param->field[i] + + WRITE_PARAM_FIELD(offset, OFFSET); + WRITE_PARAM_FIELD(size, SIZE); + WRITE_PARAM_FIELD(stride, STRIDE); + WRITE_PARAM_FIELD(tiling, TILING); + WRITE_PARAM_FIELD(swizzling, SWIZZLING); + WRITE_PARAM_FIELD(size, SIZE); + +#undef WRITE_PARAM_FIELD +} + void anv_descriptor_set_write_image_view(struct anv_device *device, struct anv_descriptor_set *set, @@ -952,6 +975,18 @@ anv_descriptor_set_write_image_view(struct anv_device *device, .image_view = image_view, .sampler = sampler, }; + + void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset + + element * anv_descriptor_size(bind_layout); + + if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) { + /* Storage images can only ever have one plane */ + assert(image_view->n_planes == 1); + const struct brw_image_param *image_param = + &image_view->planes[0].storage_image_param; + + anv_descriptor_set_write_image_param(desc_map, image_param); + } } void @@ -973,6 +1008,14 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device, .type = type, .buffer_view = buffer_view, }; + + void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset + + element * anv_descriptor_size(bind_layout); + + if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) { + anv_descriptor_set_write_image_param(desc_map, + &buffer_view->storage_image_param); + } } void diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 2af09d4f122..bc51c2c1aab 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1096,8 +1096,6 @@ void anv_GetPhysicalDeviceProperties( const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ? 128 : 16; - const uint32_t max_images = devinfo->gen < 9 ? MAX_GEN8_IMAGES : MAX_IMAGES; - VkSampleCountFlags sample_counts = isl_device_get_sample_counts(&pdevice->isl_dev); @@ -1121,7 +1119,7 @@ void anv_GetPhysicalDeviceProperties( .maxPerStageDescriptorUniformBuffers = 64, .maxPerStageDescriptorStorageBuffers = 64, .maxPerStageDescriptorSampledImages = max_samplers, - .maxPerStageDescriptorStorageImages = max_images, + .maxPerStageDescriptorStorageImages = MAX_IMAGES, .maxPerStageDescriptorInputAttachments = 64, .maxPerStageResources = 250, .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */ @@ -1130,7 +1128,7 @@ void anv_GetPhysicalDeviceProperties( .maxDescriptorSetStorageBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorStorageBuffers */ .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, .maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */ - .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */ + .maxDescriptorSetStorageImages = 6 * MAX_IMAGES, /* number of stages * maxPerStageDescriptorStorageImages */ .maxDescriptorSetInputAttachments = 256, .maxVertexInputAttributes = MAX_VBS, .maxVertexInputBindings = MAX_VBS, diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 29d43ca2795..3448e753842 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -35,8 +35,6 @@ struct apply_pipeline_layout_state { struct anv_pipeline_layout *layout; bool add_bounds_checks; - unsigned first_image_uniform; - bool uses_constants; uint8_t constants_offset; struct { @@ -46,7 +44,6 @@ struct apply_pipeline_layout_state { BITSET_WORD *used; uint8_t *surface_offsets; uint8_t *sampler_offsets; - uint8_t *image_offsets; } set[MAX_SETS]; }; @@ -239,11 +236,11 @@ lower_get_buffer_size(nir_intrinsic_instr *intrin, nir_src_for_ssa(nir_channel(b, index, 0))); } -static void -lower_image_intrinsic(nir_intrinsic_instr *intrin, +static nir_ssa_def * +build_descriptor_load(nir_deref_instr *deref, unsigned offset, + unsigned num_components, unsigned bit_size, struct apply_pipeline_layout_state *state) { - nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); nir_variable *var = nir_deref_instr_get_variable(deref); unsigned set = var->data.descriptor_set; @@ -251,47 +248,80 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin, unsigned array_size = state->layout->set[set].layout->binding[binding].array_size; + const struct anv_descriptor_set_binding_layout *bind_layout = + &state->layout->set[set].layout->binding[binding]; + nir_builder *b = &state->builder; - b->cursor = nir_before_instr(&intrin->instr); - nir_ssa_def *index = NULL; + nir_ssa_def *desc_buffer_index = + nir_imm_int(b, state->set[set].desc_offset); + + nir_ssa_def *desc_offset = + nir_imm_int(b, bind_layout->descriptor_offset + offset); if (deref->deref_type != nir_deref_type_var) { assert(deref->deref_type == nir_deref_type_array); - index = nir_ssa_for_src(b, deref->arr.index, 1); + + const unsigned descriptor_size = anv_descriptor_size(bind_layout); + nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1); if (state->add_bounds_checks) - index = nir_umin(b, index, nir_imm_int(b, array_size - 1)); - } else { - index = nir_imm_int(b, 0); + arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1)); + + desc_offset = nir_iadd(b, desc_offset, + nir_imul_imm(b, arr_index, descriptor_size)); } + nir_intrinsic_instr *desc_load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo); + desc_load->src[0] = nir_src_for_ssa(desc_buffer_index); + desc_load->src[1] = nir_src_for_ssa(desc_offset); + desc_load->num_components = num_components; + nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, + num_components, bit_size, NULL); + nir_builder_instr_insert(b, &desc_load->instr); + + return &desc_load->dest.ssa; +} + +static void +lower_image_intrinsic(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + + nir_builder *b = &state->builder; + b->cursor = nir_before_instr(&intrin->instr); + if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) { b->cursor = nir_instr_remove(&intrin->instr); - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform); + const unsigned param = nir_intrinsic_base(intrin); - nir_intrinsic_set_base(load, state->first_image_uniform + - state->set[set].image_offsets[binding] * - BRW_IMAGE_PARAM_SIZE * 4); - nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4); + nir_ssa_def *desc = + build_descriptor_load(deref, param * 16, + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size, state); - const unsigned param = nir_intrinsic_base(intrin); - nir_ssa_def *offset = - nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4)); - offset = nir_iadd(b, offset, nir_imm_int(b, param * 16)); - load->src[0] = nir_src_for_ssa(offset); - - load->num_components = intrin->dest.ssa.num_components; - nir_ssa_dest_init(&load->instr, &load->dest, - intrin->dest.ssa.num_components, - intrin->dest.ssa.bit_size, NULL); - nir_builder_instr_insert(b, &load->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&load->dest.ssa)); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc)); } else { + nir_variable *var = nir_deref_instr_get_variable(deref); + + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; unsigned binding_offset = state->set[set].surface_offsets[binding]; - index = nir_iadd(b, index, nir_imm_int(b, binding_offset)); + unsigned array_size = + state->layout->set[set].layout->binding[binding].array_size; + + nir_ssa_def *index = NULL; + if (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + index = nir_ssa_for_src(b, deref->arr.index, 1); + if (state->add_bounds_checks) + index = nir_umin(b, index, nir_imm_int(b, array_size - 1)); + } else { + index = nir_imm_int(b, 0); + } + + index = nir_iadd_imm(b, index, binding_offset); nir_rewrite_image_intrinsic(intrin, index, false); } } @@ -475,16 +505,6 @@ apply_pipeline_layout_block(nir_block *block, } } -static void -setup_vec4_uniform_value(uint32_t *params, uint32_t offset, unsigned n) -{ - for (unsigned i = 0; i < n; ++i) - params[i] = ANV_PARAM_PUSH(offset + i * sizeof(uint32_t)); - - for (unsigned i = n; i < 4; ++i) - params[i] = BRW_PARAM_BUILTIN_ZERO; -} - void anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice, bool robust_buffer_access, @@ -508,7 +528,6 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice, state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words); state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count); state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count); - state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count); } nir_foreach_function(function, shader) { @@ -583,45 +602,9 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice, } } } - - if (binding->data & ANV_DESCRIPTOR_IMAGE_PARAM) { - state.set[set].image_offsets[b] = map->image_param_count; - map->image_param_count += binding->array_size; - } } } - if (map->image_param_count > 0) { - assert(map->image_param_count <= MAX_GEN8_IMAGES); - assert(shader->num_uniforms == prog_data->nr_params * 4); - state.first_image_uniform = shader->num_uniforms; - uint32_t *param = brw_stage_prog_data_add_params(prog_data, - map->image_param_count * - BRW_IMAGE_PARAM_SIZE); - struct anv_push_constants *null_data = NULL; - const struct brw_image_param *image_param = null_data->images; - for (uint32_t i = 0; i < map->image_param_count; i++) { - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - (uintptr_t)image_param->offset, 2); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - (uintptr_t)image_param->size, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - (uintptr_t)image_param->stride, 4); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, - (uintptr_t)image_param->tiling, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - (uintptr_t)image_param->swizzling, 2); - - param += BRW_IMAGE_PARAM_SIZE; - image_param ++; - } - assert(param == prog_data->param + prog_data->nr_params); - - shader->num_uniforms += map->image_param_count * - BRW_IMAGE_PARAM_SIZE * 4; - assert(shader->num_uniforms == prog_data->nr_params * 4); - } - nir_foreach_variable(var, &shader->uniforms) { const struct glsl_type *glsl_type = glsl_without_array(var->type); diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index bc7dd100e2b..7b9b1e83678 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -154,7 +154,6 @@ anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader, blob_write_uint32(blob, shader->bind_map.surface_count); blob_write_uint32(blob, shader->bind_map.sampler_count); - blob_write_uint32(blob, shader->bind_map.image_param_count); blob_write_bytes(blob, shader->bind_map.surface_to_descriptor, shader->bind_map.surface_count * sizeof(*shader->bind_map.surface_to_descriptor)); @@ -194,7 +193,6 @@ anv_shader_bin_create_from_blob(struct anv_device *device, struct anv_pipeline_bind_map bind_map; bind_map.surface_count = blob_read_uint32(blob); bind_map.sampler_count = blob_read_uint32(blob); - bind_map.image_param_count = blob_read_uint32(blob); bind_map.surface_to_descriptor = (void *) blob_read_bytes(blob, bind_map.surface_count * sizeof(*bind_map.surface_to_descriptor)); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index bfeade7ce48..28854411b11 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -158,7 +158,6 @@ struct gen_l3_config; #define MAX_PUSH_CONSTANTS_SIZE 128 #define MAX_DYNAMIC_BUFFERS 16 #define MAX_IMAGES 64 -#define MAX_GEN8_IMAGES 8 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */ #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 @@ -2077,9 +2076,6 @@ struct anv_push_constants { /* Used for vkCmdDispatchBase */ uint32_t base_work_group_id[3]; - - /* Image data for image_load_store on pre-SKL */ - struct brw_image_param images[MAX_GEN8_IMAGES]; }; struct anv_dynamic_state { @@ -2587,7 +2583,6 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) struct anv_pipeline_bind_map { uint32_t surface_count; uint32_t sampler_count; - uint32_t image_param_count; struct anv_pipeline_binding * surface_to_descriptor; struct anv_pipeline_binding * sampler_to_descriptor; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index a6d061e7ca0..3189585cbd3 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -2010,7 +2010,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, struct anv_state *bt_state) { - const struct gen_device_info *devinfo = &cmd_buffer->device->info; struct anv_subpass *subpass = cmd_buffer->state.subpass; struct anv_cmd_pipeline_state *pipe_state; struct anv_pipeline *pipeline; @@ -2051,17 +2050,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, const bool need_client_mem_relocs = !cmd_buffer->device->instance->physicalDevice.use_softpin; - /* We only use push constant space for images before gen9 */ - if (map->image_param_count > 0) { - VkResult result = - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); - if (result != VK_SUCCESS) - return result; - - cmd_buffer->state.push_constants_dirty |= 1 << stage; - } - - uint32_t image = 0; for (uint32_t s = 0; s < map->surface_count; s++) { struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; @@ -2201,18 +2189,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, assert(surface_state.alloc_size); if (need_client_mem_relocs) add_surface_state_relocs(cmd_buffer, sstate); - if (devinfo->gen < 9) { - /* We only need the image params on gen8 and earlier. No image - * workarounds that require tiling information are required on - * SKL and above. - */ - assert(image < MAX_GEN8_IMAGES); - struct brw_image_param *image_param = - &cmd_buffer->state.push_constants[stage]->images[image++]; - - *image_param = - desc->image_view->planes[binding->plane].storage_image_param; - } break; } @@ -2262,13 +2238,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, add_surface_reloc(cmd_buffer, surface_state, desc->buffer_view->address); } - if (devinfo->gen < 9) { - assert(image < MAX_GEN8_IMAGES); - struct brw_image_param *image_param = - &cmd_buffer->state.push_constants[stage]->images[image++]; - - *image_param = desc->buffer_view->storage_image_param; - } break; default: @@ -2278,7 +2247,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, bt_map[s] = surface_state.offset + state_offset; } - assert(image == map->image_param_count); #if GEN_GEN >= 11 /* The PIPE_CONTROL command description says: