From c2581a9375ea956abf4b9b75d27ec199fabb99d0 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Kristian=20H=C3=B8gsberg=20Kristensen?=
Date: Wed, 3 Feb 2016 12:14:28 -0800
Subject: [PATCH] anv: Build the real pipeline layout in the pipeline

This gives us the chance to pack the binding table down to just what
the shaders actually need.  Some applications use very large descriptor
sets and only ever use a handful of entries.  Compacted binding tables
should be much more efficient in this case.

The downside is that we now have to re-emit binding tables every time
we switch pipelines, but that is considered an acceptable cost.
---
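
The mechanical core of the change: each pipeline now carries a per-stage
anv_pipeline_bind_map whose surface_to_descriptor / sampler_to_descriptor
arrays translate a compacted binding-table slot back into a (set, offset)
pair.  A rough sketch of the draw-time lookup -- the helper name here is
hypothetical; the real logic is inlined in anv_cmd_buffer_emit_binding_table()
below:

   /* Illustration only: resolve one compacted surface slot back to its
    * descriptor.  anv_cmd_buffer_emit_binding_table() does this inline
    * for every slot in the map.
    */
   static struct anv_descriptor *
   slot_to_descriptor(struct anv_cmd_buffer *cmd_buffer,
                      const struct anv_pipeline_bind_map *map,
                      uint32_t slot)
   {
      const struct anv_pipeline_binding *b = &map->surface_to_descriptor[slot];
      struct anv_descriptor_set *set = cmd_buffer->state.descriptors[b->set];

      return &set->descriptors[b->offset];
   }
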
 src/vulkan/anv_cmd_buffer.c                |  47 ++--
 src/vulkan/anv_nir.h                       |   6 +-
 src/vulkan/anv_nir_apply_dynamic_offsets.c |   2 +-
 src/vulkan/anv_nir_apply_pipeline_layout.c | 242 ++++++++++++++-------
 src/vulkan/anv_pipeline.c                  |  11 +-
 src/vulkan/anv_private.h                   |  11 +
 6 files changed, 201 insertions(+), 118 deletions(-)

diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c
index bc6b3925cd2..b060828cf61 100644
--- a/src/vulkan/anv_cmd_buffer.c
+++ b/src/vulkan/anv_cmd_buffer.c
@@ -433,6 +433,7 @@ void anv_CmdBindPipeline(
       cmd_buffer->state.compute_pipeline = pipeline;
       cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE;
       cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
+      cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
       break;

    case VK_PIPELINE_BIND_POINT_GRAPHICS:
@@ -440,6 +441,7 @@ void anv_CmdBindPipeline(
       cmd_buffer->state.vb_dirty |= pipeline->vb_used;
       cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
       cmd_buffer->state.push_constants_dirty |= pipeline->active_stages;
+      cmd_buffer->state.descriptors_dirty |= pipeline->active_stages;

       /* Apply the dynamic state from the pipeline */
       cmd_buffer->state.dirty |= pipeline->dynamic_state_mask;
@@ -702,39 +704,34 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 {
    struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
    struct anv_subpass *subpass = cmd_buffer->state.subpass;
-   struct anv_pipeline_layout *layout;
+   struct anv_pipeline_bind_map *map;
    uint32_t color_count, bias, state_offset;

    switch (stage) {
    case MESA_SHADER_FRAGMENT:
-      layout = cmd_buffer->state.pipeline->layout;
+      map = &cmd_buffer->state.pipeline->bindings[stage];
       bias = MAX_RTS;
       color_count = subpass->color_count;
       break;
    case MESA_SHADER_COMPUTE:
-      layout = cmd_buffer->state.compute_pipeline->layout;
+      map = &cmd_buffer->state.compute_pipeline->bindings[stage];
       bias = 1;
       color_count = 0;
       break;
    default:
-      layout = cmd_buffer->state.pipeline->layout;
+      map = &cmd_buffer->state.pipeline->bindings[stage];
       bias = 0;
       color_count = 0;
       break;
    }

-   /* This is a little awkward: layout can be NULL but we still have to
-    * allocate and set a binding table for the PS stage for render
-    * targets. */
-   uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
-
-   if (color_count + surface_count == 0) {
+   if (color_count + map->surface_count == 0) {
       *bt_state = (struct anv_state) { 0, };
       return VK_SUCCESS;
    }

    *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer,
-                                                  bias + surface_count,
+                                                  bias + map->surface_count,
                                                   &state_offset);
    uint32_t *bt_map = bt_state->map;

@@ -769,10 +766,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
    }

-   if (layout == NULL)
+   if (map->surface_count == 0)
       goto out;

-   if (layout->stage[stage].image_count > 0) {
+   if (map->image_count > 0) {
       VkResult result =
          anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images);
       if (result != VK_SUCCESS)
@@ -782,9 +779,8 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
    }

    uint32_t image = 0;
-   for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) {
-      struct anv_pipeline_binding *binding =
-         &layout->stage[stage].surface_to_descriptor[s];
+   for (uint32_t s = 0; s < map->surface_count; s++) {
+      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s];
       struct anv_descriptor_set *set =
          cmd_buffer->state.descriptors[binding->set];
       struct anv_descriptor *desc = &set->descriptors[binding->offset];
@@ -855,7 +851,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       bt_map[bias + s] = surface_state.offset + state_offset;
       add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
    }
-   assert(image == layout->stage[stage].image_count);
+   assert(image == map->image_count);

  out:
    if (!cmd_buffer->device->info.has_llc)
@@ -868,29 +864,26 @@ VkResult
 anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
                              gl_shader_stage stage, struct anv_state *state)
 {
-   struct anv_pipeline_layout *layout;
-   uint32_t sampler_count;
+   struct anv_pipeline_bind_map *map;

    if (stage == MESA_SHADER_COMPUTE)
-      layout = cmd_buffer->state.compute_pipeline->layout;
+      map = &cmd_buffer->state.compute_pipeline->bindings[stage];
    else
-      layout = cmd_buffer->state.pipeline->layout;
+      map = &cmd_buffer->state.pipeline->bindings[stage];

-   sampler_count = layout ? layout->stage[stage].sampler_count : 0;
-   if (sampler_count == 0) {
+   if (map->sampler_count == 0) {
      *state = (struct anv_state) { 0, };
      return VK_SUCCESS;
    }

-   uint32_t size = sampler_count * 16;
+   uint32_t size = map->sampler_count * 16;
    *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32);

    if (state->map == NULL)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

-   for (uint32_t s = 0; s < layout->stage[stage].sampler_count; s++) {
-      struct anv_pipeline_binding *binding =
-         &layout->stage[stage].sampler_to_descriptor[s];
+   for (uint32_t s = 0; s < map->sampler_count; s++) {
+      struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s];
       struct anv_descriptor_set *set =
          cmd_buffer->state.descriptors[binding->set];
       struct anv_descriptor *desc = &set->descriptors[binding->offset];
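
Because the table layout is now a property of the pipeline rather than of
the VkPipelineLayout alone, vkCmdBindPipeline marks the affected stages'
descriptors dirty (the two descriptors_dirty lines above) and the next
flush re-emits tables per stage.  Roughly, using the anv_foreach_stage()
helper from anv_private.h (sketch only; the in-tree flush path also
recovers from binding-table allocation failure by switching to a fresh
block and retrying):

   VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
                              cmd_buffer->state.pipeline->active_stages;

   anv_foreach_stage(s, dirty) {
      struct anv_state samplers, surfaces;

      if (anv_cmd_buffer_emit_samplers(cmd_buffer, s, &samplers) != VK_SUCCESS)
         break;
      if (anv_cmd_buffer_emit_binding_table(cmd_buffer, s, &surfaces) != VK_SUCCESS)
         break;
   }
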
diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h
index c76314d9df6..a7ea3eb0e28 100644
--- a/src/vulkan/anv_nir.h
+++ b/src/vulkan/anv_nir.h
@@ -35,9 +35,9 @@ void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar);
 void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
                                    nir_shader *shader,
                                    struct brw_stage_prog_data *prog_data);
-bool anv_nir_apply_pipeline_layout(nir_shader *shader,
-                                   struct brw_stage_prog_data *prog_data,
-                                   const struct anv_pipeline_layout *layout);
+void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
+                                   nir_shader *shader,
+                                   struct brw_stage_prog_data *prog_data);

 #ifdef __cplusplus
 }
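
The pass now takes the pipeline (so it can store the bind map it builds)
and no longer reports progress -- with a layout present, lowering is
unconditional.  The new prototype also mirrors anv_nir_apply_dynamic_offsets()
above it, so the two call sites in anv_pipeline_compile() line up (sketch,
error handling elided):

   anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);
   if (pipeline->layout)
      anv_nir_apply_pipeline_layout(pipeline, nir, prog_data);
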
diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c
index a5e3238a36a..e71a8ffb1f4 100644
--- a/src/vulkan/anv_nir_apply_dynamic_offsets.c
+++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c
@@ -28,7 +28,7 @@ struct apply_dynamic_offsets_state {
    nir_shader *shader;
    nir_builder builder;

-   struct anv_pipeline_layout *layout;
+   const struct anv_pipeline_layout *layout;

    uint32_t indices_start;
 };
diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c
index ee93e40e76c..c58a93878ee 100644
--- a/src/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/vulkan/anv_nir_apply_pipeline_layout.c
@@ -29,78 +29,76 @@ struct apply_pipeline_layout_state {
    nir_shader *shader;
    nir_builder builder;

-   const struct anv_pipeline_layout *layout;
-
-   bool progress;
+   struct {
+      BITSET_WORD *used;
+      uint8_t *surface_offsets;
+      uint8_t *sampler_offsets;
+      uint8_t *image_offsets;
+   } set[MAX_SETS];
 };

-static uint32_t
-get_surface_index(unsigned set, unsigned binding,
-                  struct apply_pipeline_layout_state *state)
+static void
+add_binding(struct apply_pipeline_layout_state *state,
+            uint32_t set, uint32_t binding)
 {
-   assert(set < state->layout->num_sets);
-   struct anv_descriptor_set_layout *set_layout =
-      state->layout->set[set].layout;
-
-   gl_shader_stage stage = state->shader->stage;
-
-   assert(binding < set_layout->binding_count);
-
-   assert(set_layout->binding[binding].stage[stage].surface_index >= 0);
-
-   uint32_t surface_index =
-      state->layout->set[set].stage[stage].surface_start +
-      set_layout->binding[binding].stage[stage].surface_index;
-
-   assert(surface_index < state->layout->stage[stage].surface_count);
-
-   return surface_index;
+   BITSET_SET(state->set[set].used, binding);
 }

-static uint32_t
-get_sampler_index(unsigned set, unsigned binding,
-                  struct apply_pipeline_layout_state *state)
+static void
+add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var)
 {
-   assert(set < state->layout->num_sets);
-   struct anv_descriptor_set_layout *set_layout =
-      state->layout->set[set].layout;
-
-   gl_shader_stage stage = state->shader->stage;
-
-   assert(binding < set_layout->binding_count);
-
-   assert(set_layout->binding[binding].stage[stage].sampler_index >= 0);
-
-   uint32_t sampler_index =
-      state->layout->set[set].stage[stage].sampler_start +
-      set_layout->binding[binding].stage[stage].sampler_index;
-
-   assert(sampler_index < state->layout->stage[stage].sampler_count);
-
-   return sampler_index;
+   add_binding(state, var->data.descriptor_set, var->data.binding);
 }

-static uint32_t
-get_image_index(unsigned set, unsigned binding,
-                struct apply_pipeline_layout_state *state)
+static bool
+get_used_bindings_block(nir_block *block, void *void_state)
 {
-   assert(set < state->layout->num_sets);
-   struct anv_descriptor_set_layout *set_layout =
-      state->layout->set[set].layout;
-
-   assert(binding < set_layout->binding_count);
-
-   gl_shader_stage stage = state->shader->stage;
-
-   assert(set_layout->binding[binding].stage[stage].image_index >= 0);
-
-   uint32_t image_index =
-      state->layout->set[set].stage[stage].image_start +
-      set_layout->binding[binding].stage[stage].image_index;
+   struct apply_pipeline_layout_state *state = void_state;

-   assert(image_index < state->layout->stage[stage].image_count);
+   nir_foreach_instr_safe(block, instr) {
+      switch (instr->type) {
+      case nir_instr_type_intrinsic: {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_vulkan_resource_index:
+            add_binding(state, nir_intrinsic_desc_set(intrin),
+                        nir_intrinsic_binding(intrin));
+            break;
+
+         case nir_intrinsic_image_load:
+         case nir_intrinsic_image_store:
+         case nir_intrinsic_image_atomic_add:
+         case nir_intrinsic_image_atomic_min:
+         case nir_intrinsic_image_atomic_max:
+         case nir_intrinsic_image_atomic_and:
+         case nir_intrinsic_image_atomic_or:
+         case nir_intrinsic_image_atomic_xor:
+         case nir_intrinsic_image_atomic_exchange:
+         case nir_intrinsic_image_atomic_comp_swap:
+         case nir_intrinsic_image_size:
+         case nir_intrinsic_image_samples:
+            add_var_binding(state, intrin->variables[0]->var);
+            break;
+
+         default:
+            break;
+         }
+         break;
+      }
+      case nir_instr_type_tex: {
+         nir_tex_instr *tex = nir_instr_as_tex(instr);
+         assert(tex->texture);
+         add_var_binding(state, tex->texture->var);
+         if (tex->sampler)
+            add_var_binding(state, tex->sampler->var);
+         break;
+      }
+      default:
+         continue;
+      }
+   }

-   return image_index;
+   return true;
 }

 static void
@@ -114,7 +112,7 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
    uint32_t set = nir_intrinsic_desc_set(intrin);
    uint32_t binding = nir_intrinsic_binding(intrin);

-   uint32_t surface_index = get_surface_index(set, binding, state);
+   uint32_t surface_index = state->set[set].surface_offsets[binding];

    nir_const_value *const_block_idx =
       nir_src_as_const_value(intrin->src[0]);
@@ -187,16 +185,16 @@ lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
    /* No one should have come by and lowered it already */
    assert(tex->texture);

-   tex->texture_index =
-      get_surface_index(tex->texture->var->data.descriptor_set,
-                        tex->texture->var->data.binding, state);
+   unsigned set = tex->texture->var->data.descriptor_set;
+   unsigned binding = tex->texture->var->data.binding;
+   tex->texture_index = state->set[set].surface_offsets[binding];
    lower_tex_deref(tex, tex->texture, &tex->texture_index,
                    nir_tex_src_texture_offset, state);

    if (tex->sampler) {
-      tex->sampler_index =
-         get_sampler_index(tex->sampler->var->data.descriptor_set,
-                           tex->sampler->var->data.binding, state);
+      unsigned set = tex->sampler->var->data.descriptor_set;
+      unsigned binding = tex->sampler->var->data.binding;
+      tex->sampler_index = state->set[set].sampler_offsets[binding];
       lower_tex_deref(tex, tex->sampler, &tex->sampler_index,
                       nir_tex_src_sampler_offset, state);
    }
@@ -224,14 +222,11 @@ apply_pipeline_layout_block(nir_block *block, void *void_state)
       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
       if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
          lower_res_index_intrinsic(intrin, state);
-         state->progress = true;
       }
       break;
    }
    case nir_instr_type_tex:
       lower_tex(nir_instr_as_tex(instr), state);
-      /* All texture instructions need lowering */
-      state->progress = true;
       break;
    default:
       continue;
@@ -255,16 +250,97 @@ setup_vec4_uniform_value(const union gl_constant_value **params,
       params[i] = &zero;
 }

-bool
-anv_nir_apply_pipeline_layout(nir_shader *shader,
-                              struct brw_stage_prog_data *prog_data,
-                              const struct anv_pipeline_layout *layout)
+void
+anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
+                              nir_shader *shader,
+                              struct brw_stage_prog_data *prog_data)
 {
+   struct anv_pipeline_layout *layout = pipeline->layout;
+
    struct apply_pipeline_layout_state state = {
       .shader = shader,
-      .layout = layout,
    };

+   void *mem_ctx = ralloc_context(NULL);
+
+   for (unsigned s = 0; s < layout->num_sets; s++) {
+      const unsigned count = layout->set[s].layout->binding_count;
+      const unsigned words = BITSET_WORDS(count);
+      state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
+      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
+      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
+      state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
+   }
+
+   nir_foreach_function(shader, function) {
+      if (function->impl)
+         nir_foreach_block(function->impl, get_used_bindings_block, &state);
+   }
+
+   struct anv_pipeline_bind_map map = {
+      .surface_count = 0,
+      .sampler_count = 0,
+   };
+
+   for (uint32_t set = 0; set < layout->num_sets; set++) {
+      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+
+      BITSET_WORD b, _tmp;
+      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
+                         set_layout->binding_count) {
+         if (set_layout->binding[b].stage[shader->stage].surface_index >= 0)
+            map.surface_count += set_layout->binding[b].array_size;
+         if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0)
+            map.sampler_count += set_layout->binding[b].array_size;
+         if (set_layout->binding[b].stage[shader->stage].image_index >= 0)
+            map.image_count += set_layout->binding[b].array_size;
+      }
+   }
+
+   map.surface_to_descriptor =
+      malloc(map.surface_count * sizeof(struct anv_pipeline_binding));
+   map.sampler_to_descriptor =
+      malloc(map.sampler_count * sizeof(struct anv_pipeline_binding));
+
+   pipeline->bindings[shader->stage] = map;
+
+   unsigned surface = 0;
+   unsigned sampler = 0;
+   unsigned image = 0;
+   for (uint32_t set = 0; set < layout->num_sets; set++) {
+      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
+
+      BITSET_WORD b, _tmp;
+      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
+                         set_layout->binding_count) {
+         unsigned array_size = set_layout->binding[b].array_size;
+         unsigned set_offset = set_layout->binding[b].descriptor_index;
+
+         if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) {
+            state.set[set].surface_offsets[b] = surface;
+            for (unsigned i = 0; i < array_size; i++) {
+               map.surface_to_descriptor[surface + i].set = set;
+               map.surface_to_descriptor[surface + i].offset = set_offset + i;
+            }
+            surface += array_size;
+         }
+
+         if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) {
+            state.set[set].sampler_offsets[b] = sampler;
+            for (unsigned i = 0; i < array_size; i++) {
+               map.sampler_to_descriptor[sampler + i].set = set;
+               map.sampler_to_descriptor[sampler + i].offset = set_offset + i;
+            }
+            sampler += array_size;
+         }
+
+         if (set_layout->binding[b].stage[shader->stage].image_index >= 0) {
+            state.set[set].image_offsets[b] = image;
+            image += array_size;
+         }
+      }
+   }
+
    nir_foreach_function(shader, function) {
       if (function->impl) {
          nir_builder_init(&state.builder, function->impl);
@@ -274,7 +350,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
       }
    }

-   if (layout->stage[shader->stage].image_count > 0) {
+   if (map.image_count > 0) {
       nir_foreach_variable(var, &shader->uniforms) {
          if (glsl_type_is_image(var->type) ||
              (glsl_type_is_array(var->type) &&
@@ -283,8 +359,9 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
           * information required for reading/writing to/from the image is
           * storred in the uniform.
           */
-         unsigned image_index = get_image_index(var->data.descriptor_set,
-                                                var->data.binding, &state);
+         unsigned set = var->data.descriptor_set;
+         unsigned binding = var->data.binding;
+         unsigned image_index = state.set[set].image_offsets[binding];

          var->data.driver_location = shader->num_uniforms +
                                      image_index * BRW_IMAGE_PARAM_SIZE * 4;
@@ -294,7 +371,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
    struct anv_push_constants *null_data = NULL;
    const gl_constant_value **param = prog_data->param + shader->num_uniforms;
    const struct brw_image_param *image_param = null_data->images;
-   for (uint32_t i = 0; i < layout->stage[shader->stage].image_count; i++) {
+   for (uint32_t i = 0; i < map.image_count; i++) {
       setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
                                (const union gl_constant_value *)&image_param->surface_idx, 1);
       setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
@@ -312,9 +389,6 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
       image_param ++;
    }

-   shader->num_uniforms += layout->stage[shader->stage].image_count *
-                           BRW_IMAGE_PARAM_SIZE * 4;
+   shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4;
    }
-
-   return state.progress;
 }
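
The pass is now two walks per descriptor set: get_used_bindings_block()
records every (set, binding) the shader touches in a bitset, then the
BITSET_FOREACH_SET loops first size the maps and then hand out dense slot
indices in binding order.  The compaction idea in isolation, using the
same util/bitset.h macros as above (standalone sketch; the binding
numbers are made up):

   #include "util/bitset.h"

   static unsigned
   compact_example(uint8_t dense[64])
   {
      BITSET_DECLARE(used, 64) = { 0 };
      BITSET_SET(used, 3);                /* shader uses binding 3... */
      BITSET_SET(used, 57);               /* ...and binding 57 */

      unsigned next = 0;
      BITSET_WORD b, tmp;
      BITSET_FOREACH_SET(b, tmp, used, 64)
         dense[b] = next++;               /* 3 -> slot 0, 57 -> slot 1 */

      return next;                        /* table size is 2, not 58 */
   }
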
diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c
index 4be2bfc625b..a7feefb540e 100644
--- a/src/vulkan/anv_pipeline.c
+++ b/src/vulkan/anv_pipeline.c
@@ -336,8 +336,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
    if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
       prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2;

-   if (pipeline->layout && pipeline->layout->stage[stage].image_count > 0)
-      prog_data->nr_params += pipeline->layout->stage[stage].image_count *
+   if (pipeline->bindings[stage].image_count > 0)
+      prog_data->nr_params += pipeline->bindings[stage].image_count *
                               BRW_IMAGE_PARAM_SIZE;

    if (prog_data->nr_params > 0) {
@@ -362,9 +362,13 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
    /* Set up dynamic offsets */
    anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);

+   char surface_usage_mask[256], sampler_usage_mask[256];
+   zero(surface_usage_mask);
+   zero(sampler_usage_mask);
+
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    if (pipeline->layout)
-      anv_nir_apply_pipeline_layout(nir, prog_data, pipeline->layout);
+      anv_nir_apply_pipeline_layout(pipeline, nir, prog_data);

    /* All binding table offsets provided by apply_pipeline_layout() are
     * relative to the start of the bindint table (plus MAX_RTS for VS).
@@ -1059,6 +1063,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    */
    memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
    memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
+   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));

    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
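
nr_params counts scalar uniform slots, and each image a stage can access
reserves BRW_IMAGE_PARAM_SIZE of them -- now taken from the compacted
bind map instead of the full layout.  Illustrative numbers only:

   /* A stage whose compacted map ends up with image_count == 2 reserves
    *
    *    prog_data->nr_params += 2 * BRW_IMAGE_PARAM_SIZE;
    *
    * slots here; anv_nir_apply_pipeline_layout() later fills them with
    * the per-image parameters (surface index, offset, size, ...).  The
    * old code reserved space for every image in the pipeline layout,
    * whether the shader used it or not.
    */
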
diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h
index 951a571944d..29343397b6c 100644
--- a/src/vulkan/anv_private.h
+++ b/src/vulkan/anv_private.h
@@ -1378,6 +1378,15 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
         stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
         __tmp &= ~(1 << (stage)))

+struct anv_pipeline_bind_map {
+   uint32_t surface_count;
+   uint32_t sampler_count;
+   uint32_t image_count;
+
+   struct anv_pipeline_binding *                surface_to_descriptor;
+   struct anv_pipeline_binding *                sampler_to_descriptor;
+};
+
 struct anv_pipeline {
    struct anv_device *                          device;
    struct anv_batch                             batch;
@@ -1387,6 +1396,8 @@ struct anv_pipeline {
    struct anv_dynamic_state                     dynamic_state;

    struct anv_pipeline_layout *                 layout;
+   struct anv_pipeline_bind_map                 bindings[MESA_SHADER_STAGES];
+
    bool                                         use_repclear;

    struct brw_vs_prog_data                      vs_prog_data;
-- 
2.30.2