From 612e35b2c65c99773b73e53d0e6fd112b1a7431f Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 6 Nov 2015 13:32:52 -0800
Subject: [PATCH] anv: Do range-checking in the shader for dynamic buffers

Previously, we pushed only an offset per dynamically bound buffer into
the shader.  Now we program the surface state with the full possible
range (the buffer size minus the descriptor's base offset) and push an
(offset, range) pair per dynamic buffer.  A NIR pass then wraps each
affected UBO/SSBO access in a predicate on the pushed range, so
out-of-range stores are discarded and out-of-range loads return zero.
---
Two short sketches follow the patch: a host-side model of the new range
check and the doubled push-constant layout.

 src/vulkan/anv_cmd_buffer.c                |  31 +++--
 src/vulkan/anv_device.c                    |   7 +
 src/vulkan/anv_nir_apply_dynamic_offsets.c | 144 ++++++++++++---------
 src/vulkan/anv_pipeline.c                  |   2 +-
 src/vulkan/anv_private.h                   |   7 +-
 5 files changed, 118 insertions(+), 73 deletions(-)

diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c
index be8a537f656..c0e28bdf047 100644
--- a/src/vulkan/anv_cmd_buffer.c
+++ b/src/vulkan/anv_cmd_buffer.c
@@ -487,19 +487,28 @@ void anv_CmdBindDescriptorSets(
       if (set_layout->dynamic_offset_count > 0) {
          VkShaderStage s;
          for_each_bit(s, set_layout->shader_stages) {
-            anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s,
-                                                      dynamic_offsets);
-            uint32_t *offsets =
-               cmd_buffer->state.push_constants[s]->dynamic_offsets +
-               layout->set[firstSet + i].dynamic_offset_start;
-
-            typed_memcpy(offsets, pDynamicOffsets + dynamic_slot,
-                         set_layout->dynamic_offset_count);
-
+            anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic);
+
+            struct anv_push_constants *push =
+               cmd_buffer->state.push_constants[s];
+
+            unsigned d = layout->set[firstSet + i].dynamic_offset_start;
+            const uint32_t *offsets = pDynamicOffsets + dynamic_slot;
+            struct anv_descriptor *desc = set->descriptors;
+
+            for (unsigned b = 0; b < set_layout->binding_count; b++) {
+               if (set_layout->binding[b].dynamic_offset_index < 0)
+                  continue;
+
+               unsigned array_size = set_layout->binding[b].array_size;
+               for (unsigned j = 0; j < array_size; j++) {
+                  push->dynamic[d].offset = *(offsets++);
+                  push->dynamic[d].range = (desc++)->range;
+                  d++;
+               }
+            }
          }
          cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages;
-
-         dynamic_slot += set_layout->dynamic_offset_count;
       }
    }
 }
diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c
index c5bea7253ed..9483816b53f 100644
--- a/src/vulkan/anv_device.c
+++ b/src/vulkan/anv_device.c
@@ -1777,6 +1777,13 @@ void anv_UpdateDescriptorSets(
                .offset = write->pDescriptors[j].bufferInfo.offset,
                .range = write->pDescriptors[j].bufferInfo.range,
             };
+
+            /* For buffers with dynamic offsets, we use the full possible
+             * range in the surface state and do the actual range-checking
+             * in the shader.
+             */
+            if (bind_layout->dynamic_offset_index >= 0)
+               desc[j].range = buffer->size - desc[j].offset;
          }
 
       default:
diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c
index e14644cd222..d6c09474da7 100644
--- a/src/vulkan/anv_nir_apply_dynamic_offsets.c
+++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c
@@ -107,77 +107,100 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state)
 
       nir_intrinsic_instr *offset_load =
          nir_intrinsic_instr_create(state->shader, offset_load_op);
-      offset_load->num_components = 1;
-      offset_load->const_index[0] = state->indices_start + index;
+      offset_load->num_components = 2;
+      offset_load->const_index[0] = state->indices_start + index * 2;
 
       if (const_arr_idx) {
-         offset_load->const_index[1] = const_arr_idx->u[0];
+         offset_load->const_index[1] = const_arr_idx->u[0] * 2;
       } else {
          offset_load->const_index[1] = 0;
-         nir_src_copy(&offset_load->src[0], &res_intrin->src[0],
-                      &intrin->instr);
+         offset_load->src[0] = nir_src_for_ssa(
+            nir_imul(b, nir_ssa_for_src(b, res_intrin->src[0], 1),
+                     nir_imm_int(b, 2)));
       }
 
-      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL);
+      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL);
       nir_builder_instr_insert(b, &offset_load->instr);
 
-      nir_ssa_def *offset = &offset_load->dest.ssa;
+      /* We calculate the full offset and don't bother with the base
+       * offset.  We need the full offset for the predicate anyway.
+       */
+      nir_ssa_def *rel_offset = nir_imm_int(b, intrin->const_index[0]);
       if (indirect_src >= 0) {
          assert(intrin->src[indirect_src].is_ssa);
-         offset = nir_iadd(b, intrin->src[indirect_src].ssa, offset);
+         rel_offset = nir_iadd(b, intrin->src[indirect_src].ssa, rel_offset);
       }
+      nir_ssa_def *global_offset = nir_iadd(b, rel_offset,
+                                            &offset_load->dest.ssa);
 
-      /* Now we can modify the load/store intrinsic */
-
-      if (indirect_src < 0) {
-         /* The original intrinsic is not an indirect variant.  We need to
-          * create a new one and copy the old data over first.
-          */
-
-         nir_intrinsic_op indirect_op;
-         switch (intrin->intrinsic) {
-         case nir_intrinsic_load_ubo:
-            indirect_op = nir_intrinsic_load_ubo_indirect;
-            break;
-         case nir_intrinsic_load_ssbo:
-            indirect_op = nir_intrinsic_load_ssbo_indirect;
-            break;
-         case nir_intrinsic_store_ssbo:
-            indirect_op = nir_intrinsic_store_ssbo_indirect;
-            break;
-         default:
-            unreachable("Invalid direct load/store intrinsic");
-         }
-
-         nir_intrinsic_instr *copy =
-            nir_intrinsic_instr_create(state->shader, indirect_op);
-         copy->num_components = intrin->num_components;
-
-         for (unsigned i = 0; i < 4; i++)
-            copy->const_index[i] = intrin->const_index[i];
-
-         /* The indirect is always the last source */
-         indirect_src = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
-
-         for (unsigned i = 0; i < (unsigned)indirect_src; i++)
-            nir_src_copy(&copy->src[i], &intrin->src[i], &copy->instr);
-
-         copy->src[indirect_src] = nir_src_for_ssa(offset);
-         nir_ssa_dest_init(&copy->instr, &copy->dest,
-                           intrin->dest.ssa.num_components,
-                           intrin->dest.ssa.name);
-         nir_builder_instr_insert(b, &copy->instr);
+      /* Now we replace the load/store intrinsic */
+
+      nir_intrinsic_op indirect_op;
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_ubo:
+         indirect_op = nir_intrinsic_load_ubo_indirect;
+         break;
+      case nir_intrinsic_load_ssbo:
+         indirect_op = nir_intrinsic_load_ssbo_indirect;
+         break;
+      case nir_intrinsic_store_ssbo:
+         indirect_op = nir_intrinsic_store_ssbo_indirect;
+         break;
+      default:
+         unreachable("Invalid direct load/store intrinsic");
+      }
+
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(state->shader, indirect_op);
+      copy->num_components = intrin->num_components;
+
+      /* The indirect is always the last source */
+      indirect_src = nir_intrinsic_infos[indirect_op].num_srcs - 1;
+
+      for (unsigned i = 0; i < (unsigned)indirect_src; i++)
+         nir_src_copy(&copy->src[i], &intrin->src[i], &copy->instr);
+
+      copy->src[indirect_src] = nir_src_for_ssa(global_offset);
+      nir_ssa_dest_init(&copy->instr, &copy->dest,
+                        intrin->dest.ssa.num_components,
+                        intrin->dest.ssa.name);
+
+      /* In order to avoid out-of-bounds access, we predicate */
+      nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
+                                  rel_offset);
+      nir_if *if_stmt = nir_if_create(b->shader);
+      if_stmt->condition = nir_src_for_ssa(pred);
+      nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
+
+      nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr);
+
+      if (indirect_op != nir_intrinsic_store_ssbo_indirect) {
+         /* It's a load, we need a phi node */
+         nir_phi_instr *phi = nir_phi_instr_create(b->shader);
+         nir_ssa_dest_init(&phi->instr, &phi->dest,
+                           intrin->num_components, NULL);
+
+         nir_phi_src *src1 = ralloc(phi, nir_phi_src);
+         struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
+         src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
+         src1->src = nir_src_for_ssa(&copy->dest.ssa);
+         exec_list_push_tail(&phi->srcs, &src1->node);
+
+         b->cursor = nir_after_cf_list(&if_stmt->else_list);
+         nir_phi_src *src2 = ralloc(phi, nir_phi_src);
+         struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
+         src2->pred = exec_node_data(nir_block, enode, cf_node.node);
+         src2->src = nir_src_for_ssa(nir_imm_int(b, 0));
+         exec_list_push_tail(&phi->srcs, &src2->node);
+
+         nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
 
          assert(intrin->dest.is_ssa);
          nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                  nir_src_for_ssa(&copy->dest.ssa));
-
-         nir_instr_remove(&intrin->instr);
-      } else {
-         /* It's already indirect, so we can just rewrite the one source */
-         nir_instr_rewrite_src(&intrin->instr, &intrin->src[indirect_src],
-                               nir_src_for_ssa(offset));
+                                  nir_src_for_ssa(&phi->dest.ssa));
       }
+
+      nir_instr_remove(&intrin->instr);
    }
 
    return true;
@@ -208,9 +231,12 @@ anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
    }
 
    struct anv_push_constants *null_data = NULL;
-   for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
-      prog_data->param[i + shader->num_uniforms] =
-         (const gl_constant_value *)&null_data->dynamic_offsets[i];
+   for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) {
+      prog_data->param[i * 2 + shader->num_uniforms] =
+         (const gl_constant_value *)&null_data->dynamic[i].offset;
+      prog_data->param[i * 2 + 1 + shader->num_uniforms] =
+         (const gl_constant_value *)&null_data->dynamic[i].range;
+   }
 
-   shader->num_uniforms += MAX_DYNAMIC_BUFFERS;
+   shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 2;
 }
diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c
index 1555b23fc69..e4f68350b81 100644
--- a/src/vulkan/anv_pipeline.c
+++ b/src/vulkan/anv_pipeline.c
@@ -370,7 +370,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
    }
 
    if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
-      prog_data->nr_params += MAX_DYNAMIC_BUFFERS;
+      prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2;
 
    if (prog_data->nr_params > 0) {
       prog_data->param = (const gl_constant_value **)
diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h
index 25f88fc8b84..821e16164eb 100644
--- a/src/vulkan/anv_private.h
+++ b/src/vulkan/anv_private.h
@@ -852,8 +852,11 @@ struct anv_push_constants {
    uint32_t base_vertex;
    uint32_t base_instance;
 
-   /* Offsets for dynamically bound buffers */
-   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
+   /* Offsets and ranges for dynamically bound buffers */
+   struct {
+      uint32_t offset;
+      uint32_t range;
+   } dynamic[MAX_DYNAMIC_BUFFERS];
 
    /* Image data for image_load_store on pre-SKL */
    struct brw_image_param images[MAX_IMAGES];
-- 
2.30.2
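
To make the shader-side behavior concrete, here is a rough host-side C
model of the predicated access the pass emits.  This is a sketch, not
driver code: the helper name, the struct, and the flat byte-pointer view
of the surface are invented for illustration.

    #include <stdint.h>
    #include <string.h>

    /* Mirrors one anv_push_constants::dynamic[] entry: the offset comes
     * from pDynamicOffsets at bind time, the range is the descriptor's
     * range (the full possible range for dynamically bound buffers).
     */
    struct dyn_entry {
       uint32_t offset;
       uint32_t range;
    };

    static uint32_t
    load_dynamic_u32(const uint8_t *surface, struct dyn_entry dyn,
                     uint32_t rel_offset /* constant base + indirect */)
    {
       /* The pass predicates on range >= rel_offset; the else-leg of the
        * generated if feeds an immediate zero into a phi, so an
        * out-of-range load yields 0.  For stores there is no phi and the
        * else-leg simply skips the store.
        */
       if (dyn.range >= rel_offset) {
          uint32_t val;
          memcpy(&val, surface + dyn.offset + rel_offset, sizeof(val));
          return val;
       }
       return 0;
    }

Note that the predicate checks only the relative offset, exactly as the
pass builds it; the dynamic offset is added after the check, backed by
the full-range surface state programmed in anv_UpdateDescriptorSets().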
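
The uniform bookkeeping doubles to match: each dynamic buffer now owns
two consecutive param slots, offset then range, which is what the "* 2"
indexing in the pass and in anv_pipeline.c accounts for.  A standalone
sketch of that layout follows; MAX_DYNAMIC_BUFFERS here is a stand-in
value, not the driver's actual limit.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_DYNAMIC_BUFFERS 8   /* stand-in for the driver's limit */

    struct dyn_entry {
       uint32_t offset;
       uint32_t range;
    };

    int
    main(void)
    {
       struct dyn_entry dynamic[MAX_DYNAMIC_BUFFERS] = {
          [3] = { .offset = 256, .range = 1024 },   /* example entry */
       };

       /* Flattened view, as the shader sees the uniforms: slot 2*i is
        * dynamic[i].offset and slot 2*i + 1 is dynamic[i].range, which
        * is why the pass loads a 2-component value at
        * indices_start + index * 2.
        */
       const uint32_t *slots = (const uint32_t *)dynamic;

       unsigned i = 3;
       printf("slot %u (offset) = %" PRIu32 "\n", i * 2, slots[i * 2]);
       printf("slot %u (range)  = %" PRIu32 "\n", i * 2 + 1, slots[i * 2 + 1]);
       return 0;
    }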