From d3f45888045c84b2bc382a34d169a0ede4774a24 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Fri, 9 Oct 2015 14:41:21 +0200 Subject: [PATCH] i965: Adapt SSBOs to work with their own separate index space MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Kristian Høgsberg --- src/mesa/drivers/dri/i965/brw_context.h | 4 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 24 +++--- src/mesa/drivers/dri/i965/brw_shader.cpp | 9 ++- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 24 +++--- .../drivers/dri/i965/brw_wm_surface_state.c | 79 +++++++++---------- 5 files changed, 71 insertions(+), 69 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index e59478a448a..4aba7b814c6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -352,6 +352,7 @@ struct brw_stage_prog_data { uint32_t texture_start; uint32_t gather_texture_start; uint32_t ubo_start; + uint32_t ssbo_start; uint32_t abo_start; uint32_t image_start; uint32_t shader_time_start; @@ -717,9 +718,6 @@ struct brw_vs_prog_data { /** Max number of SSBOs in a shader */ #define BRW_MAX_SSBO 12 -/** Max number of combined UBOs and SSBOs in a shader */ -#define BRW_MAX_COMBINED_UBO_SSBO (BRW_MAX_UBO + BRW_MAX_SSBO) - /** Max number of atomic counter buffer objects in a shader */ #define BRW_MAX_ABO 16 diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 21d2967935a..05f3f63204b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1482,21 +1482,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg surf_index; if (const_uniform_block) { - unsigned index = stage_prog_data->binding_table.ubo_start + + unsigned index = stage_prog_data->binding_table.ssbo_start + const_uniform_block->u[0]; surf_index = fs_reg(index); brw_mark_surface_used(prog_data, index); } else { surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ubo_start)); + fs_reg(stage_prog_data->binding_table.ssbo_start)); surf_index = bld.emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. */ brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + + stage_prog_data->binding_table.ssbo_start + nir->info.num_ssbos - 1); } @@ -1738,18 +1738,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[1]); if (const_uniform_block) { - unsigned index = stage_prog_data->binding_table.ubo_start + + unsigned index = stage_prog_data->binding_table.ssbo_start + const_uniform_block->u[0]; surf_index = fs_reg(index); brw_mark_surface_used(prog_data, index); } else { surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[1]), - fs_reg(stage_prog_data->binding_table.ubo_start)); + fs_reg(stage_prog_data->binding_table.ssbo_start)); surf_index = bld.emit_uniformize(surf_index); brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + + stage_prog_data->binding_table.ssbo_start + nir->info.num_ssbos - 1); } @@ -1864,7 +1864,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_get_buffer_size: { nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0; + unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0; int reg_width = dispatch_width / 8; /* Set LOD = 0 */ @@ -1875,7 +1875,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr BRW_REGISTER_TYPE_UD); bld.LOAD_PAYLOAD(src_payload, &source, 1, 0); - fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index); + fs_reg surf_index = fs_reg(prog_data->binding_table.ssbo_start + ssbo_index); fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest, src_payload, surf_index); inst->header_size = 0; @@ -1928,20 +1928,20 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, fs_reg surface; nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); if (const_surface) { - unsigned surf_index = stage_prog_data->binding_table.ubo_start + + unsigned surf_index = stage_prog_data->binding_table.ssbo_start + const_surface->u[0]; surface = fs_reg(surf_index); brw_mark_surface_used(prog_data, surf_index); } else { surface = vgrf(glsl_type::uint_type); bld.ADD(surface, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ubo_start)); + fs_reg(stage_prog_data->binding_table.ssbo_start)); - /* Assume this may touch any UBO. This is the same we do for other + /* Assume this may touch any SSBO. This is the same we do for other * UBO/SSBO accesses with non-constant surface. */ brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + + stage_prog_data->binding_table.ssbo_start + nir->info.num_ssbos - 1); } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index b41e842005a..7ee0c66468c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -1128,11 +1128,16 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage, next_binding_table_offset += num_textures; if (shader) { - assert(shader->NumBufferInterfaceBlocks <= BRW_MAX_COMBINED_UBO_SSBO); + assert(shader->NumUniformBlocks <= BRW_MAX_UBO); stage_prog_data->binding_table.ubo_start = next_binding_table_offset; - next_binding_table_offset += shader->NumBufferInterfaceBlocks; + next_binding_table_offset += shader->NumUniformBlocks; + + assert(shader->NumShaderStorageBlocks <= BRW_MAX_SSBO); + stage_prog_data->binding_table.ssbo_start = next_binding_table_offset; + next_binding_table_offset += shader->NumShaderStorageBlocks; } else { stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0; + stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0; } if (INTEL_DEBUG & DEBUG_SHADER_TIME) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 9e095fb52c8..0025f3647a1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -423,10 +423,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_get_buffer_size: { nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0; + unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0; - src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start + - ubo_index); + src_reg surf_index = src_reg(prog_data->base.binding_table.ssbo_start + + ssbo_index); dst_reg result_dst = get_nir_dest(instr->dest); vec4_instruction *inst = new(mem_ctx) vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst); @@ -456,18 +456,18 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[1]); if (const_uniform_block) { - unsigned index = prog_data->base.binding_table.ubo_start + + unsigned index = prog_data->base.binding_table.ssbo_start + const_uniform_block->u[0]; surf_index = src_reg(index); brw_mark_surface_used(&prog_data->base, index); } else { surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1), - src_reg(prog_data->base.binding_table.ubo_start))); + src_reg(prog_data->base.binding_table.ssbo_start))); surf_index = emit_uniformize(surf_index); brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + + prog_data->base.binding_table.ssbo_start + nir->info.num_ssbos - 1); } @@ -599,7 +599,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg surf_index; if (const_uniform_block) { - unsigned index = prog_data->base.binding_table.ubo_start + + unsigned index = prog_data->base.binding_table.ssbo_start + const_uniform_block->u[0]; surf_index = src_reg(index); @@ -607,14 +607,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } else { surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1), - src_reg(prog_data->base.binding_table.ubo_start))); + src_reg(prog_data->base.binding_table.ssbo_start))); surf_index = emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. */ brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + + prog_data->base.binding_table.ssbo_start + nir->info.num_ssbos - 1); } @@ -821,20 +821,20 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr) src_reg surface; nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); if (const_surface) { - unsigned surf_index = prog_data->base.binding_table.ubo_start + + unsigned surf_index = prog_data->base.binding_table.ssbo_start + const_surface->u[0]; surface = src_reg(surf_index); brw_mark_surface_used(&prog_data->base, surf_index); } else { surface = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]), - src_reg(prog_data->base.binding_table.ubo_start))); + src_reg(prog_data->base.binding_table.ssbo_start))); /* Assume this may touch any UBO. This is the same we do for other * UBO/SSBO accesses with non-constant surface. */ brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + + prog_data->base.binding_table.ssbo_start + nir->info.num_ssbos - 1); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 3c019771603..a304eec3249 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -926,50 +926,49 @@ brw_upload_ubo_surfaces(struct brw_context *brw, if (!shader) return; - uint32_t *surf_offsets = + uint32_t *ubo_surf_offsets = &stage_state->surf_offset[prog_data->binding_table.ubo_start]; - for (int i = 0; i < shader->NumBufferInterfaceBlocks; i++) { - struct intel_buffer_object *intel_bo; + for (int i = 0; i < shader->NumUniformBlocks; i++) { + struct gl_uniform_buffer_binding *binding = + &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding]; - /* Because behavior for referencing outside of the binding's size in the - * glBindBufferRange case is undefined, we can just bind the whole buffer - * glBindBufferBase wants and be a correct implementation. - */ - if (!shader->BufferInterfaceBlocks[i].IsShaderStorage) { - struct gl_uniform_buffer_binding *binding; - binding = - &ctx->UniformBufferBindings[shader->BufferInterfaceBlocks[i].Binding]; - if (binding->BufferObject == ctx->Shared->NullBufferObj) { - brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]); - } else { - intel_bo = intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = - intel_bufferobj_buffer(brw, intel_bo, - binding->Offset, - binding->BufferObject->Size - binding->Offset); - brw_create_constant_surface(brw, bo, binding->Offset, - binding->BufferObject->Size - binding->Offset, - &surf_offsets[i], - dword_pitch); - } + if (binding->BufferObject == ctx->Shared->NullBufferObj) { + brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]); } else { - struct gl_shader_storage_buffer_binding *binding; - binding = - &ctx->ShaderStorageBufferBindings[shader->BufferInterfaceBlocks[i].Binding]; - if (binding->BufferObject == ctx->Shared->NullBufferObj) { - brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]); - } else { - intel_bo = intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = - intel_bufferobj_buffer(brw, intel_bo, - binding->Offset, - binding->BufferObject->Size - binding->Offset); - brw_create_buffer_surface(brw, bo, binding->Offset, - binding->BufferObject->Size - binding->Offset, - &surf_offsets[i], - dword_pitch); - } + struct intel_buffer_object *intel_bo = + intel_buffer_object(binding->BufferObject); + drm_intel_bo *bo = + intel_bufferobj_buffer(brw, intel_bo, + binding->Offset, + binding->BufferObject->Size - binding->Offset); + brw_create_constant_surface(brw, bo, binding->Offset, + binding->BufferObject->Size - binding->Offset, + &ubo_surf_offsets[i], + dword_pitch); + } + } + + uint32_t *ssbo_surf_offsets = + &stage_state->surf_offset[prog_data->binding_table.ssbo_start]; + + for (int i = 0; i < shader->NumShaderStorageBlocks; i++) { + struct gl_shader_storage_buffer_binding *binding = + &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding]; + + if (binding->BufferObject == ctx->Shared->NullBufferObj) { + brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]); + } else { + struct intel_buffer_object *intel_bo = + intel_buffer_object(binding->BufferObject); + drm_intel_bo *bo = + intel_bufferobj_buffer(brw, intel_bo, + binding->Offset, + binding->BufferObject->Size - binding->Offset); + brw_create_buffer_surface(brw, bo, binding->Offset, + binding->BufferObject->Size - binding->Offset, + &ssbo_surf_offsets[i], + dword_pitch); } } -- 2.30.2