From 9b8347c98842621a621746ec5718c95d297876c9 Mon Sep 17 00:00:00 2001 From: Caio Marcelo de Oliveira Filho Date: Thu, 21 May 2020 00:17:27 -0700 Subject: [PATCH] anv: Use new helper functions to pick SIMD variant for CS Also combine the existing individual anv helpers into a single one for all CS related parameters. Reviewed-by: Jason Ekstrand Part-of: --- src/intel/vulkan/anv_cmd_buffer.c | 6 +++--- src/intel/vulkan/anv_pipeline.c | 25 +++++++++++++------------ src/intel/vulkan/anv_private.h | 11 +++++++---- src/intel/vulkan/genX_cmd_buffer.c | 12 ++++++++---- src/intel/vulkan/genX_pipeline.c | 18 +++++++++--------- 5 files changed, 40 insertions(+), 32 deletions(-) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index ea5ec415340..1ca33f206aa 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -838,9 +838,9 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0]; - const uint32_t threads = anv_cs_threads(pipeline); + const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline); const unsigned total_push_constants_size = - brw_cs_push_const_total_size(cs_prog_data, threads); + brw_cs_push_const_total_size(cs_prog_data, cs_params.threads); if (total_push_constants_size == 0) return (struct anv_state) { .offset = 0 }; @@ -863,7 +863,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) } if (cs_prog_data->push.per_thread.size > 0) { - for (unsigned t = 0; t < threads; t++) { + for (unsigned t = 0; t < cs_params.threads; t++) { memcpy(dst, src, cs_prog_data->push.per_thread.size); uint32_t *subgroup_id = dst + diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 88bc58f3771..bd2d1884d7a 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -1728,21 +1728,22 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, return VK_SUCCESS; } -uint32_t -anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline) +struct anv_cs_parameters +anv_cs_parameters(const struct anv_compute_pipeline *pipeline) { const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); - return cs_prog_data->local_size[0] * - cs_prog_data->local_size[1] * - cs_prog_data->local_size[2]; -} -uint32_t -anv_cs_threads(const struct anv_compute_pipeline *pipeline) -{ - const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); - return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline), - cs_prog_data->simd_size); + struct anv_cs_parameters cs_params = {}; + + cs_params.group_size = cs_prog_data->local_size[0] * + cs_prog_data->local_size[1] * + cs_prog_data->local_size[2]; + cs_params.simd_size = + brw_cs_simd_size_for_group_size(&pipeline->base.device->info, + cs_prog_data, cs_params.group_size); + cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size); + + return cs_params; } /** diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 4bee161227d..8f5dcd37fbc 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -3413,11 +3413,14 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, const char *entrypoint, const VkSpecializationInfo *spec_info); -uint32_t -anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline); +struct anv_cs_parameters { + uint32_t group_size; + uint32_t simd_size; + uint32_t threads; +}; -uint32_t -anv_cs_threads(const struct anv_compute_pipeline *pipeline); +struct anv_cs_parameters +anv_cs_parameters(const struct anv_compute_pipeline *pipeline); struct anv_format_plane { enum isl_format isl_format:16; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 91ea16a0105..bf2a5a6dc75 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -4360,12 +4360,14 @@ void genX(CmdDispatchBase)( if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline); + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) { ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled; - ggw.SIMDSize = prog_data->simd_size / 16; + ggw.SIMDSize = cs_params.simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; - ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1; + ggw.ThreadWidthCounterMaximum = cs_params.threads - 1; ggw.ThreadGroupIDXDimension = groupCountX; ggw.ThreadGroupIDYDimension = groupCountY; ggw.ThreadGroupIDZDimension = groupCountZ; @@ -4474,14 +4476,16 @@ void genX(CmdDispatchIndirect)( genX(cmd_emit_conditional_render_predicate)(cmd_buffer); #endif + const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline); + anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) { ggw.IndirectParameterEnable = true; ggw.PredicateEnable = GEN_GEN <= 7 || cmd_buffer->state.conditional_render_enabled; - ggw.SIMDSize = prog_data->simd_size / 16; + ggw.SIMDSize = cs_params.simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; - ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1; + ggw.ThreadWidthCounterMaximum = cs_params.threads - 1; ggw.RightExecutionMask = pipeline->cs_right_mask; ggw.BottomExecutionMask = 0xffffffff; } diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 43078ce2b84..6753b1419ef 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -2325,19 +2325,16 @@ compute_pipeline_create( anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0); - uint32_t group_size = cs_prog_data->local_size[0] * - cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; - uint32_t remainder = group_size & (cs_prog_data->simd_size - 1); + const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline); + uint32_t remainder = cs_params.group_size & (cs_params.simd_size - 1); if (remainder > 0) pipeline->cs_right_mask = ~0u >> (32 - remainder); else - pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size); - - const uint32_t threads = anv_cs_threads(pipeline); + pipeline->cs_right_mask = ~0u >> (32 - cs_params.simd_size); const uint32_t vfe_curbe_allocation = - ALIGN(cs_prog_data->push.per_thread.regs * threads + + ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads + cs_prog_data->push.cross_thread.regs, 2); const uint32_t subslices = MAX2(device->physical->subslice_total, 1); @@ -2388,7 +2385,10 @@ compute_pipeline_create( } struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { - .KernelStartPointer = cs_bin->kernel.offset, + .KernelStartPointer = + cs_bin->kernel.offset + + brw_cs_prog_data_prog_offset(cs_prog_data, cs_params.simd_size), + /* WA_1606682166 */ .SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(cs_bin), /* We add 1 because the CS indirect parameters buffer isn't accounted @@ -2420,7 +2420,7 @@ compute_pipeline_create( .ThreadPreemptionDisable = true, #endif - .NumberofThreadsinGPGPUThreadGroup = threads, + .NumberofThreadsinGPGPUThreadGroup = cs_params.threads, }; GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, pipeline->interface_descriptor_data, -- 2.30.2