anv: Use new helper functions to pick SIMD variant for CS
authorCaio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Thu, 21 May 2020 07:17:27 +0000 (00:17 -0700)
committerCaio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Thu, 28 May 2020 01:16:31 +0000 (18:16 -0700)
Also combine the existing individual anv helpers into a single one for
all CS-related parameters.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5142>
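
Editor's note: the anv_pipeline.c hunk below replaces the two per-value helpers
(anv_cs_workgroup_size, anv_cs_threads) with one anv_cs_parameters() that fills
group size, SIMD width, and thread count into a single struct. The following is
a minimal, self-contained sketch of that arithmetic only; it is not driver code.
In the real helper the local size comes from brw_cs_prog_data and the SIMD width
from brw_cs_simd_size_for_group_size(), which needs device info, so here both
are plain arguments for illustration.

    #include <stdint.h>
    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* Mirrors the anv_cs_parameters struct added in anv_private.h. */
    struct cs_parameters {
       uint32_t group_size;
       uint32_t simd_size;
       uint32_t threads;
    };

    /* Standalone model of anv_cs_parameters(): local size and SIMD width
     * are passed in directly instead of being read from prog data /
     * queried from the compiler. */
    static struct cs_parameters
    cs_parameters(const uint32_t local_size[3], uint32_t simd_size)
    {
       struct cs_parameters p = {0};
       p.group_size = local_size[0] * local_size[1] * local_size[2];
       p.simd_size  = simd_size;
       p.threads    = DIV_ROUND_UP(p.group_size, p.simd_size);
       return p;
    }

    int main(void)
    {
       const uint32_t local_size[3] = { 8, 8, 1 };   /* 64 invocations */
       struct cs_parameters p = cs_parameters(local_size, 32 /* SIMD32 */);
       printf("group_size=%u simd_size=%u threads=%u\n",
              p.group_size, p.simd_size, p.threads); /* 64, 32, 2 */
       return 0;
    }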

src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/genX_pipeline.c
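
Editor's note: in the genX_pipeline.c hunk further down, the right-execution-mask
computation is unchanged but now reads the consolidated struct. A small worked
example of that bit arithmetic (standalone sketch, assuming a power-of-two SIMD
width no larger than 32, as the hardware uses):

    #include <stdint.h>
    #include <stdio.h>

    /* Rightmost-dispatch execution mask, as in compute_pipeline_create():
     * enable only the channels used by the last, possibly partial, group. */
    static uint32_t
    cs_right_mask(uint32_t group_size, uint32_t simd_size)
    {
       uint32_t remainder = group_size & (simd_size - 1);
       if (remainder > 0)
          return ~0u >> (32 - remainder);
       else
          return ~0u >> (32 - simd_size);
    }

    int main(void)
    {
       /* 100 invocations at SIMD32: last group runs 4 channels -> 0xf. */
       printf("0x%x\n", cs_right_mask(100, 32));
       /* 64 invocations at SIMD32: last group is full -> 0xffffffff. */
       printf("0x%x\n", cs_right_mask(64, 32));
       return 0;
    }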

index ea5ec415340952b6b866a205eb4dea687a80d9d6..1ca33f206aa82f41cbb877af7bf976f98d8f3362 100644 (file)
@@ -838,9 +838,9 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
 
-   const uint32_t threads = anv_cs_threads(pipeline);
+   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
    const unsigned total_push_constants_size =
-      brw_cs_push_const_total_size(cs_prog_data, threads);
+      brw_cs_push_const_total_size(cs_prog_data, cs_params.threads);
    if (total_push_constants_size == 0)
       return (struct anv_state) { .offset = 0 };
 
@@ -863,7 +863,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    }
 
    if (cs_prog_data->push.per_thread.size > 0) {
-      for (unsigned t = 0; t < threads; t++) {
+      for (unsigned t = 0; t < cs_params.threads; t++) {
          memcpy(dst, src, cs_prog_data->push.per_thread.size);
 
          uint32_t *subgroup_id = dst +
index 88bc58f3771ce0b5feb591411f5a02e8109918e9..bd2d1884d7a1e1f44e6fd4a531fdd82d575d098b 100644 (file)
@@ -1728,21 +1728,22 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
    return VK_SUCCESS;
 }
 
-uint32_t
-anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline)
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
 {
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
-   return cs_prog_data->local_size[0] *
-          cs_prog_data->local_size[1] *
-          cs_prog_data->local_size[2];
-}
 
-uint32_t
-anv_cs_threads(const struct anv_compute_pipeline *pipeline)
-{
-   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
-   return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline),
-                       cs_prog_data->simd_size);
+   struct anv_cs_parameters cs_params = {};
+
+   cs_params.group_size = cs_prog_data->local_size[0] *
+                          cs_prog_data->local_size[1] *
+                          cs_prog_data->local_size[2];
+   cs_params.simd_size =
+      brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
+                                      cs_prog_data, cs_params.group_size);
+   cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
+
+   return cs_params;
 }
 
 /**
index 4bee161227d8fba4be30507c20d70cc6a1195893..8f5dcd37fbc5af9750d905ce1870353dc4c594f1 100644 (file)
@@ -3413,11 +3413,14 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                         const char *entrypoint,
                         const VkSpecializationInfo *spec_info);
 
-uint32_t
-anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline);
+struct anv_cs_parameters {
+   uint32_t group_size;
+   uint32_t simd_size;
+   uint32_t threads;
+};
 
-uint32_t
-anv_cs_threads(const struct anv_compute_pipeline *pipeline);
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
 
 struct anv_format_plane {
    enum isl_format isl_format:16;
index 91ea16a0105c4a0cff6801983077900de9952421..bf2a5a6dc75229b1786194d7a0e5562b4810f544 100644 (file)
@@ -4360,12 +4360,14 @@ void genX(CmdDispatchBase)(
    if (cmd_buffer->state.conditional_render_enabled)
       genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
 
+   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
    anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
       ggw.PredicateEnable              = cmd_buffer->state.conditional_render_enabled;
-      ggw.SIMDSize                     = prog_data->simd_size / 16;
+      ggw.SIMDSize                     = cs_params.simd_size / 16;
       ggw.ThreadDepthCounterMaximum    = 0;
       ggw.ThreadHeightCounterMaximum   = 0;
-      ggw.ThreadWidthCounterMaximum    = anv_cs_threads(pipeline) - 1;
+      ggw.ThreadWidthCounterMaximum    = cs_params.threads - 1;
       ggw.ThreadGroupIDXDimension      = groupCountX;
       ggw.ThreadGroupIDYDimension      = groupCountY;
       ggw.ThreadGroupIDZDimension      = groupCountZ;
@@ -4474,14 +4476,16 @@ void genX(CmdDispatchIndirect)(
       genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
 #endif
 
+   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
    anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
       ggw.IndirectParameterEnable      = true;
       ggw.PredicateEnable              = GEN_GEN <= 7 ||
                                          cmd_buffer->state.conditional_render_enabled;
-      ggw.SIMDSize                     = prog_data->simd_size / 16;
+      ggw.SIMDSize                     = cs_params.simd_size / 16;
       ggw.ThreadDepthCounterMaximum    = 0;
       ggw.ThreadHeightCounterMaximum   = 0;
-      ggw.ThreadWidthCounterMaximum    = anv_cs_threads(pipeline) - 1;
+      ggw.ThreadWidthCounterMaximum    = cs_params.threads - 1;
       ggw.RightExecutionMask           = pipeline->cs_right_mask;
       ggw.BottomExecutionMask          = 0xffffffff;
    }
index 43078ce2b84f4495e1aef5d375bf78bc159f8373..6753b1419efe35adadb26722bc79a599bccdd243 100644 (file)
@@ -2325,19 +2325,16 @@ compute_pipeline_create(
 
    anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
 
-   uint32_t group_size = cs_prog_data->local_size[0] *
-      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
-   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
+   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+   uint32_t remainder = cs_params.group_size & (cs_params.simd_size - 1);
 
    if (remainder > 0)
       pipeline->cs_right_mask = ~0u >> (32 - remainder);
    else
-      pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
-
-   const uint32_t threads = anv_cs_threads(pipeline);
+      pipeline->cs_right_mask = ~0u >> (32 - cs_params.simd_size);
 
    const uint32_t vfe_curbe_allocation =
-      ALIGN(cs_prog_data->push.per_thread.regs * threads +
+      ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads +
             cs_prog_data->push.cross_thread.regs, 2);
 
    const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
@@ -2388,7 +2385,10 @@ compute_pipeline_create(
    }
 
    struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
-      .KernelStartPointer     = cs_bin->kernel.offset,
+      .KernelStartPointer     =
+         cs_bin->kernel.offset +
+         brw_cs_prog_data_prog_offset(cs_prog_data, cs_params.simd_size),
+
       /* WA_1606682166 */
       .SamplerCount           = GEN_GEN == 11 ? 0 : get_sampler_count(cs_bin),
       /* We add 1 because the CS indirect parameters buffer isn't accounted
@@ -2420,7 +2420,7 @@ compute_pipeline_create(
       .ThreadPreemptionDisable = true,
 #endif
 
-      .NumberofThreadsinGPGPUThreadGroup = threads,
+      .NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
    };
    GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
                                         pipeline->interface_descriptor_data,