anv: Stop using cs_prog_data->threads
authorCaio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Fri, 27 Mar 2020 15:18:00 +0000 (08:18 -0700)
committerCaio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Fri, 10 Apr 2020 02:23:12 +0000 (19:23 -0700)
Move the calculation to helper functions -- similar to what GL already
needs to do.

This is a preparation for dropping this field since this value is
expected to be calculated by the drivers now for variable group size
case.  And also the field would get in the way of brw_compile_cs
producing multiple SIMD variants (like FS).

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4504>

src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/genX_pipeline.c

index 188aff6be741a8db291bef75678c4fcff56e949d..8f94715c0d02ab20c1269047219e006c77a7441a 100644 (file)
@@ -834,8 +834,9 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
 
+   const uint32_t threads = anv_cs_threads(pipeline);
    const unsigned total_push_constants_size =
-      brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads);
+      brw_cs_push_const_total_size(cs_prog_data, threads);
    if (total_push_constants_size == 0)
       return (struct anv_state) { .offset = 0 };
 
@@ -858,7 +859,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    }
 
    if (cs_prog_data->push.per_thread.size > 0) {
-      for (unsigned t = 0; t < cs_prog_data->threads; t++) {
+      for (unsigned t = 0; t < threads; t++) {
          memcpy(dst, src, cs_prog_data->push.per_thread.size);
 
          uint32_t *subgroup_id = dst +
index 9ccf638ed4045701f66027d90dd1341481705a9c..b4f6077f0b9c711eba8060a6b1884a8adc382d5a 100644 (file)
@@ -1710,6 +1710,23 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
    return VK_SUCCESS;
 }
 
+uint32_t
+anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline)
+{
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
+   return cs_prog_data->local_size[0] *
+          cs_prog_data->local_size[1] *
+          cs_prog_data->local_size[2];
+}
+
+uint32_t
+anv_cs_threads(const struct anv_compute_pipeline *pipeline)
+{
+   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
+   return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline),
+                       cs_prog_data->simd_size);
+}
+
 /**
  * Copy pipeline state not marked as dynamic.
  * Dynamic state is pipeline state which hasn't been provided at pipeline
index 51f1ae823fcd243bac599549aaf0d5b8f3565796..af07a1d203aeecfdee59d3a348c35e6a57d868b2 100644 (file)
@@ -3315,6 +3315,12 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                         const char *entrypoint,
                         const VkSpecializationInfo *spec_info);
 
+uint32_t
+anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline);
+
+uint32_t
+anv_cs_threads(const struct anv_compute_pipeline *pipeline);
+
 struct anv_format_plane {
    enum isl_format isl_format:16;
    struct isl_swizzle swizzle;
index 7af1da0f5e4c2dd6e5fe55ef283ddcf7da665030..13ad1ced3bcd7a82d0a9cf2c3195dfb2c34b620c 100644 (file)
@@ -4295,7 +4295,7 @@ void genX(CmdDispatchBase)(
       ggw.SIMDSize                     = prog_data->simd_size / 16;
       ggw.ThreadDepthCounterMaximum    = 0;
       ggw.ThreadHeightCounterMaximum   = 0;
-      ggw.ThreadWidthCounterMaximum    = prog_data->threads - 1;
+      ggw.ThreadWidthCounterMaximum    = anv_cs_threads(pipeline) - 1;
       ggw.ThreadGroupIDXDimension      = groupCountX;
       ggw.ThreadGroupIDYDimension      = groupCountY;
       ggw.ThreadGroupIDZDimension      = groupCountZ;
@@ -4411,7 +4411,7 @@ void genX(CmdDispatchIndirect)(
       ggw.SIMDSize                     = prog_data->simd_size / 16;
       ggw.ThreadDepthCounterMaximum    = 0;
       ggw.ThreadHeightCounterMaximum   = 0;
-      ggw.ThreadWidthCounterMaximum    = prog_data->threads - 1;
+      ggw.ThreadWidthCounterMaximum    = anv_cs_threads(pipeline) - 1;
       ggw.RightExecutionMask           = pipeline->cs_right_mask;
       ggw.BottomExecutionMask          = 0xffffffff;
    }
index 2c1d7545b72f85247374500fd0fbe5ed0e685c8e..c6f479168b6a5c0c11ef5e34d8646eec044cf160 100644 (file)
@@ -2321,8 +2321,10 @@ compute_pipeline_create(
    else
       pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
 
+   const uint32_t threads = anv_cs_threads(pipeline);
+
    const uint32_t vfe_curbe_allocation =
-      ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
+      ALIGN(cs_prog_data->push.per_thread.regs * threads +
             cs_prog_data->push.cross_thread.regs, 2);
 
    const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
@@ -2405,7 +2407,7 @@ compute_pipeline_create(
       .ThreadPreemptionDisable = true,
 #endif
 
-      .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
+      .NumberofThreadsinGPGPUThreadGroup = threads,
    };
    GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
                                         pipeline->interface_descriptor_data,