vk: Compute CS exec mask and thread width max in pipeline
author Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Sat, 13 Jun 2015 00:21:01 +0000 (17:21 -0700)
committer Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Sat, 13 Jun 2015 01:21:50 +0000 (18:21 -0700)
We compute the right execution mask and the thread width max parameters
as part of pipeline creation and program them at vkCmdDispatch() and
vkCmdDispatchIndirect() time. These parameters depend only on the local
group size and the dispatch width of the program, so we can compute them
at pipeline create time.

src/vulkan/device.c
src/vulkan/pipeline.c
src/vulkan/private.h

index 0126b248233afc30b7dcafe74f0de50e6690ce82..e55e66fd74f30f2b10bb08a83275f2f6041a983d 100644 (file)
@@ -3390,31 +3390,20 @@ void anv_CmdDispatch(
     uint32_t                                    z)
 {
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
-   uint32_t size = SIMD8; /* FIXME */
-   uint32_t right_mask = 0; /* FIXME */
-   uint32_t thread_width_max = 0; /* FIXME */
+   struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
 
    anv_cmd_buffer_flush_compute_state(cmd_buffer);
 
    anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
-
-                  .InterfaceDescriptorOffset = 0,
-                  .IndirectDataLength = 0,
-                  .IndirectDataStartAddress = 0,
-
-                  .SIMDSize = size,
-
+                  .SIMDSize = prog_data->simd_size / 16,
                   .ThreadDepthCounterMaximum = 0,
                   .ThreadHeightCounterMaximum = 0,
-                  .ThreadWidthCounterMaximum = thread_width_max,
-
-                  .ThreadGroupIDStartingX = 0,
+                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                   .ThreadGroupIDXDimension = x,
-                  .ThreadGroupIDStartingY = 0,
                   .ThreadGroupIDYDimension = y,
-                  .ThreadGroupIDStartingResumeZ = 0,
                   .ThreadGroupIDZDimension = z,
-                  .RightExecutionMask = right_mask,
+                  .RightExecutionMask = pipeline->cs_right_mask,
                   .BottomExecutionMask = 0xffffffff);
 
    anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
@@ -3430,6 +3419,8 @@ void anv_CmdDispatchIndirect(
     VkDeviceSize                                offset)
 {
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+   struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
+   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
    struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
    struct anv_bo *bo = buffer->bo;
    uint32_t bo_offset = buffer->offset + offset;
@@ -3440,26 +3431,13 @@ void anv_CmdDispatchIndirect(
    anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
    anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
 
-   uint32_t size = SIMD8; /* FIXME */
-   uint32_t right_mask = 0; /* FIXME */
-   uint32_t thread_width_max = 0; /* FIXME */
-
-   /* FIXME: We can't compute thread_width_max for indirect, looks like it
-    * depends on DIMX. */
-
    anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                   .IndirectParameterEnable = true,
-                  .InterfaceDescriptorOffset = 0,
-                  .IndirectDataLength = 0,
-                  .IndirectDataStartAddress = 0,
-
-                  .SIMDSize = size,
-
+                  .SIMDSize = prog_data->simd_size / 16,
                   .ThreadDepthCounterMaximum = 0,
                   .ThreadHeightCounterMaximum = 0,
-                  .ThreadWidthCounterMaximum = thread_width_max,
-
-                  .RightExecutionMask = right_mask,
+                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
+                  .RightExecutionMask = pipeline->cs_right_mask,
                   .BottomExecutionMask = 0xffffffff);
 
    anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
index cf7562ae496073b05a8d596ec13526eba0369fe7..aa24ad4300441eb48fff1c0e0605ae1493c48ba9 100644 (file)
@@ -770,6 +770,18 @@ VkResult anv_CreateComputePipeline(
                   .URBEntryAllocationSize = 2,
                   .CURBEAllocationSize = 0);
 
+   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+   uint32_t group_size = prog_data->local_size[0] *
+      prog_data->local_size[1] * prog_data->local_size[2];
+   pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size);
+   uint32_t remainder = group_size & (prog_data->simd_size - 1);
+
+   if (remainder > 0)
+      pipeline->cs_right_mask = ~0u >> (32 - remainder);
+   else
+      pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size);
+
+
    *pPipeline = (VkPipeline) pipeline;
 
    return VK_SUCCESS;
index cf1cf4c12689a90d600e2cc99546cd9e69190fcd..08dea1526e875485b06e32b191e293f97589d538 100644 (file)
@@ -726,6 +726,9 @@ struct anv_pipeline {
    uint32_t                                     state_sf[GEN8_3DSTATE_SF_length];
    uint32_t                                     state_raster[GEN8_3DSTATE_RASTER_length];
    uint32_t                                     state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length];
+
+   uint32_t                                     cs_thread_width_max;
+   uint32_t                                     cs_right_mask;
 };
 
 struct anv_pipeline_create_info {