anv/gen8: Add support for gl_NumWorkGroups
author Jordan Justen <jordan.l.justen@intel.com>
Fri, 18 Dec 2015 09:42:46 +0000 (01:42 -0800)
committer Jordan Justen <jordan.l.justen@intel.com>
Fri, 18 Dec 2015 09:45:11 +0000 (01:45 -0800)
Co-authored-by: Kristian Høgsberg <krh@bitplanet.net>
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
src/vulkan/anv_cmd_buffer.c
src/vulkan/anv_pipeline.c
src/vulkan/anv_private.h
src/vulkan/gen8_cmd_buffer.c

index 759c4677a74d2db1c232a35c9a06dcbca664971e..bfe06591d6dbeba2ab06ab36fd6147e1500653b8 100644 (file)
@@ -619,17 +619,22 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
    struct anv_pipeline_layout *layout;
    uint32_t color_count, bias, state_offset;
 
-   if (stage == MESA_SHADER_COMPUTE)
-      layout = cmd_buffer->state.compute_pipeline->layout;
-   else
+   switch (stage) {
+   case  MESA_SHADER_FRAGMENT:
       layout = cmd_buffer->state.pipeline->layout;
-
-   if (stage == MESA_SHADER_FRAGMENT) {
       bias = MAX_RTS;
       color_count = subpass->color_count;
-   } else {
+      break;
+   case  MESA_SHADER_COMPUTE:
+      layout = cmd_buffer->state.compute_pipeline->layout;
+      bias = 1;
+      color_count = 0;
+      break;
+   default:
+      layout = cmd_buffer->state.pipeline->layout;
       bias = 0;
       color_count = 0;
+      break;
    }
 
    /* This is a little awkward: layout can be NULL but we still have to
@@ -657,6 +662,27 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                               iview->bo, iview->offset);
    }
 
+   if (stage == MESA_SHADER_COMPUTE &&
+       cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) {
+      struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo;
+      uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset;
+
+      struct anv_state surface_state;
+      surface_state =
+         anv_cmd_buffer_alloc_surface_state(cmd_buffer);
+
+      fill_descriptor_buffer_surface_state(cmd_buffer->device,
+                                           surface_state.map, stage,
+                                           VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                           bo_offset, 12);
+
+      if (!cmd_buffer->device->info.has_llc)
+         anv_state_clflush(surface_state);
+
+      bt_map[0] = surface_state.offset + state_offset;
+      add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
+   }
+
    if (layout == NULL)
       goto out;
 
index bf983ed8f2ab7107b4cca04b241880f42572ac4a..1906205c4d042a505245ed54efc47612fdf169a4 100644 (file)
@@ -348,7 +348,18 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
    /* All binding table offsets provided by apply_pipeline_layout() are
     * relative to the start of the bindint table (plus MAX_RTS for VS).
     */
-   unsigned bias = stage == MESA_SHADER_FRAGMENT ? MAX_RTS : 0;
+   unsigned bias;
+   switch (stage) {
+   case MESA_SHADER_FRAGMENT:
+      bias = MAX_RTS;
+      break;
+   case MESA_SHADER_COMPUTE:
+      bias = 1;
+      break;
+   default:
+      bias = 0;
+      break;
+   }
    prog_data->binding_table.size_bytes = 0;
    prog_data->binding_table.texture_start = bias;
    prog_data->binding_table.ubo_start = bias;
@@ -619,6 +630,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 
    memset(prog_data, 0, sizeof(*prog_data));
 
+   prog_data->binding_table.work_groups_start = 0;
+
    nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                           MESA_SHADER_COMPUTE,
                                           &prog_data->base);
index b763f7015708d2a7596fb1b6cfb549e5bc2dea47..a8583c98db4af629281a7e5afb401883b41da045 100644 (file)
@@ -1031,6 +1031,8 @@ struct anv_cmd_state {
    uint32_t                                     vb_dirty;
    anv_cmd_dirty_mask_t                         dirty;
    anv_cmd_dirty_mask_t                         compute_dirty;
+   uint32_t                                     num_workgroups_offset;
+   struct anv_bo                                *num_workgroups_bo;
    VkShaderStageFlags                           descriptors_dirty;
    VkShaderStageFlags                           push_constants_dirty;
    uint32_t                                     scratch_size;
index 5e566d3aaa16e49718867bfde81bab30919cb3a1..73d475eae0ea8fde5ed2994f38fb62d86b84c0f2 100644 (file)
@@ -617,6 +617,20 @@ void genX(CmdDispatch)(
    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
    struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
 
+   if (prog_data->uses_num_work_groups) {
+      struct anv_state state =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4);
+      uint32_t *sizes = state.map;
+      sizes[0] = x;
+      sizes[1] = y;
+      sizes[2] = z;
+      if (!cmd_buffer->device->info.has_llc)
+         anv_state_clflush(state);
+      cmd_buffer->state.num_workgroups_offset = state.offset;
+      cmd_buffer->state.num_workgroups_bo =
+         &cmd_buffer->device->dynamic_state_block_pool.bo;
+   }
+
    cmd_buffer_flush_compute_state(cmd_buffer);
 
    anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER),
@@ -649,6 +663,11 @@ void genX(CmdDispatchIndirect)(
    struct anv_bo *bo = buffer->bo;
    uint32_t bo_offset = buffer->offset + offset;
 
+   if (prog_data->uses_num_work_groups) {
+      cmd_buffer->state.num_workgroups_offset = bo_offset;
+      cmd_buffer->state.num_workgroups_bo = bo;
+   }
+
    cmd_buffer_flush_compute_state(cmd_buffer);
 
    emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);