anv: Implement VK_EXT_subgroup_size_control
author Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 21 Feb 2019 20:50:10 +0000 (14:50 -0600)
committer Jason Ekstrand <jason@jlekstrand.net>
Wed, 24 Jul 2019 17:55:40 +0000 (12:55 -0500)
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_extensions.py
src/intel/vulkan/anv_pipeline.c

index d0ddb3e52bc471f32c0af7003873227ed6d2d503..217a779576c2b05e126b9ad31cc1155f83f87acc 100644 (file)
@@ -1590,6 +1590,17 @@ void anv_GetPhysicalDeviceProperties2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
+         VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
+            (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
+         STATIC_ASSERT(8 <= BRW_SUBGROUP_SIZE && BRW_SUBGROUP_SIZE <= 32);
+         props->minSubgroupSize = 8;
+         props->maxSubgroupSize = 32;
+         props->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_threads;
+         props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
          VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *props =
             (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
index 492f8ac46155d06447d2ff9781b0d9adae94e9a0..9956631eb26d7fdb551d0d81d259154866df596d 100644 (file)
@@ -147,6 +147,7 @@ EXTENSIONS = [
     Extension('VK_EXT_shader_demote_to_helper_invocation', 1, True),
     Extension('VK_EXT_shader_stencil_export',             1, 'device->info.gen >= 9'),
     Extension('VK_EXT_shader_viewport_index_layer',       1, True),
+    Extension('VK_EXT_subgroup_size_control',             1, True),
     Extension('VK_EXT_texel_buffer_alignment',            1, True),
     Extension('VK_EXT_transform_feedback',                1, True),
     Extension('VK_EXT_vertex_attribute_divisor',          3, True),
index ae3693d76ed334dfd2839a4da848e99fd9eb3a8c..c2a164cc802af972f94edb4838937b3ed1905f7e 100644 (file)
@@ -356,7 +356,10 @@ populate_base_prog_key(const struct gen_device_info *devinfo,
                        VkPipelineShaderStageCreateFlags flags,
                        struct brw_base_prog_key *key)
 {
-   key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
+   if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
+      key->subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
+   else
+      key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
 
    populate_sampler_prog_key(devinfo, &key->tex);
 }
@@ -465,11 +468,36 @@ populate_wm_prog_key(const struct gen_device_info *devinfo,
 static void
 populate_cs_prog_key(const struct gen_device_info *devinfo,
                      VkPipelineShaderStageCreateFlags flags,
+                     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info,
                      struct brw_cs_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
    populate_base_prog_key(devinfo, flags, &key->base);
+
+   if (rss_info) {
+      assert(key->base.subgroup_size_type != BRW_SUBGROUP_SIZE_VARYING);
+
+      /* These enum values are expressly chosen to be equal to the subgroup
+       * size that they require, so the requested size is stored directly.
+       */
+      assert(rss_info->requiredSubgroupSize == 8 ||
+             rss_info->requiredSubgroupSize == 16 ||
+             rss_info->requiredSubgroupSize == 32);
+      key->base.subgroup_size_type = rss_info->requiredSubgroupSize;
+   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
+      /* If the client expressly requests full subgroups and they don't
+       * specify a subgroup size, we need to pick one.  If they've requested
+       * varying subgroup sizes, we set it to UNIFORM and let the back-end
+       * compiler pick.  Otherwise, we specify the API value of 32.
+       * Performance will likely be terrible in this case but there's nothing
+       * we can do about that.  The client should have chosen a size.
+       */
+      if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
+         key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
+      else
+         key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
+   }
 }
 
 struct anv_pipeline_stage {
@@ -1360,8 +1388,12 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 
    struct anv_shader_bin *bin = NULL;
 
+   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
+      vk_find_struct_const(info->stage.pNext,
+                           PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
+
    populate_cs_prog_key(&pipeline->device->info, info->stage.flags,
-                        &stage.key.cs);
+                        rss_info, &stage.key.cs);
 
    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);