intel/compiler: Allow for required subgroup sizes
author Jason Ekstrand <jason@jlekstrand.net>
Tue, 9 Jul 2019 19:28:18 +0000 (14:28 -0500)
committer Jason Ekstrand <jason@jlekstrand.net>
Wed, 24 Jul 2019 17:55:40 +0000 (12:55 -0500)
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
src/intel/compiler/brw_compiler.h
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_nir.c

index 8b62e67e2d7cfeda4ede4fa9ad73326e6cd71b09..b2b09d16e6007d45fe6b4cbd212a1ee71703aaa1 100644 (file)
@@ -206,9 +206,17 @@ struct brw_sampler_prog_key_data {
 /** An enum representing what kind of input gl_SubgroupSize is. */
 enum PACKED brw_subgroup_size_type
 {
-   BRW_SUBGROUP_SIZE_API_CONSTANT,  /**< Vulkan behavior */
-   BRW_SUBGROUP_SIZE_UNIFORM,       /**< OpenGL behavior */
-   BRW_SUBGROUP_SIZE_VARYING,       /**< VK_EXT_subgroup_size_control */
+   BRW_SUBGROUP_SIZE_API_CONSTANT,     /**< Default Vulkan behavior */
+   BRW_SUBGROUP_SIZE_UNIFORM,          /**< OpenGL behavior */
+   BRW_SUBGROUP_SIZE_VARYING,          /**< VK_EXT_subgroup_size_control */
+
+   /* These enums are specifically chosen so that the value of the enum is
+    * also the subgroup size.  If any new values are added, they must respect
+    * this invariant.
+    */
+   BRW_SUBGROUP_SIZE_REQUIRE_8   = 8,  /**< VK_EXT_subgroup_size_control */
+   BRW_SUBGROUP_SIZE_REQUIRE_16  = 16, /**< VK_EXT_subgroup_size_control */
+   BRW_SUBGROUP_SIZE_REQUIRE_32  = 32, /**< VK_EXT_subgroup_size_control */
 };
 
 struct brw_base_prog_key {
index 4ab50aad11f99b2baa7d1b66f6f1ffc9d31af7e7..b16ed3be4244c36ea8856010c659f8449356fbf8 100644 (file)
@@ -8264,15 +8264,33 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
    min_dispatch_width = MAX2(8, min_dispatch_width);
    min_dispatch_width = util_next_power_of_two(min_dispatch_width);
    assert(min_dispatch_width <= 32);
+   unsigned max_dispatch_width = 32;
 
    fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL;
    cfg_t *cfg = NULL;
    const char *fail_msg = NULL;
    unsigned promoted_constants = 0;
 
+   if ((int)key->base.subgroup_size_type >= (int)BRW_SUBGROUP_SIZE_REQUIRE_8) {
+      /* These enum values are expressly chosen to be equal to the subgroup
+       * size that they require.
+       */
+      const unsigned required_dispatch_width =
+         (unsigned)key->base.subgroup_size_type;
+      assert(required_dispatch_width == 8 ||
+             required_dispatch_width == 16 ||
+             required_dispatch_width == 32);
+      if (required_dispatch_width < min_dispatch_width ||
+          required_dispatch_width > max_dispatch_width) {
+         fail_msg = "Cannot satisfy explicit subgroup size";
+      } else {
+         min_dispatch_width = max_dispatch_width = required_dispatch_width;
+      }
+   }
+
    /* Now the main event: Visit the shader IR and generate our CS IR for it.
     */
-   if (min_dispatch_width <= 8) {
+   if (!fail_msg && min_dispatch_width <= 8 && max_dispatch_width >= 8) {
       nir_shader *nir8 = compile_cs_to_nir(compiler, mem_ctx, key,
                                            src_shader, 8);
       v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
@@ -8293,7 +8311,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
    }
 
    if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
-       !fail_msg && min_dispatch_width <= 16) {
+       !fail_msg && min_dispatch_width <= 16 && max_dispatch_width >= 16) {
       /* Try a SIMD16 compile */
       nir_shader *nir16 = compile_cs_to_nir(compiler, mem_ctx, key,
                                             src_shader, 16);
@@ -8327,7 +8345,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
    /* We should always be able to do SIMD32 for compute shaders */
    assert(!v16 || v16->max_dispatch_width >= 32);
 
-   if (!fail_msg && (min_dispatch_width > 16 || (INTEL_DEBUG & DEBUG_DO32))) {
+   if (!fail_msg && (min_dispatch_width > 16 || (INTEL_DEBUG & DEBUG_DO32)) &&
+       max_dispatch_width >= 32) {
       /* Try a SIMD32 compile */
       nir_shader *nir32 = compile_cs_to_nir(compiler, mem_ctx, key,
                                             src_shader, 32);
index 3f90ab5a2baeda3da91aa731c867792d40df7c9e..3260376ad1cf5cdd0f8170aa460f3a7c72a7dea5 100644 (file)
@@ -999,6 +999,15 @@ get_subgroup_size(gl_shader_stage stage,
        * size.
        */
       return stage == MESA_SHADER_FRAGMENT ? 0 : max_subgroup_size;
+
+   case BRW_SUBGROUP_SIZE_REQUIRE_8:
+   case BRW_SUBGROUP_SIZE_REQUIRE_16:
+   case BRW_SUBGROUP_SIZE_REQUIRE_32:
+      assert(stage == MESA_SHADER_COMPUTE);
+      /* These enum values are expressly chosen to be equal to the subgroup
+       * size that they require.
+       */
+      return key->subgroup_size_type;
    }
 
    unreachable("Invalid subgroup size type");