#include "nir/tgsi_to_nir.h"
 
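+/* GL program keys use the uniform (per-stage) notion of gl_SubgroupSize. */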
 #define KEY_INIT_NO_ID(gen)                              \
+   .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
    .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,   \
    .base.tex.compressed_multisample_layout_mask = ~0,    \
    .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
 
    float scale_factors[32];
 };
 
+/** An enum describing what kind of value the gl_SubgroupSize input takes. */
+enum PACKED brw_subgroup_size_type
+{
+   BRW_SUBGROUP_SIZE_API_CONSTANT,  /**< Vulkan behavior: one API-level constant */
+   BRW_SUBGROUP_SIZE_UNIFORM,       /**< OpenGL behavior: uniform within a stage */
+};
+
 struct brw_base_prog_key {
    unsigned program_string_id;
 
+   enum brw_subgroup_size_type subgroup_size_type;
+
    struct brw_sampler_prog_key_data tex;
 };
 
 
 {
    const struct gen_device_info *devinfo = compiler->devinfo;
 
-   brw_nir_apply_key(shader, compiler, &key->base, true);
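+   /* Fragment shaders are compiled SIMD8 and/or SIMD16 by default; a
+    * 32-wide subgroup is only possible when SIMD32 is forced with
+    * INTEL_DEBUG=do32.
+    */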
+   unsigned max_subgroup_size = unlikely(INTEL_DEBUG & DEBUG_DO32) ? 32 : 16;
+
+   brw_nir_apply_key(shader, compiler, &key->base, max_subgroup_size, true);
    brw_nir_lower_fs_inputs(shader, devinfo, key);
    brw_nir_lower_fs_outputs(shader);
 
                   unsigned dispatch_width)
 {
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
-   brw_nir_apply_key(shader, compiler, &key->base, true);
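+   /* Compute shaders are compiled once per SIMD width, so dispatch_width
+    * is the exact subgroup size here, not just a maximum.
+    */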
+   brw_nir_apply_key(shader, compiler, &key->base, dispatch_width, true);
 
    NIR_PASS_V(shader, brw_nir_lower_cs_intrinsics, dispatch_width);
 
 
 
    UNUSED bool progress; /* Written by OPT */
 
-   const nir_lower_subgroups_options subgroups_options = {
-      .subgroup_size = BRW_SUBGROUP_SIZE,
-      .ballot_bit_size = 32,
-      .lower_subgroup_masks = true,
-   };
-   OPT(nir_lower_subgroups, &subgroups_options);
-
    OPT(brw_nir_lower_mem_access_bit_sizes);
 
    do {
    return nir_lower_tex(nir, &tex_options);
 }
 
+static unsigned
+get_subgroup_size(const struct brw_base_prog_key *key,
+                  unsigned max_subgroup_size)
+{
+   switch (key->subgroup_size_type) {
+   case BRW_SUBGROUP_SIZE_API_CONSTANT:
+      /* We have to use the global constant size. */
+      return BRW_SUBGROUP_SIZE;
+
+   case BRW_SUBGROUP_SIZE_UNIFORM:
+      /* The size must be uniform across all invocations but may vary from
+       * one shader stage to another.  This gives us a bit more freedom.
+       *
+       * For compute, brw_nir_apply_key is called once per dispatch width,
+       * so this is the actual subgroup size rather than a maximum.  Since
+       * only one width of any given compute shader is ever dispatched, the
+       * size is still guaranteed to be uniform across invocations.
+       */
+      return max_subgroup_size;
+   }
+
+   unreachable("Invalid subgroup size type");
+}
+
 void
 brw_nir_apply_key(nir_shader *nir,
                   const struct brw_compiler *compiler,
                   const struct brw_base_prog_key *key,
+                  unsigned max_subgroup_size,
                   bool is_scalar)
 {
    bool progress = false;
 
    OPT(brw_nir_apply_sampler_key, compiler, &key->tex);
 
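+   /* Lower subgroup operations using the subgroup size implied by the key;
+    * see get_subgroup_size() above.
+    */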
+   const nir_lower_subgroups_options subgroups_options = {
+      .subgroup_size = get_subgroup_size(key, max_subgroup_size),
+      .ballot_bit_size = 32,
+      .lower_subgroup_masks = true,
+   };
+   OPT(nir_lower_subgroups, &subgroups_options);
+
    if (progress)
       brw_nir_optimize(nir, compiler, is_scalar, false);
 }
 
 void brw_nir_apply_key(nir_shader *nir,
                        const struct brw_compiler *compiler,
                        const struct brw_base_prog_key *key,
+                       unsigned max_subgroup_size,
                        bool is_scalar);
 
 enum brw_reg_type brw_type_for_nir_type(const struct gen_device_info *devinfo,
 
    nir->info.inputs_read = key->inputs_read;
    nir->info.patch_inputs_read = key->patch_inputs_read;
 
-   brw_nir_apply_key(nir, compiler, &key->base, is_scalar);
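+   /* Vertex pipeline stages are never compiled wider than SIMD8. */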
+   brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
    brw_nir_lower_tes_inputs(nir, input_vue_map);
    brw_nir_lower_vue_outputs(nir);
    brw_postprocess_nir(nir, compiler, is_scalar);
 
                char **error_str)
 {
    const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX];
-   brw_nir_apply_key(shader, compiler, &key->base, is_scalar);
+   brw_nir_apply_key(shader, compiler, &key->base, 8, is_scalar);
 
    const unsigned *assembly = NULL;
 
 
                        &c.input_vue_map, inputs_read,
                        shader->info.separate_shader);
 
-   brw_nir_apply_key(shader, compiler, &key->base, is_scalar);
+   brw_nir_apply_key(shader, compiler, &key->base, 8, is_scalar);
    brw_nir_lower_vue_inputs(shader, &c.input_vue_map);
    brw_nir_lower_vue_outputs(shader);
    brw_postprocess_nir(shader, compiler, is_scalar);
 
                             nir->info.outputs_written,
                             nir->info.patch_outputs_written);
 
-   brw_nir_apply_key(nir, compiler, &key->base, is_scalar);
+   brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
    brw_nir_lower_vue_inputs(nir, &input_vue_map);
    brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
                              key->tes_primitive_mode);
 
 populate_base_prog_key(const struct gen_device_info *devinfo,
                        struct brw_base_prog_key *key)
 {
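+   /* Vulkan requires gl_SubgroupSize to match the API-reported constant. */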
+   key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
+
    populate_sampler_prog_key(devinfo, &key->tex);
 }
 
 
                            struct brw_base_prog_key *key)
 {
    key->program_string_id = prog->id;
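+   /* GL only requires the subgroup size to be uniform within a stage. */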
+   key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
    brw_populate_sampler_prog_key_data(ctx, &prog->program, &key->tex);
 }
 
                                    struct brw_base_prog_key *key)
 {
    key->program_string_id = prog->id;
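+   /* Match the draw-time key so the precompiled program can be reused. */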
+   key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
    brw_setup_tex_for_precompile(devinfo, &key->tex, &prog->program);
 }