intel,nir: Move gl_LocalInvocationID lowering to nir_lower_system_values
author     Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 15 Nov 2018 16:25:46 +0000 (10:25 -0600)
committer  Jason Ekstrand <jason.ekstrand@intel.com>
Mon, 19 Nov 2018 15:57:41 +0000 (09:57 -0600)
It's not at all intel-specific; the formula is dictated by OpenGL and
Vulkan.  The only intel-specific thing is that we need the lowering.  As
a nice side-effect, the new version is variable-group-size ready.
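
For reference, a minimal host-side sketch of the lowering formula (illustrative
only, not part of this patch; the function name is made up):

    #include <stdint.h>

    /* Computes gl_LocalInvocationID from gl_LocalInvocationIndex for a
     * workgroup of size (sx, sy, sz).  The trailing "% sz" is omitted for
     * the same reason as in the pass: a valid index can never push the z
     * component past sz - 1.
     */
    static void
    local_id_from_index(uint32_t index, uint32_t sx, uint32_t sy, uint32_t sz,
                        uint32_t id[3])
    {
       (void)sz;  /* only needed for the omitted final modulo */
       id[0] = index % sx;
       id[1] = (index / sx) % sy;
       id[2] = index / (sx * sy);
    }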

Reviewed-by: Plamena Manolova <plamena.manolova@intel.com>
src/compiler/nir/nir.h
src/compiler/nir/nir_lower_system_values.c
src/intel/compiler/brw_compiler.c
src/intel/compiler/brw_nir_lower_cs_intrinsics.c

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index b0cff50eaf2aca860e8c85672ef9a627eb7d1f8a..1dd605010f6d1d5330091bc531a086e539917b75 100644
@@ -2178,6 +2178,7 @@ typedef struct nir_shader_compiler_options {
    bool lower_helper_invocation;
 
    bool lower_cs_local_index_from_id;
+   bool lower_cs_local_id_from_index;
 
    bool lower_device_index_to_zero;
 
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
index fbc40573579c6359d39bfb6a49fe7a28fdb248bc..08a9e8be44a96a4d983b730a417fc1bfdcab38cc 100644
@@ -51,6 +51,45 @@ build_local_group_size(nir_builder *b)
    return local_size;
 }
 
+static nir_ssa_def *
+build_local_invocation_id(nir_builder *b)
+{
+   if (b->shader->options->lower_cs_local_id_from_index) {
+      /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based
+       * on this formula:
+       *
+       *    gl_LocalInvocationID.x =
+       *       gl_LocalInvocationIndex % gl_WorkGroupSize.x;
+       *    gl_LocalInvocationID.y =
+       *       (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
+       *       gl_WorkGroupSize.y;
+       *    gl_LocalInvocationID.z =
+       *       (gl_LocalInvocationIndex /
+       *        (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
+       *       gl_WorkGroupSize.z;
+       *
+       * However, the final % gl_WorkGroupSize.z does nothing unless we
+       * accidentally end up with a gl_LocalInvocationIndex that is too
+       * large, so it can safely be omitted.
+       */
+      nir_ssa_def *local_index = nir_load_local_invocation_index(b);
+      nir_ssa_def *local_size = build_local_group_size(b);
+
+      nir_ssa_def *id_x, *id_y, *id_z;
+      id_x = nir_umod(b, local_index,
+                         nir_channel(b, local_size, 0));
+      id_y = nir_umod(b, nir_udiv(b, local_index,
+                                     nir_channel(b, local_size, 0)),
+                         nir_channel(b, local_size, 1));
+      id_z = nir_udiv(b, local_index,
+                         nir_imul(b, nir_channel(b, local_size, 0),
+                                     nir_channel(b, local_size, 1)));
+      return nir_vec3(b, id_x, id_y, id_z);
+   } else {
+      return nir_load_local_invocation_id(b);
+   }
+}
+
 static bool
 convert_block(nir_block *block, nir_builder *b)
 {
@@ -91,7 +130,7 @@ convert_block(nir_block *block, nir_builder *b)
           */
          nir_ssa_def *group_size = build_local_group_size(b);
          nir_ssa_def *group_id = nir_load_work_group_id(b);
-         nir_ssa_def *local_id = nir_load_local_invocation_id(b);
+         nir_ssa_def *local_id = build_local_invocation_id(b);
 
          sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);
          break;
@@ -126,6 +165,14 @@ convert_block(nir_block *block, nir_builder *b)
          break;
       }
 
+      case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
+         /* If lower_cs_local_id_from_index is true, then we derive the local
+          * ID from the local index.
+          */
+         if (b->shader->options->lower_cs_local_id_from_index)
+            sysval = build_local_invocation_id(b);
+         break;
+
       case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
          sysval = build_local_group_size(b);
          break;
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index e863b08b99134c7d2f63eb85ad1e49a570c4a3ec..fe632c5badc14ca4bf2b844ae5056881222ff1df 100644
@@ -42,6 +42,7 @@
    .lower_fdiv = true,                                                        \
    .lower_flrp64 = true,                                                      \
    .lower_ldexp = true,                                                       \
+   .lower_cs_local_id_from_index = true,                                      \
    .lower_device_index_to_zero = true,                                        \
    .native_integers = true,                                                   \
    .use_interpolated_input_intrinsics = true,                                 \
diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
index bfbdea0e8fa322fa6bca7decdd7f4f6dfb76ed24..fab5edc893fb38b4cca41ee449f95ec33d7c4bfe 100644
@@ -70,39 +70,6 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
          break;
       }
 
-      case nir_intrinsic_load_local_invocation_id: {
-         /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based
-          * on this formula:
-          *
-          *    gl_LocalInvocationID.x =
-          *       gl_LocalInvocationIndex % gl_WorkGroupSize.x;
-          *    gl_LocalInvocationID.y =
-          *       (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
-          *       gl_WorkGroupSize.y;
-          *    gl_LocalInvocationID.z =
-          *       (gl_LocalInvocationIndex /
-          *        (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
-          *       gl_WorkGroupSize.z;
-          */
-         unsigned *size = nir->info.cs.local_size;
-
-         nir_ssa_def *local_index = nir_load_local_invocation_index(b);
-
-         nir_const_value uvec3;
-         memset(&uvec3, 0, sizeof(uvec3));
-         uvec3.u32[0] = 1;
-         uvec3.u32[1] = size[0];
-         uvec3.u32[2] = size[0] * size[1];
-         nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);
-         uvec3.u32[0] = size[0];
-         uvec3.u32[1] = size[1];
-         uvec3.u32[2] = size[2];
-         nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);
-
-         sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val);
-         break;
-      }
-
       case nir_intrinsic_load_subgroup_id:
          if (state->local_workgroup_size > 8)
             continue;
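
The per-component formulation removed above (divide the index by
{1, WorkGroupSize.x, WorkGroupSize.x * WorkGroupSize.y}, then take a
component-wise modulo) computes the same values as the new scalar form in
nir_lower_system_values.c.  A quick standalone check of that equivalence
(illustrative only; the workgroup size is an arbitrary example):

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
       const uint32_t sx = 8, sy = 4, sz = 2;
       for (uint32_t i = 0; i < sx * sy * sz; i++) {
          /* Old vectorized form: (i / {1, sx, sx*sy}) % {sx, sy, sz} */
          uint32_t old_id[3] = {
             (i / 1) % sx,
             (i / sx) % sy,
             (i / (sx * sy)) % sz,
          };
          /* New scalar form; the trailing % sz is dropped */
          uint32_t new_id[3] = {
             i % sx,
             (i / sx) % sy,
             i / (sx * sy),
          };
          for (int c = 0; c < 3; c++)
             assert(old_id[c] == new_id[c]);
       }
       return 0;
    }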