glsl: fix derived cs variables
authorIlia Mirkin <imirkin@alum.mit.edu>
Sat, 21 Oct 2017 19:15:41 +0000 (15:15 -0400)
committerIlia Mirkin <imirkin@alum.mit.edu>
Mon, 23 Oct 2017 12:34:56 +0000 (08:34 -0400)
There are two issues with the current implementation. First, it relies
on the layout(local_size_*) happening in the same shader as the main
function, and secondly it doesn't work for variable group sizes.

In both cases, the simplest fix is to move the setup of these derived
values to a later time, similar to how the gl_VertexID workarounds are
done. There already exist system values defined for both of the derived
values, so we use them unconditionally, and lower them after linking is
performed.

While we're at it, we move to using gl_LocalGroupSizeARB instead of
gl_WorkGroupSize for variable group sizes.

Also the dead code elimination avoidance can be removed, since there
can be situations where gl_LocalGroupSizeARB is needed but has not been
inserted for the shader with main function. As a result, the lowering
code has to insert its own copies of the system values if needed.

Reported-by: Stephane Chevigny <stephane.chevigny@polymtl.ca>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103393
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/compiler/Makefile.sources
src/compiler/glsl/builtin_variables.cpp
src/compiler/glsl/glsl_parser_extras.cpp
src/compiler/glsl/ir.h
src/compiler/glsl/ir_optimization.h
src/compiler/glsl/linker.cpp
src/compiler/glsl/lower_cs_derived.cpp [new file with mode: 0644]
src/compiler/glsl/meson.build
src/compiler/glsl/opt_dead_builtin_variables.cpp

index 2724a41286e040658ae7b00493eff86018ba3c0f..27cc33ab8356d7cca87c50735d0c2d4ddfa42cff 100644 (file)
@@ -85,6 +85,7 @@ LIBGLSL_FILES = \
        glsl/lower_buffer_access.cpp \
        glsl/lower_buffer_access.h \
        glsl/lower_const_arrays_to_uniforms.cpp \
+       glsl/lower_cs_derived.cpp \
        glsl/lower_discard.cpp \
        glsl/lower_discard_flow.cpp \
        glsl/lower_distance.cpp \
index ea2d897cc8e21f44da4876c19582eed6bd47c64f..00bc99dd619767b13d84ab3113cf1488284d6e85 100644 (file)
@@ -1295,15 +1295,10 @@ builtin_variable_generator::generate_cs_special_vars()
                        uvec3_t, "gl_LocalGroupSizeARB");
    }
 
-   if (state->ctx->Const.LowerCsDerivedVariables) {
-      add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
-      add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
-   } else {
-      add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
-                       uvec3_t, "gl_GlobalInvocationID");
-      add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
-                       uint_t, "gl_LocalInvocationIndex");
-   }
+   add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
+                    uvec3_t, "gl_GlobalInvocationID");
+   add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
+                    uint_t, "gl_LocalInvocationIndex");
 }
 
 
@@ -1474,84 +1469,3 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
       break;
    }
 }
-
-
-/**
- * Initialize compute shader variables with values that are derived from other
- * compute shader variable.
- */
-static void
-initialize_cs_derived_variables(gl_shader *shader,
-                                ir_function_signature *const main_sig)
-{
-   assert(shader->Stage == MESA_SHADER_COMPUTE);
-
-   ir_variable *gl_GlobalInvocationID =
-      shader->symbols->get_variable("gl_GlobalInvocationID");
-   assert(gl_GlobalInvocationID);
-   ir_variable *gl_WorkGroupID =
-      shader->symbols->get_variable("gl_WorkGroupID");
-   assert(gl_WorkGroupID);
-   ir_variable *gl_WorkGroupSize =
-      shader->symbols->get_variable("gl_WorkGroupSize");
-   if (gl_WorkGroupSize == NULL) {
-      void *const mem_ctx = ralloc_parent(shader->ir);
-      gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
-                                                  "gl_WorkGroupSize",
-                                                  ir_var_auto);
-      gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
-      gl_WorkGroupSize->data.read_only = true;
-      shader->ir->push_head(gl_WorkGroupSize);
-   }
-   ir_variable *gl_LocalInvocationID =
-      shader->symbols->get_variable("gl_LocalInvocationID");
-   assert(gl_LocalInvocationID);
-
-   /* gl_GlobalInvocationID =
-    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
-    */
-   ir_instruction *inst =
-      assign(gl_GlobalInvocationID,
-             add(mul(gl_WorkGroupID, gl_WorkGroupSize),
-                 gl_LocalInvocationID));
-   main_sig->body.push_head(inst);
-
-   /* gl_LocalInvocationIndex =
-    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
-    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
-    *    gl_LocalInvocationID.x;
-    */
-   ir_expression *index_z =
-      mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
-          swizzle_y(gl_WorkGroupSize));
-   ir_expression *index_y =
-      mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
-   ir_expression *index_y_plus_z = add(index_y, index_z);
-   operand index_x(swizzle_x(gl_LocalInvocationID));
-   ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
-   ir_variable *gl_LocalInvocationIndex =
-      shader->symbols->get_variable("gl_LocalInvocationIndex");
-   assert(gl_LocalInvocationIndex);
-   inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
-   main_sig->body.push_head(inst);
-}
-
-
-/**
- * Initialize builtin variables with values based on other builtin variables.
- * These are initialized in the main function.
- */
-void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
-                                        gl_shader *shader)
-{
-   /* We only need to set CS variables currently. */
-   if (shader->Stage == MESA_SHADER_COMPUTE &&
-       ctx->Const.LowerCsDerivedVariables) {
-      ir_function_signature *const main_sig =
-         _mesa_get_main_function_signature(shader->symbols);
-
-      if (main_sig != NULL)
-         initialize_cs_derived_variables(shader, main_sig);
-   }
-}
index 764c05ad802e987c370a061db7ffbc39bdb4455e..51d835bc535da10f0037765b779bbdc86feece7c 100644 (file)
@@ -2009,8 +2009,6 @@ opt_shader_and_create_symbol_table(struct gl_context *ctx,
          break;
       }
    }
-
-   _mesa_glsl_initialize_derived_variables(ctx, shader);
 }
 
 void
index 27eafe8e22bebf23cf879b7bc855cc7b2338ff14..070d7b3ffdd4ee60148252131e0d91d8f839e918 100644 (file)
@@ -2412,10 +2412,6 @@ extern void
 _mesa_glsl_initialize_variables(exec_list *instructions,
                                struct _mesa_glsl_parse_state *state);
 
-extern void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
-                                        gl_shader *shader);
-
 extern void
 reparent_ir(exec_list *list, void *mem_ctx);
 
index eb3ec3b0c7d10e64a2b774aeaad17a11eea41aca..f44ddcb05be053eede78be616c2f5cd41e4e348c 100644 (file)
@@ -166,6 +166,7 @@ void optimize_dead_builtin_variables(exec_list *instructions,
 bool lower_tess_level(gl_linked_shader *shader);
 
 bool lower_vertex_id(gl_linked_shader *shader);
+bool lower_cs_derived(gl_linked_shader *shader);
 bool lower_blend_equation_advanced(gl_linked_shader *shader);
 
 bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
index 37983096784cb69f813642a6f7b701d6d68006d6..94bd7fb65921f23cbe1268c5640c8f628f0f65d4 100644 (file)
@@ -2374,6 +2374,9 @@ link_intrastage_shaders(void *mem_ctx,
    if (ctx->Const.VertexID_is_zero_based)
       lower_vertex_id(linked);
 
+   if (ctx->Const.LowerCsDerivedVariables)
+      lower_cs_derived(linked);
+
 #ifdef DEBUG
    /* Compute the source checksum. */
    linked->SourceChecksum = 0;
diff --git a/src/compiler/glsl/lower_cs_derived.cpp b/src/compiler/glsl/lower_cs_derived.cpp
new file mode 100644 (file)
index 0000000..f7ec2a4
--- /dev/null
@@ -0,0 +1,234 @@
+/*
+ * Copyright © 2017 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_cs_derived.cpp
+ *
+ * For hardware that does not support the gl_GlobalInvocationID and
+ * gl_LocalInvocationIndex system values, replace them with fresh
+ * globals. Note that we can't rely on gl_WorkGroupSize or
+ * gl_LocalGroupSizeARB being available, since they may only have been defined
+ * in a non-main shader.
+ *
+ * [ This can happen if only a secondary shader has the layout(local_size_*)
+ *   declaration. ]
+ *
+ * This is meant to be run post-linking.
+ */
+
+#include "glsl_symbol_table.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir.h"
+#include "ir_builder.h"
+#include "linker.h"
+#include "program/prog_statevars.h"
+#include "builtin_functions.h"
+
+using namespace ir_builder;
+
+namespace {
+
+class lower_cs_derived_visitor : public ir_hierarchical_visitor {
+public:
+   explicit lower_cs_derived_visitor(gl_linked_shader *shader)
+      : progress(false),
+        shader(shader),
+        local_size_variable(shader->Program->info.cs.local_size_variable),
+        gl_WorkGroupSize(NULL),
+        gl_WorkGroupID(NULL),
+        gl_LocalInvocationID(NULL),
+        gl_GlobalInvocationID(NULL),
+        gl_LocalInvocationIndex(NULL)
+   {
+      main_sig = _mesa_get_main_function_signature(shader->symbols);
+      assert(main_sig);
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+
+   ir_variable *add_system_value(
+         int slot, const glsl_type *type, const char *name);
+   void find_sysvals();
+   void make_gl_GlobalInvocationID();
+   void make_gl_LocalInvocationIndex();
+
+   bool progress;
+
+private:
+   gl_linked_shader *shader;
+   bool local_size_variable;
+   ir_function_signature *main_sig;
+
+   ir_rvalue *gl_WorkGroupSize;
+   ir_variable *gl_WorkGroupID;
+   ir_variable *gl_LocalInvocationID;
+
+   ir_variable *gl_GlobalInvocationID;
+   ir_variable *gl_LocalInvocationIndex;
+};
+
+} /* anonymous namespace */
+
+ir_variable *
+lower_cs_derived_visitor::add_system_value(
+      int slot, const glsl_type *type, const char *name)
+{
+   ir_variable *var = new(shader) ir_variable(type, name, ir_var_system_value);
+   var->data.how_declared = ir_var_declared_implicitly;
+   var->data.read_only = true;
+   var->data.location = slot;
+   var->data.explicit_location = true;
+   var->data.explicit_index = 0;
+   shader->ir->push_head(var);
+
+   return var;
+}
+
+void
+lower_cs_derived_visitor::find_sysvals()
+{
+   if (gl_WorkGroupSize != NULL)
+      return;
+
+   ir_variable *WorkGroupSize;
+   if (local_size_variable)
+      WorkGroupSize = shader->symbols->get_variable("gl_LocalGroupSizeARB");
+   else
+      WorkGroupSize = shader->symbols->get_variable("gl_WorkGroupSize");
+   if (WorkGroupSize)
+      gl_WorkGroupSize = new(shader) ir_dereference_variable(WorkGroupSize);
+   gl_WorkGroupID = shader->symbols->get_variable("gl_WorkGroupID");
+   gl_LocalInvocationID = shader->symbols->get_variable("gl_LocalInvocationID");
+
+   /*
+    * These may be missing due to either dead code elimination, or, in the
+    * case of the group size, due to the layout being declared in a non-main
+    * shader. Re-create them.
+    */
+
+   if (!gl_WorkGroupID)
+      gl_WorkGroupID = add_system_value(
+            SYSTEM_VALUE_WORK_GROUP_ID, glsl_type::uvec3_type, "gl_WorkGroupID");
+   if (!gl_LocalInvocationID)
+      gl_LocalInvocationID = add_system_value(
+            SYSTEM_VALUE_LOCAL_INVOCATION_ID, glsl_type::uvec3_type,
+            "gl_LocalInvocationID");
+   if (!WorkGroupSize) {
+      if (local_size_variable) {
+         gl_WorkGroupSize = new(shader) ir_dereference_variable(
+               add_system_value(
+                     SYSTEM_VALUE_LOCAL_GROUP_SIZE, glsl_type::uvec3_type,
+                     "gl_LocalGroupSizeARB"));
+      } else {
+         ir_constant_data data;
+         memset(&data, 0, sizeof(data));
+         for (int i = 0; i < 3; i++)
+            data.u[i] = shader->Program->info.cs.local_size[i];
+         gl_WorkGroupSize = new(shader) ir_constant(glsl_type::uvec3_type, &data);
+      }
+   }
+}
+
+void
+lower_cs_derived_visitor::make_gl_GlobalInvocationID()
+{
+   if (gl_GlobalInvocationID != NULL)
+      return;
+
+   find_sysvals();
+
+   /* gl_GlobalInvocationID =
+    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+    */
+   gl_GlobalInvocationID = new(shader) ir_variable(
+         glsl_type::uvec3_type, "__GlobalInvocationID", ir_var_temporary);
+   shader->ir->push_head(gl_GlobalInvocationID);
+
+   ir_instruction *inst =
+      assign(gl_GlobalInvocationID,
+             add(mul(gl_WorkGroupID, gl_WorkGroupSize->clone(shader, NULL)),
+                 gl_LocalInvocationID));
+   main_sig->body.push_head(inst);
+}
+
+void
+lower_cs_derived_visitor::make_gl_LocalInvocationIndex()
+{
+   if (gl_LocalInvocationIndex != NULL)
+      return;
+
+   find_sysvals();
+
+   /* gl_LocalInvocationIndex =
+    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+    *    gl_LocalInvocationID.x;
+    */
+   gl_LocalInvocationIndex = new(shader)
+      ir_variable(glsl_type::uint_type, "__LocalInvocationIndex", ir_var_temporary);
+   shader->ir->push_head(gl_LocalInvocationIndex);
+
+   ir_expression *index_z =
+      mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL))),
+          swizzle_y(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_expression *index_y =
+      mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_expression *index_y_plus_z = add(index_y, index_z);
+   operand index_x(swizzle_x(gl_LocalInvocationID));
+   ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
+   ir_instruction *inst =
+      assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
+   main_sig->body.push_head(inst);
+}
+
+ir_visitor_status
+lower_cs_derived_visitor::visit(ir_dereference_variable *ir)
+{
+   if (ir->var->data.mode == ir_var_system_value &&
+       ir->var->data.location == SYSTEM_VALUE_GLOBAL_INVOCATION_ID) {
+      make_gl_GlobalInvocationID();
+      ir->var = gl_GlobalInvocationID;
+      progress = true;
+   }
+
+   if (ir->var->data.mode == ir_var_system_value &&
+       ir->var->data.location == SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) {
+      make_gl_LocalInvocationIndex();
+      ir->var = gl_LocalInvocationIndex;
+      progress = true;
+   }
+
+   return visit_continue;
+}
+
+bool
+lower_cs_derived(gl_linked_shader *shader)
+{
+   if (shader->Stage != MESA_SHADER_COMPUTE)
+      return false;
+
+   lower_cs_derived_visitor v(shader);
+   v.run(shader->ir);
+
+   return v.progress;
+}
index d1a75eb8c3647c00e4cac256151667160b5bff14..76fcafb9910024d1dd22bb240c867312670b403c 100644 (file)
@@ -124,6 +124,7 @@ files_libglsl = files(
   'lower_buffer_access.cpp',
   'lower_buffer_access.h',
   'lower_const_arrays_to_uniforms.cpp',
+  'lower_cs_derived.cpp',
   'lower_discard.cpp',
   'lower_discard_flow.cpp',
   'lower_distance.cpp',
index 03e578982b9f710591cf50d565a390a0036d0ead..0d4e3a8f00a2d2278d6e80fc0b31d13cc45e9cd7 100644 (file)
@@ -62,23 +62,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
        * information, so removing these variables from the user shader will
        * cause problems later.
        *
-       * For compute shaders, gl_GlobalInvocationID has some dependencies, so
-       * we avoid removing these dependencies.
-       *
-       * We also avoid removing gl_GlobalInvocationID at this stage because it
-       * might be used by a linked shader. In this case it still needs to be
-       * initialized by the main function.
-       *
-       *    gl_GlobalInvocationID =
-       *       gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
-       *
-       * Similarly, we initialize gl_LocalInvocationIndex in the main function:
-       *
-       *    gl_LocalInvocationIndex =
-       *       gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
-       *       gl_LocalInvocationID.y * gl_WorkGroupSize.x +
-       *       gl_LocalInvocationID.x;
-       *
        * Matrix uniforms with "Transpose" are not eliminated because there's
        * an optimization pass that can turn references to the regular matrix
        * into references to the transpose matrix.  Eliminating the transpose
@@ -90,11 +73,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
        */
       if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
           || strcmp(var->name, "gl_Vertex") == 0
-          || strcmp(var->name, "gl_WorkGroupID") == 0
-          || strcmp(var->name, "gl_WorkGroupSize") == 0
-          || strcmp(var->name, "gl_LocalInvocationID") == 0
-          || strcmp(var->name, "gl_GlobalInvocationID") == 0
-          || strcmp(var->name, "gl_LocalInvocationIndex") == 0
           || strstr(var->name, "Transpose") != NULL)
          continue;