nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible
author Marek Olšák <marek.olsak@amd.com>
Wed, 10 Apr 2019 01:40:33 +0000 (21:40 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 16 Apr 2019 14:24:19 +0000 (10:24 -0400)
Acked-by: Timothy Arceri <tarceri@itsqueeze.com>
src/compiler/nir/nir.h
src/compiler/nir/nir_opt_intrinsics.c
src/gallium/drivers/radeonsi/si_get.c
src/mesa/state_tracker/st_glsl_to_nir.cpp

index 11f9b396e5676b88de9f085331c63210f1daa6ad..8cdc4ba5707dfc575b8435aca3b01fbd9e0e9026 100644 (file)
@@ -2307,6 +2307,14 @@ typedef struct nir_shader_compiler_options {
     */
    bool lower_helper_invocation;
 
+   /**
+    * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
+    *
+    *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
+    *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
+    */
+   bool optimize_sample_mask_in;
+
    bool lower_cs_local_index_from_id;
    bool lower_cs_local_id_from_index;
 
index 7b054faa20431bbcd5c76efec5a9bd1b7242c466..e185602bf9e083e9eb90d1b32e7672cfc1e4ab91 100644 (file)
@@ -29,7 +29,8 @@
  */
 
 static bool
-opt_intrinsics_impl(nir_function_impl *impl)
+opt_intrinsics_impl(nir_function_impl *impl,
+                    const struct nir_shader_compiler_options *options)
 {
    nir_builder b;
    nir_builder_init(&b, impl);
@@ -55,6 +56,41 @@ opt_intrinsics_impl(nir_function_impl *impl)
             if (nir_src_is_const(intrin->src[0]))
                replacement = nir_imm_true(&b);
             break;
+         case nir_intrinsic_load_sample_mask_in:
+            /* Transform:
+             *   gl_SampleMaskIn == 0 ---> gl_HelperInvocation
+             *   gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
+             */
+            if (!options->optimize_sample_mask_in)
+               continue;
+
+            nir_foreach_use_safe(use_src, &intrin->dest.ssa) {
+               if (use_src->parent_instr->type == nir_instr_type_alu) {
+                  nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr);
+
+                  if (alu->op == nir_op_ieq ||
+                      alu->op == nir_op_ine) {
+                     /* Check for 0 in either operand. */
+                     nir_const_value *const_val =
+                         nir_src_as_const_value(alu->src[0].src);
+                     if (!const_val)
+                        const_val = nir_src_as_const_value(alu->src[1].src);
+                     if (!const_val || const_val->i32 != 0)
+                        continue;
+
+                     nir_ssa_def *new_expr = nir_load_helper_invocation(&b, 1);
+
+                     if (alu->op == nir_op_ine)
+                        new_expr = nir_inot(&b, new_expr);
+
+                     nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+                                              nir_src_for_ssa(new_expr));
+                     nir_instr_remove(&alu->instr);
+                     continue;
+                  }
+               }
+            }
+            continue;
          default:
             break;
          }
@@ -81,7 +117,7 @@ nir_opt_intrinsics(nir_shader *shader)
       if (!function->impl)
          continue;
 
-      if (opt_intrinsics_impl(function->impl)) {
+      if (opt_intrinsics_impl(function->impl, shader->options)) {
          progress = true;
          nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                                nir_metadata_dominance);
index 9e92dd40e7b3aedd4fab9abea1933d701efba703..67fbc50998b6008a5eb71ea28bdecf90a995b93c 100644 (file)
@@ -502,6 +502,7 @@ static const struct nir_shader_compiler_options nir_options = {
        .lower_unpack_unorm_4x8 = true,
        .lower_extract_byte = true,
        .lower_extract_word = true,
+       .optimize_sample_mask_in = true,
        .max_unroll_iterations = 32,
        .native_integers = true,
 };
index 21fba1c29982b860915e7ff73e4b3364003c0653..97b2831b880a8fc9c96aef95bccb826a46e29c03 100644 (file)
@@ -506,6 +506,7 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
 
    NIR_PASS_V(nir, st_nir_lower_builtin);
    NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
+   NIR_PASS_V(nir, nir_opt_intrinsics);
 
    nir_variable_mode mask = nir_var_function_temp;
    nir_remove_dead_variables(nir, mask);