nir: Add system values from ARB_shader_ballot
author: Matt Turner <mattst88@gmail.com>
Thu, 22 Jun 2017 23:37:51 +0000 (16:37 -0700)
committer: Matt Turner <mattst88@gmail.com>
Thu, 20 Jul 2017 23:56:49 +0000 (16:56 -0700)
We already had a channel_num system value, which I'm renaming to
subgroup_invocation to match the rest of the new system values.

Note that while ballotARB(true) will return zeros in the high 32 bits on
systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
variables do not consider whether channels are enabled. See issue (1) of
ARB_shader_ballot.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/compiler/nir/nir.c
src/compiler/nir/nir.h
src/compiler/nir/nir_intrinsics.h
src/compiler/nir/nir_lower_system_values.c
src/compiler/nir/nir_opt_intrinsics.c
src/intel/compiler/brw_fs_nir.cpp
src/intel/compiler/brw_nir_intrinsics.c

index 491b908396c07664b38ca17dee6b6c6a2323645a..b608b2a0d64ccadd02505e50636dbd5af1e60825 100644 (file)
@@ -1908,6 +1908,20 @@ nir_intrinsic_from_system_value(gl_system_value val)
       return nir_intrinsic_load_helper_invocation;
    case SYSTEM_VALUE_VIEW_INDEX:
       return nir_intrinsic_load_view_index;
+   case SYSTEM_VALUE_SUBGROUP_SIZE:
+      return nir_intrinsic_load_subgroup_size;
+   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
+      return nir_intrinsic_load_subgroup_invocation;
+   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
+       return nir_intrinsic_load_subgroup_eq_mask;
+   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
+       return nir_intrinsic_load_subgroup_ge_mask;
+   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
+       return nir_intrinsic_load_subgroup_gt_mask;
+   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
+       return nir_intrinsic_load_subgroup_le_mask;
+   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
+       return nir_intrinsic_load_subgroup_lt_mask;
    default:
       unreachable("system value does not directly correspond to intrinsic");
    }
@@ -1961,6 +1975,20 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
       return SYSTEM_VALUE_HELPER_INVOCATION;
    case nir_intrinsic_load_view_index:
       return SYSTEM_VALUE_VIEW_INDEX;
+   case nir_intrinsic_load_subgroup_size:
+      return SYSTEM_VALUE_SUBGROUP_SIZE;
+   case nir_intrinsic_load_subgroup_invocation:
+      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
+   case nir_intrinsic_load_subgroup_eq_mask:
+      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
+   case nir_intrinsic_load_subgroup_ge_mask:
+      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
+   case nir_intrinsic_load_subgroup_gt_mask:
+      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
+   case nir_intrinsic_load_subgroup_le_mask:
+      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
+   case nir_intrinsic_load_subgroup_lt_mask:
+      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
    default:
       unreachable("intrinsic doesn't produce a system value");
    }
index c5e5358aaaabb9b6ace778381190e164ba43a252..860900695200e2e9d6adeb3dedadc8d1c4061866 100644 (file)
@@ -1822,6 +1822,7 @@ typedef struct nir_shader_compiler_options {
    bool lower_extract_word;
 
    bool lower_vote_trivial;
+   bool lower_subgroup_masks;
 
    /**
     * Does the driver support real 32-bit integers?  (Otherwise, integers
index 6c6ba4cf59081de9cd6ab5510fe09ecfdeb68c80..96ecfbc338df5a734be0101f048a353f7d843b1d 100644 (file)
@@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
 SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
 
 /* Blend constant color values.  Float values are clamped. */
 SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
index 810100a08160303b8a39da5c3787bf411767331d..ba20d3083f18a807c2f5967c857b1d1f6b0d5927 100644 (file)
@@ -116,6 +116,20 @@ convert_block(nir_block *block, nir_builder *b)
                            nir_load_base_instance(b));
          break;
 
+      case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
+      case SYSTEM_VALUE_SUBGROUP_GE_MASK:
+      case SYSTEM_VALUE_SUBGROUP_GT_MASK:
+      case SYSTEM_VALUE_SUBGROUP_LE_MASK:
+      case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
+         nir_intrinsic_op op =
+            nir_intrinsic_from_system_value(var->data.location);
+         nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
+         nir_ssa_dest_init(&load->instr, &load->dest, 1, 64, NULL);
+         nir_builder_instr_insert(b, &load->instr);
+         sysval = &load->dest.ssa;
+         break;
+      }
+
       default:
          break;
       }
index bdb46e9300bc94ed523d96336e04374e8ed403eb..4f36166510b85f48ab57a7e375d8a646eef7358b 100644 (file)
@@ -62,6 +62,37 @@ opt_intrinsics_impl(nir_function_impl *impl)
             replacement = nir_imm_int(&b, NIR_TRUE);
             break;
          }
+         case nir_intrinsic_load_subgroup_eq_mask:
+         case nir_intrinsic_load_subgroup_ge_mask:
+         case nir_intrinsic_load_subgroup_gt_mask:
+         case nir_intrinsic_load_subgroup_le_mask:
+         case nir_intrinsic_load_subgroup_lt_mask: {
+            if (!b.shader->options->lower_subgroup_masks)
+               break;
+
+            nir_ssa_def *count = nir_load_subgroup_invocation(&b);
+
+            switch (intrin->intrinsic) {
+            case nir_intrinsic_load_subgroup_eq_mask:
+               replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);
+               break;
+            case nir_intrinsic_load_subgroup_ge_mask:
+               replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count);
+               break;
+            case nir_intrinsic_load_subgroup_gt_mask:
+               replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count);
+               break;
+            case nir_intrinsic_load_subgroup_le_mask:
+               replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count));
+               break;
+            case nir_intrinsic_load_subgroup_lt_mask:
+               replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~0ull), count));
+               break;
+            default:
+               unreachable("you seriously can't tell this is unreachable?");
+            }
+            break;
+         }
          default:
             break;
          }
index 607cb01c8b755eac6a89c83291d60faa5c0ca61a..491f5184e3922c513ddc749ada9fe20b5ead0c37 100644 (file)
@@ -4103,7 +4103,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_load_channel_num: {
+   case nir_intrinsic_load_subgroup_invocation: {
       fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
       dest = retype(dest, BRW_REGISTER_TYPE_UD);
       const fs_builder allbld8 = bld.group(8, 0).exec_all();
index d63570fa2a78adb967ccad9490277648e970f154..abbbc6f93ec9d1cd2d7b1871b333fc4c9ff2f635 100644 (file)
@@ -88,10 +88,10 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
          /* We construct the local invocation index from:
           *
           *    gl_LocalInvocationIndex =
-          *       cs_thread_local_id + channel_num;
+          *       cs_thread_local_id + subgroup_invocation;
           */
          nir_ssa_def *thread_local_id = read_thread_local_id(state);
-         nir_ssa_def *channel = nir_load_channel_num(b);
+         nir_ssa_def *channel = nir_load_subgroup_invocation(b);
          sysval = nir_iadd(b, channel, thread_local_id);
          break;
       }