From 43ef75b394f1cd779a54a22fe16fbb5ef23f0458 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 22 Jun 2017 16:37:51 -0700 Subject: [PATCH] nir: Add system values from ARB_shader_ballot We already had a channel_num system value, which I'm renaming to subgroup_invocation to match the rest of the new system values. Note that while ballotARB(true) will return zeros in the high 32-bits on systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB variables do not consider whether channels are enabled. See issue (1) of ARB_shader_ballot. Reviewed-by: Connor Abbott Reviewed-by: Kenneth Graunke --- src/compiler/nir/nir.c | 28 +++++++++++++++++++ src/compiler/nir/nir.h | 1 + src/compiler/nir/nir_intrinsics.h | 8 +++++- src/compiler/nir/nir_lower_system_values.c | 14 ++++++++++ src/compiler/nir/nir_opt_intrinsics.c | 31 ++++++++++++++++++++++ src/intel/compiler/brw_fs_nir.cpp | 2 +- src/intel/compiler/brw_nir_intrinsics.c | 4 +-- 7 files changed, 84 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 491b908396c..b608b2a0d64 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1908,6 +1908,20 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_helper_invocation; case SYSTEM_VALUE_VIEW_INDEX: return nir_intrinsic_load_view_index; + case SYSTEM_VALUE_SUBGROUP_SIZE: + return nir_intrinsic_load_subgroup_size; + case SYSTEM_VALUE_SUBGROUP_INVOCATION: + return nir_intrinsic_load_subgroup_invocation; + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: + return nir_intrinsic_load_subgroup_eq_mask; + case SYSTEM_VALUE_SUBGROUP_GE_MASK: + return nir_intrinsic_load_subgroup_ge_mask; + case SYSTEM_VALUE_SUBGROUP_GT_MASK: + return nir_intrinsic_load_subgroup_gt_mask; + case SYSTEM_VALUE_SUBGROUP_LE_MASK: + return nir_intrinsic_load_subgroup_le_mask; + case SYSTEM_VALUE_SUBGROUP_LT_MASK: + return nir_intrinsic_load_subgroup_lt_mask; default: unreachable("system value does not directly correspond to 
intrinsic"); } @@ -1961,6 +1975,20 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_HELPER_INVOCATION; case nir_intrinsic_load_view_index: return SYSTEM_VALUE_VIEW_INDEX; + case nir_intrinsic_load_subgroup_size: + return SYSTEM_VALUE_SUBGROUP_SIZE; + case nir_intrinsic_load_subgroup_invocation: + return SYSTEM_VALUE_SUBGROUP_INVOCATION; + case nir_intrinsic_load_subgroup_eq_mask: + return SYSTEM_VALUE_SUBGROUP_EQ_MASK; + case nir_intrinsic_load_subgroup_ge_mask: + return SYSTEM_VALUE_SUBGROUP_GE_MASK; + case nir_intrinsic_load_subgroup_gt_mask: + return SYSTEM_VALUE_SUBGROUP_GT_MASK; + case nir_intrinsic_load_subgroup_le_mask: + return SYSTEM_VALUE_SUBGROUP_LE_MASK; + case nir_intrinsic_load_subgroup_lt_mask: + return SYSTEM_VALUE_SUBGROUP_LT_MASK; default: unreachable("intrinsic doesn't produce a system value"); } diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index c5e5358aaaa..86090069520 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1822,6 +1822,7 @@ typedef struct nir_shader_compiler_options { bool lower_extract_word; bool lower_vote_trivial; + bool lower_subgroup_masks; /** * Does the driver support real 32-bit integers? 
(Otherwise, integers diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 6c6ba4cf590..96ecfbc338d 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx) SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx) SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx) SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) -SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx) SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx) SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx) /* Blend constant color values. Float values are clamped. 
*/ SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index 810100a0816..ba20d3083f1 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -116,6 +116,20 @@ convert_block(nir_block *block, nir_builder *b) nir_load_base_instance(b)); break; + case SYSTEM_VALUE_SUBGROUP_EQ_MASK: + case SYSTEM_VALUE_SUBGROUP_GE_MASK: + case SYSTEM_VALUE_SUBGROUP_GT_MASK: + case SYSTEM_VALUE_SUBGROUP_LE_MASK: + case SYSTEM_VALUE_SUBGROUP_LT_MASK: { + nir_intrinsic_op op = + nir_intrinsic_from_system_value(var->data.location); + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); + nir_ssa_dest_init(&load->instr, &load->dest, 1, 64, NULL); + nir_builder_instr_insert(b, &load->instr); + sysval = &load->dest.ssa; + break; + } + default: break; } diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index bdb46e9300b..4f36166510b 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -62,6 +62,37 @@ opt_intrinsics_impl(nir_function_impl *impl) replacement = nir_imm_int(&b, NIR_TRUE); break; } + case nir_intrinsic_load_subgroup_eq_mask: + case nir_intrinsic_load_subgroup_ge_mask: + case nir_intrinsic_load_subgroup_gt_mask: + case nir_intrinsic_load_subgroup_le_mask: + case nir_intrinsic_load_subgroup_lt_mask: { + if (!b.shader->options->lower_subgroup_masks) + break; + + nir_ssa_def *count = nir_load_subgroup_invocation(&b); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_subgroup_eq_mask: + replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count); + break; + case nir_intrinsic_load_subgroup_ge_mask: + replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count); + break; + case nir_intrinsic_load_subgroup_gt_mask: + replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count); + break; + case 
nir_intrinsic_load_subgroup_le_mask: + replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count)); + break; + case nir_intrinsic_load_subgroup_lt_mask: + replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~0ull), count)); + break; + default: + unreachable("you seriously can't tell this is unreachable?"); + } + break; + } default: break; } diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 607cb01c8b7..491f5184e39 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4103,7 +4103,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_channel_num: { + case nir_intrinsic_load_subgroup_invocation: { fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW); dest = retype(dest, BRW_REGISTER_TYPE_UD); const fs_builder allbld8 = bld.group(8, 0).exec_all(); diff --git a/src/intel/compiler/brw_nir_intrinsics.c b/src/intel/compiler/brw_nir_intrinsics.c index d63570fa2a7..abbbc6f93ec 100644 --- a/src/intel/compiler/brw_nir_intrinsics.c +++ b/src/intel/compiler/brw_nir_intrinsics.c @@ -88,10 +88,10 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state, /* We construct the local invocation index from: * * gl_LocalInvocationIndex = - * cs_thread_local_id + channel_num; + * cs_thread_local_id + subgroup_invocation; */ nir_ssa_def *thread_local_id = read_thread_local_id(state); - nir_ssa_def *channel = nir_load_channel_num(b); + nir_ssa_def *channel = nir_load_subgroup_invocation(b); sysval = nir_iadd(b, channel, thread_local_id); break; } -- 2.30.2