From 97f10934edf8ac02f28fccea0bcbbe9293375357 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 5 Feb 2018 22:54:18 +0100 Subject: [PATCH] ac/nir: Add vote_ieq/vote_feq lowering pass. The old vote_eq implementation supported only booleans, but now we have to support arbitrary values, so use the read_first_invocation intrinsic + ballot. I took this as an opportunity to figure out how easy it was to do this in nir instead of in the nir_to_llvm pass, and it actually turned out pretty okay IMO. Only creating the pass is some extra code. Reviewed-by: Dave Airlie --- src/amd/Makefile.sources | 1 + src/amd/common/ac_lower_subgroups.c | 92 +++++++++++++++++++++++++++++ src/amd/common/ac_nir_to_llvm.c | 8 +-- src/amd/common/ac_nir_to_llvm.h | 2 + src/amd/common/meson.build | 1 + 5 files changed, 99 insertions(+), 5 deletions(-) create mode 100644 src/amd/common/ac_lower_subgroups.c diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources index 10c4827e19c..f57dd403c5f 100644 --- a/src/amd/Makefile.sources +++ b/src/amd/Makefile.sources @@ -44,6 +44,7 @@ AMD_COMPILER_FILES = \ common/ac_llvm_helper.cpp \ common/ac_llvm_util.c \ common/ac_llvm_util.h \ + common/ac_lower_subgroups.c \ common/ac_shader_abi.h \ common/ac_shader_info.c \ common/ac_shader_info.h \ diff --git a/src/amd/common/ac_lower_subgroups.c b/src/amd/common/ac_lower_subgroups.c new file mode 100644 index 00000000000..d0782b481b7 --- /dev/null +++ b/src/amd/common/ac_lower_subgroups.c @@ -0,0 +1,92 @@ +/* + * Copyright © 2018 Google Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir/nir.h" +#include "nir/nir_builder.h" + +#include "ac_nir_to_llvm.h" + +static nir_ssa_def *ac_lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin) +{ + switch(intrin->intrinsic) { + case nir_intrinsic_vote_ieq: + case nir_intrinsic_vote_feq: { + nir_intrinsic_instr *rfi = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_read_first_invocation); + nir_ssa_dest_init(&rfi->instr, &rfi->dest, + 1, intrin->src[0].ssa->bit_size, NULL); + nir_src_copy(&rfi->src[0], &intrin->src[0], rfi); + rfi->num_components = 1; + + nir_ssa_def *is_ne; + if (intrin->intrinsic == nir_intrinsic_vote_feq) + is_ne = nir_fne(b, &rfi->dest.ssa, intrin->src[0].ssa); + else + is_ne = nir_ine(b, &rfi->dest.ssa, intrin->src[0].ssa); + + nir_intrinsic_instr *ballot = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_ballot); + nir_ssa_dest_init(&ballot->instr, &ballot->dest, + 1, 64, NULL); + ballot->src[0] = nir_src_for_ssa(is_ne); + ballot->num_components = 1; + + return nir_ieq(b, &ballot->dest.ssa, nir_imm_int64(b, 0)); + } + default: + return NULL; + } +} + +bool ac_lower_subgroups(struct nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + b.cursor = nir_before_instr(instr); + + nir_ssa_def *lower = ac_lower_subgroups_intrin(&b, intrin); + if (!lower) + continue; + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(lower)); + nir_instr_remove(instr); + progress = true; + } + } + } + + return progress; +} diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 4b3b253c439..f6ad51a412a 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -4563,11 +4563,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); break; } - case nir_intrinsic_vote_ieq: { - LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, get_src(ctx, instr->src[0])); - result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); - break; - } default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); @@ -6757,6 +6752,9 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, struct ac_nir_context ctx = {}; struct nir_function *func; + /* Last minute passes for both radv & radeonsi */ + ac_lower_subgroups(nir); + ctx.ac = *ac; ctx.abi = abi; diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index 870e7f8988c..983189eb93a 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -235,4 +235,6 @@ void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, struct nir_shader *nir); +bool ac_lower_subgroups(struct nir_shader *shader); + #endif /* AC_NIR_TO_LLVM_H */ diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index 22c13b955fa..6c5c2f45ac5 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -35,6 +35,7 @@ amd_common_files = files( 'ac_llvm_helper.cpp', 'ac_llvm_util.c', 'ac_llvm_util.h', + 'ac_lower_subgroups.c', 'ac_shader_abi.h', 'ac_shader_info.c', 'ac_shader_info.h', -- 2.30.2