radeonsi/gfx10: use 32-bit wavemasks for Wave32
author: Marek Olšák <marek.olsak@amd.com>
Tue, 16 Jul 2019 04:55:46 +0000 (00:55 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Sat, 20 Jul 2019 00:16:19 +0000 (20:16 -0400)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/common/ac_llvm_build.c
src/amd/common/ac_llvm_build.h
src/amd/common/ac_nir_to_llvm.c
src/gallium/drivers/radeonsi/si_shader.c

index 855ebb3d3dd6d2813e22587083c7ed208c83afa4..250bfc5229ec55e0560eacc6f4462504a7af65a1 100644 (file)
@@ -92,6 +92,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
        ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
        ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
        ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+       ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
 
        ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
        ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
@@ -447,7 +448,16 @@ LLVMValueRef
 ac_build_ballot(struct ac_llvm_context *ctx,
                LLVMValueRef value)
 {
-       const char *name = HAVE_LLVM >= 0x900 ? "llvm.amdgcn.icmp.i64.i32" : "llvm.amdgcn.icmp.i32";
+       const char *name;
+
+       if (HAVE_LLVM >= 0x900) {
+               if (ctx->wave_size == 64)
+                       name = "llvm.amdgcn.icmp.i64.i32";
+               else
+                       name = "llvm.amdgcn.icmp.i32.i32";
+       } else {
+               name = "llvm.amdgcn.icmp.i32";
+       }
        LLVMValueRef args[3] = {
                value,
                ctx->i32_0,
@@ -461,8 +471,7 @@ ac_build_ballot(struct ac_llvm_context *ctx,
 
        args[0] = ac_to_integer(ctx, args[0]);
 
-       return ac_build_intrinsic(ctx, name,
-                                 ctx->i64, args, 3,
+       return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
                                  AC_FUNC_ATTR_NOUNWIND |
                                  AC_FUNC_ATTR_READNONE |
                                  AC_FUNC_ATTR_CONVERGENT);
@@ -498,7 +507,7 @@ ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
 {
        LLVMValueRef vote_set = ac_build_ballot(ctx, value);
        return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
-                            LLVMConstInt(ctx->i64, 0, 0), "");
+                            LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
 }
 
 LLVMValueRef
@@ -511,7 +520,7 @@ ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
                                         vote_set, active_set, "");
        LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
                                          vote_set,
-                                         LLVMConstInt(ctx->i64, 0, 0), "");
+                                         LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
        return LLVMBuildOr(ctx->builder, all, none, "");
 }
 
@@ -3848,6 +3857,11 @@ ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef v
 LLVMValueRef
 ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
 {
+       if (ctx->wave_size == 32) {
+               return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
+                                         (LLVMValueRef []) { mask, ctx->i32_0 },
+                                         2, AC_FUNC_ATTR_READNONE);
+       }
        LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask,
                                                 LLVMVectorType(ctx->i32, 2),
                                                 "");
index cc1807221b212ed1c89a5a7287a0bc4d116850fa..8fcede66fb2b4997179d25515ff6fe94ce000969 100644 (file)
@@ -74,6 +74,7 @@ struct ac_llvm_context {
        LLVMTypeRef v3f32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
+       LLVMTypeRef iN_wavemask;
 
        LLVMValueRef i8_0;
        LLVMValueRef i8_1;
index 96bf89a8bf939863e4bb1125b4873f45c55bf16c..f69e02f6d0a48971c44aaaa718d06d2209be67cc 100644 (file)
@@ -2809,12 +2809,12 @@ static LLVMValueRef
 visit_first_invocation(struct ac_nir_context *ctx)
 {
        LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
+       const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
 
        /* The second argument is whether cttz(0) should be defined, but we do not care. */
        LLVMValueRef args[] = {active_set, ctx->ac.i1false};
-       LLVMValueRef result =  ac_build_intrinsic(&ctx->ac,
-                                                 "llvm.cttz.i64",
-                                                 ctx->ac.i64, args, 2,
+       LLVMValueRef result =  ac_build_intrinsic(&ctx->ac, intr,
+                                                 ctx->ac.iN_wavemask, args, 2,
                                                  AC_FUNC_ATTR_NOUNWIND |
                                                  AC_FUNC_ATTR_READNONE);
 
index b41e1e6252205f70d3471af4a82fe7c78f47e961..23a6a7455ec2f07d141c98ebdee07b868a38b0cd 100644 (file)
@@ -2178,8 +2178,12 @@ void si_load_system_value(struct si_shader_context *ctx,
        case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
        {
                LLVMValueRef id = ac_get_thread_id(&ctx->ac);
-               id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
-               value = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->i64, 1, 0), id, "");
+               if (ctx->ac.wave_size == 64)
+                       id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
+               value = LLVMBuildShl(ctx->ac.builder,
+                                    LLVMConstInt(ctx->ac.iN_wavemask, 1, 0), id, "");
+               if (ctx->ac.wave_size == 32)
+                       value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
                value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
                break;
        }
@@ -2193,16 +2197,19 @@ void si_load_system_value(struct si_shader_context *ctx,
                if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK ||
                    decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) {
                        /* All bits set except LSB */
-                       value = LLVMConstInt(ctx->i64, -2, 0);
+                       value = LLVMConstInt(ctx->ac.iN_wavemask, -2, 0);
                } else {
                        /* All bits set */
-                       value = LLVMConstInt(ctx->i64, -1, 0);
+                       value = LLVMConstInt(ctx->ac.iN_wavemask, -1, 0);
                }
-               id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
+               if (ctx->ac.wave_size == 64)
+                       id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
                value = LLVMBuildShl(ctx->ac.builder, value, id, "");
                if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
                    decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
                        value = LLVMBuildNot(ctx->ac.builder, value, "");
+               if (ctx->ac.wave_size == 32)
+                       value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
                value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
                break;
        }
@@ -4186,10 +4193,15 @@ static void ballot_emit(
 
        tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
        tmp = ac_build_ballot(&ctx->ac, tmp);
-       tmp = LLVMBuildBitCast(builder, tmp, ctx->v2i32, "");
 
-       emit_data->output[0] = LLVMBuildExtractElement(builder, tmp, ctx->i32_0, "");
-       emit_data->output[1] = LLVMBuildExtractElement(builder, tmp, ctx->i32_1, "");
+       emit_data->output[0] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
+
+       if (ctx->ac.wave_size == 32) {
+               emit_data->output[1] = ctx->i32_0;
+       } else {
+               tmp = LLVMBuildLShr(builder, tmp, LLVMConstInt(ctx->i64, 32, 0), "");
+               emit_data->output[1] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
+       }
 }
 
 static void read_lane_emit(