radeonsi: implement ARB_shader_group_vote
author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 29 Mar 2017 18:29:37 +0000 (20:29 +0200)
committer: Nicolai Hähnle <nicolai.haehnle@amd.com>
Fri, 31 Mar 2017 05:56:27 +0000 (07:56 +0200)
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
docs/features.txt
docs/relnotes/17.1.0.html
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_shader.c

index d707f0118501d9207f50db106799941a6e9b3f8b..1e145e1ddad40478b332ddf9fa6ec492f7d408a5 100644 (file)
@@ -295,7 +295,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
   GL_ARB_shader_ballot                                  not started
   GL_ARB_shader_clock                                   DONE (i965/gen7+, radeonsi)
   GL_ARB_shader_draw_parameters                         DONE (i965, nvc0, radeonsi)
-  GL_ARB_shader_group_vote                              DONE (nvc0)
+  GL_ARB_shader_group_vote                              DONE (nvc0, radeonsi)
   GL_ARB_shader_stencil_export                          DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
   GL_ARB_shader_viewport_layer_array                    DONE (i965/gen6+)
   GL_ARB_sparse_buffer                                  not started
index c2a0166f72c13b20f55f452534f3b168e3309a7b..a11a37ffb8c11381129b016ab2e64d79a3d3ba32 100644 (file)
@@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers.
 <ul>
 <li>GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe</li>
 <li>GL_ARB_shader_clock on radeonsi</li>
+<li>GL_ARB_shader_group_vote on radeonsi</li>
 <li>GL_ARB_transform_feedback2 on i965/gen6</li>
 <li>GL_ARB_transform_feedback_overflow_query on i965/gen6+</li>
 <li>Geometry shaders enabled on swr</li>
index 2369471aa5887fc22328b4824ad6fe465296be3f..8aae11d3be457ad914576e2b256e95e979ec8a02 100644 (file)
@@ -424,6 +424,9 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_TGSI_CLOCK:
                return HAVE_LLVM >= 0x0309;
 
+       case PIPE_CAP_TGSI_VOTE:
+               return HAVE_LLVM >= 0x0400;
+
        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
                return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
 
@@ -480,7 +483,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
        case PIPE_CAP_VERTEXID_NOBASE:
        case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
-       case PIPE_CAP_TGSI_VOTE:
        case PIPE_CAP_MAX_WINDOW_RECTANGLES:
        case PIPE_CAP_NATIVE_FENCE_FD:
        case PIPE_CAP_TGSI_FS_FBFETCH:
index ec063ad9028772f6961d6eff038b4ae489ff79d8..874535a6b773c28d6f984a80065e721150b226e4 100644 (file)
@@ -5125,6 +5125,84 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
        }
 }
 
+/**
+ * Emit a call to the "llvm.amdgcn.icmp.i32" intrinsic for \p value.
+ *
+ * The intrinsic is called with the operands (value, 0, LLVMIntNE) and a
+ * requested result type of i64. If \p value is not already an i32 it is
+ * bitcast to i32 before being passed.
+ *
+ * NOTE(review): callers treat the i64 result as a set of invocations
+ * (presumably one bit per lane where value != 0) -- confirm against the
+ * intrinsic's documentation.
+ *
+ * \param ctx    shader context providing the builder and the cached types
+ * \param value  value to test
+ * \return the i64 result of the intrinsic call
+ */
+static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
+                                  LLVMValueRef value)
+{
+       LLVMBuilderRef builder = ctx->gallivm.builder;
+       LLVMValueRef icmp_args[3];
+
+       /* Operand 0: the tested value, always handed over as an i32. */
+       if (LLVMTypeOf(value) == ctx->i32)
+               icmp_args[0] = value;
+       else
+               icmp_args[0] = LLVMBuildBitCast(builder, value, ctx->i32, "");
+
+       /* Operands 1 and 2: the constant 0 and the predicate code, which is
+        * passed to the intrinsic as a plain i32 constant. */
+       icmp_args[1] = ctx->i32_0;
+       icmp_args[2] = LLVMConstInt(ctx->i32, LLVMIntNE, 0);
+
+       return lp_build_intrinsic(builder, "llvm.amdgcn.icmp.i32",
+                                 ctx->i64, icmp_args, 3,
+                                 LP_FUNC_ATTR_NOUNWIND |
+                                 LP_FUNC_ATTR_READNONE |
+                                 LP_FUNC_ATTR_CONVERGENT);
+}
+
+/**
+ * Emit callback installed for TGSI_OPCODE_VOTE_ALL.
+ *
+ * Builds the ballot of the constant 1 and the ballot of the first operand,
+ * compares the two for equality, and stores the sign-extended i32 result
+ * (all ones when equal, zero otherwise) in the current output channel.
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context; converted to the si shader context
+ * \param emit_data  provides args[0] as input and receives output[chan]
+ */
+static void vote_all_emit(
+       const struct lp_build_tgsi_action *action,
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMBuilderRef builder = ctx->gallivm.builder;
+
+       /* Ballot of a constant 1 first, then the ballot of the operand. */
+       LLVMValueRef active_mask = si_emit_ballot(ctx, ctx->i32_1);
+       LLVMValueRef vote_mask = si_emit_ballot(ctx, emit_data->args[0]);
+
+       /* True exactly when both ballots are identical. */
+       LLVMValueRef all_set = LLVMBuildICmp(builder, LLVMIntEQ,
+                                            vote_mask, active_mask, "");
+
+       /* Sign-extend the i1 so that "true" becomes all ones in the i32. */
+       emit_data->output[emit_data->chan] =
+               LLVMBuildSExt(builder, all_set, ctx->i32, "");
+}
+
+/**
+ * Emit callback installed for TGSI_OPCODE_VOTE_ANY.
+ *
+ * Builds the ballot of the first operand, tests it against the i64 constant
+ * zero with "not equal", and stores the sign-extended i32 result (all ones
+ * when the ballot is non-zero, zero otherwise) in the current output channel.
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context; converted to the si shader context
+ * \param emit_data  provides args[0] as input and receives output[chan]
+ */
+static void vote_any_emit(
+       const struct lp_build_tgsi_action *action,
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMBuilderRef builder = ctx->gallivm.builder;
+
+       LLVMValueRef vote_mask = si_emit_ballot(ctx, emit_data->args[0]);
+       LLVMValueRef zero64 = LLVMConstInt(ctx->i64, 0, 0);
+
+       /* True when the ballot is anything other than zero. */
+       LLVMValueRef any_set = LLVMBuildICmp(builder, LLVMIntNE,
+                                            vote_mask, zero64, "");
+
+       /* Sign-extend the i1 so that "true" becomes all ones in the i32. */
+       emit_data->output[emit_data->chan] =
+               LLVMBuildSExt(builder, any_set, ctx->i32, "");
+}
+
+/**
+ * Emit callback installed for TGSI_OPCODE_VOTE_EQ.
+ *
+ * Builds the ballot of the constant 1 and the ballot of the first operand.
+ * The result is true when the operand's ballot either equals the ballot of
+ * the constant 1 or equals zero; it is sign-extended to i32 (all ones for
+ * true, zero for false) and stored in the current output channel.
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context; converted to the si shader context
+ * \param emit_data  provides args[0] as input and receives output[chan]
+ */
+static void vote_eq_emit(
+       const struct lp_build_tgsi_action *action,
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMBuilderRef builder = ctx->gallivm.builder;
+
+       /* Ballot of a constant 1 first, then the ballot of the operand. */
+       LLVMValueRef active_mask = si_emit_ballot(ctx, ctx->i32_1);
+       LLVMValueRef vote_mask = si_emit_ballot(ctx, emit_data->args[0]);
+       LLVMValueRef zero64 = LLVMConstInt(ctx->i64, 0, 0);
+
+       /* The two accepted outcomes: ballot matches the constant-1 ballot,
+        * or ballot is empty. */
+       LLVMValueRef is_all = LLVMBuildICmp(builder, LLVMIntEQ,
+                                           vote_mask, active_mask, "");
+       LLVMValueRef is_none = LLVMBuildICmp(builder, LLVMIntEQ,
+                                            vote_mask, zero64, "");
+       LLVMValueRef uniform = LLVMBuildOr(builder, is_all, is_none, "");
+
+       /* Sign-extend the i1 so that "true" becomes all ones in the i32. */
+       emit_data->output[emit_data->chan] =
+               LLVMBuildSExt(builder, uniform, ctx->i32, "");
+}
+
 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
                                       struct lp_build_emit_data *emit_data)
 {
@@ -6574,6 +6652,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
        bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
        bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
 
+       bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
+       bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
+       bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
+
        bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
        bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
        bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;