From d1c8aeb24f9a28ba95290c0e712e152eeeb64d09 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 23 Dec 2019 19:26:46 -0500 Subject: [PATCH] ac: unify primitive export code Reviewed-by: Pierre-Eric Pelloux-Prayer --- src/amd/llvm/ac_llvm_build.c | 53 +++++++++++++++ src/amd/llvm/ac_llvm_build.h | 13 ++++ src/amd/vulkan/radv_nir_to_llvm.c | 60 ++--------------- .../drivers/radeonsi/gfx10_shader_ngg.c | 65 ++----------------- 4 files changed, 78 insertions(+), 113 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 3df941b1f59..a66272873f0 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -4748,6 +4748,59 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav ac_build_endif(ctx, 5020); } +LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, + const struct ac_ngg_prim *prim) +{ + /* The prim export format is: + * - bits 0..8: index 0 + * - bit 9: edge flag 0 + * - bits 10..18: index 1 + * - bit 19: edge flag 1 + * - bits 20..28: index 2 + * - bit 29: edge flag 2 + * - bit 31: null primitive (skip) + */ + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tmp = LLVMBuildZExt(builder, prim->isnull, ctx->i32, ""); + LLVMValueRef result = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 31, false), ""); + + for (unsigned i = 0; i < prim->num_vertices; ++i) { + tmp = LLVMBuildShl(builder, prim->index[i], + LLVMConstInt(ctx->i32, 10 * i, false), ""); + result = LLVMBuildOr(builder, result, tmp, ""); + tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->i32, ""); + tmp = LLVMBuildShl(builder, tmp, + LLVMConstInt(ctx->i32, 10 * i + 9, false), ""); + result = LLVMBuildOr(builder, result, tmp, ""); + } + return result; +} + +void ac_build_export_prim(struct ac_llvm_context *ctx, + const struct ac_ngg_prim *prim) +{ + struct ac_export_args args; + + if (prim->passthrough) { + args.out[0] = prim->passthrough; + } else { + args.out[0] = ac_pack_prim_export(ctx, prim); + } + + args.out[0] = LLVMBuildBitCast(ctx->builder, args.out[0], ctx->f32, ""); + args.out[1] = LLVMGetUndef(ctx->f32); + args.out[2] = LLVMGetUndef(ctx->f32); + args.out[3] = LLVMGetUndef(ctx->f32); + + args.target = V_008DFC_SQ_EXP_PRIM; + args.enabled_channels = 1; + args.done = true; + args.valid_mask = false; + args.compr = false; + + ac_build_export(ctx, &args); +} + static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx) { diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 9e216a80956..8672977bd67 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -750,6 +750,19 @@ ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id, LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt); +struct ac_ngg_prim { + unsigned num_vertices; + LLVMValueRef isnull; + LLVMValueRef index[3]; + LLVMValueRef edgeflag[3]; + LLVMValueRef passthrough; +}; + +LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, + const struct ac_ngg_prim *prim); +void ac_build_export_prim(struct ac_llvm_context *ctx, + const struct ac_ngg_prim *prim); + static inline LLVMValueRef ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg) { diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 3aeba178316..5a321a0677f 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2371,47 +2371,6 @@ ngg_gs_emit_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef gsthread, return ngg_gs_vertex_ptr(ctx, vertexidx); } -struct ngg_prim { - unsigned num_vertices; - LLVMValueRef isnull; - LLVMValueRef index[3]; - LLVMValueRef edgeflag[3]; -}; - -static void build_export_prim(struct radv_shader_context *ctx, - const struct ngg_prim *prim) -{ - LLVMBuilderRef builder = ctx->ac.builder; - struct ac_export_args args; - LLVMValueRef tmp; - - tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, ""); - args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), ""); - - for (unsigned i = 0; i < prim->num_vertices; ++i) { - tmp = LLVMBuildShl(builder, prim->index[i], - LLVMConstInt(ctx->ac.i32, 10 * i, false), ""); - args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); - tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, ""); - tmp = LLVMBuildShl(builder, tmp, - LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), ""); - args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); - } - - args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, ""); - args.out[1] = LLVMGetUndef(ctx->ac.f32); - args.out[2] = LLVMGetUndef(ctx->ac.f32); - args.out[3] = LLVMGetUndef(ctx->ac.f32); - - args.target = V_008DFC_SQ_EXP_PRIM; - args.enabled_channels = 1; - args.done = true; - args.valid_mask = false; - args.compr = false; - - ac_build_export(&ctx->ac, &args); -} - static struct radv_stream_output * radv_get_stream_output_by_loc(struct radv_streamout_info *so, unsigned location) { @@ -3002,14 +2961,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx)); /* TODO: streamout queries */ - /* Export primitive data to the index buffer. Format is: - * - bits 0..8: index 0 - * - bit 9: edge flag 0 - * - bits 10..18: index 1 - * - bit 19: edge flag 1 - * - bits 20..28: index 2 - * - bit 29: edge flag 2 - * - bit 31: null primitive (skip) + /* Export primitive data to the index buffer. * * For the first version, we will always build up all three indices * independent of the primitive type. The additional garbage data @@ -3020,7 +2972,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) */ ac_build_ifcc(&ctx->ac, is_gs_thread, 6001); { - struct ngg_prim prim = {}; + struct ac_ngg_prim prim = {}; prim.num_vertices = num_vertices; prim.isnull = ctx->ac.i1false; @@ -3033,7 +2985,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, ""); } - build_export_prim(ctx, &prim); + ac_build_export_prim(&ctx->ac, &prim); } ac_build_endif(&ctx->ac, 6001); @@ -3323,7 +3275,7 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) ac_build_ifcc(&ctx->ac, tmp, 5140); { LLVMValueRef flags; - struct ngg_prim prim = {}; + struct ac_ngg_prim prim = {}; prim.num_vertices = verts_per_prim; tmp = ngg_gs_vertex_ptr(ctx, tid); @@ -3352,7 +3304,7 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) LLVMValueRef is_odd = LLVMBuildLShr(builder, flags, ctx->ac.i8_1, ""); is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, ""); - struct ngg_prim in = prim; + struct ac_ngg_prim in = prim; prim.index[0] = in.index[0]; prim.index[1] = LLVMBuildSelect(builder, is_odd, in.index[2], in.index[1], ""); @@ -3360,7 +3312,7 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) in.index[1], in.index[2], ""); } - build_export_prim(ctx, &prim); + ac_build_export_prim(&ctx->ac, &prim); } ac_build_endif(&ctx->ac, 5140); diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index d403383b09b..6bc5c99d02b 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -71,52 +71,6 @@ static LLVMValueRef ngg_get_query_buf(struct si_shader_context *ctx) LLVMConstInt(ctx->i32, GFX10_GS_QUERY_BUF, false)); } -struct ngg_prim { - unsigned num_vertices; - LLVMValueRef isnull; - LLVMValueRef index[3]; - LLVMValueRef edgeflag[3]; - LLVMValueRef passthrough; -}; - -static void build_export_prim(struct si_shader_context *ctx, - const struct ngg_prim *prim) -{ - LLVMBuilderRef builder = ctx->ac.builder; - struct ac_export_args args; - LLVMValueRef tmp; - - if (prim->passthrough) { - args.out[0] = prim->passthrough; - } else { - tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, ""); - args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), ""); - - for (unsigned i = 0; i < prim->num_vertices; ++i) { - tmp = LLVMBuildShl(builder, prim->index[i], - LLVMConstInt(ctx->ac.i32, 10 * i, false), ""); - args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); - tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, ""); - tmp = LLVMBuildShl(builder, tmp, - LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), ""); - args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); - } - } - - args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, ""); - args.out[1] = LLVMGetUndef(ctx->ac.f32); - args.out[2] = LLVMGetUndef(ctx->ac.f32); - args.out[3] = LLVMGetUndef(ctx->ac.f32); - - args.target = V_008DFC_SQ_EXP_PRIM; - args.enabled_channels = 1; - args.done = true; - args.valid_mask = false; - args.compr = false; - - ac_build_export(&ctx->ac, &args); -} - static void build_streamout_vertex(struct si_shader_context *ctx, LLVMValueRef *so_buffer, LLVMValueRef *wg_offset_dw, unsigned stream, LLVMValueRef offset_vtx, @@ -693,14 +647,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, ac_build_endif(&ctx->ac, 5029); } - /* Export primitive data to the index buffer. Format is: - * - bits 0..8: index 0 - * - bit 9: edge flag 0 - * - bits 10..18: index 1 - * - bit 19: edge flag 1 - * - bits 20..28: index 2 - * - bit 29: edge flag 2 - * - bit 31: null primitive (skip) + /* Build the primitive export. * * For the first version, we will always build up all three indices * independent of the primitive type. The additional garbage data @@ -711,7 +658,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, */ ac_build_ifcc(&ctx->ac, is_gs_thread, 6001); { - struct ngg_prim prim = {}; + struct ac_ngg_prim prim = {}; if (gfx10_is_ngg_passthrough(ctx->shader)) { prim.passthrough = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset); @@ -739,7 +686,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, } } - build_export_prim(ctx, &prim); + ac_build_export_prim(&ctx->ac, &prim); } ac_build_endif(&ctx->ac, 6001); @@ -1213,7 +1160,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) ac_build_ifcc(&ctx->ac, tmp, 5140); { LLVMValueRef flags; - struct ngg_prim prim = {}; + struct ac_ngg_prim prim = {}; prim.num_vertices = verts_per_prim; tmp = ngg_gs_vertex_ptr(ctx, tid); @@ -1242,7 +1189,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) si_unpack_param(ctx, ctx->vs_state_bits, 4, 2), ctx->i32_0, ""); - struct ngg_prim in = prim; + struct ac_ngg_prim in = prim; prim.index[0] = LLVMBuildSelect(builder, flatshade_first, in.index[0], LLVMBuildSelect(builder, is_odd, @@ -1258,7 +1205,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) in.index[2], ""); } - build_export_prim(ctx, &prim); + ac_build_export_prim(&ctx->ac, &prim); } ac_build_endif(&ctx->ac, 5140); -- 2.30.2