From cd99c442c4b5edf6f13506474725dd662d9bb5a2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 19 Apr 2017 03:37:04 +0200 Subject: [PATCH] radeonsi/gfx9: add GS prolog support for merged ES-GS MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 87 +++++++++++++++++++----- 1 file changed, 70 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 9d7ab7bf68b..21287ae3ed4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -7238,6 +7238,14 @@ static void si_count_scratch_private_memory(struct si_shader_context *ctx) } } +static void si_init_exec_full_mask(struct si_shader_context *ctx) +{ + LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); + lp_build_intrinsic(ctx->gallivm.builder, + "llvm.amdgcn.init.exec", ctx->voidt, + &full_mask, 1, LP_FUNC_ATTR_CONVERGENT); +} + static void si_init_exec_from_input(struct si_shader_context *ctx, unsigned param, unsigned bitoffset) { @@ -7552,14 +7560,21 @@ static void si_get_ps_epilog_key(struct si_shader *shader, static void si_build_gs_prolog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { - const unsigned num_sgprs = GFX6_GS_NUM_USER_SGPR + 2; - const unsigned num_vgprs = 8; + unsigned num_sgprs, num_vgprs; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef params[32]; - LLVMTypeRef returns[32]; + LLVMTypeRef params[48]; /* 40 SGPRs (maximum) + some VGPRs */ + LLVMTypeRef returns[48]; LLVMValueRef func, ret; + if (ctx->screen->b.chip_class >= GFX9) { + num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR; + num_vgprs = 5; /* ES inputs are not needed by GS */ + } else { + num_sgprs = GFX6_GS_NUM_USER_SGPR + 2; + num_vgprs = 8; + } + for (unsigned i = 0; i < num_sgprs; ++i) { params[i] = ctx->i32; returns[i] = ctx->i32; @@ -7575,6 +7590,13 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, params, num_sgprs + num_vgprs, num_sgprs - 1); func = ctx->main_fn; + /* Set the full EXEC mask for the prolog, because we are only fiddling + * with registers here. The main shader part will set the correct EXEC + * mask. + */ + if (ctx->screen->b.chip_class >= GFX9) + si_init_exec_full_mask(ctx); + /* Copy inputs to outputs. This should be no-op, as the registers match, * but it will prevent the compiler from overwriting them unintentionally. */ @@ -7591,7 +7613,7 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, if (key->gs_prolog.states.tri_strip_adj_fix) { /* Remap the input vertices for every other primitive. */ - const unsigned vtx_params[6] = { + const unsigned gfx6_vtx_params[6] = { num_sgprs, num_sgprs + 1, num_sgprs + 3, @@ -7599,18 +7621,53 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, num_sgprs + 5, num_sgprs + 6 }; + const unsigned gfx9_vtx_params[3] = { + num_sgprs, + num_sgprs + 1, + num_sgprs + 4, + }; + LLVMValueRef vtx_in[6], vtx_out[6]; LLVMValueRef prim_id, rotate; + if (ctx->screen->b.chip_class >= GFX9) { + for (unsigned i = 0; i < 3; i++) { + vtx_in[i*2] = unpack_param(ctx, gfx9_vtx_params[i], 0, 16); + vtx_in[i*2+1] = unpack_param(ctx, gfx9_vtx_params[i], 16, 16); + } + } else { + for (unsigned i = 0; i < 6; i++) + vtx_in[i] = LLVMGetParam(func, gfx6_vtx_params[i]); + } + prim_id = LLVMGetParam(func, num_sgprs + 2); rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, ""); for (unsigned i = 0; i < 6; ++i) { - LLVMValueRef base, rotated, actual; - base = LLVMGetParam(func, vtx_params[i]); - rotated = LLVMGetParam(func, vtx_params[(i + 4) % 6]); - actual = LLVMBuildSelect(builder, rotate, rotated, base, ""); - actual = LLVMBuildBitCast(builder, actual, ctx->f32, ""); - ret = LLVMBuildInsertValue(builder, ret, actual, vtx_params[i], ""); + LLVMValueRef base, rotated; + base = vtx_in[i]; + rotated = vtx_in[(i + 4) % 6]; + vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, ""); + } + + if (ctx->screen->b.chip_class >= GFX9) { + for (unsigned i = 0; i < 3; i++) { + LLVMValueRef hi, out; + + hi = LLVMBuildShl(builder, vtx_out[i*2+1], + LLVMConstInt(ctx->i32, 16, 0), ""); + out = LLVMBuildOr(builder, vtx_out[i*2], hi, ""); + out = LLVMBuildBitCast(builder, out, ctx->f32, ""); + ret = LLVMBuildInsertValue(builder, ret, out, + gfx9_vtx_params[i], ""); + } + } else { + for (unsigned i = 0; i < 6; i++) { + LLVMValueRef out; + + out = LLVMBuildBitCast(builder, vtx_out[i], ctx->f32, ""); + ret = LLVMBuildInsertValue(builder, ret, out, + gfx6_vtx_params[i], ""); + } } } @@ -7692,12 +7749,8 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, si_create_function(ctx, "wrapper", NULL, 0, param_types, num_params, last_sgpr_param); - if (is_merged_shader(ctx->shader)) { - LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); - lp_build_intrinsic(ctx->gallivm.builder, - "llvm.amdgcn.init.exec", ctx->voidt, - &full_mask, 1, LP_FUNC_ATTR_CONVERGENT); - } + if (is_merged_shader(ctx->shader)) + si_init_exec_full_mask(ctx); /* Record the arguments of the function as if they were an output of * a previous part. -- 2.30.2