From 9f005f1f850710ea456f9847b1d247aaa8f0d6d2 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 13 Mar 2020 11:23:07 +0100 Subject: [PATCH] radv: enable lowering of GS intrinsics for the LLVM backend This replaces emit_vertex with: if (vertex_count < max_vertices) { emit_vertex_with_counter vertex_count ... vertex_count += 1 } Which is exactly what NIR->LLVM was doing but at NIR level. This pass is already called by ACO. pipeline-db changes on GFX10: Totals from affected shaders: SGPRS: 1952 -> 1912 (-2.05 %) VGPRS: 2112 -> 2044 (-3.22 %) Code Size: 189368 -> 185620 (-1.98 %) bytes Max Waves: 494 -> 491 (-0.61 %) No pipeline-db changes on other generations. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/llvm/ac_nir_to_llvm.c | 9 +++++ src/amd/llvm/ac_shader_abi.h | 5 +++ src/amd/vulkan/radv_nir_to_llvm.c | 62 +++++++------------------------ src/amd/vulkan/radv_shader.c | 3 +- 4 files changed, 29 insertions(+), 50 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 00970638516..627f5d2d931 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3938,7 +3938,16 @@ static void visit_intrinsic(struct ac_nir_context *ctx, case nir_intrinsic_emit_vertex: ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs); break; + case nir_intrinsic_emit_vertex_with_counter: { + unsigned stream = nir_intrinsic_stream_id(instr); + LLVMValueRef next_vertex = get_src(ctx, instr->src[0]); + ctx->abi->emit_vertex_with_counter(ctx->abi, stream, + next_vertex, + ctx->abi->outputs); + break; + } case nir_intrinsic_end_primitive: + case nir_intrinsic_end_primitive_with_counter: ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr)); break; case nir_intrinsic_load_tess_coord: diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index f73465b014f..18f85a7911c 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -80,6 +80,11 @@ struct ac_shader_abi { void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream); + void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, + unsigned stream, + LLVMValueRef vertexidx, + LLVMValueRef *addrs); + LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi, unsigned location, unsigned driver_location, diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 7cb8deddc10..900246d275e 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -876,39 +876,21 @@ static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi) static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs); static void -visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs) +visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs) { - LLVMValueRef gs_next_vertex; - LLVMValueRef can_emit; unsigned offset = 0; struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); if (ctx->args->options->key.vs_common_out.as_ngg) { - gfx10_ngg_gs_emit_vertex(ctx, stream, addrs); + gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs); return; } - /* Write vertex attribute values to GSVS ring */ - gs_next_vertex = LLVMBuildLoad(ctx->ac.builder, - ctx->gs_next_vertex[stream], - ""); - - /* If this thread has already emitted the declared maximum number of - * vertices, don't emit any more: excessive vertex emissions are not - * supposed to have any effect. - */ - can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex, - LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), ""); - - bool use_kill = !ctx->args->shader_info->gs.writes_memory; - if (use_kill) - ac_build_kill_if_false(&ctx->ac, can_emit); - else - ac_build_ifcc(&ctx->ac, can_emit, 6505); - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i]; @@ -933,7 +915,7 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr offset++; - voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, ""); + voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, ""); voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), ""); out_val = ac_to_integer(&ctx->ac, out_val); @@ -949,16 +931,9 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr } } - gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, - ctx->ac.i32_1, ""); - LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]); - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id); - - if (!use_kill) - ac_build_endif(&ctx->ac, 6505); } static void @@ -3309,25 +3284,11 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, + LLVMValueRef vertexidx, LLVMValueRef *addrs) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef tmp; - const LLVMValueRef vertexidx = - LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], ""); - - /* If this thread has already emitted the declared maximum number of - * vertices, skip the write: excessive vertex emissions are not - * supposed to have any effect. - */ - const LLVMValueRef can_emit = - LLVMBuildICmp(builder, LLVMIntULT, vertexidx, - LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), ""); - ac_build_ifcc(&ctx->ac, can_emit, 9001); - - tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, ""); - tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, ""); - LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]); const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx); @@ -3359,6 +3320,13 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, } assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size); + /* Store the current number of emitted vertices to zero out remaining + * primitive flags in case the geometry shader doesn't emit the maximum + * number of vertices. + */ + tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, ""); + LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]); + /* Determine and store whether this vertex completed a primitive. */ const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], ""); @@ -3395,8 +3363,6 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], ""); tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), ""); LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]); - - ac_build_endif(&ctx->ac, 9001); } static void @@ -3948,7 +3914,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.inputs = &ctx.inputs[0]; ctx.abi.emit_outputs = handle_shader_outputs_post; - ctx.abi.emit_vertex = visit_emit_vertex; + ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter; ctx.abi.load_ubo = radv_load_ubo; ctx.abi.load_ssbo = radv_load_ssbo; ctx.abi.load_sampler_desc = radv_get_sampler_desc; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 537ab9f8b7f..bb88b368d05 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -453,8 +453,7 @@ radv_shader_compile_to_nir(struct radv_device *device, nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - if (nir->info.stage == MESA_SHADER_GEOMETRY && - device->physical_device->use_aco) + if (nir->info.stage == MESA_SHADER_GEOMETRY) nir_lower_gs_intrinsics(nir, true); static const nir_lower_tex_options tex_options = { -- 2.30.2