From 969e5176c23252b92fbeca0dc4109a1b06de06c5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 24 Jun 2019 16:13:24 -0400 Subject: [PATCH] ac: rework ac_build_waitcnt for gfx10 Acked-by: Bas Nieuwenhuizen --- src/amd/common/ac_llvm_build.c | 38 ++++++++++++++++++- src/amd/common/ac_llvm_build.h | 11 +++--- src/amd/common/ac_nir_to_llvm.c | 14 +++---- src/gallium/drivers/radeonsi/si_shader.c | 13 +++---- .../drivers/radeonsi/si_shader_tgsi_mem.c | 4 +- 5 files changed, 57 insertions(+), 23 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index eb71a69eebd..4c48fe1d1d7 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2882,13 +2882,49 @@ LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, ""); } -void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16) +void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags) { + if (!wait_flags) + return; + + unsigned lgkmcnt = 63; + unsigned expcnt = 7; + unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15; + unsigned vscnt = 63; + + if (wait_flags & AC_WAIT_LGKM) + lgkmcnt = 0; + if (wait_flags & AC_WAIT_EXP) + expcnt = 0; + if (wait_flags & AC_WAIT_VLOAD) + vmcnt = 0; + + if (wait_flags & AC_WAIT_VSTORE) { + if (ctx->chip_class >= GFX10) + vscnt = 0; + else + vmcnt = 0; + } + + unsigned simm16 = (lgkmcnt << 8) | + (expcnt << 4) | + (vmcnt & 0xf) | + ((vmcnt >> 4) << 14); + LLVMValueRef args[1] = { LLVMConstInt(ctx->i32, simm16, false), }; ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0); + + /* TODO: add llvm.amdgcn.s.waitcnt.vscnt into LLVM: */ + if (0 && ctx->chip_class >= GFX10 && vscnt == 0) { + LLVMValueRef args[1] = { + LLVMConstInt(ctx->i32, vscnt, false), + }; + ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt.vscnt", + ctx->voidt, args, 1, 0); + } } LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0, diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 4917315cc50..eba01e51616 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -43,11 +43,10 @@ enum { AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */ }; -/* Combine these with & instead of |. */ -#define NOOP_WAITCNT 0xcf7f -#define LGKM_CNT 0xc07f -#define EXP_CNT 0xcf0f -#define VM_CNT 0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */ +#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */ +#define AC_WAIT_EXP (1 << 1) /* exports */ +#define AC_WAIT_VLOAD (1 << 2) /* VMEM load/sample instructions */ +#define AC_WAIT_VSTORE (1 << 3) /* VMEM store instructions */ struct ac_llvm_flow; @@ -575,7 +574,7 @@ LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1, LLVMValueRef s2); -void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16); +void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags); LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 73941ba6f45..e5a9389e7bf 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2741,26 +2741,26 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, static void emit_membar(struct ac_llvm_context *ac, const nir_intrinsic_instr *instr) { - unsigned waitcnt = NOOP_WAITCNT; + unsigned wait_flags = 0; switch (instr->intrinsic) { case nir_intrinsic_memory_barrier: case nir_intrinsic_group_memory_barrier: - waitcnt &= VM_CNT & LGKM_CNT; + wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE; break; case nir_intrinsic_memory_barrier_atomic_counter: case nir_intrinsic_memory_barrier_buffer: case nir_intrinsic_memory_barrier_image: - waitcnt &= VM_CNT; + wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE; break; case nir_intrinsic_memory_barrier_shared: - waitcnt &= LGKM_CNT; + wait_flags = AC_WAIT_LGKM; break; default: break; } - if (waitcnt != NOOP_WAITCNT) - ac_build_waitcnt(ac, waitcnt); + + ac_build_waitcnt(ac, wait_flags); } void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) @@ -2770,7 +2770,7 @@ void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) * always fits into a single wave. */ if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) { - ac_build_waitcnt(ac, LGKM_CNT & VM_CNT); + ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE); return; } ac_build_s_barrier(ac); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 9a4a416513b..6bdddb13460 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3908,21 +3908,20 @@ static void membar_emit( struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0); unsigned flags = LLVMConstIntGetZExtValue(src0); - unsigned waitcnt = NOOP_WAITCNT; + unsigned wait_flags = 0; if (flags & TGSI_MEMBAR_THREAD_GROUP) - waitcnt &= VM_CNT & LGKM_CNT; + wait_flags |= AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE; if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER | TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_SHADER_IMAGE)) - waitcnt &= VM_CNT; + wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE; if (flags & TGSI_MEMBAR_SHARED) - waitcnt &= LGKM_CNT; + wait_flags |= AC_WAIT_LGKM; - if (waitcnt != NOOP_WAITCNT) - ac_build_waitcnt(&ctx->ac, waitcnt); + ac_build_waitcnt(&ctx->ac, wait_flags); } static void clock_emit( @@ -4372,7 +4371,7 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, */ if (ctx->screen->info.chip_class == GFX6 && ctx->type == PIPE_SHADER_TESS_CTRL) { - ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT); + ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE); return; } diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 455af80e206..8dcc1a552c6 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -520,7 +520,7 @@ static void load_emit( } if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) - ac_build_waitcnt(&ctx->ac, VM_CNT); + ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE); can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) && is_oneway_access_only(inst, info, @@ -709,7 +709,7 @@ static void store_emit( } if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) - ac_build_waitcnt(&ctx->ac, VM_CNT); + ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE); bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER; args.cache_policy = get_cache_policy(ctx, inst, -- 2.30.2