From: Samuel Pitoiset Date: Mon, 24 Oct 2016 19:41:11 +0000 (+0200) Subject: nv50/ir: do not perform global membar for shared memory X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6dbb8d12a8b78769b9803884fad5f0d9923023bc;p=mesa.git nv50/ir: do not perform global membar for shared memory Shared memory is local to a CTA, thus we should only wait for prior memory writes which are visible to other threads in the same CTA, and not at the global level. This should speed up compute shaders which use shared memory. Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index b47fc497c87..91cef81aa0d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -3561,12 +3561,15 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); break; case TGSI_OPCODE_MEMBAR: + { + uint32_t level = tgsi.getSrc(0).getValueU32(0, info); geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); geni->fixed = 1; - if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP) + if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED))) geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA); else geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL); + } break; case TGSI_OPCODE_ATOMUADD: case TGSI_OPCODE_ATOMXCHG: