From 20d0ae464c4accd97227b1b4e805a9c10183647d Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Thu, 24 Oct 2019 02:50:51 +0200 Subject: [PATCH] nv50/ir: implement global atomics and handle it for nir TGSI doesn't have any concept of global memory right now. Signed-off-by: Karol Herbst Acked-by: Dave Airlie --- .../nouveau/codegen/nv50_ir_from_nir.cpp | 42 +++++++++++++++++-- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 + 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 08365988069..984ef9ad04d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -582,40 +582,47 @@ Converter::getSubOp(nir_intrinsic_op op) { switch (op) { case nir_intrinsic_bindless_image_atomic_add: + case nir_intrinsic_global_atomic_add: case nir_intrinsic_image_atomic_add: case nir_intrinsic_image_deref_atomic_add: case nir_intrinsic_shared_atomic_add: case nir_intrinsic_ssbo_atomic_add: return NV50_IR_SUBOP_ATOM_ADD; case nir_intrinsic_bindless_image_atomic_and: + case nir_intrinsic_global_atomic_and: case nir_intrinsic_image_atomic_and: case nir_intrinsic_image_deref_atomic_and: case nir_intrinsic_shared_atomic_and: case nir_intrinsic_ssbo_atomic_and: return NV50_IR_SUBOP_ATOM_AND; case nir_intrinsic_bindless_image_atomic_comp_swap: + case nir_intrinsic_global_atomic_comp_swap: case nir_intrinsic_image_atomic_comp_swap: case nir_intrinsic_image_deref_atomic_comp_swap: case nir_intrinsic_shared_atomic_comp_swap: case nir_intrinsic_ssbo_atomic_comp_swap: return NV50_IR_SUBOP_ATOM_CAS; case nir_intrinsic_bindless_image_atomic_exchange: + case nir_intrinsic_global_atomic_exchange: case nir_intrinsic_image_atomic_exchange: case nir_intrinsic_image_deref_atomic_exchange: case nir_intrinsic_shared_atomic_exchange: case nir_intrinsic_ssbo_atomic_exchange: return NV50_IR_SUBOP_ATOM_EXCH; case nir_intrinsic_bindless_image_atomic_or: + case nir_intrinsic_global_atomic_or: case nir_intrinsic_image_atomic_or: case nir_intrinsic_image_deref_atomic_or: case nir_intrinsic_shared_atomic_or: case nir_intrinsic_ssbo_atomic_or: return NV50_IR_SUBOP_ATOM_OR; case nir_intrinsic_bindless_image_atomic_imax: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_deref_atomic_imax: case nir_intrinsic_bindless_image_atomic_umax: + case nir_intrinsic_global_atomic_imax: + case nir_intrinsic_global_atomic_umax: + case nir_intrinsic_image_atomic_imax: case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_deref_atomic_imax: case nir_intrinsic_image_deref_atomic_umax: case nir_intrinsic_shared_atomic_imax: case nir_intrinsic_shared_atomic_umax: @@ -623,10 +630,12 @@ Converter::getSubOp(nir_intrinsic_op op) case nir_intrinsic_ssbo_atomic_umax: return NV50_IR_SUBOP_ATOM_MAX; case nir_intrinsic_bindless_image_atomic_imin: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_deref_atomic_imin: case nir_intrinsic_bindless_image_atomic_umin: + case nir_intrinsic_global_atomic_imin: + case nir_intrinsic_global_atomic_umin: + case nir_intrinsic_image_atomic_imin: case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_deref_atomic_imin: case nir_intrinsic_image_deref_atomic_umin: case nir_intrinsic_shared_atomic_imin: case nir_intrinsic_shared_atomic_umin: @@ -634,6 +643,7 @@ Converter::getSubOp(nir_intrinsic_op op) case nir_intrinsic_ssbo_atomic_umin: return NV50_IR_SUBOP_ATOM_MIN; case nir_intrinsic_bindless_image_atomic_xor: + case nir_intrinsic_global_atomic_xor: case nir_intrinsic_image_atomic_xor: case nir_intrinsic_image_deref_atomic_xor: case nir_intrinsic_shared_atomic_xor: @@ -2379,6 +2389,30 @@ Converter::visit(nir_intrinsic_instr *insn) info->io.globalAccess |= 0x2; break; } + case nir_intrinsic_global_atomic_add: + case nir_intrinsic_global_atomic_and: + case nir_intrinsic_global_atomic_comp_swap: + case nir_intrinsic_global_atomic_exchange: + case nir_intrinsic_global_atomic_or: + case nir_intrinsic_global_atomic_imax: + case nir_intrinsic_global_atomic_imin: + case nir_intrinsic_global_atomic_umax: + case nir_intrinsic_global_atomic_umin: + case nir_intrinsic_global_atomic_xor: { + const DataType dType = getDType(insn); + LValues &newDefs = convert(&insn->dest); + Value *address; + uint32_t offset = getIndirect(&insn->src[0], 0, address); + + Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset); + Instruction *atom = + mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0)); + atom->setIndirect(0, 0, address); + atom->subOp = getSubOp(op); + + info->io.globalAccess |= 0x2; + break; + } case nir_intrinsic_bindless_image_atomic_add: case nir_intrinsic_bindless_image_atomic_and: case nir_intrinsic_bindless_image_atomic_comp_swap: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index a76d6c60cda..a60881000fe 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1645,6 +1645,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom) else if (targ->getChipset() < NVISA_GM107_CHIPSET) handleSharedATOMNVE4(atom); return true; + case FILE_MEMORY_GLOBAL: + return true; default: assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); base = loadBufInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); -- 2.30.2