From 75f1f852b00ad0d766684d01695322b93a2acd55 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 11 Mar 2013 17:34:43 +0100 Subject: [PATCH] nvc0/ir: try to fix CAS (CompareAndSwap) --- .../nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 39 +++++++++++++++++++ .../nvc0/codegen/nv50_ir_target_nvc0.cpp | 4 +- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index cd30f63037c..a82465af17f 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp @@ -596,6 +596,7 @@ private: bool handleTXQ(TexInstruction *); bool handleManualTXD(TexInstruction *); bool handleATOM(Instruction *); + bool handleCasExch(Instruction *, bool needCctl); void handleSurfaceOpNVE4(TexInstruction *); void checkPredicate(Instruction *); @@ -857,6 +858,38 @@ NVC0LoweringPass::handleATOM(Instruction *atom) return true; } +bool +NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) +{ + if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && + cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) + return false; + bld.setPosition(cas, true); + + if (needCctl) { + Instruction *cctl = bld.mkOp1(OP_CCTL, TYPE_NONE, NULL, cas->getSrc(0)); + cctl->setIndirect(0, 0, cas->getIndirect(0, 0)); + cctl->fixed = 1; + cctl->subOp = NV50_IR_SUBOP_CCTL_IV; + if (cas->isPredicated()) + cctl->setPredicate(cas->cc, cas->getPredicate()); + } + + if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { + // CAS is crazy. Its 2nd source is a double reg, and the 3rd source + // should be set to the high part of the double reg or bad things will + // happen elsewhere in the universe. + // Also, it sometimes returns the new value instead of the old one + // under mysterious circumstances. 
+ Value *dreg = bld.getSSA(8); + bld.setPosition(cas, false); + bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2)); + cas->setSrc(1, dreg); + } + + return true; +} + inline Value * NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off) { @@ -1185,6 +1218,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) } if (su->op == OP_SUREDB || su->op == OP_SUREDP) { + // FIXME: for out of bounds access, destination value will be undefined ! Value *pred = su->getSrc(2); CondCode cc = CC_NOT_P; if (su->getPredicate()) { @@ -1208,6 +1242,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) red->setIndirect(0, 0, su->getSrc(0)); red->setPredicate(cc, pred); delete_Instruction(bld.getProgram(), su); + handleCasExch(red, true); } else { su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8; } @@ -1477,7 +1512,11 @@ NVC0LoweringPass::visit(Instruction *i) } break; case OP_ATOM: + { + const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; handleATOM(i); + handleCasExch(i, cctl); + } break; case OP_SULDB: case OP_SULDP: diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp index 04633228c8a..3aa29e222a1 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp @@ -287,7 +287,9 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s, // immediate 0 can be represented by GPR $r63/$r255 if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0) - return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE); + return (!i->isPseudo() && + !i->asTex() && + i->op != OP_EXPORT && i->op != OP_STORE); if (s >= opInfo[i->op].srcNr) return false; -- 2.30.2