From 40c224a573f2b763046001e622aafca90f68c693 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 25 May 2012 17:27:03 +0200 Subject: [PATCH] nvc0/ir: fix texture barrier insertion to prevent WAW hazards Fixes, for instance, object highlighting in Diablo 3 (wine). --- src/gallium/drivers/nv50/codegen/nv50_ir.h | 2 +- .../drivers/nv50/codegen/nv50_ir_graph.cpp | 12 ++- .../drivers/nv50/codegen/nv50_ir_graph.h | 2 +- .../drivers/nv50/codegen/nv50_ir_inlines.h | 2 +- .../nvc0/codegen/nv50_ir_emit_nvc0.cpp | 2 +- .../nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 97 +++++++++++++++---- 6 files changed, 88 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h index 9b47e3e13c1..0b47c32527f 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h @@ -864,7 +864,7 @@ public: inline bool isTerminated() const { return exit && exit->terminator; } bool dominatedBy(BasicBlock *bb); - inline bool reachableBy(BasicBlock *by, BasicBlock *term); + inline bool reachableBy(const BasicBlock *by, const BasicBlock *term); // returns mask of conditional out blocks // e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp index f1bff973636..33e35eea950 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.cpp @@ -23,6 +23,7 @@ #include "nv50_ir_graph.h" #include #include +#include #include "nv50_ir.h" namespace nv50_ir { @@ -165,16 +166,17 @@ Graph::Edge::Edge(Node *org, Node *tgt, Type kind) } bool -Graph::Node::reachableBy(Node *node, Node *term) +Graph::Node::reachableBy(const Node *node, const Node *term) const { - Stack stack; - Node *pos; + std::stack stack; + const Node *pos = NULL; const int seq = graph->nextSequence(); stack.push(node); - while (stack.getSize()) { - pos = reinterpret_cast(stack.pop().u.p); + while (!stack.empty()) { + pos = stack.top(); + stack.pop(); if (pos == this) return true; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h index 9ef317f943c..3bf84ba1e36 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_graph.h @@ -117,7 +117,7 @@ public: inline Node *parent() const; // returns NULL if count(incident edges) != 1 - bool reachableBy(Node *node, Node *term); + bool reachableBy(const Node *node, const Node *term) const; inline bool visit(int); inline int getSequence() const; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h index b62431f1e31..ab4c98fbcd7 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h @@ -359,7 +359,7 @@ Value *Value::get(Iterator &it) return reinterpret_cast(it.get()); } -bool BasicBlock::reachableBy(BasicBlock *by, BasicBlock *term) +bool BasicBlock::reachableBy(const BasicBlock *by, const BasicBlock *term) { return cfg.reachableBy(&by->cfg, &term->cfg); } diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp index fbd1aa5dfc9..57d5d723c6a 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp @@ -960,7 +960,7 @@ void CodeEmitterNVC0::emitTEXBAR(const Instruction *i) code[0] = 0x00000006 | (i->subOp << 26); code[1] = 0xf0000000; emitPredicate(i); - emitCondCode(i->predSrc >= 0 ? i->cc : CC_ALWAYS, 5); + emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5); } void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i) diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index 8fd4541a96a..efb51249115 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp @@ -148,7 +148,13 @@ private: }; bool insertTextureBarriers(Function *); inline bool insnDominatedBy(const Instruction *, const Instruction *) const; - void findFirstUses(const Instruction *, std::list&); + void findFirstUses(const Instruction *tex, const Instruction *def, + std::list&); + void findOverwritingDefs(const Instruction *tex, Instruction *insn, + const BasicBlock *term, + std::list&); + void addTexUse(std::list&, Instruction *, const Instruction *); + const Instruction *recurseDef(const Instruction *); private: LValue *r63; @@ -170,37 +176,88 @@ NVC0LegalizePostRA::insnDominatedBy(const Instruction *later, } void -NVC0LegalizePostRA::findFirstUses(const Instruction *insn, +NVC0LegalizePostRA::addTexUse(std::list &uses, + Instruction *usei, const Instruction *insn) +{ + bool add = true; + for (std::list::iterator it = uses.begin(); + it != uses.end();) { + if (insnDominatedBy(usei, it->insn)) { + add = false; + break; + } + if (insnDominatedBy(it->insn, usei)) + it = uses.erase(it); + else + ++it; + } + if (add) + uses.push_back(TexUse(usei, insn)); +} + +void +NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, + Instruction *insn, + const BasicBlock *term, + std::list &uses) +{ + while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) + insn = insn->getSrc(0)->getUniqueInsn(); + + if (!insn || !insn->bb->reachableBy(texi->bb, term)) + return; + + switch (insn->op) { + /* Values not connected to the tex's definition through any of these should + * not be conflicting. + */ + case OP_SPLIT: + case OP_MERGE: + case OP_PHI: + case OP_UNION: + /* recurse again */ + for (int s = 0; insn->srcExists(s); ++s) + findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term, + uses); + break; + default: + // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ? + addTexUse(uses, insn, texi); + break; + } +} + +void +NVC0LegalizePostRA::findFirstUses(const Instruction *texi, + const Instruction *insn, std::list &uses) { for (int d = 0; insn->defExists(d); ++d) { Value *v = insn->getDef(d); for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) { Instruction *usei = (*u)->getInsn(); + + if (usei->op == OP_PHI || usei->op == OP_UNION) { + // need a barrier before WAW cases + for (int s = 0; usei->srcExists(s); ++s) { + Instruction *defi = usei->getSrc(s)->getUniqueInsn(); + if (defi && &usei->src(s) != *u) + findOverwritingDefs(texi, defi, usei->bb, uses); + } + } + if (usei->op == OP_SPLIT || + usei->op == OP_MERGE || usei->op == OP_PHI || usei->op == OP_UNION) { // these uses don't manifest in the machine code - findFirstUses(usei, uses); + findFirstUses(texi, usei, uses); } else if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) && usei->subOp != NV50_IR_SUBOP_MOV_FINAL) { - findFirstUses(usei, uses); + findFirstUses(texi, usei, uses); } else { - bool add = true; - for (std::list::iterator it = uses.begin(); - it != uses.end();) { - if (insnDominatedBy(usei, it->insn)) { - add = false; - break; - } - if (insnDominatedBy(it->insn, usei)) - it = uses.erase(it); - else - ++it; - } - if (add) - uses.push_back(TexUse(usei, insn)); + addTexUse(uses, usei, insn); } } } @@ -255,7 +312,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn) if (!uses) return false; for (size_t i = 0; i < texes.size(); ++i) - findFirstUses(texes[i], uses[i]); + findFirstUses(texes[i], texes[i], uses[i]); // determine the barrier level at each use for (size_t i = 0; i < texes.size(); ++i) { @@ -324,7 +381,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn) limitS.resize(fn->allBBlocks.getSize()); // cull unneeded barriers (should do that earlier, but for simplicity) - IteratorRef bi = fn->cfg.iteratorDFS(true); + IteratorRef bi = fn->cfg.iteratorCFG(); // first calculate min/max outstanding TEXes for each BB for (bi->reset(); !bi->end(); bi->next()) { Graph::Node *n = reinterpret_cast(bi->get()); -- 2.30.2