From c3a5bc0bdf338453a5824f4bb50913600dc9f85a Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 23 Feb 2013 13:09:32 +0100 Subject: [PATCH] nv50/ir: add support for barriers nv50 part by Francisco Jerez. --- .../drivers/nv50/codegen/nv50_ir_driver.h | 2 + .../nv50/codegen/nv50_ir_emit_nv50.cpp | 17 ++++ .../nv50/codegen/nv50_ir_from_tgsi.cpp | 54 ++++++++--- .../nv50/codegen/nv50_ir_lowering_nv50.cpp | 1 + .../drivers/nv50/codegen/nv50_ir_target.cpp | 5 + .../nv50/codegen/nv50_ir_target_nv50.cpp | 1 + .../nvc0/codegen/nv50_ir_emit_nvc0.cpp | 96 +++++++++++++++++++ 7 files changed, 161 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h index 933a5e106ac..7bdd4b9bb65 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h @@ -167,6 +167,8 @@ struct nv50_ir_prog_info } cp; } prop; + uint8_t numBarriers; + struct { uint8_t clipDistance; /* index of first clip distance output */ uint8_t clipDistanceMask; /* mask of clip distances defined */ diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp index 796c1957eb5..2638ef1399d 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_emit_nv50.cpp @@ -122,6 +122,7 @@ private: void emitFlow(const Instruction *, uint8_t flowOp); void emitPRERETEmu(const FlowInstruction *); + void emitBAR(const Instruction *); void emitATOM(const Instruction *); }; @@ -1534,6 +1535,19 @@ CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp) } } +void +CodeEmitterNV50::emitBAR(const Instruction *i) +{ + ImmediateValue *barId = i->getSrc(0)->asImm(); + assert(barId); + + code[0] = 0x82000003 | (barId->reg.data.u32 << 21); + code[1] = 0x00004000; + + if (i->subOp == NV50_IR_SUBOP_BAR_SYNC) + code[0] |= 1 << 26; +} + void CodeEmitterNV50::emitATOM(const Instruction *i) { @@ -1753,6 +1767,9 @@ CodeEmitterNV50::emitInstruction(Instruction *insn) case OP_ATOM: emitATOM(insn); break; + case OP_BAR: + emitBAR(insn); + break; case OP_PHI: case OP_UNION: case OP_CONSTRAINT: diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index 61282f27ede..174ae3cc863 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -600,6 +600,27 @@ static nv50_ir::operation translateOpcode(uint opcode) } } +static uint16_t opcodeToSubOp(uint opcode) +{ + switch (opcode) { + case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL); + case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL); + case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL); + case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD; + case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH; + case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS; + case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND; + case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR; + case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR; + case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN; + case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; + case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; + case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; + default: + return 0; + } +} + bool Instruction::checkDstSrcAliasing() const { if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory @@ -1004,6 +1025,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) { Instruction insn(inst); + if (insn.getOpcode() == TGSI_OPCODE_BARRIER) + info->numBarriers = 1; + if (insn.dstCount()) { if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) { Instruction::DstRegister dst = insn.getDst(0); @@ -2066,6 +2090,8 @@ Converter::isEndOfSubroutine(uint ip) bool Converter::handleInstruction(const struct tgsi_full_instruction *insn) { + Instruction *geni; + Value *dst0[4], *rDst0[4]; Value *src0, *src1, *src2; Value *val0, *val1; @@ -2580,31 +2606,29 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_STORE: handleSTORE(); break; - case TGSI_OPCODE_ATOMUADD: - handleATOM(dst0, dstTy, NV50_IR_SUBOP_ATOM_ADD); + case TGSI_OPCODE_BARRIER: + geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); + geni->fixed = 1; + geni->subOp = NV50_IR_SUBOP_BAR_SYNC; break; - case TGSI_OPCODE_ATOMXCHG: - handleATOM(dst0, dstTy, NV50_IR_SUBOP_ATOM_EXCH); + case TGSI_OPCODE_MFENCE: + case TGSI_OPCODE_LFENCE: + case TGSI_OPCODE_SFENCE: + geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); + geni->fixed = 1; + geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); break; + case TGSI_OPCODE_ATOMUADD: + case TGSI_OPCODE_ATOMXCHG: case TGSI_OPCODE_ATOMCAS: - handleATOM(dst0, dstTy, NV50_IR_SUBOP_ATOM_CAS); - break; case TGSI_OPCODE_ATOMAND: - handleATOM(dst0, dstTy, NV50_IR_SUBOP_ATOM_AND); - break; case TGSI_OPCODE_ATOMOR: - handleATOM(dst0, dstTy, NV50_IR_SUBOP_ATOM_OR); - break; case TGSI_OPCODE_ATOMXOR: - handleATOM(dst0, dstTy, NV50_IR_SUBOP_ATOM_XOR); - break; case TGSI_OPCODE_ATOMUMIN: case TGSI_OPCODE_ATOMIMIN: - handleATOM(dst0, dstTy, NV50_IR_SUBOP_ATOM_MIN); - break; case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMAX: - handleATOM(dst0, dstTy, NV50_IR_SUBOP_ATOM_MAX); + handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); break; default: ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp index 83f7201fc35..9c3f8f64fa7 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp @@ -230,6 +230,7 @@ NV50LegalizePostRA::visit(BasicBlock *bb) handlePRERET(i->asFlow()); } else { if (i->op != OP_MOV && i->op != OP_PFETCH && + i->op != OP_BAR && (!i->defExists(0) || i->def(0).getFile() != FILE_ADDRESS)) replaceZero(i); if (typeSizeof(i->dType) == 8) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp index 7642c5d16d6..92552a0efe5 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp @@ -264,6 +264,11 @@ CodeEmitter::prepareEmission(BasicBlock *bb) for (i = bb->getEntry(); i; i = next) { next = i->next; + if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) { + bb->remove(i); + continue; + } + i->encSize = getMinEncodingSize(i); if (next && i->encSize < 8) ++nShort; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp index db09cb3dbae..dfb1173b367 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp @@ -394,6 +394,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const case OP_INSBF: case OP_EXTBF: case OP_EXIT: // want exit modifier instead (on NOP if required) + case OP_MEMBAR: return false; case OP_SAD: return ty == TYPE_S32; diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp index 66e971558ee..1013b50766d 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp @@ -79,6 +79,7 @@ private: void emitSTORE(const Instruction *); void emitMOV(const Instruction *); void emitATOM(const Instruction *); + void emitMEMBAR(const Instruction *); void emitINTERP(const Instruction *); void emitPFETCH(const Instruction *); @@ -121,6 +122,7 @@ private: void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); void emitFlow(const Instruction *); + void emitBAR(const Instruction *); void emitSUCLAMPMode(uint16_t); void emitSUCalc(Instruction *); @@ -1289,6 +1291,78 @@ CodeEmitterNVC0::emitFlow(const Instruction *i) } } +void +CodeEmitterNVC0::emitBAR(const Instruction *i) +{ + Value *rDef = NULL, *pDef = NULL; + + switch (i->subOp) { + case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break; + case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break; + case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break; + case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break; + default: + code[0] = 0x04; + assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC); + break; + } + code[1] = 0x50000000; + + code[0] |= 63 << 14; + code[1] |= 7 << 21; + + emitPredicate(i); + + // barrier id + if (i->src(0).getFile() == FILE_GPR) { + srcId(i->src(0), 20); + } else { + ImmediateValue *imm = i->getSrc(0)->asImm(); + assert(imm); + code[0] |= imm->reg.data.u32 << 20; + } + + // thread count + if (i->src(1).getFile() == FILE_GPR) { + srcId(i->src(1), 26); + } else { + ImmediateValue *imm = i->getSrc(1)->asImm(); + assert(imm); + code[0] |= imm->reg.data.u32 << 26; + code[1] |= imm->reg.data.u32 >> 6; + } + + if (i->srcExists(2) && (i->predSrc != 2)) { + srcId(i->src(2), 32 + 17); + if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) + code[1] |= 1 << 20; + } else { + code[1] |= 7 << 17; + } + + if (i->defExists(0)) { + if (i->def(0).getFile() == FILE_GPR) + rDef = i->getDef(0); + else + pDef = i->getDef(0); + + if (i->defExists(1)) { + if (i->def(1).getFile() == FILE_GPR) + rDef = i->getDef(1); + else + pDef = i->getDef(1); + } + } + if (rDef) { + code[0] &= ~(63 << 14); + defId(rDef, 14); + } + if (pDef) { + code[1] &= ~(7 << 21); + defId(pDef, 32 + 21); + } +} + void CodeEmitterNVC0::emitPFETCH(const Instruction *i) { @@ -1753,6 +1827,22 @@ CodeEmitterNVC0::emitATOM(const Instruction *i) srcId(i->src(2), 32 + 17); } +void +CodeEmitterNVC0::emitMEMBAR(const Instruction *i) +{ + switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) { + case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break; + case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break; + default: + code[0] = 0x45; + assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS); + break; + } + code[1] = 0xe0000000; + + emitPredicate(i); +} + void CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp) { @@ -2210,6 +2300,12 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) emitNOP(insn); insn->join = 1; break; + case OP_BAR: + emitBAR(insn); + break; + case OP_MEMBAR: + emitMEMBAR(insn); + break; case OP_VSHL: emitVSHL(insn); break; -- 2.30.2