From: Ilia Mirkin Date: Fri, 24 Jul 2015 03:03:53 +0000 (-0400) Subject: nvc0/ir: kepler can't do indirect shader input/output loads directly X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9d60793a03e40e1d139b78fce0144cad57438741;p=mesa.git nvc0/ir: kepler can't do indirect shader input/output loads directly There's a special AL2P instruction (called AFETCH in nv50 ir) which computes a "physical" value to be used with indirect addressing with ALD. Fixes tcs-input-array-*-index-rd tcs-output-array-*-index-wr varying-indexing tessellation tests on Kepler. Signed-off-by: Ilia Mirkin --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 174dca49ec0..3ddaeafebbd 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -106,6 +106,7 @@ enum operation OP_MEMBAR, // memory barrier (mfence, lfence, sfence) OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1] + OP_AFETCH, // fetch base address of shader input (a[%r1+0x10]) OP_EXPORT, OP_LINTERP, OP_PINTERP, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 44d3a5efecf..f06056f8f17 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -77,6 +77,7 @@ private: void emitMOV(const Instruction *); void emitINTERP(const Instruction *); + void emitAFETCH(const Instruction *); void emitPFETCH(const Instruction *); void emitVFETCH(const Instruction *); void emitEXPORT(const Instruction *); @@ -1338,6 +1339,23 @@ CodeEmitterGK110::emitFlow(const Instruction *i) } } +void +CodeEmitterGK110::emitAFETCH(const Instruction *i) +{ + uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff; + + code[0] = 0x00000002 | (offset << 23); + code[1] = 0x7d000000 | (offset >> 9); + + if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) + code[1] |= 0x8; + + emitPredicate(i); + + defId(i->def(0), 2); + srcId(i->src(0).getIndirect(0), 10); +} + void CodeEmitterGK110::emitPFETCH(const Instruction *i) { @@ -1707,6 +1725,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) case OP_EXPORT: emitEXPORT(insn); break; + case OP_AFETCH: + emitAFETCH(insn); + break; case OP_PFETCH: emitPFETCH(insn); break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 65c1f23e101..ef5c87d0437 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -174,6 +174,7 @@ private: void emitALD(); void emitAST(); void emitISBERD(); + void emitAL2P(); void emitIPA(); void emitPIXLD(); @@ -2203,6 +2204,17 @@ CodeEmitterGM107::emitISBERD() emitGPR (0x00, insn->def(0)); } +void +CodeEmitterGM107::emitAL2P() +{ + emitInsn (0xefa00000); + emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1); + emitO (0x20); + emitField(0x14, 11, insn->src(0).get()->reg.data.offset); + emitGPR (0x08, insn->src(0).getIndirect(0)); + emitGPR (0x00, insn->def(0)); +} + void CodeEmitterGM107::emitIPA() { @@ -2759,6 +2771,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case OP_PFETCH: emitISBERD(); break; + case OP_AFETCH: + emitAL2P(); + break; case OP_LINTERP: case OP_PINTERP: emitIPA(); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 472e3a84119..3ed815bad39 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -85,6 +85,7 @@ private: void emitCCTL(const Instruction *); void emitINTERP(const Instruction *); + void emitAFETCH(const Instruction *); void emitPFETCH(const Instruction *); void emitVFETCH(const Instruction *); void emitEXPORT(const Instruction *); @@ -1493,6 +1494,21 @@ CodeEmitterNVC0::emitBAR(const Instruction *i) } } +void +CodeEmitterNVC0::emitAFETCH(const Instruction *i) +{ + code[0] = 0x00000006; + code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff); + + if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) + code[0] |= 0x200; + + emitPredicate(i); + + defId(i->def(0), 14); + srcId(i->src(0).getIndirect(0), 20); +} + void CodeEmitterNVC0::emitPFETCH(const Instruction *i) { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index cd8ee71ad2b..710f53de1c4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1749,6 +1749,7 @@ NVC0LoweringPass::checkPredicate(Instruction *insn) bool NVC0LoweringPass::visit(Instruction *i) { + bool ret = true; bld.setPosition(i, false); if (i->cc != CC_ALWAYS) @@ -1780,7 +1781,8 @@ NVC0LoweringPass::visit(Instruction *i) case OP_SQRT: return handleSQRT(i); case OP_EXPORT: - return handleEXPORT(i); + ret = handleEXPORT(i); + break; case OP_EMIT: case OP_RESTART: return handleOUT(i); @@ -1843,7 +1845,20 @@ NVC0LoweringPass::visit(Instruction *i) default: break; } - return true; + + /* Kepler+ has a special opcode to compute a new base address to be used + * for indirect loads. + */ + if (targ->getChipset() >= NVISA_GK104_CHIPSET && !i->perPatch && + (i->op == OP_VFETCH || i->op == OP_EXPORT) && i->src(0).isIndirect(0)) { + Instruction *afetch = bld.mkOp1(OP_AFETCH, TYPE_U32, bld.getSSA(), + cloneShallow(func, i->getSrc(0))); + afetch->setIndirect(0, 0, i->getIndirect(0, 0)); + i->src(0).get()->reg.data.offset = 0; + i->setIndirect(0, 0, afetch->getDef(0)); + } + + return ret; } bool diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 556d01b864d..9ebdc6586db 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -135,6 +135,7 @@ const char *operationStr[OP_LAST + 1] = "membar", "vfetch", "pfetch", + "afetch", "export", "linterp", "pinterp", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 7992f539782..fe530c76b62 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -41,7 +41,7 @@ const uint8_t Target::operationSrcNr[] = 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK, 0, 0, 0, // PRERET,CONT,BREAK 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR - 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP + 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP 1, 1, // EMIT, RESTART 1, 1, 1, // TEX, TXB, TXL, 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP @@ -96,8 +96,8 @@ const OpClass Target::operationClass[] = OPCLASS_FLOW, OPCLASS_FLOW, // MEMBAR OPCLASS_CONTROL, - // VFETCH, PFETCH, EXPORT - OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE, + // VFETCH, PFETCH, AFETCH, EXPORT + OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE, // LINTERP, PINTERP OPCLASS_SFU, OPCLASS_SFU, // EMIT, RESTART diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index ca545a6024a..f3ddcaa5199 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -118,7 +118,7 @@ void TargetNV50::initOpInfo() static const uint32_t shortForm[(OP_LAST + 31) / 32] = { // MOV,ADD,SUB,MUL,MAD,SAD,L/PINTERP,RCP,TEX,TXF - 0x00014e40, 0x00000040, 0x00000498, 0x00000000 + 0x00014e40, 0x00000040, 0x00000930, 0x00000000 }; static const operation noDestList[] = {