From: Ilia Mirkin Date: Wed, 19 Aug 2015 01:09:12 +0000 (-0400) Subject: nvc0/ir: detect i2f/i2i which operate on specific bytes/words X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=63cb85e567ad1025ee990b38f43c2f1ef811821b;p=mesa.git nvc0/ir: detect i2f/i2i which operate on specific bytes/words Some Unigine shaders have been observed to unpack bytes out of 32-bit integers and convert them to floats. I2F/I2I can handle this sort of thing directly. Detect the handleable situations. This misses 16-bit word capabilities in nv50, but I haven't seen shaders that would actually make use of that. Signed-off-by: Ilia Mirkin --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index f06056f8f17..8f1542959c9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -933,6 +933,7 @@ CodeEmitterGK110::emitCVT(const Instruction *i) code[0] |= typeSizeofLog2(dType) << 10; code[0] |= typeSizeofLog2(i->sType) << 12; + code[1] |= i->subOp << 12; if (isSignedIntType(dType)) code[0] |= 0x4000; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index ef5c87d0437..6e22788341f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -818,6 +818,7 @@ CodeEmitterGM107::emitI2F() emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); emitCC (0x2f); emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitField(0x29, 2, insn->subOp); emitRND (0x27, rnd, -1); emitField(0x0d, 1, isSignedType(insn->sType)); emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); @@ -850,6 +851,7 @@ CodeEmitterGM107::emitI2I() emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); emitCC (0x2f); emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitField(0x29, 2, insn->subOp); emitField(0x0d, 1, isSignedType(insn->sType)); emitField(0x0c, 1, isSignedType(insn->dType)); emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 5703712181c..6bf5219d346 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1020,6 +1020,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i) code[0] |= util_logbase2(typeSizeof(dType)) << 20; code[0] |= util_logbase2(typeSizeof(i->sType)) << 23; + // for 8/16 source types, the byte/word is in subOp. word 1 is + // represented as 2. + code[1] |= i->subOp << 0x17; + if (sat) code[0] |= 0x20; if (abs) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 83b884b72dd..ef286c0ab38 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1238,7 +1238,8 @@ private: void handleRCP(Instruction *); void handleSLCT(Instruction *); void handleLOGOP(Instruction *); - void handleCVT(Instruction *); + void handleCVT_NEG(Instruction *); + void handleCVT_EXTBF(Instruction *); void handleSUCLAMP(Instruction *); BuildUtil bld; @@ -1489,12 +1490,12 @@ AlgebraicOpt::handleLOGOP(Instruction *logop) // nv50: // F2I(NEG(I2F(ABS(SET)))) void -AlgebraicOpt::handleCVT(Instruction *cvt) +AlgebraicOpt::handleCVT_NEG(Instruction *cvt) { + Instruction *insn = cvt->getSrc(0)->getInsn(); if (cvt->sType != TYPE_F32 || cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0)) return; - Instruction *insn = cvt->getSrc(0)->getInsn(); if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32) return; if (insn->src(0).mod != Modifier(0)) @@ -1524,6 +1525,74 @@ AlgebraicOpt::handleCVT(Instruction *cvt) delete_Instruction(prog, cvt); } +// Some shaders extract packed bytes out of words and convert them to +// e.g. float. The Fermi+ CVT instruction can extract those directly, as can +// nv50 for word sizes. +// +// CVT(EXTBF(x, byte/word)) +// CVT(AND(bytemask, x)) +// CVT(AND(bytemask, SHR(x, 8/16/24))) +void +AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt) +{ + Instruction *insn = cvt->getSrc(0)->getInsn(); + ImmediateValue imm0, imm1; + Value *arg = NULL; + unsigned width, offset; + if ((cvt->sType != TYPE_U32 && cvt->sType != TYPE_S32) || !insn) + return; + if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm0)) { + width = (imm0.reg.data.u32 >> 8) & 0xff; + offset = imm0.reg.data.u32 & 0xff; + arg = insn->getSrc(0); + + if (width != 8 && width != 16) + return; + if (width == 8 && offset & 0x7) + return; + if (width == 16 && offset & 0xf) + return; + } else if (insn->op == OP_AND) { + int s; + if (insn->src(0).getImmediate(imm0)) + s = 0; + else if (insn->src(1).getImmediate(imm0)) + s = 1; + else + return; + + if (imm0.reg.data.u32 == 0xff) + width = 8; + else if (imm0.reg.data.u32 == 0xffff) + width = 16; + else + return; + + arg = insn->getSrc(!s); + Instruction *shift = arg->getInsn(); + offset = 0; + if (shift && shift->op == OP_SHR && + shift->src(1).getImmediate(imm1) && + ((width == 8 && (imm1.reg.data.u32 & 0x7) == 0) || + (width == 16 && (imm1.reg.data.u32 & 0xf) == 0))) { + arg = shift->getSrc(0); + offset = imm1.reg.data.u32; + } + } + + if (!arg) + return; + + if (width == 8) { + cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U8 : TYPE_S8; + } else { + assert(width == 16); + cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U16 : TYPE_S16; + } + cvt->setSrc(0, arg); + cvt->subOp = offset >> 3; +} + // SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6) void AlgebraicOpt::handleSUCLAMP(Instruction *insn) @@ -1594,7 +1663,9 @@ AlgebraicOpt::visit(BasicBlock *bb) handleLOGOP(i); break; case OP_CVT: - handleCVT(i); + handleCVT_NEG(i); + if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32)) + handleCVT_EXTBF(i); break; case OP_SUCLAMP: handleSUCLAMP(i);