nvc0/ir: detect i2f/i2i which operate on specific bytes/words

author Ilia Mirkin <imirkin@alum.mit.edu>

Wed, 19 Aug 2015 01:09:12 +0000 (21:09 -0400)

committer Ilia Mirkin <imirkin@alum.mit.edu>

Thu, 20 Aug 2015 21:58:30 +0000 (17:58 -0400)
author Ilia Mirkin <imirkin@alum.mit.edu>
Wed, 19 Aug 2015 01:09:12 +0000 (21:09 -0400)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Thu, 20 Aug 2015 21:58:30 +0000 (17:58 -0400)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp

index f06056f8f17e707a1eb4483e5474f72d0c0a97c0..8f1542959c9c4a63561f5968631435285f0e4518 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -933,6 +933,7 @@ CodeEmitterGK110::emitCVT(const Instruction *i)
  
     code[0] |= typeSizeofLog2(dType) << 10;
     code[0] |= typeSizeofLog2(i->sType) << 12;
+   code[1] |= i->subOp << 12;
  
     if (isSignedIntType(dType))
        code[0] |= 0x4000;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp

index ef5c87d043720abe18c111ee74e8f4dc6fa1d080..6e22788341fdaa93c6c2c2d2b56ddec662485ac6 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -818,6 +818,7 @@ CodeEmitterGM107::emitI2F()
     emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
     emitCC   (0x2f);
     emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
+   emitField(0x29, 2, insn->subOp);
     emitRND  (0x27, rnd, -1);
     emitField(0x0d, 1, isSignedType(insn->sType));
     emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
@@ -850,6 +851,7 @@ CodeEmitterGM107::emitI2I()
     emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
     emitCC   (0x2f);
     emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
+   emitField(0x29, 2, insn->subOp);
     emitField(0x0d, 1, isSignedType(insn->sType));
     emitField(0x0c, 1, isSignedType(insn->dType));
     emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp

index 5703712181c3219f37851f2d0f1f56472c0b2ad8..6bf5219d346a9ef9ca1f01298bda02d7f7443998 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1020,6 +1020,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i)
        code[0] |= util_logbase2(typeSizeof(dType)) << 20;
        code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
  
+      // for 8/16 source types, the byte/word is in subOp. word 1 is
+      // represented as 2.
+      code[1] |= i->subOp << 0x17;
+
        if (sat)
           code[0] |= 0x20;
        if (abs)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp

index 83b884b72dd73b1bfa7583c369a218cc5de3dbcd..ef286c0ab384ff5c8fb23ac9c37c4a8796416f9c 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1238,7 +1238,8 @@ private:
     void handleRCP(Instruction *);
     void handleSLCT(Instruction *);
     void handleLOGOP(Instruction *);
-   void handleCVT(Instruction *);
+   void handleCVT_NEG(Instruction *);
+   void handleCVT_EXTBF(Instruction *);
     void handleSUCLAMP(Instruction *);
  
     BuildUtil bld;
@@ -1489,12 +1490,12 @@ AlgebraicOpt::handleLOGOP(Instruction *logop)
  // nv50:
  //  F2I(NEG(I2F(ABS(SET))))
  void
-AlgebraicOpt::handleCVT(Instruction *cvt)
+AlgebraicOpt::handleCVT_NEG(Instruction *cvt)
  {
+   Instruction *insn = cvt->getSrc(0)->getInsn();
     if (cvt->sType != TYPE_F32 ||
         cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0))
        return;
-   Instruction *insn = cvt->getSrc(0)->getInsn();
     if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32)
        return;
     if (insn->src(0).mod != Modifier(0))
@@ -1524,6 +1525,74 @@ AlgebraicOpt::handleCVT(Instruction *cvt)
     delete_Instruction(prog, cvt);
  }
  
+// Some shaders extract packed bytes out of words and convert them to
+// e.g. float. The Fermi+ CVT instruction can extract those directly, as can
+// nv50 for word sizes.
+//
+// CVT(EXTBF(x, byte/word))
+// CVT(AND(bytemask, x))
+// CVT(AND(bytemask, SHR(x, 8/16/24)))
+void
+AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
+{
+   Instruction *insn = cvt->getSrc(0)->getInsn();
+   ImmediateValue imm0, imm1;
+   Value *arg = NULL;
+   unsigned width, offset;
+   if ((cvt->sType != TYPE_U32 && cvt->sType != TYPE_S32) || !insn)
+      return;
+   if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm0)) {
+      width = (imm0.reg.data.u32 >> 8) & 0xff;
+      offset = imm0.reg.data.u32 & 0xff;
+      arg = insn->getSrc(0);
+
+      if (width != 8 && width != 16)
+         return;
+      if (width == 8 && offset & 0x7)
+         return;
+      if (width == 16 && offset & 0xf)
+         return;
+   } else if (insn->op == OP_AND) {
+      int s;
+      if (insn->src(0).getImmediate(imm0))
+         s = 0;
+      else if (insn->src(1).getImmediate(imm0))
+         s = 1;
+      else
+         return;
+
+      if (imm0.reg.data.u32 == 0xff)
+         width = 8;
+      else if (imm0.reg.data.u32 == 0xffff)
+         width = 16;
+      else
+         return;
+
+      arg = insn->getSrc(!s);
+      Instruction *shift = arg->getInsn();
+      offset = 0;
+      if (shift && shift->op == OP_SHR &&
+          shift->src(1).getImmediate(imm1) &&
+          ((width == 8 && (imm1.reg.data.u32 & 0x7) == 0) ||
+           (width == 16 && (imm1.reg.data.u32 & 0xf) == 0))) {
+         arg = shift->getSrc(0);
+         offset = imm1.reg.data.u32;
+      }
+   }
+
+   if (!arg)
+      return;
+
+   if (width == 8) {
+      cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U8 : TYPE_S8;
+   } else {
+      assert(width == 16);
+      cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U16 : TYPE_S16;
+   }
+   cvt->setSrc(0, arg);
+   cvt->subOp = offset >> 3;
+}
+
  // SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6)
  void
  AlgebraicOpt::handleSUCLAMP(Instruction *insn)
@@ -1594,7 +1663,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
           handleLOGOP(i);
           break;
        case OP_CVT:
-         handleCVT(i);
+         handleCVT_NEG(i);
+         if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32))
+             handleCVT_EXTBF(i);
           break;
        case OP_SUCLAMP:
           handleSUCLAMP(i);
author	Ilia Mirkin <imirkin@alum.mit.edu>
	Wed, 19 Aug 2015 01:09:12 +0000 (21:09 -0400)
committer	Ilia Mirkin <imirkin@alum.mit.edu>
	Thu, 20 Aug 2015 21:58:30 +0000 (17:58 -0400)
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp		patch \| blob \| history