nv50/ir: add preliminary support for OP_XMAD

author Rhys Perry <pendingchaos02@gmail.com>

Wed, 13 Jun 2018 15:21:20 +0000 (16:21 +0100)

committer Rhys Perry <pendingchaos02@gmail.com>

Mon, 27 Aug 2018 12:56:36 +0000 (13:56 +0100)
author Rhys Perry <pendingchaos02@gmail.com>
Wed, 13 Jun 2018 15:21:20 +0000 (16:21 +0100)
committer Rhys Perry <pendingchaos02@gmail.com>
Mon, 27 Aug 2018 12:56:36 +0000 (13:56 +0100)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h

index 0b220cc48de99856a280118e0852f1e36698161a..d5c9570a56b9d23939f6031d8162fd3e97c50ef0 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -58,6 +58,9 @@ enum operation
     OP_FMA,
     OP_SAD, // abs(src0 - src1) + src2
     OP_SHLADD,
+   // extended multiply-add (GM107+), does a lot of things.
+   // see envytools for detailed documentation
+   OP_XMAD,
     OP_ABS,
     OP_NEG,
     OP_NOT,
@@ -256,6 +259,29 @@ enum operation
  #define NV50_IR_SUBOP_MINMAX_MED  2
  #define NV50_IR_SUBOP_MINMAX_HIGH 3
  
+// (xmad(src0, src1, 0) << 16) + src2
+#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
+// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
+#define NV50_IR_SUBOP_XMAD_MRG (1 << 1)
+// xmad(src0, src1, src2.lo)
+#define NV50_IR_SUBOP_XMAD_CLO (1 << 2)
+// xmad(src0, src1, src2.hi)
+#define NV50_IR_SUBOP_XMAD_CHI (2 << 2)
+// if both operands to the multiplication are non-zero, subtract 65536 for each
+// negative operand
+#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2)
+// xmad(src0, src1, src2) + (src1 << 16)
+#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2)
+#define NV50_IR_SUBOP_XMAD_CMODE_SHIFT 2
+#define NV50_IR_SUBOP_XMAD_CMODE_MASK (0x7 << NV50_IR_SUBOP_XMAD_CMODE_SHIFT)
+
+// H1(i): use the high 16 bits of source i (0 or 1) instead of the low 16 bits
+// for the multiplication. if the instruction's sType is signed, the 16-bit
+// operand is sign extended to 32 bits before multiplication.
+#define NV50_IR_SUBOP_XMAD_H1_SHIFT 5
+#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (NV50_IR_SUBOP_XMAD_H1_SHIFT + (i)))
+#define NV50_IR_SUBOP_XMAD_H1_MASK (0x3 << NV50_IR_SUBOP_XMAD_H1_SHIFT)
+
  enum DataType
  {
     TYPE_NONE,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp

index 2f7cc206b8429bf0b174244594e855349f21bc4f..5b4a98d25cb7532968ba15a6153bee8c7e1c85e5 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -191,9 +191,17 @@ void
  LoadPropagation::checkSwapSrc01(Instruction *insn)
  {
     const Target *targ = prog->getTarget();
-   if (!targ->getOpInfo(insn).commutative)
-      if (insn->op != OP_SET && insn->op != OP_SLCT && insn->op != OP_SUB)
+   if (!targ->getOpInfo(insn).commutative) {
+      if (insn->op != OP_SET && insn->op != OP_SLCT &&
+          insn->op != OP_SUB && insn->op != OP_XMAD)
           return;
+      // XMAD is only commutative if neither the CBCC mode nor the MRG flag is set.
+      if (insn->op == OP_XMAD &&
+          (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == NV50_IR_SUBOP_XMAD_CBCC)
+         return;
+      if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG))
+         return;
+   }
     if (insn->src(1).getFile() != FILE_GPR)
        return;
     // This is the special OP_SET used for alphatesting, we can't reverse its
@@ -236,6 +244,12 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
     if (insn->op == OP_SUB) {
        insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
        insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
+   } else
+   if (insn->op == OP_XMAD) {
+      // swap h1 flags
+      uint16_t h1 = (insn->subOp >> 1 & NV50_IR_SUBOP_XMAD_H1(0)) |
+                    (insn->subOp << 1 & NV50_IR_SUBOP_XMAD_H1(1));
+      insn->subOp = (insn->subOp & ~NV50_IR_SUBOP_XMAD_H1_MASK) | h1;
     }
  }
  
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp

index ee3506fbaee09a0db9908936a008f82e0521b746..7eab8b8d70d7d4f45326e586d2ada16806b1e066 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] =
     "fma",
     "sad",
     "shladd",
+   "xmad",
     "abs",
     "neg",
     "not",
@@ -240,6 +241,11 @@ static const char *barOpStr[] =
     "sync", "arrive", "red and", "red or", "red popc"
  };
  
+static const char *xmadOpCModeStr[] =
+{
+   "clo", "chi", "csfu", "cbcc"
+};
+
  static const char *DataTypeStr[] =
  {
     "-",
@@ -625,6 +631,19 @@ void Instruction::print() const
           if (subOp < ARRAY_SIZE(barOpStr))
              PRINT("%s ", barOpStr[subOp]);
           break;
+      case OP_XMAD: {
+         if (subOp & NV50_IR_SUBOP_XMAD_PSL)
+            PRINT("psl ");
+         if (subOp & NV50_IR_SUBOP_XMAD_MRG)
+            PRINT("mrg ");
+         unsigned cmode = (subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
+         cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
+         if (cmode && cmode <= ARRAY_SIZE(xmadOpCModeStr))
+            PRINT("%s ", xmadOpCModeStr[cmode - 1]);
+         for (int i = 0; i < 2; i++)
+            PRINT("h%d ", (subOp & NV50_IR_SUBOP_XMAD_H1(i)) ? 1 : 0);
+         break;
+      }
        default:
           if (subOp)
              PRINT("(SUBOP:%u) ", subOp);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp

index 298e7c6ef9b14a664b585e9aff5c41ba88ffbdca..9193a01f189874a7fb384529a4f34fbc9148a452 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -30,7 +30,8 @@ const uint8_t Target::operationSrcNr[] =
     0, 0,                   // NOP, PHI
     0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
     1, 1, 2,                // MOV, LOAD, STORE
-   2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
+   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
+   3, 3,                   // SHLADD, XMAD
     1, 1, 1,                // ABS, NEG, NOT
     2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
     2, 2, 1,                // MAX, MIN, SAT
@@ -70,10 +71,10 @@ const OpClass Target::operationClass[] =
     OPCLASS_MOVE,
     OPCLASS_LOAD,
     OPCLASS_STORE,
-   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
+   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
     OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
     OPCLASS_ARITH, OPCLASS_ARITH,
-   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
+   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
     // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
     OPCLASS_CONVERT, OPCLASS_CONVERT,
     OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp

index c25e6da024d9c3caf7e18de47b6465b5fbb7d898..2dd12322a89fb3242c38eda706728781f39b2ff5 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -59,6 +59,7 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
     case OP_POW:
     case OP_DIV:
     case OP_MOD:
+   case OP_XMAD:
        return false;
     case OP_SQRT:
        if (ty == TYPE_F64)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp

index 1ad3467337c4300af13248baeb9ba1a6e63cc1f0..29814973408150c367b986726b1d7f2f0bda2b61 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -443,6 +443,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
     case OP_EXIT: // want exit modifier instead (on NOP if required)
     case OP_MEMBAR:
     case OP_SHLADD:
+   case OP_XMAD:
        return false;
     case OP_SAD:
        return ty == TYPE_S32;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp

index 9304e3923617ba8c5b3957019d0c992d816a9aea..8e040695363f2c049625ba3708d59764dfa1e552 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -357,6 +357,18 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
     if ((i->op == OP_SHL || i->op == OP_SHR) && typeSizeof(i->sType) == 8 &&
         sf == FILE_MEMORY_CONST)
        return false;
+   // constant buffer loads can't be used with cbcc xmads
+   if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST &&
+       (i->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == NV50_IR_SUBOP_XMAD_CBCC)
+      return false;
+   // constant buffer loads for the third operand can't be used with psl/mrg xmads
+   if (i->op == OP_XMAD && sf == FILE_MEMORY_CONST && s == 2 &&
+       (i->subOp & (NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_MRG)))
+      return false;
+   // for xmads, an immediate can't be used for src0/src1 if that source's h1 flag is set
+   if (i->op == OP_XMAD && sf == FILE_IMMEDIATE && s < 2 &&
+       i->subOp & NV50_IR_SUBOP_XMAD_H1(s))
+      return false;
  
     for (int k = 0; i->srcExists(k); ++k) {
        if (i->src(k).getFile() == FILE_IMMEDIATE) {
@@ -393,6 +405,9 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
              // with u32, 0xfffff counts as 0xffffffff as well
              if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
                 return false;
+            // XMADs can only have 16-bit immediates
+            if (i->op == OP_XMAD && reg.data.u32 > 0xffff)
+               return false;
              break;
           case TYPE_U8:
           case TYPE_S8:
@@ -449,6 +464,8 @@ TargetNVC0::isOpSupported(operation op, DataType ty) const
        return false;
     if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
        return false;
+   if (op == OP_XMAD)
+      return false;
     return true;
  }
  
@@ -468,6 +485,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
        case OP_XOR:
        case OP_POPCNT:
        case OP_BFIND:
+      case OP_XMAD:
           break;
        case OP_SET:
           if (insn->sType != TYPE_F32)
author	Rhys Perry <pendingchaos02@gmail.com>
	Wed, 13 Jun 2018 15:21:20 +0000 (16:21 +0100)
committer	Rhys Perry <pendingchaos02@gmail.com>
	Mon, 27 Aug 2018 12:56:36 +0000 (13:56 +0100)
src/gallium/drivers/nouveau/codegen/nv50_ir.h		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp		patch \| blob \| history