nv50/ir/ra: prefer def == src2 for fma with immediates on nvc0
authorKarol Herbst <karolherbst@gmail.com>
Tue, 27 Mar 2018 17:10:34 +0000 (19:10 +0200)
committerIlia Mirkin <imirkin@alum.mit.edu>
Sat, 21 Apr 2018 14:53:59 +0000 (10:53 -0400)
This helps with the PostRALoadPropagation pass moving long immediates into
FMA/MAD instructions.

changes in shader-db:
total instructions in shared programs : 5894114 -> 5886074 (-0.14%)
total gprs used in shared programs    : 666558 -> 666563 (0.00%)
total shared used in shared programs  : 520416 -> 520416 (0.00%)
total local used in shared programs   : 53524 -> 53524 (0.00%)
total bytes used in shared programs   : 54006744 -> 53932472 (-0.14%)

                local     shared        gpr       inst      bytes
    helped           0           0           2        4192        4192
      hurt           0           0           7           9           9

Signed-off-by: Karol Herbst <karolherbst@gmail.com>
[imirkin: minor edits to separate nv50 and nvc0+ cases]
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp

index 3a0e56e1385d3b1fea8c47bcd4af6fc2e69b0b56..3953475c0ac7c210d2456715823f02bc62b3fa2b 100644 (file)
@@ -1466,17 +1466,36 @@ GCRA::allocateRegisters(ArrayList& insns)
          nodes[i].init(regs, lval);
          RIG.insert(&nodes[i]);
 
-         if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL &&
-             prog->getTarget()->getChipset() < 0xc0) {
+         if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL) {
             Instruction *insn = lval->getInsn();
-            if (insn->op == OP_MAD || insn->op == OP_FMA || insn->op == OP_SAD)
-               // Short encoding only possible if they're all GPRs, no need to
-               // affect them otherwise.
-               if (insn->flagsDef < 0 &&
-                   insn->src(0).getFile() == FILE_GPR &&
-                   insn->src(1).getFile() == FILE_GPR &&
-                   insn->src(2).getFile() == FILE_GPR)
-                  nodes[i].addRegPreference(getNode(insn->getSrc(2)->asLValue()));
+            if (insn->op != OP_MAD && insn->op != OP_FMA && insn->op != OP_SAD)
+               continue;
+            // For both of the cases below, we only want to add the preference
+            // if all arguments are in registers.
+            if (insn->src(0).getFile() != FILE_GPR ||
+                insn->src(1).getFile() != FILE_GPR ||
+                insn->src(2).getFile() != FILE_GPR)
+               continue;
+            if (prog->getTarget()->getChipset() < 0xc0) {
+               // Outputting a flag is not supported with short encodings nor
+               // with immediate arguments.
+               // See handleMADforNV50.
+               if (insn->flagsDef >= 0)
+                  continue;
+            } else {
+               // We can only fold immediate arguments if dst == src2. This
+               // only matters if one of the first two arguments is an
+               // immediate. This form is also only supported for floats.
+               // See handleMADforNVC0.
+               ImmediateValue imm;
+               if (insn->dType != TYPE_F32)
+                  continue;
+               if (!insn->src(0).getImmediate(imm) &&
+                   !insn->src(1).getImmediate(imm))
+                  continue;
+            }
+
+            nodes[i].addRegPreference(getNode(insn->getSrc(2)->asLValue()));
          }
       }
    }