From 85132c7453230960f34cfe7b7b7fcaaab158d79f Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 14 Sep 2016 16:42:45 +0200 Subject: [PATCH] nv50/ir: add preliminary support for SHLADD This instruction is available since SM20 (Fermi) and allow to do (a << b) + c in one shot. In some situations, IMAD should be replaced by SHLADD when b is a power of 2, and ADD+SHL should be replaced by SHLADD as well. Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + .../drivers/nouveau/codegen/nv50_ir_target.cpp | 6 +++--- .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5 +++-- .../drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 11 +++++++++-- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index d6011d9c223..bedbdcc4cd2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -57,6 +57,7 @@ enum operation OP_MAD, OP_FMA, OP_SAD, // abs(src0 - src1) + src2 + OP_SHLADD, OP_ABS, OP_NEG, OP_NOT, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 22f2f5def71..dbd0f7d1fd3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] = "mad", "fma", "sad", + "shladd", "abs", "neg", "not", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 7d7b3158951..273ec34fd3b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -30,7 +30,7 @@ const uint8_t Target::operationSrcNr[] = 0, 0, // NOP, PHI 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 1, 1, 2, // MOV, LOAD, STORE - 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD + 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD 1, 1, 1, // ABS, NEG, NOT 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR 2, 2, 1, // MAX, MIN, SAT @@ -70,10 +70,10 @@ const OpClass Target::operationClass[] = OPCLASS_MOVE, OPCLASS_LOAD, OPCLASS_STORE, - // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD + // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, - OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index 1246cc65bca..83b4102b0ab 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -115,12 +115,12 @@ void TargetNV50::initOpInfo() { // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, SET_XOR, // SET, SELP, SLCT - 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 + 0x0ce0ca00, 0x0000007e, 0x00000000, 0x00000000 }; static const uint32_t shortForm[(OP_LAST + 31) / 32] = { // MOV, ADD, SUB, MUL, MAD, SAD, RCP, L/PINTERP, TEX, TXF - 0x00014e40, 0x00000040, 0x00000930, 0x00000000 + 0x00014e40, 0x00000080, 0x00001260, 0x00000000 }; static const operation noDestList[] = { @@ -438,6 +438,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const case OP_EXTBF: case OP_EXIT: // want exit modifier instead (on NOP if required) case OP_MEMBAR: + case OP_SHLADD: return false; case OP_SAD: return ty == TYPE_S32; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index f75e395265f..86060653993 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -105,6 +105,7 @@ static const struct opProperties _initProps[] = { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint + { OP_SHLADD, 0x5, 0x0, 0x0, 0x0, 0x4, 0x6 }, { OP_MADSP, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 }, @@ -158,13 +159,13 @@ void TargetNVC0::initOpInfo() { // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, SET_XOR, // SET, SELP, SLCT - 0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 + 0x0ce0ca00, 0x0000007e, 0x00000000, 0x00000000 }; static const uint32_t shortForm[(OP_LAST + 31) / 32] = { // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN - 0x0670ca00, 0x00000000, 0x00000000, 0x00000000 + 0x0ce0ca00, 0x00000000, 0x00000000, 0x00000000 }; static const operation noDest[] = @@ -451,6 +452,12 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const if (s == 0) return insn->src(1).mod.neg() ? false : true; break; + case OP_SHLADD: + if (s == 1) + return false; + if (insn->src(s ? 0 : 2).mod.neg()) + return false; + break; default: return false; } -- 2.30.2