From ad638514e362d0e84d28df2346a53fb9b29ff884 Mon Sep 17 00:00:00 2001
From: Karol Herbst
Date: Sun, 26 Mar 2017 21:45:59 +0200
Subject: [PATCH] gk110/ir: add LIMM form of mad

v2: renamed commit
    reordered modifiers
    add assert(dst == src2)
v3: removed wrong neg mod emission

Signed-off-by: Karol Herbst
Reviewed-by: Ilia Mirkin
---
 .../nouveau/codegen/nv50_ir_emit_gk110.cpp | 50 ++++++++++++-------
 .../nouveau/codegen/nv50_ir_peephole.cpp   |  2 +-
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 4210321ae13..1121ae09123 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -47,7 +47,7 @@ private:
 private:
    void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
    void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
-   void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier);
+   void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier, int sCount = 3);
 
    void emitPredicate(const Instruction *);
 
@@ -365,7 +365,7 @@ CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
 
 void
 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
-                             Modifier mod)
+                             Modifier mod, int sCount)
 {
    code[0] = ctg;
    code[1] = opc << 20;
@@ -374,7 +374,7 @@ CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
 
    defId(i->def(0), 2);
 
-   for (int s = 0; s < 3 && i->srcExists(s); ++s) {
+   for (int s = 0; s < sCount && i->srcExists(s); ++s) {
       switch (i->src(s).getFile()) {
       case FILE_GPR:
          srcId(i->src(s), s ? 42 : 10);
@@ -487,25 +487,41 @@ CodeEmitterGK110::emitNOP(const Instruction *i)
 void
 CodeEmitterGK110::emitFMAD(const Instruction *i)
 {
-   assert(!isLIMM(i->src(1), TYPE_F32));
+   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
 
-   emitForm_21(i, 0x0c0, 0x940);
+   if (isLIMM(i->src(1), TYPE_F32)) {
+      assert(i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id);
 
-   NEG_(34, 2);
-   SAT_(35);
-   RND_(36, F);
-   FTZ_(38);
-   DNZ_(39);
+      // last source is dst, so force 2 sources
+      emitForm_L(i, 0x600, 0x0, 0, 2);
 
-   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
+      if (i->flagsDef >= 0)
+         code[1] |= 1 << 23;
 
-   if (code[0] & 0x1) {
-      if (neg1)
-         code[1] ^= 1 << 27;
-   } else
-   if (neg1) {
-      code[1] |= 1 << 19;
+      SAT_(3a);
+      NEG_(3c, 2);
+
+      if (neg1) {
+         code[1] |= 1 << 27;
+      }
+   } else {
+      emitForm_21(i, 0x0c0, 0x940);
+
+      NEG_(34, 2);
+      SAT_(35);
+      RND_(36, F);
+
+      if (code[0] & 0x1) {
+         if (neg1)
+            code[1] ^= 1 << 27;
+      } else
+      if (neg1) {
+         code[1] |= 1 << 19;
+      }
    }
+
+   FTZ_(38);
+   DNZ_(39);
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index f7293b273ee..df0f35949d6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -3741,7 +3741,7 @@ bool
 Program::optimizePostRA(int level)
 {
    RUN_PASS(2, FlatteningPass, run);
-   if (getTarget()->getChipset() < NVISA_GK20A_CHIPSET)
+   if (getTarget()->getChipset() < NVISA_GM107_CHIPSET)
       RUN_PASS(2, PostRaLoadPropagation, run);
 
    return true;
-- 
2.30.2