X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnouveau%2Fcodegen%2Fnv50_ir_emit_gk110.cpp;h=2118c3153f7f0ceffbf3ab7ab59ec76ee27b27d5;hb=bab4f6c724d384cfee7e7f98ff3b52648850641d;hp=27d9b8e16895591ea3d4129b9933e1452ed47b0d;hpb=df2881381ac67c42aa8ec9e0ed28f21a1d253785;p=mesa.git diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 27d9b8e1689..2118c3153f7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -47,7 +47,7 @@ private: private: void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1); void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg); - void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier); + void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier, int sCount = 3); void emitPredicate(const Instruction *); @@ -96,6 +96,7 @@ private: void emitDMUL(const Instruction *); void emitIMAD(const Instruction *); void emitISAD(const Instruction *); + void emitSHLADD(const Instruction *); void emitFMAD(const Instruction *); void emitDMAD(const Instruction *); void emitMADSP(const Instruction *i); @@ -108,6 +109,7 @@ private: void emitBFIND(const Instruction *); void emitPERMT(const Instruction *); void emitShift(const Instruction *); + void emitShift64(const Instruction *); void emitSFnOp(const Instruction *, uint8_t subOp); @@ -133,6 +135,8 @@ private: void emitFlow(const Instruction *); + void emitSHFL(const Instruction *); + void emitVOTE(const Instruction *); void emitSULDGB(const TexInstruction *); @@ -196,14 +200,18 @@ void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos) void CodeEmitterGK110::defId(const ValueDef& def, const int pos) { - code[pos / 32] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32); + code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32); } bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod) { const ImmediateValue *imm = ref.get()->asImm(); - return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000)); + if (ty == TYPE_F32) + return imm && imm->reg.data.u32 & 0xfff; + else + return imm && (imm->reg.data.s32 > 0x7ffff || + imm->reg.data.s32 < -0x80000); } void @@ -338,7 +346,7 @@ CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s) code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53); code[1] |= ((u64 & 0x8000000000000000ULL) >> 36); } else { - assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000); + assert((u32 & 0xfff80000) == 0 || (u32 & 0xfff80000) == 0xfff80000); code[0] |= (u32 & 0x001ff) << 23; code[1] |= (u32 & 0x7fe00) >> 9; code[1] |= (u32 & 0x80000) << 8; @@ -363,7 +371,7 @@ CodeEmitterGK110::setImmediate32(const Instruction *i, const int s, void CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg, - Modifier mod) + Modifier mod, int sCount) { code[0] = ctg; code[1] = opc << 20; @@ -372,7 +380,7 @@ CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg, defId(i->def(0), 2); - for (int s = 0; s < 3 && i->srcExists(s); ++s) { + for (int s = 0; s < sCount && i->srcExists(s); ++s) { switch (i->src(s).getFile()) { case FILE_GPR: srcId(i->src(s), s ? 42 : 10); @@ -485,25 +493,41 @@ CodeEmitterGK110::emitNOP(const Instruction *i) void CodeEmitterGK110::emitFMAD(const Instruction *i) { - assert(!isLIMM(i->src(1), TYPE_F32)); + bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); - emitForm_21(i, 0x0c0, 0x940); + if (isLIMM(i->src(1), TYPE_F32)) { + assert(i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id); - NEG_(34, 2); - SAT_(35); - RND_(36, F); - FTZ_(38); - DNZ_(39); + // last source is dst, so force 2 sources + emitForm_L(i, 0x600, 0x0, 0, 2); - bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); + if (i->flagsDef >= 0) + code[1] |= 1 << 23; - if (code[0] & 0x1) { - if (neg1) - code[1] ^= 1 << 27; - } else - if (neg1) { - code[1] |= 1 << 19; + SAT_(3a); + NEG_(3c, 2); + + if (neg1) { + code[1] |= 1 << 27; + } + } else { + emitForm_21(i, 0x0c0, 0x940); + + NEG_(34, 2); + SAT_(35); + RND_(36, F); + + if (code[0] & 0x1) { + if (neg1) + code[1] ^= 1 << 27; + } else + if (neg1) { + code[1] |= 1 << 19; + } } + + FTZ_(38); + DNZ_(39); } void @@ -613,7 +637,7 @@ CodeEmitterGK110::emitIMUL(const Instruction *i) assert(!i->src(0).mod.neg() && !i->src(1).mod.neg()); assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); - if (i->src(1).getFile() == FILE_IMMEDIATE) { + if (isLIMM(i->src(1), TYPE_S32)) { emitForm_L(i, 0x280, 2, Modifier(0)); if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) @@ -701,7 +725,7 @@ CodeEmitterGK110::emitUADD(const Instruction *i) if (addOp & 2) code[1] |= 1 << 27; - assert(!i->defExists(1)); + assert(i->flagsDef < 0); assert(i->flagsSrc < 0); SAT_(39); @@ -712,7 +736,7 @@ CodeEmitterGK110::emitUADD(const Instruction *i) code[1] |= addOp << 19; - if (i->defExists(1)) + if (i->flagsDef >= 0) code[1] |= 1 << 18; // write carry if (i->flagsSrc >= 0) code[1] |= 1 << 14; // add carry @@ -721,12 +745,11 @@ CodeEmitterGK110::emitUADD(const Instruction *i) } } -// TODO: shl-add void CodeEmitterGK110::emitIMAD(const Instruction *i) { uint8_t addOp = - (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg()); + i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1); emitForm_21(i, 0x100, 0xa00); @@ -756,6 +779,54 @@ CodeEmitterGK110::emitISAD(const Instruction *i) code[1] |= 1 << 19; } +void +CodeEmitterGK110::emitSHLADD(const Instruction *i) +{ + uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg(); + const ImmediateValue *imm = i->src(1).get()->asImm(); + assert(imm); + + if (i->src(2).getFile() == FILE_IMMEDIATE) { + code[0] = 0x1; + code[1] = 0xc0c << 20; + } else { + code[0] = 0x2; + code[1] = 0x20c << 20; + } + code[1] |= addOp << 19; + + emitPredicate(i); + + defId(i->def(0), 2); + srcId(i->src(0), 10); + + if (i->flagsDef >= 0) + code[1] |= 1 << 18; + + assert(!(imm->reg.data.u32 & 0xffffffe0)); + code[1] |= imm->reg.data.u32 << 10; + + switch (i->src(2).getFile()) { + case FILE_GPR: + assert(code[0] & 0x2); + code[1] |= 0xc << 28; + srcId(i->src(2), 23); + break; + case FILE_MEMORY_CONST: + assert(code[0] & 0x2); + code[1] |= 0x4 << 28; + setCAddress14(i->src(2)); + break; + case FILE_IMMEDIATE: + assert(code[0] & 0x1); + setShortImmediate(i, 2); + break; + default: + assert(!"bad src2 file"); + break; + } +} + void CodeEmitterGK110::emitNOT(const Instruction *i) { @@ -773,7 +844,7 @@ CodeEmitterGK110::emitNOT(const Instruction *i) break; case FILE_MEMORY_CONST: code[1] |= 0x4 << 28; - setCAddress14(i->src(1)); + setCAddress14(i->src(0)); break; default: assert(0); @@ -887,6 +958,24 @@ CodeEmitterGK110::emitShift(const Instruction *i) code[1] |= 1 << 10; } +void +CodeEmitterGK110::emitShift64(const Instruction *i) +{ + if (i->op == OP_SHR) { + emitForm_21(i, 0x27c, 0xc7c); + if (isSignedType(i->sType)) + code[1] |= 0x100; + if (i->subOp & NV50_IR_SUBOP_SHIFT_HIGH) + code[1] |= 1 << 19; + } else { + emitForm_21(i, 0xdfc, 0xf7c); + } + code[1] |= 0x200; + + if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP) + code[1] |= 1 << 21; +} + void CodeEmitterGK110::emitPreOp(const Instruction *i) { @@ -945,6 +1034,9 @@ CodeEmitterGK110::emitMINMAX(const Instruction *i) if (i->dType == TYPE_S32) code[1] |= 1 << 19; code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt + code[1] |= i->subOp << 14; + if (i->flagsDef >= 0) + code[1] |= i->subOp << 18; FTZ_(2f); ABS_(31, 0); @@ -1091,6 +1183,8 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i) } else { code[1] |= 0x7 << 10; } + if (i->flagsSrc >= 0) + code[1] |= 1 << 14; emitCondCode(i->setCond, isFloatType(i->sType) ? 0x33 : 0x34, isFloatType(i->sType) ? 0xf : 0x7); @@ -1110,6 +1204,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i) } else { emitForm_21(i, 0x1a0, 0xb20); emitCondCode(cc, 0x34, 0x7); + if (i->dType == TYPE_S32) + code[1] |= 1 << 19; } } @@ -1321,15 +1417,12 @@ void CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) { code[0] = 0x00000002 | ((qOp & 1) << 31); - code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12); + code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall defId(i->def(0), 2); srcId(i->src(0), 10); srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23); - if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) - code[1] |= 1 << 9; // dall - emitPredicate(i); } @@ -1478,22 +1571,104 @@ CodeEmitterGK110::emitFlow(const Instruction *i) } } +void +CodeEmitterGK110::emitSHFL(const Instruction *i) +{ + const ImmediateValue *imm; + + code[0] = 0x00000002; + code[1] = 0x78800000 | (i->subOp << 1); + + emitPredicate(i); + + defId(i->def(0), 2); + srcId(i->src(0), 10); + + switch (i->src(1).getFile()) { + case FILE_GPR: + srcId(i->src(1), 23); + break; + case FILE_IMMEDIATE: + imm = i->getSrc(1)->asImm(); + assert(imm && imm->reg.data.u32 < 0x20); + code[0] |= imm->reg.data.u32 << 23; + code[0] |= 1 << 31; + break; + default: + assert(!"invalid src1 file"); + break; + } + + switch (i->src(2).getFile()) { + case FILE_GPR: + srcId(i->src(2), 42); + break; + case FILE_IMMEDIATE: + imm = i->getSrc(2)->asImm(); + assert(imm && imm->reg.data.u32 < 0x2000); + code[1] |= imm->reg.data.u32 << 5; + code[1] |= 1; + break; + default: + assert(!"invalid src2 file"); + break; + } + + if (!i->defExists(1)) + code[1] |= 7 << 19; + else { + assert(i->def(1).getFile() == FILE_PREDICATE); + defId(i->def(1), 51); + } +} + void CodeEmitterGK110::emitVOTE(const Instruction *i) { - assert(i->src(0).getFile() == FILE_PREDICATE && - i->def(1).getFile() == FILE_PREDICATE); + const ImmediateValue *imm; + uint32_t u32; code[0] = 0x00000002; code[1] = 0x86c00000 | (i->subOp << 19); emitPredicate(i); - defId(i->def(0), 2); - defId(i->def(1), 48); - if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) - code[1] |= 1 << 13; - srcId(i->src(0), 42); + unsigned rp = 0; + for (int d = 0; i->defExists(d); d++) { + if (i->def(d).getFile() == FILE_PREDICATE) { + assert(!(rp & 2)); + rp |= 2; + defId(i->def(d), 48); + } else if (i->def(d).getFile() == FILE_GPR) { + assert(!(rp & 1)); + rp |= 1; + defId(i->def(d), 2); + } else { + assert(!"Unhandled def"); + } + } + if (!(rp & 1)) + code[0] |= 255 << 2; + if (!(rp & 2)) + code[1] |= 7 << 16; + + switch (i->src(0).getFile()) { + case FILE_PREDICATE: + if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) + code[0] |= 1 << 13; + srcId(i->src(0), 42); + break; + case FILE_IMMEDIATE: + imm = i->getSrc(0)->asImm(); + assert(imm); + u32 = imm->reg.data.u32; + assert(u32 == 0 || u32 == 1); + code[1] |= (u32 == 1 ? 0x7 : 0xf) << 10; + break; + default: + assert(!"Unhandled src"); + break; + } } void @@ -2122,6 +2297,7 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& ref) case SV_INVOCATION_ID: return 0x11; case SV_YDIR: return 0x12; case SV_THREAD_KILL: return 0x13; + case SV_COMBINED_TID: return 0x20; case SV_TID: return 0x21 + SDATA(ref).sv.index; case SV_CTAID: return 0x25 + SDATA(ref).sv.index; case SV_NTID: return 0x29 + SDATA(ref).sv.index; @@ -2129,6 +2305,11 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& ref) case SV_NCTAID: return 0x2d + SDATA(ref).sv.index; case SV_LBASE: return 0x34; case SV_SBASE: return 0x30; + case SV_LANEMASK_EQ: return 0x38; + case SV_LANEMASK_LT: return 0x39; + case SV_LANEMASK_LE: return 0x3a; + case SV_LANEMASK_GT: return 0x3b; + case SV_LANEMASK_GE: return 0x3c; case SV_CLOCK: return 0x50 + SDATA(ref).sv.index; default: assert(!"no sreg for system value"); @@ -2139,6 +2320,34 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& ref) void CodeEmitterGK110::emitMOV(const Instruction *i) { + if (i->def(0).getFile() == FILE_PREDICATE) { + if (i->src(0).getFile() == FILE_GPR) { + // Use ISETP.NE.AND dst, PT, src, RZ, PT + code[0] = 0x00000002; + code[1] = 0xdb500000; + + code[0] |= 0x7 << 2; + code[0] |= 0xff << 23; + code[1] |= 0x7 << 10; + srcId(i->src(0), 10); + } else + if (i->src(0).getFile() == FILE_PREDICATE) { + // Use PSETP.AND.AND dst, PT, src, PT, PT + code[0] = 0x00000002; + code[1] = 0x84800000; + + code[0] |= 0x7 << 2; + code[1] |= 0x7 << 0; + code[1] |= 0x7 << 10; + + srcId(i->src(0), 14); + } else { + assert(!"Unexpected source for predicate destination"); + emitNOP(i); + } + emitPredicate(i); + defId(i->def(0), 5); + } else if (i->src(0).getFile() == FILE_SYSTEM_VALUE) { code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23); code[1] = 0x86400000; @@ -2363,6 +2572,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) case OP_SAD: emitISAD(insn); break; + case OP_SHLADD: + emitSHLADD(insn); + break; case OP_NOT: emitNOT(insn); break; @@ -2377,7 +2589,10 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) break; case OP_SHL: case OP_SHR: - emitShift(insn); + if (typeSizeof(insn->sType) == 8) + emitShift64(insn); + else + emitShift(insn); break; case OP_SET: case OP_SET_AND: @@ -2506,6 +2721,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) case OP_CCTL: emitCCTL(insn); break; + case OP_SHFL: + emitSHFL(insn); + break; case OP_VOTE: emitVOTE(insn); break;