OP_POPCNT, // bitcount(src0 & src1)
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
+ OP_BFIND, // find highest/lowest set bit
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
OP_ATOM,
OP_BAR, // execution barrier, sources = { id, thread count, predicate }
#define NV50_IR_SUBOP_TEXBAR(n) n
#define NV50_IR_SUBOP_MOV_FINAL 1
#define NV50_IR_SUBOP_EXTBF_REV 1
+#define NV50_IR_SUBOP_BFIND_SAMT 1
#define NV50_IR_SUBOP_PERMT_F4E 1
#define NV50_IR_SUBOP_PERMT_B4E 2
#define NV50_IR_SUBOP_PERMT_RC8 3
void emitLogicOp(const Instruction *, uint8_t subOp);
void emitPOPC(const Instruction *);
void emitINSBF(const Instruction *);
+ void emitEXTBF(const Instruction *);
+ void emitBFIND(const Instruction *);
void emitShift(const Instruction *);
void emitSFnOp(const Instruction *, uint8_t subOp);
emitForm_21(i, 0x1f8, 0xb78);
}
+void
+CodeEmitterGK110::emitEXTBF(const Instruction *i) // GK110 encoder for OP_EXTBF (bitfield extract)
+{
+ emitForm_21(i, 0x600, 0xc00); // base encoding is produced by emitForm_21 with these two opcode constants
+
+ if (i->dType == TYPE_S32) // signed destination type: set 0x80000 in the second code word
+ code[1] |= 0x80000;
+ if (i->subOp == NV50_IR_SUBOP_EXTBF_REV) // REV sub-op requested: set 0x800 in the second code word
+ code[1] |= 0x800;
+}
+
+void
+CodeEmitterGK110::emitBFIND(const Instruction *i) // GK110 encoder for OP_BFIND (find highest/lowest set bit)
+{
+ emitForm_21(i, 0x618, 0xc18); // base encoding is produced by emitForm_21 with these two opcode constants
+
+ if (i->dType == TYPE_S32) // signed destination type: set 0x80000 in the second code word
+ code[1] |= 0x80000;
+ if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) // source 0's modifier equals NOT: set 0x800
+ code[1] |= 0x800;
+ if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT) // SAMT sub-op requested: set 0x1000
+ code[1] |= 0x1000;
+}
+
void
CodeEmitterGK110::emitShift(const Instruction *i)
{
case OP_POPCNT:
emitPOPC(insn);
break;
+ case OP_INSBF:
+ emitINSBF(insn);
+ break;
+ case OP_EXTBF:
+ emitEXTBF(insn);
+ break;
+ case OP_BFIND:
+ emitBFIND(insn);
+ break;
case OP_JOIN:
emitNOP(insn);
insn->join = 1;
void emitPOPC(const Instruction *);
void emitINSBF(const Instruction *);
void emitEXTBF(const Instruction *);
+ void emitBFIND(const Instruction *);
void emitPERMT(const Instruction *);
void emitShift(const Instruction *);
code[0] |= 1 << 8;
}
+void
+CodeEmitterNVC0::emitBFIND(const Instruction *i) // NVC0 encoder for OP_BFIND (find highest/lowest set bit)
+{
+ emitForm_B(i, HEX64(78000000, 00000003)); // base encoding is produced by emitForm_B with this 64-bit opcode constant
+
+ if (i->dType == TYPE_S32) // signed destination type: set bit 5 of the first code word
+ code[0] |= 1 << 5;
+ if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) // source 0's modifier equals NOT: set bit 8
+ code[0] |= 1 << 8;
+ if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT) // SAMT sub-op requested: set bit 6
+ code[0] |= 1 << 6;
+}
+
void
CodeEmitterNVC0::emitPERMT(const Instruction *i)
{
case OP_EXTBF:
emitEXTBF(insn);
break;
+ case OP_BFIND:
+ emitBFIND(insn);
+ break;
case OP_PERMT:
emitPERMT(insn);
break;
case TGSI_OPCODE_ATOMXOR:
case TGSI_OPCODE_ATOMUMIN:
case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_UBFE:
+ case TGSI_OPCODE_UMSB:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_IDIV:
case TGSI_OPCODE_UARL:
case TGSI_OPCODE_ATOMIMIN:
case TGSI_OPCODE_ATOMIMAX:
+ case TGSI_OPCODE_IBFE:
+ case TGSI_OPCODE_IMSB:
return nv50_ir::TYPE_S32;
default:
return nv50_ir::TYPE_F32;
NV50_IR_OPCODE_CASE(TXB2, TXB);
NV50_IR_OPCODE_CASE(TXL2, TXL);
+ NV50_IR_OPCODE_CASE(IBFE, EXTBF);
+ NV50_IR_OPCODE_CASE(UBFE, EXTBF);
+ NV50_IR_OPCODE_CASE(BFI, INSBF);
+ NV50_IR_OPCODE_CASE(BREV, EXTBF);
+ NV50_IR_OPCODE_CASE(POPC, POPCNT);
+ NV50_IR_OPCODE_CASE(LSB, BFIND);
+ NV50_IR_OPCODE_CASE(IMSB, BFIND);
+ NV50_IR_OPCODE_CASE(UMSB, BFIND);
+
NV50_IR_OPCODE_CASE(END, EXIT);
default:
Instruction *geni;
Value *dst0[4], *rDst0[4];
- Value *src0, *src1, *src2;
+ Value *src0, *src1, *src2, *src3;
Value *val0, *val1;
int c;
case TGSI_OPCODE_ATOMIMAX:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
+ case TGSI_OPCODE_IBFE:
+ case TGSI_OPCODE_UBFE:
+ // Bitfield extract. OP_EXTBF expects its second source packed as 0xNNKK
+ // (N = bit count, K = start bit), so the low 8 bits of TGSI source 2 are
+ // first inserted at bit 8 of TGSI source 1 with OP_INSBF (0x808 = 8 bits
+ // at position 8).
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ // Build the packed operand in a scratch value: using the fetched src1 as
+ // the INSBF destination would overwrite the shader's own operand for any
+ // later reader of it.
+ val0 = getScratch();
+ mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1);
+ mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0);
+ }
+ break;
+ case TGSI_OPCODE_BFI:
+ // Bitfield insert. OP_INSBF takes its position/count operand packed as
+ // count in bits [8:15] and position in bits [0:7], so TGSI sources 2 and 3
+ // are first combined (0x808 = insert 8 bits at position 8), then the real
+ // insert of source 1 into source 0 is emitted.
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ src3 = fetchSrc(3, c);
+ // Pack into a scratch value: using the fetched src2 as the destination
+ // would overwrite the shader's own operand for any later reader of it.
+ val0 = getScratch();
+ mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2);
+ mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0);
+ }
+ break;
+ case TGSI_OPCODE_LSB:
+ // Lowest set bit: run OP_EXTBF with the REV sub-op over the whole word
+ // (0x2000 = 0x20 bits starting at bit 0), then OP_BFIND with the SAMT
+ // sub-op on that intermediate result.
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ // Keep the intermediate in a scratch value: using the fetched src0 as the
+ // EXTBF destination would replace the shader's operand with the
+ // transformed value for any later reader of it.
+ val0 = getScratch();
+ geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
+ geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
+ geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+ }
+ break;
+ case TGSI_OPCODE_IMSB: // signed and unsigned MSB share one lowering; only the instruction type differs
+ case TGSI_OPCODE_UMSB:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ mkOp1(OP_BFIND, srcTy, dst0[c], src0); // plain OP_BFIND (no sub-op), typed with srcTy
+ }
+ break;
+ case TGSI_OPCODE_BREV: // lowered to a single OP_EXTBF carrying the REV sub-op
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000)); // 0xNNKK = 0x2000: 0x20 bits starting at bit 0
+ geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ }
+ break;
+ case TGSI_OPCODE_POPC: // population count of a single operand
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = fetchSrc(0, c);
+ mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0); // OP_POPCNT is bitcount(src0 & src1); passing src0 twice counts src0 itself
+ }
+ break;
default:
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
assert(0);
"popcnt",
"insbf",
"extbf",
+ "bfind",
"permt",
"atom",
"bar",
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
- 2, 3, 2, 3, // POPCNT, INSBF, EXTBF, PERMT
+ 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
- // POPCNT, INSBF, EXTBF, PERMT
+ // POPCNT, INSBF, EXTBF, BFIND; PERMT
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
+ OPCLASS_BITFIELD,
// ATOM, BAR
OPCLASS_ATOMIC, OPCLASS_CONTROL,
// VADD, VAVG, VMIN, VMAX
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
{ OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
- { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
+ { OP_POPCNT, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 },
+ { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
+ { OP_EXTBF, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
+ { OP_BFIND, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1 },
{ OP_PERMT, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
case OP_AND:
case OP_OR:
case OP_XOR:
+ case OP_POPCNT:
+ case OP_BFIND:
break;
case OP_SET:
if (insn->sType != TYPE_F32)