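SM70 has a dedicated bitfield-reverse (BREV) instruction, but no BFE. Add OP_BREV to the IR and use it in place of EXTBF with the REV subop; on NVC0 it is lowered back to EXTBF.REV during SSA legalization.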
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
OP_BFIND, // find highest/lowest set bit
+ OP_BREV, // bitfield reverse
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
OP_ATOM,
OP_BAR, // execution barrier, sources = { id, thread count, predicate }
if (op == nir_intrinsic_read_first_invocation) {
mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
- mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
} else
tmp = getSrc(&insn->src[1], 0);
case nir_op_bitfield_reverse: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
- mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
break;
}
case nir_op_find_lsb: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
Value *tmp = getSSA();
- mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
break;
}
// ReadInvocationARB(src, findLSB(ballot(true)))
val0 = getScratch();
mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
- mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
- ->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, val0, val0);
mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
src1 = val0;
/* fallthrough */
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
val0 = getScratch();
- geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
- geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, val0, src0);
geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
}
case TGSI_OPCODE_BREV:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
- geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
- geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, dst0[c], src0);
}
break;
case TGSI_OPCODE_POPC:
cmp->sType = hTy;
}
+void
+NVC0LegalizeSSA::handleBREV(Instruction *i)
+{
+ i->op = OP_EXTBF;
+ i->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ i->setSrc(1, bld.mkImm(0x2000));
+}
+
bool
NVC0LegalizeSSA::visit(Function *fn)
{
if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64)
handleSET(i->asCmp());
break;
+ case OP_BREV:
+ handleBREV(i);
+ break;
default:
break;
}
void handleSET(CmpInstruction *);
void handleTEXLOD(TexInstruction *);
void handleShift(Instruction *);
+ void handleBREV(Instruction *);
protected:
BuildUtil bld;
i->subOp = 0;
break;
}
+ case OP_BREV: {
+ uint32_t res = util_bitreverse(imm0.reg.data.u32);
+ i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
+ i->op = OP_MOV;
+ break;
+ }
case OP_POPCNT: {
// Only deal with 1-arg POPCNT here
if (i->srcExists(1))
"insbf",
"extbf",
"bfind",
+ "brev",
"permt",
"atom",
"bar",
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
- 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
+ 2, 3, 2, 1, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, PERMT
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
- // POPCNT, INSBF, EXTBF, BFIND; PERMT
+ // POPCNT, INSBF, EXTBF, BFIND, BREV; PERMT
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
- OPCLASS_BITFIELD,
+ OPCLASS_BITFIELD, OPCLASS_BITFIELD,
// ATOM, BAR
OPCLASS_ATOMIC, OPCLASS_CONTROL,
// VADD, VAVG, VMIN, VMAX