From 60b28f7a5031324469a751cfbf9567204c4fc313 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 7 Jun 2020 09:51:51 +1000 Subject: [PATCH] nvir: introduce OP_BREV with lowering to EXTBF_REV for current GPUs SM70 has this instruction, but no BFE. Signed-off-by: Ben Skeggs Reviewed-by: Karol Herbst Part-of: --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 6 +++--- .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 9 +++------ .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 11 +++++++++++ .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 6 ++++++ src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + .../drivers/nouveau/codegen/nv50_ir_target.cpp | 6 +++--- 8 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 5b8595c961e..e4bbc8edafb 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -152,6 +152,7 @@ enum operation OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK OP_BFIND, // find highest/lowest set bit + OP_BREV, // bitfield reverse OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order) OP_ATOM, OP_BAR, // execution barrier, sources = { id, thread count, predicate } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 50c8345e4b6..42731f32f92 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1910,7 +1910,7 @@ Converter::visit(nir_intrinsic_instr *insn) if (op == nir_intrinsic_read_first_invocation) { mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; - mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BREV, TYPE_U32, tmp, tmp); mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; } else tmp = getSrc(&insn->src[1], 0); @@ -2794,14 +2794,14 @@ Converter::visit(nir_alu_instr *insn) case nir_op_bitfield_reverse: { DEFAULT_CHECKS; LValues &newDefs = convert(&insn->dest); - mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0])); break; } case nir_op_find_lsb: { DEFAULT_CHECKS; LValues &newDefs = convert(&insn->dest); Value *tmp = getSSA(); - mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0])); mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 60f3d582a0b..3fd76f64de0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -3401,8 +3401,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) // ReadInvocationARB(src, findLSB(ballot(true))) val0 = getScratch(); mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; - mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000)) - ->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BREV, TYPE_U32, val0, val0); mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT; src1 = val0; /* fallthrough */ @@ -3820,8 +3819,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); val0 = getScratch(); - geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000)); - geni->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BREV, TYPE_U32, val0, src0); geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0); geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; } @@ -3836,8 +3834,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_BREV: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); - geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000)); - geni->subOp = NV50_IR_SUBOP_EXTBF_REV; + mkOp1(OP_BREV, TYPE_U32, dst0[c], src0); } break; case TGSI_OPCODE_POPC: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index a60881000fe..ccdc2f98ef6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -310,6 +310,14 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp) cmp->sType = hTy; } +void +NVC0LegalizeSSA::handleBREV(Instruction *i) +{ + i->op = OP_EXTBF; + i->subOp = NV50_IR_SUBOP_EXTBF_REV; + i->setSrc(1, bld.mkImm(0x2000)); +} + bool NVC0LegalizeSSA::visit(Function *fn) { @@ -354,6 +362,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb) if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64) handleSET(i->asCmp()); break; + case OP_BREV: + handleBREV(i); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index b4c405a9ea5..a4925013ee4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -68,6 +68,7 @@ private: void handleSET(CmpInstruction *); void handleTEXLOD(TexInstruction *); void handleShift(Instruction *); + void handleBREV(Instruction *); protected: BuildUtil bld; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 5c3d15968cf..d17e605a51e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1534,6 +1534,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->subOp = 0; break; } + case OP_BREV: { + uint32_t res = util_bitreverse(imm0.reg.data.u32); + i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res)); + i->op = OP_MOV; + break; + } case OP_POPCNT: { // Only deal with 1-arg POPCNT here if (i->srcExists(1)) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index b85ace15b87..e3bac648761 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -178,6 +178,7 @@ const char *operationStr[OP_LAST + 1] = "insbf", "extbf", "bfind", + "brev", "permt", "atom", "bar", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index e801722ec09..77edf41e124 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -51,7 +51,7 @@ const uint8_t Target::operationSrcNr[] = 0, // TEXBAR 1, 1, // DFDX, DFDY 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP - 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT + 2, 3, 2, 1, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, PERMT 2, 2, // ATOM, BAR 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL @@ -120,9 +120,9 @@ const OpClass Target::operationClass[] = // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, - // POPCNT, INSBF, EXTBF, BFIND; PERMT + // POPCNT, INSBF, EXTBF, BFIND, BREV; PERMT OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, - OPCLASS_BITFIELD, + OPCLASS_BITFIELD, OPCLASS_BITFIELD, // ATOM, BAR OPCLASS_ATOMIC, OPCLASS_CONTROL, // VADD, VAVG, VMIN, VMAX -- 2.30.2