From 78103abe8784e788c7d04b807bc47527b504121e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 7 Jun 2020 09:52:35 +1000 Subject: [PATCH] nvir/gv100: initial support v2: - add TargetGV100::isBarrierRequired() for OP_BREV - use NV50_IR_SUBOP_LOP3_LUT() convenience macro where it makes sense - separated out nir_lower_idiv into its own commit - make use of the shared function to generate compiler options - disable lower_fpow, nir's lowering is broken v3: - use replaceCvt() instead of custom NEG/ABS/SAT lowering v4: - remove WAR from peephole, not needed now we're using replaceCvt() Signed-off-by: Ben Skeggs Acked-by: Karol Herbst Part-of: --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 1 + .../nouveau/codegen/nv50_ir_emit_gv100.cpp | 2011 +++++++++++++++++ .../nouveau/codegen/nv50_ir_emit_gv100.h | 403 ++++ .../nouveau/codegen/nv50_ir_from_nir.cpp | 45 +- .../nouveau/codegen/nv50_ir_lowering_gm107.h | 1 + .../codegen/nv50_ir_lowering_gv100.cpp | 477 ++++ .../nouveau/codegen/nv50_ir_lowering_gv100.h | 79 + .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 + .../drivers/nouveau/codegen/nv50_ir_ra.cpp | 26 +- .../nouveau/codegen/nv50_ir_target.cpp | 3 + .../nouveau/codegen/nv50_ir_target_gv100.cpp | 594 +++++ .../nouveau/codegen/nv50_ir_target_gv100.h | 52 + .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 2 +- src/gallium/drivers/nouveau/meson.build | 6 + 14 files changed, 3683 insertions(+), 19 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 3e3da9ec919..63ea7f5e7e8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -77,6 +77,7 @@ struct nv50_ir_prog_symbol #define NVISA_GK20A_CHIPSET 0xea #define NVISA_GM107_CHIPSET 0x110 #define NVISA_GM200_CHIPSET 0x120 +#define NVISA_GV100_CHIPSET 0x140 struct nv50_ir_prog_info { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp new file mode 100644 index 00000000000..0fbd47ccf88 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp @@ -0,0 +1,2011 @@ +/* + * Copyright 2020 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "codegen/nv50_ir_emit_gv100.h" +#include "codegen/nv50_ir_sched_gm107.h" + +namespace nv50_ir { + +/******************************************************************************* + * instruction format helpers + ******************************************************************************/ + +#define FA_NODEF (1 << 0) +#define FA_RRR (1 << 1) +#define FA_RRI (1 << 2) +#define FA_RRC (1 << 3) +#define FA_RIR (1 << 4) +#define FA_RCR (1 << 5) + +#define FA_SRC_MASK 0x0ff +#define FA_SRC_NEG 0x100 +#define FA_SRC_ABS 0x200 + +#define EMPTY -1 +#define __(a) (a) // no source modifiers +#define _A(a) ((a) | FA_SRC_ABS) +#define N_(a) ((a) | FA_SRC_NEG) +#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS) + +void +CodeEmitterGV100::emitFormA_I32(int src) +{ + emitIMMD(32, 32, insn->src(src)); + if (insn->src(src).mod.abs()) + code[1] &= 0x7fffffff; + if (insn->src(src).mod.neg()) + code[1] ^= 0x80000000; +} + +void +CodeEmitterGV100::emitFormA_RRC(uint16_t op, int src1, int src2) +{ + emitInsn(op); + if (src1 >= 0) { + emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG)); + emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS)); + emitGPR (64, insn->src(src1 & FA_SRC_MASK)); + } + if (src2 >= 0) { + emitNEG (63, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG)); + emitABS (62, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS)); + emitCBUF(54, -1, 38, 0, 2, insn->src(src2 & FA_SRC_MASK)); + } +} + +void +CodeEmitterGV100::emitFormA_RRI(uint16_t op, int src1, int src2) +{ + emitInsn(op); + if (src1 >= 0) { + emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG)); + emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS)); + emitGPR (64, insn->src(src1 & FA_SRC_MASK)); + } + if (src2 >= 0) + emitFormA_I32(src2 & FA_SRC_MASK); +} + +void +CodeEmitterGV100::emitFormA_RRR(uint16_t op, int src1, int src2) +{ + emitInsn(op); + if (src2 >= 0) { + emitNEG (75, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG)); + emitABS (74, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS)); + emitGPR (64, 
insn->src(src2 & FA_SRC_MASK)); + } + + if (src1 >= 0) { + emitNEG (63, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG)); + emitABS (62, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS)); + emitGPR (32, insn->src(src1 & FA_SRC_MASK)); + } +} + +void +CodeEmitterGV100::emitFormA(uint16_t op, uint8_t forms, + int src0, int src1, int src2) +{ + switch ((src1 < 0) ? FILE_GPR : insn->src(src1 & FA_SRC_MASK).getFile()) { + case FILE_GPR: + switch ((src2 < 0) ? FILE_GPR : insn->src(src2 & FA_SRC_MASK).getFile()) { + case FILE_GPR: + assert(forms & FA_RRR); + emitFormA_RRR((1 << 9) | op, src1, src2); + break; + case FILE_IMMEDIATE: + assert(forms & FA_RRI); + emitFormA_RRI((2 << 9) | op, src1, src2); + break; + case FILE_MEMORY_CONST: + assert(forms & FA_RRC); + emitFormA_RRC((3 << 9) | op, src1, src2); + break; + default: + assert(!"bad src2 file"); + break; + } + break; + case FILE_IMMEDIATE: + assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR); + assert(forms & FA_RIR); + emitFormA_RRI((4 << 9) | op, src2, src1); + break; + case FILE_MEMORY_CONST: + assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR); + assert(forms & FA_RCR); + emitFormA_RRC((5 << 9) | op, src2, src1); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (src0 >= 0) { + assert(insn->src(src0 & FA_SRC_MASK).getFile() == FILE_GPR); + emitABS(73, (src0 & FA_SRC_MASK), (src0 & FA_SRC_ABS)); + emitNEG(72, (src0 & FA_SRC_MASK), (src0 & FA_SRC_NEG)); + emitGPR(24, insn->src(src0 & FA_SRC_MASK)); + } + + if (!(forms & FA_NODEF)) + emitGPR(16, insn->def(0)); +} + +/******************************************************************************* + * control + ******************************************************************************/ + +void +CodeEmitterGV100::emitBRA() +{ + const FlowInstruction *insn = this->insn->asFlow(); + int64_t target = ((int64_t)insn->target.bb->binPos - (codeSize + 0x10)) / 4; + + assert(!insn->indirect && !insn->absolute); + + emitInsn 
(0x947); + emitField(34, 48, target); + emitPRED (87); + emitField(86, 2, 0); // ./.INC/.DEC +} + +void +CodeEmitterGV100::emitEXIT() +{ + emitInsn (0x94d); + emitNOT (90); + emitPRED (87); + emitField(85, 1, 0); // .NO_ATEXIT + emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3 +} + +void +CodeEmitterGV100::emitKILL() +{ + emitInsn(0x95b); + emitPRED(87); +} + +void +CodeEmitterGV100::emitNOP() +{ + emitInsn(0x918); +} + +void +CodeEmitterGV100::emitWARPSYNC() +{ + emitFormA(0x148, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); + emitNOT (90); + emitPRED (87); +} + +/******************************************************************************* + * movement / conversion + ******************************************************************************/ + +void +CodeEmitterGV100::emitCS2R() +{ + emitInsn(0x805); + emitSYS (72, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitF2F() +{ + if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) + emitFormA(0x104, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); + else + emitFormA(0x110, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); + emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); + emitFMZ (80, 1); + emitRND (78); + emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); + emitField(60, 2, insn->subOp); // ./.H1/.INVALID2/.INVALID3 +} + +void +CodeEmitterGV100::emitF2I() +{ + if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) + emitFormA(0x105, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); + else + emitFormA(0x111, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); + emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); + emitFMZ (80, 1); + emitRND (78); + emitField(77, 1, 0); // .NTZ + emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); + emitField(72, 1, isSignedType(insn->dType)); +} + +void +CodeEmitterGV100::emitFRND() +{ + int subop = 0; + + switch (insn->op) { + case OP_CVT: + switch (insn->rnd) { + case ROUND_NI: 
subop = 0; break; + case ROUND_MI: subop = 1; break; + case ROUND_PI: subop = 2; break; + case ROUND_ZI: subop = 3; break; + default: + assert(!"invalid FRND mode"); + break; + } + break; + case OP_FLOOR: subop = 1; break; + case OP_CEIL : subop = 2; break; + case OP_TRUNC: subop = 3; break; + default: + assert(!"invalid FRND opcode"); + break; + } + + if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) + emitFormA(0x107, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); + else + emitFormA(0x113, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); + emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); + emitFMZ (80, 1); + emitField(78, 2, subop); + emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); +} + +void +CodeEmitterGV100::emitI2F() +{ + if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) + emitFormA(0x106, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); + else + emitFormA(0x112, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); + emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); + emitRND (78); + emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); + emitField(74, 1, isSignedType(insn->sType)); + if (typeSizeof(insn->sType) == 2) + emitField(60, 2, insn->subOp >> 1); + else + emitField(60, 2, insn->subOp); // ./.B1/.B2/.B3 +} + +void +CodeEmitterGV100::emitMOV() +{ + switch (insn->def(0).getFile()) { + case FILE_GPR: + switch (insn->src(0).getFile()) { + case FILE_GPR: + case FILE_MEMORY_CONST: + case FILE_IMMEDIATE: + emitFormA(0x002, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); + emitField(72, 4, insn->lanes); + break; + case FILE_PREDICATE: + emitInsn (0x807); + emitGPR (16, insn->def(0)); + emitGPR (24); + emitField(32, 32, 0xffffffff); + emitField(90, 1, 1); + emitPRED (87, insn->src(0)); + break; + default: + assert(!"bad src file"); + break; + } + break; + case FILE_PREDICATE: + emitInsn (0x20c); + emitPRED (87); + emitPRED (84); + emitNOT (71); + emitPRED (68); + emitPRED (81, insn->def(0)); + 
emitCond3(76, CC_NE); + emitGPR (24, insn->src(0)); + emitGPR (32); + break; + default: + assert(!"bad dst file"); + break; + } +} + +void +CodeEmitterGV100::emitPRMT() +{ + emitFormA(0x016, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2)); + emitField(72, 3, insn->subOp); +} + +void +CodeEmitterGV100::emitS2R() +{ + emitInsn(0x919); + emitSYS (72, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +static void +selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +{ + int loc = entry->loc; + if (data.force_persample_interp) + code[loc + 2] |= 1 << 26; + else + code[loc + 2] &= ~(1 << 26); +} + +void +CodeEmitterGV100::emitSEL() +{ + emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); + emitNOT (90, insn->src(2)); + emitPRED (87, insn->src(2)); + if (insn->subOp == 1) + addInterp(0, 0, selpFlip); +} + +void +CodeEmitterGV100::emitSHFL() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + switch (insn->src(2).getFile()) { + case FILE_GPR: + emitInsn(0x389); + emitGPR (64, insn->src(2)); + break; + case FILE_IMMEDIATE: + emitInsn(0x589); + emitIMMD(40, 13, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + emitGPR(32, insn->src(1)); + break; + case FILE_IMMEDIATE: + switch (insn->src(2).getFile()) { + case FILE_GPR: + emitInsn(0x989); + emitGPR (64, insn->src(2)); + break; + case FILE_IMMEDIATE: + emitInsn(0xf89); + emitIMMD(40, 13, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + emitIMMD(53, 5, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->defExists(1)) + emitPRED(81, insn->def(1)); + else + emitPRED(81); + + emitField(58, 2, insn->subOp); + emitGPR (24, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +/******************************************************************************* + * fp32 + ******************************************************************************/ + +void +CodeEmitterGV100::emitFADD() +{ 
+ if (insn->src(1).getFile() == FILE_GPR) + emitFormA(0x021, FA_RRR , NA(0), NA(1), EMPTY); + else + emitFormA(0x021, FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); + emitFMZ (80, 1); + emitRND (78); + emitSAT (77); +} + +void +CodeEmitterGV100::emitFFMA() +{ + emitFormA(0x023, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2)); + emitField(80, 1, insn->ftz); + emitRND (78); + emitSAT (77); + emitField(76, 1, insn->dnz); +} + +void +CodeEmitterGV100::emitFMNMX() +{ + emitFormA(0x009, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); + emitField(90, 1, insn->op == OP_MAX); + emitPRED (87); + emitFMZ (80, 1); +} + +void +CodeEmitterGV100::emitFMUL() +{ + emitFormA(0x020, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); + emitField(80, 1, insn->ftz); + emitPDIV (84); + emitRND (78); + emitSAT (77); + emitField(76, 1, insn->dnz); +} + +void +CodeEmitterGV100::emitFSET_BF() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + emitFormA(0x00a, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); + emitFMZ (80, 1); + emitCond4(76, insn->setCond); + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(74, 2, 0); break; + case OP_SET_OR : emitField(74, 2, 1); break; + case OP_SET_XOR: emitField(74, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitNOT (90, insn->src(2)); + emitPRED(87, insn->src(2)); + } else { + emitPRED(87); + } +} + +void +CodeEmitterGV100::emitFSETP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + emitFormA(0x00b, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); + emitFMZ (80, 1); + emitCond4(76, insn->setCond); + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(74, 2, 0); break; + case OP_SET_OR : emitField(74, 2, 1); break; + case OP_SET_XOR: emitField(74, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitNOT (90, insn->src(2)); + emitPRED(87, insn->src(2)); + } else { + emitPRED(87); + } + + if (insn->defExists(1)) + 
emitPRED(84, insn->def(1)); + else + emitPRED(84); + emitPRED(81, insn->def(0)); +} + +void +CodeEmitterGV100::emitFSWZADD() +{ + uint8_t subOp = 0; + + // NP/PN swapped vs SM60 + for (int i = 0; i < 4; i++) { + uint8_t p = ((insn->subOp >> (i * 2)) & 3); + if (p == 1 || p == 2) + p ^= 3; + subOp |= p << (i * 2); + } + + emitInsn (0x822); + emitFMZ (80, 1); + emitRND (78); + emitField(77, 1, insn->lanes); /* abused for .ndv */ + emitGPR (64, insn->src(1)); + emitField(32, 8, subOp); + emitGPR (24, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitMUFU() +{ + int mufu = 0; + + switch (insn->op) { + case OP_COS : mufu = 0; break; + case OP_SIN : mufu = 1; break; + case OP_EX2 : mufu = 2; break; + case OP_LG2 : mufu = 3; break; + case OP_RCP : mufu = 4 + 2 * insn->subOp; break; + case OP_RSQ : mufu = 5 + 2 * insn->subOp; break; + case OP_SQRT: mufu = 8; break; + default: + assert(!"invalid mufu"); + break; + } + + emitFormA(0x108, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); + emitField(74, 4, mufu); +} + +/******************************************************************************* + * fp64 + ******************************************************************************/ + +void +CodeEmitterGV100::emitDADD() +{ + emitFormA(0x029, FA_RRR | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); + emitRND(78); +} + +void +CodeEmitterGV100::emitDFMA() +{ + emitFormA(0x02b, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2)); + emitRND(78); +} + +void +CodeEmitterGV100::emitDMUL() +{ + emitFormA(0x028, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); + emitRND(78); +} + +void +CodeEmitterGV100::emitDSETP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + if (insn->src(1).getFile() == FILE_GPR) + emitFormA(0x02a, FA_NODEF | FA_RRR , NA(0), NA(1), EMPTY); + else + emitFormA(0x02a, FA_NODEF | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(74, 2, 0); break; + case 
OP_SET_OR : emitField(74, 2, 1); break; + case OP_SET_XOR: emitField(74, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitNOT (90, insn->src(2)); + emitPRED(87, insn->src(2)); + } else { + emitPRED(87); + } + + if (insn->defExists(1)) + emitPRED(84, insn->def(1)); + else + emitPRED(84); + emitPRED (81, insn->def(0)); + emitCond4(76, insn->setCond); +} + +/******************************************************************************* + * integer + ******************************************************************************/ + +void +CodeEmitterGV100::emitBMSK() +{ + emitFormA(0x01b, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); + emitField(75, 1, insn->subOp); // .C/.W +} + +void +CodeEmitterGV100::emitBREV() +{ + emitFormA(0x101, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); +} + +void +CodeEmitterGV100::emitFLO() +{ + emitFormA(0x100, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); + emitPRED (81); + emitField(74, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); + emitField(73, 1, isSignedType(insn->dType)); + emitNOT (63, insn->src(0)); +} + +void +CodeEmitterGV100::emitIABS() +{ + emitFormA(0x013, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); +} + +void +CodeEmitterGV100::emitIADD3() +{ +// emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2)); + emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), EMPTY); + emitGPR (64); //XXX: fix when switching back to N_(2) + emitPRED (84, NULL); // .CC1 + emitPRED (81, insn->flagsDef >= 0 ? 
insn->getDef(insn->flagsDef) : NULL); + if (insn->flagsSrc >= 0) { + emitField(74, 1, 1); // .X + emitPRED (87, insn->getSrc(insn->flagsSrc)); + emitField(77, 4, 0xf); // .X1 + } +} + +void +CodeEmitterGV100::emitIMAD() +{ + emitFormA(0x024, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2)); + emitField(73, 1, isSignedType(insn->sType)); +} + +void +CodeEmitterGV100::emitIMAD_WIDE() +{ + emitFormA(0x025, FA_RRR | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2)); + emitPRED (81); + emitField(73, 1, isSignedType(insn->sType)); +} + +void +CodeEmitterGV100::emitISETP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + emitFormA(0x00c, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(74, 2, 0); break; + case OP_SET_OR : emitField(74, 2, 1); break; + case OP_SET_XOR: emitField(74, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitNOT (90, insn->src(2)); + emitPRED(87, insn->src(2)); + } else { + emitPRED(87); + } + + //XXX: CC->pred + if (insn->flagsSrc >= 0) { + assert(0); + emitField(68, 4, 6); + } else { + emitNOT (71); + if (!insn->subOp) + emitPRED(68); + } + + if (insn->defExists(1)) + emitPRED(84, insn->def(1)); + else + emitPRED(84); + emitPRED (81, insn->def(0)); + emitCond3(76, insn->setCond); + emitField(73, 1, isSignedType(insn->sType)); + + if (insn->subOp) { // .EX + assert(0); + emitField(72, 1, 1); + emitPRED (68, insn->srcExists(3) ? 
insn->src(3) : insn->src(2)); + } +} + +void +CodeEmitterGV100::emitLEA() +{ + assert(insn->src(1).get()->asImm()); + + emitFormA(0x011, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(2), EMPTY); + emitPRED (81); + emitIMMD (75, 5, insn->src(1)); + emitGPR (64); +} + +void +CodeEmitterGV100::emitLOP3_LUT() +{ + emitFormA(0x012, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), __(2)); + emitField(90, 1, 1); + emitPRED (87); + emitPRED (81); + emitField(80, 1, 0); // .PAND + emitField(72, 8, insn->subOp); +} + +void +CodeEmitterGV100::emitPOPC() +{ + emitFormA(0x109, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); + emitNOT (63, insn->src(0)); +} + +void +CodeEmitterGV100::emitSGXT() +{ + emitFormA(0x01a, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); + emitField(75, 1, 0); // .W + emitField(73, 1, 1); // /.U32 +} + +void +CodeEmitterGV100::emitSHF() +{ + emitFormA(0x019, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2)); + emitField(80, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_HI)); + emitField(76, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_R)); + emitField(75, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_W)); + + switch (insn->sType) { + case TYPE_S64: emitField(73, 2, 0); break; + case TYPE_U64: emitField(73, 2, 1); break; + case TYPE_S32: emitField(73, 2, 2); break; + case TYPE_U32: + default: + emitField(73, 2, 3); + break; + } +} + +/******************************************************************************* + * load/stores + ******************************************************************************/ + +void +CodeEmitterGV100::emitALD() +{ + emitInsn (0x321); + emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); + emitGPR (32, insn->src(0).getIndirect(1)); + emitO (79); + emitP (76); + emitADDR (24, 40, 10, 0, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitAST() +{ + emitInsn (0x322); + emitField(74, 2, (typeSizeof(insn->dType) / 4) - 1); + emitGPR (64, insn->src(0).getIndirect(1)); + emitP (76); + emitADDR (24, 40, 10, 0, insn->src(0)); 
+ emitGPR (32, insn->src(1)); +} + +void +CodeEmitterGV100::emitATOM() +{ + unsigned subOp, dType; + + if (insn->subOp != NV50_IR_SUBOP_ATOM_CAS) { + emitInsn(0x38a); + + if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) + subOp = 8; + else + subOp = insn->subOp; + emitField(87, 4, subOp); + + switch (insn->dType) { + case TYPE_U32 : dType = 0; break; + case TYPE_S32 : dType = 1; break; + case TYPE_U64 : dType = 2; break; + case TYPE_F32 : dType = 3; break; + case TYPE_B128: dType = 4; break; + case TYPE_S64 : dType = 5; break; + default: + assert(!"unexpected dType"); + dType = 0; + break; + } + emitField(73, 3, dType); + } else { + emitInsn(0x38b); + + switch (insn->dType) { + case TYPE_U32: dType = 0; break; + case TYPE_U64: dType = 2; break; + default: + assert(!"unexpected dType"); + dType = 0; + break; + } + emitField(73, 3, dType); + } + + emitPRED (81); + emitField(79, 2, 1); + emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); + emitGPR (32, insn->src(1)); + emitADDR (24, 40, 24, 0, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitATOMS() +{ + unsigned dType, subOp; + + if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { + switch (insn->dType) { + case TYPE_U32: dType = 0; break; + case TYPE_S32: dType = 1; break; + case TYPE_U64: dType = 2; break; + default: assert(!"unexpected dType"); dType = 0; break; + } + + emitInsn (0x38d); + emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST + emitField(73, 2, dType); + emitGPR (64, insn->src(2)); + } else { + emitInsn(0x38c); + + if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) + subOp = 8; + else + subOp = insn->subOp; + emitField(87, 4, subOp); + + switch (insn->dType) { + case TYPE_U32: dType = 0; break; + case TYPE_S32: dType = 1; break; + case TYPE_U64: dType = 2; break; + default: assert(!"unexpected dType"); dType = 0; break; + } + + emitField(73, 2, dType); + } + + emitGPR (32, insn->src(1)); + emitADDR (24, 40, 24, 0, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void 
+CodeEmitterGV100::emitIPA() +{ + emitInsn (0x326); + emitPRED (81, insn->defExists(1) ? insn->def(1) : NULL); + + switch (insn->getInterpMode()) { + case NV50_IR_INTERP_LINEAR : + case NV50_IR_INTERP_PERSPECTIVE: emitField(78, 2, 0); break; + case NV50_IR_INTERP_FLAT : emitField(78, 2, 1); break; + case NV50_IR_INTERP_SC : emitField(78, 2, 2); break; + default: + assert(!"invalid ipa mode"); + break; + } + + if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) { + switch (insn->getSampleMode()) { + case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break; + case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break; + default: + break; + } + emitGPR (32); + } else { + emitField(76, 2, 2); + emitGPR (32, insn->src(1)); + } + + assert(!insn->src(0).isIndirect(0)); + emitADDR (-1, 64, 8, 2, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitISBERD() +{ + emitInsn(0x923); + emitGPR (24, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitLDSTc(int pos) +{ + int mode = 0; + + switch (insn->cache) { + case CACHE_CA: mode = 0; break; + case CACHE_CG: mode = 1; break; + case CACHE_CS: mode = 2; break; + case CACHE_CV: mode = 3; break; + default: + assert(!"invalid caching mode"); + break; + } + + emitField(pos, 2, mode); +} + +void +CodeEmitterGV100::emitLDSTs(int pos, DataType type) +{ + int data = 0; + + switch (typeSizeof(type)) { + case 1: data = isSignedType(type) ? 1 : 0; break; + case 2: data = isSignedType(type) ? 
3 : 2; break; + case 4: data = 4; break; + case 8: data = 5; break; + case 16: data = 6; break; + default: + assert(!"bad type"); + break; + } + + emitField(pos, 3, data); +} + +void +CodeEmitterGV100::emitLD() +{ + emitInsn (0x980); + emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO + emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS + emitLDSTs(73, insn->dType); + emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); + emitADDR (24, 32, 32, 0, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitLDC() +{ + emitFormA(0x182, FA_RCR, EMPTY, __(0), EMPTY); + emitField(78, 2, insn->subOp); + emitLDSTs(73, insn->dType); + emitGPR (24, insn->src(0).getIndirect(0)); +} + +void +CodeEmitterGV100::emitLDL() +{ + emitInsn (0x983); + emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7 + emitLDSTs(73, insn->dType); + emitADDR (24, 40, 24, 0, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitLDS() +{ + emitInsn (0x984); + emitLDSTs(73, insn->dType); + emitADDR (24, 40, 24, 0, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitOUT() +{ + const int cut = insn->op == OP_RESTART || insn->subOp; + const int emit = insn->op == OP_EMIT; + + if (insn->op != OP_FINAL) + emitFormA(0x124, FA_RRR | FA_RIR, __(0), __(1), EMPTY); + else + emitFormA(0x124, FA_RRR | FA_RIR, __(0), EMPTY, EMPTY); + emitField(78, 2, (cut << 1) | emit); +} + +void +CodeEmitterGV100::emitRED() +{ + unsigned dType; + + switch (insn->dType) { + case TYPE_U32: dType = 0; break; + case TYPE_S32: dType = 1; break; + case TYPE_U64: dType = 2; break; + case TYPE_F32: dType = 3; break; + case TYPE_B128: dType = 4; break; + case TYPE_S64: dType = 5; break; + default: assert(!"unexpected dType"); dType = 0; break; + } + + emitInsn (0x98e); + emitField(87, 3, insn->subOp); + emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA + emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3 + emitField(77, 2, 2); // 
.CTA/.SM/.GPU/.SYS + emitField(73, 3, dType); + emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); + emitGPR (32, insn->src(1)); + emitADDR (24, 40, 24, 0, insn->src(0)); +} + +void +CodeEmitterGV100::emitST() +{ + emitInsn (0x385); + emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO + emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS + emitLDSTs(73, insn->dType); + emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); + emitGPR (64, insn->src(1)); + emitADDR (24, 32, 32, 0, insn->src(0)); +} + +void +CodeEmitterGV100::emitSTL() +{ + emitInsn (0x387); + emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7 + emitLDSTs(73, insn->dType); + emitADDR (24, 40, 24, 0, insn->src(0)); + emitGPR (32, insn->src(1)); +} + +void +CodeEmitterGV100::emitSTS() +{ + emitInsn (0x388); + emitLDSTs(73, insn->dType); + emitADDR (24, 40, 24, 0, insn->src(0)); + emitGPR (32, insn->src(1)); +} + +/******************************************************************************* + * texture + ******************************************************************************/ + +void +CodeEmitterGV100::emitTEXs(int pos) +{ + int src1 = insn->predSrc == 1 ? 
2 : 1; + if (insn->srcExists(src1)) + emitGPR(pos, insn->src(src1)); + else + emitGPR(pos); +} + +void +CodeEmitterGV100::emitTEX() +{ + const TexInstruction *insn = this->insn->asTex(); + int lodm = 0; + + if (!insn->tex.levelZero) { + switch (insn->op) { + case OP_TEX: lodm = 0; break; + case OP_TXB: lodm = 2; break; + case OP_TXL: lodm = 3; break; + default: + assert(!"invalid tex op"); + break; + } + } else { + lodm = 1; + } + + if (insn->tex.rIndirectSrc < 0) { + emitInsn (0xb60); + emitField(54, 5, prog->driver->io.auxCBSlot); + emitField(40, 14, insn->tex.r); + } else { + emitInsn (0x361); + emitField(59, 1, 1); // .B + } + emitField(90, 1, insn->tex.liveOnly); // .NODEP + emitField(87, 3, lodm); + emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA + emitField(78, 1, insn->tex.target.isShadow()); // .DC + emitField(77, 1, insn->tex.derivAll); // .NDV + emitField(76, 1, insn->tex.useOffsets == 1); // .AOFFI + emitPRED (81); + emitGPR (64, insn->def(1)); + emitGPR (16, insn->def(0)); + emitGPR (24, insn->src(0)); + emitTEXs (32); + emitField(63, 1, insn->tex.target.isArray()); + emitField(61, 2, insn->tex.target.isCube() ? 3 : + insn->tex.target.getDim() - 1); + emitField(72, 4, insn->tex.mask); +} + +void +CodeEmitterGV100::emitTLD() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc < 0) { + emitInsn (0xb66); + emitField(54, 5, prog->driver->io.auxCBSlot); + emitField(40, 14, insn->tex.r); + } else { + emitInsn (0x367); + emitField(59, 1, 1); // .B + } + emitField(90, 1, insn->tex.liveOnly); + emitField(87, 3, insn->tex.levelZero ? 1 /* .LZ */ : 3 /* .LL */); + emitPRED (81); + emitField(78, 1, insn->tex.target.isMS()); + emitField(76, 1, insn->tex.useOffsets == 1); + emitField(72, 4, insn->tex.mask); + emitGPR (64, insn->def(1)); + emitField(63, 1, insn->tex.target.isArray()); + emitField(61, 2, insn->tex.target.isCube() ? 
3 : + insn->tex.target.getDim() - 1); + emitTEXs (32); + emitGPR (24, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitTLD4() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc < 0) { + emitInsn (0xb63); + emitField(54, 5, prog->driver->io.auxCBSlot); + emitField(40, 14, insn->tex.r); + } else { + emitInsn (0x364); + emitField(59, 1, 1); // .B + } + emitField(90, 1, insn->tex.liveOnly); + emitField(87, 2, insn->tex.gatherComp); + emitField(84, 1, 1); // !.EF + emitPRED (81); + emitField(78, 1, insn->tex.target.isShadow()); + emitField(77, 2, insn->tex.useOffsets == 4); + emitField(76, 2, insn->tex.useOffsets == 1); + emitField(72, 4, insn->tex.mask); + emitGPR (64, insn->def(1)); + emitField(63, 1, insn->tex.target.isArray()); + emitField(61, 2, insn->tex.target.isCube() ? 3 : + insn->tex.target.getDim() - 1); + emitTEXs (32); + emitGPR (24, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitTMML() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc < 0) { + emitInsn (0xb69); + emitField(54, 5, prog->driver->io.auxCBSlot); + emitField(40, 14, insn->tex.r); + } else { + emitInsn (0x36a); + emitField(59, 1, 1); // .B + } + emitField(90, 1, insn->tex.liveOnly); + emitField(77, 1, insn->tex.derivAll); + emitField(72, 4, insn->tex.mask); + emitGPR (64, insn->def(1)); + emitField(63, 1, insn->tex.target.isArray()); + emitField(61, 2, insn->tex.target.isCube() ? 
3 : + insn->tex.target.getDim() - 1); + emitTEXs (32); + emitGPR (24, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitTXD() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc < 0) { + emitInsn (0xb6c); + emitField(54, 5, prog->driver->io.auxCBSlot); + emitField(40, 14, insn->tex.r); + } else { + emitInsn (0x36d); + emitField(59, 1, 1); // .B + } + emitField(90, 1, insn->tex.liveOnly); + emitPRED (81); + emitField(76, 1, insn->tex.useOffsets == 1); + emitField(72, 4, insn->tex.mask); + emitGPR (64, insn->def(1)); + emitField(63, 1, insn->tex.target.isArray()); + emitField(61, 2, insn->tex.target.isCube() ? 3 : + insn->tex.target.getDim() - 1); + emitTEXs (32); + emitGPR (24, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitTXQ() +{ + const TexInstruction *insn = this->insn->asTex(); + int type = 0; + + switch (insn->tex.query) { + case TXQ_DIMS : type = 0x00; break; + case TXQ_TYPE : type = 0x01; break; + case TXQ_SAMPLE_POSITION: type = 0x02; break; + default: + assert(!"invalid txq query"); + break; + } + + if (insn->tex.rIndirectSrc < 0) { + emitInsn (0xb6f); + emitField(54, 5, prog->driver->io.auxCBSlot); + emitField(40, 14, insn->tex.r); + } else { + emitInsn (0x370); + emitField(59, 1, 1); // .B + } + emitField(90, 1, insn->tex.liveOnly); + emitField(72, 4, insn->tex.mask); + emitGPR (64, insn->def(1)); + emitField(62, 2, type); + emitGPR (24, insn->src(0)); + emitGPR (16, insn->def(0)); +} + +/******************************************************************************* + * surface + ******************************************************************************/ + +void +CodeEmitterGV100::emitSUHandle(const int s) +{ + const TexInstruction *insn = this->insn->asTex(); + + assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); + + if (insn->src(s).getFile() == FILE_GPR) { + emitGPR(64, insn->src(s)); + } else { + assert(0); + //XXX: not done + ImmediateValue *imm 
= insn->getSrc(s)->asImm(); + assert(imm); + emitField(0x33, 1, 1); + emitField(0x24, 13, imm->reg.data.u32); + } +} + +void +CodeEmitterGV100::emitSUTarget() +{ + const TexInstruction *insn = this->insn->asTex(); + int target = 0; + + assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); + + if (insn->tex.target == TEX_TARGET_BUFFER) { + target = 1; + } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) { + target = 2; + } else if (insn->tex.target == TEX_TARGET_2D || + insn->tex.target == TEX_TARGET_RECT) { + target = 3; + } else if (insn->tex.target == TEX_TARGET_2D_ARRAY || + insn->tex.target == TEX_TARGET_CUBE || + insn->tex.target == TEX_TARGET_CUBE_ARRAY) { + target = 4; + } else if (insn->tex.target == TEX_TARGET_3D) { + target = 5; + } else { + assert(insn->tex.target == TEX_TARGET_1D); + } + emitField(61, 3, target); +} + +void +CodeEmitterGV100::emitSUATOM() +{ + const TexInstruction *insn = this->insn->asTex(); + uint8_t type = 0, subOp; + + if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) + emitInsn(0x396); // SUATOM.D.CAS + else + emitInsn(0x394); // SUATOM.D + + emitSUTarget(); + + // destination type + switch (insn->dType) { + case TYPE_S32: type = 1; break; + case TYPE_U64: type = 2; break; + case TYPE_F32: type = 3; break; + case TYPE_S64: type = 5; break; + default: + assert(insn->dType == TYPE_U32); + break; + } + + // atomic operation + if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { + subOp = 0; + } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) { + subOp = 8; + } else { + subOp = insn->subOp; + } + + emitField(87, 4, subOp); + emitPRED (81); + emitField(79, 2, 1); + emitField(73, 3, type); + emitField(72, 1, 0); // .BA + emitGPR (32, insn->src(1)); + emitGPR (24, insn->src(0)); + emitGPR (16, insn->def(0)); + + emitSUHandle(2); +} + +void +CodeEmitterGV100::emitSULD() +{ + const TexInstruction *insn = this->insn->asTex(); + int type = 0; + + if (insn->op == OP_SULDB) { + emitInsn(0x99a); + emitSUTarget(); + + switch (insn->dType) { + case TYPE_U8: 
type = 0; break; + case TYPE_S8: type = 1; break; + case TYPE_U16: type = 2; break; + case TYPE_S16: type = 3; break; + case TYPE_U32: type = 4; break; + case TYPE_U64: type = 5; break; + case TYPE_B128: type = 6; break; + default: + assert(0); + break; + } + // emitLDSTc(0x18); + emitField(73, 3, type); + } else { + emitInsn(0x998); + emitSUTarget(); + emitField(72, 4, 0xf); // rgba + } + + emitPRED (81); + emitField(79, 2, 1); + + emitGPR (16, insn->def(0)); + emitGPR (24, insn->src(0)); + + emitSUHandle(1); +} + +void +CodeEmitterGV100::emitSUST() +{ + const TexInstruction *insn = this->insn->asTex(); + + emitInsn(0x99c); // SUST.P +#if 0 + if (insn->op == OP_SUSTB) + emitField(0x34, 1, 1); +#endif + emitSUTarget(); + + +#if 0 + emitLDSTc(0x18); +#endif + + emitField(79, 2, 1); + emitField(72, 4, 0xf); // rgba + emitGPR(32, insn->src(1)); + emitGPR(24, insn->src(0)); + emitSUHandle(2); +} + +/******************************************************************************* + * misc + ******************************************************************************/ + +void +CodeEmitterGV100::emitAL2P() +{ + emitInsn (0x920); + emitO (79); + emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); + emitField(40, 11, insn->src(0).get()->reg.data.offset); + emitGPR (24, insn->src(0).getIndirect(0)); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitBAR() +{ + uint8_t subop, redop = 0x00; + + // 80 + // 01: DEFER_BLOCKING + // 78:77 + // 00: SYNC + // 01: ARV + // 02: RED + // 03: SCAN + // 75:74 + // 00: RED.POPC + // 01: RED.AND + // 02: RED.OR + + switch (insn->subOp) { + case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; redop = 0x00; break; + case NV50_IR_SUBOP_BAR_RED_AND : subop = 0x02; redop = 0x01; break; + case NV50_IR_SUBOP_BAR_RED_OR : subop = 0x02; redop = 0x02; break; + case NV50_IR_SUBOP_BAR_ARRIVE : subop = 0x01; break; + default: + subop = 0x00; + assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC); + break; + } + + if (insn->src(0).getFile() == 
FILE_GPR) { + emitInsn ((1 << 9) | 0x11d); + emitGPR (32, insn->src(0)); //XXX: nvdisasm shows src0==src1 + } else { + ImmediateValue *imm = insn->getSrc(0)->asImm(); + assert(imm); + if (insn->src(1).getFile() == FILE_GPR) { + emitInsn ((4 << 9) | 0x11d); + emitGPR (32, insn->src(1)); + } else { + emitInsn ((5 << 9) | 0x11d); + } + emitField(54, 4, imm->reg.data.u32); + } + + emitField(77, 2, subop); + emitField(74, 2, redop); + + if (insn->srcExists(2) && (insn->predSrc != 2)) { + emitField(90, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT)); + emitPRED (87, insn->src(2)); + } else { + emitField(87, 3, 7); + } +} + +void +CodeEmitterGV100::emitCCTL() +{ + if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) + emitInsn(0x98f); + else + emitInsn(0x990); + emitField(87, 4, insn->subOp); + emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); + emitADDR (24, 32, 32, 0, insn->src(0)); +} + +void +CodeEmitterGV100::emitMEMBAR() +{ + emitInsn (0x992); + switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn->subOp)) { + case NV50_IR_SUBOP_MEMBAR_CTA: emitField(76, 3, 0); break; + case NV50_IR_SUBOP_MEMBAR_GL : emitField(76, 3, 2); break; + case NV50_IR_SUBOP_MEMBAR_SYS: emitField(76, 3, 3); break; + default: + assert(!"invalid scope"); + break; + } +} + +void +CodeEmitterGV100::emitPIXLD() +{ + emitInsn (0x925); + switch (insn->subOp) { + case NV50_IR_SUBOP_PIXLD_COVMASK : emitField(78, 3, 1); break; // .COVMASK + case NV50_IR_SUBOP_PIXLD_SAMPLEID: emitField(78, 3, 3); break; // .MY_INDEX + default: + assert(0); + break; + } + emitPRED (71); + emitGPR (16, insn->def(0)); +} + +void +CodeEmitterGV100::emitPLOP3_LUT() +{ + uint8_t op[2] = {}; + + switch (insn->op) { + case OP_AND: op[0] = 0xf0 & 0xcc; break; + case OP_OR : op[0] = 0xf0 | 0xcc; break; + case OP_XOR: op[0] = 0xf0 ^ 0xcc; break; + default: + assert(!"invalid PLOP3"); + break; + } + + emitInsn(0x81c); + emitNOT (90, insn->src(0)); + emitPRED(87, insn->src(0)); + emitPRED(84); // def(1) + emitPRED(81, insn->def(0)); 
+ emitNOT (80, insn->src(1)); + emitPRED(77, insn->src(1)); + emitField(72, 5, op[0] >> 3); + emitNOT (71); // src(2) + emitPRED(68); // src(2) + emitField(64, 3, op[0] & 7); + emitField(16, 8, op[1]); +} + +void +CodeEmitterGV100::emitVOTE() +{ + const ImmediateValue *imm; + uint32_t u32; + + int r = -1, p = -1; + for (int i = 0; insn->defExists(i); i++) { + if (insn->def(i).getFile() == FILE_GPR) + r = i; + else if (insn->def(i).getFile() == FILE_PREDICATE) + p = i; + } + + emitInsn (0x806); + emitField(72, 2, insn->subOp); + if (r >= 0) + emitGPR (16, insn->def(r)); + else + emitGPR (16); + if (p >= 0) + emitPRED (81, insn->def(p)); + else + emitPRED (81); + + switch (insn->src(0).getFile()) { + case FILE_PREDICATE: + emitField(90, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT)); + emitPRED (87, insn->src(0)); + break; + case FILE_IMMEDIATE: + imm = insn->getSrc(0)->asImm(); + assert(imm); + u32 = imm->reg.data.u32; + assert(u32 == 0 || u32 == 1); + emitField(90, 1, u32 == 0); + emitPRED (87); + break; + default: + assert(!"Unhandled src"); + break; + } +} + +bool +CodeEmitterGV100::emitInstruction(Instruction *i) +{ + insn = i; + + switch (insn->op) { + case OP_ABS: + assert(!isFloatType(insn->dType)); + emitIABS(); + break; + case OP_ADD: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F32) + emitFADD(); + else + emitDADD(); + } else { + emitIADD3(); + } + break; + case OP_AFETCH: + emitAL2P(); + break; + case OP_AND: + case OP_OR: + case OP_XOR: + if (insn->def(0).getFile() == FILE_PREDICATE) { + emitPLOP3_LUT(); + } else { + assert(!"invalid logop"); + emitNOP(); + } + break; + case OP_ATOM: + if (insn->src(0).getFile() == FILE_MEMORY_SHARED) + emitATOMS(); + else + if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS) + emitRED(); + else + emitATOM(); + break; + case OP_BAR: + emitBAR(); + break; + case OP_BFIND: + emitFLO(); + break; + case OP_BMSK: + emitBMSK(); + break; + case OP_BREV: + emitBREV(); + break; + case OP_BRA: + case 
OP_JOIN: //XXX + emitBRA(); + break; + case OP_CCTL: + emitCCTL(); + break; + case OP_CEIL: + case OP_CVT: + case OP_FLOOR: + case OP_TRUNC: + if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE || + insn->src(0).getFile() == FILE_PREDICATE)) { + emitMOV(); + } else if (isFloatType(insn->dType)) { + if (isFloatType(insn->sType)) { + if (insn->sType == insn->dType) + emitFRND(); + else + emitF2F(); + } else { + emitI2F(); + } + } else { + if (isFloatType(insn->sType)) { + emitF2I(); + } else { + assert(!"I2I"); + emitNOP(); + } + } + break; + case OP_COS: + case OP_EX2: + case OP_LG2: + case OP_RCP: + case OP_RSQ: + case OP_SIN: + case OP_SQRT: + emitMUFU(); + break; + case OP_DISCARD: + emitKILL(); + break; + case OP_EMIT: + case OP_FINAL: + case OP_RESTART: + emitOUT(); + break; + case OP_EXIT: + emitEXIT(); + break; + case OP_EXPORT: + emitAST(); + break; + case OP_FMA: + case OP_MAD: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F32) + emitFFMA(); + else + emitDFMA(); + } else { + if (typeSizeof(insn->dType) != 8) + emitIMAD(); + else + emitIMAD_WIDE(); + } + break; + case OP_JOINAT: //XXX + emitNOP(); + break; + case OP_LINTERP: + emitIPA(); + break; + case OP_LOAD: + switch (insn->src(0).getFile()) { + case FILE_MEMORY_CONST : emitLDC(); break; + case FILE_MEMORY_LOCAL : emitLDL(); break; + case FILE_MEMORY_SHARED: emitLDS(); break; + case FILE_MEMORY_GLOBAL: emitLD(); break; + default: + assert(!"invalid load"); + emitNOP(); + break; + } + break; + case OP_LOP3_LUT: + emitLOP3_LUT(); + break; + case OP_MAX: + case OP_MIN: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F32) { + emitFMNMX(); + } else { + assert(!"invalid FMNMX"); + emitNOP(); + } + } else { + assert(!"invalid MNMX"); + emitNOP(); + } + break; + case OP_MEMBAR: + emitMEMBAR(); + break; + case OP_MOV: + emitMOV(); + break; + case OP_MUL: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F32) + emitFMUL(); + else + emitDMUL(); + } else { + 
assert(!"invalid IMUL"); + emitNOP(); + } + break; + case OP_PERMT: + emitPRMT(); + break; + case OP_PFETCH: + emitISBERD(); + break; + case OP_PIXLD: + emitPIXLD(); + break; + case OP_POPCNT: + emitPOPC(); + break; + case OP_QUADOP: + emitFSWZADD(); + break; + case OP_RDSV: + if (targ->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv)) + emitCS2R(); + else + emitS2R(); + break; + case OP_SELP: + emitSEL(); + break; + case OP_SET: + case OP_SET_AND: + case OP_SET_OR: + case OP_SET_XOR: + if (insn->def(0).getFile() != FILE_PREDICATE) { + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F32) { + emitFSET_BF(); + } else { + assert(!"invalid FSET"); + emitNOP(); + } + } else { + assert(!"invalid SET"); + emitNOP(); + } + } else { + if (isFloatType(insn->sType)) + if (insn->sType == TYPE_F64) + emitDSETP(); + else + emitFSETP(); + else + emitISETP(); + } + break; + case OP_SGXT: + emitSGXT(); + break; + case OP_SHF: + emitSHF(); + break; + case OP_SHFL: + emitSHFL(); + break; + case OP_SHLADD: + emitLEA(); + break; + case OP_STORE: + switch (insn->src(0).getFile()) { + case FILE_MEMORY_LOCAL : emitSTL(); break; + case FILE_MEMORY_SHARED: emitSTS(); break; + case FILE_MEMORY_GLOBAL: emitST(); break; + default: + assert(!"invalid store"); + emitNOP(); + break; + } + break; + case OP_SULDB: + case OP_SULDP: + emitSULD(); + break; + case OP_SUREDB: + case OP_SUREDP: + emitSUATOM(); + break; + case OP_SUSTB: + case OP_SUSTP: + emitSUST(); + break; + case OP_TEX: + case OP_TXB: + case OP_TXL: + emitTEX(); + break; + case OP_TXD: + emitTXD(); + break; + case OP_TXF: + emitTLD(); + break; + case OP_TXG: + emitTLD4(); + break; + case OP_TXLQ: + emitTMML(); + break; + case OP_TXQ: + emitTXQ(); + break; + case OP_VFETCH: + emitALD(); + break; + case OP_VOTE: + emitVOTE(); + break; + case OP_WARPSYNC: + emitWARPSYNC(); + break; + default: + assert(!"invalid opcode"); + emitNOP(); + break; + } + + code[3] &= 0x000001ff; + code[3] |= insn->sched << 9; + code += 4; + codeSize += 16; 
+   return true;
+}
+
+void
+CodeEmitterGV100::prepareEmission(BasicBlock *bb)
+{ // Appends bb to func->bbArray, assigns its binPos and accumulates binSize.
+   Function *func = bb->getFunction();
+   Instruction *i;
+   int j;
+
+   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); // skip trailing zero-sized blocks
+
+   for (; j >= 0; --j) {
+      BasicBlock *in = func->bbArray[j];
+      Instruction *exit = in->getExit();
+
+      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { // exit branch targets bb itself: drop it
+         in->binSize -= 16;
+         func->binSize -= 16;
+
+         for (++j; j < func->bbCount; ++j) // blocks placed after 'in' move up by the removed 16
+            func->bbArray[j]->binPos -= 16;
+
+         in->remove(exit);
+      }
+      bb->binPos = in->binPos + in->binSize;
+      if (in->binSize) // no more no-op branches to bb
+         break;
+   }
+   func->bbArray[func->bbCount++] = bb;
+
+   if (!bb->getExit()) // nothing to size in this block
+      return;
+
+   for (i = bb->getEntry(); i; i = i->next) {
+      i->encSize = getMinEncodingSize(i);
+      bb->binSize += i->encSize;
+   }
+
+   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 16));
+
+   func->binSize += bb->binSize;
+}
+
+void
+CodeEmitterGV100::prepareEmission(Function *func)
+{ // Base-class CodeEmitter::prepareEmission first, then the GM107 sched-data pass.
+   SchedDataCalculatorGM107 sched(targ);
+   CodeEmitter::prepareEmission(func);
+   sched.run(func, true, true);
+}
+
+void
+CodeEmitterGV100::prepareEmission(Program *prog)
+{ // Gives each function its start offset (binPos) and sums prog->binSize.
+   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
+        !fi.end(); fi.next()) {
+      Function *func = reinterpret_cast<Function *>(fi.get()); // explicit target type is mandatory for the cast
+      func->binPos = prog->binSize;
+      prepareEmission(func);
+      prog->binSize += func->binSize;
+   }
+
+   this->prog = prog; // remembered for later use by the emit*() helpers (e.g. prog->driver)
+}
+
+CodeEmitterGV100::CodeEmitterGV100(TargetGV100 *target)
+   : CodeEmitter(target), targ(target)
+{ // Starts with no output buffer and no relocation info attached.
+   code = NULL;
+   codeSize = codeSizeLimit = 0;
+   relocInfo = NULL;
+}
+};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
new file mode 100644
index 00000000000..e97bf6580a1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
@@ -0,0 +1,403 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef __NV50_IR_EMIT_GV100_H__ +#define __NV50_IR_EMIT_GV100_H__ +#include "codegen/nv50_ir_target_gv100.h" + +namespace nv50_ir { + +class CodeEmitterGV100 : public CodeEmitter { +public: + CodeEmitterGV100(TargetGV100 *target); + + virtual bool emitInstruction(Instruction *); + virtual uint32_t getMinEncodingSize(const Instruction *) const { return 16; } + +private: + const Program *prog; + const TargetGV100 *targ; + const Instruction *insn; + + virtual void prepareEmission(Program *); + virtual void prepareEmission(Function *); + virtual void prepareEmission(BasicBlock *); + + inline void emitInsn(uint32_t op) { + code[0] = op; + code[1] = 0; + code[2] = 0; + code[3] = 0; + if (insn->predSrc >= 0) { + emitField(12, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id); + emitField(15, 1, insn->cc == CC_NOT_P); + } else { + emitField(12, 3, 7); + } + }; + + inline void emitField(int b, int s, uint64_t v) { + if (b >= 0) { + uint64_t m = ~0ULL >> (64 - s); + uint64_t d = v & m; + assert(!(v & ~m) || (v & ~m) == ~m); + if (b < 64 && b + s > 64) { + *(uint64_t *)&code[0] |= d << b; + *(uint64_t *)&code[2] |= d >> (64 - b); + } else { + *(uint64_t *)&code[(b/64*2)] |= d << (b & 0x3f); + } + } + }; + + inline void emitABS(int pos, int src, bool supported) + { + if (insn->src(src).mod.abs()) { + assert(supported); + emitField(pos, 1, 1); + } + } + + inline void emitABS(int pos, int src) + { + emitABS(pos, src, true); + } + + inline void emitNEG(int pos, int src, bool supported) { + if (insn->src(src).mod.neg()) { + assert(supported); + emitField(pos, 1, 1); + } + } + + inline void emitNEG(int pos, int src) { + emitNEG(pos, src, true); + } + + inline void emitNOT(int pos) { + emitField(pos, 1, 0); + }; + + inline void emitNOT(int pos, const ValueRef &ref) { + emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT))); + } + + inline void emitSAT(int pos) { + emitField(pos, 1, insn->saturate); + } + + inline void emitRND(int rmp, RoundMode rnd, int rip) { + int rm 
= 0, ri = 0; + switch (rnd) { + case ROUND_NI: ri = 1; + case ROUND_N : rm = 0; break; + case ROUND_MI: ri = 1; + case ROUND_M : rm = 1; break; + case ROUND_PI: ri = 1; + case ROUND_P : rm = 2; break; + case ROUND_ZI: ri = 1; + case ROUND_Z : rm = 3; break; + default: + assert(!"invalid round mode"); + break; + } + emitField(rip, 1, ri); + emitField(rmp, 2, rm); + } + + inline void emitRND(int pos) { + emitRND(pos, insn->rnd, -1); + } + + inline void emitFMZ(int pos, int len) { + emitField(pos, len, insn->dnz << 1 | insn->ftz); + } + + inline void emitPDIV(int pos) { + emitField(pos, 3, insn->postFactor + 4); + } + + inline void emitO(int pos) { + emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT); + } + + inline void emitP(int pos) { + emitField(pos, 1, insn->perPatch); + } + + inline void emitCond3(int pos, CondCode code) { + int data = 0; + + switch (code) { + case CC_FL : data = 0x00; break; + case CC_LTU: + case CC_LT : data = 0x01; break; + case CC_EQU: + case CC_EQ : data = 0x02; break; + case CC_LEU: + case CC_LE : data = 0x03; break; + case CC_GTU: + case CC_GT : data = 0x04; break; + case CC_NEU: + case CC_NE : data = 0x05; break; + case CC_GEU: + case CC_GE : data = 0x06; break; + case CC_TR : data = 0x07; break; + default: + assert(!"invalid cond3"); + break; + } + + emitField(pos, 3, data); + } + + inline void emitCond4(int pos, CondCode code) { + int data = 0; + + switch (code) { + case CC_FL: data = 0x00; break; + case CC_LT: data = 0x01; break; + case CC_EQ: data = 0x02; break; + case CC_LE: data = 0x03; break; + case CC_GT: data = 0x04; break; + case CC_NE: data = 0x05; break; + case CC_GE: data = 0x06; break; + // case CC_NUM: data = 0x07; break; + // case CC_NAN: data = 0x08; break; + case CC_LTU: data = 0x09; break; + case CC_EQU: data = 0x0a; break; + case CC_LEU: data = 0x0b; break; + case CC_GTU: data = 0x0c; break; + case CC_NEU: data = 0x0d; break; + case CC_GEU: data = 0x0e; break; + case CC_TR: data = 0x0f; break; + 
default: + assert(!"invalid cond4"); + break; + } + + emitField(pos, 4, data); + } + + inline void emitSYS(int pos, const Value *val) { + int id = val ? val->reg.data.id : -1; + + switch (id) { + case SV_LANEID : id = 0x00; break; + case SV_VERTEX_COUNT : id = 0x10; break; + case SV_INVOCATION_ID : id = 0x11; break; + case SV_THREAD_KILL : id = 0x13; break; + case SV_INVOCATION_INFO: id = 0x1d; break; + case SV_COMBINED_TID : id = 0x20; break; + case SV_TID : id = 0x21 + val->reg.data.sv.index; break; + case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break; + case SV_LANEMASK_EQ : id = 0x38; break; + case SV_LANEMASK_LT : id = 0x39; break; + case SV_LANEMASK_LE : id = 0x3a; break; + case SV_LANEMASK_GT : id = 0x3b; break; + case SV_LANEMASK_GE : id = 0x3c; break; + case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break; + default: + assert(!"invalid system value"); + id = 0; + break; + } + + emitField(pos, 8, id); + } + + inline void emitSYS(int pos, const ValueRef &ref) { + emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL); + } + + inline void emitGPR(int pos, const Value *val, int off) { + emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ? + val->reg.data.id + off: 255); + } + + inline void emitGPR(int pos, const Value *v) { + emitGPR(pos, v, 0); + } + + inline void emitGPR(int pos) { + emitGPR(pos, (const Value *)NULL); + } + + inline void emitGPR(int pos, const ValueRef &ref) { + emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL); + } + + inline void emitGPR(int pos, const ValueRef *ref) { + emitGPR(pos, ref ? ref->rep() : (const Value *)NULL); + } + + inline void emitGPR(int pos, const ValueDef &def) { + emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL); + } + + inline void emitGPR(int pos, const ValueDef &def, int off) { + emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL, off); + } + + inline void emitPRED(int pos, const Value *val) { + emitField(pos, 3, val ? 
val->reg.data.id : 7); + }; + + inline void emitPRED(int pos) { + emitPRED(pos, (const Value *)NULL); + } + + inline void emitPRED(int pos, const ValueRef &ref) { + emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL); + } + + inline void emitPRED(int pos, const ValueDef &def) { + emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL); + } + + inline void emitCBUF(int buf, int gpr, int off, int len, int align, + const ValueRef &ref) { + const Value *v = ref.get(); + const Symbol *s = v->asSym(); + + assert(!(s->reg.data.offset & ((1 << align) - 1))); + + emitField(buf, 5, v->reg.fileIndex); + if (gpr >= 0) + emitGPR(gpr, ref.getIndirect(0)); + emitField(off, 16, s->reg.data.offset); + } + + inline void emitIMMD(int pos, int len, const ValueRef &ref) { + const ImmediateValue *imm = ref.get()->asImm(); + uint32_t val = imm->reg.data.u32; + + if (insn->sType == TYPE_F64) { + assert(!(imm->reg.data.u64 & 0x00000000ffffffffULL)); + val = imm->reg.data.u64 >> 32; + } + + emitField(pos, len, val); + } + + inline void emitADDR(int gpr, int off, int len, int shr, + const ValueRef &ref) { + const Value *v = ref.get(); + assert(!(v->reg.data.offset & ((1 << shr) - 1))); + if (gpr >= 0) + emitGPR(gpr, ref.getIndirect(0)); + emitField(off, len, v->reg.data.offset >> shr); + } + + inline void emitFormA(uint16_t op, uint8_t forms, int src0, int src1, int src2); + inline void emitFormA_RRR(uint16_t op, int src1, int src2); + inline void emitFormA_RRI(uint16_t op, int src1, int src2); + inline void emitFormA_RRC(uint16_t op, int src1, int src2); + inline void emitFormA_I32(int src); + + void emitBRA(); + void emitEXIT(); + void emitKILL(); + void emitNOP(); + void emitWARPSYNC(); + + void emitCS2R(); + void emitF2F(); + void emitF2I(); + void emitFRND(); + void emitI2F(); + void emitMOV(); + void emitPRMT(); + void emitS2R(); + void emitSEL(); + void emitSHFL(); + + void emitFADD(); + void emitFFMA(); + void emitFMNMX(); + void emitFMUL(); + void emitFSET_BF(); + void 
emitFSETP(); + void emitFSWZADD(); + void emitMUFU(); + + void emitDADD(); + void emitDFMA(); + void emitDMUL(); + void emitDSETP(); + + void emitBMSK(); + void emitBREV(); + void emitFLO(); + void emitIABS(); + void emitIADD3(); + void emitIMAD(); + void emitIMAD_WIDE(); + void emitISETP(); + void emitLEA(); + void emitLOP3_LUT(); + void emitPOPC(); + void emitSGXT(); + void emitSHF(); + + void emitALD(); + void emitAST(); + void emitATOM(); + void emitATOMS(); + void emitIPA(); + void emitISBERD(); + void emitLDSTc(int); + void emitLDSTs(int, DataType); + void emitLD(); + void emitLDC(); + void emitLDL(); + void emitLDS(); + void emitOUT(); + void emitRED(); + void emitST(); + void emitSTL(); + void emitSTS(); + + void emitTEXs(int); + void emitTEX(); + void emitTLD(); + void emitTLD4(); + void emitTMML(); + void emitTXD(); + void emitTXQ(); + + void emitSUHandle(const int); + void emitSUTarget(); + void emitSUATOM(); + void emitSULD(); + void emitSUST(); + + void emitAL2P(); + void emitBAR(); + void emitCCTL(); + void emitMEMBAR(); + void emitPIXLD(); + void emitPLOP3_LUT(); + void emitVOTE(); +}; + +}; +#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index a54facf8c6e..171006eacfa 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3356,21 +3356,21 @@ static nir_shader_compiler_options nvir_nir_shader_compiler_options(int chipset) { return { - .lower_fdiv = false, + .lower_fdiv = (chipset >= NVISA_GV100_CHIPSET), .lower_ffma = false, .fuse_ffma = false, /* nir doesn't track mad vs fma */ - .lower_flrp16 = false, + .lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET), .lower_flrp32 = true, .lower_flrp64 = true, - .lower_fpow = false, + .lower_fpow = false, // TODO: nir's lowering is broken, or we could use it .lower_fsat = false, .lower_fsqrt = false, // TODO: only before gm200 .lower_sincos = 
false, .lower_fmod = true, .lower_bitfield_extract = false, - .lower_bitfield_extract_to_shifts = false, + .lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET), .lower_bitfield_insert = false, - .lower_bitfield_insert_to_shifts = false, + .lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET), .lower_bitfield_insert_to_bitfield_select = false, .lower_bitfield_reverse = false, .lower_bit_count = false, @@ -3385,8 +3385,8 @@ nvir_nir_shader_compiler_options(int chipset) .lower_vector_cmp = false, .lower_idiv = true, .lower_bitops = false, - .lower_isign = false, // TODO - .lower_fsign = false, + .lower_isign = (chipset >= NVISA_GV100_CHIPSET), + .lower_fsign = (chipset >= NVISA_GV100_CHIPSET), .lower_fdph = false, .lower_fdot = false, .fdot_replicates = false, // TODO @@ -3425,18 +3425,35 @@ nvir_nir_shader_compiler_options(int chipset) .unify_interfaces = false, .use_interpolated_input_intrinsics = true, .lower_mul_2x32_64 = true, // TODO - .lower_rotate = true, + .lower_rotate = (chipset < NVISA_GV100_CHIPSET), .has_imul24 = false, .intel_vec4 = false, .max_unroll_iterations = 32, - .lower_int64_options = (nir_lower_int64_options) ( // TODO + .lower_int64_options = (nir_lower_int64_options) ( + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) | nir_lower_divmod64 | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) | ((chipset >= NVISA_GM107_CHIPSET) ? 
nir_lower_extract64 : 0) | nir_lower_ufind_msb64 ), - .lower_doubles_options = (nir_lower_doubles_options) ( // TODO - nir_lower_dmod - ), + .lower_doubles_options = (nir_lower_doubles_options) ( + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) | + nir_lower_dmod | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) | + ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0) + ) }; } @@ -3444,10 +3461,14 @@ static const nir_shader_compiler_options gf100_nir_shader_compiler_options = nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET); static const nir_shader_compiler_options gm107_nir_shader_compiler_options = nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET); +static const nir_shader_compiler_options gv100_nir_shader_compiler_options = +nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET); const nir_shader_compiler_options * nv50_ir_nir_shader_compiler_options(int chipset) { + if (chipset >= NVISA_GV100_CHIPSET) + return &gv100_nir_shader_compiler_options; if (chipset >= NVISA_GM107_CHIPSET) return &gm107_nir_shader_compiler_options; return &gf100_nir_shader_compiler_options; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h index 71e5ea6417a..dfa1d035dac 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h @@ -21,6 +21,7 @@ class GM107LegalizeSSA : public NVC0LegalizeSSA private: virtual bool visit(Instruction *); +protected: void handlePFETCH(Instruction *); void handleLOAD(Instruction *); }; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp new file mode 100644 index 00000000000..4b6df0db588 --- 
/dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
@@ -0,0 +1,477 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "codegen/nv50_ir.h"
+#include "codegen/nv50_ir_build_util.h"
+
+#include "codegen/nv50_ir_target_nvc0.h"
+#include "codegen/nv50_ir_lowering_gv100.h"
+
+#include <limits> // NOTE(review): header name was stripped in extraction; <limits> is presumed - confirm against upstream
+
+namespace nv50_ir {
+
+bool
+GV100LegalizeSSA::handleCMP(Instruction *i)
+{ // Rewrites i as an OP_SET on src(2) into a predicate, then an OP_SELP of src(0)/src(1).
+   Value *pred = bld.getSSA(1, FILE_PREDICATE);
+
+   bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred, // cond is reversed because 0 is the first operand
+             i->sType, bld.mkImm(0), i->getSrc(2));
+   bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
+   return true;
+}
+
+// NIR deals with most of these for us, but codegen generates more in pointer
+// calculations from other lowering passes.
+bool +GV100LegalizeSSA::handleIADD64(Instruction *i) +{ + Value *carry = bld.getSSA(1, FILE_PREDICATE); + Value *def[2] = { bld.getSSA(), bld.getSSA() }; + Value *src[2][2]; + + for (int s = 0; s < 2; s++) { + if (i->getSrc(s)->reg.size == 8) { + bld.mkSplit(src[s], 4, i->getSrc(s)); + } else { + src[s][0] = i->getSrc(s); + src[s][1] = bld.mkImm(0); + } + } + + bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])-> + setFlagsDef(1, carry); + bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])-> + setFlagsSrc(2, carry); + bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]); + return true; +} + +bool +GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i) +{ + Value *def = bld.getSSA(8), *defs[2]; + Value *src2; + + if (i->srcExists(2) && + (!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) { + Value *src2s[2] = { bld.getSSA(), bld.getSSA() }; + bld.mkMov(src2s[0], bld.mkImm(0)); + bld.mkMov(src2s[1], i->getSrc(2)); + src2 = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), src2s[0], src2s[1])->getDef(0); + } else { + src2 = bld.mkImm(0); + } + + bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def, + i->getSrc(0), i->getSrc(1), src2); + + bld.mkSplit(defs, 4, def); + i->def(0).replace(defs[1], false); + return true; +} + +// XXX: We should be able to do this in GV100LoweringPass, but codegen messes +// up somehow and swaps the condcode without swapping the sources. +// - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test +bool +GV100LegalizeSSA::handleIMNMX(Instruction *i) +{ + Value *pred = bld.getSSA(1, FILE_PREDICATE); + + bld.mkCmp(OP_SET, (i->op == OP_MIN) ? 
CC_LT : CC_GT, i->dType, pred, + i->sType, i->getSrc(0), i->getSrc(1)); + bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred); + return true; +} + +bool +GV100LegalizeSSA::handleIMUL(Instruction *i) +{ + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) + return handleIMAD_HIGH(i); + + bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), + bld.mkImm(0)); + return true; +} + +bool +GV100LegalizeSSA::handleLOP2(Instruction *i) +{ + uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0; + uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1; + uint8_t subOp; + + if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) + src0 = ~src0; + if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) + src1 = ~src1; + + switch (i->op) { + case OP_AND: subOp = src0 & src1; break; + case OP_OR : subOp = src0 | src1; break; + case OP_XOR: subOp = src0 ^ src1; break; + default: + assert(!"invalid LOP2 opcode"); + break; + } + + bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), + bld.mkImm(0))->subOp = subOp; + return true; +} + +bool +GV100LegalizeSSA::handleNOT(Instruction *i) +{ + bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0), + bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1; + return true; +} + +bool +GV100LegalizeSSA::handlePREEX2(Instruction *i) +{ + i->def(0).replace(i->src(0), false); + return true; +} + +bool +GV100LegalizeSSA::handleQUADON(Instruction *i) +{ + handleSHFL(i); // Inserts OP_WARPSYNC + return true; +} + +bool +GV100LegalizeSSA::handleQUADPOP(Instruction *i) +{ + return true; +} + +bool +GV100LegalizeSSA::handleSET(Instruction *i) +{ + Value *src2 = i->srcExists(2) ? 
i->getSrc(2) : NULL; + Value *pred = bld.getSSA(1, FILE_PREDICATE), *met; + Instruction *xsetp; + + if (isFloatType(i->dType)) { + if (i->sType == TYPE_F32) + return false; // HW has FSET.BF + met = bld.mkImm(0x3f800000); + } else { + met = bld.mkImm(0xffffffff); + } + + xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType, + i->getSrc(0), i->getSrc(1)); + xsetp->src(0).mod = i->src(0).mod; + xsetp->src(1).mod = i->src(1).mod; + xsetp->setSrc(2, src2); + + i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred); + i->src(2).mod = Modifier(NV50_IR_MOD_NOT); + return true; +} + +bool +GV100LegalizeSSA::handleSHFL(Instruction *i) +{ + Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE); + sync->fixed = 1; + sync->setSrc(0, bld.mkImm(0xffffffff)); + i->bb->insertBefore(i, sync); + return false; +} + +bool +GV100LegalizeSSA::handleSHL(Instruction *i) +{ + if (i->src(0).getFile() != FILE_GPR) { + bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1), + i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_L | + NV50_IR_SUBOP_SHF_HI; + } else { + bld.mkOp3(OP_SHF, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), + bld.mkImm(0))->subOp = NV50_IR_SUBOP_SHF_L; + } + return true; +} + +bool +GV100LegalizeSSA::handleSHR(Instruction *i) +{ + bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1), + i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_R | NV50_IR_SUBOP_SHF_HI; + return true; +} + +bool +GV100LegalizeSSA::handleSUB(Instruction *i) +{ + Instruction *xadd = + bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1)); + xadd->src(0).mod = i->src(0).mod; + xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); + return true; +} + +bool +GV100LegalizeSSA::visit(Instruction *i) +{ + bool lowered = false; + + bld.setPosition(i, false); + + switch (i->op) { + case OP_AND: + case OP_OR: + case OP_XOR: + if (i->def(0).getFile() != FILE_PREDICATE) + lowered = handleLOP2(i); + break; + case OP_NOT: + 
lowered = handleNOT(i); + break; + case OP_SHL: + lowered = handleSHL(i); + break; + case OP_SHR: + lowered = handleSHR(i); + break; + case OP_SET: + case OP_SET_AND: + case OP_SET_OR: + case OP_SET_XOR: + if (i->def(0).getFile() != FILE_PREDICATE) + lowered = handleSET(i); + break; + case OP_SLCT: + lowered = handleCMP(i); + break; + case OP_PREEX2: + lowered = handlePREEX2(i); + break; + case OP_MUL: + if (!isFloatType(i->dType)) + lowered = handleIMUL(i); + break; + case OP_MAD: + if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH) + lowered = handleIMAD_HIGH(i); + break; + case OP_SHFL: + lowered = handleSHFL(i); + break; + case OP_QUADON: + lowered = handleQUADON(i); + break; + case OP_QUADPOP: + lowered = handleQUADPOP(i); + break; + case OP_SUB: + lowered = handleSUB(i); + break; + case OP_MAX: + case OP_MIN: + if (!isFloatType(i->dType)) + lowered = handleIMNMX(i); + break; + case OP_ADD: + if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8) + lowered = handleIADD64(i); + break; + case OP_PFETCH: + handlePFETCH(i); + break; + case OP_LOAD: + handleLOAD(i); + break; + default: + break; + } + + if (lowered) + delete_Instruction(prog, i); + + return true; +} + +bool +GV100LoweringPass::handleDMNMX(Instruction *i) +{ + Value *pred = bld.getSSA(1, FILE_PREDICATE); + Value *src0[2], *src1[2], *dest[2]; + + bld.mkCmp(OP_SET, (i->op == OP_MIN) ? 
CC_LT : CC_GT, TYPE_U32, pred, + i->sType, i->getSrc(0), i->getSrc(1)); + bld.mkSplit(src0, 4, i->getSrc(0)); + bld.mkSplit(src1, 4, i->getSrc(1)); + bld.mkSplit(dest, 4, i->getDef(0)); + bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred); + bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred); + bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]); + return true; +} + +bool +GV100LoweringPass::handleEXTBF(Instruction *i) +{ + Value *bit = bld.getScratch(); + Value *cnt = bld.getScratch(); + Value *mask = bld.getScratch(); + Value *zero = bld.mkImm(0); + + bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero); + bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero); + bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt); + bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask); + bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit); + if (isSignedType(i->dType)) + bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt); + + return true; +} + +bool +GV100LoweringPass::handleFLOW(Instruction *i) +{ + i->op = OP_BRA; + return false; +} + +bool +GV100LoweringPass::handleI2I(Instruction *i) +{ + bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))-> + subOp = i->subOp; + bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0)); + return true; +} + +bool +GV100LoweringPass::handleINSBF(Instruction *i) +{ + Value *bit = bld.getScratch(); + Value *cnt = bld.getScratch(); + Value *mask = bld.getScratch(); + Value *src0 = bld.getScratch(); + Value *zero = bld.mkImm(0); + + bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero); + bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero); + bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt); + + bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask); + bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit); + + bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit); + bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, 
i->getSrc(2), mask)-> + subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c)); + + return true; +} + +bool +GV100LoweringPass::handlePINTERP(Instruction *i) +{ + Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL; + Instruction *ipa, *mul; + + ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2); + ipa->ipa = i->ipa; + mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1)); + + if (i->getInterpMode() == NV50_IR_INTERP_SC) { + ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE)); + mul->setPredicate(CC_NOT_P, ipa->getDef(1)); + } + + return true; +} + +bool +GV100LoweringPass::handlePREFLOW(Instruction *i) +{ + return true; +} + +bool +GV100LoweringPass::handlePRESIN(Instruction *i) +{ + const float f = 1.0 / (2.0 * 3.14159265); + bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f)); + return true; +} + +bool +GV100LoweringPass::visit(Instruction *i) +{ + bool lowered = false; + + bld.setPosition(i, false); + + switch (i->op) { + case OP_BREAK: + case OP_CONT: + lowered = handleFLOW(i); + break; + case OP_PREBREAK: + case OP_PRECONT: + lowered = handlePREFLOW(i); + break; + case OP_CVT: + if (i->src(0).getFile() != FILE_PREDICATE && + i->def(0).getFile() != FILE_PREDICATE && + !isFloatType(i->dType) && !isFloatType(i->sType)) + lowered = handleI2I(i); + break; + case OP_EXTBF: + lowered = handleEXTBF(i); + break; + case OP_INSBF: + lowered = handleINSBF(i); + break; + case OP_MAX: + case OP_MIN: + if (i->dType == TYPE_F64) + lowered = handleDMNMX(i); + break; + case OP_PINTERP: + lowered = handlePINTERP(i); + break; + case OP_PRESIN: + lowered = handlePRESIN(i); + break; + default: + break; + } + + if (lowered) + delete_Instruction(prog, i); + + return true; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h new file mode 100644 index 00000000000..92fdb938244 --- /dev/null +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h @@ -0,0 +1,79 @@ +/* + * Copyright 2020 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef __NV50_IR_LOWERING_GV100_H__ +#define __NV50_IR_LOWERING_GV100_H__ +#include "codegen/nv50_ir_lowering_gm107.h" + +namespace nv50_ir { + +class GV100LoweringPass : public Pass +{ +public: + GV100LoweringPass(Program *p) { + bld.setProgram(p); + } + +private: + BuildUtil bld; + + virtual bool visit(Instruction *); + + bool handleDMNMX(Instruction *); + bool handleEXTBF(Instruction *); + bool handleFLOW(Instruction *); + bool handleI2I(Instruction *); + bool handleINSBF(Instruction *); + bool handlePINTERP(Instruction *); + bool handlePREFLOW(Instruction *); + bool handlePRESIN(Instruction *); +}; + +class GV100LegalizeSSA : public GM107LegalizeSSA +{ +public: + GV100LegalizeSSA(Program *p) { + bld.setProgram(p); + } + +private: + virtual bool visit(Function *) { return true; } + virtual bool visit(BasicBlock *) { return true; } + virtual bool visit(Instruction *); + + bool handleCMP(Instruction *); + bool handleIADD64(Instruction *); + bool handleIMAD_HIGH(Instruction *); + bool handleIMNMX(Instruction *); + bool handleIMUL(Instruction *); + bool handleLOP2(Instruction *); + bool handleNOT(Instruction *); + bool handlePREEX2(Instruction *); + bool handleQUADON(Instruction *); + bool handleQUADPOP(Instruction *); + bool handleSET(Instruction *); + bool handleSHFL(Instruction *); + bool handleSHL(Instruction *); + bool handleSHR(Instruction *); + bool handleSUB(Instruction *); +}; +} +#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 8e6b9775d79..f100445e9d0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -898,6 +898,8 @@ NVC0LoweringPass::visit(Function *fn) gpEmitAddress = bld.loadImm(NULL, 0)->asLValue(); if (fn->cfgExit) { bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false); + if (prog->getTarget()->getChipset() >= NVISA_GV100_CHIPSET) + 
bld.mkOp1(OP_FINAL, TYPE_NONE, NULL, gpEmitAddress)->fixed = 1; bld.mkMovToReg(0, gpEmitAddress); } } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 6df2664da22..272c591ff0a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -988,6 +988,7 @@ GCRA::coalesce(ArrayList& insns) case 0x110: case 0x120: case 0x130: + case 0x140: ret = doCoalesce(insns, JOIN_MASK_UNION); break; default: @@ -2297,13 +2298,25 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) if (isTextureOp(tex->op)) textureMask(tex); - if (isScalarTexGM107(tex)) { - handleScalarTexGM107(tex); - return; - } + if (targ->getChipset() < NVISA_GV100_CHIPSET) { + if (isScalarTexGM107(tex)) { + handleScalarTexGM107(tex); + return; + } - assert(!tex->tex.scalar); - condenseDefs(tex); + assert(!tex->tex.scalar); + condenseDefs(tex); + } else { + if (isTextureOp(tex->op)) { + int defCount = tex->defCount(0xff); + if (defCount > 3) + condenseDefs(tex, 2, 3); + if (defCount > 1) + condenseDefs(tex, 0, 1); + } else { + condenseDefs(tex); + } + } if (isSurfaceOp(tex->op)) { int s = tex->tex.target.getDim() + @@ -2485,6 +2498,7 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) case 0x110: case 0x120: case 0x130: + case 0x140: texConstraintGM107(tex); break; default: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index b0057cb7dce..06154a90b07 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -144,6 +144,7 @@ const OpClass Target::operationClass[] = }; +extern Target *getTargetGV100(unsigned int chipset); extern Target *getTargetGM107(unsigned int chipset); extern Target *getTargetNVC0(unsigned int chipset); extern Target *getTargetNV50(unsigned int chipset); @@ -153,6 +154,8 @@ Target 
*Target::create(unsigned int chipset) STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1); STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1); switch (chipset & ~0xf) { + case 0x140: + return getTargetGV100(chipset); case 0x110: case 0x120: case 0x130: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp new file mode 100644 index 00000000000..fd969e1ece5 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp @@ -0,0 +1,594 @@ +/* + * Copyright 2020 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "codegen/nv50_ir_target_gv100.h" +#include "codegen/nv50_ir_lowering_gv100.h" +#include "codegen/nv50_ir_emit_gv100.h" + +namespace nv50_ir { + +void +TargetGV100::initOpInfo() +{ + unsigned int i, j; + + static const operation commutative[] = + { + OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_MAX, OP_MIN, + OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT + }; + + static const operation noDest[] = + { + OP_EXIT + }; + + static const operation noPred[] = + { + }; + + for (i = 0; i < DATA_FILE_COUNT; ++i) + nativeFileMap[i] = (DataFile)i; + nativeFileMap[FILE_ADDRESS] = FILE_GPR; + nativeFileMap[FILE_FLAGS] = FILE_PREDICATE; + + for (i = 0; i < OP_LAST; ++i) { + opInfo[i].variants = NULL; + opInfo[i].op = (operation)i; + opInfo[i].srcTypes = 1 << (int)TYPE_F32; + opInfo[i].dstTypes = 1 << (int)TYPE_F32; + opInfo[i].immdBits = 0; + opInfo[i].srcNr = operationSrcNr[i]; + + for (j = 0; j < opInfo[i].srcNr; ++j) { + opInfo[i].srcMods[j] = 0; + opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR; + } + opInfo[i].dstMods = 0; + opInfo[i].dstFiles = 1 << (int)FILE_GPR; + + opInfo[i].hasDest = 1; + opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); + opInfo[i].commutative = false; /* set below */ + opInfo[i].pseudo = (i < OP_MOV); + opInfo[i].predicate = !opInfo[i].pseudo; + opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN); + opInfo[i].minEncSize = 16; + } + for (i = 0; i < ARRAY_SIZE(commutative); ++i) + opInfo[commutative[i]].commutative = true; + for (i = 0; i < ARRAY_SIZE(noDest); ++i) + opInfo[noDest[i]].hasDest = 0; + for (i = 0; i < ARRAY_SIZE(noPred); ++i) + opInfo[noPred[i]].predicate = 0; +} + +struct opInfo { + struct { + uint8_t files; + uint8_t mods; + } src[3]; +}; + +#define SRC_NONE 0 +#define SRC_R (1 << FILE_GPR) +#define SRC_I (1 << FILE_MEMORY_CONST) +#define SRC_C (1 << FILE_IMMEDIATE) +#define SRC_RC (SRC_R | SRC_C) +#define SRC_RI (SRC_R | SRC_I ) +#define SRC_RIC (SRC_R | SRC_I | SRC_C) + +#define MOD_NONE 0 +#define MOD_NEG NV50_IR_MOD_NEG 
+#define MOD_ABS NV50_IR_MOD_ABS +#define MOD_NOT NV50_IR_MOD_NOT +#define MOD_NA (MOD_NEG | MOD_ABS) + +#define OPINFO(O,SA,MA,SB,MB,SC,MC) \ +static struct opInfo \ +opInfo_##O = { \ + .src = { { SRC_##SA, MOD_##MA }, \ + { SRC_##SB, MOD_##MB }, \ + { SRC_##SC, MOD_##MC }}, \ +}; + + +/* Handled by GV100LegalizeSSA. */ +OPINFO(FABS , RIC , NA , NONE, NONE, NONE, NONE); +OPINFO(FCMP , R , NONE, RIC , NONE, RIC , NONE); //XXX: use FSEL for mods +OPINFO(FNEG , RIC , NA , NONE, NONE, NONE, NONE); +OPINFO(FSET , R , NA , RIC , NA , NONE, NONE); +OPINFO(ICMP , R , NONE, RIC , NONE, RIC , NONE); +OPINFO(IMUL , R , NONE, RIC , NONE, NONE, NONE); +OPINFO(INEG , RIC , NEG , NONE, NONE, NONE, NONE); +OPINFO(ISET , R , NONE, RIC , NONE, NONE, NONE); +OPINFO(LOP2 , R , NOT , RIC , NOT , NONE, NONE); +OPINFO(NOT , RIC , NONE, NONE, NONE, NONE, NONE); +OPINFO(SAT , RIC , NA , NONE, NONE, NONE, NONE); +OPINFO(SHL , RIC , NONE, RIC , NONE, NONE, NONE); +OPINFO(SHR , RIC , NONE, RIC , NONE, NONE, NONE); +OPINFO(SUB , R , NONE, RIC , NEG , NONE, NONE); +OPINFO(IMNMX , R , NONE, RIC , NONE, NONE, NONE); + +/* Handled by CodeEmitterGV100. 
*/ +OPINFO(AL2P , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(ALD , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(AST , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(ATOM , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(ATOMS , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(BAR , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(BRA , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(BMSK , R , NONE, RIC , NONE, NONE, NONE); +OPINFO(BREV , RIC , NONE, NONE, NONE, NONE, NONE); +OPINFO(CCTL , NONE, NONE, NONE, NONE, NONE, NONE); +//OPINFO(CS2R , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(DADD , R , NA , RIC , NA , NONE, NONE); +OPINFO(DFMA , R , NA , RIC , NA , RIC , NA ); +OPINFO(DMUL , R , NA , RIC , NA , NONE, NONE); +OPINFO(DSETP , R , NA , RIC , NA , NONE, NONE); +OPINFO(EXIT , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(F2F , RIC , NA , NONE, NONE, NONE, NONE); +OPINFO(F2I , RIC , NA , NONE, NONE, NONE, NONE); +OPINFO(FADD , R , NA , RIC , NA , NONE, NONE); +OPINFO(FFMA , R , NA , RIC , NA , RIC , NA ); +OPINFO(FLO , RIC , NOT , NONE, NONE, NONE, NONE); +OPINFO(FMNMX , R , NA , RIC , NA , NONE, NONE); +OPINFO(FMUL , R , NA , RIC , NA , NONE, NONE); +OPINFO(FRND , RIC , NA , NONE, NONE, NONE, NONE); +OPINFO(FSET_BF , R , NA , RIC , NA , NONE, NONE); +OPINFO(FSETP , R , NA , RIC , NA , NONE, NONE); +OPINFO(FSWZADD , R , NONE, R , NONE, NONE, NONE); +OPINFO(I2F , RIC , NONE, NONE, NONE, NONE, NONE); +OPINFO(IABS , RIC , NONE, NONE, NONE, NONE, NONE); +OPINFO(IADD3 , R , NEG , RIC , NEG , R , NEG ); +OPINFO(IMAD , R , NONE, RIC , NONE, RIC , NEG ); +OPINFO(IMAD_WIDE, R , NONE, RIC , NONE, RC , NEG ); +OPINFO(IPA , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(ISBERD , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(ISETP , R , NONE, RIC , NONE, NONE, NONE); +OPINFO(KILL , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(LD , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(LDC , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(LDL , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(LDS , NONE, 
NONE, NONE, NONE, NONE, NONE); +OPINFO(LEA , R , NEG , I , NONE, RIC , NEG ); +OPINFO(LOP3_LUT , R , NONE, RIC , NONE, R , NONE); +OPINFO(MEMBAR , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(MOV , RIC , NONE, NONE, NONE, NONE, NONE); +OPINFO(MUFU , RIC , NA , NONE, NONE, NONE, NONE); +OPINFO(NOP , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(OUT , R , NONE, RI , NONE, NONE, NONE); +OPINFO(PIXLD , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(PLOP3_LUT, NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(POPC , RIC , NOT , NONE, NONE, NONE, NONE); +OPINFO(PRMT , R , NONE, RIC , NONE, RIC , NONE); +OPINFO(RED , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(SGXT , R , NONE, RIC , NONE, NONE, NONE); +OPINFO(S2R , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(SEL , R , NONE, RIC , NONE, NONE, NONE); +OPINFO(SHF , R , NONE, RIC , NONE, RIC , NONE); +OPINFO(SHFL , R , NONE, R , NONE, R , NONE); +OPINFO(ST , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(STL , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(STS , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(SUATOM , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(SULD , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(SUST , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(TEX , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(TLD , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(TLD4 , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(TMML , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(TXD , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(TXQ , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(VOTE , NONE, NONE, NONE, NONE, NONE, NONE); +OPINFO(WARPSYNC , R , NONE, NONE, NONE, NONE, NONE); + +static const struct opInfo * +getOpInfo(const Instruction *i) +{ + switch (i->op) { + case OP_ABS: + if (isFloatType(i->dType)) + return &opInfo_FABS; + return &opInfo_IABS; + case OP_ADD: + if (isFloatType(i->dType)) { + if (i->dType == TYPE_F32) + return &opInfo_FADD; + else + return &opInfo_DADD; + } else { + return &opInfo_IADD3; + } + break; + case OP_AFETCH: return 
&opInfo_AL2P; + case OP_AND: + case OP_OR: + case OP_XOR: + if (i->def(0).getFile() == FILE_PREDICATE) + return &opInfo_PLOP3_LUT; + return &opInfo_LOP2; + case OP_ATOM: + if (i->src(0).getFile() == FILE_MEMORY_SHARED) + return &opInfo_ATOMS; + else + if (!i->defExists(0) && i->subOp < NV50_IR_SUBOP_ATOM_CAS) + return &opInfo_RED; + else + return &opInfo_ATOM; + break; + case OP_BAR: return &opInfo_BAR; + case OP_BFIND: return &opInfo_FLO; + case OP_BMSK: return &opInfo_BMSK; + case OP_BREV: return &opInfo_BREV; + case OP_BRA: + case OP_JOIN: return &opInfo_BRA; //XXX + case OP_CCTL: return &opInfo_CCTL; + case OP_CEIL: + case OP_CVT: + case OP_FLOOR: + case OP_TRUNC: + if (i->op == OP_CVT && (i->def(0).getFile() == FILE_PREDICATE || + i->src(0).getFile() == FILE_PREDICATE)) { + return &opInfo_MOV; + } else if (isFloatType(i->dType)) { + if (isFloatType(i->sType)) { + if (i->sType == i->dType) + return &opInfo_FRND; + else + return &opInfo_F2F; + } else { + return &opInfo_I2F; + } + } else { + if (isFloatType(i->sType)) + return &opInfo_F2I; + } + break; + case OP_COS: + case OP_EX2: + case OP_LG2: + case OP_RCP: + case OP_RSQ: + case OP_SIN: + case OP_SQRT: return &opInfo_MUFU; + case OP_DISCARD: return &opInfo_KILL; + case OP_EMIT: + case OP_FINAL: + case OP_RESTART: return &opInfo_OUT; + case OP_EXIT: return &opInfo_EXIT; + case OP_EXPORT: return &opInfo_AST; + case OP_FMA: + case OP_MAD: + if (isFloatType(i->dType)) { + if (i->dType == TYPE_F32) + return &opInfo_FFMA; + else + return &opInfo_DFMA; + } else { + if (typeSizeof(i->dType) != 8) + return &opInfo_IMAD; + else + return &opInfo_IMAD_WIDE; + } + break; + case OP_JOINAT: return &opInfo_NOP; //XXX + case OP_LINTERP: return &opInfo_IPA; + case OP_LOAD: + switch (i->src(0).getFile()) { + case FILE_MEMORY_CONST : return &opInfo_LDC; + case FILE_MEMORY_LOCAL : return &opInfo_LDL; + case FILE_MEMORY_SHARED: return &opInfo_LDS; + case FILE_MEMORY_GLOBAL: return &opInfo_LD; + default: + break; + } + break; + 
case OP_LOP3_LUT: return &opInfo_LOP3_LUT; + case OP_MAX: + case OP_MIN: + if (isFloatType(i->dType)) { + if (i->dType == TYPE_F32) + return &opInfo_FMNMX; + } else { + return &opInfo_IMNMX; + } + break; + case OP_MEMBAR: return &opInfo_MEMBAR; + case OP_MOV: return &opInfo_MOV; + case OP_MUL: + if (isFloatType(i->dType)) { + if (i->dType == TYPE_F32) + return &opInfo_FMUL; + else + return &opInfo_DMUL; + } + return &opInfo_IMUL; + case OP_NEG: + if (isFloatType(i->dType)) + return &opInfo_FNEG; + return &opInfo_INEG; + case OP_NOT: return &opInfo_NOT; + case OP_PERMT: return &opInfo_PRMT; + case OP_PFETCH: return &opInfo_ISBERD; + case OP_PIXLD: return &opInfo_PIXLD; + case OP_POPCNT: return &opInfo_POPC; + case OP_QUADOP: return &opInfo_FSWZADD; + case OP_RDSV: +#if 0 + if (targ->isCS2RSV(i->getSrc(0)->reg.data.sv.sv)) + return &opInfo_CS2R; +#endif + return &opInfo_S2R; + case OP_SAT: return &opInfo_SAT; + case OP_SELP: return &opInfo_SEL; + case OP_SET: + case OP_SET_AND: + case OP_SET_OR: + case OP_SET_XOR: + if (i->def(0).getFile() != FILE_PREDICATE) { + if (isFloatType(i->dType)) { + if (i->dType == TYPE_F32) + return &opInfo_FSET_BF; + } else { + if (isFloatType(i->sType)) + return &opInfo_FSET; + return &opInfo_ISET; + } + } else { + if (isFloatType(i->sType)) + if (i->sType == TYPE_F64) + return &opInfo_DSETP; + else + return &opInfo_FSETP; + else + return &opInfo_ISETP; + } + break; + case OP_SGXT: return &opInfo_SGXT; + case OP_SHF: return &opInfo_SHF; + case OP_SHFL: return &opInfo_SHFL; + case OP_SHL: return &opInfo_SHL; + case OP_SHLADD: return &opInfo_LEA; + case OP_SHR: return &opInfo_SHR; + case OP_SLCT: + if (isFloatType(i->sType)) + return &opInfo_FCMP; + return &opInfo_ICMP; + case OP_STORE: + switch (i->src(0).getFile()) { + case FILE_MEMORY_LOCAL : return &opInfo_STL; + case FILE_MEMORY_SHARED: return &opInfo_STS; + case FILE_MEMORY_GLOBAL: return &opInfo_ST; + default: + break; + } + break; + case OP_SUB: return &opInfo_SUB; + case OP_SULDB: 
+ case OP_SULDP: return &opInfo_SULD; + case OP_SUREDB: + case OP_SUREDP: return &opInfo_SUATOM; + case OP_SUSTB: + case OP_SUSTP: return &opInfo_SUST; + case OP_TEX: + case OP_TXB: + case OP_TXL: return &opInfo_TEX; + case OP_TXD: return &opInfo_TXD; + case OP_TXF: return &opInfo_TLD; + case OP_TXG: return &opInfo_TLD4; + case OP_TXLQ: return &opInfo_TMML; + case OP_TXQ: return &opInfo_TXQ; + case OP_VFETCH: return &opInfo_ALD; + case OP_VOTE: return &opInfo_VOTE; + case OP_WARPSYNC: return &opInfo_WARPSYNC; + default: + break; + } + return NULL; +} + +bool +TargetGV100::isSatSupported(const Instruction *i) const +{ + switch (i->dType) { + case TYPE_F32: + switch (i->op) { + case OP_ADD: + case OP_FMA: + case OP_MAD: + case OP_MUL: return true; + default: + break; + } + break; + default: + break; + } + return false; +} + +bool +TargetGV100::isModSupported(const Instruction *i, int s, Modifier mod) const +{ + const struct opInfo *info = nv50_ir::getOpInfo(i); + uint8_t mods = 0; + if (info && s < (int)ARRAY_SIZE(info->src)) + mods = info->src[s].mods; + return (mod & Modifier(mods)) == mod; +} + +bool +TargetGV100::isOpSupported(operation op, DataType ty) const +{ + if (op == OP_MAD || op == OP_FMA) + return true; + if (ty == TYPE_F32) { + if (op == OP_MAX) + return true; + } + if (op == OP_RSQ) + return true; + if (op == OP_SET || + op == OP_SET_AND || + op == OP_SET_OR || + op == OP_SET_XOR) + return true; + if (op == OP_SHLADD) + return true; + return false; +} + +bool +TargetGV100::isBarrierRequired(const Instruction *i) const +{ + switch (i->op) { + case OP_BREV: + return true; + default: + break; + } + + return TargetGM107::isBarrierRequired(i); +} + +bool +TargetGV100::insnCanLoad(const Instruction *i, int s, + const Instruction *ld) const +{ + const struct opInfo *info = nv50_ir::getOpInfo(i); + uint16_t files = 0; + + if (ld->src(0).getFile() == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0) + return (!i->isPseudo() && + !i->asTex() && + i->op != 
OP_EXPORT && i->op != OP_STORE); +/* Loads whose source 0 is indirect are never accepted. */ + if (ld->src(0).isIndirect(0)) + return false; +/* Start from the files opInfo allows for s, then drop const-memory and immediate when the partner source (1 and 2, or 0 and 1 for SHL/SHR) is already non-GPR. */ + if (info && s < (int)ARRAY_SIZE(info->src)) { + files = info->src[s].files; + if ((s == 1 && i->srcExists(2) && i->src(2).getFile() != FILE_GPR) || + (s == 2 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR)) { + files &= ~(1 << FILE_MEMORY_CONST); + files &= ~(1 << FILE_IMMEDIATE); + } else + if ((i->op == OP_SHL || i->op == OP_SHR) && + ((s == 0 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR) || + (s == 1 && i->srcExists(0) && i->src(0).getFile() != FILE_GPR))) { + files &= ~(1 << FILE_MEMORY_CONST); + files &= ~(1 << FILE_IMMEDIATE); + } + } +/* With sType TYPE_F64, an immediate is rejected if any of its low 32 bits are set. */ + if (ld->src(0).getFile() == FILE_IMMEDIATE) { + if (i->sType == TYPE_F64) { + if (ld->getSrc(0)->asImm()->reg.data.u64 & 0x00000000ffffffff) + return false; + } + } +/* Accept only if the file of ld's source 0 is still in the mask. */ + return (files & (1 << ld->src(0).getFile())); +} +/* Returns through code and size a pointer to the static builtin word table and its size in bytes (sizeof). NOTE(review): what the repeated words encode is not stated here - confirm. */ +void +TargetGV100::getBuiltinCode(const uint32_t **code, uint32_t *size) const +{ + //XXX: find out why gv100 (tu1xx is fine) hangs without this + static uint32_t builtin[] = { + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000,
0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, + }; + *code = builtin; + *size = sizeof(builtin); +} +/* Ignores its argument and always reports offset 0. */ +uint32_t +TargetGV100::getBuiltinOffset(int builtin) const +{ + return 0; +} +/* PRE_SSA runs GM107LoweringPass then GV100LoweringPass and returns true without checking either result; SSA and POST_RA return the result of GV100LegalizeSSA and NVC0LegalizePostRA respectively; any other stage returns false. */ +bool +TargetGV100::runLegalizePass(Program *prog, CGStage stage) const +{ + if (stage == CG_STAGE_PRE_SSA) { + GM107LoweringPass pass1(prog); + GV100LoweringPass pass2(prog); + pass1.run(prog, false, true); + pass2.run(prog, false, true); + return true; + } else + if (stage == CG_STAGE_SSA) { + GV100LegalizeSSA pass(prog); + return pass.run(prog, false, true); + } else + if (stage == CG_STAGE_POST_RA) { + NVC0LegalizePostRA pass(prog); + return pass.run(prog, false, true); + } + return false; +} +/* Returns a newly allocated CodeEmitterGV100 for this target; the type argument is not consulted. */ +CodeEmitter * +TargetGV100::getCodeEmitter(Program::Type type) +{ + return new CodeEmitterGV100(this); +} +/* Passes chipset on to TargetGM107 and then calls initOpInfo(). */ +TargetGV100::TargetGV100(unsigned int chipset) + : TargetGM107(chipset) +{ + initOpInfo(); +}; +/* Factory: returns a new TargetGV100 for chipset. NOTE(review): the caller presumably owns and deletes it - confirm. */ +Target *getTargetGV100(unsigned int chipset) +{ + return new TargetGV100(chipset); +} + +}; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h new file mode 100644 index 00000000000..897e6a22d30 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h @@ -0,0 +1,52 @@ +/* + * Copyright 2020 Red Hat Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef __NV50_IR_TARGET_GV100_H__ +#define __NV50_IR_TARGET_GV100_H__ +#include "codegen/nv50_ir_target_gm107.h" + +namespace nv50_ir { +/* Target description for GV100: derives from TargetGM107 and overrides code-emitter creation, legalization, builtin code lookup and the instruction capability queries declared below. */ +class TargetGV100 : public TargetGM107 { +public: + TargetGV100(unsigned int chipset); + /* Creates the code emitter used for this target. */ + virtual CodeEmitter *getCodeEmitter(Program::Type); + /* Runs the legalization passes for the given codegen stage. */ + virtual bool runLegalizePass(Program *, CGStage stage) const; + /* Builtin code table (pointer and size) and the offset of a builtin inside it. */ + virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const; + virtual uint32_t getBuiltinOffset(int builtin) const; + /* Capability queries about instructions, ops, source modifiers and saturation. */ + virtual bool insnCanLoad(const Instruction *, int, const Instruction *) const; + virtual bool isOpSupported(operation, DataType) const; + virtual bool isModSupported(const Instruction *, int s, Modifier) const; + virtual bool isSatSupported(const Instruction *) const; + /* Whether the given instruction needs a barrier. */ + virtual bool isBarrierRequired(const Instruction *) const; + /* Setup helpers; initOpInfo() is called from the constructor. NOTE(review): initProps() is not defined in the visible part of this patch - confirm its role. */ +private: + void initOpInfo(); + void initProps(const struct opProperties *, int); +}; + +}; +#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index 60134b445db..ed5b343ccba 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -30,7 +30,7 @@ Target *getTargetNVC0(unsigned int chipset) } TargetNVC0::TargetNVC0(unsigned int card) : - Target(card < 0x110, false, card >= 0xe4) + Target(card < 0x110, false, card >= 0xe4 && card < 0x140) { chipset = card; initOpInfo(); diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build index c6caae2bb79..22aa48d6a01 100644 --- a/src/gallium/drivers/nouveau/meson.build +++ b/src/gallium/drivers/nouveau/meson.build @@ -150,13 +150,19 @@ files_libnouveau = files( 'codegen/nv50_ir_util.cpp', 'codegen/nv50_ir_util.h', 'codegen/unordered_set.h', + 'codegen/nv50_ir_emit_gv100.cpp', + 'codegen/nv50_ir_emit_gv100.h', 'codegen/nv50_ir_emit_gk110.cpp', 'codegen/nv50_ir_emit_gm107.cpp',
'codegen/nv50_ir_emit_nvc0.cpp', + 'codegen/nv50_ir_lowering_gv100.cpp', + 'codegen/nv50_ir_lowering_gv100.h', 'codegen/nv50_ir_lowering_gm107.cpp', 'codegen/nv50_ir_lowering_gm107.h', 'codegen/nv50_ir_lowering_nvc0.cpp', 'codegen/nv50_ir_lowering_nvc0.h', + 'codegen/nv50_ir_target_gv100.cpp', + 'codegen/nv50_ir_target_gv100.h', 'codegen/nv50_ir_target_gm107.cpp', 'codegen/nv50_ir_target_gm107.h', 'codegen/nv50_ir_target_nvc0.cpp', -- 2.30.2