From: Ben Skeggs Date: Fri, 9 May 2014 05:56:05 +0000 (+1000) Subject: nvc0: add maxwell (sm50) compiler backend X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d548d47edf9f05e6dbf9656abc2f8e78d02cb2f6;p=mesa.git nvc0: add maxwell (sm50) compiler backend The big missing part here is proper sched data calculations, but hopefully the chosen placeholder will be sufficient for now. Passes piglit as well as GK107 does. Signed-off-by: Ben Skeggs Reviewed-by: Ilia Mirkin --- diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index cc84ec3edb6..d6eba244d79 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -72,10 +72,13 @@ NV50_CODEGEN_SOURCES := \ codegen/nv50_ir_util.cpp NVC0_CODEGEN_SOURCES := \ - codegen/nv50_ir_emit_gk110.cpp \ codegen/nv50_ir_emit_nvc0.cpp \ + codegen/nv50_ir_emit_gk110.cpp \ + codegen/nv50_ir_emit_gm107.cpp \ codegen/nv50_ir_lowering_nvc0.cpp \ - codegen/nv50_ir_target_nvc0.cpp + codegen/nv50_ir_lowering_gm107.cpp \ + codegen/nv50_ir_target_nvc0.cpp \ + codegen/nv50_ir_target_gm107.cpp NVC0_C_SOURCES := \ nvc0/nvc0_compute.c \ diff --git a/src/gallium/drivers/nouveau/codegen/lib/Makefile b/src/gallium/drivers/nouveau/codegen/lib/Makefile index 28a41a3f41e..06d1979d8b2 100644 --- a/src/gallium/drivers/nouveau/codegen/lib/Makefile +++ b/src/gallium/drivers/nouveau/codegen/lib/Makefile @@ -1,6 +1,6 @@ ENVYAS ?= envyas -all: gf100.asm.h gk104.asm.h gk110.asm.h +all: gf100.asm.h gk104.asm.h gk110.asm.h gm107.asm.h gf100.asm.h: %.asm.h: %.asm $(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@ @@ -8,3 +8,5 @@ gk104.asm.h: %.asm.h: %.asm $(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@ gk110.asm.h: %.asm.h: %.asm $(ENVYAS) -a -W -mgk110 $< -o $@ +gm107.asm.h: %.asm.h: %.asm + $(ENVYAS) -a -W -mgm107 $< -o $@ diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm new file mode 100644 index 
00000000000..758cc81a159 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm @@ -0,0 +1,115 @@ +.section #gm107_builtin_code +// DIV U32 +// +// UNR recurrence (q = a / b): +// look for z such that 2^32 - b <= b * z < 2^32 +// then q - 1 <= (a * z) / 2^32 <= q +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p1 +// SIZE: 22 / 14 * 8 bytes +// +gm107_div_u32: + sched 0x7e0 0x7e0 0x7e0 + flo u32 $r2 $r1 + lop xor 1 $r2 $r2 0x1f + mov $r3 0x1 0xf + sched 0x7e0 0x7e0 0x7e0 + shl $r2 $r3 $r2 + i2i u32 u32 $r1 neg $r1 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + sched 0x7e0 0x7e0 0x7e0 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + sched 0x7e0 0x7e0 0x7e0 + mov $r3 $r0 0xf + imul u32 u32 hi $r0 $r0 $r2 + i2i u32 u32 $r2 neg $r1 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 $r1 $r1 $r0 $r3 + isetp ge u32 and $p0 1 $r1 $r2 1 + $p0 iadd $r1 $r1 neg $r2 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r0 $r0 0x1 + $p0 isetp ge u32 and $p0 1 $r1 $r2 1 + $p0 iadd $r1 $r1 neg $r2 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r0 $r0 0x1 + ret + nop 0 + +// DIV S32, like DIV U32 after taking ABS(inputs) +// +// INPUT: $r0: dividend, $r1: divisor +// OUTPUT: $r0: result, $r1: modulus +// CLOBBER: $r2 - $r3, $p0 - $p3 +// +gm107_div_s32: + sched 0x7e0 0x7e0 0x7e0 + isetp lt and $p2 0x1 $r0 0 1 + isetp lt xor $p3 1 $r1 0 $p2 + i2i s32 s32 $r0 abs $r0 + sched 0x7e0 0x7e0 0x7e0 + i2i s32 s32 $r1 abs $r1 + flo u32 $r2 $r1 + lop xor 1 $r2 $r2 0x1f + sched 0x7e0 0x7e0 0x7e0 + mov $r3 0x1 0xf + shl $r2 $r3 $r2 + i2i u32 u32 $r1 neg $r1 + sched 0x7e0 0x7e0 0x7e0 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 
0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + sched 0x7e0 0x7e0 0x7e0 + imul u32 u32 $r3 $r1 $r2 + imad u32 u32 hi $r2 $r2 $r3 $r2 + imul u32 u32 $r3 $r1 $r2 + sched 0x7e0 0x7e0 0x7e0 + imad u32 u32 hi $r2 $r2 $r3 $r2 + mov $r3 $r0 0xf + imul u32 u32 hi $r0 $r0 $r2 + sched 0x7e0 0x7e0 0x7e0 + i2i u32 u32 $r2 neg $r1 + imad u32 u32 $r1 $r1 $r0 $r3 + isetp ge u32 and $p0 1 $r1 $r2 1 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r1 $r1 neg $r2 + $p0 iadd $r0 $r0 0x1 + $p0 isetp ge u32 and $p0 1 $r1 $r2 1 + sched 0x7e0 0x7e0 0x7e0 + $p0 iadd $r1 $r1 neg $r2 + $p0 iadd $r0 $r0 0x1 + $p3 i2i s32 s32 $r0 neg $r0 + sched 0x7e0 0x7e0 0x7e0 + $p2 i2i s32 s32 $r1 neg $r1 + ret + nop 0 + +// STUB +gm107_rcp_f64: +gm107_rsq_f64: + sched 0x7e0 0x7e0 0x7e0 + ret + nop 0 + nop 0 + +.section #gm107_builtin_offsets +.b64 #gm107_div_u32 +.b64 #gm107_div_s32 +.b64 #gm107_rcp_f64 +.b64 #gm107_rsq_f64 diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h new file mode 100644 index 00000000000..7be25da5532 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h @@ -0,0 +1,97 @@ +uint64_t gm107_builtin_code[] = { +/* 0x0000: gm107_div_u32 */ + 0x001f8000fc0007e0, + 0x5c30000000170002, + 0x3847040001f70202, + 0x3898078000170003, + 0x001f8000fc0007e0, + 0x5c48000000270302, + 0x5ce0200000170a01, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c38000000270103, + 0x5a40010000370202, + 0x001f8000fc0007e0, + 0x5c38000000270103, + 0x5a40010000370202, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c38000000270103, + 0x5a40010000370202, + 0x001f8000fc0007e0, + 0x5c98078000070003, + 0x5c38008000270000, + 0x5ce0200000170a02, + 0x001f8000fc0007e0, + 0x5a00018000070101, + 0x5b6c038000270107, + 0x5c11000000200101, + 0x001f8000fc0007e0, + 0x3810000000100000, + 0x5b6c038000200107, + 0x5c11000000200101, + 0x001f8000fc0007e0, + 
0x3810000000100000, + 0xe32000000007000f, + 0x50b0000000070f00, +/* 0x0120: gm107_div_s32 */ + 0x001f8000fc0007e0, + 0x5b6303800ff70017, + 0x5b6341000ff7011f, + 0x5ce2000000073a00, + 0x001f8000fc0007e0, + 0x5ce2000000173a01, + 0x5c30000000170002, + 0x3847040001f70202, + 0x001f8000fc0007e0, + 0x3898078000170003, + 0x5c48000000270302, + 0x5ce0200000170a01, + 0x001f8000fc0007e0, + 0x5c38000000270103, + 0x5a40010000370202, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c38000000270103, + 0x5a40010000370202, + 0x001f8000fc0007e0, + 0x5c38000000270103, + 0x5a40010000370202, + 0x5c38000000270103, + 0x001f8000fc0007e0, + 0x5a40010000370202, + 0x5c98078000070003, + 0x5c38008000270000, + 0x001f8000fc0007e0, + 0x5ce0200000170a02, + 0x5a00018000070101, + 0x5b6c038000270107, + 0x001f8000fc0007e0, + 0x5c11000000200101, + 0x3810000000100000, + 0x5b6c038000200107, + 0x001f8000fc0007e0, + 0x5c11000000200101, + 0x3810000000100000, + 0x5ce0200000033a00, + 0x001f8000fc0007e0, + 0x5ce0200000123a01, + 0xe32000000007000f, + 0x50b0000000070f00, +/* 0x0280: gm107_rcp_f64 */ +/* 0x0280: gm107_rsq_f64 */ + 0x001f8000fc0007e0, + 0xe32000000007000f, + 0x50b0000000070f00, + 0x50b0000000070f00, +}; + +uint64_t gm107_builtin_offsets[] = { + 0x0000000000000000, + 0x0000000000000120, + 0x0000000000000280, + 0x0000000000000280, +}; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index f082f856ffc..88440309e6b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -157,6 +157,7 @@ enum operation OP_VSHL, OP_VSEL, OP_CCTL, // cache control + OP_SHFL, // warp shuffle OP_LAST }; @@ -223,6 +224,10 @@ enum operation #define NV50_IR_SUBOP_PIXLD_OFFSET 3 #define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4 #define NV50_IR_SUBOP_PIXLD_SAMPLEID 5 +#define NV50_IR_SUBOP_SHFL_IDX 0 +#define NV50_IR_SUBOP_SHFL_UP 1 +#define NV50_IR_SUBOP_SHFL_DOWN 2 +#define NV50_IR_SUBOP_SHFL_BFLY 3 
#define NV50_IR_SUBOP_MADSP_SD 0xffff // Yes, we could represent those with DataType. // Or put the type into operation and have a couple 1000 values in that enum. @@ -379,6 +384,7 @@ enum SVSemantic SV_LBASE, SV_SBASE, SV_VERTEX_STRIDE, + SV_INVOCATION_INFO, SV_UNDEFINED, SV_LAST }; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index d7a9c2c7d8c..bbb89d97932 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -92,6 +92,7 @@ struct nv50_ir_prog_symbol #define NVISA_GF100_CHIPSET_D0 0xd0 #define NVISA_GK104_CHIPSET 0xe0 #define NVISA_GK110_CHIPSET 0xf0 +#define NVISA_GM107_CHIPSET 0x110 struct nv50_ir_prog_info { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp new file mode 100644 index 00000000000..c01a153c940 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -0,0 +1,2906 @@ +/* + * Copyright 2014 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ + +#include "codegen/nv50_ir_target_gm107.h" + +namespace nv50_ir { + +class CodeEmitterGM107 : public CodeEmitter +{ +public: + CodeEmitterGM107(const TargetGM107 *); + + virtual bool emitInstruction(Instruction *); + virtual uint32_t getMinEncodingSize(const Instruction *) const; + + virtual void prepareEmission(Program *); + virtual void prepareEmission(Function *); + + inline void setProgramType(Program::Type pType) { progType = pType; } + +private: + const TargetGM107 *targGM107; + + Program::Type progType; + + const Instruction *insn; + const bool writeIssueDelays; + uint32_t *data; + +private: + inline void emitField(uint32_t *, int, int, uint32_t); + inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); } + + inline void emitInsn(uint32_t, bool); + inline void emitInsn(uint32_t o) { emitInsn(o, true); } + inline void emitPred(); + inline void emitGPR(int, const Value *); + inline void emitGPR(int pos) { + emitGPR(pos, (const Value *)NULL); + } + inline void emitGPR(int pos, const ValueRef &ref) { + emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL); + } + inline void emitGPR(int pos, const ValueRef *ref) { + emitGPR(pos, ref ? ref->rep() : (const Value *)NULL); + } + inline void emitGPR(int pos, const ValueDef &def) { + emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL); + } + inline void emitSYS(int, const Value *); + inline void emitSYS(int pos, const ValueRef &ref) { + emitSYS(pos, ref.get() ? 
ref.rep() : (const Value *)NULL); + } + inline void emitPRED(int, const Value *); + inline void emitPRED(int pos) { + emitPRED(pos, (const Value *)NULL); + } + inline void emitPRED(int pos, const ValueRef &ref) { + emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL); + } + inline void emitPRED(int pos, const ValueDef &def) { + emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL); + } + inline void emitADDR(int, int, int, int, const ValueRef &); + inline void emitCBUF(int, int, int, int, int, const ValueRef &); + inline bool longIMMD(const ValueRef &); + inline void emitIMMD(int, int, const ValueRef &); + + void emitCond3(int, CondCode); + void emitCond4(int, CondCode); + void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); } + inline void emitO(int); + inline void emitP(int); + inline void emitSAT(int); + inline void emitCC(int); + inline void emitX(int); + inline void emitABS(int, const ValueRef &); + inline void emitNEG(int, const ValueRef &); + inline void emitNEG2(int, const ValueRef &, const ValueRef &); + inline void emitFMZ(int, int); + inline void emitRND(int, RoundMode, int); + inline void emitRND(int pos) { + emitRND(pos, insn->rnd, -1); + } + inline void emitPDIV(int); + inline void emitINV(int, const ValueRef &); + + void emitEXIT(); + void emitBRA(); + void emitCAL(); + void emitPCNT(); + void emitCONT(); + void emitPBK(); + void emitBRK(); + void emitPRET(); + void emitRET(); + void emitSSY(); + void emitSYNC(); + void emitSAM(); + void emitRAM(); + + void emitMOV(); + void emitS2R(); + void emitF2F(); + void emitF2I(); + void emitI2F(); + void emitI2I(); + void emitSHFL(); + + void emitDADD(); + void emitDMUL(); + void emitDFMA(); + void emitDMNMX(); + void emitDSET(); + void emitDSETP(); + + void emitFADD(); + void emitFMUL(); + void emitFFMA(); + void emitMUFU(); + void emitFMNMX(); + void emitRRO(); + void emitFCMP(); + void emitFSET(); + void emitFSETP(); + void emitFSWZADD(); + + void emitLOP(); + void emitNOT(); + void 
emitIADD(); + void emitIMUL(); + void emitIMAD(); + void emitIMNMX(); + void emitICMP(); + void emitISET(); + void emitISETP(); + void emitSHL(); + void emitSHR(); + void emitPOPC(); + void emitBFI(); + void emitBFE(); + void emitFLO(); + + void emitLDSTs(int, DataType); + void emitLDSTc(int); + void emitLDC(); + void emitLDL(); + void emitLDS(); + void emitLD(); + void emitSTL(); + void emitSTS(); + void emitST(); + void emitALD(); + void emitAST(); + void emitISBERD(); + void emitIPA(); + + void emitPIXLD(); + + void emitTEXs(int); + void emitTEX(); + void emitTLD(); + void emitTLD4(); + void emitTXD(); + void emitTXQ(); + void emitTMML(); + void emitDEPBAR(); + + void emitNOP(); + void emitKIL(); + void emitOUT(); +}; + +/******************************************************************************* + * general instruction layout/fields + ******************************************************************************/ + +void +CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v) +{ + if (b >= 0) { + uint32_t m = ((1ULL << s) - 1); + uint64_t d = (uint64_t)(v & m) << b; + assert(!(v & ~m) || (v & ~m) == ~m); + data[1] |= d >> 32; + data[0] |= d; + } +} + +void +CodeEmitterGM107::emitPred() +{ + if (insn->predSrc >= 0) { + emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id); + emitField(19, 1, insn->cc == CC_NOT_P); + } else { + emitField(16, 3, 7); + } +} + +void +CodeEmitterGM107::emitInsn(uint32_t hi, bool pred) +{ + code[0] = 0x00000000; + code[1] = hi; + if (pred) + emitPred(); +} + +void +CodeEmitterGM107::emitGPR(int pos, const Value *val) +{ + emitField(pos, 8, val ? val->reg.data.id : 255); +} + +void +CodeEmitterGM107::emitSYS(int pos, const Value *val) +{ + int id = val ? 
val->reg.data.id : -1; + + switch (id) { + case SV_INVOCATION_ID : id = 0x11; break; + case SV_INVOCATION_INFO: id = 0x1d; break; + default: + assert(!"invalid system value"); + id = 0; + break; + } + + emitField(pos, 8, id); +} + +void +CodeEmitterGM107::emitPRED(int pos, const Value *val) +{ + emitField(pos, 3, val ? val->reg.data.id : 7); +} + +void +CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr, + const ValueRef &ref) +{ + const Value *v = ref.get(); + assert(!(v->reg.data.offset & ((1 << shr) - 1))); + if (gpr >= 0) + emitGPR(gpr, ref.getIndirect(0)); + emitField(off, len, v->reg.data.offset >> shr); +} + +void +CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr, + const ValueRef &ref) +{ + const Value *v = ref.get(); + const Symbol *s = v->asSym(); + + assert(!(s->reg.data.offset & ((1 << shr) - 1))); + + emitField(buf, 5, v->reg.fileIndex); + if (gpr >= 0) + emitGPR(gpr, ref.getIndirect(0)); + emitField(off, 16, s->reg.data.offset >> shr); +} + +bool +CodeEmitterGM107::longIMMD(const ValueRef &ref) +{ + if (ref.getFile() == FILE_IMMEDIATE) { + const ImmediateValue *imm = ref.get()->asImm(); + if (isFloatType(insn->sType)) { + if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000) + return true; + } else { + if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 && + (imm->reg.data.u32 & 0xfff00000) != 0xfff00000) + return true; + } + } + return false; +} + +void +CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref) +{ + const ImmediateValue *imm = ref.get()->asImm(); + uint32_t val = imm->reg.data.u32; + + if (len == 19) { + if (isFloatType(insn->sType)) { + assert(!(val & 0x00000fff)); + val >>= 12; + } + assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000); + emitField( 56, 1, (val & 0x80000) >> 19); + emitField(pos, len, (val & 0x7ffff)); + } else { + emitField(pos, len, val); + } +} + +/******************************************************************************* + * modifiers + 
******************************************************************************/ + +void +CodeEmitterGM107::emitCond3(int pos, CondCode code) +{ + int data = 0; + + switch (code) { + case CC_FL : data = 0x00; break; + case CC_LTU: + case CC_LT : data = 0x01; break; + case CC_EQU: + case CC_EQ : data = 0x02; break; + case CC_LEU: + case CC_LE : data = 0x03; break; + case CC_GTU: + case CC_GT : data = 0x04; break; + case CC_NEU: + case CC_NE : data = 0x05; break; + case CC_GEU: + case CC_GE : data = 0x06; break; + case CC_TR : data = 0x07; break; + default: + assert(!"invalid cond3"); + break; + } + + emitField(pos, 3, data); +} + +void +CodeEmitterGM107::emitCond4(int pos, CondCode code) +{ + int data = 0; + + switch (code) { + case CC_FL: data = 0x00; break; + case CC_LT: data = 0x01; break; + case CC_EQ: data = 0x02; break; + case CC_LE: data = 0x03; break; + case CC_GT: data = 0x04; break; + case CC_NE: data = 0x05; break; + case CC_GE: data = 0x06; break; +// case CC_NUM: data = 0x07; break; +// case CC_NAN: data = 0x08; break; + case CC_LTU: data = 0x09; break; + case CC_EQU: data = 0x0a; break; + case CC_LEU: data = 0x0b; break; + case CC_GTU: data = 0x0c; break; + case CC_NEU: data = 0x0d; break; + case CC_GEU: data = 0x0e; break; + case CC_TR: data = 0x0f; break; + default: + assert(!"invalid cond4"); + break; + } + + emitField(pos, 4, data); +} + +void +CodeEmitterGM107::emitO(int pos) +{ + emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT); +} + +void +CodeEmitterGM107::emitP(int pos) +{ + emitField(pos, 1, insn->perPatch); +} + +void +CodeEmitterGM107::emitSAT(int pos) +{ + emitField(pos, 1, insn->saturate); +} + +void +CodeEmitterGM107::emitCC(int pos) +{ + emitField(pos, 1, insn->defExists(1)); +} + +void +CodeEmitterGM107::emitX(int pos) +{ + emitField(pos, 1, insn->flagsSrc >= 0); +} + +void +CodeEmitterGM107::emitABS(int pos, const ValueRef &ref) +{ + emitField(pos, 1, ref.mod.abs()); +} + +void +CodeEmitterGM107::emitNEG(int pos, 
const ValueRef &ref) +{ + emitField(pos, 1, ref.mod.neg()); +} + +void +CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b) +{ + emitField(pos, 1, a.mod.neg() ^ b.mod.neg()); +} + +void +CodeEmitterGM107::emitFMZ(int pos, int len) +{ + emitField(pos, len, /*XXX: insn->dnz << 1 | */ insn->ftz); +} + +void +CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip) +{ + int rm = 0, ri = 0; + switch (rnd) { + case ROUND_NI: ri = 1; + case ROUND_N : rm = 0; break; + case ROUND_MI: ri = 1; + case ROUND_M : rm = 1; break; + case ROUND_PI: ri = 1; + case ROUND_P : rm = 2; break; + case ROUND_ZI: ri = 1; + case ROUND_Z : rm = 3; break; + default: + assert(!"invalid round mode"); + break; + } + emitField(rip, 1, ri); + emitField(rmp, 2, rm); +} + +void +CodeEmitterGM107::emitPDIV(int pos) +{ + assert(insn->postFactor >= -3 && insn->postFactor <= 3); + if (insn->postFactor > 0) + emitField(pos, 3, 7 - insn->postFactor); + else + emitField(pos, 3, 0 - insn->postFactor); +} + +void +CodeEmitterGM107::emitINV(int pos, const ValueRef &ref) +{ + emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT))); +} + +/******************************************************************************* + * control flow + ******************************************************************************/ + +void +CodeEmitterGM107::emitEXIT() +{ + emitInsn (0xe3000000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitBRA() +{ + const FlowInstruction *insn = this->insn->asFlow(); + int gpr = -1; + + if (insn->indirect) { + if (insn->absolute) + emitInsn(0xe2000000); // JMX + else + emitInsn(0xe2500000); // BRX + gpr = 0x08; + } else { + if (insn->absolute) + emitInsn(0xe2100000); // JMP + else + emitInsn(0xe2400000); // BRA + emitField(0x07, 1, insn->allWarp); + } + + emitField(0x06, 1, insn->limit); + emitCond5(0x00, CC_TR); + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + if (!insn->absolute) + emitField(0x14, 24, 
insn->target.bb->binPos - (codeSize + 8)); + else + emitField(0x14, 32, insn->target.bb->binPos); + } else { + emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitCAL() +{ + const FlowInstruction *insn = this->insn->asFlow(); + + if (insn->absolute) { + emitInsn(0xe2200000, 0); // JCAL + } else { + emitInsn(0xe2600000, 0); // CAL + } + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + if (!insn->absolute) + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + else { + if (insn->builtin) { + int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin); + addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20); + addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12); + } else { + emitField(0x14, 32, insn->target.bb->binPos); + } + } + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitPCNT() +{ + const FlowInstruction *insn = this->insn->asFlow(); + + emitInsn(0xe2b00000, 0); + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitCONT() +{ + emitInsn (0xe3500000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitPBK() +{ + const FlowInstruction *insn = this->insn->asFlow(); + + emitInsn(0xe2a00000, 0); + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitBRK() +{ + emitInsn (0xe3400000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitPRET() +{ + const FlowInstruction *insn = this->insn->asFlow(); + + emitInsn(0xe2700000, 0); + + if (!insn->srcExists(0) 
|| insn->src(0).getFile() != FILE_MEMORY_CONST) { + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitRET() +{ + emitInsn (0xe3200000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitSSY() +{ + const FlowInstruction *insn = this->insn->asFlow(); + + emitInsn(0xe2900000, 0); + + if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { + emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); + } else { + emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); + emitField(0x05, 1, 1); + } +} + +void +CodeEmitterGM107::emitSYNC() +{ + emitInsn (0xf0f80000); + emitCond5(0x00, CC_TR); +} + +void +CodeEmitterGM107::emitSAM() +{ + emitInsn(0xe3700000, 0); +} + +void +CodeEmitterGM107::emitRAM() +{ + emitInsn(0xe3800000, 0); +} + +/******************************************************************************* + * predicate/cc + ******************************************************************************/ + +/******************************************************************************* + * movement / conversion + ******************************************************************************/ + +void +CodeEmitterGM107::emitMOV() +{ + if ( insn->src(0).getFile() != FILE_IMMEDIATE || + (insn->sType != TYPE_F32 && !longIMMD(insn->src(0)))) { + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c980000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c980000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38980000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src file"); + break; + } + emitField(0x27, 4, insn->lanes); + } else { + emitInsn (0x01000000); + emitIMMD (0x14, 32, insn->src(0)); + emitField(0x0c, 4, insn->lanes); + } + + emitGPR(0x00, insn->def(0)); +} + +void 
+CodeEmitterGM107::emitS2R() +{ + emitInsn(0xf0c80000); + emitSYS (0x14, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitF2F() /* float-to-float conversion; also encodes FLOOR/CEIL/TRUNC/SAT/ABS/NEG ops */ +{ + RoundMode rnd = insn->rnd; + + switch (insn->op) { /* rounding ops override the instruction's own mode with round-to-integer variants */ + case OP_FLOOR: rnd = ROUND_MI; break; + case OP_CEIL : rnd = ROUND_PI; break; + case OP_TRUNC: rnd = ROUND_ZI; break; + default: + break; + } + + switch (insn->src(0).getFile()) { /* opcode form depends on where src0 lives */ + case FILE_GPR: + emitInsn(0x5ca80000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4ca80000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38a80000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src0 file"); + break; + } + + emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate); + emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); + emitCC (0x2f); + emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitFMZ (0x2c, 1); + emitRND (0x27, rnd, 0x2a); + emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); /* log2 of source size; same slot emitF2I/emitI2F/emitI2I use for sType */ + emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); /* log2 of destination size; was swapped with sType, mis-encoding F32<->F64 */ + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitF2I() +{ + RoundMode rnd = insn->rnd; + + switch (insn->op) { + case OP_FLOOR: rnd = ROUND_M; break; + case OP_CEIL : rnd = ROUND_P; break; + case OP_TRUNC: rnd = ROUND_Z; break; + default: + break; + } + + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5cb00000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4cb00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38b00000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src0 file"); + break; + } + + emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); + emitCC (0x2f); + emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitFMZ (0x2c, 1); + emitRND (0x27, rnd, 0x2a); + emitField(0x0c, 1, 
isSignedType(insn->dType)); + emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); + emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitI2F() +{ + RoundMode rnd = insn->rnd; + + switch (insn->op) { + case OP_FLOOR: rnd = ROUND_M; break; + case OP_CEIL : rnd = ROUND_P; break; + case OP_TRUNC: rnd = ROUND_Z; break; + default: + break; + } + + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5cb80000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4cb80000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38b80000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src0 file"); + break; + } + + emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); + emitCC (0x2f); + emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitRND (0x27, rnd, -1); + emitField(0x0d, 1, isSignedType(insn->sType)); + emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); + emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitI2I() +{ + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5ce00000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4ce00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38e00000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src0 file"); + break; + } + + emitSAT (0x32); + emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); + emitCC (0x2f); + emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); + emitField(0x0d, 1, isSignedType(insn->sType)); + emitField(0x0c, 1, isSignedType(insn->dType)); + emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); + emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); + 
emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitSHFL() +{ + int type = 0; + + emitInsn (0xef100000); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitGPR(0x14, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitIMMD(0x14, 5, insn->src(1)); + type |= 1; + break; + default: + assert(!"invalid src1 file"); + break; + } + + /*XXX: what is this arg? hardcode immediate for now */ + emitField(0x22, 13, 0x1c03); + type |= 2; + + emitPRED (0x30); + emitField(0x1e, 2, insn->subOp); + emitField(0x1c, 2, type); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/******************************************************************************* + * double + ******************************************************************************/ + +void +CodeEmitterGM107::emitDADD() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c700000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c700000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38700000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitABS(0x31, insn->src(1)); + emitNEG(0x30, insn->src(0)); + emitCC (0x2f); + emitABS(0x2e, insn->src(0)); + emitNEG(0x2d, insn->src(1)); + + if (insn->op == OP_SUB) + code[1] ^= 0x00002000; + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitDMUL() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c680000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c680000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38680000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitNEG2(0x30, insn->src(0), insn->src(1)); + emitCC (0x2f); + emitRND (0x27); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, 
insn->def(0)); +} + +void +CodeEmitterGM107::emitDFMA() +{ + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b700000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b700000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36700000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x53700000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitRND (0x32); + emitNEG (0x31, insn->src(2)); + emitNEG2(0x30, insn->src(0), insn->src(1)); + emitCC (0x2f); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitDMNMX() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c500000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c500000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38500000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitABS (0x31, insn->src(1)); + emitNEG (0x30, insn->src(0)); + emitCC (0x2f); + emitABS (0x2e, insn->src(0)); + emitNEG (0x2d, insn->src(1)); + emitField(0x2a, 1, insn->op == OP_MAX); + emitPRED (0x27); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void +CodeEmitterGM107::emitDSET() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x59000000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x49000000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x32000000); + emitIMMD(0x14, 19, insn->src(1)); + break; + 
default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitABS (0x36, insn->src(0)); + emitNEG (0x35, insn->src(1)); + emitCond4(0x30, insn->setCond); + emitCC (0x2f); + emitABS (0x2c, insn->src(1)); + emitNEG (0x2b, insn->src(0)); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode a double compare whose results are predicates: def(0) is emitted at 0x03 and def(1) at 0x00 when it exists (otherwise emitPRED(0x00) is called without an operand). For OP_SET_AND/OR/XOR the 2-bit field at 0x2d is set to 0/1/2 and src2 is emitted as the predicate at 0x27; plain OP_SET calls emitPRED(0x27) without an operand. */ +void +CodeEmitterGM107::emitDSETP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b800000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b800000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36800000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitCond4(0x30, insn->setCond); + emitABS (0x2c, insn->src(1)); + emitNEG (0x2b, insn->src(0)); + emitGPR (0x08, insn->src(0)); + emitABS (0x07, insn->src(0)); + emitNEG (0x06, insn->src(1)); + emitPRED (0x03, insn->def(0)); + if (insn->defExists(1)) + emitPRED(0x00, insn->def(1)); + else + emitPRED(0x00); +} + +/******************************************************************************* + * float + ******************************************************************************/ + +/* Encode a float add/subtract. When longIMMD(src1) is false the base opcode is chosen by the file of src1 (GPR, constant buffer or immediate); otherwise the form based on 0x08000000 is used and src1 is emitted with emitIMMD(0x14, 32, ...). OP_SUB is implemented by toggling a bit of code[1] after the operand modifiers have been emitted. */ +void +CodeEmitterGM107::emitFADD() +{ + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) {
+ case FILE_GPR: + emitInsn(0x5c580000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c580000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38580000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitSAT(0x32); + emitABS(0x31, insn->src(1)); + emitNEG(0x30, insn->src(0)); + emitCC (0x2f); + emitABS(0x2e, insn->src(0)); + emitNEG(0x2d, insn->src(1)); + emitFMZ(0x2c, 1); + + /* OP_SUB: toggle the src1 negate bit emitted by emitNEG(0x2d, ...) above */ + if (insn->op == OP_SUB) + code[1] ^= 0x00002000; + } else { + emitInsn(0x08000000); + emitABS(0x39, insn->src(1)); + emitNEG(0x38, insn->src(0)); + emitFMZ(0x37, 1); + emitABS(0x36, insn->src(0)); + emitNEG(0x35, insn->src(1)); + emitCC (0x34); + emitIMMD(0x14, 32, insn->src(1)); + + /* OP_SUB: in this form bit 0x00002000 of code[1] lies inside the 32-bit immediate emitted at 0x14, so toggling it would corrupt the constant. Negate src1 by flipping the immediate's sign bit instead, the same bit emitFMUL() flips. */ + if (insn->op == OP_SUB) + code[1] ^= 0x00080000; /* flip immd sign bit */ + } + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +/* Encode a float multiply. When longIMMD(src1) is false the base opcode is chosen by the file of src1 and the negate modifiers of both sources are emitted as one flag through emitNEG2(0x30, ...). Otherwise the form based on 0x1e000000 is used with a 32-bit immediate; no negate flag is emitted there, so when exactly one of the two sources is negated the immediate's sign bit is flipped in code[1]. */ +void +CodeEmitterGM107::emitFMUL() +{ + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c680000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c680000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38680000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitSAT (0x32); + emitNEG2(0x30, insn->src(0), insn->src(1)); + emitCC (0x2f); + emitFMZ (0x2c, 2); + emitPDIV(0x29); + emitRND (0x27); + } else { + emitInsn(0x1e000000); + emitSAT (0x37); + emitFMZ (0x35, 2); + emitCC (0x34); + emitIMMD(0x14, 32, insn->src(1)); + if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg()) + code[1] ^= 0x00080000; /* flip immd sign bit */ + } + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +/* Encode a float fused multiply-add. The form is chosen by the file of src2 first: with a GPR src2 (emitted at 0x27), src1 may be a GPR, constant buffer or immediate; with a constant-buffer src2, src1 is emitted as a GPR at 0x27. Other combinations assert. The 32-bit immediate variant is deliberately not used (see the XXX note in the body). */ +void +CodeEmitterGM107::emitFFMA() +{ + /*XXX: ffma32i exists, but not using it as third src overlaps dst */ + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x59800000); +
emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x49800000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x32800000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x51800000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + emitRND (0x33); + emitSAT (0x32); + emitNEG (0x31, insn->src(2)); + emitNEG2(0x30, insn->src(0), insn->src(1)); + emitCC (0x2f); + + emitFMZ(0x35, 2); + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +/* Encode a MUFU instruction (opcode 0x50800000). insn->op selects the 3-bit function field at 0x14: COS=0, SIN=1, EX2=2, LG2=3, RCP=4, RSQ=5. Any other op asserts; with asserts compiled out the selector stays 0. */ +void +CodeEmitterGM107::emitMUFU() +{ + int mufu = 0; + + switch (insn->op) { + case OP_COS: mufu = 0; break; + case OP_SIN: mufu = 1; break; + case OP_EX2: mufu = 2; break; + case OP_LG2: mufu = 3; break; + case OP_RCP: mufu = 4; break; + case OP_RSQ: mufu = 5; break; + default: + assert(!"invalid mufu"); + break; + } + + emitInsn (0x50800000); + emitSAT (0x32); + emitNEG (0x30, insn->src(0)); + emitABS (0x2e, insn->src(0)); + emitField(0x14, 3, mufu); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode a float min/max. The base opcode is chosen by the file of src1. The 1-bit field at 0x2a is set for OP_MAX and clear for any other op; emitPRED(0x27) is called without an operand. */ +void +CodeEmitterGM107::emitFMNMX() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c600000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c600000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38600000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x2a, 1, insn->op == OP_MAX); + emitPRED (0x27); + + emitABS(0x31, insn->src(1)); + emitNEG(0x30, insn->src(0)); + emitCC (0x2f); + emitABS(0x2e, insn->src(0)); + emitNEG(0x2d, insn->src(1)); + emitFMZ(0x2c, 1); + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +/* Encode RRO. The single source src0 is emitted at position 0x14 (GPR or immediate) or through emitCBUF(), with the base opcode chosen by its file. The 1-bit field at 0x27 is set when the op is OP_PREEX2. No register is emitted at 0x08. */ +void
+CodeEmitterGM107::emitRRO() +{ + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c900000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c900000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38900000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src file"); + break; + } + + emitABS (0x31, insn->src(0)); + emitNEG (0x2d, insn->src(0)); + emitField(0x27, 1, insn->op == OP_PREEX2); + emitGPR (0x00, insn->def(0)); +} + +/* Encode a three-source float compare (FCMP). If src2 carries a negate modifier, the condition code is replaced by reverseCondCode(cc); no negate flag is emitted for src2. The form is chosen by the file of src2 first (GPR or constant buffer) and then, for a GPR src2, by the file of src1. */ +void +CodeEmitterGM107::emitFCMP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + CondCode cc = insn->setCond; + + if (insn->src(2).mod.neg()) + cc = reverseCondCode(cc); + + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5ba00000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4ba00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36a00000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x53a00000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitCond4(0x30, cc); + emitFMZ (0x2f, 1); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode a float compare whose result def(0) is emitted as a GPR. For OP_SET_AND/OR/XOR the 2-bit field at 0x2d is set to 0/1/2 and src2 is emitted as the predicate at 0x27; plain OP_SET calls emitPRED(0x27) without an operand. The 1-bit field at 0x34 is set when dType is TYPE_F32. */ +void +CodeEmitterGM107::emitFSET() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x58000000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x48000000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x30000000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { +
switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitFMZ (0x37, 1); + emitABS (0x36, insn->src(0)); + emitNEG (0x35, insn->src(1)); + emitField(0x34, 1, insn->dType == TYPE_F32); + emitCond4(0x30, insn->setCond); + emitCC (0x2f); + emitABS (0x2c, insn->src(1)); + emitNEG (0x2b, insn->src(0)); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode a float compare whose results are predicates: def(0) is emitted at 0x03 and def(1) at 0x00 when it exists (otherwise emitPRED(0x00) is called without an operand). For OP_SET_AND/OR/XOR the 2-bit field at 0x2d is set to 0/1/2 and src2 is emitted as the predicate at 0x27; plain OP_SET calls emitPRED(0x27) without an operand. */ +void +CodeEmitterGM107::emitFSETP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5bb00000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4bb00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36b00000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitCond4(0x30, insn->setCond); + emitFMZ (0x2f, 1); + emitABS (0x2c, insn->src(1)); + emitNEG (0x2b, insn->src(0)); + emitGPR (0x08, insn->src(0)); + emitABS (0x07, insn->src(0)); + emitNEG (0x06, insn->src(1)); + emitPRED (0x03, insn->def(0)); + if (insn->defExists(1)) + emitPRED(0x00, insn->def(1)); + else + emitPRED(0x00); +} + +/* Encode FSWZADD (opcode 0x50f80000) with two GPR sources. insn->lanes is reused to carry the 1-bit .ndv flag at 0x26 and insn->subOp fills the 8-bit field at 0x1c. */ +void +CodeEmitterGM107::emitFSWZADD() +{ + emitInsn (0x50f80000); + emitCC (0x2f); + emitFMZ (0x2c, 1); + emitRND (0x27); + emitField(0x26, 1, insn->lanes); /* abused for .ndv */ + emitField(0x1c, 8, insn->subOp); + emitGPR (0x14, insn->src(1)); + emitGPR (0x08,
insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/******************************************************************************* + * integer + ******************************************************************************/ + +/* Encode a bitwise AND/OR/XOR. insn->op is mapped to the 2-bit "lop" selector (AND=0, OR=1, XOR=2; anything else asserts). When longIMMD(src1) is false the base opcode is chosen by the file of src1 and the selector goes to 0x29; otherwise the form based on 0x04000000 is used with a 32-bit immediate and the selector at 0x35. Both forms emit an invert flag per source through emitINV(). */ +void +CodeEmitterGM107::emitLOP() +{ + int lop = 0; + + switch (insn->op) { + case OP_AND: lop = 0; break; + case OP_OR : lop = 1; break; + case OP_XOR: lop = 2; break; + default: + assert(!"invalid lop"); + break; + } + + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c400000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c400000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38400000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitPRED (0x30); + emitField(0x29, 2, lop); + emitINV (0x28, insn->src(1)); + emitINV (0x27, insn->src(0)); + } else { + emitInsn (0x04000000); + emitINV (0x38, insn->src(1)); + emitINV (0x37, insn->src(0)); + emitField(0x35, 2, lop); + emitIMMD (0x14, 32, insn->src(1)); + } + + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* special-case of emitLOP(): lop pass_b dst 0 ~src */ +/* The only operand is src0. It is emitted at position 0x14 (GPR or immediate) or through emitCBUF(); emitGPR(0x08) is called without an operand for the unused first source. The 32-bit immediate form must therefore also read src0 - the instruction has no src1. */ +void +CodeEmitterGM107::emitNOT() +{ + if (!longIMMD(insn->src(0))) { + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c400700); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c400700); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38400700); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src file"); + break; + } + emitPRED (0x30); + } else { + emitInsn (0x05600000); + emitIMMD (0x14, 32, insn->src(0)); + } + + emitGPR(0x08); + emitGPR(0x00, insn->def(0)); +} + +/* Encode an integer add/subtract. When longIMMD(src1) is false the base opcode is chosen by the file of src1 and a negate flag is emitted for each source; otherwise the form based on 0x1c000000 is used with a 32-bit immediate. OP_SUB toggles bit 0x00010000 of code[1] in both forms. NOTE(review): in the 32-bit immediate form that bit appears to fall inside the immediate emitted at 0x14 - confirm OP_SUB never reaches that path. */ +void +CodeEmitterGM107::emitIADD() +{ + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) { + case FILE_GPR: +
emitInsn(0x5c100000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c100000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38100000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitSAT(0x32); + emitNEG(0x31, insn->src(0)); + emitNEG(0x30, insn->src(1)); + emitCC (0x2f); + } else { + emitInsn(0x1c000000); + emitSAT (0x36); + emitCC (0x34); + emitIMMD(0x14, 32, insn->src(1)); + } + + if (insn->op == OP_SUB) + code[1] ^= 0x00010000; + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +/* Encode an integer multiply. When longIMMD(src1) is false the base opcode is chosen by the file of src1; otherwise the form based on 0x1f000000 is used with a 32-bit immediate. Both forms emit three 1-bit flags: whether sType is signed, whether dType is signed, and whether subOp is NV50_IR_SUBOP_MUL_HIGH. */ +void +CodeEmitterGM107::emitIMUL() +{ + if (!longIMMD(insn->src(1))) { + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c380000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c380000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38380000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitCC (0x2f); + emitField(0x29, 1, isSignedType(insn->sType)); + emitField(0x28, 1, isSignedType(insn->dType)); + emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); + } else { + emitInsn (0x1f000000); + emitField(0x37, 1, isSignedType(insn->sType)); + emitField(0x36, 1, isSignedType(insn->dType)); + emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); + emitCC (0x34); + emitIMMD (0x14, 32, insn->src(1)); + } + + emitGPR(0x08, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +/* Encode an integer multiply-add. The form is chosen by the file of src2 first: with a GPR src2 (emitted at 0x27), src1 may be a GPR, constant buffer or immediate; with a constant-buffer src2, src1 is emitted as a GPR at 0x27. Other combinations assert. The 32-bit immediate variant is deliberately not used (see the XXX note in the body). */ +void +CodeEmitterGM107::emitIMAD() +{ + /*XXX: imad32i exists, but not using it as third src overlaps dst */ + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5a000000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4a000000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: +
emitInsn(0x34000000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x52000000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); + emitField(0x35, 1, isSignedType(insn->sType)); + emitNEG (0x34, insn->src(2)); + emitNEG2 (0x33, insn->src(0), insn->src(1)); + emitSAT (0x32); + emitX (0x31); + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode an integer min/max. The base opcode is chosen by the file of src1. The 1-bit field at 0x30 is set when dType is signed and the one at 0x2a when the op is OP_MAX; emitPRED(0x27) is called without an operand. */ +void +CodeEmitterGM107::emitIMNMX() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c200000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c200000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38200000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitField(0x2a, 1, insn->op == OP_MAX); + emitPRED (0x27); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode a three-source integer compare (ICMP). If src2 carries a negate modifier, the condition code is replaced by reverseCondCode(cc); no negate flag is emitted for src2. The form is chosen by the file of src2 first and then, for a GPR src2, by the file of src1. The condition is emitted with emitCond3() and the 1-bit field at 0x30 is set when sType is signed. */ +void +CodeEmitterGM107::emitICMP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + CondCode cc = insn->setCond; + + if (insn->src(2).mod.neg()) + cc = reverseCondCode(cc); + + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b400000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b400000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36400000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: +
emitInsn(0x53400000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitCond3(0x31, cc); + emitField(0x30, 1, isSignedType(insn->sType)); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode an integer compare whose result def(0) is emitted as a GPR. For OP_SET_AND/OR/XOR the 2-bit field at 0x2d is set to 0/1/2 and src2 is emitted as the predicate at 0x27; plain OP_SET calls emitPRED(0x27) without an operand. The 1-bit field at 0x30 is set when sType is signed. */ +void +CodeEmitterGM107::emitISET() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b500000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b500000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36500000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } + emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitCond3(0x31, insn->setCond); + emitField(0x30, 1, isSignedType(insn->sType)); + emitCC (0x2f); + emitX (0x2b); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode an integer compare whose results are predicates: def(0) is emitted at 0x03 and def(1) at 0x00 when it exists (otherwise emitPRED(0x00) is called without an operand). For OP_SET_AND/OR/XOR the 2-bit field at 0x2d is set to 0/1/2 and src2 is emitted as the predicate at 0x27; plain OP_SET calls emitPRED(0x27) without an operand. */ +void +CodeEmitterGM107::emitISETP() +{ + const CmpInstruction *insn = this->insn->asCmp(); + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5b600000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4b600000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36600000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + if (insn->op != OP_SET) { + switch (insn->op) { + case OP_SET_AND: emitField(0x2d, 2, 0); break; + case OP_SET_OR : emitField(0x2d, 2, 1); break; + case OP_SET_XOR: emitField(0x2d, 2, 2); break; + default: + assert(!"invalid set op"); + break; + } +
emitPRED(0x27, insn->src(2)); + } else { + emitPRED(0x27); + } + + emitCond3(0x31, insn->setCond); + emitField(0x30, 1, isSignedType(insn->sType)); + emitX (0x2b); + emitGPR (0x08, insn->src(0)); + emitPRED (0x03, insn->def(0)); + if (insn->defExists(1)) + emitPRED(0x00, insn->def(1)); + else + emitPRED(0x00); +} + +/* Encode a left shift. The base opcode is chosen by the file of src1 (GPR, constant buffer or immediate). The 1-bit field at 0x27 is set when subOp is NV50_IR_SUBOP_SHIFT_WRAP. */ +void +CodeEmitterGM107::emitSHL() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c480000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c480000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38480000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitCC (0x2f); + emitX (0x2b); + emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode a right shift. The base opcode is chosen by the file of src1. The 1-bit field at 0x30 is set when dType is signed and the one at 0x27 when subOp is NV50_IR_SUBOP_SHIFT_WRAP. */ +void +CodeEmitterGM107::emitSHR() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c280000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c280000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38280000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitX (0x2c); + emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode POPC. The single source src0 is emitted at position 0x14 (GPR or immediate) or through emitCBUF(), with the base opcode chosen by its file; its invert flag goes to 0x28. No register is emitted at 0x08. The assert names the operand generically because the switch inspects src0, not src1. */ +void +CodeEmitterGM107::emitPOPC() +{ + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c080000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c080000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38080000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src file"); + break; + } + + emitINV(0x28, insn->src(0)); + emitGPR(0x00, insn->def(0)); +} + +/* Encode BFI. The form is chosen by the file of src2 first: with a GPR src2 (emitted at 0x27), src1 may be a GPR, constant buffer or immediate; with a constant-buffer src2, src1 is emitted as a GPR at 0x27. Other combinations assert. */ +void
+CodeEmitterGM107::emitBFI() +{ + switch(insn->src(2).getFile()) { + case FILE_GPR: + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5bf00000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4bf00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x36f00000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + emitGPR (0x27, insn->src(2)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x53f00000); + emitGPR (0x27, insn->src(1)); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); + break; + default: + assert(!"bad src2 file"); + break; + } + + emitCC (0x2f); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode BFE. The base opcode is chosen by the file of src1 (GPR, constant buffer or immediate). The 1-bit field at 0x30 is set when dType is signed and the one at 0x28 when subOp is NV50_IR_SUBOP_EXTBF_REV. */ +void +CodeEmitterGM107::emitBFE() +{ + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0x5c000000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c000000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38000000); + emitIMMD(0x14, 19, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode FLO. The single source src0 is emitted at position 0x14 (GPR or immediate) or through emitCBUF(), with the base opcode chosen by its file. The 1-bit field at 0x30 is set when dType is signed, the one at 0x29 when subOp is NV50_IR_SUBOP_BFIND_SAMT, and the invert flag of src0 goes to 0x28. No register is emitted at 0x08. The assert names the operand generically because the switch inspects src0, not src1. */ +void +CodeEmitterGM107::emitFLO() +{ + switch (insn->src(0).getFile()) { + case FILE_GPR: + emitInsn(0x5c300000); + emitGPR (0x14, insn->src(0)); + break; + case FILE_MEMORY_CONST: + emitInsn(0x4c300000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); + break; + case FILE_IMMEDIATE: + emitInsn(0x38300000); + emitIMMD(0x14, 19, insn->src(0)); + break; + default: + assert(!"bad src file"); + break; + } + + emitField(0x30, 1, isSignedType(insn->dType)); + emitCC (0x2f); + emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); + emitINV (0x28, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} +
+/******************************************************************************* + * memory + ******************************************************************************/ + +/* Emit the 3-bit data-type field of a load/store at bit position pos. The value depends on typeSizeof(type): 1 byte -> 0 (unsigned) or 1 (signed), 2 bytes -> 2 (unsigned) or 3 (signed), 4 bytes -> 4, 8 bytes -> 5, 16 bytes -> 6. Any other size asserts; with asserts compiled out 0 is emitted. */ +void +CodeEmitterGM107::emitLDSTs(int pos, DataType type) +{ + int data = 0; + + switch (typeSizeof(type)) { + case 1: data = isSignedType(type) ? 1 : 0; break; + case 2: data = isSignedType(type) ? 3 : 2; break; + case 4: data = 4; break; + case 8: data = 5; break; + case 16: data = 6; break; + default: + assert(!"bad type"); + break; + } + + emitField(pos, 3, data); +} + +/* Emit the 2-bit cache-mode field of a load/store at bit position pos from insn->cache: CACHE_CA=0, CACHE_CG=1, CACHE_CS=2, CACHE_CV=3. Any other value asserts; with asserts compiled out 0 is emitted. */ +void +CodeEmitterGM107::emitLDSTc(int pos) +{ + int mode = 0; + + switch (insn->cache) { + case CACHE_CA: mode = 0; break; + case CACHE_CG: mode = 1; break; + case CACHE_CS: mode = 2; break; + case CACHE_CV: mode = 3; break; + default: + assert(!"invalid caching mode"); + break; + } + + emitField(pos, 2, mode); +} + +/* Encode LDC (opcode 0xef900000): data type of dType at 0x30, src0 through emitCBUF(), destination def(0) at 0x00. */ +void +CodeEmitterGM107::emitLDC() +{ + emitInsn (0xef900000); + emitLDSTs(0x30, insn->dType); + emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode LDL (opcode 0xef400000): data type at 0x30, cache mode at 0x2c, address from src0 through emitADDR(), destination def(0) at 0x00. */ +void +CodeEmitterGM107::emitLDL() +{ + emitInsn (0xef400000); + emitLDSTs(0x30, insn->dType); + emitLDSTc(0x2c); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode LDS (opcode 0xef480000): like emitLDL() but without a cache-mode field. */ +void +CodeEmitterGM107::emitLDS() +{ + emitInsn (0xef480000); + emitLDSTs(0x30, insn->dType); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode LD (opcode 0x80000000): emitPRED(0x3a) without an operand, cache mode at 0x38, data type at 0x35, address from src0 through emitADDR(), destination def(0) at 0x00. */ +void +CodeEmitterGM107::emitLD() +{ + emitInsn (0x80000000); + emitPRED (0x3a); + emitLDSTc(0x38); + emitLDSTs(0x35, insn->dType); + emitADDR (0x08, 0x14, 32, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode STL (opcode 0xef500000): same layout as emitLDL(), but the register at 0x00 is the stored value src1 rather than a destination. */ +void +CodeEmitterGM107::emitSTL() +{ + emitInsn (0xef500000); + emitLDSTs(0x30, insn->dType); + emitLDSTc(0x2c); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR (0x00, insn->src(1)); +} + +/* Encode STS (opcode 0xef580000): same layout as emitLDS(), but the register at 0x00 is the stored value src1 rather than a destination. */ +void +CodeEmitterGM107::emitSTS() +{ + emitInsn (0xef580000); + emitLDSTs(0x30, insn->dType); + emitADDR (0x08, 0x14, 24, 0, insn->src(0)); + emitGPR
(0x00, insn->src(1)); +} + +/* Encode ST (opcode 0xa0000000): emitPRED(0x3a) without an operand, cache mode at 0x38, data type at 0x35, address from src0 through emitADDR(), stored value src1 at 0x00. */ +void +CodeEmitterGM107::emitST() +{ + emitInsn (0xa0000000); + emitPRED (0x3a); + emitLDSTc(0x38); + emitLDSTs(0x35, insn->dType); + emitADDR (0x08, 0x14, 32, 0, insn->src(0)); + emitGPR (0x00, insn->src(1)); +} + +/* Encode ALD (opcode 0xefd80000). The 2-bit field at 0x2f holds (size of def(0) in bytes / 4) - 1, the register at 0x27 is the second indirect of src0 (getIndirect(1)), and the address comes from src0 through emitADDR(). */ +void +CodeEmitterGM107::emitALD() +{ + emitInsn (0xefd80000); + emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1); + emitGPR (0x27, insn->src(0).getIndirect(1)); + emitO (0x20); + emitP (0x1f); + emitADDR (0x08, 20, 10, 0, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode AST (opcode 0xeff00000). The 2-bit field at 0x2f holds (typeSizeof(dType) / 4) - 1, the register at 0x27 is the second indirect of src0 (getIndirect(1)), and the stored value src1 is emitted at 0x00. */ +void +CodeEmitterGM107::emitAST() +{ + emitInsn (0xeff00000); + emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1); + emitGPR (0x27, insn->src(0).getIndirect(1)); + emitP (0x1f); + emitADDR (0x08, 20, 10, 0, insn->src(0)); + emitGPR (0x00, insn->src(1)); +} + +/* Encode ISBERD (opcode 0xefd00000) with src0 at 0x08 and def(0) at 0x00. */ +void +CodeEmitterGM107::emitISBERD() +{ + emitInsn(0xefd00000); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode IPA (opcode 0xe0000000). The interpolation mode (LINEAR=0, PERSPECTIVE=1, FLAT=2, SC=3) goes to 0x36 and the sample mode (DEFAULT=0, CENTROID=1, OFFSET=2) to 0x34. After emitADDR() the .idx bit (0x40 in code[1]) is set whenever bits 8..15 of code[0] are not all ones. For OP_PINTERP src1 is emitted at 0x14, otherwise emitGPR(0x14) is called without an operand; the OFFSET sample mode emits one extra source at 0x27, every other mode calls emitGPR(0x27) without an operand. */ +void +CodeEmitterGM107::emitIPA() +{ + int ipam = 0, ipas = 0; + + switch (insn->getInterpMode()) { + case NV50_IR_INTERP_LINEAR : ipam = 0; break; + case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break; + case NV50_IR_INTERP_FLAT : ipam = 2; break; + case NV50_IR_INTERP_SC : ipam = 3; break; + default: + assert(!"invalid ipa mode"); + break; + } + + switch (insn->getSampleMode()) { + case NV50_IR_INTERP_DEFAULT : ipas = 0; break; + case NV50_IR_INTERP_CENTROID: ipas = 1; break; + case NV50_IR_INTERP_OFFSET : ipas = 2; break; + default: + assert(!"invalid ipa sample mode"); + break; + } + + emitInsn (0xe0000000); + emitField(0x36, 2, ipam); + emitField(0x34, 2, ipas); + emitSAT (0x33); + emitField(0x2f, 3, 7); + emitADDR (0x08, 0x1c, 10, 0, insn->src(0)); + if ((code[0] & 0x0000ff00) != 0x0000ff00) + code[1] |= 0x00000040; /* .idx */ + emitGPR(0x00, insn->def(0)); + + if (insn->op == OP_PINTERP) { + emitGPR(0x14, insn->src(1)); + if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) + emitGPR(0x27, insn->src(2)); + } else { + if (insn->getSampleMode() ==
NV50_IR_INTERP_OFFSET) + emitGPR(0x27, insn->src(1)); + emitGPR(0x14); + } + + if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) + emitGPR(0x27); +} + +/******************************************************************************* + * surface + ******************************************************************************/ + +/* Encode PIXLD (opcode 0xefe80000): emitPRED(0x2d) without an operand, insn->subOp in the 3-bit field at 0x1f, src0 at 0x08 and def(0) at 0x00. */ +void +CodeEmitterGM107::emitPIXLD() +{ + emitInsn (0xefe80000); + emitPRED (0x2d); + emitField(0x1f, 3, insn->subOp); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/******************************************************************************* + * texture + ******************************************************************************/ + +/* Emit the second texture source register at bit position pos. The source index is 1, or 2 when insn->predSrc is 1 (source slot 1 is then taken by the predicate). If that source does not exist, emitGPR(pos) is called without an operand. */ +void +CodeEmitterGM107::emitTEXs(int pos) +{ + int src1 = insn->predSrc == 1 ? 2 : 1; + if (insn->srcExists(src1)) + emitGPR(pos, insn->src(src1)); + else + emitGPR(pos); +} + +/* Encode OP_TEX / OP_TXB / OP_TXL. The 2-bit LOD mode is 1 when tex.levelZero is set, otherwise TEX=0, TXB=2, TXL=3 (other ops assert). When tex.rIndirectSrc >= 0 the form based on 0xdeb80000 is used; otherwise the form based on 0xc0380000 is used and tex.r is written to the 13-bit field at 0x24. The dimension field at 0x1d is 3 for cube targets and getDim() - 1 otherwise. */ +void +CodeEmitterGM107::emitTEX() +{ + const TexInstruction *insn = this->insn->asTex(); + int lodm = 0; + + if (!insn->tex.levelZero) { + switch (insn->op) { + case OP_TEX: lodm = 0; break; + case OP_TXB: lodm = 2; break; + case OP_TXL: lodm = 3; break; + default: + assert(!"invalid tex op"); + break; + } + } else { + lodm = 1; + } + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xdeb80000); + emitField(0x35, 2, lodm); + emitField(0x24, 1, insn->tex.useOffsets == 1); + } else { + emitInsn (0xc0380000); + emitField(0x37, 2, lodm); + emitField(0x36, 1, insn->tex.useOffsets == 1); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x32, 1, insn->tex.target.isShadow()); + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.derivAll); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ?
3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode TLD. When tex.rIndirectSrc >= 0 the opcode is 0xdd380000; otherwise it is 0xdc380000 and tex.r is written to the 13-bit field at 0x24. The 1-bit field at 0x37 is set when tex.levelZero is 0, the one at 0x32 for multisample targets and the one at 0x23 when tex.useOffsets is 1. */ +void +CodeEmitterGM107::emitTLD() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xdd380000); + } else { + emitInsn (0xdc380000); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x37, 1, insn->tex.levelZero == 0); + emitField(0x32, 1, insn->tex.target.isMS()); + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.useOffsets == 1); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode TLD4. When tex.rIndirectSrc >= 0 the opcode is 0xdef80000; otherwise it is 0xc8380000 and tex.r is written to the 13-bit field at 0x24. Each form emits tex.gatherComp and two flags derived from tex.useOffsets (== 4 and == 1). NOTE(review): all three are emitted 2 bits wide at consecutive bit positions, so the fields appear to overlap; the flags are only ever 0 or 1 - confirm a width of 1 was intended for them. */ +void +CodeEmitterGM107::emitTLD4() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xdef80000); + emitField(0x26, 2, insn->tex.gatherComp); + emitField(0x25, 2, insn->tex.useOffsets == 4); + emitField(0x24, 2, insn->tex.useOffsets == 1); + } else { + emitInsn (0xc8380000); + emitField(0x38, 2, insn->tex.gatherComp); + emitField(0x37, 2, insn->tex.useOffsets == 4); + emitField(0x36, 2, insn->tex.useOffsets == 1); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x32, 1, insn->tex.target.isShadow()); + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.derivAll); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ?
3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode TXD. When tex.rIndirectSrc >= 0 the opcode is 0xde780000; otherwise it is 0xde380000 and tex.r is written to the 13-bit field at 0x24. The 1-bit field at 0x23 is set when tex.useOffsets is 1. */ +void +CodeEmitterGM107::emitTXD() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xde780000); + } else { + emitInsn (0xde380000); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.useOffsets == 1); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode TMML. When tex.rIndirectSrc >= 0 the opcode is 0xdf600000; otherwise it is 0xdf580000 and tex.r is written to the 13-bit field at 0x24. The 1-bit field at 0x23 carries tex.derivAll. */ +void +CodeEmitterGM107::emitTMML() +{ + const TexInstruction *insn = this->insn->asTex(); + + if (insn->tex.rIndirectSrc >= 0) { + emitInsn (0xdf600000); + } else { + emitInsn (0xdf580000); + emitField(0x24, 13, insn->tex.r); + } + + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x23, 1, insn->tex.derivAll); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x1d, 2, insn->tex.target.isCube() ?
3 : + insn->tex.target.getDim() - 1); + emitField(0x1c, 1, insn->tex.target.isArray()); + emitTEXs (0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode TXQ (opcode 0xdf4a0000). tex.query is mapped to the 6-bit field at 0x16: DIMS=0x01, TYPE=0x02, SAMPLE_POSITION=0x05, FILTER=0x10, LOD=0x12, WRAP=0x14, BORDER_COLOUR=0x16. Any other query asserts; with asserts compiled out 0 is emitted. tex.r is always written to the 13-bit field at 0x24. */ +void +CodeEmitterGM107::emitTXQ() +{ + const TexInstruction *insn = this->insn->asTex(); + int type = 0; + + switch (insn->tex.query) { + case TXQ_DIMS : type = 0x01; break; + case TXQ_TYPE : type = 0x02; break; + case TXQ_SAMPLE_POSITION: type = 0x05; break; + case TXQ_FILTER : type = 0x10; break; + case TXQ_LOD : type = 0x12; break; + case TXQ_WRAP : type = 0x14; break; + case TXQ_BORDER_COLOUR : type = 0x16; break; + default: + assert(!"invalid txq query"); + break; + } + + emitInsn (0xdf4a0000); + emitField(0x24, 13, insn->tex.r); + emitField(0x1f, 4, insn->tex.mask); + emitField(0x16, 6, type); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +/* Encode DEPBAR (opcode 0xf0f00000). The "le" bit at 0x1d is always set and the 3-bit field at 0x1a is hard-coded to 5; insn->subOp is written to both 6-bit fields at 0x14 and 0x00. */ +void +CodeEmitterGM107::emitDEPBAR() +{ + emitInsn (0xf0f00000); + emitField(0x1d, 1, 1); /* le */ + emitField(0x1a, 3, 5); + emitField(0x14, 6, insn->subOp); + emitField(0x00, 6, insn->subOp); +} + +/******************************************************************************* + * misc + ******************************************************************************/ + +/* Encode NOP: only the opcode 0x50b00000 is emitted. */ +void +CodeEmitterGM107::emitNOP() +{ + emitInsn(0x50b00000); +} + +/* Encode KIL (opcode 0xe3300000) with the constant condition CC_TR emitted through emitCond5() at 0x00. */ +void +CodeEmitterGM107::emitKIL() +{ + emitInsn (0xe3300000); + emitCond5(0x00, CC_TR); +} + +/* Encode OUT for OP_EMIT / OP_RESTART. The 2-bit field at 0x27 is (cut << 1) | emit, where cut is set for OP_RESTART or a non-zero subOp and emit is set for OP_EMIT. The base opcode is chosen by the file of src1 (GPR, immediate or constant buffer). */ +void +CodeEmitterGM107::emitOUT() +{ + const int cut = insn->op == OP_RESTART || insn->subOp; + const int emit = insn->op == OP_EMIT; + + switch (insn->src(1).getFile()) { + case FILE_GPR: + emitInsn(0xfbe00000); + emitGPR (0x14, insn->src(1)); + break; + case FILE_IMMEDIATE: + emitInsn(0xf6e00000); + emitIMMD(0x14, 19, insn->src(1)); + break; + case FILE_MEMORY_CONST: + emitInsn(0xebe00000); + emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); + break; + default: + assert(!"bad src1 file"); + break; + } + + emitField(0x27, 2, (cut << 1) | emit); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00,
insn->def(0)); +} + +/******************************************************************************* + * assembler front-end + ******************************************************************************/ + +bool +CodeEmitterGM107::emitInstruction(Instruction *i) +{ + const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8; + bool ret = true; + + insn = i; + + if (insn->encSize != 8) { + ERROR("skipping undecodable instruction: "); insn->print(); + return false; + } else + if (codeSize + size > codeSizeLimit) { + ERROR("code emitter output buffer too small\n"); + return false; + } + + if (writeIssueDelays) { + int n = ((codeSize & 0x1f) / 8) - 1; + if (n < 0) { + data = code; + data[0] = 0x00000000; + data[1] = 0x00000000; + code += 2; + codeSize += 8; + n++; + } + + emitField(data, n * 21, 21, insn->sched); + } + + switch (insn->op) { + case OP_EXIT: + emitEXIT(); + break; + case OP_BRA: + emitBRA(); + break; + case OP_CALL: + emitCAL(); + break; + case OP_PRECONT: + emitPCNT(); + break; + case OP_CONT: + emitCONT(); + break; + case OP_PREBREAK: + emitPBK(); + break; + case OP_BREAK: + emitBRK(); + break; + case OP_PRERET: + emitPRET(); + break; + case OP_RET: + emitRET(); + break; + case OP_JOINAT: + emitSSY(); + break; + case OP_JOIN: + emitSYNC(); + break; + case OP_QUADON: + emitSAM(); + break; + case OP_QUADPOP: + emitRAM(); + break; + case OP_MOV: + if (insn->def(0).getFile() == FILE_GPR && + insn->src(0).getFile() != FILE_PREDICATE) + emitMOV(); + else + assert(!"R2P/P2R"); + break; + case OP_RDSV: + emitS2R(); + break; + case OP_ABS: + case OP_NEG: + case OP_SAT: + case OP_FLOOR: + case OP_CEIL: + case OP_TRUNC: + case OP_CVT: + if (isFloatType(insn->dType)) { + if (isFloatType(insn->sType)) + emitF2F(); + else + emitI2F(); + } else { + if (isFloatType(insn->sType)) + emitF2I(); + else + emitI2I(); + } + break; + case OP_SHFL: + emitSHFL(); + break; + case OP_ADD: + case OP_SUB: + if (isFloatType(insn->dType)) { + if (insn->dType == 
TYPE_F64) + emitDADD(); + else + emitFADD(); + } else { + emitIADD(); + } + break; + case OP_MUL: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F64) + emitDMUL(); + else + emitFMUL(); + } else { + emitIMUL(); + } + break; + case OP_MAD: + case OP_FMA: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F64) + emitDFMA(); + else + emitFFMA(); + } else { + emitIMAD(); + } + break; + case OP_MIN: + case OP_MAX: + if (isFloatType(insn->dType)) { + if (insn->dType == TYPE_F64) + emitDMNMX(); + else + emitFMNMX(); + } else { + emitIMNMX(); + } + break; + case OP_SHL: + emitSHL(); + break; + case OP_SHR: + emitSHR(); + break; + case OP_POPCNT: + emitPOPC(); + break; + case OP_INSBF: + emitBFI(); + break; + case OP_EXTBF: + emitBFE(); + break; + case OP_BFIND: + emitFLO(); + break; + case OP_SLCT: + if (isFloatType(insn->dType)) + emitFCMP(); + else + emitICMP(); + break; + case OP_SET: + case OP_SET_AND: + case OP_SET_OR: + case OP_SET_XOR: + if (insn->def(0).getFile() != FILE_PREDICATE) { + if (isFloatType(insn->sType)) + if (insn->sType == TYPE_F64) + emitDSET(); + else + emitFSET(); + else + emitISET(); + } else { + if (isFloatType(insn->sType)) + if (insn->sType == TYPE_F64) + emitDSETP(); + else + emitFSETP(); + else + emitISETP(); + } + break; + case OP_PRESIN: + case OP_PREEX2: + emitRRO(); + break; + case OP_COS: + case OP_SIN: + case OP_EX2: + case OP_LG2: + case OP_RCP: + case OP_RSQ: + emitMUFU(); + break; + case OP_AND: + case OP_OR: + case OP_XOR: + emitLOP(); + break; + case OP_NOT: + emitNOT(); + break; + case OP_LOAD: + switch (insn->src(0).getFile()) { + case FILE_MEMORY_CONST : emitLDC(); break; + case FILE_MEMORY_LOCAL : emitLDL(); break; + case FILE_MEMORY_SHARED: emitLDS(); break; + case FILE_MEMORY_GLOBAL: emitLD(); break; + default: + assert(!"invalid load"); + emitNOP(); + break; + } + break; + case OP_STORE: + switch (insn->src(0).getFile()) { + case FILE_MEMORY_LOCAL : emitSTL(); break; + case FILE_MEMORY_SHARED: emitSTS(); 
break; + case FILE_MEMORY_GLOBAL: emitST(); break; + default: + assert(!"invalid load"); + emitNOP(); + break; + } + break; + case OP_VFETCH: + emitALD(); + break; + case OP_EXPORT: + emitAST(); + break; + case OP_PFETCH: + emitISBERD(); + break; + case OP_LINTERP: + case OP_PINTERP: + emitIPA(); + break; + case OP_PIXLD: + emitPIXLD(); + break; + case OP_TEX: + case OP_TXB: + case OP_TXL: + emitTEX(); + break; + case OP_TXF: + emitTLD(); + break; + case OP_TXG: + emitTLD4(); + break; + case OP_TXD: + emitTXD(); + break; + case OP_TXQ: + emitTXQ(); + break; + case OP_TXLQ: + emitTMML(); + break; + case OP_TEXBAR: + emitDEPBAR(); + break; + case OP_QUADOP: + emitFSWZADD(); + break; + case OP_NOP: + emitNOP(); + break; + case OP_DISCARD: + emitKIL(); + break; + case OP_EMIT: + case OP_RESTART: + emitOUT(); + break; + default: + assert(!"invalid opcode"); + emitNOP(); + ret = false; + break; + } + + if (insn->join) { + /*XXX*/ + } + + code += 2; + codeSize += 8; + return ret; +} + +uint32_t +CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const +{ + return 8; +} + +/******************************************************************************* + * sched data calculator + ******************************************************************************/ + +class SchedDataCalculatorGM107 : public Pass +{ +public: + SchedDataCalculatorGM107(const Target *targ) : targ(targ) {} +private: + const Target *targ; + bool visit(BasicBlock *bb); +}; + +bool +SchedDataCalculatorGM107::visit(BasicBlock *bb) +{ + for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) { + /*XXX*/ + insn->sched = 0x7e0; + } + + return true; +} + +/******************************************************************************* + * main + ******************************************************************************/ + +void +CodeEmitterGM107::prepareEmission(Function *func) +{ + SchedDataCalculatorGM107 sched(targ); + CodeEmitter::prepareEmission(func); + sched.run(func, true, 
true); +} + +static inline uint32_t sizeToBundlesGM107(uint32_t size) +{ + return (size + 23) / 24; +} + +void +CodeEmitterGM107::prepareEmission(Program *prog) +{ + for (ArrayList::Iterator fi = prog->allFuncs.iterator(); + !fi.end(); fi.next()) { + Function *func = reinterpret_cast<Function *>(fi.get()); + func->binPos = prog->binSize; + prepareEmission(func); + + // adjust sizes & positions for scheduling info: + if (prog->getTarget()->hasSWSched) { + uint32_t adjPos = func->binPos; + BasicBlock *bb = NULL; + for (int i = 0; i < func->bbCount; ++i) { + bb = func->bbArray[i]; + int32_t adjSize = bb->binSize; + if (adjPos % 32) { + adjSize -= 32 - adjPos % 32; + if (adjSize < 0) + adjSize = 0; + } + adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8; + bb->binPos = adjPos; + bb->binSize = adjSize; + adjPos += adjSize; + } + if (bb) + func->binSize = adjPos - func->binPos; + } + + prog->binSize += func->binSize; + } +} + +CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target) + : CodeEmitter(target), + targGM107(target), + writeIssueDelays(target->hasSWSched) +{ + code = NULL; + codeSize = codeSizeLimit = 0; + relocInfo = NULL; +} + +CodeEmitter * +TargetGM107::createCodeEmitterGM107(Program::Type type) +{ + CodeEmitterGM107 *emit = new CodeEmitterGM107(this); + emit->setProgramType(type); + return emit; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp new file mode 100644 index 00000000000..070b20a2133 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -0,0 +1,273 @@ +/* + * Copyright 2011 Christoph Bumiller + * 2014 Red Hat Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_build_util.h" + +#include "codegen/nv50_ir_target_nvc0.h" +#include "codegen/nv50_ir_lowering_gm107.h" + +#include + +namespace nv50_ir { + +#define QOP_ADD 0 +#define QOP_SUBR 1 +#define QOP_SUB 2 +#define QOP_MOV2 3 + +// UL UR LL LR +#define QUADOP(q, r, s, t) \ + ((QOP_##q << 6) | (QOP_##r << 4) | \ + (QOP_##s << 2) | (QOP_##t << 0)) + +bool +GM107LoweringPass::handleManualTXD(TexInstruction *i) +{ + static const uint8_t qOps[4][2] = + { + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 + { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 + { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 + }; + Value *def[4][4]; + Value *crd[3]; + Value *tmp; + Instruction *tex, *add; + Value *zero = bld.loadImm(bld.getSSA(), 0); + int l, c; + const int dim = i->tex.target.getDim(); + + i->op = OP_TEX; // no need to clone dPdx/dPdy later + + for (c = 0; c < dim; ++c) + crd[c] = bld.getScratch(); + tmp = bld.getScratch(); + + for (l = 0; l < 4; ++l) { + // mov coordinates from lane l to all lanes + bld.mkOp(OP_QUADON, TYPE_NONE, NULL); + for (c = 0; c < dim; ++c) { + bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c), bld.mkImm(l)); + add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero); + add->subOp = 0x00; + add->lanes = 1; /* abused for .ndv */ + } + + // add dPdx from lane l to lanes dx + for (c = 0; c < dim; ++c) { + bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l)); + add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); + add->subOp = qOps[l][0]; + add->lanes = 1; /* abused for .ndv */ + } + + // add dPdy from lane l to lanes dy + for (c = 0; c < dim; ++c) { + bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l)); + add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); + add->subOp = qOps[l][1]; + add->lanes = 1; /* abused for 
.ndv */ + } + + // texture + bld.insert(tex = cloneForward(func, i)); + for (c = 0; c < dim; ++c) + tex->setSrc(c, crd[c]); + bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); + + // save results + for (c = 0; i->defExists(c); ++c) { + Instruction *mov; + def[c][l] = bld.getSSA(); + mov = bld.mkMov(def[c][l], tex->getDef(c)); + mov->fixed = 1; + mov->lanes = 1 << l; + } + } + + for (c = 0; i->defExists(c); ++c) { + Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); + for (l = 0; l < 4; ++l) + u->setSrc(l, def[c][l]); + } + + i->bb->remove(i); + return true; +} + +bool +GM107LoweringPass::handleDFDX(Instruction *insn) +{ + Instruction *shfl; + int qop = 0, xid = 0; + + switch (insn->op) { + case OP_DFDX: + qop = QUADOP(SUB, SUBR, SUB, SUBR); + xid = 1; + break; + case OP_DFDY: + qop = QUADOP(SUB, SUB, SUBR, SUBR); + xid = 2; + break; + default: + assert(!"invalid dfdx opcode"); + break; + } + + shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(), + insn->getSrc(0), bld.mkImm(xid)); + shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY; + insn->op = OP_QUADOP; + insn->subOp = qop; + insn->lanes = 0; /* abused for !.ndv */ + insn->setSrc(1, insn->getSrc(0)); + insn->setSrc(0, shfl->getDef(0)); + return true; +} + +bool +GM107LoweringPass::handlePFETCH(Instruction *i) +{ + Value *tmp0 = bld.getScratch(); + Value *tmp1 = bld.getScratch(); + Value *tmp2 = bld.getScratch(); + bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0)); + bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16)); + bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff)); + bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff)); + bld.mkOp1(OP_MOV , TYPE_U32, tmp2, bld.mkImm(i->getSrc(0)->reg.data.u32)); + bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2); + i->setSrc(0, tmp0); + i->setSrc(1, NULL); + return true; +} + +bool +GM107LoweringPass::handlePOPCNT(Instruction *i) +{ + Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(), + i->getSrc(0), i->getSrc(1)); + i->setSrc(0, 
tmp); + i->setSrc(1, NULL); + return TRUE; +} + +// +// - add quadop dance for texturing +// - put FP outputs in GPRs +// - convert instruction sequences +// +bool +GM107LoweringPass::visit(Instruction *i) +{ + bld.setPosition(i, false); + + if (i->cc != CC_ALWAYS) + checkPredicate(i); + + switch (i->op) { + case OP_TEX: + case OP_TXB: + case OP_TXL: + case OP_TXF: + case OP_TXG: + return handleTEX(i->asTex()); + case OP_TXD: + return handleTXD(i->asTex()); + case OP_TXLQ: + return handleTXLQ(i->asTex()); + case OP_TXQ: + return handleTXQ(i->asTex()); + case OP_EX2: + bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); + i->setSrc(0, i->getDef(0)); + break; + case OP_POW: + return handlePOW(i); + case OP_DIV: + return handleDIV(i); + case OP_MOD: + return handleMOD(i); + case OP_SQRT: + return handleSQRT(i); + case OP_EXPORT: + return handleEXPORT(i); + case OP_PFETCH: + return handlePFETCH(i); + case OP_EMIT: + case OP_RESTART: + return handleOUT(i); + case OP_RDSV: + return handleRDSV(i); + case OP_WRSV: + return handleWRSV(i); + case OP_LOAD: + if (i->src(0).getFile() == FILE_SHADER_INPUT) { + if (prog->getType() == Program::TYPE_COMPUTE) { + i->getSrc(0)->reg.file = FILE_MEMORY_CONST; + i->getSrc(0)->reg.fileIndex = 0; + } else + if (prog->getType() == Program::TYPE_GEOMETRY && + i->src(0).isIndirect(0)) { + // XXX: this assumes vec4 units + Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), + i->getIndirect(0, 0), bld.mkImm(4)); + i->setIndirect(0, 0, ptr); + } else { + i->op = OP_VFETCH; + assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP + } + } + break; + case OP_ATOM: + { + const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; + handleATOM(i); + handleCasExch(i, cctl); + } + break; + case OP_SULDB: + case OP_SULDP: + case OP_SUSTB: + case OP_SUSTP: + case OP_SUREDB: + case OP_SUREDP: + handleSurfaceOpNVE4(i->asTex()); + break; + case OP_DFDX: + case OP_DFDY: + handleDFDX(i); + break; + case OP_POPCNT: + handlePOPCNT(i); + 
break; + default: + break; + } + return true; +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h new file mode 100644 index 00000000000..036abf055ed --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h @@ -0,0 +1,18 @@ +#include "codegen/nv50_ir_lowering_nvc0.h" + +namespace nv50_ir { + +class GM107LoweringPass : public NVC0LoweringPass +{ +public: + GM107LoweringPass(Program *p) : NVC0LoweringPass(p) {} +private: + virtual bool visit(Instruction *); + + virtual bool handleManualTXD(TexInstruction *); + bool handleDFDX(Instruction *); + bool handlePFETCH(Instruction *); + bool handlePOPCNT(Instruction *); +}; + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index b68c2d09146..7f39c289554 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -99,7 +99,7 @@ protected: bool handleTEX(TexInstruction *); bool handleTXD(TexInstruction *); bool handleTXQ(TexInstruction *); - bool handleManualTXD(TexInstruction *); + virtual bool handleManualTXD(TexInstruction *); bool handleTXLQ(TexInstruction *); bool handleATOM(Instruction *); bool handleCasExch(Instruction *, bool needCctl); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index f788c72669b..ef3de6ff92a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -187,6 +187,7 @@ const char *operationStr[OP_LAST + 1] = "vshl", "vsel", "cctl", + "shfl", "(invalid)" }; @@ -271,6 +272,7 @@ static const char *SemanticStr[SV_LAST + 1] = "LBASE", "SBASE", "VERTEX_STRIDE", + "INVOCATION_INFO", "?", "(INVALID)" }; diff --git 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 60a6a3f486b..cbf0dd2119d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -256,6 +256,7 @@ private: void texConstraintNV50(TexInstruction *); void texConstraintNVC0(TexInstruction *); void texConstraintNVE0(TexInstruction *); + void texConstraintGM107(TexInstruction *); std::list constrList; @@ -855,6 +856,7 @@ GCRA::coalesce(ArrayList& insns) case 0xe0: case 0xf0: case 0x100: + case 0x110: ret = doCoalesce(insns, JOIN_MASK_UNION); break; default: @@ -1880,6 +1882,34 @@ RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, constrList.push_back(merge); } +void +RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) +{ + int n, s; + + if (isTextureOp(tex->op)) + textureMask(tex); + condenseDefs(tex); + + if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { + condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); + } else + if (isTextureOp(tex->op)) { + if (tex->op != OP_TXQ) { + s = tex->tex.target.getArgCount() - tex->tex.target.isMS(); + n = tex->srcCount(0xff) - s; + } else { + s = tex->srcCount(0xff); + n = 0; + } + + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 1) // NOTE: first call modified positions already + condenseSrcs(tex, 1, n); + } +} + void RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) { @@ -1987,6 +2017,9 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) case 0x100: texConstraintNVE0(tex); break; + case 0x110: + texConstraintGM107(tex); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 4ca5687765c..0397bdcad55 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -54,6 +54,7 @@ const uint8_t 
Target::operationSrcNr[] = 2, 2, // ATOM, BAR 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL + 3, // SHFL 0 }; @@ -126,10 +127,13 @@ const OpClass Target::operationClass[] = OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, // VSEL, CCTL OPCLASS_VECTOR, OPCLASS_CONTROL, + // SHFL + OPCLASS_OTHER, OPCLASS_PSEUDO // LAST }; +extern Target *getTargetGM107(unsigned int chipset); extern Target *getTargetNVC0(unsigned int chipset); extern Target *getTargetNV50(unsigned int chipset); @@ -138,6 +142,8 @@ Target *Target::create(unsigned int chipset) STATIC_ASSERT(Elements(operationSrcNr) == OP_LAST + 1); STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1); switch (chipset & ~0xf) { + case 0x110: + return getTargetGM107(chipset); case 0xc0: case 0xd0: case 0xe0: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h index 711056e5961..cb9fd37c4c8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h @@ -78,7 +78,7 @@ public: inline void *getRelocInfo() const { return relocInfo; } - void prepareEmission(Program *); + virtual void prepareEmission(Program *); virtual void prepareEmission(Function *); virtual void prepareEmission(BasicBlock *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp new file mode 100644 index 00000000000..202d7443588 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp @@ -0,0 +1,100 @@ +/* + * Copyright 2011 Christoph Bumiller + * 2014 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "codegen/nv50_ir_target_gm107.h" +#include "codegen/nv50_ir_lowering_gm107.h" + +namespace nv50_ir { + +Target *getTargetGM107(unsigned int chipset) +{ + return new TargetGM107(chipset); +} + +// BUILTINS / LIBRARY FUNCTIONS: + +// laziness -> will just hardcode everything for the time being + +#include "lib/gm107.asm.h" + +void +TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const +{ + *code = (const uint32_t *)&gm107_builtin_code[0]; + *size = sizeof(gm107_builtin_code); +} + +uint32_t +TargetGM107::getBuiltinOffset(int builtin) const +{ + assert(builtin < NVC0_BUILTIN_COUNT); + return gm107_builtin_offsets[builtin]; +} + +bool +TargetGM107::isOpSupported(operation op, DataType ty) const +{ + switch (op) { + case OP_MAD: + case OP_FMA: + if (ty != TYPE_F32) + return false; + break; + case OP_SAD: + case OP_POW: + case OP_SQRT: + case OP_DIV: + case OP_MOD: + return false; + default: + break; + } + + return true; +} + +bool +TargetGM107::runLegalizePass(Program *prog, CGStage stage) const +{ + if (stage == CG_STAGE_PRE_SSA) { + GM107LoweringPass pass(prog); + return pass.run(prog, false, true); + } else + if (stage == CG_STAGE_POST_RA) { + NVC0LegalizePostRA pass(prog); + return pass.run(prog, false, true); + } else + if (stage == CG_STAGE_SSA) { + NVC0LegalizeSSA pass; + return pass.run(prog, false, true); + } + return false; +} + +CodeEmitter * +TargetGM107::getCodeEmitter(Program::Type type) +{ + return createCodeEmitterGM107(type); +} + +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.h new file mode 100644 index 00000000000..5d606378953 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.h @@ -0,0 +1,21 @@ +#include "codegen/nv50_ir_target_nvc0.h" + +namespace nv50_ir { + +class TargetGM107 : public TargetNVC0 +{ +public: + TargetGM107(unsigned int chipset) : TargetNVC0(chipset) {} + + virtual
CodeEmitter *getCodeEmitter(Program::Type); + CodeEmitter *createCodeEmitterGM107(Program::Type); + + virtual bool runLegalizePass(Program *, CGStage) const; + + virtual void getBuiltinCode(const uint32_t **, uint32_t *) const; + virtual uint32_t getBuiltinOffset(int) const; + + virtual bool isOpSupported(operation, DataType) const; +}; + +} // namespace nv50_ir