From e98e209528b2c7acb721a84a7cfda925aeed4d57 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Mon, 7 May 2012 13:10:09 +0400 Subject: [PATCH] radeon/llvm: add support for CUBE ALU instruction Signed-off-by: Vadim Girlin --- .../drivers/radeon/AMDGPUIntrinsics.td | 1 + src/gallium/drivers/radeon/AMDGPUUtil.cpp | 11 ++++ src/gallium/drivers/radeon/AMDGPUUtil.h | 1 + .../drivers/radeon/R600CodeEmitter.cpp | 58 ++++++++++++------- .../drivers/radeon/R600Instructions.td | 13 ++++- 5 files changed, 63 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td index d8ea45287a4..089d3b66f61 100644 --- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td +++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td @@ -54,6 +54,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], []>; } let TargetPrefix = "TGSI", isTarget = 1 in { diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.cpp b/src/gallium/drivers/radeon/AMDGPUUtil.cpp index d6f72b19050..2d15e21ce36 100644 --- a/src/gallium/drivers/radeon/AMDGPUUtil.cpp +++ b/src/gallium/drivers/radeon/AMDGPUUtil.cpp @@ -98,6 +98,17 @@ bool llvm::isReductionOp(unsigned opcode) } } +bool llvm::isCubeOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDIL::CUBE_r600: + case AMDIL::CUBE_eg: + return true; + } +} + + bool llvm::isFCOp(unsigned opcode) { switch(opcode) { diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.h b/src/gallium/drivers/radeon/AMDGPUUtil.h index 299146e1ba7..38a7ebc1103 100644 --- a/src/gallium/drivers/radeon/AMDGPUUtil.h +++ b/src/gallium/drivers/radeon/AMDGPUUtil.h @@ -29,6 +29,7 @@ bool isPlaceHolderOpcode(unsigned opcode); bool isTransOp(unsigned opcode); bool isTexOp(unsigned opcode); bool isReductionOp(unsigned opcode); +bool isCubeOp(unsigned opcode); bool isFCOp(unsigned opcode); /* XXX: Move these to AMDGPUInstrInfo.h */ diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp index e0bc95b9465..eed53a4f662 100644 --- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp +++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp @@ -44,8 +44,9 @@ namespace { const R600RegisterInfo * TRI; bool evergreenEncoding; + bool isCube; bool isReduction; - unsigned reductionElement; + unsigned currentElement; bool isLast; unsigned section_start; @@ -53,7 +54,7 @@ namespace { public: R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), - _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false), + _OS(OS), TM(NULL), evergreenEncoding(false), isCube(false), isReduction(false), isLast(true) { } const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } @@ -65,7 +66,7 @@ namespace { private: void emitALUInstr(MachineInstr &MI); - void emitSrc(const MachineOperand & MO); + void emitSrc(const MachineOperand & MO, int chan_override = -1); void emitDst(const MachineOperand & MO); void emitALU(MachineInstr &MI, unsigned numSrc); void emitTexInstr(MachineInstr &MI); @@ -176,11 +177,19 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { } else if (isReductionOp(MI.getOpcode())) { isReduction = true; isLast = false; - for (reductionElement = 0; reductionElement < 4; reductionElement++) { - isLast = (reductionElement == 3); + for (currentElement = 0; currentElement < 4; currentElement++) { + isLast = (currentElement == 3); emitALUInstr(MI); } isReduction = false; + } else if (isCubeOp(MI.getOpcode())) { + isCube = true; + isLast = false; + for (currentElement = 0; currentElement < 4; currentElement++) { + isLast = (currentElement == 3); + emitALUInstr(MI); + } + isCube = false; } else if (MI.getOpcode() == AMDIL::RETURN || MI.getOpcode() == AMDIL::BUNDLE || MI.getOpcode() == AMDIL::KILL) { @@ -307,18 +316,25 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI) /* Emit instruction type */ emitByte(0); - unsigned int opIndex; - for (opIndex = 1; opIndex < numOperands; opIndex++) { - /* Literal constants are always stored as the last operand. */ - if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { - break; + if (isCube) { + static const int cube_src_swz[] = {2, 2, 0, 1}; + emitSrc(MI.getOperand(1), cube_src_swz[currentElement]); + emitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]); + emitNullBytes(SRC_BYTE_COUNT); + } else { + unsigned int opIndex; + for (opIndex = 1; opIndex < numOperands; opIndex++) { + /* Literal constants are always stored as the last operand. */ + if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { + break; + } + emitSrc(MI.getOperand(opIndex)); } - emitSrc(MI.getOperand(opIndex)); - } /* Emit zeros for unused sources */ - for ( ; opIndex < 4; opIndex++) { - emitNullBytes(SRC_BYTE_COUNT); + for ( ; opIndex < 4; opIndex++) { + emitNullBytes(SRC_BYTE_COUNT); + } } emitDst(dstOp); @@ -326,7 +342,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI) emitALU(MI, numOperands - 1); } -void R600CodeEmitter::emitSrc(const MachineOperand & MO) +void R600CodeEmitter::emitSrc(const MachineOperand & MO, int chan_override /* = -1 */) { uint32_t value = 0; /* Emit the source select (2 bytes). For GPRs, this is the register index. @@ -352,8 +368,10 @@ void R600CodeEmitter::emitSrc(const MachineOperand & MO) } /* Emit the source channel (1 byte) */ - if (isReduction) { - emitByte(reductionElement); + if (chan_override != -1) { + emitByte(chan_override); + } else if (isReduction) { + emitByte(currentElement); } else if (MO.isReg()) { emitByte(TRI->getHWRegChan(MO.getReg())); } else { @@ -395,8 +413,8 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO) emitByte(getHWReg(MO.getReg())); /* Emit the element of the destination register (1 byte)*/ - if (isReduction) { - emitByte(reductionElement); + if (isReduction || isCube) { + emitByte(currentElement); } else { emitByte(TRI->getHWRegChan(MO.getReg())); } @@ -409,7 +427,7 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO) } /* Emit writemask (1 byte). */ - if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg())) + if ((isReduction && currentElement != TRI->getHWRegChan(MO.getReg())) || MO.getTargetFlags() & MO_FLAG_MASK) { emitByte(0); } else { diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index edbade70627..381ad715504 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -92,7 +92,7 @@ class R600_3OP inst, string opName, list pattern, } class R600_REDUCTION inst, dag ins, string asm, list pattern, - InstrItinClass itin = AnyALU> : + InstrItinClass itin = VecALU> : InstR600 inst> : R600_REDUCTION < [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] >; +class CUBE_Common inst> : InstR600 < + inst, + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src), + "CUBE $dst $src", + [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], + VecALU +>; + class EXP_IEEE_Common inst> : R600_1OP < inst, "EXP_IEEE", []> { @@ -681,6 +690,7 @@ let Gen = AMDGPUGen.R600 in { def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; def DOT4_r600 : DOT4_Common<0x50>; + def CUBE_r600 : CUBE_Common<0x52>; def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; @@ -887,6 +897,7 @@ let Gen = AMDGPUGen.EG_CAYMAN in { def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; def DOT4_eg : DOT4_Common<0xBE>; + def CUBE_eg : CUBE_Common<0xC0>; } // End AMDGPUGen.EG_CAYMAN -- 2.30.2