From: Tom Stellard Date: Tue, 21 Aug 2012 14:53:50 +0000 (+0000) Subject: radeon/llvm: ExpandSpecialInstrs - Add support for cube instructions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1cb07bd3b8abd5e52e9dbd80bb1666058545387e;p=mesa.git radeon/llvm: ExpandSpecialInstrs - Add support for cube instructions --- diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp index 396ae6f5054..efe194b5206 100644 --- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp +++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp @@ -49,17 +49,14 @@ private: const R600RegisterInfo * TRI; const R600InstrInfo * TII; - bool IsCube; unsigned currentElement; - bool IsLast; unsigned section_start; public: R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), - _OS(OS), TM(NULL), IsCube(false), - IsLast(true) { } + _OS(OS), TM(NULL) { } const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } @@ -70,7 +67,7 @@ public: private: void EmitALUInstr(MachineInstr &MI); - void EmitSrc(const MachineOperand & MO, int chan_override = -1); + void EmitSrc(const MachineOperand & MO); void EmitDst(const MachineOperand & MO); void EmitALU(MachineInstr &MI, unsigned numSrc); void EmitTexInstr(MachineInstr &MI); @@ -160,7 +157,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) { MachineInstr &MI = *I; - IsCube = TII->isCubeOp(MI.getOpcode()); if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) { continue; } @@ -168,15 +164,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { EmitTexInstr(MI); } else if (TII->isFCOp(MI.getOpcode())){ EmitFCInstr(MI); - } else if (IsCube) { - IsLast = false; - // XXX: On Cayman, some (all?) of the vector instructions only need - // to fill the first three slots. 
- for (currentElement = 0; currentElement < 4; currentElement++) { - IsLast = (currentElement == 3); - EmitALUInstr(MI); - } - IsCube = false; } else if (MI.getOpcode() == AMDGPU::RETURN || MI.getOpcode() == AMDGPU::BUNDLE || MI.getOpcode() == AMDGPU::KILL) { @@ -250,25 +237,18 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) // Emit instruction type EmitByte(0); - if (IsCube) { - static const int cube_src_swz[] = {2, 2, 0, 1}; - EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]); - EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]); - EmitNullBytes(SRC_BYTE_COUNT); - } else { - unsigned int opIndex; - for (opIndex = 1; opIndex < numOperands; opIndex++) { - // Literal constants are always stored as the last operand. - if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { - break; - } - EmitSrc(MI.getOperand(opIndex)); + unsigned int opIndex; + for (opIndex = 1; opIndex < numOperands; opIndex++) { + // Literal constants are always stored as the last operand. + if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { + break; } + EmitSrc(MI.getOperand(opIndex)); + } - // Emit zeros for unused sources - for ( ; opIndex < 4; opIndex++) { - EmitNullBytes(SRC_BYTE_COUNT); - } + // Emit zeros for unused sources + for ( ; opIndex < 4; opIndex++) { + EmitNullBytes(SRC_BYTE_COUNT); } EmitDst(dstOp); @@ -276,7 +256,7 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) EmitALU(MI, numOperands - 1); } -void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override) +void R600CodeEmitter::EmitSrc(const MachineOperand & MO) { uint32_t value = 0; // Emit the source select (2 bytes). For GPRs, this is the register index. 
@@ -302,9 +282,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override) } // Emit the source channel (1 byte) - if (chan_override != -1) { - EmitByte(chan_override); - } else if (MO.isReg()) { + if (MO.isReg()) { EmitByte(TRI->getHWRegChan(MO.getReg())); } else { EmitByte(0); @@ -345,11 +323,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO) EmitByte(getHWReg(MO.getReg())); // Emit the element of the destination register (1 byte) - if (IsCube) { - EmitByte(currentElement); - } else { - EmitByte(TRI->getHWRegChan(MO.getReg())); - } + EmitByte(TRI->getHWRegChan(MO.getReg())); // Emit isClamped (1 byte) if (MO.getTargetFlags() & MO_FLAG_CLAMP) { @@ -379,9 +353,8 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc) EmitTwoBytes(getBinaryCodeForInstr(MI)); // Emit IsLast (for this instruction group) (1 byte) - if (!IsLast || - (MI.isInsideBundle() && - !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST))) { + if (MI.isInsideBundle() && + !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST)) { EmitByte(0); } else { EmitByte(1); diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp index ba336a37467..9f1b8168a65 100644 --- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp +++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp @@ -61,7 +61,8 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { bool IsReduction = TII->isReductionOp(MI.getOpcode()); bool IsVector = TII->isVector(MI); - if (!IsReduction && !IsVector) { + bool IsCube = TII->isCubeOp(MI.getOpcode()); + if (!IsReduction && !IsVector && !IsCube) { continue; } @@ -82,23 +83,73 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { // T0_Y (write masked) = MULLO_INT T1_X, T2_X // T0_Z (write masked) = MULLO_INT T1_X, T2_X // T0_W (write masked) = MULLO_INT T1_X, T2_X + // + // Cube instructions: + // T0_XYZW = CUBE T1_XYZW + // 
becomes: + // T0_X = CUBE T1_Z, T1_Y + // T0_Y = CUBE T1_Z, T1_X + // T0_Z = CUBE T1_X, T1_Z + // T0_W = CUBE T1_Y, T1_Z for (unsigned Chan = 0; Chan < 4; Chan++) { unsigned DstReg = MI.getOperand(0).getReg(); unsigned Src0 = MI.getOperand(1).getReg(); - unsigned Src1 = MI.getOperand(2).getReg(); + unsigned Src1 = 0; + + // Determine the correct source registers + if (!IsCube) { + Src1 = MI.getOperand(2).getReg(); + } if (IsReduction) { unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); Src0 = TRI.getSubReg(Src0, SubRegIndex); Src1 = TRI.getSubReg(Src1, SubRegIndex); + } else if (IsCube) { + static const int CubeSrcSwz[] = {2, 2, 0, 1}; + unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); + unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); + Src1 = TRI.getSubReg(Src0, SubRegIndex1); + Src0 = TRI.getSubReg(Src0, SubRegIndex0); + } + + // Determine the correct destination registers; + unsigned Flags = 0; + if (IsCube) { + unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); + DstReg = TRI.getSubReg(DstReg, SubRegIndex); + } else { + // Mask the write if the original instruction does not write to + // the current Channel. + Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); + unsigned DstBase = TRI.getHWRegIndex(DstReg); + DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); } - unsigned DstBase = TRI.getHWRegIndex(DstReg); - unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); - unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); + + // Set the IsLast bit Flags |= (Chan == 3 ? 
MO_FLAG_LAST : 0); - MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true); + + // Add the new instruction + unsigned Opcode; + if (IsCube) { + switch (MI.getOpcode()) { + case AMDGPU::CUBE_r600_pseudo: + Opcode = AMDGPU::CUBE_r600_real; + break; + case AMDGPU::CUBE_eg_pseudo: + Opcode = AMDGPU::CUBE_eg_real; + break; + default: + assert(!"Unknown CUBE instruction"); + Opcode = 0; + break; + } + } else { + Opcode = MI.getOpcode(); + } + MachineOperand NewDstOp = MachineOperand::CreateReg(DstReg, true); NewDstOp.addTargetFlag(Flags); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode())) + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode)) .addOperand(NewDstOp) .addReg(Src0) .addReg(Src1) diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index 12b46654bb7..56a2cf93199 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -153,8 +153,10 @@ bool R600InstrInfo::isCubeOp(unsigned opcode) const { switch(opcode) { default: return false; - case AMDGPU::CUBE_r600: - case AMDGPU::CUBE_eg: + case AMDGPU::CUBE_r600_pseudo: + case AMDGPU::CUBE_r600_real: + case AMDGPU::CUBE_eg_pseudo: + case AMDGPU::CUBE_eg_real: return true; } } diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index f8d2bb0debe..1b02533edbb 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -593,14 +593,25 @@ class DOT4_Common inst> : R600_REDUCTION < [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] >; -class CUBE_Common inst> : InstR600 < - inst, - (outs R600_Reg128:$dst), - (ins R600_Reg128:$src), - "CUBE $dst $src", - [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], - VecALU ->; +multiclass CUBE_Common inst> { + + def _pseudo : InstR600 < + inst, + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src), + 
"CUBE $dst $src", + [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], + VecALU + >; + + def _real : InstR600 < + inst, + (outs R600_Reg32:$dst), + (ins R600_Reg32:$src0, R600_Reg32:$src1), + "CUBE $dst, $src0, $src1", + [], VecALU + >; +} class EXP_IEEE_Common inst> : R600_1OP < inst, "EXP_IEEE", @@ -737,7 +748,7 @@ let Predicates = [isR600] in { def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; def DOT4_r600 : DOT4_Common<0x50>; - def CUBE_r600 : CUBE_Common<0x52>; + defm CUBE_r600 : CUBE_Common<0x52>; def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; @@ -853,7 +864,7 @@ let Predicates = [isEGorCayman] in { def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def DOT4_eg : DOT4_Common<0xBE>; - def CUBE_eg : CUBE_Common<0xC0>; + defm CUBE_eg : CUBE_Common<0xC0>; def DIV_eg : DIV_Common; def POW_eg : POW_Common;