From: Tom Stellard Date: Tue, 21 Aug 2012 14:33:04 +0000 (+0000) Subject: radeon/llvm: ExpandSpecialInstrs - Add support for vector instructions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6c99f2101fbd3edb7d5899c44ca9d984a3c0f8b6;p=mesa.git radeon/llvm: ExpandSpecialInstrs - Add support for vector instructions --- diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp index 14e877b2518..396ae6f5054 100644 --- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp +++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp @@ -50,7 +50,6 @@ private: const R600InstrInfo * TII; bool IsCube; - bool IsVector; unsigned currentElement; bool IsLast; @@ -59,7 +58,7 @@ private: public: R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), - _OS(OS), TM(NULL), IsCube(false), IsVector(false), + _OS(OS), TM(NULL), IsCube(false), IsLast(true) { } const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } @@ -161,7 +160,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) { MachineInstr &MI = *I; - IsVector = TII->isVector(MI); IsCube = TII->isCubeOp(MI.getOpcode()); if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) { continue; @@ -170,7 +168,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { EmitTexInstr(MI); } else if (TII->isFCOp(MI.getOpcode())){ EmitFCInstr(MI); - } else if (IsVector || IsCube) { + } else if (IsCube) { IsLast = false; // XXX: On Cayman, some (all?) of the vector instructions only need // to fill the first three slots. 
@@ -178,7 +176,6 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { IsLast = (currentElement == 3); EmitALUInstr(MI); } - IsVector = false; IsCube = false; } else if (MI.getOpcode() == AMDGPU::RETURN || MI.getOpcode() == AMDGPU::BUNDLE || @@ -348,7 +345,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO) EmitByte(getHWReg(MO.getReg())); // Emit the element of the destination register (1 byte) - if (IsCube || IsVector) { + if (IsCube) { EmitByte(currentElement); } else { EmitByte(TRI->getHWRegChan(MO.getReg())); @@ -362,9 +359,7 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO) } // Emit writemask (1 byte). - if ((IsVector && - currentElement != TRI->getHWRegChan(MO.getReg())) - || MO.getTargetFlags() & MO_FLAG_MASK) { + if (MO.getTargetFlags() & MO_FLAG_MASK) { EmitByte(0); } else { EmitByte(1); diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp index 4c67ba47568..ba336a37467 100644 --- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp +++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp @@ -59,18 +59,38 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *I; I = llvm::next(I); - if (!TII->isReductionOp(MI.getOpcode())) { + bool IsReduction = TII->isReductionOp(MI.getOpcode()); + bool IsVector = TII->isVector(MI); + if (!IsReduction && !IsVector) { continue; } // Expand the instruction + // + // Reduction instructions: + // T0_X = DP4 T1_XYZW, T2_XYZW + // becomes: + // T0_X = DP4 T1_X, T2_X + // T0_Y (write masked) = DP4 T1_Y, T2_Y + // T0_Z (write masked) = DP4 T1_Z, T2_Z + // T0_W (write masked) = DP4 T1_W, T2_W + // + // Vector instructions: + // T0_X = MULLO_INT T1_X, T2_X + // becomes: + // T0_X = MULLO_INT T1_X, T2_X + // T0_Y (write masked) = MULLO_INT T1_X, T2_X + // T0_Z (write masked) = MULLO_INT T1_X, T2_X + // T0_W (write masked) = MULLO_INT T1_X, T2_X for (unsigned Chan = 0; Chan 
< 4; Chan++) { unsigned DstReg = MI.getOperand(0).getReg(); unsigned Src0 = MI.getOperand(1).getReg(); unsigned Src1 = MI.getOperand(2).getReg(); - unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); - unsigned NewSrc0 = TRI.getSubReg(Src0, SubRegIndex); - unsigned NewSrc1 = TRI.getSubReg(Src1, SubRegIndex); + if (IsReduction) { + unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); + Src0 = TRI.getSubReg(Src0, SubRegIndex); + Src1 = TRI.getSubReg(Src1, SubRegIndex); + } unsigned DstBase = TRI.getHWRegIndex(DstReg); unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0); @@ -80,8 +100,8 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode())) .addOperand(NewDstOp) - .addReg(NewSrc0) - .addReg(NewSrc1) + .addReg(Src0) + .addReg(Src1) ->setIsInsideBundle(Chan != 0); } MI.eraseFromParent();