From 67a47a445b544ac638d10303dc697d70f25d12fb Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 22 Aug 2012 15:04:58 +0000 Subject: [PATCH] radeon/llvm: Add flag operand to some instructions This new operand replaces the MachineOperand flags in LLVM, which will be deprecated soon. Eventually all instructions should have a flag operand, but for now this operand has only been added to instructions that need it. --- src/gallium/drivers/radeon/AMDGPUInstrInfo.h | 1 + .../drivers/radeon/R600CodeEmitter.cpp | 25 +++++---- .../radeon/R600ExpandSpecialInstrs.cpp | 3 +- .../drivers/radeon/R600ISelLowering.cpp | 9 ++- src/gallium/drivers/radeon/R600InstrInfo.cpp | 30 +++++++++- src/gallium/drivers/radeon/R600InstrInfo.h | 6 ++ .../drivers/radeon/R600Instructions.td | 56 +++++++++++++------ 7 files changed, 97 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h index de3c5940613..5f72869cb0a 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h @@ -31,6 +31,7 @@ #define MO_FLAG_MASK (1 << 3) #define MO_FLAG_PUSH (1 << 4) #define MO_FLAG_LAST (1 << 5) +#define NUM_MO_FLAGS 6 #define OPCODE_IS_ZERO_INT 0x00000045 #define OPCODE_IS_NOT_ZERO_INT 0x00000042 diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp index efe194b5206..9e76cb1e5c2 100644 --- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp +++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp @@ -67,7 +67,7 @@ public: private: void EmitALUInstr(MachineInstr &MI); - void EmitSrc(const MachineOperand & MO); + void EmitSrc(const MachineOperand & MO, unsigned SrcIdx); void EmitDst(const MachineOperand & MO); void EmitALU(MachineInstr &MI, unsigned numSrc); void EmitTexInstr(MachineInstr &MI); @@ -218,6 +218,8 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) unsigned numOperands = MI.getNumExplicitOperands(); if(MI.findFirstPredOperandIdx() > -1) numOperands--; + if (TII->HasFlagOperand(MI)) + numOperands--; // Some instructions are just place holder instructions that represent // operations that the GPU does automatically. They should be ignored. @@ -243,7 +245,7 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { break; } - EmitSrc(MI.getOperand(opIndex)); + EmitSrc(MI.getOperand(opIndex), opIndex); } // Emit zeros for unused sources @@ -256,8 +258,9 @@ void R600CodeEmitter::EmitALUInstr(MachineInstr &MI) EmitALU(MI, numOperands - 1); } -void R600CodeEmitter::EmitSrc(const MachineOperand & MO) +void R600CodeEmitter::EmitSrc(const MachineOperand & MO, unsigned SrcIdx) { + const MachineInstr *MI = MO.getParent(); uint32_t value = 0; // Emit the source select (2 bytes). For GPRs, this is the register index. // For other potential instruction operands, (e.g. constant registers) the @@ -289,8 +292,8 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO) } // XXX: Emit isNegated (1 byte) - if ((!(MO.getTargetFlags() & MO_FLAG_ABS)) - && (MO.getTargetFlags() & MO_FLAG_NEG || + if ((!(TII->IsFlagSet(*MI, SrcIdx, MO_FLAG_ABS))) + && (TII->IsFlagSet(*MI, SrcIdx, MO_FLAG_NEG) || (MO.isReg() && (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ EmitByte(1); @@ -299,7 +302,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO) } // Emit isAbsolute (1 byte) - if (MO.getTargetFlags() & MO_FLAG_ABS) { + if (TII->IsFlagSet(*MI, SrcIdx, MO_FLAG_ABS)) { EmitByte(1); } else { EmitByte(0); @@ -318,6 +321,7 @@ void R600CodeEmitter::EmitSrc(const MachineOperand & MO) void R600CodeEmitter::EmitDst(const MachineOperand & MO) { + const MachineInstr *MI = MO.getParent(); if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) { // Emit the destination register index (1 byte) EmitByte(getHWReg(MO.getReg())); @@ -326,14 +330,14 @@ void R600CodeEmitter::EmitDst(const MachineOperand & MO) EmitByte(TRI->getHWRegChan(MO.getReg())); // Emit isClamped (1 byte) - if (MO.getTargetFlags() & MO_FLAG_CLAMP) { + if (TII->IsFlagSet(*MI, 0, MO_FLAG_CLAMP)) { EmitByte(1); } else { EmitByte(0); } // Emit writemask (1 byte). - if (MO.getTargetFlags() & MO_FLAG_MASK) { + if (TII->IsFlagSet(*MI, 0, MO_FLAG_MASK)) { EmitByte(0); } else { EmitByte(1); @@ -353,8 +357,7 @@ void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc) EmitTwoBytes(getBinaryCodeForInstr(MI)); // Emit IsLast (for this instruction group) (1 byte) - if (MI.isInsideBundle() && - !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST)) { + if (MI.isInsideBundle() && !TII->IsFlagSet(MI, 0, MO_FLAG_LAST)) { EmitByte(0); } else { EmitByte(1); @@ -508,7 +511,7 @@ void R600CodeEmitter::EmitFCInstr(MachineInstr &MI) unsigned numOperands = MI.getNumOperands(); if (numOperands > 0) { assert(numOperands == 1); - EmitSrc(MI.getOperand(0)); + EmitSrc(MI.getOperand(0), 0); } else { EmitNullBytes(SRC_BYTE_COUNT); } diff --git a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp index 2c19437e2be..93229370d2a 100644 --- a/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp +++ b/src/gallium/drivers/radeon/R600ExpandSpecialInstrs.cpp @@ -149,7 +149,8 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { MachineInstr *NewMI = BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg) .addReg(Src0) - .addReg(Src1); + .addReg(Src1) + .addImm(0); // Flag NewMI->setIsInsideBundle(Chan != 0); TII->AddFlag(NewMI, 0, Flags); diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index ec1250d14da..d134979dd04 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -64,6 +64,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) + .addImm(0) // Flags .addReg(AMDGPU::PRED_SEL_OFF); TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP); break; @@ -74,6 +75,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) + .addImm(0) // Flags .addReg(AMDGPU::PRED_SEL_OFF); TII->AddFlag(NewMI, 1, MO_FLAG_ABS); break; @@ -85,6 +87,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) + .addImm(0) // Flags .addReg(AMDGPU::PRED_SEL_OFF); TII->AddFlag(NewMI, 1, MO_FLAG_NEG); break; @@ -200,7 +203,8 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) .addReg(AMDGPU::PREDICATE_BIT) .addOperand(MI->getOperand(1)) - .addImm(OPCODE_IS_ZERO); + .addImm(OPCODE_IS_ZERO) + .addImm(0); // Flags TII->AddFlag(NewMI, 1, MO_FLAG_PUSH); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) .addOperand(MI->getOperand(0)) @@ -213,7 +217,8 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) .addReg(AMDGPU::PREDICATE_BIT) .addOperand(MI->getOperand(1)) - .addImm(OPCODE_IS_ZERO_INT); + .addImm(OPCODE_IS_ZERO_INT) + .addImm(0); // Flags TII->AddFlag(NewMI, 1, MO_FLAG_PUSH); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) .addOperand(MI->getOperand(0)) diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index 2b6ce4be36f..4cca8ebfea7 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -57,6 +57,7 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(AMDGPU::MOV)) .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define) .addReg(RI.getSubReg(SrcReg, SubRegIndex)) + .addImm(0) // Flag .addReg(0) // PREDICATE_BIT .addReg(DestReg, RegState::Define | RegState::Implicit); } @@ -68,6 +69,7 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0) // Flag .addReg(0); // PREDICATE_BIT } } @@ -520,11 +522,35 @@ int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, } //===----------------------------------------------------------------------===// -// Instruction flag setters +// Instruction flag getters/setters //===----------------------------------------------------------------------===// +#define GET_FLAG_OPERAND_IDX(MI) (((MI).getDesc().TSFlags >> 7) & 0x3) + +bool R600InstrInfo::HasFlagOperand(const MachineInstr &MI) const +{ + return GET_FLAG_OPERAND_IDX(MI) != 0; +} + void R600InstrInfo::AddFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const { - MI->getOperand(Operand).addTargetFlag(Flag); + unsigned FlagIndex = GET_FLAG_OPERAND_IDX(*MI); + assert(FlagIndex != 0 && + "Instruction flags not supported for this instruction"); + MachineOperand &FlagOp = MI->getOperand(FlagIndex); + assert(FlagOp.isImm()); + FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand))); +} + +bool R600InstrInfo::IsFlagSet(const MachineInstr &MI, unsigned Operand, + unsigned Flag) const +{ + unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MI); + if (FlagIndex == 0) { + return false; + } + assert(MI.getOperand(FlagIndex).isImm()); + return !!((MI.getOperand(FlagIndex).getImm() >> + (NUM_MO_FLAGS * Operand)) & Flag); } diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h index 20de7dc7f2d..5e160a0d57a 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ b/src/gallium/drivers/radeon/R600InstrInfo.h @@ -112,8 +112,13 @@ namespace llvm { virtual int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const { return 1;} + bool HasFlagOperand(const MachineInstr &MI) const; + ///AddFlag - Add one of the MO_FLAG* flags to the specified Operand. void AddFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; + + ///IsFlagSet - Determine if the specified flag is set on this Operand. + bool IsFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const; }; } // End llvm namespace @@ -127,6 +132,7 @@ namespace R600_InstFlag { TRIG = (1 << 4), OP3 = (1 << 5), VECTOR = (1 << 6) + //FlagOperand bits 7, 8 }; } diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 1b02533edbb..73c2002a382 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -20,7 +20,8 @@ class InstR600 inst, dag outs, dag ins, string asm, list pattern, field bits<32> Inst; bit Trig = 0; bit Op3 = 0; - bit isVector = 0; + bit isVector = 0; + bits<2> FlagOperandIdx = 0; let Inst = inst; let Namespace = "AMDGPU"; @@ -36,6 +37,7 @@ class InstR600 inst, dag outs, dag ins, string asm, list pattern, // Vector instructions are instructions that must fill all slots in an // instruction group let TSFlags{6} = isVector; + let TSFlags{8-7} = FlagOperandIdx; } class InstR600ISA pattern> : @@ -107,20 +109,19 @@ class R600_3OP inst, string opName, list pattern, -def PRED_X : AMDGPUInst <(outs R600_Predicate_Bit:$dst), - (ins R600_Reg32:$src0, i32imm:$src1), +def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst), + (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), "PRED $dst, $src0, $src1", - []> + [], NullALU> { let DisableEncoding = "$src0"; field bits<32> Inst; bits<32> src1; let Inst = src1; + let FlagOperandIdx = 3; } - - let isTerminator = 1, isBranch = 1 in { def JUMP : InstR600 <0x10, (outs), @@ -365,7 +366,12 @@ def FLOOR : R600_1OP < [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))] >; -def MOV : R600_1OP <0x19, "MOV", []>; +def MOV : InstR600 <0x19, (outs R600_Reg32:$dst), + (ins R600_Reg32:$src0, i32imm:$flags, + R600_Pred:$p), + "MOV $dst, $src0", [], AnyALU> { + let FlagOperandIdx = 2; +} class MOV_IMM : InstR600 <0x19, (outs R600_Reg32:$dst), @@ -386,10 +392,15 @@ def : Pat < (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val) >; -def KILLGT : R600_2OP < - 0x2D, "KILLGT", - [] ->; +def KILLGT : InstR600 <0x2D, + (outs R600_Reg32:$dst), + (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p, + variable_ops), + "KILLGT $dst, $src0, $src1, $flags ($p)", + [], + NullALU>{ + let FlagOperandIdx = 3; +} def AND_INT : R600_2OP < 0x30, "AND_INT", @@ -588,9 +599,16 @@ class CNDGE_Common inst> : R600_3OP < class DOT4_Common inst> : R600_REDUCTION < inst, - (ins R600_Reg128:$src0, R600_Reg128:$src1), + (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags), "DOT4 $dst $src0, $src1", - [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] + [] + > { + let FlagOperandIdx = 3; +} + +class DOT4_Pat : Pat < + (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1), + (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0) >; multiclass CUBE_Common inst> { @@ -607,10 +625,12 @@ multiclass CUBE_Common inst> { def _real : InstR600 < inst, (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, R600_Reg32:$src1), + (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), "CUBE $dst, $src0, $src1", [], VecALU - >; + >{ + let FlagOperandIdx = 3; + } } class EXP_IEEE_Common inst> : R600_1OP < @@ -748,6 +768,7 @@ let Predicates = [isR600] in { def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; def DOT4_r600 : DOT4_Common<0x50>; + def : DOT4_Pat ; defm CUBE_r600 : CUBE_Common<0x52>; def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; @@ -864,6 +885,7 @@ let Predicates = [isEGorCayman] in { def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def DOT4_eg : DOT4_Common<0xBE>; + def : DOT4_Pat ; defm CUBE_eg : CUBE_Common<0xC0>; def DIV_eg : DIV_Common; @@ -1198,12 +1220,12 @@ def MASK_WRITE : AMDGPUShaderInst < // KIL Patterns def KILP : Pat < (int_AMDGPU_kilp), - (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO))) + (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0)) >; def KIL : Pat < (int_AMDGPU_kill R600_Reg32:$src0), - (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0))) + (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0)) >; // SGT Reverse args -- 2.30.2