const R600RegisterInfo * TRI;
const R600InstrInfo * TII;
- bool IsCube;
unsigned currentElement;
- bool IsLast;
unsigned section_start;
public:
R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
- _OS(OS), TM(NULL), IsCube(false),
- IsLast(true) { }
+ _OS(OS), TM(NULL) { }
const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
private:
void EmitALUInstr(MachineInstr &MI);
- void EmitSrc(const MachineOperand & MO, int chan_override = -1);
+ void EmitSrc(const MachineOperand & MO);
void EmitDst(const MachineOperand & MO);
void EmitALU(MachineInstr &MI, unsigned numSrc);
void EmitTexInstr(MachineInstr &MI);
for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
E = MBB.instr_end(); I != E; ++I) {
MachineInstr &MI = *I;
- IsCube = TII->isCubeOp(MI.getOpcode());
if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
continue;
}
EmitTexInstr(MI);
} else if (TII->isFCOp(MI.getOpcode())){
EmitFCInstr(MI);
- } else if (IsCube) {
- IsLast = false;
- // XXX: On Cayman, some (all?) of the vector instructions only need
- // to fill the first three slots.
- for (currentElement = 0; currentElement < 4; currentElement++) {
- IsLast = (currentElement == 3);
- EmitALUInstr(MI);
- }
- IsCube = false;
} else if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::BUNDLE ||
MI.getOpcode() == AMDGPU::KILL) {
// Emit instruction type
EmitByte(0);
- if (IsCube) {
- static const int cube_src_swz[] = {2, 2, 0, 1};
- EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
- EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
- EmitNullBytes(SRC_BYTE_COUNT);
- } else {
- unsigned int opIndex;
- for (opIndex = 1; opIndex < numOperands; opIndex++) {
- // Literal constants are always stored as the last operand.
- if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
- break;
- }
- EmitSrc(MI.getOperand(opIndex));
+ unsigned int opIndex;
+ for (opIndex = 1; opIndex < numOperands; opIndex++) {
+ // Literal constants are always stored as the last operand.
+ if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
+ break;
}
+ EmitSrc(MI.getOperand(opIndex));
+ }
- // Emit zeros for unused sources
- for ( ; opIndex < 4; opIndex++) {
- EmitNullBytes(SRC_BYTE_COUNT);
- }
+ // Emit zeros for unused sources
+ for ( ; opIndex < 4; opIndex++) {
+ EmitNullBytes(SRC_BYTE_COUNT);
}
EmitDst(dstOp);
EmitALU(MI, numOperands - 1);
}
-void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
+void R600CodeEmitter::EmitSrc(const MachineOperand & MO)
{
uint32_t value = 0;
// Emit the source select (2 bytes). For GPRs, this is the register index.
}
// Emit the source channel (1 byte)
- if (chan_override != -1) {
- EmitByte(chan_override);
- } else if (MO.isReg()) {
+ if (MO.isReg()) {
EmitByte(TRI->getHWRegChan(MO.getReg()));
} else {
EmitByte(0);
EmitByte(getHWReg(MO.getReg()));
// Emit the element of the destination register (1 byte)
- if (IsCube) {
- EmitByte(currentElement);
- } else {
- EmitByte(TRI->getHWRegChan(MO.getReg()));
- }
+ EmitByte(TRI->getHWRegChan(MO.getReg()));
// Emit isClamped (1 byte)
if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
EmitTwoBytes(getBinaryCodeForInstr(MI));
// Emit IsLast (for this instruction group) (1 byte)
- if (!IsLast ||
- (MI.isInsideBundle() &&
- !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST))) {
+ if (MI.isInsideBundle() &&
+ !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST)) {
EmitByte(0);
} else {
EmitByte(1);
bool IsReduction = TII->isReductionOp(MI.getOpcode());
bool IsVector = TII->isVector(MI);
- if (!IsReduction && !IsVector) {
+ bool IsCube = TII->isCubeOp(MI.getOpcode());
+ if (!IsReduction && !IsVector && !IsCube) {
continue;
}
// T0_Y (write masked) = MULLO_INT T1_X, T2_X
// T0_Z (write masked) = MULLO_INT T1_X, T2_X
// T0_W (write masked) = MULLO_INT T1_X, T2_X
+ //
+ // Cube instructions:
+ // T0_XYZW = CUBE T1_XYZW
+ // becomes:
+ // T0_X = CUBE T1_Z, T1_Y
+ // T0_Y = CUBE T1_Z, T1_X
+ // T0_Z = CUBE T1_X, T1_Z
+ // T0_W = CUBE T1_Y, T1_Z
for (unsigned Chan = 0; Chan < 4; Chan++) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Src0 = MI.getOperand(1).getReg();
- unsigned Src1 = MI.getOperand(2).getReg();
+ unsigned Src1 = 0;
+
+ // Determine the correct source registers
+ if (!IsCube) {
+ Src1 = MI.getOperand(2).getReg();
+ }
if (IsReduction) {
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
Src0 = TRI.getSubReg(Src0, SubRegIndex);
Src1 = TRI.getSubReg(Src1, SubRegIndex);
+ } else if (IsCube) {
+ static const int CubeSrcSwz[] = {2, 2, 0, 1};
+ unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
+ unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
+ Src1 = TRI.getSubReg(Src0, SubRegIndex1);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex0);
+ }
+
+ // Determine the correct destination register
+ unsigned Flags = 0;
+ if (IsCube) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ DstReg = TRI.getSubReg(DstReg, SubRegIndex);
+ } else {
+ // Mask the write if the original instruction does not write to
+ // the current Channel.
+ Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
+ unsigned DstBase = TRI.getHWRegIndex(DstReg);
+ DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
}
- unsigned DstBase = TRI.getHWRegIndex(DstReg);
- unsigned NewDstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
- unsigned Flags = (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
+
+ // Set the IsLast bit
Flags |= (Chan == 3 ? MO_FLAG_LAST : 0);
- MachineOperand NewDstOp = MachineOperand::CreateReg(NewDstReg, true);
+
+ // Add the new instruction
+ unsigned Opcode;
+ if (IsCube) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::CUBE_r600_pseudo:
+ Opcode = AMDGPU::CUBE_r600_real;
+ break;
+ case AMDGPU::CUBE_eg_pseudo:
+ Opcode = AMDGPU::CUBE_eg_real;
+ break;
+ default:
+ assert(!"Unknown CUBE instruction");
+ Opcode = 0;
+ break;
+ }
+ } else {
+ Opcode = MI.getOpcode();
+ }
+ MachineOperand NewDstOp = MachineOperand::CreateReg(DstReg, true);
NewDstOp.addTargetFlag(Flags);
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MI.getOpcode()))
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode))
.addOperand(NewDstOp)
.addReg(Src0)
.addReg(Src1)
{
switch(opcode) {
default: return false;
- case AMDGPU::CUBE_r600:
- case AMDGPU::CUBE_eg:
+ case AMDGPU::CUBE_r600_pseudo:
+ case AMDGPU::CUBE_r600_real:
+ case AMDGPU::CUBE_eg_pseudo:
+ case AMDGPU::CUBE_eg_real:
return true;
}
}
[(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
>;
-class CUBE_Common <bits<32> inst> : InstR600 <
- inst,
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src),
- "CUBE $dst $src",
- [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
- VecALU
->;
+multiclass CUBE_Common <bits<32> inst> { // Each defm of this multiclass yields a <name>_pseudo and a <name>_real record, both passing the same `inst` value to InstR600.
+ // _pseudo: R600_Reg128 destination and a single R600_Reg128 source; this is the record that carries the int_AMDGPU_cube selection pattern.
+ def _pseudo : InstR600 <
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst $src",
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ VecALU
+ >;
+ // _real: R600_Reg32 destination with two R600_Reg32 sources and an empty pattern list, so it is only built explicitly (the C++ expansion maps each *_pseudo opcode to its *_real counterpart).
+ def _real : InstR600 <
+ inst,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1),
+ "CUBE $dst, $src0, $src1",
+ [], VecALU
+ >;
+}
class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
inst, "EXP_IEEE",
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
def DOT4_r600 : DOT4_Common<0x50>;
- def CUBE_r600 : CUBE_Common<0x52>;
+ defm CUBE_r600 : CUBE_Common<0x52>;
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
def DOT4_eg : DOT4_Common<0xBE>;
- def CUBE_eg : CUBE_Common<0xC0>;
+ defm CUBE_eg : CUBE_Common<0xC0>;
def DIV_eg : DIV_Common<RECIP_IEEE_eg>;
def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>;