const R600RegisterInfo * TRI;
bool evergreenEncoding;
+ bool isCube;
bool isReduction;
- unsigned reductionElement;
+ unsigned currentElement;
bool isLast;
unsigned section_start;
public:
R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
- _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false),
+ _OS(OS), TM(NULL), evergreenEncoding(false), isCube(false), isReduction(false),
isLast(true) { }
const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
private:
void emitALUInstr(MachineInstr &MI);
- void emitSrc(const MachineOperand & MO);
+ void emitSrc(const MachineOperand & MO, int chan_override = -1);
void emitDst(const MachineOperand & MO);
void emitALU(MachineInstr &MI, unsigned numSrc);
void emitTexInstr(MachineInstr &MI);
} else if (isReductionOp(MI.getOpcode())) {
isReduction = true;
isLast = false;
- for (reductionElement = 0; reductionElement < 4; reductionElement++) {
- isLast = (reductionElement == 3);
+ for (currentElement = 0; currentElement < 4; currentElement++) {
+ isLast = (currentElement == 3);
emitALUInstr(MI);
}
isReduction = false;
+ } else if (isCubeOp(MI.getOpcode())) {
+ isCube = true;
+ isLast = false;
+ for (currentElement = 0; currentElement < 4; currentElement++) {
+ isLast = (currentElement == 3);
+ emitALUInstr(MI);
+ }
+ isCube = false;
} else if (MI.getOpcode() == AMDIL::RETURN ||
MI.getOpcode() == AMDIL::BUNDLE ||
MI.getOpcode() == AMDIL::KILL) {
/* Emit instruction type */
emitByte(0);
- unsigned int opIndex;
- for (opIndex = 1; opIndex < numOperands; opIndex++) {
- /* Literal constants are always stored as the last operand. */
- if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
- break;
+ if (isCube) {
+ static const int cube_src_swz[] = {2, 2, 0, 1};
+ emitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
+ emitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
+ emitNullBytes(SRC_BYTE_COUNT);
+ } else {
+ unsigned int opIndex;
+ for (opIndex = 1; opIndex < numOperands; opIndex++) {
+ /* Literal constants are always stored as the last operand. */
+ if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
+ break;
+ }
+ emitSrc(MI.getOperand(opIndex));
}
- emitSrc(MI.getOperand(opIndex));
- }
/* Emit zeros for unused sources */
- for ( ; opIndex < 4; opIndex++) {
- emitNullBytes(SRC_BYTE_COUNT);
+ for ( ; opIndex < 4; opIndex++) {
+ emitNullBytes(SRC_BYTE_COUNT);
+ }
}
emitDst(dstOp);
emitALU(MI, numOperands - 1);
}
-void R600CodeEmitter::emitSrc(const MachineOperand & MO)
+void R600CodeEmitter::emitSrc(const MachineOperand & MO, int chan_override /* = -1 */)
{
uint32_t value = 0;
/* Emit the source select (2 bytes). For GPRs, this is the register index.
}
/* Emit the source channel (1 byte) */
- if (isReduction) {
- emitByte(reductionElement);
+ if (chan_override != -1) {
+ emitByte(chan_override);
+ } else if (isReduction) {
+ emitByte(currentElement);
} else if (MO.isReg()) {
emitByte(TRI->getHWRegChan(MO.getReg()));
} else {
emitByte(getHWReg(MO.getReg()));
/* Emit the element of the destination register (1 byte)*/
- if (isReduction) {
- emitByte(reductionElement);
+ if (isReduction || isCube) {
+ emitByte(currentElement);
} else {
emitByte(TRI->getHWRegChan(MO.getReg()));
}
}
/* Emit writemask (1 byte). */
- if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg()))
+ if ((isReduction && currentElement != TRI->getHWRegChan(MO.getReg()))
|| MO.getTargetFlags() & MO_FLAG_MASK) {
emitByte(0);
} else {
}
class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin = AnyALU> :
+ InstrItinClass itin = VecALU> :
InstR600 <inst,
(outs R600_Reg32:$dst),
ins,
[(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
>;
+class CUBE_Common <bits<32> inst> : InstR600 <
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst $src",
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ VecALU
+>;
+
class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
inst, "EXP_IEEE",
[]> {
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
def DOT4_r600 : DOT4_Common<0x50>;
+ def CUBE_r600 : CUBE_Common<0x52>;
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
def DOT4_eg : DOT4_Common<0xBE>;
+ def CUBE_eg : CUBE_Common<0xC0>;
} // End AMDGPUGen.EG_CAYMAN