field bits<32> Inst;
bit Trig = 0;
bit Op3 = 0;
- bit isVector = 0;
+ bit isVector = 0;
+ bits<2> FlagOperandIdx = 0;
let Inst = inst;
- let Namespace = "AMDIL";
+ let Namespace = "AMDGPU";
let OutOperandList = outs;
let InOperandList = ins;
let AsmString = asm;
// Vector instructions are instructions that must fill all slots in an
// instruction group
let TSFlags{6} = isVector;
+ let TSFlags{8-7} = FlagOperandIdx;
}
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
{
field bits<64> Inst;
- let Namespace = "AMDIL";
+ let Namespace = "AMDGPU";
}
def MEMxi : Operand<iPTR> {
}
+def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), // i32 predicate operand built from (ops R600_Predicate); appears as the $p input of the ALU classes and JUMP in this file
+ (ops PRED_SEL_OFF)>; // NOTE(review): PRED_SEL_OFF is presumably the "always execute" default value of the predicate — confirm against PredicateOperand
+
class R600_1OP <bits<32> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
InstR600 <inst,
(outs R600_Reg32:$dst),
- (ins R600_Reg32:$src, variable_ops),
- !strconcat(opName, " $dst, $src"),
+ (ins R600_Reg32:$src, R600_Pred:$p, variable_ops),
+ !strconcat(opName, " $dst, $src ($p)"),
pattern,
itin
>;
InstrItinClass itin = AnyALU> :
InstR600 <inst,
(outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, R600_Reg32:$src1, variable_ops),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops),
!strconcat(opName, " $dst, $src0, $src1"),
pattern,
itin
InstrItinClass itin = AnyALU> :
InstR600 <inst,
(outs R600_Reg32:$dst),
- (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, variable_ops),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2,R600_Pred:$p, variable_ops),
!strconcat(opName, " $dst, $src0, $src1, $src2"),
pattern,
itin>{
let Op3 = 1;
}
+
+
+def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst), // Instruction whose result $dst is an R600_Predicate_Bit; the opcode argument passed to InstR600 is 0
+ (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
+ "PRED $dst, $src0, $src1", // $flags is an input operand but is not part of the printed form
+ [], NullALU> // empty pattern list: no selection pattern is attached to this def
+{
+ let DisableEncoding = "$src0";
+ field bits<32> Inst;
+ bits<32> src1;
+
+ let Inst = src1; // the 32-bit Inst field is set from the $src1 immediate rather than from the opcode argument
+ let FlagOperandIdx = 3; // NOTE(review): presumably the position of $flags counting $dst as operand 0 — confirm
+}
+
+let isTerminator = 1, isBranch = 1 in { // every def inside this scope is marked as both a terminator and a branch
+def JUMP : InstR600 <0x10, // branch to $target that also takes the predicate operand $p; opcode argument is 0x10
+ (outs),
+ (ins brtarget:$target, R600_Pred:$p),
+ "JUMP $target ($p)",
+ [], AnyALU // empty pattern list: no selection pattern is attached to this def
+ >;
+}
+
class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
InstrItinClass itin = VecALU> :
InstR600 <inst,
}]
>;
-def COND_EQ : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOEQ: case ISD::SETUEQ:
- case ISD::SETEQ: return true;}}}]
->;
-
-def COND_NE : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETONE: case ISD::SETUNE:
- case ISD::SETNE: return true;}}}]
->;
-def COND_GT : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOGT: case ISD::SETUGT:
- case ISD::SETGT: return true;}}}]
->;
-
-def COND_GE : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOGE: case ISD::SETUGE:
- case ISD::SETGE: return true;}}}]
->;
-
-def COND_LT : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOLT: case ISD::SETULT:
- case ISD::SETLT: return true;}}}]
->;
-
-def COND_LE : PatLeaf <
- (cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOLE: case ISD::SETULE:
- case ISD::SETLE: return true;}}}]
->;
-
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern>
const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
if (Src) {
PointerType * PT = dyn_cast<PointerType>(Src->getType());
- return PT && PT->getAddressSpace() == AMDILAS::PARAM_I_ADDRESS;
+ return PT && PT->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS;
}
return false;
}]>;
//}
*/
def isR600 : Predicate<"Subtarget.device()"
- "->getGeneration() == AMDILDeviceInfo::HD4XXX">;
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">; // NOTE(review): same generation test as isR700 below, minus the device-flag check — confirm this is intended
def isR700 : Predicate<"Subtarget.device()"
- "->getGeneration() == AMDILDeviceInfo::HD4XXX &&"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
"Subtarget.device()->getDeviceFlag()"
">= OCL_DEVICE_RV710">; // true when generation is HD4XXX and the device flag is >= OCL_DEVICE_RV710
def isEG : Predicate<"Subtarget.device()"
- "->getGeneration() >= AMDILDeviceInfo::HD5XXX && "
+ "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
"Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">; // true when generation is >= HD5XXX and the device flag is not OCL_DEVICE_CAYMAN
def isCayman : Predicate<"Subtarget.device()"
"->getDeviceFlag() == OCL_DEVICE_CAYMAN">; // true only when the device flag is OCL_DEVICE_CAYMAN
def isEGorCayman : Predicate<"Subtarget.device()"
- "->getGeneration() == AMDILDeviceInfo::HD5XXX"
+ "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
"|| Subtarget.device()->getGeneration() =="
- "AMDILDeviceInfo::HD6XXX">;
+ "AMDGPUDeviceInfo::HD6XXX">; // true when generation is exactly HD5XXX or exactly HD6XXX
def isR600toCayman : Predicate<
- "Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX">;
+ "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; // true for any generation up to and including HD6XXX
let Predicates = [isR600toCayman] in {
[(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))]
>;
-def MOV : R600_1OP <0x19, "MOV", []>;
+def MOV : InstR600 <0x19, (outs R600_Reg32:$dst), // MOV written directly on InstR600, with an i32imm $flags input placed before the predicate operand
+ (ins R600_Reg32:$src0, i32imm:$flags,
+ R600_Pred:$p),
+ "MOV $dst, $src0", [], AnyALU> { // neither $flags nor $p is printed; the pattern list is empty
+ let FlagOperandIdx = 2; // NOTE(review): presumably the position of $flags counting $dst as operand 0 — confirm
+}
class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
(outs R600_Reg32:$dst),
- (ins R600_Reg32:$alu_literal, immType:$imm),
+ (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm),
"MOV_IMM $dst, $imm",
[], AnyALU
>;
(MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val)
>;
-def KILLGT : R600_2OP <
- 0x2D, "KILLGT",
- []
->;
+def KILLGT : InstR600 <0x2D, // KILLGT written directly on InstR600 with an extra i32imm $flags input; opcode argument is 0x2D
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p,
+ variable_ops),
+ "KILLGT $dst, $src0, $src1, $flags ($p)", // unlike MOV, both $flags and $p appear in the printed form
+ [], // empty pattern list; the KILP and KIL Pat defs in this file build KILLGT explicitly
+ NullALU>{
+ let FlagOperandIdx = 3; // NOTE(review): presumably the position of $flags counting $dst as operand 0 — confirm
+}
def AND_INT : R600_2OP <
0x30, "AND_INT",
def CNDE_INT : R600_3OP <
0x1C, "CNDE_INT",
[(set (i32 R600_Reg32:$dst),
- (IL_cmov_logical R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
+ (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
>;
/* Texture instructions */
class CNDE_Common <bits<32> inst> : R600_3OP <
inst, "CNDE",
[(set (f32 R600_Reg32:$dst),
- (IL_cmov_logical R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
+ (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))]
>;
class CNDGT_Common <bits<32> inst> : R600_3OP <
class DOT4_Common <bits<32> inst> : R600_REDUCTION <
inst,
- (ins R600_Reg128:$src0, R600_Reg128:$src1),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags), // adds an i32imm $flags input after the two 128-bit sources
"DOT4 $dst $src0, $src1",
- [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
->;
+ [] // the inline int_AMDGPU_dp4 pattern is dropped here; DOT4_Pat supplies the mapping instead
+ > {
+ let FlagOperandIdx = 3; // NOTE(review): presumably the position of $flags counting $dst as operand 0 — confirm against R600_REDUCTION's outs
+}
-class CUBE_Common <bits<32> inst> : InstR600 <
- inst,
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src),
- "CUBE $dst $src",
- [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
- VecALU
+class DOT4_Pat <Instruction dot4> : Pat < // selection pattern mapping int_AMDGPU_dp4 onto the instruction passed as `dot4`
+ (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1),
+ (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0) // NOTE(review): the literal 0 presumably fills the $flags operand of DOT4_Common — confirm
>;
+multiclass CUBE_Common <bits<32> inst> { // defines two instructions per instantiation, <name>_pseudo and <name>_real, both given the same `inst` value
+
+ def _pseudo : InstR600 < // 128-bit register form; this is the one carrying the int_AMDGPU_cube selection pattern
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst $src",
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ VecALU
+ >;
+
+ def _real : InstR600 < // 32-bit register form with two sources and a $flags immediate; NOTE(review): presumably what _pseudo is expanded into elsewhere — confirm
+ inst,
+ (outs R600_Reg32:$dst),
+ (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
+ "CUBE $dst, $src0, $src1", // $flags is not printed
+ [], VecALU // empty pattern list: never selected directly from a pattern
+ >{
+ let FlagOperandIdx = 3; // NOTE(review): presumably the position of $flags counting $dst as operand 0 — confirm
+ }
+}
+
class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
inst, "EXP_IEEE",
[(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))]
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
def DOT4_r600 : DOT4_Common<0x50>;
- def CUBE_r600 : CUBE_Common<0x52>;
+ def : DOT4_Pat <DOT4_r600>;
+ defm CUBE_r600 : CUBE_Common<0x52>;
def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
def DOT4_eg : DOT4_Common<0xBE>;
- def CUBE_eg : CUBE_Common<0xC0>;
+ def : DOT4_Pat <DOT4_eg>;
+ defm CUBE_eg : CUBE_Common<0xC0>;
def DIV_eg : DIV_Common<RECIP_IEEE_eg>;
def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>;
let DST_SEL_Z = 7; // Masked
let DST_SEL_W = 7; // Masked
let DATA_FORMAT = 0xD; // COLOR_32
+
+ // This is not really necessary, but there were some GPU hangs that appeared
+ // to be caused by ALU instructions in the next instruction group that wrote
+ // to the $ptr registers of the VTX_READ.
+ // e.g.
+ // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24
+ // %T2_X<def> = MOV %ZERO
+ // Adding this constraint prevents this from happening.
+ let Constraints = "$ptr.ptr = $dst";
+}
+
+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> // VTX_READ_eg variant whose result is a 128-bit register (R600_Reg128)
+ : VTX_READ_eg <buffer_id, (outs R600_Reg128:$dst), pattern> {
+
+ let MEGA_FETCH_COUNT = 16; // NOTE(review): presumably a byte count (16 bytes = 128 bits) — confirm
+ let DST_SEL_X = 0; // NOTE(review): the four DST_SEL_* values 0..3 presumably select components X, Y, Z, W in order — confirm
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+
+ // XXX: VTX_READ_128 instructions need to be forced to write to the same
+ // register that holds their buffer address, to avoid potential hangs. The
+ // constraint used by VTX_READ_32_eg cannot be reused here, because the
+ // $ptr.ptr and $dst registers are different sizes.
}
-def VTX_READ_PARAM_eg : VTX_READ_32_eg <0,
- [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+
+class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0, // VTX_READ_32_eg with first argument 0, parameterized over the result value type `vt`
+ [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] // matches load_param from an ADDRVTX_READ address into an R600_TReg32_X register
+>;
+
+def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>; // i32 instantiation
+def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>; // f32 instantiation
+
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 32-bit reads
+
+class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1, // VTX_READ_32_eg with first argument 1, parameterized over the result value type `vt`
+ [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] // matches global_load from an ADDRVTX_READ address into an R600_TReg32_X register
>;
-def VTX_READ_GLOBAL_eg : VTX_READ_32_eg <1,
- [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>; // i32 instantiation
+def VTX_READ_GLOBAL_f32_eg : VTX_READ_GLOBAL_eg<f32>; // f32 instantiation
+
+// 128-bit reads
+
+class VTX_READ_GLOBAL_128_eg <ValueType vt> : VTX_READ_128_eg <1, // VTX_READ_128_eg with buffer_id 1, parameterized over the result value type `vt`
+ [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] // matches global_load from an ADDRVTX_READ address into an R600_Reg128 register
>;
+def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>; // v4i32 instantiation
+def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>; // v4f32 instantiation
+
}
let Predicates = [isCayman] in {
[(set R600_TReg32:$dst, (intr))]
>;
-def TGID_X : R600PreloadInst <"TGID_X", int_r600_read_tgid_x>;
-def TGID_Y : R600PreloadInst <"TGID_Y", int_r600_read_tgid_y>;
-def TGID_Z : R600PreloadInst <"TGID_Z", int_r600_read_tgid_z>;
-
-def TIDIG_X : R600PreloadInst <"TIDIG_X", int_r600_read_tidig_x>;
-def TIDIG_Y : R600PreloadInst <"TIDIG_Y", int_r600_read_tidig_y>;
-def TIDIG_Z : R600PreloadInst <"TIDIG_Z", int_r600_read_tidig_z>;
-
-def NGROUPS_X : R600PreloadInst <"NGROUPS_X", int_r600_read_ngroups_x>;
-def NGROUPS_Y : R600PreloadInst <"NGROUPS_Y", int_r600_read_ngroups_y>;
-def NGROUPS_Z : R600PreloadInst <"NGROUPS_Z", int_r600_read_ngroups_z>;
-
-def GLOBAL_SIZE_X : R600PreloadInst <"GLOBAL_SIZE_X",
- int_r600_read_global_size_x>;
-def GLOBAL_SIZE_Y : R600PreloadInst <"GLOBAL_SIZE_Y",
- int_r600_read_global_size_y>;
-def GLOBAL_SIZE_Z : R600PreloadInst <"GLOBAL_SIZE_Z",
- int_r600_read_global_size_z>;
-
-def LOCAL_SIZE_X : R600PreloadInst <"LOCAL_SIZE_X",
- int_r600_read_local_size_x>;
-def LOCAL_SIZE_Y : R600PreloadInst <"LOCAL_SIZE_Y",
- int_r600_read_local_size_y>;
-def LOCAL_SIZE_Z : R600PreloadInst <"LOCAL_SIZE_Z",
- int_r600_read_local_size_z>;
-
def R600_LOAD_CONST : AMDGPUShaderInst <
(outs R600_Reg32:$dst),
(ins i32imm:$src0),
[(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
>;
-def LOAD_INPUT : AMDGPUShaderInst <
- (outs R600_Reg32:$dst),
- (ins i32imm:$src),
- "LOAD_INPUT $dst, $src",
- [(set R600_Reg32:$dst, (int_R600_load_input imm:$src))]
->;
-
def RESERVE_REG : AMDGPUShaderInst <
(outs),
(ins i32imm:$src),
[(int_AMDGPU_reserve_reg imm:$src)]
>;
-def STORE_OUTPUT: AMDGPUShaderInst <
- (outs),
- (ins R600_Reg32:$src0, i32imm:$src1),
- "STORE_OUTPUT $src0, $src1",
- [(int_AMDGPU_store_output R600_Reg32:$src0, imm:$src1)]
->;
-
def TXD: AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4),
// KIL Patterns
def KILP : Pat <
(int_AMDGPU_kilp),
- (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
+ (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0)) // KILLGT with $src0 = ONE and $src1 = ZERO; the trailing 0 fills KILLGT's i32imm $flags operand
>;
def KIL : Pat <
(int_AMDGPU_kill R600_Reg32:$src0),
- (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
+ (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0)) // KILLGT with ZERO as its first source and the intrinsic's argument as its second; the trailing 0 fills $flags
>;
// SGT Reverse args
def : BitConvert <i32, f32, R600_Reg32>;
def : BitConvert <f32, i32, R600_Reg32>;
+def : BitConvert <v4f32, v4i32, R600_Reg128>;
} // End isR600toCayman Predicate