radeon/llvm: Lower ROTL to BIT_ALIGN
authorTom Stellard <thomas.stellard@amd.com>
Wed, 20 Jun 2012 20:28:01 +0000 (16:28 -0400)
committerTom Stellard <thomas.stellard@amd.com>
Thu, 21 Jun 2012 20:42:06 +0000 (20:42 +0000)
src/gallium/drivers/radeon/AMDGPUISelLowering.h
src/gallium/drivers/radeon/AMDGPUInstrInfo.td
src/gallium/drivers/radeon/AMDILISelLowering.cpp
src/gallium/drivers/radeon/R600ISelLowering.cpp
src/gallium/drivers/radeon/R600ISelLowering.h
src/gallium/drivers/radeon/R600Instructions.td

index 9aa602ba80092467d8a60b433ba3d1bc80e0e649..72342c996147174c9a3bbaf2b357d5b9f4d48feb 100644 (file)
@@ -57,6 +57,7 @@ namespace AMDGPUISD
 enum
 {
   AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER,
+  BITALIGN,
   FRACT,
   FMAX,
   SMAX,
index 5e44ef9c40b67f775e801163de427e38e308ca99..4452719686162fb103ca286888172a8c30b17dee 100644 (file)
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Profiles
+//===----------------------------------------------------------------------===//
+
+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
+
 //===----------------------------------------------------------------------===//
 // AMDGPU DAG Nodes
 //
 
+// out = ((a << 32) | b) >> c
+//
+// Can be used to optimize rotl:
+// rotl(a, b) = bitalign(a, a, 32 - b)
+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
+
 // out = a - floor(a)
 def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
 
index 12b3dce5ee8cd5506552bb14da3cc395d9cda2dc..28380010a945bda822ecc0c7d50c77ec3c9864ae 100644 (file)
@@ -564,7 +564,6 @@ AMDILTargetLowering::LowerMemArgument(
 
     // GPU doesn't have a rotl, rotr, or byteswap instruction
     setOperationAction(ISD::ROTR, VT, Expand);
-    setOperationAction(ISD::ROTL, VT, Expand);
     setOperationAction(ISD::BSWAP, VT, Expand);
 
     // GPU doesn't have any counting operators
index 3e021a23dbb37f85cd77c4d76b2db53964c196e3..5694c0bc9a2b8e748fb77130ff64bf023da2f6ed 100644 (file)
@@ -33,6 +33,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
 
   setOperationAction(ISD::FSUB, MVT::f32, Expand);
 
+  setOperationAction(ISD::ROTL, MVT::i32, Custom);
+
   setSchedulingPreference(Sched::VLIW);
 }
 
@@ -256,3 +258,29 @@ void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBl
           .addReg(PtrReg)
           .addImm(0);
 }
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+
+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+  switch (Op.getOpcode()) { // Dispatch on Op's opcode.
+  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); // No R600-specific case: defer to the AMDGPUTargetLowering implementation.
+  case ISD::ROTL: return LowerROTL(Op, DAG); // ROTL is rewritten by LowerROTL.
+  }
+}
+
+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
+{ // Builds BITALIGN(op0, op0, 32 - op1) from the operands of the ROTL node Op.
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+
+  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
+                     Op.getOperand(0), // operand 0 of the ROTL, passed as the first BITALIGN operand
+                     Op.getOperand(0), // the same operand again, as the second BITALIGN operand
+                     DAG.getNode(ISD::SUB, DL, VT,
+                                 DAG.getConstant(32, MVT::i32), // constant 32, created as an i32
+                                 Op.getOperand(1))); // 32 - operand 1 becomes the third BITALIGN operand
+}
index 6296145f668e6d8ea74a363340e8ec2af252c746..7b91373187a0ed0631b589d9e4a5238416d38b39 100644 (file)
@@ -26,6 +26,7 @@ public:
   R600TargetLowering(TargetMachine &TM);
   virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
       MachineBasicBlock * BB) const;
+  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 
 private:
   const R600InstrInfo * TII;
@@ -37,6 +38,9 @@ private:
   void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
       MachineRegisterInfo & MRI, unsigned dword_offset) const;
 
+  /// LowerROTL - Lower ROTL opcode to BITALIGN
+  SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+
 };
 
 } // End namespace llvm;
index 4be9fca9e237973a14767c520527baeec66e1779..409969b0586c6269a13b796667aa6fae90578850 100644 (file)
@@ -821,6 +821,13 @@ def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
 /* ------------------------------- */
 
 let Predicates = [isEGorCayman] in {
+
+  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
+    [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
+                                          R600_Reg32:$src2))],
+    VecALU
+  >;
+
   def MULADD_eg : MULADD_Common<0x14>;
   def ASHR_eg : ASHR_Common<0x15>;
   def LSHR_eg : LSHR_Common<0x16>;