radeonsi: Fix sampler views for depth textures.
[mesa.git] / src / gallium / drivers / radeon / R600ExpandSpecialInstrs.cpp
1 //===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // Vector, Reduction, and Cube instructions need to fill the entire instruction
10 // group to work correctly. This pass expands these individual instructions
11 // into several instructions that will completely fill the instruction group.
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "R600Defines.h"
16 #include "R600InstrInfo.h"
17 #include "R600RegisterInfo.h"
18 #include "R600MachineFunctionInfo.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22
23 using namespace llvm;
24
25 namespace {
26
27 class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
28
29 private:
30 static char ID;
31 const R600InstrInfo *TII;
32
33 bool ExpandInputPerspective(MachineInstr& MI);
34 bool ExpandInputConstant(MachineInstr& MI);
35
36 public:
37 R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
38 TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
39
40 virtual bool runOnMachineFunction(MachineFunction &MF);
41
42 const char *getPassName() const {
43 return "R600 Expand special instructions pass";
44 }
45 };
46
47 } // End anonymous namespace
48
49 char R600ExpandSpecialInstrsPass::ID = 0;
50
51 FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
52 return new R600ExpandSpecialInstrsPass(TM);
53 }
54
55 bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
56 {
57 const R600RegisterInfo &TRI = TII->getRegisterInfo();
58 if (MI.getOpcode() != AMDGPU::input_perspective)
59 return false;
60
61 MachineBasicBlock::iterator I = &MI;
62 unsigned DstReg = MI.getOperand(0).getReg();
63 R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
64 ->getInfo<R600MachineFunctionInfo>();
65 unsigned IJIndexBase;
66
67 // In Evergreen ISA doc section 8.3.2 :
68 // We need to interpolate XY and ZW in two different instruction groups.
69 // An INTERP_* must occupy all 4 slots of an instruction group.
70 // Output of INTERP_XY is written in X,Y slots
71 // Output of INTERP_ZW is written in Z,W slots
72 //
73 // Thus interpolation requires the following sequences :
74 //
75 // AnyGPR.x = INTERP_ZW; (Write Masked Out)
76 // AnyGPR.y = INTERP_ZW; (Write Masked Out)
77 // DstGPR.z = INTERP_ZW;
78 // DstGPR.w = INTERP_ZW; (End of first IG)
79 // DstGPR.x = INTERP_XY;
80 // DstGPR.y = INTERP_XY;
81 // AnyGPR.z = INTERP_XY; (Write Masked Out)
82 // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
83 //
84 switch (MI.getOperand(1).getImm()) {
85 case 0:
86 IJIndexBase = MFI->GetIJPerspectiveIndex();
87 break;
88 case 1:
89 IJIndexBase = MFI->GetIJLinearIndex();
90 break;
91 default:
92 assert(0 && "Unknow ij index");
93 }
94
95 for (unsigned i = 0; i < 8; i++) {
96 unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
97 2 * IJIndexBase + ((i + 1) % 2));
98 unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
99 4 * MI.getOperand(2).getImm());
100
101 unsigned Sel;
102 switch (i % 4) {
103 case 0:Sel = AMDGPU::sel_x;break;
104 case 1:Sel = AMDGPU::sel_y;break;
105 case 2:Sel = AMDGPU::sel_z;break;
106 case 3:Sel = AMDGPU::sel_w;break;
107 default:break;
108 }
109
110 unsigned Res = TRI.getSubReg(DstReg, Sel);
111
112 const MCInstrDesc &Opcode = (i < 4)?
113 TII->get(AMDGPU::INTERP_ZW):
114 TII->get(AMDGPU::INTERP_XY);
115
116 MachineInstr *NewMI = BuildMI(*(MI.getParent()),
117 I, MI.getParent()->findDebugLoc(I),
118 Opcode, Res)
119 .addReg(IJIndex)
120 .addReg(ReadReg)
121 .addImm(0);
122
123 if (!(i> 1 && i < 6)) {
124 TII->addFlag(NewMI, 0, MO_FLAG_MASK);
125 }
126
127 if (i % 4 != 3)
128 TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
129 }
130
131 MI.eraseFromParent();
132
133 return true;
134 }
135
136 bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
137 {
138 const R600RegisterInfo &TRI = TII->getRegisterInfo();
139 if (MI.getOpcode() != AMDGPU::input_constant)
140 return false;
141
142 MachineBasicBlock::iterator I = &MI;
143 unsigned DstReg = MI.getOperand(0).getReg();
144
145 for (unsigned i = 0; i < 4; i++) {
146 unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
147 4 * MI.getOperand(1).getImm() + i);
148
149 unsigned Sel;
150 switch (i % 4) {
151 case 0:Sel = AMDGPU::sel_x;break;
152 case 1:Sel = AMDGPU::sel_y;break;
153 case 2:Sel = AMDGPU::sel_z;break;
154 case 3:Sel = AMDGPU::sel_w;break;
155 default:break;
156 }
157
158 unsigned Res = TRI.getSubReg(DstReg, Sel);
159
160 MachineInstr *NewMI = BuildMI(*(MI.getParent()),
161 I, MI.getParent()->findDebugLoc(I),
162 TII->get(AMDGPU::INTERP_LOAD_P0), Res)
163 .addReg(ReadReg)
164 .addImm(0);
165
166 if (i % 4 != 3)
167 TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
168 }
169
170 MI.eraseFromParent();
171
172 return true;
173 }
174
175 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
176
177 const R600RegisterInfo &TRI = TII->getRegisterInfo();
178
179 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
180 BB != BB_E; ++BB) {
181 MachineBasicBlock &MBB = *BB;
182 MachineBasicBlock::iterator I = MBB.begin();
183 while (I != MBB.end()) {
184 MachineInstr &MI = *I;
185 I = llvm::next(I);
186
187 if (ExpandInputPerspective(MI))
188 continue;
189 if (ExpandInputConstant(MI))
190 continue;
191
192 bool IsReduction = TII->isReductionOp(MI.getOpcode());
193 bool IsVector = TII->isVector(MI);
194 bool IsCube = TII->isCubeOp(MI.getOpcode());
195 if (!IsReduction && !IsVector && !IsCube) {
196 continue;
197 }
198
199 // Expand the instruction
200 //
201 // Reduction instructions:
202 // T0_X = DP4 T1_XYZW, T2_XYZW
203 // becomes:
204 // TO_X = DP4 T1_X, T2_X
205 // TO_Y (write masked) = DP4 T1_Y, T2_Y
206 // TO_Z (write masked) = DP4 T1_Z, T2_Z
207 // TO_W (write masked) = DP4 T1_W, T2_W
208 //
209 // Vector instructions:
210 // T0_X = MULLO_INT T1_X, T2_X
211 // becomes:
212 // T0_X = MULLO_INT T1_X, T2_X
213 // T0_Y (write masked) = MULLO_INT T1_X, T2_X
214 // T0_Z (write masked) = MULLO_INT T1_X, T2_X
215 // T0_W (write masked) = MULLO_INT T1_X, T2_X
216 //
217 // Cube instructions:
218 // T0_XYZW = CUBE T1_XYZW
219 // becomes:
220 // TO_X = CUBE T1_Z, T1_Y
221 // T0_Y = CUBE T1_Z, T1_X
222 // T0_Z = CUBE T1_X, T1_Z
223 // T0_W = CUBE T1_Y, T1_Z
224 for (unsigned Chan = 0; Chan < 4; Chan++) {
225 unsigned DstReg = MI.getOperand(0).getReg();
226 unsigned Src0 = MI.getOperand(1).getReg();
227 unsigned Src1 = 0;
228
229 // Determine the correct source registers
230 if (!IsCube) {
231 Src1 = MI.getOperand(2).getReg();
232 }
233 if (IsReduction) {
234 unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
235 Src0 = TRI.getSubReg(Src0, SubRegIndex);
236 Src1 = TRI.getSubReg(Src1, SubRegIndex);
237 } else if (IsCube) {
238 static const int CubeSrcSwz[] = {2, 2, 0, 1};
239 unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
240 unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
241 Src1 = TRI.getSubReg(Src0, SubRegIndex1);
242 Src0 = TRI.getSubReg(Src0, SubRegIndex0);
243 }
244
245 // Determine the correct destination registers;
246 unsigned Flags = 0;
247 if (IsCube) {
248 unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
249 DstReg = TRI.getSubReg(DstReg, SubRegIndex);
250 } else {
251 // Mask the write if the original instruction does not write to
252 // the current Channel.
253 Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
254 unsigned DstBase = TRI.getHWRegIndex(DstReg);
255 DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
256 }
257
258 // Set the IsLast bit
259 Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
260
261 // Add the new instruction
262 unsigned Opcode;
263 if (IsCube) {
264 switch (MI.getOpcode()) {
265 case AMDGPU::CUBE_r600_pseudo:
266 Opcode = AMDGPU::CUBE_r600_real;
267 break;
268 case AMDGPU::CUBE_eg_pseudo:
269 Opcode = AMDGPU::CUBE_eg_real;
270 break;
271 default:
272 assert(!"Unknown CUBE instruction");
273 Opcode = 0;
274 break;
275 }
276 } else {
277 Opcode = MI.getOpcode();
278 }
279 MachineInstr *NewMI =
280 BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
281 .addReg(Src0)
282 .addReg(Src1)
283 .addImm(0); // Flag
284
285 NewMI->setIsInsideBundle(Chan != 0);
286 TII->addFlag(NewMI, 0, Flags);
287 }
288 MI.eraseFromParent();
289 }
290 }
291 return false;
292 }