nvc0: add maxwell (sm50) compiler backend
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_lowering_gm107.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 * 2014 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "codegen/nv50_ir.h"
25 #include "codegen/nv50_ir_build_util.h"
26
27 #include "codegen/nv50_ir_target_nvc0.h"
28 #include "codegen/nv50_ir_lowering_gm107.h"
29
30 #include <limits>
31
32 namespace nv50_ir {
33
34 #define QOP_ADD 0
35 #define QOP_SUBR 1
36 #define QOP_SUB 2
37 #define QOP_MOV2 3
38
39 // UL UR LL LR
40 #define QUADOP(q, r, s, t) \
41 ((QOP_##q << 6) | (QOP_##r << 4) | \
42 (QOP_##s << 2) | (QOP_##t << 0))
43
// Lower TXD (texture fetch with explicit derivatives) manually.
//
// For each of the 4 lanes of a quad: broadcast that lane's coordinates and
// derivatives to all lanes (via SHFL), apply the per-lane quad-op
// add/subtract mask to offset the coordinates by dPdx/dPdy, issue a plain
// TEX inside a QUADON/QUADPOP region, and keep the result only in the
// originating lane (mov->lanes = 1 << l). The four per-lane results are
// finally recombined with OP_UNION into the original defs.
bool
GM107LoweringPass::handleManualTXD(TexInstruction *i)
{
   // qOps[l][0] is the quad-op mask used to add dPdx for lane l,
   // qOps[l][1] the one used to add dPdy (see QUADOP/QOP_* above).
   static const uint8_t qOps[4][2] =
   {
      { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
      { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
      { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
      { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
   };
   Value *def[4][4];            // def[c][l]: component c of lane l's result
                                //  (assumes at most 4 defs — TODO confirm)
   Value *crd[3];               // per-component working coordinates
   Value *tmp;
   Instruction *tex, *add;
   Value *zero = bld.loadImm(bld.getSSA(), 0);
   int l, c;
   const int dim = i->tex.target.getDim();

   i->op = OP_TEX; // no need to clone dPdx/dPdy later

   for (c = 0; c < dim; ++c)
      crd[c] = bld.getScratch();
   tmp = bld.getScratch();

   for (l = 0; l < 4; ++l) {
      // mov coordinates from lane l to all lanes
      bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
      for (c = 0; c < dim; ++c) {
         bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c), bld.mkImm(l));
         // quadop with subOp 0 (all-ADD of zero) just propagates the value
         add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
         add->subOp = 0x00;
         add->lanes = 1; /* abused for .ndv */
      }

      // add dPdx from lane l to lanes dx
      for (c = 0; c < dim; ++c) {
         bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l));
         add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
         add->subOp = qOps[l][0];
         add->lanes = 1; /* abused for .ndv */
      }

      // add dPdy from lane l to lanes dy
      for (c = 0; c < dim; ++c) {
         bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l));
         add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
         add->subOp = qOps[l][1];
         add->lanes = 1; /* abused for .ndv */
      }

      // texture: clone the (now OP_TEX) instruction with the adjusted coords
      bld.insert(tex = cloneForward(func, i));
      for (c = 0; c < dim; ++c)
         tex->setSrc(c, crd[c]);
      bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);

      // save results: keep lane l's values only (fixed mov, lane mask 1<<l)
      for (c = 0; i->defExists(c); ++c) {
         Instruction *mov;
         def[c][l] = bld.getSSA();
         mov = bld.mkMov(def[c][l], tex->getDef(c));
         mov->fixed = 1;
         mov->lanes = 1 << l;
      }
   }

   // recombine the four per-lane results into the original defs
   for (c = 0; i->defExists(c); ++c) {
      Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
      for (l = 0; l < 4; ++l)
         u->setSrc(l, def[c][l]);
   }

   i->bb->remove(i);
   return true;
}
119
120 bool
121 GM107LoweringPass::handleDFDX(Instruction *insn)
122 {
123 Instruction *shfl;
124 int qop = 0, xid = 0;
125
126 switch (insn->op) {
127 case OP_DFDX:
128 qop = QUADOP(SUB, SUBR, SUB, SUBR);
129 xid = 1;
130 break;
131 case OP_DFDY:
132 qop = QUADOP(SUB, SUB, SUBR, SUBR);
133 xid = 2;
134 break;
135 default:
136 assert(!"invalid dfdx opcode");
137 break;
138 }
139
140 shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
141 insn->getSrc(0), bld.mkImm(xid));
142 shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
143 insn->op = OP_QUADOP;
144 insn->subOp = qop;
145 insn->lanes = 0; /* abused for !.ndv */
146 insn->setSrc(1, insn->getSrc(0));
147 insn->setSrc(0, shfl->getDef(0));
148 return true;
149 }
150
151 bool
152 GM107LoweringPass::handlePFETCH(Instruction *i)
153 {
154 Value *tmp0 = bld.getScratch();
155 Value *tmp1 = bld.getScratch();
156 Value *tmp2 = bld.getScratch();
157 bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
158 bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
159 bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
160 bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
161 bld.mkOp1(OP_MOV , TYPE_U32, tmp2, bld.mkImm(i->getSrc(0)->reg.data.u32));
162 bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
163 i->setSrc(0, tmp0);
164 i->setSrc(1, NULL);
165 return true;
166 }
167
168 bool
169 GM107LoweringPass::handlePOPCNT(Instruction *i)
170 {
171 Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
172 i->getSrc(0), i->getSrc(1));
173 i->setSrc(0, tmp);
174 i->setSrc(1, NULL);
175 return TRUE;
176 }
177
178 //
179 // - add quadop dance for texturing
180 // - put FP outputs in GPRs
181 // - convert instruction sequences
182 //
// Main per-instruction lowering dispatcher: routes each opcode to its
// dedicated handler or rewrites it inline. Returns true in all paths
// (handlers' return values are propagated where used directly).
bool
GM107LoweringPass::visit(Instruction *i)
{
   // new instructions are emitted *before* i so they execute first
   bld.setPosition(i, false);

   if (i->cc != CC_ALWAYS)
      checkPredicate(i);

   switch (i->op) {
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXF:
   case OP_TXG:
      return handleTEX(i->asTex());
   case OP_TXD:
      return handleTXD(i->asTex());
   case OP_TXLQ:
      return handleTXLQ(i->asTex());
   case OP_TXQ:
      return handleTXQ(i->asTex());
   case OP_EX2:
      // split EX2 into PREEX2 + EX2, feeding EX2 from its own def
      bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
      i->setSrc(0, i->getDef(0));
      break;
   case OP_POW:
      return handlePOW(i);
   case OP_DIV:
      return handleDIV(i);
   case OP_MOD:
      return handleMOD(i);
   case OP_SQRT:
      return handleSQRT(i);
   case OP_EXPORT:
      return handleEXPORT(i);
   case OP_PFETCH:
      return handlePFETCH(i);
   case OP_EMIT:
   case OP_RESTART:
      return handleOUT(i);
   case OP_RDSV:
      return handleRDSV(i);
   case OP_WRSV:
      return handleWRSV(i);
   case OP_LOAD:
      // shader-input loads need per-shader-type rewriting
      if (i->src(0).getFile() == FILE_SHADER_INPUT) {
         if (prog->getType() == Program::TYPE_COMPUTE) {
            // compute reads its "inputs" from const buffer 0
            i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
            i->getSrc(0)->reg.fileIndex = 0;
         } else
         if (prog->getType() == Program::TYPE_GEOMETRY &&
             i->src(0).isIndirect(0)) {
            // XXX: this assumes vec4 units
            Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                                    i->getIndirect(0, 0), bld.mkImm(4));
            i->setIndirect(0, 0, ptr);
         } else {
            i->op = OP_VFETCH;
            assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
         }
      }
      break;
   case OP_ATOM:
   {
      // global-memory atomics additionally need cache-control handling
      const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
      handleATOM(i);
      handleCasExch(i, cctl);
   }
      break;
   case OP_SULDB:
   case OP_SULDP:
   case OP_SUSTB:
   case OP_SUSTP:
   case OP_SUREDB:
   case OP_SUREDP:
      handleSurfaceOpNVE4(i->asTex());
      break;
   case OP_DFDX:
   case OP_DFDY:
      handleDFDX(i);
      break;
   case OP_POPCNT:
      handlePOPCNT(i);
      break;
   default:
      break;
   }
   return true;
}
272
273 } // namespace nv50_ir