nv50/ir: only unspill once ahead of a group of instructions
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_lowering_gm107.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 * 2014 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "codegen/nv50_ir.h"
25 #include "codegen/nv50_ir_build_util.h"
26
27 #include "codegen/nv50_ir_target_nvc0.h"
28 #include "codegen/nv50_ir_lowering_gm107.h"
29
30 #include <limits>
31
32 namespace nv50_ir {
33
// Two-bit per-lane operation selectors; QUADOP() below packs four of them
// into the sub-opcode byte that this file assigns to OP_QUADOP instructions.
#define QOP_ADD  0
#define QOP_SUBR 1
#define QOP_SUB  2
#define QOP_MOV2 3

// Build a packed quad sub-opcode from four operation names (written without
// the QOP_ prefix). Argument order is UL, UR, LL, LR: the first argument
// lands in bits 7:6 and the last one in bits 1:0.
#define QUADOP(ul, ur, ll, lr)                      \
   ((QOP_##ul << 6) | (QOP_##ur << 4) |             \
    (QOP_##ll << 2) | (QOP_##lr << 0))
43
44 bool
45 GM107LoweringPass::handleManualTXD(TexInstruction *i)
46 {
47 static const uint8_t qOps[4][2] =
48 {
49 { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
50 { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
51 { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
52 { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
53 };
54 Value *def[4][4];
55 Value *crd[3];
56 Value *tmp;
57 Instruction *tex, *add;
58 Value *zero = bld.loadImm(bld.getSSA(), 0);
59 int l, c;
60 const int dim = i->tex.target.getDim();
61 const int array = i->tex.target.isArray();
62
63 i->op = OP_TEX; // no need to clone dPdx/dPdy later
64
65 for (c = 0; c < dim; ++c)
66 crd[c] = bld.getScratch();
67 tmp = bld.getScratch();
68
69 for (l = 0; l < 4; ++l) {
70 // mov coordinates from lane l to all lanes
71 bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
72 for (c = 0; c < dim; ++c) {
73 bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l));
74 add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
75 add->subOp = 0x00;
76 add->lanes = 1; /* abused for .ndv */
77 }
78
79 // add dPdx from lane l to lanes dx
80 for (c = 0; c < dim; ++c) {
81 bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l));
82 add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
83 add->subOp = qOps[l][0];
84 add->lanes = 1; /* abused for .ndv */
85 }
86
87 // add dPdy from lane l to lanes dy
88 for (c = 0; c < dim; ++c) {
89 bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l));
90 add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
91 add->subOp = qOps[l][1];
92 add->lanes = 1; /* abused for .ndv */
93 }
94
95 // texture
96 bld.insert(tex = cloneForward(func, i));
97 for (c = 0; c < dim; ++c)
98 tex->setSrc(c + array, crd[c]);
99 bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
100
101 // save results
102 for (c = 0; i->defExists(c); ++c) {
103 Instruction *mov;
104 def[c][l] = bld.getSSA();
105 mov = bld.mkMov(def[c][l], tex->getDef(c));
106 mov->fixed = 1;
107 mov->lanes = 1 << l;
108 }
109 }
110
111 for (c = 0; i->defExists(c); ++c) {
112 Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
113 for (l = 0; l < 4; ++l)
114 u->setSrc(l, def[c][l]);
115 }
116
117 i->bb->remove(i);
118 return true;
119 }
120
121 bool
122 GM107LoweringPass::handleDFDX(Instruction *insn)
123 {
124 Instruction *shfl;
125 int qop = 0, xid = 0;
126
127 switch (insn->op) {
128 case OP_DFDX:
129 qop = QUADOP(SUB, SUBR, SUB, SUBR);
130 xid = 1;
131 break;
132 case OP_DFDY:
133 qop = QUADOP(SUB, SUB, SUBR, SUBR);
134 xid = 2;
135 break;
136 default:
137 assert(!"invalid dfdx opcode");
138 break;
139 }
140
141 shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
142 insn->getSrc(0), bld.mkImm(xid));
143 shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
144 insn->op = OP_QUADOP;
145 insn->subOp = qop;
146 insn->lanes = 0; /* abused for !.ndv */
147 insn->setSrc(1, insn->getSrc(0));
148 insn->setSrc(0, shfl->getDef(0));
149 return true;
150 }
151
152 bool
153 GM107LoweringPass::handlePFETCH(Instruction *i)
154 {
155 Value *tmp0 = bld.getScratch();
156 Value *tmp1 = bld.getScratch();
157 Value *tmp2 = bld.getScratch();
158 bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
159 bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
160 bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
161 bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
162 if (i->getSrc(1))
163 bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
164 else
165 bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0));
166 bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
167 i->setSrc(0, tmp0);
168 i->setSrc(1, NULL);
169 return true;
170 }
171
172 bool
173 GM107LoweringPass::handlePOPCNT(Instruction *i)
174 {
175 Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
176 i->getSrc(0), i->getSrc(1));
177 i->setSrc(0, tmp);
178 i->setSrc(1, NULL);
179 return true;
180 }
181
182 //
183 // - add quadop dance for texturing
184 // - put FP outputs in GPRs
185 // - convert instruction sequences
186 //
187 bool
188 GM107LoweringPass::visit(Instruction *i)
189 {
190 bld.setPosition(i, false);
191
192 if (i->cc != CC_ALWAYS)
193 checkPredicate(i);
194
195 switch (i->op) {
196 case OP_PFETCH:
197 return handlePFETCH(i);
198 case OP_DFDX:
199 case OP_DFDY:
200 return handleDFDX(i);
201 case OP_POPCNT:
202 return handlePOPCNT(i);
203 default:
204 return NVC0LoweringPass::visit(i);
205 }
206 }
207
208 } // namespace nv50_ir