gm107/ir: use CS2R for SV_CLOCK
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_target_gm107.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 * 2014 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "codegen/nv50_ir_target_gm107.h"
25 #include "codegen/nv50_ir_lowering_gm107.h"
26
27 namespace nv50_ir {
28
29 Target *getTargetGM107(unsigned int chipset)
30 {
31 return new TargetGM107(chipset);
32 }
33
34 // BUILTINS / LIBRARY FUNCTIONS:
35
36 // laziness -> will just hardcode everything for the time being
37
38 #include "lib/gm107.asm.h"
39
40 void
41 TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
42 {
43 *code = (const uint32_t *)&gm107_builtin_code[0];
44 *size = sizeof(gm107_builtin_code);
45 }
46
47 uint32_t
48 TargetGM107::getBuiltinOffset(int builtin) const
49 {
50 assert(builtin < NVC0_BUILTIN_COUNT);
51 return gm107_builtin_offsets[builtin];
52 }
53
54 bool
55 TargetGM107::isOpSupported(operation op, DataType ty) const
56 {
57 switch (op) {
58 case OP_SAD:
59 case OP_POW:
60 case OP_SQRT:
61 case OP_DIV:
62 case OP_MOD:
63 return false;
64 default:
65 break;
66 }
67
68 return true;
69 }
70
71 // Return true when an instruction supports the reuse flag. When supported, the
72 // hardware will use the operand reuse cache introduced since Maxwell, which
73 // should try to reduce bank conflicts by caching values for the subsequent
74 // instructions. Note that the next instructions have to use the same GPR id in
75 // the same operand slot.
76 bool
77 TargetGM107::isReuseSupported(const Instruction *insn) const
78 {
79 const OpClass cl = getOpClass(insn->op);
80
81 // TODO: double-check!
82 switch (cl) {
83 case OPCLASS_ARITH:
84 case OPCLASS_COMPARE:
85 case OPCLASS_LOGIC:
86 case OPCLASS_MOVE:
87 case OPCLASS_SHIFT:
88 return true;
89 case OPCLASS_BITFIELD:
90 if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
91 return true;
92 break;
93 default:
94 break;
95 }
96 return false;
97 }
98
99 // Return true when an instruction requires to set up a barrier because it
100 // doesn't operate at a fixed latency. Variable latency instructions are memory
101 // operations, double precision operations, special function unit operations
102 // and other low throughput instructions.
103 bool
104 TargetGM107::isBarrierRequired(const Instruction *insn) const
105 {
106 const OpClass cl = getOpClass(insn->op);
107
108 if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
109 return true;
110
111 switch (cl) {
112 case OPCLASS_ATOMIC:
113 case OPCLASS_LOAD:
114 case OPCLASS_STORE:
115 case OPCLASS_SURFACE:
116 case OPCLASS_TEXTURE:
117 return true;
118 case OPCLASS_SFU:
119 switch (insn->op) {
120 case OP_COS:
121 case OP_EX2:
122 case OP_LG2:
123 case OP_LINTERP:
124 case OP_PINTERP:
125 case OP_RCP:
126 case OP_RSQ:
127 case OP_SIN:
128 return true;
129 default:
130 break;
131 }
132 break;
133 case OPCLASS_BITFIELD:
134 switch (insn->op) {
135 case OP_BFIND:
136 case OP_POPCNT:
137 return true;
138 default:
139 break;
140 }
141 break;
142 case OPCLASS_CONTROL:
143 switch (insn->op) {
144 case OP_EMIT:
145 case OP_RESTART:
146 return true;
147 default:
148 break;
149 }
150 break;
151 case OPCLASS_OTHER:
152 switch (insn->op) {
153 case OP_AFETCH:
154 case OP_PFETCH:
155 case OP_PIXLD:
156 case OP_SHFL:
157 return true;
158 case OP_RDSV:
159 return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
160 default:
161 break;
162 }
163 break;
164 case OPCLASS_ARITH:
165 // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
166 if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
167 !isFloatType(insn->dType))
168 return true;
169 break;
170 case OPCLASS_CONVERT:
171 if (insn->def(0).getFile() != FILE_PREDICATE &&
172 insn->src(0).getFile() != FILE_PREDICATE)
173 return true;
174 break;
175 default:
176 break;
177 }
178 return false;
179 }
180
181 bool
182 TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
183 {
184 // TODO
185 return false;
186 }
187
188 // Return the number of stall counts needed to complete a single instruction.
189 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require
190 // different number of stall counts like memory operations.
191 int
192 TargetGM107::getLatency(const Instruction *insn) const
193 {
194 // TODO: better values! This should be good enough for now though.
195 switch (insn->op) {
196 case OP_EMIT:
197 case OP_EXPORT:
198 case OP_PIXLD:
199 case OP_RESTART:
200 case OP_STORE:
201 case OP_SUSTB:
202 case OP_SUSTP:
203 return 1;
204 case OP_SHFL:
205 return 2;
206 case OP_ADD:
207 case OP_AND:
208 case OP_EXTBF:
209 case OP_FMA:
210 case OP_INSBF:
211 case OP_MAD:
212 case OP_MAX:
213 case OP_MIN:
214 case OP_MOV:
215 case OP_MUL:
216 case OP_NOT:
217 case OP_OR:
218 case OP_PREEX2:
219 case OP_PRESIN:
220 case OP_QUADOP:
221 case OP_SELP:
222 case OP_SET:
223 case OP_SET_AND:
224 case OP_SET_OR:
225 case OP_SET_XOR:
226 case OP_SHL:
227 case OP_SHLADD:
228 case OP_SHR:
229 case OP_SLCT:
230 case OP_SUB:
231 case OP_VOTE:
232 case OP_XOR:
233 if (insn->dType != TYPE_F64)
234 return 6;
235 break;
236 case OP_RDSV:
237 return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
238 case OP_ABS:
239 case OP_CEIL:
240 case OP_CVT:
241 case OP_FLOOR:
242 case OP_NEG:
243 case OP_SAT:
244 case OP_TRUNC:
245 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
246 insn->src(0).getFile() == FILE_PREDICATE))
247 return 6;
248 break;
249 case OP_BFIND:
250 case OP_COS:
251 case OP_EX2:
252 case OP_LG2:
253 case OP_POPCNT:
254 case OP_QUADON:
255 case OP_QUADPOP:
256 case OP_RCP:
257 case OP_RSQ:
258 case OP_SIN:
259 return 13;
260 default:
261 break;
262 }
263 // Use the maximum number of stall counts for other instructions.
264 return 15;
265 }
266
267 // Return the operand read latency which is the number of stall counts before
268 // an instruction can read its sources. For memory operations like ATOM, LOAD
269 // and STORE, the memory access has to be indirect.
270 int
271 TargetGM107::getReadLatency(const Instruction *insn) const
272 {
273 switch (insn->op) {
274 case OP_ABS:
275 case OP_BFIND:
276 case OP_CEIL:
277 case OP_COS:
278 case OP_EX2:
279 case OP_FLOOR:
280 case OP_LG2:
281 case OP_NEG:
282 case OP_POPCNT:
283 case OP_RCP:
284 case OP_RSQ:
285 case OP_SAT:
286 case OP_SIN:
287 case OP_SULDB:
288 case OP_SULDP:
289 case OP_SUREDB:
290 case OP_SUREDP:
291 case OP_SUSTB:
292 case OP_SUSTP:
293 case OP_TRUNC:
294 return 4;
295 case OP_CVT:
296 if (insn->def(0).getFile() != FILE_PREDICATE &&
297 insn->src(0).getFile() != FILE_PREDICATE)
298 return 4;
299 break;
300 case OP_ATOM:
301 case OP_LOAD:
302 case OP_STORE:
303 if (insn->src(0).isIndirect(0)) {
304 switch (insn->src(0).getFile()) {
305 case FILE_MEMORY_SHARED:
306 case FILE_MEMORY_CONST:
307 return 2;
308 case FILE_MEMORY_GLOBAL:
309 case FILE_MEMORY_LOCAL:
310 return 4;
311 default:
312 break;
313 }
314 }
315 break;
316 case OP_EXPORT:
317 case OP_PFETCH:
318 case OP_SHFL:
319 case OP_VFETCH:
320 return 2;
321 default:
322 break;
323 }
324 return 0;
325 }
326
327 bool
328 TargetGM107::isCS2RSV(SVSemantic sv) const
329 {
330 return sv == SV_CLOCK;
331 }
332
333 bool
334 TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
335 {
336 if (stage == CG_STAGE_PRE_SSA) {
337 GM107LoweringPass pass(prog);
338 return pass.run(prog, false, true);
339 } else
340 if (stage == CG_STAGE_POST_RA) {
341 NVC0LegalizePostRA pass(prog);
342 return pass.run(prog, false, true);
343 } else
344 if (stage == CG_STAGE_SSA) {
345 GM107LegalizeSSA pass;
346 return pass.run(prog, false, true);
347 }
348 return false;
349 }
350
351 CodeEmitter *
352 TargetGM107::getCodeEmitter(Program::Type type)
353 {
354 return createCodeEmitterGM107(type);
355 }
356
357 } // namespace nv50_ir