nvc0/ir: return 0 in imageLoad on incomplete textures
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_target_gm107.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 * 2014 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "codegen/nv50_ir_target_gm107.h"
25 #include "codegen/nv50_ir_lowering_gm107.h"
26
27 namespace nv50_ir {
28
29 Target *getTargetGM107(unsigned int chipset)
30 {
31 return new TargetGM107(chipset);
32 }
33
// BUILTINS / LIBRARY FUNCTIONS:
35
// laziness -> everything is simply hardcoded for the time being
37
38 #include "lib/gm107.asm.h"
39
40 void
41 TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
42 {
43 *code = (const uint32_t *)&gm107_builtin_code[0];
44 *size = sizeof(gm107_builtin_code);
45 }
46
47 uint32_t
48 TargetGM107::getBuiltinOffset(int builtin) const
49 {
50 assert(builtin < NVC0_BUILTIN_COUNT);
51 return gm107_builtin_offsets[builtin];
52 }
53
54 bool
55 TargetGM107::isOpSupported(operation op, DataType ty) const
56 {
57 switch (op) {
58 case OP_SAD:
59 case OP_POW:
60 case OP_DIV:
61 case OP_MOD:
62 return false;
63 case OP_SQRT:
64 if (ty == TYPE_F64)
65 return false;
66 return chipset >= NVISA_GM200_CHIPSET;
67 default:
68 break;
69 }
70
71 return true;
72 }
73
74 // Return true when an instruction supports the reuse flag. When supported, the
75 // hardware will use the operand reuse cache introduced since Maxwell, which
76 // should try to reduce bank conflicts by caching values for the subsequent
77 // instructions. Note that the next instructions have to use the same GPR id in
78 // the same operand slot.
79 bool
80 TargetGM107::isReuseSupported(const Instruction *insn) const
81 {
82 const OpClass cl = getOpClass(insn->op);
83
84 // TODO: double-check!
85 switch (cl) {
86 case OPCLASS_ARITH:
87 case OPCLASS_COMPARE:
88 case OPCLASS_LOGIC:
89 case OPCLASS_MOVE:
90 case OPCLASS_SHIFT:
91 return true;
92 case OPCLASS_BITFIELD:
93 if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
94 return true;
95 break;
96 default:
97 break;
98 }
99 return false;
100 }
101
102 // Return true when an instruction requires to set up a barrier because it
103 // doesn't operate at a fixed latency. Variable latency instructions are memory
104 // operations, double precision operations, special function unit operations
105 // and other low throughput instructions.
106 bool
107 TargetGM107::isBarrierRequired(const Instruction *insn) const
108 {
109 const OpClass cl = getOpClass(insn->op);
110
111 if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
112 return true;
113
114 switch (cl) {
115 case OPCLASS_ATOMIC:
116 case OPCLASS_LOAD:
117 case OPCLASS_STORE:
118 case OPCLASS_SURFACE:
119 case OPCLASS_TEXTURE:
120 return true;
121 case OPCLASS_SFU:
122 switch (insn->op) {
123 case OP_COS:
124 case OP_EX2:
125 case OP_LG2:
126 case OP_LINTERP:
127 case OP_PINTERP:
128 case OP_RCP:
129 case OP_RSQ:
130 case OP_SIN:
131 case OP_SQRT:
132 return true;
133 default:
134 break;
135 }
136 break;
137 case OPCLASS_BITFIELD:
138 switch (insn->op) {
139 case OP_BFIND:
140 case OP_POPCNT:
141 return true;
142 default:
143 break;
144 }
145 break;
146 case OPCLASS_CONTROL:
147 switch (insn->op) {
148 case OP_EMIT:
149 case OP_RESTART:
150 return true;
151 default:
152 break;
153 }
154 break;
155 case OPCLASS_OTHER:
156 switch (insn->op) {
157 case OP_AFETCH:
158 case OP_PFETCH:
159 case OP_PIXLD:
160 case OP_SHFL:
161 return true;
162 case OP_RDSV:
163 return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
164 default:
165 break;
166 }
167 break;
168 case OPCLASS_ARITH:
169 // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
170 if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
171 !isFloatType(insn->dType))
172 return true;
173 break;
174 case OPCLASS_CONVERT:
175 if (insn->def(0).getFile() != FILE_PREDICATE &&
176 insn->src(0).getFile() != FILE_PREDICATE)
177 return true;
178 break;
179 default:
180 break;
181 }
182 return false;
183 }
184
185 bool
186 TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
187 {
188 // TODO
189 return false;
190 }
191
192 // Return the number of stall counts needed to complete a single instruction.
193 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require
194 // different number of stall counts like memory operations.
195 int
196 TargetGM107::getLatency(const Instruction *insn) const
197 {
198 // TODO: better values! This should be good enough for now though.
199 switch (insn->op) {
200 case OP_EMIT:
201 case OP_EXPORT:
202 case OP_PIXLD:
203 case OP_RESTART:
204 case OP_STORE:
205 case OP_SUSTB:
206 case OP_SUSTP:
207 return 1;
208 case OP_SHFL:
209 return 2;
210 case OP_ADD:
211 case OP_AND:
212 case OP_EXTBF:
213 case OP_FMA:
214 case OP_INSBF:
215 case OP_MAD:
216 case OP_MAX:
217 case OP_MIN:
218 case OP_MOV:
219 case OP_MUL:
220 case OP_NOT:
221 case OP_OR:
222 case OP_PREEX2:
223 case OP_PRESIN:
224 case OP_QUADOP:
225 case OP_SELP:
226 case OP_SET:
227 case OP_SET_AND:
228 case OP_SET_OR:
229 case OP_SET_XOR:
230 case OP_SHL:
231 case OP_SHLADD:
232 case OP_SHR:
233 case OP_SLCT:
234 case OP_SUB:
235 case OP_VOTE:
236 case OP_XOR:
237 if (insn->dType != TYPE_F64)
238 return 6;
239 break;
240 case OP_RDSV:
241 return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
242 case OP_ABS:
243 case OP_CEIL:
244 case OP_CVT:
245 case OP_FLOOR:
246 case OP_NEG:
247 case OP_SAT:
248 case OP_TRUNC:
249 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
250 insn->src(0).getFile() == FILE_PREDICATE))
251 return 6;
252 break;
253 case OP_BFIND:
254 case OP_COS:
255 case OP_EX2:
256 case OP_LG2:
257 case OP_POPCNT:
258 case OP_QUADON:
259 case OP_QUADPOP:
260 case OP_RCP:
261 case OP_RSQ:
262 case OP_SIN:
263 case OP_SQRT:
264 return 13;
265 default:
266 break;
267 }
268 // Use the maximum number of stall counts for other instructions.
269 return 15;
270 }
271
272 // Return the operand read latency which is the number of stall counts before
273 // an instruction can read its sources. For memory operations like ATOM, LOAD
274 // and STORE, the memory access has to be indirect.
275 int
276 TargetGM107::getReadLatency(const Instruction *insn) const
277 {
278 switch (insn->op) {
279 case OP_ABS:
280 case OP_BFIND:
281 case OP_CEIL:
282 case OP_COS:
283 case OP_EX2:
284 case OP_FLOOR:
285 case OP_LG2:
286 case OP_NEG:
287 case OP_POPCNT:
288 case OP_RCP:
289 case OP_RSQ:
290 case OP_SAT:
291 case OP_SIN:
292 case OP_SQRT:
293 case OP_SULDB:
294 case OP_SULDP:
295 case OP_SUREDB:
296 case OP_SUREDP:
297 case OP_SUSTB:
298 case OP_SUSTP:
299 case OP_TRUNC:
300 return 4;
301 case OP_CVT:
302 if (insn->def(0).getFile() != FILE_PREDICATE &&
303 insn->src(0).getFile() != FILE_PREDICATE)
304 return 4;
305 break;
306 case OP_ATOM:
307 case OP_LOAD:
308 case OP_STORE:
309 if (insn->src(0).isIndirect(0)) {
310 switch (insn->src(0).getFile()) {
311 case FILE_MEMORY_SHARED:
312 case FILE_MEMORY_CONST:
313 return 2;
314 case FILE_MEMORY_GLOBAL:
315 case FILE_MEMORY_LOCAL:
316 return 4;
317 default:
318 break;
319 }
320 }
321 break;
322 case OP_EXPORT:
323 case OP_PFETCH:
324 case OP_SHFL:
325 case OP_VFETCH:
326 return 2;
327 default:
328 break;
329 }
330 return 0;
331 }
332
333 bool
334 TargetGM107::isCS2RSV(SVSemantic sv) const
335 {
336 return sv == SV_CLOCK;
337 }
338
339 bool
340 TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
341 {
342 if (stage == CG_STAGE_PRE_SSA) {
343 GM107LoweringPass pass(prog);
344 return pass.run(prog, false, true);
345 } else
346 if (stage == CG_STAGE_POST_RA) {
347 NVC0LegalizePostRA pass(prog);
348 return pass.run(prog, false, true);
349 } else
350 if (stage == CG_STAGE_SSA) {
351 GM107LegalizeSSA pass;
352 return pass.run(prog, false, true);
353 }
354 return false;
355 }
356
357 CodeEmitter *
358 TargetGM107::getCodeEmitter(Program::Type type)
359 {
360 return createCodeEmitterGM107(type);
361 }
362
363 } // namespace nv50_ir