nv50/ir: add missing license headers
[mesa.git] / src / gallium / drivers / nvc0 / codegen / nv50_ir_target_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 Target *getTargetNVC0(unsigned int chipset)
28 {
29 return new TargetNVC0(chipset);
30 }
31
32 TargetNVC0::TargetNVC0(unsigned int card)
33 {
34 chipset = card;
35 initOpInfo();
36 }
37
// BUILTINS / LIBRARY FUNCTIONS:

// laziness -> will just hardcode everything for the time being

// Will probably make this nicer once we support subroutines properly,
// i.e. when we have an input IR that provides function declarations.
44
// Pre-assembled code for the built-in library routines, stored as raw 32-bit
// words.
//
// Layout convention used below: every source line holds exactly two words,
// i.e. one 8-byte slot, so the number of lines in a routine equals the
// "n * 8 bytes" size quoted in its header. The routines are emitted back to
// back in the order DIV U32, DIV S32, RCP F64, RSQ F64.
static const uint32_t nvc0_builtin_code[] =
{
   // ---- DIV U32: slow unsigned integer division -------------------------
   //
   // UNR recurrence (q = a / b):
   //   look for z such that 2^32 - b <= b * z < 2^32
   //   then q - 1 <= (a * z) / 2^32 <= q
   //
   // INPUT:   $r0: dividend, $r1: divisor
   // OUTPUT:  $r0: result,   $r1: modulus
   // CLOBBER: $r2 - $r3, $p0 - $p1
   // SIZE:    22 * 8 bytes (active "#if 1" variant)
   //          14 * 8 bytes (disabled "#else" variant)
   //
#if 1
   0x04009c03, 0x78000000,
   0x7c209cdd, 0x0010dd18,
   0x08309c03, 0x60000000,
   0x05605c18, 0x0810dc2a,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0000dde4, 0x28000000,
   0x08001c43, 0x50000000,
   0x05609c18, 0x0010430d,
   0x0811dc03, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x04000002, 0x08000000,
   0x0811c003, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x040000ac, 0x90001dff,
#else
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005c28, 0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x0420446c, 0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0x0ffc5dff, 0x90001dff,
#endif

   // ---- DIV S32: slow signed integer division ---------------------------
   //
   // INPUT:   $r0: dividend, $r1: divisor
   // OUTPUT:  $r0: result,   $r1: modulus
   // CLOBBER: $r2 - $r3, $p0 - $p3
   // SIZE:    18 * 8 bytes
   //
   0xfc05dc23, 0x188e0000,
   0xfc17dc23, 0x18c40000,
   0x03301e18, 0x07305e18,
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005c28, 0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x0420446c, 0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0x0ffc5dff, 0x01700e18,
   0x05704a18, 0x90001dff,

   // ---- RCP F64: Newton-Raphson reciprocal ------------------------------
   //
   // Iteration: r_{i+1} = r_i * (2.0 - x * r_i)
   //
   // INPUT:   $r0d (x)
   // OUTPUT:  $r0d (rcp(x))
   // CLOBBER: $r2 - $r7
   // SIZE:    9 * 8 bytes
   //
   0x9810dc08, 0x00009c28,
   0x4001df18, 0x00019d18,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,

   // ---- RSQ F64: Newton-Raphson reciprocal square root ------------------
   //
   // Iteration: r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
   //
   // INPUT:   $r0d (x)
   // OUTPUT:  $r0d (rsqrt(x))
   // CLOBBER: $r2 - $r7
   // SIZE:    14 * 8 bytes
   //
   0x9c10dc08, 0x00009c28,
   0x00019d18, 0x3fe1df18,
   0x18001c01, 0x50000000,
   0x0001dde2, 0x18ffe000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,
};
179
180 static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
181 {
182 0,
183 8 * (22),
184 8 * (22 + 18),
185 8 * (22 + 18 + 9)
186 };
187
188 void
189 TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
190 {
191 *code = &nvc0_builtin_code[0];
192 *size = sizeof(nvc0_builtin_code);
193 }
194
195 uint32_t
196 TargetNVC0::getBuiltinOffset(int builtin) const
197 {
198 assert(builtin < NVC0_BUILTIN_COUNT);
199 return nvc0_builtin_offsets[builtin];
200 }
201
202 struct opProperties
203 {
204 operation op;
205 unsigned int mNeg : 4;
206 unsigned int mAbs : 4;
207 unsigned int mNot : 4;
208 unsigned int mSat : 4;
209 unsigned int fConst : 3;
210 unsigned int fImmd : 4; // last bit indicates if full immediate is suppoted
211 };
212
213 static const struct opProperties _initProps[] =
214 {
215 // neg abs not sat c[] imm
216 { OP_ADD, 0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 },
217 { OP_SUB, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 },
218 { OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
219 { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
220 { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
221 { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
222 { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
223 { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
224 { OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
225 { OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
226 { OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
227 { OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
228 { OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
229 { OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
230 { OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
231 { OP_SLCT, 0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint
232 { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
233 { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
234 { OP_COS, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
235 { OP_SIN, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
236 { OP_EX2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
237 { OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
238 { OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
239 { OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
240 { OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
241 { OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
242 { OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
243 { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
244 { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
245 { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
246 { OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
247 // saturate only:
248 { OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
249 { OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
250 };
251
252 void TargetNVC0::initOpInfo()
253 {
254 unsigned int i, j;
255
256 static const uint32_t commutative[(OP_LAST + 31) / 32] =
257 {
258 // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
259 0x0670ca00, 0x0000003f, 0x00000000
260 };
261
262 static const uint32_t shortForm[(OP_LAST + 31) / 32] =
263 {
264 // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
265 0x0670ca00, 0x00000000, 0x00000000
266 };
267
268 static const operation noDest[] =
269 {
270 OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
271 OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
272 OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
273 OP_QUADON, OP_QUADPOP
274 };
275
276 joinAnterior = false;
277
278 for (i = 0; i < DATA_FILE_COUNT; ++i)
279 nativeFileMap[i] = (DataFile)i;
280 nativeFileMap[FILE_ADDRESS] = FILE_GPR;
281
282 for (i = 0; i < OP_LAST; ++i) {
283 opInfo[i].variants = NULL;
284 opInfo[i].op = (operation)i;
285 opInfo[i].srcTypes = 1 << (int)TYPE_F32;
286 opInfo[i].dstTypes = 1 << (int)TYPE_F32;
287 opInfo[i].immdBits = 0;
288 opInfo[i].srcNr = operationSrcNr[i];
289
290 for (j = 0; j < opInfo[i].srcNr; ++j) {
291 opInfo[i].srcMods[j] = 0;
292 opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
293 }
294 opInfo[i].dstMods = 0;
295 opInfo[i].dstFiles = 1 << (int)FILE_GPR;
296
297 opInfo[i].hasDest = 1;
298 opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
299 opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
300 opInfo[i].pseudo = (i < OP_MOV);
301 opInfo[i].predicate = !opInfo[i].pseudo;
302 opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
303 opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
304 }
305 for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)
306 opInfo[noDest[i]].hasDest = 0;
307
308 for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
309 const struct opProperties *prop = &_initProps[i];
310
311 for (int s = 0; s < 3; ++s) {
312 if (prop->mNeg & (1 << s))
313 opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
314 if (prop->mAbs & (1 << s))
315 opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
316 if (prop->mNot & (1 << s))
317 opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
318 if (prop->fConst & (1 << s))
319 opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
320 if (prop->fImmd & (1 << s))
321 opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
322 if (prop->fImmd & 8)
323 opInfo[prop->op].immdBits = 0xffffffff;
324 }
325 if (prop->mSat & 8)
326 opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
327 }
328 }
329
330 unsigned int
331 TargetNVC0::getFileSize(DataFile file) const
332 {
333 switch (file) {
334 case FILE_NULL: return 0;
335 case FILE_GPR: return 63;
336 case FILE_PREDICATE: return 7;
337 case FILE_FLAGS: return 1;
338 case FILE_ADDRESS: return 0;
339 case FILE_IMMEDIATE: return 0;
340 case FILE_MEMORY_CONST: return 65536;
341 case FILE_SHADER_INPUT: return 0x400;
342 case FILE_SHADER_OUTPUT: return 0x400;
343 case FILE_MEMORY_GLOBAL: return 0xffffffff;
344 case FILE_MEMORY_SHARED: return 16 << 10;
345 case FILE_MEMORY_LOCAL: return 48 << 10;
346 case FILE_SYSTEM_VALUE: return 32;
347 default:
348 assert(!"invalid file");
349 return 0;
350 }
351 }
352
353 unsigned int
354 TargetNVC0::getFileUnit(DataFile file) const
355 {
356 if (file == FILE_GPR || file == FILE_ADDRESS || file == FILE_SYSTEM_VALUE)
357 return 2;
358 return 0;
359 }
360
361 uint32_t
362 TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
363 {
364 const int idx = sym->reg.data.sv.index;
365 const SVSemantic sv = sym->reg.data.sv.sv;
366
367 const bool isInput = shaderFile == FILE_SHADER_INPUT;
368
369 switch (sv) {
370 case SV_POSITION: return 0x070 + idx * 4;
371 case SV_INSTANCE_ID: return 0x2f8;
372 case SV_VERTEX_ID: return 0x2fc;
373 case SV_PRIMITIVE_ID: return isInput ? 0x060 : 0x040;
374 case SV_LAYER: return 0x064;
375 case SV_VIEWPORT_INDEX: return 0x068;
376 case SV_POINT_SIZE: return 0x06c;
377 case SV_CLIP_DISTANCE: return 0x2c0 + idx * 4;
378 case SV_POINT_COORD: return 0x2e0 + idx * 4;
379 case SV_FACE: return 0x3fc;
380 case SV_TESS_FACTOR: return 0x000 + idx * 4;
381 case SV_TESS_COORD: return 0x2f0 + idx * 4;
382 default:
383 return 0xffffffff;
384 }
385 }
386
387 bool
388 TargetNVC0::insnCanLoad(const Instruction *i, int s,
389 const Instruction *ld) const
390 {
391 DataFile sf = ld->src[0].getFile();
392
393 // immediate 0 can be represented by GPR $r63
394 if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
395 return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE);
396
397 if (s > opInfo[i->op].srcNr)
398 return false;
399 if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
400 return false;
401
402 // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
403 if (ld->src[0].isIndirect(0))
404 return false;
405
406 for (int k = 0; i->srcExists(k); ++k) {
407 if (i->src[k].getFile() == FILE_IMMEDIATE) {
408 if (i->getSrc(k)->reg.data.u64 != 0)
409 return false;
410 } else
411 if (i->src[k].getFile() != FILE_GPR &&
412 i->src[k].getFile() != FILE_PREDICATE) {
413 return false;
414 }
415 }
416
417 // not all instructions support full 32 bit immediates
418 if (sf == FILE_IMMEDIATE) {
419 Storage &reg = ld->getSrc(0)->asImm()->reg;
420
421 if (opInfo[i->op].immdBits != 0xffffffff) {
422 if (i->sType == TYPE_F32) {
423 if (reg.data.u32 & 0xfff)
424 return false;
425 } else
426 if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
427 // with u32, 0xfffff counts as 0xffffffff as well
428 if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
429 return false;
430 }
431 } else
432 if (i->op == OP_MAD || i->op == OP_FMA) {
433 // requires src == dst, cannot decide before RA
434 // (except if we implement more constraints)
435 if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff)
436 return false;
437 }
438 }
439
440 return true;
441 }
442
443 bool
444 TargetNVC0::isOpSupported(operation op, DataType ty) const
445 {
446 if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
447 return false;
448 if (op == OP_SAD && ty != TYPE_S32)
449 return false;
450 if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
451 return false;
452 return true;
453 }
454
455 bool
456 TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
457 {
458 if (!isFloatType(insn->dType)) {
459 switch (insn->op) {
460 case OP_ABS:
461 case OP_NEG:
462 case OP_CVT:
463 case OP_CEIL:
464 case OP_FLOOR:
465 case OP_TRUNC:
466 case OP_AND:
467 case OP_OR:
468 case OP_XOR:
469 break;
470 case OP_ADD:
471 if (insn->src[s ? 0 : 1].mod.neg())
472 return false;
473 break;
474 case OP_SUB:
475 if (s == 0)
476 return insn->src[1].mod.neg() ? false : true;
477 break;
478 default:
479 return false;
480 }
481 }
482 if (s > 3)
483 return false;
484 return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
485 }
486
487 bool
488 TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const
489 {
490 if (insn->getPredicate())
491 return false;
492 return opInfo[insn->op].predicate;
493 }
494
495 bool
496 TargetNVC0::isSatSupported(const Instruction *insn) const
497 {
498 if (insn->op == OP_CVT)
499 return true;
500 if (!(opInfo[insn->op].dstMods & NV50_IR_MOD_SAT))
501 return false;
502
503 if (insn->dType == TYPE_U32)
504 return (insn->op == OP_ADD) || (insn->op == OP_MAD);
505
506 return insn->dType == TYPE_F32;
507 }
508
509 // TODO: better values
510 int TargetNVC0::getLatency(const Instruction *i) const
511 {
512 if (i->op == OP_LOAD) {
513 if (i->cache == CACHE_CV)
514 return 700;
515 return 48;
516 }
517 return 24;
518 }
519
520 // These are "inverse" throughput values, i.e. the number of cycles required
521 // to issue a specific instruction for a full warp (32 threads).
522 //
523 // Assuming we have more than 1 warp in flight, a higher issue latency results
524 // in a lower result latency since the MP will have spent more time with other
525 // warps.
526 // This also helps to determine the number of cycles between instructions in
527 // a single warp.
528 //
529 int TargetNVC0::getThroughput(const Instruction *i) const
530 {
531 // TODO: better values
532 if (i->dType == TYPE_F32) {
533 switch (i->op) {
534 case OP_ADD:
535 case OP_MUL:
536 case OP_MAD:
537 case OP_FMA:
538 return 1;
539 case OP_CVT:
540 case OP_CEIL:
541 case OP_FLOOR:
542 case OP_TRUNC:
543 case OP_SET:
544 case OP_SLCT:
545 case OP_MIN:
546 case OP_MAX:
547 return 2;
548 case OP_RCP:
549 case OP_RSQ:
550 case OP_LG2:
551 case OP_SIN:
552 case OP_COS:
553 case OP_PRESIN:
554 case OP_PREEX2:
555 default:
556 return 8;
557 }
558 } else
559 if (i->dType == TYPE_U32 || i->dType == TYPE_S32) {
560 switch (i->op) {
561 case OP_ADD:
562 case OP_AND:
563 case OP_OR:
564 case OP_XOR:
565 case OP_NOT:
566 return 1;
567 case OP_MUL:
568 case OP_MAD:
569 case OP_CVT:
570 case OP_SET:
571 case OP_SLCT:
572 case OP_SHL:
573 case OP_SHR:
574 case OP_NEG:
575 case OP_ABS:
576 case OP_MIN:
577 case OP_MAX:
578 default:
579 return 2;
580 }
581 } else
582 if (i->dType == TYPE_F64) {
583 return 2;
584 } else {
585 return 1;
586 }
587 }
588
589 } // namespace nv50_ir