nv50/ir/opt: improve post-multiply and check target for support
[mesa.git] / src / gallium / drivers / nvc0 / codegen / nv50_ir_target_nvc0.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir_target_nvc0.h"
24
25 namespace nv50_ir {
26
27 Target *getTargetNVC0(unsigned int chipset)
28 {
29 return new TargetNVC0(chipset);
30 }
31
32 TargetNVC0::TargetNVC0(unsigned int card)
33 {
34 chipset = card;
35 initOpInfo();
36 }
37
// BUILTINS / LIBRARY FUNCTIONS
//
// Everything is hardcoded as pre-assembled machine words for the time being.
// This will probably be made nicer once subroutines are supported properly,
// i.e. once there is an input IR that provides function declarations.
//
// The routines are stored back to back in this order:
//    DIV U32, DIV S32, RCP F64, RSQ F64
// Sizes below are given in units of 8 bytes (two 32-bit words); a line that
// holds a single word is one half of such a unit.

static const uint32_t nvc0_builtin_code[] =
{
   // ---------------------------------------------------------------------
   // DIV U32: slow unsigned integer division
   //
   // UNR recurrence (q = a / b):
   //  look for z such that 2^32 - b <= b * z < 2^32
   //  then q - 1 <= (a * z) / 2^32 <= q
   //
   // INPUT:   $r0: dividend, $r1: divisor
   // OUTPUT:  $r0: result, $r1: modulus
   // CLOBBER: $r2 - $r3, $p0 - $p1
   // SIZE:    22 / 14 * 8 bytes (first / second preprocessor branch)
   // ---------------------------------------------------------------------
#if 1
   0x04009c03, 0x78000000,
   0x7c209cdd,
   0x0010dd18,
   0x08309c03, 0x60000000,
   0x05605c18,
   0x0810dc2a,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0810dc03, 0x50000000,
   0x0c209c43, 0x20040000,
   0x0000dde4, 0x28000000,
   0x08001c43, 0x50000000,
   0x05609c18,
   0x0010430d,
   0x0811dc03, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x04000002, 0x08000000,
   0x0811c003, 0x1b0e0000,
   0x08104103, 0x48000000,
   0x040000ac,
   0x90001dff,
#else
   // Alternative 14-unit encoding, currently compiled out.
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005c28,
   0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x0420446c,
   0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0x0ffc5dff,
   0x90001dff,
#endif

   // ---------------------------------------------------------------------
   // DIV S32: slow signed integer division
   //
   // INPUT:   $r0: dividend, $r1: divisor
   // OUTPUT:  $r0: result, $r1: modulus
   // CLOBBER: $r2 - $r3, $p0 - $p3
   // SIZE:    18 * 8 bytes
   // ---------------------------------------------------------------------
   0xfc05dc23, 0x188e0000,
   0xfc17dc23, 0x18c40000,
   0x03301e18,
   0x07305e18,
   0x0401dc03, 0x1b0e0000,
   0x00008003, 0x78000000,
   0x0400c003, 0x78000000,
   0x0c20c103, 0x48000000,
   0x0c108003, 0x60000000,
   0x00005c28,
   0x00001d18,
   0x0031c023, 0x1b0ec000,
   0xb000a1e7, 0x40000000,
   0x04000003, 0x6000c000,
   0x0813dc03, 0x1b000000,
   0x0420446c,
   0x040004bd,
   0x04208003, 0x5800c000,
   0x0430c103, 0x4800c000,
   0x0ffc5dff,
   0x01700e18,
   0x05704a18,
   0x90001dff,

   // ---------------------------------------------------------------------
   // RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
   //
   // INPUT:   $r0d (x)
   // OUTPUT:  $r0d (rcp(x))
   // CLOBBER: $r2 - $r7
   // SIZE:    9 * 8 bytes
   // ---------------------------------------------------------------------
   0x9810dc08,
   0x00009c28,
   0x4001df18,
   0x00019d18,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,

   // ---------------------------------------------------------------------
   // RSQ F64: Newton Raphson rsqrt(x):
   //    r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i)
   //
   // INPUT:   $r0d (x)
   // OUTPUT:  $r0d (rsqrt(x))
   // CLOBBER: $r2 - $r7
   // SIZE:    14 * 8 bytes
   // ---------------------------------------------------------------------
   0x9c10dc08,
   0x00009c28,
   0x00019d18,
   0x3fe1df18,
   0x18001c01, 0x50000000,
   0x0001dde2, 0x18ffe000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10209c01, 0x50000000,
   0x08211c01, 0x50000000,
   0x10011e01, 0x200c0000,
   0x10201c01, 0x50000000,
   0x00001de7, 0x90000000,
};
179
180 static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
181 {
182 0,
183 8 * (22),
184 8 * (22 + 18),
185 8 * (22 + 18 + 9)
186 };
187
188 void
189 TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const
190 {
191 *code = &nvc0_builtin_code[0];
192 *size = sizeof(nvc0_builtin_code);
193 }
194
195 uint32_t
196 TargetNVC0::getBuiltinOffset(int builtin) const
197 {
198 assert(builtin < NVC0_BUILTIN_COUNT);
199 return nvc0_builtin_offsets[builtin];
200 }
201
202 struct opProperties
203 {
204 operation op;
205 unsigned int mNeg : 4;
206 unsigned int mAbs : 4;
207 unsigned int mNot : 4;
208 unsigned int mSat : 4;
209 unsigned int fConst : 3;
210 unsigned int fImmd : 4; // last bit indicates if full immediate is suppoted
211 };
212
213 static const struct opProperties _initProps[] =
214 {
215 // neg abs not sat c[] imm
216 { OP_ADD, 0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 },
217 { OP_SUB, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 },
218 { OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 },
219 { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
220 { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
221 { OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
222 { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
223 { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
224 { OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
225 { OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
226 { OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
227 { OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
228 { OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
229 { OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 },
230 { OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
231 { OP_SLCT, 0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint
232 { OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
233 { OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 },
234 { OP_COS, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
235 { OP_SIN, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
236 { OP_EX2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
237 { OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
238 { OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
239 { OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 },
240 { OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
241 { OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 },
242 { OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
243 { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 },
244 { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
245 { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
246 { OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
247 // saturate only:
248 { OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
249 { OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 },
250 };
251
252 void TargetNVC0::initOpInfo()
253 {
254 unsigned int i, j;
255
256 static const uint32_t commutative[(OP_LAST + 31) / 32] =
257 {
258 // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN
259 0x0670ca00, 0x0000003f, 0x00000000
260 };
261
262 static const uint32_t shortForm[(OP_LAST + 31) / 32] =
263 {
264 // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV
265 0x0670ca00, 0x00000000, 0x00000000
266 };
267
268 static const operation noDest[] =
269 {
270 OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
271 OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
272 OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
273 OP_QUADON, OP_QUADPOP
274 };
275
276 joinAnterior = false;
277
278 for (i = 0; i < DATA_FILE_COUNT; ++i)
279 nativeFileMap[i] = (DataFile)i;
280 nativeFileMap[FILE_ADDRESS] = FILE_GPR;
281
282 for (i = 0; i < OP_LAST; ++i) {
283 opInfo[i].variants = NULL;
284 opInfo[i].op = (operation)i;
285 opInfo[i].srcTypes = 1 << (int)TYPE_F32;
286 opInfo[i].dstTypes = 1 << (int)TYPE_F32;
287 opInfo[i].immdBits = 0;
288 opInfo[i].srcNr = operationSrcNr[i];
289
290 for (j = 0; j < opInfo[i].srcNr; ++j) {
291 opInfo[i].srcMods[j] = 0;
292 opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
293 }
294 opInfo[i].dstMods = 0;
295 opInfo[i].dstFiles = 1 << (int)FILE_GPR;
296
297 opInfo[i].hasDest = 1;
298 opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
299 opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1;
300 opInfo[i].pseudo = (i < OP_MOV);
301 opInfo[i].predicate = !opInfo[i].pseudo;
302 opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
303 opInfo[i].minEncSize = (shortForm[i / 32] & (1 << (i % 32))) ? 4 : 8;
304 }
305 for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i)
306 opInfo[noDest[i]].hasDest = 0;
307
308 for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) {
309 const struct opProperties *prop = &_initProps[i];
310
311 for (int s = 0; s < 3; ++s) {
312 if (prop->mNeg & (1 << s))
313 opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG;
314 if (prop->mAbs & (1 << s))
315 opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS;
316 if (prop->mNot & (1 << s))
317 opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT;
318 if (prop->fConst & (1 << s))
319 opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST;
320 if (prop->fImmd & (1 << s))
321 opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE;
322 if (prop->fImmd & 8)
323 opInfo[prop->op].immdBits = 0xffffffff;
324 }
325 if (prop->mSat & 8)
326 opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
327 }
328 }
329
330 unsigned int
331 TargetNVC0::getFileSize(DataFile file) const
332 {
333 switch (file) {
334 case FILE_NULL: return 0;
335 case FILE_GPR: return 63;
336 case FILE_PREDICATE: return 7;
337 case FILE_FLAGS: return 1;
338 case FILE_ADDRESS: return 0;
339 case FILE_IMMEDIATE: return 0;
340 case FILE_MEMORY_CONST: return 65536;
341 case FILE_SHADER_INPUT: return 0x400;
342 case FILE_SHADER_OUTPUT: return 0x400;
343 case FILE_MEMORY_GLOBAL: return 0xffffffff;
344 case FILE_MEMORY_SHARED: return 16 << 10;
345 case FILE_MEMORY_LOCAL: return 48 << 10;
346 case FILE_SYSTEM_VALUE: return 32;
347 default:
348 assert(!"invalid file");
349 return 0;
350 }
351 }
352
353 unsigned int
354 TargetNVC0::getFileUnit(DataFile file) const
355 {
356 if (file == FILE_GPR || file == FILE_ADDRESS || file == FILE_SYSTEM_VALUE)
357 return 2;
358 return 0;
359 }
360
361 uint32_t
362 TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
363 {
364 const int idx = sym->reg.data.sv.index;
365 const SVSemantic sv = sym->reg.data.sv.sv;
366
367 const bool isInput = shaderFile == FILE_SHADER_INPUT;
368
369 switch (sv) {
370 case SV_POSITION: return 0x070 + idx * 4;
371 case SV_INSTANCE_ID: return 0x2f8;
372 case SV_VERTEX_ID: return 0x2fc;
373 case SV_PRIMITIVE_ID: return isInput ? 0x060 : 0x040;
374 case SV_LAYER: return 0x064;
375 case SV_VIEWPORT_INDEX: return 0x068;
376 case SV_POINT_SIZE: return 0x06c;
377 case SV_CLIP_DISTANCE: return 0x2c0 + idx * 4;
378 case SV_POINT_COORD: return 0x2e0 + idx * 4;
379 case SV_FACE: return 0x3fc;
380 case SV_TESS_FACTOR: return 0x000 + idx * 4;
381 case SV_TESS_COORD: return 0x2f0 + idx * 4;
382 default:
383 return 0xffffffff;
384 }
385 }
386
387 bool
388 TargetNVC0::insnCanLoad(const Instruction *i, int s,
389 const Instruction *ld) const
390 {
391 DataFile sf = ld->src[0].getFile();
392
393 // immediate 0 can be represented by GPR $r63
394 if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
395 return (!i->asTex() && i->op != OP_EXPORT && i->op != OP_STORE);
396
397 if (s > opInfo[i->op].srcNr)
398 return false;
399 if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf)))
400 return false;
401
402 // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0
403 if (ld->src[0].isIndirect(0))
404 return false;
405
406 for (int k = 0; i->srcExists(k); ++k) {
407 if (i->src[k].getFile() == FILE_IMMEDIATE) {
408 if (i->getSrc(k)->reg.data.u64 != 0)
409 return false;
410 } else
411 if (i->src[k].getFile() != FILE_GPR &&
412 i->src[k].getFile() != FILE_PREDICATE) {
413 return false;
414 }
415 }
416
417 // not all instructions support full 32 bit immediates
418 if (sf == FILE_IMMEDIATE) {
419 Storage &reg = ld->getSrc(0)->asImm()->reg;
420
421 if (opInfo[i->op].immdBits != 0xffffffff) {
422 if (i->sType == TYPE_F32) {
423 if (reg.data.u32 & 0xfff)
424 return false;
425 } else
426 if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
427 // with u32, 0xfffff counts as 0xffffffff as well
428 if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
429 return false;
430 }
431 } else
432 if (i->op == OP_MAD || i->op == OP_FMA) {
433 // requires src == dst, cannot decide before RA
434 // (except if we implement more constraints)
435 if (ld->getSrc(0)->asImm()->reg.data.u32 & 0xfff)
436 return false;
437 }
438 }
439
440 return true;
441 }
442
443 bool
444 TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
445 {
446 if (ty == TYPE_NONE)
447 return false;
448 if (ty == TYPE_B96)
449 return (file == FILE_SHADER_INPUT) || (file == FILE_SHADER_OUTPUT);
450 return true;
451 }
452
453 bool
454 TargetNVC0::isOpSupported(operation op, DataType ty) const
455 {
456 if ((op == OP_MAD || op == OP_FMA) && (ty != TYPE_F32))
457 return false;
458 if (op == OP_SAD && ty != TYPE_S32)
459 return false;
460 if (op == OP_POW || op == OP_SQRT || op == OP_DIV || op == OP_MOD)
461 return false;
462 return true;
463 }
464
465 bool
466 TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
467 {
468 if (!isFloatType(insn->dType)) {
469 switch (insn->op) {
470 case OP_ABS:
471 case OP_NEG:
472 case OP_CVT:
473 case OP_CEIL:
474 case OP_FLOOR:
475 case OP_TRUNC:
476 case OP_AND:
477 case OP_OR:
478 case OP_XOR:
479 break;
480 case OP_ADD:
481 if (insn->src[s ? 0 : 1].mod.neg())
482 return false;
483 break;
484 case OP_SUB:
485 if (s == 0)
486 return insn->src[1].mod.neg() ? false : true;
487 break;
488 default:
489 return false;
490 }
491 }
492 if (s > 3)
493 return false;
494 return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
495 }
496
497 bool
498 TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const
499 {
500 if (insn->getPredicate())
501 return false;
502 return opInfo[insn->op].predicate;
503 }
504
505 bool
506 TargetNVC0::isSatSupported(const Instruction *insn) const
507 {
508 if (insn->op == OP_CVT)
509 return true;
510 if (!(opInfo[insn->op].dstMods & NV50_IR_MOD_SAT))
511 return false;
512
513 if (insn->dType == TYPE_U32)
514 return (insn->op == OP_ADD) || (insn->op == OP_MAD);
515
516 return insn->dType == TYPE_F32;
517 }
518
519 bool
520 TargetNVC0::isPostMultiplySupported(operation op, float f, int& e) const
521 {
522 if (op != OP_MUL)
523 return false;
524 f = fabsf(f);
525 e = static_cast<int>(log2f(f));
526 if (e < -3 || e > 3)
527 return false;
528 return f == exp2f(static_cast<float>(e));
529 }
530
531 // TODO: better values
532 int TargetNVC0::getLatency(const Instruction *i) const
533 {
534 if (i->op == OP_LOAD) {
535 if (i->cache == CACHE_CV)
536 return 700;
537 return 48;
538 }
539 return 24;
540 }
541
542 // These are "inverse" throughput values, i.e. the number of cycles required
543 // to issue a specific instruction for a full warp (32 threads).
544 //
545 // Assuming we have more than 1 warp in flight, a higher issue latency results
546 // in a lower result latency since the MP will have spent more time with other
547 // warps.
548 // This also helps to determine the number of cycles between instructions in
549 // a single warp.
550 //
551 int TargetNVC0::getThroughput(const Instruction *i) const
552 {
553 // TODO: better values
554 if (i->dType == TYPE_F32) {
555 switch (i->op) {
556 case OP_ADD:
557 case OP_MUL:
558 case OP_MAD:
559 case OP_FMA:
560 return 1;
561 case OP_CVT:
562 case OP_CEIL:
563 case OP_FLOOR:
564 case OP_TRUNC:
565 case OP_SET:
566 case OP_SLCT:
567 case OP_MIN:
568 case OP_MAX:
569 return 2;
570 case OP_RCP:
571 case OP_RSQ:
572 case OP_LG2:
573 case OP_SIN:
574 case OP_COS:
575 case OP_PRESIN:
576 case OP_PREEX2:
577 default:
578 return 8;
579 }
580 } else
581 if (i->dType == TYPE_U32 || i->dType == TYPE_S32) {
582 switch (i->op) {
583 case OP_ADD:
584 case OP_AND:
585 case OP_OR:
586 case OP_XOR:
587 case OP_NOT:
588 return 1;
589 case OP_MUL:
590 case OP_MAD:
591 case OP_CVT:
592 case OP_SET:
593 case OP_SLCT:
594 case OP_SHL:
595 case OP_SHR:
596 case OP_NEG:
597 case OP_ABS:
598 case OP_MIN:
599 case OP_MAX:
600 default:
601 return 2;
602 }
603 } else
604 if (i->dType == TYPE_F64) {
605 return 2;
606 } else {
607 return 1;
608 }
609 }
610
611 } // namespace nv50_ir