nv50/ir: Add sat modifier for mul
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_nv50.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target_nv50.h"
25
26 namespace nv50_ir {
27
28 #define NV50_OP_ENC_LONG 0 // passed to setSrcFileBits() by emitForm_MAD (8 byte encoding)
29 #define NV50_OP_ENC_SHORT 1 // passed to setSrcFileBits() by emitForm_MUL (4 byte encoding)
30 #define NV50_OP_ENC_IMM 2 // passed to setSrcFileBits() by emitForm_IMM (immediate source)
31 #define NV50_OP_ENC_LONG_ALT 3 // passed to setSrcFileBits() by emitForm_ADD (2nd source in slot 2)
32
33 class CodeEmitterNV50 : public CodeEmitter
34 {
35 public:
36 CodeEmitterNV50(const TargetNV50 *);
37
38 virtual bool emitInstruction(Instruction *);
39
40 virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 virtual void prepareEmission(Function *);
45
46 private:
47 Program::Type progType;
48
49 const TargetNV50 *targNV50;
50
51 private:
52 inline void defId(const ValueDef&, const int pos);
53 inline void srcId(const ValueRef&, const int pos);
54 inline void srcId(const ValueRef *, const int pos);
55
56 inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57 inline void srcAddr8(const ValueRef&, const int pos);
58
59 void emitFlagsRd(const Instruction *);
60 void emitFlagsWr(const Instruction *);
61
62 void emitCondCode(CondCode cc, DataType ty, int pos);
63
64 inline void setARegBits(unsigned int);
65
66 void setAReg16(const Instruction *, int s);
67 void setImmediate(const Instruction *, int s);
68
69 void setDst(const Value *);
70 void setDst(const Instruction *, int d);
71 void setSrcFileBits(const Instruction *, int enc);
72 void setSrc(const Instruction *, unsigned int s, int slot);
73
74 void emitForm_MAD(const Instruction *);
75 void emitForm_ADD(const Instruction *);
76 void emitForm_MUL(const Instruction *);
77 void emitForm_IMM(const Instruction *);
78
79 void emitLoadStoreSizeLG(DataType ty, int pos);
80 void emitLoadStoreSizeCS(DataType ty);
81
82 void roundMode_MAD(const Instruction *);
83 void roundMode_CVT(RoundMode);
84
85 void emitMNeg12(const Instruction *);
86
87 void emitLOAD(const Instruction *);
88 void emitSTORE(const Instruction *);
89 void emitMOV(const Instruction *);
90 void emitRDSV(const Instruction *);
91 void emitNOP();
92 void emitINTERP(const Instruction *);
93 void emitPFETCH(const Instruction *);
94 void emitOUT(const Instruction *);
95
96 void emitUADD(const Instruction *);
97 void emitAADD(const Instruction *);
98 void emitFADD(const Instruction *);
99 void emitIMUL(const Instruction *);
100 void emitFMUL(const Instruction *);
101 void emitFMAD(const Instruction *);
102 void emitIMAD(const Instruction *);
103 void emitISAD(const Instruction *);
104
105 void emitMINMAX(const Instruction *);
106
107 void emitPreOp(const Instruction *);
108 void emitSFnOp(const Instruction *, uint8_t subOp);
109
110 void emitShift(const Instruction *);
111 void emitARL(const Instruction *, unsigned int shl);
112 void emitLogicOp(const Instruction *);
113 void emitNOT(const Instruction *);
114
115 void emitCVT(const Instruction *);
116 void emitSET(const Instruction *);
117
118 void emitTEX(const TexInstruction *);
119 void emitTXQ(const TexInstruction *);
120 void emitTEXPREP(const TexInstruction *);
121
122 void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
123
124 void emitFlow(const Instruction *, uint8_t flowOp);
125 void emitPRERETEmu(const FlowInstruction *);
126 void emitBAR(const Instruction *);
127
128 void emitATOM(const Instruction *);
129 };
130
131 #define SDATA(a) ((a).rep()->reg.data) // reg.data of the value returned by a source's rep()
132 #define DDATA(a) ((a).rep()->reg.data) // same expansion, used on definitions
133
134 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
135 {
136 assert(src.get());
137 code[pos / 32] |= SDATA(src).id << (pos % 32);
138 }
139
140 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
141 {
142 assert(src->get());
143 code[pos / 32] |= SDATA(*src).id << (pos % 32);
144 }
145
146 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
147 {
148 assert(src.get());
149
150 int32_t offset = SDATA(src).offset;
151
152 assert(!adj || src.get()->reg.size <= 4);
153 if (adj)
154 offset /= src.get()->reg.size;
155
156 assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
157
158 if (offset < 0)
159 offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
160
161 code[pos / 32] |= offset << (pos % 32);
162 }
163
164 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
165 {
166 assert(src.get());
167
168 uint32_t offset = SDATA(src).offset;
169
170 assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
171
172 code[pos / 32] |= (offset >> 2) << (pos % 32);
173 }
174
175 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
176 {
177 assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
178
179 code[pos / 32] |= DDATA(def).id << (pos % 32);
180 }
181
182 void
183 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
184 {
185 switch (insn->rnd) {
186 case ROUND_M: code[1] |= 1 << 22; break;
187 case ROUND_P: code[1] |= 2 << 22; break;
188 case ROUND_Z: code[1] |= 3 << 22; break;
189 default:
190 assert(insn->rnd == ROUND_N);
191 break;
192 }
193 }
194
195 void
196 CodeEmitterNV50::emitMNeg12(const Instruction *i)
197 {
198 code[1] |= i->src(0).mod.neg() << 26;
199 code[1] |= i->src(1).mod.neg() << 27;
200 }
201
202 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
203 {
204 uint8_t enc;
205
206 assert(pos >= 32 || pos <= 27);
207
208 switch (cc) {
209 case CC_LT: enc = 0x1; break;
210 case CC_LTU: enc = 0x9; break;
211 case CC_EQ: enc = 0x2; break;
212 case CC_EQU: enc = 0xa; break;
213 case CC_LE: enc = 0x3; break;
214 case CC_LEU: enc = 0xb; break;
215 case CC_GT: enc = 0x4; break;
216 case CC_GTU: enc = 0xc; break;
217 case CC_NE: enc = 0x5; break;
218 case CC_NEU: enc = 0xd; break;
219 case CC_GE: enc = 0x6; break;
220 case CC_GEU: enc = 0xe; break;
221 case CC_TR: enc = 0xf; break;
222 case CC_FL: enc = 0x0; break;
223
224 case CC_O: enc = 0x10; break;
225 case CC_C: enc = 0x11; break;
226 case CC_A: enc = 0x12; break;
227 case CC_S: enc = 0x13; break;
228 case CC_NS: enc = 0x1c; break;
229 case CC_NA: enc = 0x1d; break;
230 case CC_NC: enc = 0x1e; break;
231 case CC_NO: enc = 0x1f; break;
232
233 default:
234 enc = 0;
235 assert(!"invalid condition code");
236 break;
237 }
238 if (ty != TYPE_NONE && !isFloatType(ty))
239 enc &= ~0x8; // unordered only exists for float types
240
241 code[pos / 32] |= enc << (pos % 32);
242 }
243
244 void
245 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
246 {
247 int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
248
249 assert(!(code[1] & 0x00003f80));
250
251 if (s >= 0) {
252 assert(i->getSrc(s)->reg.file == FILE_FLAGS);
253 emitCondCode(i->cc, TYPE_NONE, 32 + 7);
254 srcId(i->src(s), 32 + 12);
255 } else {
256 code[1] |= 0x0780;
257 }
258 }
259
260 void
261 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
262 {
263 assert(!(code[1] & 0x70));
264
265 int flagsDef = i->flagsDef;
266
267 // find flags definition and check that it is the last def
268 if (flagsDef < 0) {
269 for (int d = 0; i->defExists(d); ++d)
270 if (i->def(d).getFile() == FILE_FLAGS)
271 flagsDef = d;
272 if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
273 WARN("Instruction::flagsDef was not set properly\n");
274 }
275 if (flagsDef == 0 && i->defExists(1))
276 WARN("flags def should not be the primary definition\n");
277
278 if (flagsDef >= 0)
279 code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
280
281 }
282
283 void
284 CodeEmitterNV50::setARegBits(unsigned int u)
285 {
286 code[0] |= (u & 3) << 26;
287 code[1] |= (u & 4);
288 }
289
290 void
291 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
292 {
293 if (i->srcExists(s)) {
294 s = i->src(s).indirect[0];
295 if (s >= 0)
296 setARegBits(SDATA(i->src(s)).id + 1);
297 }
298 }
299
300 void
301 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
302 {
303 const ImmediateValue *imm = i->src(s).get()->asImm();
304 assert(imm);
305
306 uint32_t u = imm->reg.data.u32;
307
308 if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
309 u = ~u;
310
311 code[1] |= 3;
312 code[0] |= (u & 0x3f) << 16;
313 code[1] |= (u >> 6) << 2;
314 }
315
316 void
317 CodeEmitterNV50::setDst(const Value *dst)
318 {
319 const Storage *reg = &dst->join->reg;
320
321 assert(reg->file != FILE_ADDRESS);
322
323 if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
324 code[0] |= (127 << 2) | 1;
325 code[1] |= 8;
326 } else {
327 int id;
328 if (reg->file == FILE_SHADER_OUTPUT) {
329 code[1] |= 8;
330 id = reg->data.offset / 4;
331 } else {
332 id = reg->data.id;
333 }
334 code[0] |= id << 2;
335 }
336 }
337
338 void
339 CodeEmitterNV50::setDst(const Instruction *i, int d)
340 {
341 if (i->defExists(d)) {
342 setDst(i->getDef(d));
343 } else
344 if (!d) {
345 code[0] |= 0x01fc; // bit bucket
346 code[1] |= 0x0008;
347 }
348 }
349
350 // 3 * 2 bits:
351 // 0: r
352 // 1: a/s
353 // 2: c
354 // 3: i
355 void
356 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
357 {
358 uint8_t mode = 0;
359
360 for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
361 switch (i->src(s).getFile()) {
362 case FILE_GPR:
363 break;
364 case FILE_MEMORY_SHARED:
365 case FILE_SHADER_INPUT:
366 mode |= 1 << (s * 2);
367 break;
368 case FILE_MEMORY_CONST:
369 mode |= 2 << (s * 2);
370 break;
371 case FILE_IMMEDIATE:
372 mode |= 3 << (s * 2);
373 break;
374 default:
375 ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
376 assert(0);
377 break;
378 }
379 }
380 switch (mode) {
381 case 0x00: // rrr
382 break;
383 case 0x01: // arr/grr
384 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
385 code[0] |= 0x01800000;
386 if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
387 code[1] |= 0x00200000;
388 } else {
389 if (enc == NV50_OP_ENC_SHORT)
390 code[0] |= 0x01000000;
391 else
392 code[1] |= 0x00200000;
393 }
394 break;
395 case 0x03: // irr
396 assert(i->op == OP_MOV);
397 return;
398 case 0x0c: // rir
399 break;
400 case 0x0d: // gir
401 assert(progType == Program::TYPE_GEOMETRY ||
402 progType == Program::TYPE_COMPUTE);
403 code[0] |= 0x01000000;
404 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
405 int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
406 assert(reg < 3);
407 code[0] |= (reg + 1) << 26;
408 }
409 break;
410 case 0x08: // rcr
411 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
412 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
413 break;
414 case 0x09: // acr/gcr
415 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
416 code[0] |= 0x01800000;
417 } else {
418 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
419 code[1] |= 0x00200000;
420 }
421 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
422 break;
423 case 0x20: // rrc
424 code[0] |= 0x01000000;
425 code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
426 break;
427 case 0x21: // arc
428 code[0] |= 0x01000000;
429 code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
430 assert(progType != Program::TYPE_GEOMETRY);
431 break;
432 default:
433 ERROR("not encodable: %x\n", mode);
434 assert(0);
435 break;
436 }
437 if (progType != Program::TYPE_COMPUTE)
438 return;
439
440 if ((mode & 3) == 1) {
441 const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
442
443 switch (i->getSrc(0)->reg.type) {
444 case TYPE_U8:
445 break;
446 case TYPE_U16:
447 code[0] |= 1 << pos;
448 break;
449 case TYPE_S16:
450 code[0] |= 2 << pos;
451 break;
452 default:
453 code[0] |= 3 << pos;
454 assert(i->getSrc(0)->reg.size == 4);
455 break;
456 }
457 }
458 }
459
460 void
461 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
462 {
463 if (Target::operationSrcNr[i->op] <= s)
464 return;
465 const Storage *reg = &i->src(s).rep()->reg;
466
467 unsigned int id = (reg->file == FILE_GPR) ?
468 reg->data.id :
469 reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
470
471 switch (slot) {
472 case 0: code[0] |= id << 9; break;
473 case 1: code[0] |= id << 16; break;
474 case 2: code[1] |= id << 14; break;
475 default:
476 assert(0);
477 break;
478 }
479 }
480
481 // the default form:
482 // - long instruction
483 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
484 // - address & flags
485 void
486 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
487 {
488 assert(i->encSize == 8);
489 code[0] |= 1;
490
491 emitFlagsRd(i);
492 emitFlagsWr(i);
493
494 setDst(i, 0);
495
496 setSrcFileBits(i, NV50_OP_ENC_LONG);
497 setSrc(i, 0, 0);
498 setSrc(i, 1, 1);
499 setSrc(i, 2, 2);
500
501 if (i->getIndirect(0, 0)) {
502 assert(!i->getIndirect(1, 0));
503 setAReg16(i, 0);
504 } else {
505 setAReg16(i, 1);
506 }
507 }
508
509 // like default form, but 2nd source in slot 2, and no 3rd source
510 void
511 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
512 {
513 assert(i->encSize == 8);
514 code[0] |= 1;
515
516 emitFlagsRd(i);
517 emitFlagsWr(i);
518
519 setDst(i, 0);
520
521 setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
522 setSrc(i, 0, 0);
523 setSrc(i, 1, 2);
524
525 if (i->getIndirect(0, 0)) {
526 assert(!i->getIndirect(1, 0));
527 setAReg16(i, 0);
528 } else {
529 setAReg16(i, 1);
530 }
531 }
532
533 // default short form (rr, ar, rc, gr)
534 void
535 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
536 {
537 assert(i->encSize == 4 && !(code[0] & 1));
538 assert(i->defExists(0));
539 assert(!i->getPredicate());
540
541 setDst(i, 0);
542
543 setSrcFileBits(i, NV50_OP_ENC_SHORT);
544 setSrc(i, 0, 0);
545 setSrc(i, 1, 1);
546 }
547
548 // usual immediate form
549 // - 1 to 3 sources where last is immediate (rir, gir)
550 // - no address or predicate possible
551 void
552 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
553 {
554 assert(i->encSize == 8);
555 code[0] |= 1;
556
557 assert(i->defExists(0) && i->srcExists(0));
558
559 setDst(i, 0);
560
561 setSrcFileBits(i, NV50_OP_ENC_IMM);
562 if (Target::operationSrcNr[i->op] > 1) {
563 setSrc(i, 0, 0);
564 setImmediate(i, 1);
565 setSrc(i, 2, 1);
566 } else {
567 setImmediate(i, 0);
568 }
569 }
570
571 void
572 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
573 {
574 uint8_t enc;
575
576 switch (ty) {
577 case TYPE_F32: // fall through
578 case TYPE_S32: // fall through
579 case TYPE_U32: enc = 0x6; break;
580 case TYPE_B128: enc = 0x5; break;
581 case TYPE_F64: // fall through
582 case TYPE_S64: // fall through
583 case TYPE_U64: enc = 0x4; break;
584 case TYPE_S16: enc = 0x3; break;
585 case TYPE_U16: enc = 0x2; break;
586 case TYPE_S8: enc = 0x1; break;
587 case TYPE_U8: enc = 0x0; break;
588 default:
589 enc = 0;
590 assert(!"invalid load/store type");
591 break;
592 }
593 code[pos / 32] |= enc << (pos % 32);
594 }
595
596 void
597 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
598 {
599 switch (ty) {
600 case TYPE_U8: break;
601 case TYPE_U16: code[1] |= 0x4000; break;
602 case TYPE_S16: code[1] |= 0x8000; break;
603 case TYPE_F32:
604 case TYPE_S32:
605 case TYPE_U32: code[1] |= 0xc000; break;
606 default:
607 assert(0);
608 break;
609 }
610 }
611
612 void
613 CodeEmitterNV50::emitLOAD(const Instruction *i)
614 {
615 DataFile sf = i->src(0).getFile();
616 int32_t offset = i->getSrc(0)->reg.data.offset;
617
618 switch (sf) {
619 case FILE_SHADER_INPUT:
620 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
621 code[0] = 0x11800001;
622 else
623 // use 'mov' where we can
624 code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
625 code[1] = 0x00200000 | (i->lanes << 14);
626 if (typeSizeof(i->dType) == 4)
627 code[1] |= 0x04000000;
628 break;
629 case FILE_MEMORY_SHARED:
630 if (targ->getChipset() >= 0x84) {
631 assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
632 code[0] = 0x10000001;
633 code[1] = 0x40000000;
634
635 if (typeSizeof(i->dType) == 4)
636 code[1] |= 0x04000000;
637
638 emitLoadStoreSizeCS(i->sType);
639 } else {
640 assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
641 code[0] = 0x10000001;
642 code[1] = 0x00200000 | (i->lanes << 14);
643 emitLoadStoreSizeCS(i->sType);
644 }
645 break;
646 case FILE_MEMORY_CONST:
647 code[0] = 0x10000001;
648 code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
649 if (typeSizeof(i->dType) == 4)
650 code[1] |= 0x04000000;
651 emitLoadStoreSizeCS(i->sType);
652 break;
653 case FILE_MEMORY_LOCAL:
654 code[0] = 0xd0000001;
655 code[1] = 0x40000000;
656 break;
657 case FILE_MEMORY_GLOBAL:
658 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
659 code[1] = 0x80000000;
660 break;
661 default:
662 assert(!"invalid load source file");
663 break;
664 }
665 if (sf == FILE_MEMORY_LOCAL ||
666 sf == FILE_MEMORY_GLOBAL)
667 emitLoadStoreSizeLG(i->sType, 21 + 32);
668
669 setDst(i, 0);
670
671 emitFlagsRd(i);
672 emitFlagsWr(i);
673
674 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
675 srcId(*i->src(0).getIndirect(0), 9);
676 } else {
677 setAReg16(i, 0);
678 srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
679 }
680 }
681
682 void
683 CodeEmitterNV50::emitSTORE(const Instruction *i)
684 {
685 DataFile f = i->getSrc(0)->reg.file;
686 int32_t offset = i->getSrc(0)->reg.data.offset;
687
688 switch (f) {
689 case FILE_SHADER_OUTPUT:
690 code[0] = 0x00000001 | ((offset >> 2) << 9);
691 code[1] = 0x80c00000;
692 srcId(i->src(1), 32 + 14);
693 break;
694 case FILE_MEMORY_GLOBAL:
695 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
696 code[1] = 0xa0000000;
697 emitLoadStoreSizeLG(i->dType, 21 + 32);
698 srcId(i->src(1), 2);
699 break;
700 case FILE_MEMORY_LOCAL:
701 code[0] = 0xd0000001;
702 code[1] = 0x60000000;
703 emitLoadStoreSizeLG(i->dType, 21 + 32);
704 srcId(i->src(1), 2);
705 break;
706 case FILE_MEMORY_SHARED:
707 code[0] = 0x00000001;
708 code[1] = 0xe0000000;
709 switch (typeSizeof(i->dType)) {
710 case 1:
711 code[0] |= offset << 9;
712 code[1] |= 0x00400000;
713 break;
714 case 2:
715 code[0] |= (offset >> 1) << 9;
716 break;
717 case 4:
718 code[0] |= (offset >> 2) << 9;
719 code[1] |= 0x04200000;
720 break;
721 default:
722 assert(0);
723 break;
724 }
725 srcId(i->src(1), 32 + 14);
726 break;
727 default:
728 assert(!"invalid store destination file");
729 break;
730 }
731
732 if (f == FILE_MEMORY_GLOBAL)
733 srcId(*i->src(0).getIndirect(0), 9);
734 else
735 setAReg16(i, 0);
736
737 if (f == FILE_MEMORY_LOCAL)
738 srcAddr16(i->src(0), false, 9);
739
740 emitFlagsRd(i);
741 }
742
743 void
744 CodeEmitterNV50::emitMOV(const Instruction *i)
745 {
746 DataFile sf = i->getSrc(0)->reg.file;
747 DataFile df = i->getDef(0)->reg.file;
748
749 assert(sf == FILE_GPR || df == FILE_GPR);
750
751 if (sf == FILE_FLAGS) {
752 code[0] = 0x00000001;
753 code[1] = 0x20000000;
754 defId(i->def(0), 2);
755 srcId(i->src(0), 12);
756 emitFlagsRd(i);
757 } else
758 if (sf == FILE_ADDRESS) {
759 code[0] = 0x00000001;
760 code[1] = 0x40000000;
761 defId(i->def(0), 2);
762 setARegBits(SDATA(i->src(0)).id + 1);
763 emitFlagsRd(i);
764 } else
765 if (df == FILE_FLAGS) {
766 code[0] = 0x00000001;
767 code[1] = 0xa0000000;
768 defId(i->def(0), 4);
769 srcId(i->src(0), 9);
770 emitFlagsRd(i);
771 } else
772 if (sf == FILE_IMMEDIATE) {
773 code[0] = 0x10008001;
774 code[1] = 0x00000003;
775 emitForm_IMM(i);
776 } else {
777 if (i->encSize == 4) {
778 code[0] = 0x10008000;
779 } else {
780 code[0] = 0x10000001;
781 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
782 code[1] |= (i->lanes << 14);
783 emitFlagsRd(i);
784 }
785 defId(i->def(0), 2);
786 srcId(i->src(0), 9);
787 }
788 if (df == FILE_SHADER_OUTPUT) {
789 assert(i->encSize == 8);
790 code[1] |= 0x8;
791 }
792 }
793
794 static inline uint8_t getSRegEncoding(const ValueRef &ref)
795 {
796 switch (SDATA(ref).sv.sv) {
797 case SV_PHYSID: return 0;
798 case SV_CLOCK: return 1;
799 case SV_VERTEX_STRIDE: return 3;
800 // case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index;
801 case SV_SAMPLE_INDEX: return 8;
802 default:
803 assert(!"no sreg for system value");
804 return 0;
805 }
806 }
807
808 void
809 CodeEmitterNV50::emitRDSV(const Instruction *i)
810 {
811 code[0] = 0x00000001;
812 code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
813 defId(i->def(0), 2);
814 emitFlagsRd(i);
815 }
816
817 void
818 CodeEmitterNV50::emitNOP()
819 {
820 code[0] = 0xf0000001;
821 code[1] = 0xe0000000;
822 }
823
824 void
825 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
826 {
827 code[0] = 0xc0000000 | (lane << 16);
828 code[1] = 0x80000000;
829
830 code[0] |= (quOp & 0x03) << 20;
831 code[1] |= (quOp & 0xfc) << 20;
832
833 emitForm_ADD(i);
834
835 if (!i->srcExists(1))
836 srcId(i->src(0), 32 + 14);
837 }
838
839 /* NOTE: This returns the base address of a vertex inside the primitive.
840 * src0 is an immediate, the index (not offset) of the vertex
841 * inside the primitive. XXX: signed or unsigned ?
842 * src1 (may be NULL) should use whatever units the hardware requires
843 * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
844 */
845 void
846 CodeEmitterNV50::emitPFETCH(const Instruction *i)
847 {
848 const uint32_t prim = i->src(0).get()->reg.data.u32;
849 assert(prim <= 127);
850
851 if (i->def(0).getFile() == FILE_ADDRESS) {
852 // shl $aX a[] 0
853 code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
854 code[1] = 0xc0200000;
855 code[0] |= prim << 9;
856 assert(!i->srcExists(1));
857 } else
858 if (i->srcExists(1)) {
859 // ld b32 $rX a[$aX+base]
860 code[0] = 0x00000001;
861 code[1] = 0x04200000 | (0xf << 14);
862 defId(i->def(0), 2);
863 code[0] |= prim << 9;
864 setARegBits(SDATA(i->src(1)).id + 1);
865 } else {
866 // mov b32 $rX a[]
867 code[0] = 0x10000001;
868 code[1] = 0x04200000 | (0xf << 14);
869 defId(i->def(0), 2);
870 code[0] |= prim << 9;
871 }
872 emitFlagsRd(i);
873 }
874
875 void
876 CodeEmitterNV50::emitINTERP(const Instruction *i)
877 {
878 code[0] = 0x80000000;
879
880 defId(i->def(0), 2);
881 srcAddr8(i->src(0), 16);
882
883 if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
884 code[0] |= 1 << 8;
885 } else {
886 if (i->op == OP_PINTERP) {
887 code[0] |= 1 << 25;
888 srcId(i->src(1), 9);
889 }
890 if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
891 code[0] |= 1 << 24;
892 }
893
894 if (i->encSize == 8) {
895 code[1] =
896 (code[0] & (3 << 24)) >> (24 - 16) |
897 (code[0] & (1 << 8)) << (18 - 8);
898 code[0] &= ~0x03000100;
899 code[0] |= 1;
900 emitFlagsRd(i);
901 }
902 }
903
904 void
905 CodeEmitterNV50::emitMINMAX(const Instruction *i)
906 {
907 if (i->dType == TYPE_F64) {
908 code[0] = 0xe0000000;
909 code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
910 } else {
911 code[0] = 0x30000000;
912 code[1] = 0x80000000;
913 if (i->op == OP_MIN)
914 code[1] |= 0x20000000;
915
916 switch (i->dType) {
917 case TYPE_F32: code[0] |= 0x80000000; break;
918 case TYPE_S32: code[1] |= 0x8c000000; break;
919 case TYPE_U32: code[1] |= 0x84000000; break;
920 case TYPE_S16: code[1] |= 0x80000000; break;
921 case TYPE_U16: break;
922 default:
923 assert(0);
924 break;
925 }
926 code[1] |= i->src(0).mod.abs() << 20;
927 code[1] |= i->src(0).mod.neg() << 26;
928 code[1] |= i->src(1).mod.abs() << 19;
929 code[1] |= i->src(1).mod.neg() << 27;
930 }
931 emitForm_MAD(i);
932 }
933
934 void
935 CodeEmitterNV50::emitFMAD(const Instruction *i)
936 {
937 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
938 const int neg_add = i->src(2).mod.neg();
939
940 code[0] = 0xe0000000;
941
942 if (i->encSize == 4) {
943 emitForm_MUL(i);
944 assert(!neg_mul && !neg_add);
945 } else {
946 code[1] = neg_mul << 26;
947 code[1] |= neg_add << 27;
948 if (i->saturate)
949 code[1] |= 1 << 29;
950 emitForm_MAD(i);
951 }
952 }
953
954 void
955 CodeEmitterNV50::emitFADD(const Instruction *i)
956 {
957 const int neg0 = i->src(0).mod.neg();
958 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
959
960 code[0] = 0xb0000000;
961
962 assert(!(i->src(0).mod | i->src(1).mod).abs());
963
964 if (i->src(1).getFile() == FILE_IMMEDIATE) {
965 code[1] = 0;
966 emitForm_IMM(i);
967 code[0] |= neg0 << 15;
968 code[0] |= neg1 << 22;
969 if (i->saturate)
970 code[0] |= 1 << 8;
971 } else
972 if (i->encSize == 8) {
973 code[1] = 0;
974 emitForm_ADD(i);
975 code[1] |= neg0 << 26;
976 code[1] |= neg1 << 27;
977 if (i->saturate)
978 code[1] |= 1 << 29;
979 } else {
980 emitForm_MUL(i);
981 code[0] |= neg0 << 15;
982 code[0] |= neg1 << 22;
983 if (i->saturate)
984 code[0] |= 1 << 8;
985 }
986 }
987
988 void
989 CodeEmitterNV50::emitUADD(const Instruction *i)
990 {
991 const int neg0 = i->src(0).mod.neg();
992 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
993
994 code[0] = 0x20008000;
995
996 if (i->src(1).getFile() == FILE_IMMEDIATE) {
997 code[1] = 0;
998 emitForm_IMM(i);
999 } else
1000 if (i->encSize == 8) {
1001 code[0] = 0x20000000;
1002 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
1003 emitForm_ADD(i);
1004 } else {
1005 emitForm_MUL(i);
1006 }
1007 assert(!(neg0 && neg1));
1008 code[0] |= neg0 << 28;
1009 code[0] |= neg1 << 22;
1010
1011 if (i->flagsSrc >= 0) {
1012 // addc == sub | subr
1013 assert(!(code[0] & 0x10400000) && !i->getPredicate());
1014 code[0] |= 0x10400000;
1015 srcId(i->src(i->flagsSrc), 32 + 12);
1016 }
1017 }
1018
1019 void
1020 CodeEmitterNV50::emitAADD(const Instruction *i)
1021 {
1022 const int s = (i->op == OP_MOV) ? 0 : 1;
1023
1024 code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1025 code[1] = 0x20000000;
1026
1027 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1028
1029 emitFlagsRd(i);
1030
1031 if (s && i->srcExists(0))
1032 setARegBits(SDATA(i->src(0)).id + 1);
1033 }
1034
1035 void
1036 CodeEmitterNV50::emitIMUL(const Instruction *i)
1037 {
1038 code[0] = 0x40000000;
1039
1040 if (i->encSize == 8) {
1041 code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1042 emitForm_MAD(i);
1043 } else {
1044 if (i->sType == TYPE_S16)
1045 code[0] |= 0x8100;
1046 emitForm_MUL(i);
1047 }
1048 }
1049
1050 void
1051 CodeEmitterNV50::emitFMUL(const Instruction *i)
1052 {
1053 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1054
1055 code[0] = 0xc0000000;
1056
1057 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1058 code[1] = 0;
1059 emitForm_IMM(i);
1060 if (neg)
1061 code[0] |= 0x8000;
1062 if (i->saturate)
1063 code[0] |= 1 << 8;
1064 } else
1065 if (i->encSize == 8) {
1066 code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
1067 if (neg)
1068 code[1] |= 0x08000000;
1069 if (i->saturate)
1070 code[1] |= 1 << 20;
1071 emitForm_MAD(i);
1072 } else {
1073 emitForm_MUL(i);
1074 if (neg)
1075 code[0] |= 0x8000;
1076 if (i->saturate)
1077 code[0] |= 1 << 8;
1078 }
1079 }
1080
1081 void
1082 CodeEmitterNV50::emitIMAD(const Instruction *i)
1083 {
1084 code[0] = 0x60000000;
1085 if (isSignedType(i->sType))
1086 code[1] = i->saturate ? 0x40000000 : 0x20000000;
1087 else
1088 code[1] = 0x00000000;
1089
1090 int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1091 int neg2 = i->src(2).mod.neg();
1092
1093 assert(!(neg1 & neg2));
1094 code[1] |= neg1 << 27;
1095 code[1] |= neg2 << 26;
1096
1097 emitForm_MAD(i);
1098
1099 if (i->flagsSrc >= 0) {
1100 // add with carry from $cX
1101 assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1102 code[1] |= 0xc << 24;
1103 srcId(i->src(i->flagsSrc), 32 + 12);
1104 }
1105 }
1106
1107 void
1108 CodeEmitterNV50::emitISAD(const Instruction *i)
1109 {
1110 if (i->encSize == 8) {
1111 code[0] = 0x50000000;
1112 switch (i->sType) {
1113 case TYPE_U32: code[1] = 0x04000000; break;
1114 case TYPE_S32: code[1] = 0x0c000000; break;
1115 case TYPE_U16: code[1] = 0x00000000; break;
1116 case TYPE_S16: code[1] = 0x08000000; break;
1117 default:
1118 assert(0);
1119 break;
1120 }
1121 emitForm_MAD(i);
1122 } else {
1123 switch (i->sType) {
1124 case TYPE_U32: code[0] = 0x50008000; break;
1125 case TYPE_S32: code[0] = 0x50008100; break;
1126 case TYPE_U16: code[0] = 0x50000000; break;
1127 case TYPE_S16: code[0] = 0x50000100; break;
1128 default:
1129 assert(0);
1130 break;
1131 }
1132 emitForm_MUL(i);
1133 }
1134 }
1135
1136 void
1137 CodeEmitterNV50::emitSET(const Instruction *i)
1138 {
1139 code[0] = 0x30000000;
1140 code[1] = 0x60000000;
1141
1142 emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1143
1144 switch (i->sType) {
1145 case TYPE_F32: code[0] |= 0x80000000; break;
1146 case TYPE_S32: code[1] |= 0x0c000000; break;
1147 case TYPE_U32: code[1] |= 0x04000000; break;
1148 case TYPE_S16: code[1] |= 0x08000000; break;
1149 case TYPE_U16: break;
1150 default:
1151 assert(0);
1152 break;
1153 }
1154 if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1155 if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1156 if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1157 if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1158
1159 emitForm_MAD(i);
1160 }
1161
1162 void
1163 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1164 {
1165 switch (rnd) {
1166 case ROUND_NI: code[1] |= 0x08000000; break;
1167 case ROUND_M: code[1] |= 0x00020000; break;
1168 case ROUND_MI: code[1] |= 0x08020000; break;
1169 case ROUND_P: code[1] |= 0x00040000; break;
1170 case ROUND_PI: code[1] |= 0x08040000; break;
1171 case ROUND_Z: code[1] |= 0x00060000; break;
1172 case ROUND_ZI: code[1] |= 0x08060000; break;
1173 default:
1174 assert(rnd == ROUND_N);
1175 break;
1176 }
1177 }
1178
1179 void
1180 CodeEmitterNV50::emitCVT(const Instruction *i)
1181 {
1182 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1183 RoundMode rnd;
1184 DataType dType;
1185
1186 switch (i->op) {
1187 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
1188 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1189 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1190 default:
1191 rnd = i->rnd;
1192 break;
1193 }
1194
1195 if (i->op == OP_NEG && i->dType == TYPE_U32)
1196 dType = TYPE_S32;
1197 else
1198 dType = i->dType;
1199
1200 code[0] = 0xa0000000;
1201
1202 switch (dType) {
1203 case TYPE_F64:
1204 switch (i->sType) {
1205 case TYPE_F64: code[1] = 0xc4404000; break;
1206 case TYPE_S64: code[1] = 0x44414000; break;
1207 case TYPE_U64: code[1] = 0x44404000; break;
1208 case TYPE_F32: code[1] = 0xc4400000; break;
1209 case TYPE_S32: code[1] = 0x44410000; break;
1210 case TYPE_U32: code[1] = 0x44400000; break;
1211 default:
1212 assert(0);
1213 break;
1214 }
1215 break;
1216 case TYPE_S64:
1217 switch (i->sType) {
1218 case TYPE_F64: code[1] = 0x8c404000; break;
1219 case TYPE_F32: code[1] = 0x8c400000; break;
1220 default:
1221 assert(0);
1222 break;
1223 }
1224 break;
1225 case TYPE_U64:
1226 switch (i->sType) {
1227 case TYPE_F64: code[1] = 0x84404000; break;
1228 case TYPE_F32: code[1] = 0x84400000; break;
1229 default:
1230 assert(0);
1231 break;
1232 }
1233 break;
1234 case TYPE_F32:
1235 switch (i->sType) {
1236 case TYPE_F64: code[1] = 0xc0404000; break;
1237 case TYPE_S64: code[1] = 0x40414000; break;
1238 case TYPE_U64: code[1] = 0x40404000; break;
1239 case TYPE_F32: code[1] = 0xc4004000; break;
1240 case TYPE_S32: code[1] = 0x44014000; break;
1241 case TYPE_U32: code[1] = 0x44004000; break;
1242 case TYPE_F16: code[1] = 0xc4000000; break;
1243 case TYPE_U16: code[1] = 0x44000000; break;
1244 default:
1245 assert(0);
1246 break;
1247 }
1248 break;
1249 case TYPE_S32:
1250 switch (i->sType) {
1251 case TYPE_F64: code[1] = 0x88404000; break;
1252 case TYPE_F32: code[1] = 0x8c004000; break;
1253 case TYPE_S32: code[1] = 0x0c014000; break;
1254 case TYPE_U32: code[1] = 0x0c004000; break;
1255 case TYPE_F16: code[1] = 0x8c000000; break;
1256 case TYPE_S16: code[1] = 0x0c010000; break;
1257 case TYPE_U16: code[1] = 0x0c000000; break;
1258 case TYPE_S8: code[1] = 0x0c018000; break;
1259 case TYPE_U8: code[1] = 0x0c008000; break;
1260 default:
1261 assert(0);
1262 break;
1263 }
1264 break;
1265 case TYPE_U32:
1266 switch (i->sType) {
1267 case TYPE_F64: code[1] = 0x80404000; break;
1268 case TYPE_F32: code[1] = 0x84004000; break;
1269 case TYPE_S32: code[1] = 0x04014000; break;
1270 case TYPE_U32: code[1] = 0x04004000; break;
1271 case TYPE_F16: code[1] = 0x84000000; break;
1272 case TYPE_S16: code[1] = 0x04010000; break;
1273 case TYPE_U16: code[1] = 0x04000000; break;
1274 case TYPE_S8: code[1] = 0x04018000; break;
1275 case TYPE_U8: code[1] = 0x04008000; break;
1276 default:
1277 assert(0);
1278 break;
1279 }
1280 break;
1281 case TYPE_S16:
1282 case TYPE_U16:
1283 case TYPE_S8:
1284 case TYPE_U8:
1285 default:
1286 assert(0);
1287 break;
1288 }
1289 if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1290 code[1] |= 0x00004000;
1291
1292 roundMode_CVT(rnd);
1293
1294 switch (i->op) {
1295 case OP_ABS: code[1] |= 1 << 20; break;
1296 case OP_SAT: code[1] |= 1 << 19; break;
1297 case OP_NEG: code[1] |= 1 << 29; break;
1298 default:
1299 break;
1300 }
1301 code[1] ^= i->src(0).mod.neg() << 29;
1302 code[1] |= i->src(0).mod.abs() << 20;
1303 if (i->saturate)
1304 code[1] |= 1 << 19;
1305
1306 assert(i->op != OP_ABS || !i->src(0).mod.neg());
1307
1308 emitForm_MAD(i);
1309 }
1310
1311 void
1312 CodeEmitterNV50::emitPreOp(const Instruction *i)
1313 {
1314 code[0] = 0xb0000000;
1315 code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1316
1317 code[1] |= i->src(0).mod.abs() << 20;
1318 code[1] |= i->src(0).mod.neg() << 26;
1319
1320 emitForm_MAD(i);
1321 }
1322
1323 void
1324 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1325 {
1326 code[0] = 0x90000000;
1327
1328 if (i->encSize == 4) {
1329 assert(i->op == OP_RCP);
1330 code[0] |= i->src(0).mod.abs() << 15;
1331 code[0] |= i->src(0).mod.neg() << 22;
1332 emitForm_MUL(i);
1333 } else {
1334 code[1] = subOp << 29;
1335 code[1] |= i->src(0).mod.abs() << 20;
1336 code[1] |= i->src(0).mod.neg() << 26;
1337 emitForm_MAD(i);
1338 }
1339 }
1340
1341 void
1342 CodeEmitterNV50::emitNOT(const Instruction *i)
1343 {
1344 code[0] = 0xd0000000;
1345 code[1] = 0x0002c000;
1346
1347 switch (i->sType) {
1348 case TYPE_U32:
1349 case TYPE_S32:
1350 code[1] |= 0x04000000;
1351 break;
1352 default:
1353 break;
1354 }
1355 emitForm_MAD(i);
1356 setSrc(i, 0, 1);
1357 }
1358
1359 void
1360 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1361 {
1362 code[0] = 0xd0000000;
1363 code[1] = 0;
1364
1365 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1366 switch (i->op) {
1367 case OP_OR: code[0] |= 0x0100; break;
1368 case OP_XOR: code[0] |= 0x8000; break;
1369 default:
1370 assert(i->op == OP_AND);
1371 break;
1372 }
1373 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1374 code[0] |= 1 << 22;
1375
1376 emitForm_IMM(i);
1377 } else {
1378 switch (i->op) {
1379 case OP_AND: code[1] = 0x04000000; break;
1380 case OP_OR: code[1] = 0x04004000; break;
1381 case OP_XOR: code[1] = 0x04008000; break;
1382 default:
1383 assert(0);
1384 break;
1385 }
1386 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1387 code[1] |= 1 << 16;
1388 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1389 code[1] |= 1 << 17;
1390
1391 emitForm_MAD(i);
1392 }
1393 }
1394
1395 void
1396 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1397 {
1398 code[0] = 0x00000001 | (shl << 16);
1399 code[1] = 0xc0000000;
1400
1401 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1402
1403 setSrcFileBits(i, NV50_OP_ENC_IMM);
1404 setSrc(i, 0, 0);
1405 emitFlagsRd(i);
1406 }
1407
1408 void
1409 CodeEmitterNV50::emitShift(const Instruction *i)
1410 {
1411 if (i->def(0).getFile() == FILE_ADDRESS) {
1412 assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1413 emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1414 } else {
1415 code[0] = 0x30000001;
1416 code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1417 if (i->op == OP_SHR && isSignedType(i->sType))
1418 code[1] |= 1 << 27;
1419
1420 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1421 code[1] |= 1 << 20;
1422 code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1423 defId(i->def(0), 2);
1424 srcId(i->src(0), 9);
1425 emitFlagsRd(i);
1426 } else {
1427 emitForm_MAD(i);
1428 }
1429 }
1430 }
1431
1432 void
1433 CodeEmitterNV50::emitOUT(const Instruction *i)
1434 {
1435 code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1436 code[1] = 0xc0000000;
1437
1438 emitFlagsRd(i);
1439 }
1440
1441 void
1442 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1443 {
1444 code[0] = 0xf0000001;
1445 code[1] = 0x00000000;
1446
1447 switch (i->op) {
1448 case OP_TXB:
1449 code[1] = 0x20000000;
1450 break;
1451 case OP_TXL:
1452 code[1] = 0x40000000;
1453 break;
1454 case OP_TXF:
1455 code[0] |= 0x01000000;
1456 break;
1457 case OP_TXG:
1458 code[0] |= 0x01000000;
1459 code[1] = 0x80000000;
1460 break;
1461 case OP_TXLQ:
1462 code[1] = 0x60020000;
1463 break;
1464 default:
1465 assert(i->op == OP_TEX);
1466 break;
1467 }
1468
1469 code[0] |= i->tex.r << 9;
1470 code[0] |= i->tex.s << 17;
1471
1472 int argc = i->tex.target.getArgCount();
1473
1474 if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1475 argc += 1;
1476 if (i->tex.target.isShadow())
1477 argc += 1;
1478 assert(argc <= 4);
1479
1480 code[0] |= (argc - 1) << 22;
1481
1482 if (i->tex.target.isCube()) {
1483 code[0] |= 0x08000000;
1484 } else
1485 if (i->tex.useOffsets) {
1486 code[1] |= (i->tex.offset[0] & 0xf) << 24;
1487 code[1] |= (i->tex.offset[1] & 0xf) << 20;
1488 code[1] |= (i->tex.offset[2] & 0xf) << 16;
1489 }
1490
1491 code[0] |= (i->tex.mask & 0x3) << 25;
1492 code[1] |= (i->tex.mask & 0xc) << 12;
1493
1494 if (i->tex.liveOnly)
1495 code[1] |= 4;
1496
1497 defId(i->def(0), 2);
1498
1499 emitFlagsRd(i);
1500 }
1501
1502 void
1503 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1504 {
1505 assert(i->tex.query == TXQ_DIMS);
1506
1507 code[0] = 0xf0000001;
1508 code[1] = 0x60000000;
1509
1510 code[0] |= i->tex.r << 9;
1511 code[0] |= i->tex.s << 17;
1512
1513 code[0] |= (i->tex.mask & 0x3) << 25;
1514 code[1] |= (i->tex.mask & 0xc) << 12;
1515
1516 defId(i->def(0), 2);
1517
1518 emitFlagsRd(i);
1519 }
1520
1521 void
1522 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1523 {
1524 code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1525 code[1] = 0x60010000;
1526
1527 code[0] |= (i->tex.mask & 0x3) << 25;
1528 code[1] |= (i->tex.mask & 0xc) << 12;
1529 defId(i->def(0), 2);
1530
1531 emitFlagsRd(i);
1532 }
1533
1534 void
1535 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1536 {
1537 uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1538
1539 code[0] = 0x10000003; // bra
1540 code[1] = 0x00000780; // always
1541
1542 switch (i->subOp) {
1543 case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1544 break;
1545 case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1546 pos += 8;
1547 break;
1548 default:
1549 assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1550 code[0] = 0x20000003; // call
1551 code[1] = 0x00000000; // no predicate
1552 break;
1553 }
1554 addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1555 addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1556 }
1557
1558 void
1559 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1560 {
1561 const FlowInstruction *f = i->asFlow();
1562 bool hasPred = false;
1563 bool hasTarg = false;
1564
1565 code[0] = 0x00000003 | (flowOp << 28);
1566 code[1] = 0x00000000;
1567
1568 switch (i->op) {
1569 case OP_BRA:
1570 hasPred = true;
1571 hasTarg = true;
1572 break;
1573 case OP_BREAK:
1574 case OP_BRKPT:
1575 case OP_DISCARD:
1576 case OP_RET:
1577 hasPred = true;
1578 break;
1579 case OP_CALL:
1580 case OP_PREBREAK:
1581 case OP_JOINAT:
1582 hasTarg = true;
1583 break;
1584 case OP_PRERET:
1585 hasTarg = true;
1586 if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1587 emitPRERETEmu(f);
1588 return;
1589 }
1590 break;
1591 default:
1592 break;
1593 }
1594
1595 if (hasPred)
1596 emitFlagsRd(i);
1597
1598 if (hasTarg && f) {
1599 uint32_t pos;
1600
1601 if (f->op == OP_CALL) {
1602 if (f->builtin) {
1603 pos = targNV50->getBuiltinOffset(f->target.builtin);
1604 } else {
1605 pos = f->target.fn->binPos;
1606 }
1607 } else {
1608 pos = f->target.bb->binPos;
1609 }
1610
1611 code[0] |= ((pos >> 2) & 0xffff) << 11;
1612 code[1] |= ((pos >> 18) & 0x003f) << 14;
1613
1614 RelocEntry::Type relocTy;
1615
1616 relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1617
1618 addReloc(relocTy, 0, pos, 0x07fff800, 9);
1619 addReloc(relocTy, 1, pos, 0x000fc000, -4);
1620 }
1621 }
1622
1623 void
1624 CodeEmitterNV50::emitBAR(const Instruction *i)
1625 {
1626 ImmediateValue *barId = i->getSrc(0)->asImm();
1627 assert(barId);
1628
1629 code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1630 code[1] = 0x00004000;
1631
1632 if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1633 code[0] |= 1 << 26;
1634 }
1635
1636 void
1637 CodeEmitterNV50::emitATOM(const Instruction *i)
1638 {
1639 uint8_t subOp;
1640 switch (i->subOp) {
1641 case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break;
1642 case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break;
1643 case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break;
1644 case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break;
1645 case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break;
1646 case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break;
1647 case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break;
1648 case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break;
1649 case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break;
1650 case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1651 default:
1652 assert(!"invalid subop");
1653 return;
1654 }
1655 code[0] = 0xd0000001;
1656 code[1] = 0xe0c00000 | (subOp << 2);
1657 if (isSignedType(i->dType))
1658 code[1] |= 1 << 21;
1659
1660 // args
1661 emitFlagsRd(i);
1662 setDst(i, 0);
1663 setSrc(i, 1, 1);
1664 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1665 setSrc(i, 2, 2);
1666
1667 // g[] pointer
1668 code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1669 srcId(i->getIndirect(0, 0), 9);
1670 }
1671
1672 bool
1673 CodeEmitterNV50::emitInstruction(Instruction *insn)
1674 {
1675 if (!insn->encSize) {
1676 ERROR("skipping unencodable instruction: "); insn->print();
1677 return false;
1678 } else
1679 if (codeSize + insn->encSize > codeSizeLimit) {
1680 ERROR("code emitter output buffer too small\n");
1681 return false;
1682 }
1683
1684 if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1685 INFO("EMIT: "); insn->print();
1686 }
1687
1688 switch (insn->op) {
1689 case OP_MOV:
1690 emitMOV(insn);
1691 break;
1692 case OP_EXIT:
1693 case OP_NOP:
1694 case OP_JOIN:
1695 emitNOP();
1696 break;
1697 case OP_VFETCH:
1698 case OP_LOAD:
1699 emitLOAD(insn);
1700 break;
1701 case OP_EXPORT:
1702 case OP_STORE:
1703 emitSTORE(insn);
1704 break;
1705 case OP_PFETCH:
1706 emitPFETCH(insn);
1707 break;
1708 case OP_RDSV:
1709 emitRDSV(insn);
1710 break;
1711 case OP_LINTERP:
1712 case OP_PINTERP:
1713 emitINTERP(insn);
1714 break;
1715 case OP_ADD:
1716 case OP_SUB:
1717 if (isFloatType(insn->dType))
1718 emitFADD(insn);
1719 else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1720 emitAADD(insn);
1721 else
1722 emitUADD(insn);
1723 break;
1724 case OP_MUL:
1725 if (isFloatType(insn->dType))
1726 emitFMUL(insn);
1727 else
1728 emitIMUL(insn);
1729 break;
1730 case OP_MAD:
1731 case OP_FMA:
1732 if (isFloatType(insn->dType))
1733 emitFMAD(insn);
1734 else
1735 emitIMAD(insn);
1736 break;
1737 case OP_SAD:
1738 emitISAD(insn);
1739 break;
1740 case OP_NOT:
1741 emitNOT(insn);
1742 break;
1743 case OP_AND:
1744 case OP_OR:
1745 case OP_XOR:
1746 emitLogicOp(insn);
1747 break;
1748 case OP_SHL:
1749 case OP_SHR:
1750 emitShift(insn);
1751 break;
1752 case OP_SET:
1753 emitSET(insn);
1754 break;
1755 case OP_MIN:
1756 case OP_MAX:
1757 emitMINMAX(insn);
1758 break;
1759 case OP_CEIL:
1760 case OP_FLOOR:
1761 case OP_TRUNC:
1762 case OP_ABS:
1763 case OP_NEG:
1764 case OP_SAT:
1765 emitCVT(insn);
1766 break;
1767 case OP_CVT:
1768 if (insn->def(0).getFile() == FILE_ADDRESS)
1769 emitARL(insn, 0);
1770 else
1771 if (insn->def(0).getFile() == FILE_FLAGS ||
1772 insn->src(0).getFile() == FILE_FLAGS ||
1773 insn->src(0).getFile() == FILE_ADDRESS)
1774 emitMOV(insn);
1775 else
1776 emitCVT(insn);
1777 break;
1778 case OP_RCP:
1779 emitSFnOp(insn, 0);
1780 break;
1781 case OP_RSQ:
1782 emitSFnOp(insn, 2);
1783 break;
1784 case OP_LG2:
1785 emitSFnOp(insn, 3);
1786 break;
1787 case OP_SIN:
1788 emitSFnOp(insn, 4);
1789 break;
1790 case OP_COS:
1791 emitSFnOp(insn, 5);
1792 break;
1793 case OP_EX2:
1794 emitSFnOp(insn, 6);
1795 break;
1796 case OP_PRESIN:
1797 case OP_PREEX2:
1798 emitPreOp(insn);
1799 break;
1800 case OP_TEX:
1801 case OP_TXB:
1802 case OP_TXL:
1803 case OP_TXF:
1804 case OP_TXG:
1805 case OP_TXLQ:
1806 emitTEX(insn->asTex());
1807 break;
1808 case OP_TXQ:
1809 emitTXQ(insn->asTex());
1810 break;
1811 case OP_TEXPREP:
1812 emitTEXPREP(insn->asTex());
1813 break;
1814 case OP_EMIT:
1815 case OP_RESTART:
1816 emitOUT(insn);
1817 break;
1818 case OP_DISCARD:
1819 emitFlow(insn, 0x0);
1820 break;
1821 case OP_BRA:
1822 emitFlow(insn, 0x1);
1823 break;
1824 case OP_CALL:
1825 emitFlow(insn, 0x2);
1826 break;
1827 case OP_RET:
1828 emitFlow(insn, 0x3);
1829 break;
1830 case OP_PREBREAK:
1831 emitFlow(insn, 0x4);
1832 break;
1833 case OP_BREAK:
1834 emitFlow(insn, 0x5);
1835 break;
1836 case OP_QUADON:
1837 emitFlow(insn, 0x6);
1838 break;
1839 case OP_QUADPOP:
1840 emitFlow(insn, 0x7);
1841 break;
1842 case OP_JOINAT:
1843 emitFlow(insn, 0xa);
1844 break;
1845 case OP_PRERET:
1846 emitFlow(insn, 0xd);
1847 break;
1848 case OP_QUADOP:
1849 emitQUADOP(insn, insn->lanes, insn->subOp);
1850 break;
1851 case OP_DFDX:
1852 emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
1853 break;
1854 case OP_DFDY:
1855 emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
1856 break;
1857 case OP_ATOM:
1858 emitATOM(insn);
1859 break;
1860 case OP_BAR:
1861 emitBAR(insn);
1862 break;
1863 case OP_PHI:
1864 case OP_UNION:
1865 case OP_CONSTRAINT:
1866 ERROR("operation should have been eliminated\n");
1867 return false;
1868 case OP_EXP:
1869 case OP_LOG:
1870 case OP_SQRT:
1871 case OP_POW:
1872 case OP_SELP:
1873 case OP_SLCT:
1874 case OP_TXD:
1875 case OP_PRECONT:
1876 case OP_CONT:
1877 case OP_POPCNT:
1878 case OP_INSBF:
1879 case OP_EXTBF:
1880 ERROR("operation should have been lowered\n");
1881 return false;
1882 default:
1883 ERROR("unknown op: %u\n", insn->op);
1884 return false;
1885 }
1886 if (insn->join || insn->op == OP_JOIN)
1887 code[1] |= 0x2;
1888 else
1889 if (insn->exit || insn->op == OP_EXIT)
1890 code[1] |= 0x1;
1891
1892 assert((insn->encSize == 8) == (code[0] & 1));
1893
1894 code += insn->encSize / 4;
1895 codeSize += insn->encSize;
1896 return true;
1897 }
1898
1899 uint32_t
1900 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
1901 {
1902 const Target::OpInfo &info = targ->getOpInfo(i);
1903
1904 if (info.minEncSize > 4)
1905 return 8;
1906
1907 // check constraints on dst and src operands
1908 for (int d = 0; i->defExists(d); ++d) {
1909 if (i->def(d).rep()->reg.data.id > 63 ||
1910 i->def(d).rep()->reg.file != FILE_GPR)
1911 return 8;
1912 }
1913
1914 for (int s = 0; i->srcExists(s); ++s) {
1915 DataFile sf = i->src(s).getFile();
1916 if (sf != FILE_GPR)
1917 if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
1918 return 8;
1919 if (i->src(s).rep()->reg.data.id > 63)
1920 return 8;
1921 }
1922
1923 // check modifiers & rounding
1924 if (i->join || i->lanes != 0xf || i->exit)
1925 return 8;
1926 if (i->op == OP_MUL && i->rnd != ROUND_N)
1927 return 8;
1928
1929 if (i->asTex())
1930 return 8; // TODO: short tex encoding
1931
1932 // check constraints on short MAD
1933 if (info.srcNr >= 2 && i->srcExists(2)) {
1934 if (i->saturate || i->src(2).mod)
1935 return 8;
1936 if ((i->src(0).mod ^ i->src(1).mod) ||
1937 (i->src(0).mod | i->src(1).mod).abs())
1938 return 8;
1939 if (!i->defExists(0) ||
1940 i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
1941 return 8;
1942 }
1943
1944 return info.minEncSize;
1945 }
1946
1947 // Change the encoding size of an instruction after BBs have been scheduled.
1948 static void
1949 makeInstructionLong(Instruction *insn)
1950 {
1951 if (insn->encSize == 8)
1952 return;
1953 Function *fn = insn->bb->getFunction();
1954 int n = 0;
1955 int adj = 4;
1956
1957 for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
1958
1959 if (n & 1) {
1960 adj = 8;
1961 insn->next->encSize = 8;
1962 } else
1963 if (insn->prev && insn->prev->encSize == 4) {
1964 adj = 8;
1965 insn->prev->encSize = 8;
1966 }
1967 insn->encSize = 8;
1968
1969 for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
1970 fn->bbArray[i]->binPos += 4;
1971 }
1972 fn->binSize += adj;
1973 insn->bb->binSize += adj;
1974 }
1975
1976 static bool
1977 trySetExitModifier(Instruction *insn)
1978 {
1979 if (insn->op == OP_DISCARD ||
1980 insn->op == OP_QUADON ||
1981 insn->op == OP_QUADPOP)
1982 return false;
1983 for (int s = 0; insn->srcExists(s); ++s)
1984 if (insn->src(s).getFile() == FILE_IMMEDIATE)
1985 return false;
1986 if (insn->asFlow()) {
1987 if (insn->op == OP_CALL) // side effects !
1988 return false;
1989 if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
1990 return false;
1991 insn->op = OP_EXIT;
1992 }
1993 insn->exit = 1;
1994 makeInstructionLong(insn);
1995 return true;
1996 }
1997
1998 static void
1999 replaceExitWithModifier(Function *func)
2000 {
2001 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2002
2003 if (!epilogue->getExit() ||
2004 epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
2005 return;
2006
2007 if (epilogue->getEntry()->op != OP_EXIT) {
2008 Instruction *insn = epilogue->getExit()->prev;
2009 if (!insn || !trySetExitModifier(insn))
2010 return;
2011 insn->exit = 1;
2012 } else {
2013 for (Graph::EdgeIterator ei = func->cfgExit->incident();
2014 !ei.end(); ei.next()) {
2015 BasicBlock *bb = BasicBlock::get(ei.getNode());
2016 Instruction *i = bb->getExit();
2017
2018 if (!i || !trySetExitModifier(i))
2019 return;
2020 }
2021 }
2022 epilogue->binSize -= 8;
2023 func->binSize -= 8;
2024 delete_Instruction(func->getProgram(), epilogue->getExit());
2025 }
2026
2027 void
2028 CodeEmitterNV50::prepareEmission(Function *func)
2029 {
2030 CodeEmitter::prepareEmission(func);
2031
2032 replaceExitWithModifier(func);
2033 }
2034
2035 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
2036 CodeEmitter(target), targNV50(target)
2037 {
2038 targ = target; // specialized
2039 code = NULL;
2040 codeSize = codeSizeLimit = 0;
2041 relocInfo = NULL;
2042 }
2043
2044 CodeEmitter *
2045 TargetNV50::getCodeEmitter(Program::Type type)
2046 {
2047 CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
2048 emit->setProgramType(type);
2049 return emit;
2050 }
2051
2052 } // namespace nv50_ir