nv50/ir: add saturate support on ex2
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_nv50.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target_nv50.h"
25
26 namespace nv50_ir {
27
// Encoding-form selectors passed as the 'enc' argument of setSrcFileBits().
#define NV50_OP_ENC_LONG     0 // passed by emitForm_MAD
#define NV50_OP_ENC_SHORT    1 // passed by emitForm_MUL
#define NV50_OP_ENC_IMM      2 // passed by emitForm_IMM
#define NV50_OP_ENC_LONG_ALT 3 // passed by emitForm_ADD
32
33 class CodeEmitterNV50 : public CodeEmitter
34 {
35 public:
36 CodeEmitterNV50(const TargetNV50 *);
37
38 virtual bool emitInstruction(Instruction *);
39
40 virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 virtual void prepareEmission(Function *);
45
46 private:
47 Program::Type progType;
48
49 const TargetNV50 *targNV50;
50
51 private:
52 inline void defId(const ValueDef&, const int pos);
53 inline void srcId(const ValueRef&, const int pos);
54 inline void srcId(const ValueRef *, const int pos);
55
56 inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57 inline void srcAddr8(const ValueRef&, const int pos);
58
59 void emitFlagsRd(const Instruction *);
60 void emitFlagsWr(const Instruction *);
61
62 void emitCondCode(CondCode cc, DataType ty, int pos);
63
64 inline void setARegBits(unsigned int);
65
66 void setAReg16(const Instruction *, int s);
67 void setImmediate(const Instruction *, int s);
68
69 void setDst(const Value *);
70 void setDst(const Instruction *, int d);
71 void setSrcFileBits(const Instruction *, int enc);
72 void setSrc(const Instruction *, unsigned int s, int slot);
73
74 void emitForm_MAD(const Instruction *);
75 void emitForm_ADD(const Instruction *);
76 void emitForm_MUL(const Instruction *);
77 void emitForm_IMM(const Instruction *);
78
79 void emitLoadStoreSizeLG(DataType ty, int pos);
80 void emitLoadStoreSizeCS(DataType ty);
81
82 void roundMode_MAD(const Instruction *);
83 void roundMode_CVT(RoundMode);
84
85 void emitMNeg12(const Instruction *);
86
87 void emitLOAD(const Instruction *);
88 void emitSTORE(const Instruction *);
89 void emitMOV(const Instruction *);
90 void emitRDSV(const Instruction *);
91 void emitNOP();
92 void emitINTERP(const Instruction *);
93 void emitPFETCH(const Instruction *);
94 void emitOUT(const Instruction *);
95
96 void emitUADD(const Instruction *);
97 void emitAADD(const Instruction *);
98 void emitFADD(const Instruction *);
99 void emitDADD(const Instruction *);
100 void emitIMUL(const Instruction *);
101 void emitFMUL(const Instruction *);
102 void emitDMUL(const Instruction *);
103 void emitFMAD(const Instruction *);
104 void emitDMAD(const Instruction *);
105 void emitIMAD(const Instruction *);
106 void emitISAD(const Instruction *);
107
108 void emitMINMAX(const Instruction *);
109
110 void emitPreOp(const Instruction *);
111 void emitSFnOp(const Instruction *, uint8_t subOp);
112
113 void emitShift(const Instruction *);
114 void emitARL(const Instruction *, unsigned int shl);
115 void emitLogicOp(const Instruction *);
116 void emitNOT(const Instruction *);
117
118 void emitCVT(const Instruction *);
119 void emitSET(const Instruction *);
120
121 void emitTEX(const TexInstruction *);
122 void emitTXQ(const TexInstruction *);
123 void emitTEXPREP(const TexInstruction *);
124
125 void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
126
127 void emitFlow(const Instruction *, uint8_t flowOp);
128 void emitPRERETEmu(const FlowInstruction *);
129 void emitBAR(const Instruction *);
130
131 void emitATOM(const Instruction *);
132 };
133
// Shorthand for the register data of the representative value behind a
// source (SDATA) or definition (DDATA) reference; both expand identically.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
136
137 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
138 {
139 assert(src.get());
140 code[pos / 32] |= SDATA(src).id << (pos % 32);
141 }
142
143 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
144 {
145 assert(src->get());
146 code[pos / 32] |= SDATA(*src).id << (pos % 32);
147 }
148
149 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
150 {
151 assert(src.get());
152
153 int32_t offset = SDATA(src).offset;
154
155 assert(!adj || src.get()->reg.size <= 4);
156 if (adj)
157 offset /= src.get()->reg.size;
158
159 assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
160
161 if (offset < 0)
162 offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
163
164 code[pos / 32] |= offset << (pos % 32);
165 }
166
167 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
168 {
169 assert(src.get());
170
171 uint32_t offset = SDATA(src).offset;
172
173 assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
174
175 code[pos / 32] |= (offset >> 2) << (pos % 32);
176 }
177
178 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
179 {
180 assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
181
182 code[pos / 32] |= DDATA(def).id << (pos % 32);
183 }
184
185 void
186 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
187 {
188 switch (insn->rnd) {
189 case ROUND_M: code[1] |= 1 << 22; break;
190 case ROUND_P: code[1] |= 2 << 22; break;
191 case ROUND_Z: code[1] |= 3 << 22; break;
192 default:
193 assert(insn->rnd == ROUND_N);
194 break;
195 }
196 }
197
198 void
199 CodeEmitterNV50::emitMNeg12(const Instruction *i)
200 {
201 code[1] |= i->src(0).mod.neg() << 26;
202 code[1] |= i->src(1).mod.neg() << 27;
203 }
204
205 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
206 {
207 uint8_t enc;
208
209 assert(pos >= 32 || pos <= 27);
210
211 switch (cc) {
212 case CC_LT: enc = 0x1; break;
213 case CC_LTU: enc = 0x9; break;
214 case CC_EQ: enc = 0x2; break;
215 case CC_EQU: enc = 0xa; break;
216 case CC_LE: enc = 0x3; break;
217 case CC_LEU: enc = 0xb; break;
218 case CC_GT: enc = 0x4; break;
219 case CC_GTU: enc = 0xc; break;
220 case CC_NE: enc = 0x5; break;
221 case CC_NEU: enc = 0xd; break;
222 case CC_GE: enc = 0x6; break;
223 case CC_GEU: enc = 0xe; break;
224 case CC_TR: enc = 0xf; break;
225 case CC_FL: enc = 0x0; break;
226
227 case CC_O: enc = 0x10; break;
228 case CC_C: enc = 0x11; break;
229 case CC_A: enc = 0x12; break;
230 case CC_S: enc = 0x13; break;
231 case CC_NS: enc = 0x1c; break;
232 case CC_NA: enc = 0x1d; break;
233 case CC_NC: enc = 0x1e; break;
234 case CC_NO: enc = 0x1f; break;
235
236 default:
237 enc = 0;
238 assert(!"invalid condition code");
239 break;
240 }
241 if (ty != TYPE_NONE && !isFloatType(ty))
242 enc &= ~0x8; // unordered only exists for float types
243
244 code[pos / 32] |= enc << (pos % 32);
245 }
246
247 void
248 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
249 {
250 int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
251
252 assert(!(code[1] & 0x00003f80));
253
254 if (s >= 0) {
255 assert(i->getSrc(s)->reg.file == FILE_FLAGS);
256 emitCondCode(i->cc, TYPE_NONE, 32 + 7);
257 srcId(i->src(s), 32 + 12);
258 } else {
259 code[1] |= 0x0780;
260 }
261 }
262
263 void
264 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
265 {
266 assert(!(code[1] & 0x70));
267
268 int flagsDef = i->flagsDef;
269
270 // find flags definition and check that it is the last def
271 if (flagsDef < 0) {
272 for (int d = 0; i->defExists(d); ++d)
273 if (i->def(d).getFile() == FILE_FLAGS)
274 flagsDef = d;
275 if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
276 WARN("Instruction::flagsDef was not set properly\n");
277 }
278 if (flagsDef == 0 && i->defExists(1))
279 WARN("flags def should not be the primary definition\n");
280
281 if (flagsDef >= 0)
282 code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
283
284 }
285
286 void
287 CodeEmitterNV50::setARegBits(unsigned int u)
288 {
289 code[0] |= (u & 3) << 26;
290 code[1] |= (u & 4);
291 }
292
293 void
294 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
295 {
296 if (i->srcExists(s)) {
297 s = i->src(s).indirect[0];
298 if (s >= 0)
299 setARegBits(SDATA(i->src(s)).id + 1);
300 }
301 }
302
303 void
304 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
305 {
306 const ImmediateValue *imm = i->src(s).get()->asImm();
307 assert(imm);
308
309 uint32_t u = imm->reg.data.u32;
310
311 if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
312 u = ~u;
313
314 code[1] |= 3;
315 code[0] |= (u & 0x3f) << 16;
316 code[1] |= (u >> 6) << 2;
317 }
318
319 void
320 CodeEmitterNV50::setDst(const Value *dst)
321 {
322 const Storage *reg = &dst->join->reg;
323
324 assert(reg->file != FILE_ADDRESS);
325
326 if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
327 code[0] |= (127 << 2) | 1;
328 code[1] |= 8;
329 } else {
330 int id;
331 if (reg->file == FILE_SHADER_OUTPUT) {
332 code[1] |= 8;
333 id = reg->data.offset / 4;
334 } else {
335 id = reg->data.id;
336 }
337 code[0] |= id << 2;
338 }
339 }
340
341 void
342 CodeEmitterNV50::setDst(const Instruction *i, int d)
343 {
344 if (i->defExists(d)) {
345 setDst(i->getDef(d));
346 } else
347 if (!d) {
348 code[0] |= 0x01fc; // bit bucket
349 code[1] |= 0x0008;
350 }
351 }
352
353 // 3 * 2 bits:
354 // 0: r
355 // 1: a/s
356 // 2: c
357 // 3: i
358 void
359 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
360 {
361 uint8_t mode = 0;
362
363 for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
364 switch (i->src(s).getFile()) {
365 case FILE_GPR:
366 break;
367 case FILE_MEMORY_SHARED:
368 case FILE_SHADER_INPUT:
369 mode |= 1 << (s * 2);
370 break;
371 case FILE_MEMORY_CONST:
372 mode |= 2 << (s * 2);
373 break;
374 case FILE_IMMEDIATE:
375 mode |= 3 << (s * 2);
376 break;
377 default:
378 ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
379 assert(0);
380 break;
381 }
382 }
383 switch (mode) {
384 case 0x00: // rrr
385 break;
386 case 0x01: // arr/grr
387 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
388 code[0] |= 0x01800000;
389 if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
390 code[1] |= 0x00200000;
391 } else {
392 if (enc == NV50_OP_ENC_SHORT)
393 code[0] |= 0x01000000;
394 else
395 code[1] |= 0x00200000;
396 }
397 break;
398 case 0x03: // irr
399 assert(i->op == OP_MOV);
400 return;
401 case 0x0c: // rir
402 break;
403 case 0x0d: // gir
404 assert(progType == Program::TYPE_GEOMETRY ||
405 progType == Program::TYPE_COMPUTE);
406 code[0] |= 0x01000000;
407 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
408 int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
409 assert(reg < 3);
410 code[0] |= (reg + 1) << 26;
411 }
412 break;
413 case 0x08: // rcr
414 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
415 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
416 break;
417 case 0x09: // acr/gcr
418 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
419 code[0] |= 0x01800000;
420 } else {
421 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
422 code[1] |= 0x00200000;
423 }
424 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
425 break;
426 case 0x20: // rrc
427 code[0] |= 0x01000000;
428 code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
429 break;
430 case 0x21: // arc
431 code[0] |= 0x01000000;
432 code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
433 assert(progType != Program::TYPE_GEOMETRY);
434 break;
435 default:
436 ERROR("not encodable: %x\n", mode);
437 assert(0);
438 break;
439 }
440 if (progType != Program::TYPE_COMPUTE)
441 return;
442
443 if ((mode & 3) == 1) {
444 const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
445
446 switch (i->sType) {
447 case TYPE_U8:
448 break;
449 case TYPE_U16:
450 code[0] |= 1 << pos;
451 break;
452 case TYPE_S16:
453 code[0] |= 2 << pos;
454 break;
455 default:
456 code[0] |= 3 << pos;
457 assert(i->getSrc(0)->reg.size == 4);
458 break;
459 }
460 }
461 }
462
463 void
464 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
465 {
466 if (Target::operationSrcNr[i->op] <= s)
467 return;
468 const Storage *reg = &i->src(s).rep()->reg;
469
470 unsigned int id = (reg->file == FILE_GPR) ?
471 reg->data.id :
472 reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
473
474 switch (slot) {
475 case 0: code[0] |= id << 9; break;
476 case 1: code[0] |= id << 16; break;
477 case 2: code[1] |= id << 14; break;
478 default:
479 assert(0);
480 break;
481 }
482 }
483
484 // the default form:
485 // - long instruction
486 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
487 // - address & flags
488 void
489 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
490 {
491 assert(i->encSize == 8);
492 code[0] |= 1;
493
494 emitFlagsRd(i);
495 emitFlagsWr(i);
496
497 setDst(i, 0);
498
499 setSrcFileBits(i, NV50_OP_ENC_LONG);
500 setSrc(i, 0, 0);
501 setSrc(i, 1, 1);
502 setSrc(i, 2, 2);
503
504 if (i->getIndirect(0, 0)) {
505 assert(!i->srcExists(1) || !i->getIndirect(1, 0));
506 assert(!i->srcExists(2) || !i->getIndirect(2, 0));
507 setAReg16(i, 0);
508 } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
509 assert(!i->srcExists(2) || !i->getIndirect(2, 0));
510 setAReg16(i, 1);
511 } else {
512 setAReg16(i, 2);
513 }
514 }
515
516 // like default form, but 2nd source in slot 2, and no 3rd source
517 void
518 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
519 {
520 assert(i->encSize == 8);
521 code[0] |= 1;
522
523 emitFlagsRd(i);
524 emitFlagsWr(i);
525
526 setDst(i, 0);
527
528 setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
529 setSrc(i, 0, 0);
530 setSrc(i, 1, 2);
531
532 if (i->getIndirect(0, 0)) {
533 assert(!i->getIndirect(1, 0));
534 setAReg16(i, 0);
535 } else {
536 setAReg16(i, 1);
537 }
538 }
539
540 // default short form (rr, ar, rc, gr)
541 void
542 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
543 {
544 assert(i->encSize == 4 && !(code[0] & 1));
545 assert(i->defExists(0));
546 assert(!i->getPredicate());
547
548 setDst(i, 0);
549
550 setSrcFileBits(i, NV50_OP_ENC_SHORT);
551 setSrc(i, 0, 0);
552 setSrc(i, 1, 1);
553 }
554
555 // usual immediate form
556 // - 1 to 3 sources where second is immediate (rir, gir)
557 // - no address or predicate possible
558 void
559 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
560 {
561 assert(i->encSize == 8);
562 code[0] |= 1;
563
564 assert(i->defExists(0) && i->srcExists(0));
565
566 setDst(i, 0);
567
568 setSrcFileBits(i, NV50_OP_ENC_IMM);
569 if (Target::operationSrcNr[i->op] > 1) {
570 setSrc(i, 0, 0);
571 setImmediate(i, 1);
572 // If there is another source, it has to be the same as the dest reg.
573 } else {
574 setImmediate(i, 0);
575 }
576 }
577
578 void
579 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
580 {
581 uint8_t enc;
582
583 switch (ty) {
584 case TYPE_F32: // fall through
585 case TYPE_S32: // fall through
586 case TYPE_U32: enc = 0x6; break;
587 case TYPE_B128: enc = 0x5; break;
588 case TYPE_F64: // fall through
589 case TYPE_S64: // fall through
590 case TYPE_U64: enc = 0x4; break;
591 case TYPE_S16: enc = 0x3; break;
592 case TYPE_U16: enc = 0x2; break;
593 case TYPE_S8: enc = 0x1; break;
594 case TYPE_U8: enc = 0x0; break;
595 default:
596 enc = 0;
597 assert(!"invalid load/store type");
598 break;
599 }
600 code[pos / 32] |= enc << (pos % 32);
601 }
602
603 void
604 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
605 {
606 switch (ty) {
607 case TYPE_U8: break;
608 case TYPE_U16: code[1] |= 0x4000; break;
609 case TYPE_S16: code[1] |= 0x8000; break;
610 case TYPE_F32:
611 case TYPE_S32:
612 case TYPE_U32: code[1] |= 0xc000; break;
613 default:
614 assert(0);
615 break;
616 }
617 }
618
619 void
620 CodeEmitterNV50::emitLOAD(const Instruction *i)
621 {
622 DataFile sf = i->src(0).getFile();
623 int32_t offset = i->getSrc(0)->reg.data.offset;
624
625 switch (sf) {
626 case FILE_SHADER_INPUT:
627 if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
628 code[0] = 0x11800001;
629 else
630 // use 'mov' where we can
631 code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
632 code[1] = 0x00200000 | (i->lanes << 14);
633 if (typeSizeof(i->dType) == 4)
634 code[1] |= 0x04000000;
635 break;
636 case FILE_MEMORY_SHARED:
637 if (targ->getChipset() >= 0x84) {
638 assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
639 code[0] = 0x10000001;
640 code[1] = 0x40000000;
641
642 if (typeSizeof(i->dType) == 4)
643 code[1] |= 0x04000000;
644
645 emitLoadStoreSizeCS(i->sType);
646 } else {
647 assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
648 code[0] = 0x10000001;
649 code[1] = 0x00200000 | (i->lanes << 14);
650 emitLoadStoreSizeCS(i->sType);
651 }
652 break;
653 case FILE_MEMORY_CONST:
654 code[0] = 0x10000001;
655 code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
656 if (typeSizeof(i->dType) == 4)
657 code[1] |= 0x04000000;
658 emitLoadStoreSizeCS(i->sType);
659 break;
660 case FILE_MEMORY_LOCAL:
661 code[0] = 0xd0000001;
662 code[1] = 0x40000000;
663 break;
664 case FILE_MEMORY_GLOBAL:
665 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
666 code[1] = 0x80000000;
667 break;
668 default:
669 assert(!"invalid load source file");
670 break;
671 }
672 if (sf == FILE_MEMORY_LOCAL ||
673 sf == FILE_MEMORY_GLOBAL)
674 emitLoadStoreSizeLG(i->sType, 21 + 32);
675
676 setDst(i, 0);
677
678 emitFlagsRd(i);
679 emitFlagsWr(i);
680
681 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
682 srcId(*i->src(0).getIndirect(0), 9);
683 } else {
684 setAReg16(i, 0);
685 srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
686 }
687 }
688
689 void
690 CodeEmitterNV50::emitSTORE(const Instruction *i)
691 {
692 DataFile f = i->getSrc(0)->reg.file;
693 int32_t offset = i->getSrc(0)->reg.data.offset;
694
695 switch (f) {
696 case FILE_SHADER_OUTPUT:
697 code[0] = 0x00000001 | ((offset >> 2) << 9);
698 code[1] = 0x80c00000;
699 srcId(i->src(1), 32 + 14);
700 break;
701 case FILE_MEMORY_GLOBAL:
702 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
703 code[1] = 0xa0000000;
704 emitLoadStoreSizeLG(i->dType, 21 + 32);
705 srcId(i->src(1), 2);
706 break;
707 case FILE_MEMORY_LOCAL:
708 code[0] = 0xd0000001;
709 code[1] = 0x60000000;
710 emitLoadStoreSizeLG(i->dType, 21 + 32);
711 srcId(i->src(1), 2);
712 break;
713 case FILE_MEMORY_SHARED:
714 code[0] = 0x00000001;
715 code[1] = 0xe0000000;
716 switch (typeSizeof(i->dType)) {
717 case 1:
718 code[0] |= offset << 9;
719 code[1] |= 0x00400000;
720 break;
721 case 2:
722 code[0] |= (offset >> 1) << 9;
723 break;
724 case 4:
725 code[0] |= (offset >> 2) << 9;
726 code[1] |= 0x04200000;
727 break;
728 default:
729 assert(0);
730 break;
731 }
732 srcId(i->src(1), 32 + 14);
733 break;
734 default:
735 assert(!"invalid store destination file");
736 break;
737 }
738
739 if (f == FILE_MEMORY_GLOBAL)
740 srcId(*i->src(0).getIndirect(0), 9);
741 else
742 setAReg16(i, 0);
743
744 if (f == FILE_MEMORY_LOCAL)
745 srcAddr16(i->src(0), false, 9);
746
747 emitFlagsRd(i);
748 }
749
750 void
751 CodeEmitterNV50::emitMOV(const Instruction *i)
752 {
753 DataFile sf = i->getSrc(0)->reg.file;
754 DataFile df = i->getDef(0)->reg.file;
755
756 assert(sf == FILE_GPR || df == FILE_GPR);
757
758 if (sf == FILE_FLAGS) {
759 assert(i->flagsSrc >= 0);
760 code[0] = 0x00000001;
761 code[1] = 0x20000000;
762 defId(i->def(0), 2);
763 emitFlagsRd(i);
764 } else
765 if (sf == FILE_ADDRESS) {
766 code[0] = 0x00000001;
767 code[1] = 0x40000000;
768 defId(i->def(0), 2);
769 setARegBits(SDATA(i->src(0)).id + 1);
770 emitFlagsRd(i);
771 } else
772 if (df == FILE_FLAGS) {
773 assert(i->flagsDef >= 0);
774 code[0] = 0x00000001;
775 code[1] = 0xa0000000;
776 srcId(i->src(0), 9);
777 emitFlagsRd(i);
778 emitFlagsWr(i);
779 } else
780 if (sf == FILE_IMMEDIATE) {
781 code[0] = 0x10008001;
782 code[1] = 0x00000003;
783 emitForm_IMM(i);
784 } else {
785 if (i->encSize == 4) {
786 code[0] = 0x10008000;
787 } else {
788 code[0] = 0x10000001;
789 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
790 code[1] |= (i->lanes << 14);
791 emitFlagsRd(i);
792 }
793 defId(i->def(0), 2);
794 srcId(i->src(0), 9);
795 }
796 if (df == FILE_SHADER_OUTPUT) {
797 assert(i->encSize == 8);
798 code[1] |= 0x8;
799 }
800 }
801
802 static inline uint8_t getSRegEncoding(const ValueRef &ref)
803 {
804 switch (SDATA(ref).sv.sv) {
805 case SV_PHYSID: return 0;
806 case SV_CLOCK: return 1;
807 case SV_VERTEX_STRIDE: return 3;
808 // case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index;
809 case SV_SAMPLE_INDEX: return 8;
810 default:
811 assert(!"no sreg for system value");
812 return 0;
813 }
814 }
815
816 void
817 CodeEmitterNV50::emitRDSV(const Instruction *i)
818 {
819 code[0] = 0x00000001;
820 code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
821 defId(i->def(0), 2);
822 emitFlagsRd(i);
823 }
824
825 void
826 CodeEmitterNV50::emitNOP()
827 {
828 code[0] = 0xf0000001;
829 code[1] = 0xe0000000;
830 }
831
832 void
833 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
834 {
835 code[0] = 0xc0000000 | (lane << 16);
836 code[1] = 0x80000000;
837
838 code[0] |= (quOp & 0x03) << 20;
839 code[1] |= (quOp & 0xfc) << 20;
840
841 emitForm_ADD(i);
842
843 if (!i->srcExists(1))
844 srcId(i->src(0), 32 + 14);
845 }
846
847 /* NOTE: This returns the base address of a vertex inside the primitive.
848 * src0 is an immediate, the index (not offset) of the vertex
849 * inside the primitive. XXX: signed or unsigned ?
850 * src1 (may be NULL) should use whatever units the hardware requires
851 * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
852 */
853 void
854 CodeEmitterNV50::emitPFETCH(const Instruction *i)
855 {
856 const uint32_t prim = i->src(0).get()->reg.data.u32;
857 assert(prim <= 127);
858
859 if (i->def(0).getFile() == FILE_ADDRESS) {
860 // shl $aX a[] 0
861 code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
862 code[1] = 0xc0200000;
863 code[0] |= prim << 9;
864 assert(!i->srcExists(1));
865 } else
866 if (i->srcExists(1)) {
867 // ld b32 $rX a[$aX+base]
868 code[0] = 0x00000001;
869 code[1] = 0x04200000 | (0xf << 14);
870 defId(i->def(0), 2);
871 code[0] |= prim << 9;
872 setARegBits(SDATA(i->src(1)).id + 1);
873 } else {
874 // mov b32 $rX a[]
875 code[0] = 0x10000001;
876 code[1] = 0x04200000 | (0xf << 14);
877 defId(i->def(0), 2);
878 code[0] |= prim << 9;
879 }
880 emitFlagsRd(i);
881 }
882
883 static void
884 interpApply(const InterpEntry *entry, uint32_t *code,
885 bool force_persample_interp, bool flatshade)
886 {
887 int ipa = entry->ipa;
888 int encSize = entry->reg;
889 int loc = entry->loc;
890
891 if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
892 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
893 if (force_persample_interp) {
894 if (encSize == 8)
895 code[loc + 1] |= 1 << 16;
896 else
897 code[loc + 0] |= 1 << 24;
898 } else {
899 if (encSize == 8)
900 code[loc + 1] &= ~(1 << 16);
901 else
902 code[loc + 0] &= ~(1 << 24);
903 }
904 }
905 }
906
907 void
908 CodeEmitterNV50::emitINTERP(const Instruction *i)
909 {
910 code[0] = 0x80000000;
911
912 defId(i->def(0), 2);
913 srcAddr8(i->src(0), 16);
914
915 if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
916 code[0] |= 1 << 8;
917 } else {
918 if (i->op == OP_PINTERP) {
919 code[0] |= 1 << 25;
920 srcId(i->src(1), 9);
921 }
922 if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
923 code[0] |= 1 << 24;
924 }
925
926 if (i->encSize == 8) {
927 if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
928 code[1] = 4 << 16;
929 else
930 code[1] = (code[0] & (3 << 24)) >> (24 - 16);
931 code[0] &= ~0x03000000;
932 code[0] |= 1;
933 emitFlagsRd(i);
934 }
935
936 addInterp(i->ipa, i->encSize, interpApply);
937 }
938
939 void
940 CodeEmitterNV50::emitMINMAX(const Instruction *i)
941 {
942 if (i->dType == TYPE_F64) {
943 code[0] = 0xe0000000;
944 code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
945 } else {
946 code[0] = 0x30000000;
947 code[1] = 0x80000000;
948 if (i->op == OP_MIN)
949 code[1] |= 0x20000000;
950
951 switch (i->dType) {
952 case TYPE_F32: code[0] |= 0x80000000; break;
953 case TYPE_S32: code[1] |= 0x8c000000; break;
954 case TYPE_U32: code[1] |= 0x84000000; break;
955 case TYPE_S16: code[1] |= 0x80000000; break;
956 case TYPE_U16: break;
957 default:
958 assert(0);
959 break;
960 }
961 }
962
963 code[1] |= i->src(0).mod.abs() << 20;
964 code[1] |= i->src(0).mod.neg() << 26;
965 code[1] |= i->src(1).mod.abs() << 19;
966 code[1] |= i->src(1).mod.neg() << 27;
967
968 emitForm_MAD(i);
969 }
970
971 void
972 CodeEmitterNV50::emitFMAD(const Instruction *i)
973 {
974 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
975 const int neg_add = i->src(2).mod.neg();
976
977 code[0] = 0xe0000000;
978
979 if (i->src(1).getFile() == FILE_IMMEDIATE) {
980 code[1] = 0;
981 emitForm_IMM(i);
982 code[0] |= neg_mul << 15;
983 code[0] |= neg_add << 22;
984 if (i->saturate)
985 code[0] |= 1 << 8;
986 } else
987 if (i->encSize == 4) {
988 emitForm_MUL(i);
989 code[0] |= neg_mul << 15;
990 code[0] |= neg_add << 22;
991 if (i->saturate)
992 code[0] |= 1 << 8;
993 } else {
994 code[1] = neg_mul << 26;
995 code[1] |= neg_add << 27;
996 if (i->saturate)
997 code[1] |= 1 << 29;
998 emitForm_MAD(i);
999 }
1000 }
1001
1002 void
1003 CodeEmitterNV50::emitDMAD(const Instruction *i)
1004 {
1005 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1006 const int neg_add = i->src(2).mod.neg();
1007
1008 assert(i->encSize == 8);
1009 assert(!i->saturate);
1010
1011 code[1] = 0x40000000;
1012 code[0] = 0xe0000000;
1013
1014 code[1] |= neg_mul << 26;
1015 code[1] |= neg_add << 27;
1016
1017 roundMode_MAD(i);
1018
1019 emitForm_MAD(i);
1020 }
1021
1022 void
1023 CodeEmitterNV50::emitFADD(const Instruction *i)
1024 {
1025 const int neg0 = i->src(0).mod.neg();
1026 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1027
1028 code[0] = 0xb0000000;
1029
1030 assert(!(i->src(0).mod | i->src(1).mod).abs());
1031
1032 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1033 code[1] = 0;
1034 emitForm_IMM(i);
1035 code[0] |= neg0 << 15;
1036 code[0] |= neg1 << 22;
1037 if (i->saturate)
1038 code[0] |= 1 << 8;
1039 } else
1040 if (i->encSize == 8) {
1041 code[1] = 0;
1042 emitForm_ADD(i);
1043 code[1] |= neg0 << 26;
1044 code[1] |= neg1 << 27;
1045 if (i->saturate)
1046 code[1] |= 1 << 29;
1047 } else {
1048 emitForm_MUL(i);
1049 code[0] |= neg0 << 15;
1050 code[0] |= neg1 << 22;
1051 if (i->saturate)
1052 code[0] |= 1 << 8;
1053 }
1054 }
1055
1056 void
1057 CodeEmitterNV50::emitDADD(const Instruction *i)
1058 {
1059 const int neg0 = i->src(0).mod.neg();
1060 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1061
1062 assert(!(i->src(0).mod | i->src(1).mod).abs());
1063 assert(!i->saturate);
1064 assert(i->encSize == 8);
1065
1066 code[1] = 0x60000000;
1067 code[0] = 0xe0000000;
1068
1069 emitForm_ADD(i);
1070
1071 code[1] |= neg0 << 26;
1072 code[1] |= neg1 << 27;
1073 }
1074
1075 void
1076 CodeEmitterNV50::emitUADD(const Instruction *i)
1077 {
1078 const int neg0 = i->src(0).mod.neg();
1079 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1080
1081 code[0] = 0x20008000;
1082
1083 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1084 code[1] = 0;
1085 emitForm_IMM(i);
1086 } else
1087 if (i->encSize == 8) {
1088 code[0] = 0x20000000;
1089 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
1090 emitForm_ADD(i);
1091 } else {
1092 emitForm_MUL(i);
1093 }
1094 assert(!(neg0 && neg1));
1095 code[0] |= neg0 << 28;
1096 code[0] |= neg1 << 22;
1097
1098 if (i->flagsSrc >= 0) {
1099 // addc == sub | subr
1100 assert(!(code[0] & 0x10400000) && !i->getPredicate());
1101 code[0] |= 0x10400000;
1102 srcId(i->src(i->flagsSrc), 32 + 12);
1103 }
1104 }
1105
1106 void
1107 CodeEmitterNV50::emitAADD(const Instruction *i)
1108 {
1109 const int s = (i->op == OP_MOV) ? 0 : 1;
1110
1111 code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1112 code[1] = 0x20000000;
1113
1114 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1115
1116 emitFlagsRd(i);
1117
1118 if (s && i->srcExists(0))
1119 setARegBits(SDATA(i->src(0)).id + 1);
1120 }
1121
1122 void
1123 CodeEmitterNV50::emitIMUL(const Instruction *i)
1124 {
1125 code[0] = 0x40000000;
1126
1127 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1128 if (i->sType == TYPE_S16)
1129 code[0] |= 0x8100;
1130 code[1] = 0;
1131 emitForm_IMM(i);
1132 } else
1133 if (i->encSize == 8) {
1134 code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1135 emitForm_MAD(i);
1136 } else {
1137 if (i->sType == TYPE_S16)
1138 code[0] |= 0x8100;
1139 emitForm_MUL(i);
1140 }
1141 }
1142
1143 void
1144 CodeEmitterNV50::emitFMUL(const Instruction *i)
1145 {
1146 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1147
1148 code[0] = 0xc0000000;
1149
1150 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1151 code[1] = 0;
1152 emitForm_IMM(i);
1153 if (neg)
1154 code[0] |= 0x8000;
1155 if (i->saturate)
1156 code[0] |= 1 << 8;
1157 } else
1158 if (i->encSize == 8) {
1159 code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
1160 if (neg)
1161 code[1] |= 0x08000000;
1162 if (i->saturate)
1163 code[1] |= 1 << 20;
1164 emitForm_MAD(i);
1165 } else {
1166 emitForm_MUL(i);
1167 if (neg)
1168 code[0] |= 0x8000;
1169 if (i->saturate)
1170 code[0] |= 1 << 8;
1171 }
1172 }
1173
1174 void
1175 CodeEmitterNV50::emitDMUL(const Instruction *i)
1176 {
1177 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1178
1179 assert(!i->saturate);
1180 assert(i->encSize == 8);
1181
1182 code[1] = 0x80000000;
1183 code[0] = 0xe0000000;
1184
1185 if (neg)
1186 code[1] |= 0x08000000;
1187
1188 roundMode_CVT(i->rnd);
1189
1190 emitForm_MAD(i);
1191 }
1192
1193 void
1194 CodeEmitterNV50::emitIMAD(const Instruction *i)
1195 {
1196 int mode;
1197 code[0] = 0x60000000;
1198
1199 assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
1200 if (!isSignedType(i->sType))
1201 mode = 0;
1202 else if (i->saturate)
1203 mode = 2;
1204 else
1205 mode = 1;
1206
1207 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1208 code[1] = 0;
1209 emitForm_IMM(i);
1210 code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1211 if (i->flagsSrc >= 0) {
1212 assert(!(code[0] & 0x10400000));
1213 assert(SDATA(i->src(i->flagsSrc)).id == 0);
1214 code[0] |= 0x10400000;
1215 }
1216 } else
1217 if (i->encSize == 4) {
1218 emitForm_MUL(i);
1219 code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
1220 if (i->flagsSrc >= 0) {
1221 assert(!(code[0] & 0x10400000));
1222 assert(SDATA(i->src(i->flagsSrc)).id == 0);
1223 code[0] |= 0x10400000;
1224 }
1225 } else {
1226 code[1] = mode << 29;
1227 emitForm_MAD(i);
1228
1229 if (i->flagsSrc >= 0) {
1230 // add with carry from $cX
1231 assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1232 code[1] |= 0xc << 24;
1233 srcId(i->src(i->flagsSrc), 32 + 12);
1234 }
1235 }
1236 }
1237
1238 void
1239 CodeEmitterNV50::emitISAD(const Instruction *i)
1240 {
1241 if (i->encSize == 8) {
1242 code[0] = 0x50000000;
1243 switch (i->sType) {
1244 case TYPE_U32: code[1] = 0x04000000; break;
1245 case TYPE_S32: code[1] = 0x0c000000; break;
1246 case TYPE_U16: code[1] = 0x00000000; break;
1247 case TYPE_S16: code[1] = 0x08000000; break;
1248 default:
1249 assert(0);
1250 break;
1251 }
1252 emitForm_MAD(i);
1253 } else {
1254 switch (i->sType) {
1255 case TYPE_U32: code[0] = 0x50008000; break;
1256 case TYPE_S32: code[0] = 0x50008100; break;
1257 case TYPE_U16: code[0] = 0x50000000; break;
1258 case TYPE_S16: code[0] = 0x50000100; break;
1259 default:
1260 assert(0);
1261 break;
1262 }
1263 emitForm_MUL(i);
1264 }
1265 }
1266
1267 void
1268 CodeEmitterNV50::emitSET(const Instruction *i)
1269 {
1270 code[0] = 0x30000000;
1271 code[1] = 0x60000000;
1272
1273 switch (i->sType) {
1274 case TYPE_F64:
1275 code[0] = 0xe0000000;
1276 code[1] = 0xe0000000;
1277 break;
1278 case TYPE_F32: code[0] |= 0x80000000; break;
1279 case TYPE_S32: code[1] |= 0x0c000000; break;
1280 case TYPE_U32: code[1] |= 0x04000000; break;
1281 case TYPE_S16: code[1] |= 0x08000000; break;
1282 case TYPE_U16: break;
1283 default:
1284 assert(0);
1285 break;
1286 }
1287
1288 emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1289
1290 if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1291 if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1292 if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1293 if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1294
1295 emitForm_MAD(i);
1296 }
1297
1298 void
1299 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1300 {
1301 switch (rnd) {
1302 case ROUND_NI: code[1] |= 0x08000000; break;
1303 case ROUND_M: code[1] |= 0x00020000; break;
1304 case ROUND_MI: code[1] |= 0x08020000; break;
1305 case ROUND_P: code[1] |= 0x00040000; break;
1306 case ROUND_PI: code[1] |= 0x08040000; break;
1307 case ROUND_Z: code[1] |= 0x00060000; break;
1308 case ROUND_ZI: code[1] |= 0x08060000; break;
1309 default:
1310 assert(rnd == ROUND_N);
1311 break;
1312 }
1313 }
1314
1315 void
1316 CodeEmitterNV50::emitCVT(const Instruction *i)
1317 {
1318 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1319 RoundMode rnd;
1320 DataType dType;
1321
1322 switch (i->op) {
1323 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
1324 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1325 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1326 default:
1327 rnd = i->rnd;
1328 break;
1329 }
1330
1331 if (i->op == OP_NEG && i->dType == TYPE_U32)
1332 dType = TYPE_S32;
1333 else
1334 dType = i->dType;
1335
1336 code[0] = 0xa0000000;
1337
1338 switch (dType) {
1339 case TYPE_F64:
1340 switch (i->sType) {
1341 case TYPE_F64: code[1] = 0xc4404000; break;
1342 case TYPE_S64: code[1] = 0x44414000; break;
1343 case TYPE_U64: code[1] = 0x44404000; break;
1344 case TYPE_F32: code[1] = 0xc4400000; break;
1345 case TYPE_S32: code[1] = 0x44410000; break;
1346 case TYPE_U32: code[1] = 0x44400000; break;
1347 default:
1348 assert(0);
1349 break;
1350 }
1351 break;
1352 case TYPE_S64:
1353 switch (i->sType) {
1354 case TYPE_F64: code[1] = 0x8c404000; break;
1355 case TYPE_F32: code[1] = 0x8c400000; break;
1356 default:
1357 assert(0);
1358 break;
1359 }
1360 break;
1361 case TYPE_U64:
1362 switch (i->sType) {
1363 case TYPE_F64: code[1] = 0x84404000; break;
1364 case TYPE_F32: code[1] = 0x84400000; break;
1365 default:
1366 assert(0);
1367 break;
1368 }
1369 break;
1370 case TYPE_F32:
1371 switch (i->sType) {
1372 case TYPE_F64: code[1] = 0xc0404000; break;
1373 case TYPE_S64: code[1] = 0x40414000; break;
1374 case TYPE_U64: code[1] = 0x40404000; break;
1375 case TYPE_F32: code[1] = 0xc4004000; break;
1376 case TYPE_S32: code[1] = 0x44014000; break;
1377 case TYPE_U32: code[1] = 0x44004000; break;
1378 case TYPE_F16: code[1] = 0xc4000000; break;
1379 case TYPE_U16: code[1] = 0x44000000; break;
1380 default:
1381 assert(0);
1382 break;
1383 }
1384 break;
1385 case TYPE_S32:
1386 switch (i->sType) {
1387 case TYPE_F64: code[1] = 0x88404000; break;
1388 case TYPE_F32: code[1] = 0x8c004000; break;
1389 case TYPE_S32: code[1] = 0x0c014000; break;
1390 case TYPE_U32: code[1] = 0x0c004000; break;
1391 case TYPE_F16: code[1] = 0x8c000000; break;
1392 case TYPE_S16: code[1] = 0x0c010000; break;
1393 case TYPE_U16: code[1] = 0x0c000000; break;
1394 case TYPE_S8: code[1] = 0x0c018000; break;
1395 case TYPE_U8: code[1] = 0x0c008000; break;
1396 default:
1397 assert(0);
1398 break;
1399 }
1400 break;
1401 case TYPE_U32:
1402 switch (i->sType) {
1403 case TYPE_F64: code[1] = 0x80404000; break;
1404 case TYPE_F32: code[1] = 0x84004000; break;
1405 case TYPE_S32: code[1] = 0x04014000; break;
1406 case TYPE_U32: code[1] = 0x04004000; break;
1407 case TYPE_F16: code[1] = 0x84000000; break;
1408 case TYPE_S16: code[1] = 0x04010000; break;
1409 case TYPE_U16: code[1] = 0x04000000; break;
1410 case TYPE_S8: code[1] = 0x04018000; break;
1411 case TYPE_U8: code[1] = 0x04008000; break;
1412 default:
1413 assert(0);
1414 break;
1415 }
1416 break;
1417 case TYPE_S16:
1418 case TYPE_U16:
1419 case TYPE_S8:
1420 case TYPE_U8:
1421 default:
1422 assert(0);
1423 break;
1424 }
1425 if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1426 code[1] |= 0x00004000;
1427
1428 roundMode_CVT(rnd);
1429
1430 switch (i->op) {
1431 case OP_ABS: code[1] |= 1 << 20; break;
1432 case OP_SAT: code[1] |= 1 << 19; break;
1433 case OP_NEG: code[1] |= 1 << 29; break;
1434 default:
1435 break;
1436 }
1437 code[1] ^= i->src(0).mod.neg() << 29;
1438 code[1] |= i->src(0).mod.abs() << 20;
1439 if (i->saturate)
1440 code[1] |= 1 << 19;
1441
1442 assert(i->op != OP_ABS || !i->src(0).mod.neg());
1443
1444 emitForm_MAD(i);
1445 }
1446
1447 void
1448 CodeEmitterNV50::emitPreOp(const Instruction *i)
1449 {
1450 code[0] = 0xb0000000;
1451 code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1452
1453 code[1] |= i->src(0).mod.abs() << 20;
1454 code[1] |= i->src(0).mod.neg() << 26;
1455
1456 emitForm_MAD(i);
1457 }
1458
1459 void
1460 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1461 {
1462 code[0] = 0x90000000;
1463
1464 if (i->encSize == 4) {
1465 assert(i->op == OP_RCP);
1466 assert(!i->saturate);
1467 code[0] |= i->src(0).mod.abs() << 15;
1468 code[0] |= i->src(0).mod.neg() << 22;
1469 emitForm_MUL(i);
1470 } else {
1471 code[1] = subOp << 29;
1472 code[1] |= i->src(0).mod.abs() << 20;
1473 code[1] |= i->src(0).mod.neg() << 26;
1474 if (i->saturate) {
1475 assert(subOp == 6 && i->op == OP_EX2);
1476 code[1] |= 1 << 27;
1477 }
1478 emitForm_MAD(i);
1479 }
1480 }
1481
1482 void
1483 CodeEmitterNV50::emitNOT(const Instruction *i)
1484 {
1485 code[0] = 0xd0000000;
1486 code[1] = 0x0002c000;
1487
1488 switch (i->sType) {
1489 case TYPE_U32:
1490 case TYPE_S32:
1491 code[1] |= 0x04000000;
1492 break;
1493 default:
1494 break;
1495 }
1496 emitForm_MAD(i);
1497 setSrc(i, 0, 1);
1498 }
1499
1500 void
1501 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1502 {
1503 code[0] = 0xd0000000;
1504 code[1] = 0;
1505
1506 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1507 switch (i->op) {
1508 case OP_OR: code[0] |= 0x0100; break;
1509 case OP_XOR: code[0] |= 0x8000; break;
1510 default:
1511 assert(i->op == OP_AND);
1512 break;
1513 }
1514 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1515 code[0] |= 1 << 22;
1516
1517 emitForm_IMM(i);
1518 } else {
1519 switch (i->op) {
1520 case OP_AND: code[1] = 0x04000000; break;
1521 case OP_OR: code[1] = 0x04004000; break;
1522 case OP_XOR: code[1] = 0x04008000; break;
1523 default:
1524 assert(0);
1525 break;
1526 }
1527 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1528 code[1] |= 1 << 16;
1529 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1530 code[1] |= 1 << 17;
1531
1532 emitForm_MAD(i);
1533 }
1534 }
1535
1536 void
1537 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1538 {
1539 code[0] = 0x00000001 | (shl << 16);
1540 code[1] = 0xc0000000;
1541
1542 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1543
1544 setSrcFileBits(i, NV50_OP_ENC_IMM);
1545 setSrc(i, 0, 0);
1546 emitFlagsRd(i);
1547 }
1548
1549 void
1550 CodeEmitterNV50::emitShift(const Instruction *i)
1551 {
1552 if (i->def(0).getFile() == FILE_ADDRESS) {
1553 assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1554 emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1555 } else {
1556 code[0] = 0x30000001;
1557 code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1558 if (i->op == OP_SHR && isSignedType(i->sType))
1559 code[1] |= 1 << 27;
1560
1561 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1562 code[1] |= 1 << 20;
1563 code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1564 defId(i->def(0), 2);
1565 srcId(i->src(0), 9);
1566 emitFlagsRd(i);
1567 } else {
1568 emitForm_MAD(i);
1569 }
1570 }
1571 }
1572
1573 void
1574 CodeEmitterNV50::emitOUT(const Instruction *i)
1575 {
1576 code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1577 code[1] = 0xc0000000;
1578
1579 emitFlagsRd(i);
1580 }
1581
1582 void
1583 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1584 {
1585 code[0] = 0xf0000001;
1586 code[1] = 0x00000000;
1587
1588 switch (i->op) {
1589 case OP_TXB:
1590 code[1] = 0x20000000;
1591 break;
1592 case OP_TXL:
1593 code[1] = 0x40000000;
1594 break;
1595 case OP_TXF:
1596 code[0] |= 0x01000000;
1597 break;
1598 case OP_TXG:
1599 code[0] |= 0x01000000;
1600 code[1] = 0x80000000;
1601 break;
1602 case OP_TXLQ:
1603 code[1] = 0x60020000;
1604 break;
1605 default:
1606 assert(i->op == OP_TEX);
1607 break;
1608 }
1609
1610 code[0] |= i->tex.r << 9;
1611 code[0] |= i->tex.s << 17;
1612
1613 int argc = i->tex.target.getArgCount();
1614
1615 if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1616 argc += 1;
1617 if (i->tex.target.isShadow())
1618 argc += 1;
1619 assert(argc <= 4);
1620
1621 code[0] |= (argc - 1) << 22;
1622
1623 if (i->tex.target.isCube()) {
1624 code[0] |= 0x08000000;
1625 } else
1626 if (i->tex.useOffsets) {
1627 code[1] |= (i->tex.offset[0] & 0xf) << 24;
1628 code[1] |= (i->tex.offset[1] & 0xf) << 20;
1629 code[1] |= (i->tex.offset[2] & 0xf) << 16;
1630 }
1631
1632 code[0] |= (i->tex.mask & 0x3) << 25;
1633 code[1] |= (i->tex.mask & 0xc) << 12;
1634
1635 if (i->tex.liveOnly)
1636 code[1] |= 4;
1637
1638 defId(i->def(0), 2);
1639
1640 emitFlagsRd(i);
1641 }
1642
1643 void
1644 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1645 {
1646 assert(i->tex.query == TXQ_DIMS);
1647
1648 code[0] = 0xf0000001;
1649 code[1] = 0x60000000;
1650
1651 code[0] |= i->tex.r << 9;
1652 code[0] |= i->tex.s << 17;
1653
1654 code[0] |= (i->tex.mask & 0x3) << 25;
1655 code[1] |= (i->tex.mask & 0xc) << 12;
1656
1657 defId(i->def(0), 2);
1658
1659 emitFlagsRd(i);
1660 }
1661
1662 void
1663 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1664 {
1665 code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1666 code[1] = 0x60010000;
1667
1668 code[0] |= (i->tex.mask & 0x3) << 25;
1669 code[1] |= (i->tex.mask & 0xc) << 12;
1670 defId(i->def(0), 2);
1671
1672 emitFlagsRd(i);
1673 }
1674
1675 void
1676 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1677 {
1678 uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1679
1680 code[0] = 0x10000003; // bra
1681 code[1] = 0x00000780; // always
1682
1683 switch (i->subOp) {
1684 case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1685 break;
1686 case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1687 pos += 8;
1688 break;
1689 default:
1690 assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1691 code[0] = 0x20000003; // call
1692 code[1] = 0x00000000; // no predicate
1693 break;
1694 }
1695 addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1696 addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1697 }
1698
1699 void
1700 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1701 {
1702 const FlowInstruction *f = i->asFlow();
1703 bool hasPred = false;
1704 bool hasTarg = false;
1705
1706 code[0] = 0x00000003 | (flowOp << 28);
1707 code[1] = 0x00000000;
1708
1709 switch (i->op) {
1710 case OP_BRA:
1711 hasPred = true;
1712 hasTarg = true;
1713 break;
1714 case OP_BREAK:
1715 case OP_BRKPT:
1716 case OP_DISCARD:
1717 case OP_RET:
1718 hasPred = true;
1719 break;
1720 case OP_CALL:
1721 case OP_PREBREAK:
1722 case OP_JOINAT:
1723 hasTarg = true;
1724 break;
1725 case OP_PRERET:
1726 hasTarg = true;
1727 if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1728 emitPRERETEmu(f);
1729 return;
1730 }
1731 break;
1732 default:
1733 break;
1734 }
1735
1736 if (hasPred)
1737 emitFlagsRd(i);
1738
1739 if (hasTarg && f) {
1740 uint32_t pos;
1741
1742 if (f->op == OP_CALL) {
1743 if (f->builtin) {
1744 pos = targNV50->getBuiltinOffset(f->target.builtin);
1745 } else {
1746 pos = f->target.fn->binPos;
1747 }
1748 } else {
1749 pos = f->target.bb->binPos;
1750 }
1751
1752 code[0] |= ((pos >> 2) & 0xffff) << 11;
1753 code[1] |= ((pos >> 18) & 0x003f) << 14;
1754
1755 RelocEntry::Type relocTy;
1756
1757 relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1758
1759 addReloc(relocTy, 0, pos, 0x07fff800, 9);
1760 addReloc(relocTy, 1, pos, 0x000fc000, -4);
1761 }
1762 }
1763
1764 void
1765 CodeEmitterNV50::emitBAR(const Instruction *i)
1766 {
1767 ImmediateValue *barId = i->getSrc(0)->asImm();
1768 assert(barId);
1769
1770 code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1771 code[1] = 0x00004000;
1772
1773 if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1774 code[0] |= 1 << 26;
1775 }
1776
1777 void
1778 CodeEmitterNV50::emitATOM(const Instruction *i)
1779 {
1780 uint8_t subOp;
1781 switch (i->subOp) {
1782 case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break;
1783 case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break;
1784 case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break;
1785 case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break;
1786 case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break;
1787 case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break;
1788 case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break;
1789 case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break;
1790 case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break;
1791 case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1792 default:
1793 assert(!"invalid subop");
1794 return;
1795 }
1796 code[0] = 0xd0000001;
1797 code[1] = 0xe0c00000 | (subOp << 2);
1798 if (isSignedType(i->dType))
1799 code[1] |= 1 << 21;
1800
1801 // args
1802 emitFlagsRd(i);
1803 setDst(i, 0);
1804 setSrc(i, 1, 1);
1805 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1806 setSrc(i, 2, 2);
1807
1808 // g[] pointer
1809 code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1810 srcId(i->getIndirect(0, 0), 9);
1811 }
1812
1813 bool
1814 CodeEmitterNV50::emitInstruction(Instruction *insn)
1815 {
1816 if (!insn->encSize) {
1817 ERROR("skipping unencodable instruction: "); insn->print();
1818 return false;
1819 } else
1820 if (codeSize + insn->encSize > codeSizeLimit) {
1821 ERROR("code emitter output buffer too small\n");
1822 return false;
1823 }
1824
1825 if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1826 INFO("EMIT: "); insn->print();
1827 }
1828
1829 switch (insn->op) {
1830 case OP_MOV:
1831 emitMOV(insn);
1832 break;
1833 case OP_EXIT:
1834 case OP_NOP:
1835 case OP_JOIN:
1836 emitNOP();
1837 break;
1838 case OP_VFETCH:
1839 case OP_LOAD:
1840 emitLOAD(insn);
1841 break;
1842 case OP_EXPORT:
1843 case OP_STORE:
1844 emitSTORE(insn);
1845 break;
1846 case OP_PFETCH:
1847 emitPFETCH(insn);
1848 break;
1849 case OP_RDSV:
1850 emitRDSV(insn);
1851 break;
1852 case OP_LINTERP:
1853 case OP_PINTERP:
1854 emitINTERP(insn);
1855 break;
1856 case OP_ADD:
1857 case OP_SUB:
1858 if (insn->dType == TYPE_F64)
1859 emitDADD(insn);
1860 else if (isFloatType(insn->dType))
1861 emitFADD(insn);
1862 else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1863 emitAADD(insn);
1864 else
1865 emitUADD(insn);
1866 break;
1867 case OP_MUL:
1868 if (insn->dType == TYPE_F64)
1869 emitDMUL(insn);
1870 else if (isFloatType(insn->dType))
1871 emitFMUL(insn);
1872 else
1873 emitIMUL(insn);
1874 break;
1875 case OP_MAD:
1876 case OP_FMA:
1877 if (insn->dType == TYPE_F64)
1878 emitDMAD(insn);
1879 else if (isFloatType(insn->dType))
1880 emitFMAD(insn);
1881 else
1882 emitIMAD(insn);
1883 break;
1884 case OP_SAD:
1885 emitISAD(insn);
1886 break;
1887 case OP_NOT:
1888 emitNOT(insn);
1889 break;
1890 case OP_AND:
1891 case OP_OR:
1892 case OP_XOR:
1893 emitLogicOp(insn);
1894 break;
1895 case OP_SHL:
1896 case OP_SHR:
1897 emitShift(insn);
1898 break;
1899 case OP_SET:
1900 emitSET(insn);
1901 break;
1902 case OP_MIN:
1903 case OP_MAX:
1904 emitMINMAX(insn);
1905 break;
1906 case OP_CEIL:
1907 case OP_FLOOR:
1908 case OP_TRUNC:
1909 case OP_ABS:
1910 case OP_NEG:
1911 case OP_SAT:
1912 emitCVT(insn);
1913 break;
1914 case OP_CVT:
1915 if (insn->def(0).getFile() == FILE_ADDRESS)
1916 emitARL(insn, 0);
1917 else
1918 if (insn->def(0).getFile() == FILE_FLAGS ||
1919 insn->src(0).getFile() == FILE_FLAGS ||
1920 insn->src(0).getFile() == FILE_ADDRESS)
1921 emitMOV(insn);
1922 else
1923 emitCVT(insn);
1924 break;
1925 case OP_RCP:
1926 emitSFnOp(insn, 0);
1927 break;
1928 case OP_RSQ:
1929 emitSFnOp(insn, 2);
1930 break;
1931 case OP_LG2:
1932 emitSFnOp(insn, 3);
1933 break;
1934 case OP_SIN:
1935 emitSFnOp(insn, 4);
1936 break;
1937 case OP_COS:
1938 emitSFnOp(insn, 5);
1939 break;
1940 case OP_EX2:
1941 emitSFnOp(insn, 6);
1942 break;
1943 case OP_PRESIN:
1944 case OP_PREEX2:
1945 emitPreOp(insn);
1946 break;
1947 case OP_TEX:
1948 case OP_TXB:
1949 case OP_TXL:
1950 case OP_TXF:
1951 case OP_TXG:
1952 case OP_TXLQ:
1953 emitTEX(insn->asTex());
1954 break;
1955 case OP_TXQ:
1956 emitTXQ(insn->asTex());
1957 break;
1958 case OP_TEXPREP:
1959 emitTEXPREP(insn->asTex());
1960 break;
1961 case OP_EMIT:
1962 case OP_RESTART:
1963 emitOUT(insn);
1964 break;
1965 case OP_DISCARD:
1966 emitFlow(insn, 0x0);
1967 break;
1968 case OP_BRA:
1969 emitFlow(insn, 0x1);
1970 break;
1971 case OP_CALL:
1972 emitFlow(insn, 0x2);
1973 break;
1974 case OP_RET:
1975 emitFlow(insn, 0x3);
1976 break;
1977 case OP_PREBREAK:
1978 emitFlow(insn, 0x4);
1979 break;
1980 case OP_BREAK:
1981 emitFlow(insn, 0x5);
1982 break;
1983 case OP_QUADON:
1984 emitFlow(insn, 0x6);
1985 break;
1986 case OP_QUADPOP:
1987 emitFlow(insn, 0x7);
1988 break;
1989 case OP_JOINAT:
1990 emitFlow(insn, 0xa);
1991 break;
1992 case OP_PRERET:
1993 emitFlow(insn, 0xd);
1994 break;
1995 case OP_QUADOP:
1996 emitQUADOP(insn, insn->lanes, insn->subOp);
1997 break;
1998 case OP_DFDX:
1999 emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
2000 break;
2001 case OP_DFDY:
2002 emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
2003 break;
2004 case OP_ATOM:
2005 emitATOM(insn);
2006 break;
2007 case OP_BAR:
2008 emitBAR(insn);
2009 break;
2010 case OP_PHI:
2011 case OP_UNION:
2012 case OP_CONSTRAINT:
2013 ERROR("operation should have been eliminated\n");
2014 return false;
2015 case OP_EXP:
2016 case OP_LOG:
2017 case OP_SQRT:
2018 case OP_POW:
2019 case OP_SELP:
2020 case OP_SLCT:
2021 case OP_TXD:
2022 case OP_PRECONT:
2023 case OP_CONT:
2024 case OP_POPCNT:
2025 case OP_INSBF:
2026 case OP_EXTBF:
2027 ERROR("operation should have been lowered\n");
2028 return false;
2029 default:
2030 ERROR("unknown op: %u\n", insn->op);
2031 return false;
2032 }
2033 if (insn->join || insn->op == OP_JOIN)
2034 code[1] |= 0x2;
2035 else
2036 if (insn->exit || insn->op == OP_EXIT)
2037 code[1] |= 0x1;
2038
2039 assert((insn->encSize == 8) == (code[0] & 1));
2040
2041 code += insn->encSize / 4;
2042 codeSize += insn->encSize;
2043 return true;
2044 }
2045
2046 uint32_t
2047 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
2048 {
2049 const Target::OpInfo &info = targ->getOpInfo(i);
2050
2051 if (info.minEncSize > 4 || i->dType == TYPE_F64)
2052 return 8;
2053
2054 // check constraints on dst and src operands
2055 for (int d = 0; i->defExists(d); ++d) {
2056 if (i->def(d).rep()->reg.data.id > 63 ||
2057 i->def(d).rep()->reg.file != FILE_GPR)
2058 return 8;
2059 }
2060
2061 for (int s = 0; i->srcExists(s); ++s) {
2062 DataFile sf = i->src(s).getFile();
2063 if (sf != FILE_GPR)
2064 if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
2065 return 8;
2066 if (i->src(s).rep()->reg.data.id > 63)
2067 return 8;
2068 }
2069
2070 // check modifiers & rounding
2071 if (i->join || i->lanes != 0xf || i->exit)
2072 return 8;
2073 if (i->op == OP_MUL && i->rnd != ROUND_N)
2074 return 8;
2075
2076 if (i->asTex())
2077 return 8; // TODO: short tex encoding
2078
2079 // check constraints on short MAD
2080 if (info.srcNr >= 2 && i->srcExists(2)) {
2081 if (!i->defExists(0) ||
2082 (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
2083 DDATA(i->def(0)).id != SDATA(i->src(2)).id)
2084 return 8;
2085 }
2086
2087 return info.minEncSize;
2088 }
2089
2090 // Change the encoding size of an instruction after BBs have been scheduled.
2091 static void
2092 makeInstructionLong(Instruction *insn)
2093 {
2094 if (insn->encSize == 8)
2095 return;
2096 Function *fn = insn->bb->getFunction();
2097 int n = 0;
2098 int adj = 4;
2099
2100 for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
2101
2102 if (n & 1) {
2103 adj = 8;
2104 insn->next->encSize = 8;
2105 } else
2106 if (insn->prev && insn->prev->encSize == 4) {
2107 adj = 8;
2108 insn->prev->encSize = 8;
2109 }
2110 insn->encSize = 8;
2111
2112 for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
2113 fn->bbArray[i]->binPos += 4;
2114 }
2115 fn->binSize += adj;
2116 insn->bb->binSize += adj;
2117 }
2118
2119 static bool
2120 trySetExitModifier(Instruction *insn)
2121 {
2122 if (insn->op == OP_DISCARD ||
2123 insn->op == OP_QUADON ||
2124 insn->op == OP_QUADPOP)
2125 return false;
2126 for (int s = 0; insn->srcExists(s); ++s)
2127 if (insn->src(s).getFile() == FILE_IMMEDIATE)
2128 return false;
2129 if (insn->asFlow()) {
2130 if (insn->op == OP_CALL) // side effects !
2131 return false;
2132 if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
2133 return false;
2134 insn->op = OP_EXIT;
2135 }
2136 insn->exit = 1;
2137 makeInstructionLong(insn);
2138 return true;
2139 }
2140
2141 static void
2142 replaceExitWithModifier(Function *func)
2143 {
2144 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2145
2146 if (!epilogue->getExit() ||
2147 epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
2148 return;
2149
2150 if (epilogue->getEntry()->op != OP_EXIT) {
2151 Instruction *insn = epilogue->getExit()->prev;
2152 if (!insn || !trySetExitModifier(insn))
2153 return;
2154 insn->exit = 1;
2155 } else {
2156 for (Graph::EdgeIterator ei = func->cfgExit->incident();
2157 !ei.end(); ei.next()) {
2158 BasicBlock *bb = BasicBlock::get(ei.getNode());
2159 Instruction *i = bb->getExit();
2160
2161 if (!i || !trySetExitModifier(i))
2162 return;
2163 }
2164 }
2165 epilogue->binSize -= 8;
2166 func->binSize -= 8;
2167 delete_Instruction(func->getProgram(), epilogue->getExit());
2168 }
2169
2170 void
2171 CodeEmitterNV50::prepareEmission(Function *func)
2172 {
2173 CodeEmitter::prepareEmission(func);
2174
2175 replaceExitWithModifier(func);
2176 }
2177
2178 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
2179 CodeEmitter(target), targNV50(target)
2180 {
2181 targ = target; // specialized
2182 code = NULL;
2183 codeSize = codeSizeLimit = 0;
2184 relocInfo = NULL;
2185 }
2186
2187 CodeEmitter *
2188 TargetNV50::getCodeEmitter(Program::Type type)
2189 {
2190 CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
2191 emit->setProgramType(type);
2192 return emit;
2193 }
2194
2195 } // namespace nv50_ir