nouveau/codegen: set dType to S32 for OP_NEG U32
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_nv50.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target_nv50.h"
25
26 namespace nv50_ir {
27
// Encoding forms handed to setSrcFileBits() as its 'enc' argument.
#define NV50_OP_ENC_LONG     0
#define NV50_OP_ENC_SHORT    1
#define NV50_OP_ENC_IMM      2
#define NV50_OP_ENC_LONG_ALT 3
32
33 class CodeEmitterNV50 : public CodeEmitter
34 {
35 public:
36 CodeEmitterNV50(const TargetNV50 *);
37
38 virtual bool emitInstruction(Instruction *);
39
40 virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 virtual void prepareEmission(Function *);
45
46 private:
47 Program::Type progType;
48
49 const TargetNV50 *targNV50;
50
51 private:
52 inline void defId(const ValueDef&, const int pos);
53 inline void srcId(const ValueRef&, const int pos);
54 inline void srcId(const ValueRef *, const int pos);
55
56 inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57 inline void srcAddr8(const ValueRef&, const int pos);
58
59 void emitFlagsRd(const Instruction *);
60 void emitFlagsWr(const Instruction *);
61
62 void emitCondCode(CondCode cc, DataType ty, int pos);
63
64 inline void setARegBits(unsigned int);
65
66 void setAReg16(const Instruction *, int s);
67 void setImmediate(const Instruction *, int s);
68
69 void setDst(const Value *);
70 void setDst(const Instruction *, int d);
71 void setSrcFileBits(const Instruction *, int enc);
72 void setSrc(const Instruction *, unsigned int s, int slot);
73
74 void emitForm_MAD(const Instruction *);
75 void emitForm_ADD(const Instruction *);
76 void emitForm_MUL(const Instruction *);
77 void emitForm_IMM(const Instruction *);
78
79 void emitLoadStoreSizeLG(DataType ty, int pos);
80 void emitLoadStoreSizeCS(DataType ty);
81
82 void roundMode_MAD(const Instruction *);
83 void roundMode_CVT(RoundMode);
84
85 void emitMNeg12(const Instruction *);
86
87 void emitLOAD(const Instruction *);
88 void emitSTORE(const Instruction *);
89 void emitMOV(const Instruction *);
90 void emitNOP();
91 void emitINTERP(const Instruction *);
92 void emitPFETCH(const Instruction *);
93 void emitOUT(const Instruction *);
94
95 void emitUADD(const Instruction *);
96 void emitAADD(const Instruction *);
97 void emitFADD(const Instruction *);
98 void emitIMUL(const Instruction *);
99 void emitFMUL(const Instruction *);
100 void emitFMAD(const Instruction *);
101 void emitIMAD(const Instruction *);
102 void emitISAD(const Instruction *);
103
104 void emitMINMAX(const Instruction *);
105
106 void emitPreOp(const Instruction *);
107 void emitSFnOp(const Instruction *, uint8_t subOp);
108
109 void emitShift(const Instruction *);
110 void emitARL(const Instruction *, unsigned int shl);
111 void emitLogicOp(const Instruction *);
112 void emitNOT(const Instruction *);
113
114 void emitCVT(const Instruction *);
115 void emitSET(const Instruction *);
116
117 void emitTEX(const TexInstruction *);
118 void emitTXQ(const TexInstruction *);
119 void emitTEXPREP(const TexInstruction *);
120
121 void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
122
123 void emitFlow(const Instruction *, uint8_t flowOp);
124 void emitPRERETEmu(const FlowInstruction *);
125 void emitBAR(const Instruction *);
126
127 void emitATOM(const Instruction *);
128 };
129
// Register data of the representative value behind a source (SDATA) or a
// definition (DDATA) reference.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
132
133 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
134 {
135 assert(src.get());
136 code[pos / 32] |= SDATA(src).id << (pos % 32);
137 }
138
139 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
140 {
141 assert(src->get());
142 code[pos / 32] |= SDATA(*src).id << (pos % 32);
143 }
144
145 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
146 {
147 assert(src.get());
148
149 int32_t offset = SDATA(src).offset;
150
151 assert(!adj || src.get()->reg.size <= 4);
152 if (adj)
153 offset /= src.get()->reg.size;
154
155 assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
156
157 if (offset < 0)
158 offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
159
160 code[pos / 32] |= offset << (pos % 32);
161 }
162
163 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
164 {
165 assert(src.get());
166
167 uint32_t offset = SDATA(src).offset;
168
169 assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
170
171 code[pos / 32] |= (offset >> 2) << (pos % 32);
172 }
173
174 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
175 {
176 assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
177
178 code[pos / 32] |= DDATA(def).id << (pos % 32);
179 }
180
181 void
182 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
183 {
184 switch (insn->rnd) {
185 case ROUND_M: code[1] |= 1 << 22; break;
186 case ROUND_P: code[1] |= 2 << 22; break;
187 case ROUND_Z: code[1] |= 3 << 22; break;
188 default:
189 assert(insn->rnd == ROUND_N);
190 break;
191 }
192 }
193
194 void
195 CodeEmitterNV50::emitMNeg12(const Instruction *i)
196 {
197 code[1] |= i->src(0).mod.neg() << 26;
198 code[1] |= i->src(1).mod.neg() << 27;
199 }
200
201 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
202 {
203 uint8_t enc;
204
205 assert(pos >= 32 || pos <= 27);
206
207 switch (cc) {
208 case CC_LT: enc = 0x1; break;
209 case CC_LTU: enc = 0x9; break;
210 case CC_EQ: enc = 0x2; break;
211 case CC_EQU: enc = 0xa; break;
212 case CC_LE: enc = 0x3; break;
213 case CC_LEU: enc = 0xb; break;
214 case CC_GT: enc = 0x4; break;
215 case CC_GTU: enc = 0xc; break;
216 case CC_NE: enc = 0x5; break;
217 case CC_NEU: enc = 0xd; break;
218 case CC_GE: enc = 0x6; break;
219 case CC_GEU: enc = 0xe; break;
220 case CC_TR: enc = 0xf; break;
221 case CC_FL: enc = 0x0; break;
222
223 case CC_O: enc = 0x10; break;
224 case CC_C: enc = 0x11; break;
225 case CC_A: enc = 0x12; break;
226 case CC_S: enc = 0x13; break;
227 case CC_NS: enc = 0x1c; break;
228 case CC_NA: enc = 0x1d; break;
229 case CC_NC: enc = 0x1e; break;
230 case CC_NO: enc = 0x1f; break;
231
232 default:
233 enc = 0;
234 assert(!"invalid condition code");
235 break;
236 }
237 if (ty != TYPE_NONE && !isFloatType(ty))
238 enc &= ~0x8; // unordered only exists for float types
239
240 code[pos / 32] |= enc << (pos % 32);
241 }
242
243 void
244 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
245 {
246 int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
247
248 assert(!(code[1] & 0x00003f80));
249
250 if (s >= 0) {
251 assert(i->getSrc(s)->reg.file == FILE_FLAGS);
252 emitCondCode(i->cc, TYPE_NONE, 32 + 7);
253 srcId(i->src(s), 32 + 12);
254 } else {
255 code[1] |= 0x0780;
256 }
257 }
258
259 void
260 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
261 {
262 assert(!(code[1] & 0x70));
263
264 int flagsDef = i->flagsDef;
265
266 // find flags definition and check that it is the last def
267 if (flagsDef < 0) {
268 for (int d = 0; i->defExists(d); ++d)
269 if (i->def(d).getFile() == FILE_FLAGS)
270 flagsDef = d;
271 if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
272 WARN("Instruction::flagsDef was not set properly\n");
273 }
274 if (flagsDef == 0 && i->defExists(1))
275 WARN("flags def should not be the primary definition\n");
276
277 if (flagsDef >= 0)
278 code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
279
280 }
281
282 void
283 CodeEmitterNV50::setARegBits(unsigned int u)
284 {
285 code[0] |= (u & 3) << 26;
286 code[1] |= (u & 4);
287 }
288
289 void
290 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
291 {
292 if (i->srcExists(s)) {
293 s = i->src(s).indirect[0];
294 if (s >= 0)
295 setARegBits(SDATA(i->src(s)).id + 1);
296 }
297 }
298
299 void
300 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
301 {
302 const ImmediateValue *imm = i->src(s).get()->asImm();
303 assert(imm);
304
305 uint32_t u = imm->reg.data.u32;
306
307 if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
308 u = ~u;
309
310 code[1] |= 3;
311 code[0] |= (u & 0x3f) << 16;
312 code[1] |= (u >> 6) << 2;
313 }
314
315 void
316 CodeEmitterNV50::setDst(const Value *dst)
317 {
318 const Storage *reg = &dst->join->reg;
319
320 assert(reg->file != FILE_ADDRESS);
321
322 if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
323 code[0] |= (127 << 2) | 1;
324 code[1] |= 8;
325 } else {
326 int id;
327 if (reg->file == FILE_SHADER_OUTPUT) {
328 code[1] |= 8;
329 id = reg->data.offset / 4;
330 } else {
331 id = reg->data.id;
332 }
333 code[0] |= id << 2;
334 }
335 }
336
337 void
338 CodeEmitterNV50::setDst(const Instruction *i, int d)
339 {
340 if (i->defExists(d)) {
341 setDst(i->getDef(d));
342 } else
343 if (!d) {
344 code[0] |= 0x01fc; // bit bucket
345 code[1] |= 0x0008;
346 }
347 }
348
349 // 3 * 2 bits:
350 // 0: r
351 // 1: a/s
352 // 2: c
353 // 3: i
354 void
355 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
356 {
357 uint8_t mode = 0;
358
359 for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
360 switch (i->src(s).getFile()) {
361 case FILE_GPR:
362 break;
363 case FILE_MEMORY_SHARED:
364 case FILE_SHADER_INPUT:
365 mode |= 1 << (s * 2);
366 break;
367 case FILE_MEMORY_CONST:
368 mode |= 2 << (s * 2);
369 break;
370 case FILE_IMMEDIATE:
371 mode |= 3 << (s * 2);
372 break;
373 default:
374 ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
375 assert(0);
376 break;
377 }
378 }
379 switch (mode) {
380 case 0x00: // rrr
381 break;
382 case 0x01: // arr/grr
383 if (progType == Program::TYPE_GEOMETRY) {
384 code[0] |= 0x01800000;
385 if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
386 code[1] |= 0x00200000;
387 } else {
388 if (enc == NV50_OP_ENC_SHORT)
389 code[0] |= 0x01000000;
390 else
391 code[1] |= 0x00200000;
392 }
393 break;
394 case 0x03: // irr
395 assert(i->op == OP_MOV);
396 return;
397 case 0x0c: // rir
398 break;
399 case 0x0d: // gir
400 code[0] |= 0x01000000;
401 assert(progType == Program::TYPE_GEOMETRY ||
402 progType == Program::TYPE_COMPUTE);
403 break;
404 case 0x08: // rcr
405 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
406 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
407 break;
408 case 0x09: // acr/gcr
409 if (progType == Program::TYPE_GEOMETRY) {
410 code[0] |= 0x01800000;
411 } else {
412 code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
413 code[1] |= 0x00200000;
414 }
415 code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
416 break;
417 case 0x20: // rrc
418 code[0] |= 0x01000000;
419 code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
420 break;
421 case 0x21: // arc
422 code[0] |= 0x01000000;
423 code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
424 assert(progType != Program::TYPE_GEOMETRY);
425 break;
426 default:
427 ERROR("not encodable: %x\n", mode);
428 assert(0);
429 break;
430 }
431 if (progType != Program::TYPE_COMPUTE)
432 return;
433
434 if ((mode & 3) == 1) {
435 const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
436
437 switch (i->getSrc(0)->reg.type) {
438 case TYPE_U8:
439 break;
440 case TYPE_U16:
441 code[0] |= 1 << pos;
442 break;
443 case TYPE_S16:
444 code[0] |= 2 << pos;
445 break;
446 default:
447 code[0] |= 3 << pos;
448 assert(i->getSrc(0)->reg.size == 4);
449 break;
450 }
451 }
452 }
453
454 void
455 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
456 {
457 if (Target::operationSrcNr[i->op] <= s)
458 return;
459 const Storage *reg = &i->src(s).rep()->reg;
460
461 unsigned int id = (reg->file == FILE_GPR) ?
462 reg->data.id :
463 reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
464
465 switch (slot) {
466 case 0: code[0] |= id << 9; break;
467 case 1: code[0] |= id << 16; break;
468 case 2: code[1] |= id << 14; break;
469 default:
470 assert(0);
471 break;
472 }
473 }
474
475 // the default form:
476 // - long instruction
477 // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
478 // - address & flags
479 void
480 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
481 {
482 assert(i->encSize == 8);
483 code[0] |= 1;
484
485 emitFlagsRd(i);
486 emitFlagsWr(i);
487
488 setDst(i, 0);
489
490 setSrcFileBits(i, NV50_OP_ENC_LONG);
491 setSrc(i, 0, 0);
492 setSrc(i, 1, 1);
493 setSrc(i, 2, 2);
494
495 setAReg16(i, 1);
496 }
497
498 // like default form, but 2nd source in slot 2, and no 3rd source
499 void
500 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
501 {
502 assert(i->encSize == 8);
503 code[0] |= 1;
504
505 emitFlagsRd(i);
506 emitFlagsWr(i);
507
508 setDst(i, 0);
509
510 setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
511 setSrc(i, 0, 0);
512 setSrc(i, 1, 2);
513
514 setAReg16(i, 1);
515 }
516
517 // default short form (rr, ar, rc, gr)
518 void
519 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
520 {
521 assert(i->encSize == 4 && !(code[0] & 1));
522 assert(i->defExists(0));
523 assert(!i->getPredicate());
524
525 setDst(i, 0);
526
527 setSrcFileBits(i, NV50_OP_ENC_SHORT);
528 setSrc(i, 0, 0);
529 setSrc(i, 1, 1);
530 }
531
532 // usual immediate form
533 // - 1 to 3 sources where last is immediate (rir, gir)
534 // - no address or predicate possible
535 void
536 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
537 {
538 assert(i->encSize == 8);
539 code[0] |= 1;
540
541 assert(i->defExists(0) && i->srcExists(0));
542
543 setDst(i, 0);
544
545 setSrcFileBits(i, NV50_OP_ENC_IMM);
546 if (Target::operationSrcNr[i->op] > 1) {
547 setSrc(i, 0, 0);
548 setImmediate(i, 1);
549 setSrc(i, 2, 1);
550 } else {
551 setImmediate(i, 0);
552 }
553 }
554
555 void
556 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
557 {
558 uint8_t enc;
559
560 switch (ty) {
561 case TYPE_F32: // fall through
562 case TYPE_S32: // fall through
563 case TYPE_U32: enc = 0x6; break;
564 case TYPE_B128: enc = 0x5; break;
565 case TYPE_F64: // fall through
566 case TYPE_S64: // fall through
567 case TYPE_U64: enc = 0x4; break;
568 case TYPE_S16: enc = 0x3; break;
569 case TYPE_U16: enc = 0x2; break;
570 case TYPE_S8: enc = 0x1; break;
571 case TYPE_U8: enc = 0x0; break;
572 default:
573 enc = 0;
574 assert(!"invalid load/store type");
575 break;
576 }
577 code[pos / 32] |= enc << (pos % 32);
578 }
579
580 void
581 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
582 {
583 switch (ty) {
584 case TYPE_U8: break;
585 case TYPE_U16: code[1] |= 0x4000; break;
586 case TYPE_S16: code[1] |= 0x8000; break;
587 case TYPE_F32:
588 case TYPE_S32:
589 case TYPE_U32: code[1] |= 0xc000; break;
590 default:
591 assert(0);
592 break;
593 }
594 }
595
596 void
597 CodeEmitterNV50::emitLOAD(const Instruction *i)
598 {
599 DataFile sf = i->src(0).getFile();
600 int32_t offset = i->getSrc(0)->reg.data.offset;
601
602 switch (sf) {
603 case FILE_SHADER_INPUT:
604 // use 'mov' where we can
605 code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
606 code[1] = 0x00200000 | (i->lanes << 14);
607 if (typeSizeof(i->dType) == 4)
608 code[1] |= 0x04000000;
609 break;
610 case FILE_MEMORY_SHARED:
611 if (targ->getChipset() >= 0x84) {
612 assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
613 code[0] = 0x10000001;
614 code[1] = 0x40000000;
615
616 if (typeSizeof(i->dType) == 4)
617 code[1] |= 0x04000000;
618
619 emitLoadStoreSizeCS(i->sType);
620 } else {
621 assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
622 code[0] = 0x10000001;
623 code[1] = 0x00200000 | (i->lanes << 14);
624 emitLoadStoreSizeCS(i->sType);
625 }
626 break;
627 case FILE_MEMORY_CONST:
628 code[0] = 0x10000001;
629 code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
630 if (typeSizeof(i->dType) == 4)
631 code[1] |= 0x04000000;
632 emitLoadStoreSizeCS(i->sType);
633 break;
634 case FILE_MEMORY_LOCAL:
635 code[0] = 0xd0000001;
636 code[1] = 0x40000000;
637 break;
638 case FILE_MEMORY_GLOBAL:
639 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
640 code[1] = 0x80000000;
641 break;
642 default:
643 assert(!"invalid load source file");
644 break;
645 }
646 if (sf == FILE_MEMORY_LOCAL ||
647 sf == FILE_MEMORY_GLOBAL)
648 emitLoadStoreSizeLG(i->sType, 21 + 32);
649
650 setDst(i, 0);
651
652 emitFlagsRd(i);
653 emitFlagsWr(i);
654
655 if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
656 srcId(*i->src(0).getIndirect(0), 9);
657 } else {
658 setAReg16(i, 0);
659 srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
660 }
661 }
662
663 void
664 CodeEmitterNV50::emitSTORE(const Instruction *i)
665 {
666 DataFile f = i->getSrc(0)->reg.file;
667 int32_t offset = i->getSrc(0)->reg.data.offset;
668
669 switch (f) {
670 case FILE_SHADER_OUTPUT:
671 code[0] = 0x00000001 | ((offset >> 2) << 9);
672 code[1] = 0x80c00000;
673 srcId(i->src(1), 32 + 14);
674 break;
675 case FILE_MEMORY_GLOBAL:
676 code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
677 code[1] = 0xa0000000;
678 emitLoadStoreSizeLG(i->dType, 21 + 32);
679 srcId(i->src(1), 2);
680 break;
681 case FILE_MEMORY_LOCAL:
682 code[0] = 0xd0000001;
683 code[1] = 0x60000000;
684 emitLoadStoreSizeLG(i->dType, 21 + 32);
685 srcId(i->src(1), 2);
686 break;
687 case FILE_MEMORY_SHARED:
688 code[0] = 0x00000001;
689 code[1] = 0xe0000000;
690 switch (typeSizeof(i->dType)) {
691 case 1:
692 code[0] |= offset << 9;
693 code[1] |= 0x00400000;
694 break;
695 case 2:
696 code[0] |= (offset >> 1) << 9;
697 break;
698 case 4:
699 code[0] |= (offset >> 2) << 9;
700 code[1] |= 0x04200000;
701 break;
702 default:
703 assert(0);
704 break;
705 }
706 srcId(i->src(1), 32 + 14);
707 break;
708 default:
709 assert(!"invalid store destination file");
710 break;
711 }
712
713 if (f == FILE_MEMORY_GLOBAL)
714 srcId(*i->src(0).getIndirect(0), 9);
715 else
716 setAReg16(i, 0);
717
718 if (f == FILE_MEMORY_LOCAL)
719 srcAddr16(i->src(0), false, 9);
720
721 emitFlagsRd(i);
722 }
723
724 void
725 CodeEmitterNV50::emitMOV(const Instruction *i)
726 {
727 DataFile sf = i->getSrc(0)->reg.file;
728 DataFile df = i->getDef(0)->reg.file;
729
730 assert(sf == FILE_GPR || df == FILE_GPR);
731
732 if (sf == FILE_FLAGS) {
733 code[0] = 0x00000001;
734 code[1] = 0x20000000;
735 defId(i->def(0), 2);
736 srcId(i->src(0), 12);
737 emitFlagsRd(i);
738 } else
739 if (sf == FILE_ADDRESS) {
740 code[0] = 0x00000001;
741 code[1] = 0x40000000;
742 defId(i->def(0), 2);
743 setARegBits(SDATA(i->src(0)).id + 1);
744 emitFlagsRd(i);
745 } else
746 if (df == FILE_FLAGS) {
747 code[0] = 0x00000001;
748 code[1] = 0xa0000000;
749 defId(i->def(0), 4);
750 srcId(i->src(0), 9);
751 emitFlagsRd(i);
752 } else
753 if (sf == FILE_IMMEDIATE) {
754 code[0] = 0x10008001;
755 code[1] = 0x00000003;
756 emitForm_IMM(i);
757 } else {
758 if (i->encSize == 4) {
759 code[0] = 0x10008000;
760 } else {
761 code[0] = 0x10000001;
762 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
763 code[1] |= (i->lanes << 14);
764 emitFlagsRd(i);
765 }
766 defId(i->def(0), 2);
767 srcId(i->src(0), 9);
768 }
769 if (df == FILE_SHADER_OUTPUT) {
770 assert(i->encSize == 8);
771 code[1] |= 0x8;
772 }
773 }
774
775 void
776 CodeEmitterNV50::emitNOP()
777 {
778 code[0] = 0xf0000001;
779 code[1] = 0xe0000000;
780 }
781
782 void
783 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
784 {
785 code[0] = 0xc0000000 | (lane << 16);
786 code[1] = 0x80000000;
787
788 code[0] |= (quOp & 0x03) << 20;
789 code[1] |= (quOp & 0xfc) << 20;
790
791 emitForm_ADD(i);
792
793 if (!i->srcExists(1))
794 srcId(i->src(0), 32 + 14);
795 }
796
797 void
798 CodeEmitterNV50::emitPFETCH(const Instruction *i)
799 {
800 code[0] = 0x11800001;
801 code[1] = 0x04200000 | (0xf << 14);
802
803 defId(i->def(0), 2);
804 srcAddr8(i->src(0), 9);
805 setAReg16(i, 0);
806 }
807
808 void
809 CodeEmitterNV50::emitINTERP(const Instruction *i)
810 {
811 code[0] = 0x80000000;
812
813 defId(i->def(0), 2);
814 srcAddr8(i->src(0), 16);
815
816 if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
817 code[0] |= 1 << 8;
818 } else {
819 if (i->op == OP_PINTERP) {
820 code[0] |= 1 << 25;
821 srcId(i->src(1), 9);
822 }
823 if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
824 code[0] |= 1 << 24;
825 }
826
827 if (i->encSize == 8) {
828 code[1] =
829 (code[0] & (3 << 24)) >> (24 - 16) |
830 (code[0] & (1 << 8)) << (18 - 8);
831 code[0] &= ~0x03000100;
832 code[0] |= 1;
833 emitFlagsRd(i);
834 }
835 }
836
837 void
838 CodeEmitterNV50::emitMINMAX(const Instruction *i)
839 {
840 if (i->dType == TYPE_F64) {
841 code[0] = 0xe0000000;
842 code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
843 } else {
844 code[0] = 0x30000000;
845 code[1] = 0x80000000;
846 if (i->op == OP_MIN)
847 code[1] |= 0x20000000;
848
849 switch (i->dType) {
850 case TYPE_F32: code[0] |= 0x80000000; break;
851 case TYPE_S32: code[1] |= 0x8c000000; break;
852 case TYPE_U32: code[1] |= 0x84000000; break;
853 case TYPE_S16: code[1] |= 0x80000000; break;
854 case TYPE_U16: break;
855 default:
856 assert(0);
857 break;
858 }
859 code[1] |= i->src(0).mod.abs() << 20;
860 code[1] |= i->src(1).mod.abs() << 19;
861 }
862 emitForm_MAD(i);
863 }
864
865 void
866 CodeEmitterNV50::emitFMAD(const Instruction *i)
867 {
868 const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
869 const int neg_add = i->src(2).mod.neg();
870
871 code[0] = 0xe0000000;
872
873 if (i->encSize == 4) {
874 emitForm_MUL(i);
875 assert(!neg_mul && !neg_add);
876 } else {
877 code[1] = neg_mul << 26;
878 code[1] |= neg_add << 27;
879 if (i->saturate)
880 code[1] |= 1 << 29;
881 emitForm_MAD(i);
882 }
883 }
884
885 void
886 CodeEmitterNV50::emitFADD(const Instruction *i)
887 {
888 const int neg0 = i->src(0).mod.neg();
889 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
890
891 code[0] = 0xb0000000;
892
893 assert(!(i->src(0).mod | i->src(1).mod).abs());
894
895 if (i->src(1).getFile() == FILE_IMMEDIATE) {
896 code[1] = 0;
897 emitForm_IMM(i);
898 code[0] |= neg0 << 15;
899 code[0] |= neg1 << 22;
900 if (i->saturate)
901 code[0] |= 1 << 8;
902 } else
903 if (i->encSize == 8) {
904 code[1] = 0;
905 emitForm_ADD(i);
906 code[1] |= neg0 << 26;
907 code[1] |= neg1 << 27;
908 if (i->saturate)
909 code[1] |= 1 << 29;
910 } else {
911 emitForm_MUL(i);
912 code[0] |= neg0 << 15;
913 code[0] |= neg1 << 22;
914 if (i->saturate)
915 code[0] |= 1 << 8;
916 }
917 }
918
919 void
920 CodeEmitterNV50::emitUADD(const Instruction *i)
921 {
922 const int neg0 = i->src(0).mod.neg();
923 const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
924
925 code[0] = 0x20008000;
926
927 if (i->src(1).getFile() == FILE_IMMEDIATE) {
928 code[1] = 0;
929 emitForm_IMM(i);
930 } else
931 if (i->encSize == 8) {
932 code[0] = 0x20000000;
933 code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
934 emitForm_ADD(i);
935 } else {
936 emitForm_MUL(i);
937 }
938 assert(!(neg0 && neg1));
939 code[0] |= neg0 << 28;
940 code[0] |= neg1 << 22;
941
942 if (i->flagsSrc >= 0) {
943 // addc == sub | subr
944 assert(!(code[0] & 0x10400000) && !i->getPredicate());
945 code[0] |= 0x10400000;
946 srcId(i->src(i->flagsSrc), 32 + 12);
947 }
948 }
949
950 void
951 CodeEmitterNV50::emitAADD(const Instruction *i)
952 {
953 const int s = (i->op == OP_MOV) ? 0 : 1;
954
955 code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
956 code[1] = 0x20000000;
957
958 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
959
960 emitFlagsRd(i);
961
962 if (s && i->srcExists(0))
963 setARegBits(SDATA(i->src(0)).id + 1);
964 }
965
966 void
967 CodeEmitterNV50::emitIMUL(const Instruction *i)
968 {
969 code[0] = 0x40000000;
970
971 if (i->encSize == 8) {
972 code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
973 emitForm_MAD(i);
974 } else {
975 if (i->sType == TYPE_S16)
976 code[0] |= 0x8100;
977 emitForm_MUL(i);
978 }
979 }
980
981 void
982 CodeEmitterNV50::emitFMUL(const Instruction *i)
983 {
984 const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
985
986 code[0] = 0xc0000000;
987
988 if (i->src(1).getFile() == FILE_IMMEDIATE) {
989 code[1] = 0;
990 emitForm_IMM(i);
991 if (neg)
992 code[0] |= 0x8000;
993 } else
994 if (i->encSize == 8) {
995 code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
996 if (neg)
997 code[1] |= 0x08000000;
998 emitForm_MAD(i);
999 } else {
1000 emitForm_MUL(i);
1001 if (neg)
1002 code[0] |= 0x8000;
1003 }
1004 }
1005
1006 void
1007 CodeEmitterNV50::emitIMAD(const Instruction *i)
1008 {
1009 code[0] = 0x60000000;
1010 if (isSignedType(i->sType))
1011 code[1] = i->saturate ? 0x40000000 : 0x20000000;
1012 else
1013 code[1] = 0x00000000;
1014
1015 int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1016 int neg2 = i->src(2).mod.neg();
1017
1018 assert(!(neg1 & neg2));
1019 code[1] |= neg1 << 27;
1020 code[1] |= neg2 << 26;
1021
1022 emitForm_MAD(i);
1023
1024 if (i->flagsSrc >= 0) {
1025 // add with carry from $cX
1026 assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1027 code[1] |= 0xc << 24;
1028 srcId(i->src(i->flagsSrc), 32 + 12);
1029 }
1030 }
1031
1032 void
1033 CodeEmitterNV50::emitISAD(const Instruction *i)
1034 {
1035 if (i->encSize == 8) {
1036 code[0] = 0x50000000;
1037 switch (i->sType) {
1038 case TYPE_U32: code[1] = 0x04000000; break;
1039 case TYPE_S32: code[1] = 0x0c000000; break;
1040 case TYPE_U16: code[1] = 0x00000000; break;
1041 case TYPE_S16: code[1] = 0x08000000; break;
1042 default:
1043 assert(0);
1044 break;
1045 }
1046 emitForm_MAD(i);
1047 } else {
1048 switch (i->sType) {
1049 case TYPE_U32: code[0] = 0x50008000; break;
1050 case TYPE_S32: code[0] = 0x50008100; break;
1051 case TYPE_U16: code[0] = 0x50000000; break;
1052 case TYPE_S16: code[0] = 0x50000100; break;
1053 default:
1054 assert(0);
1055 break;
1056 }
1057 emitForm_MUL(i);
1058 }
1059 }
1060
1061 void
1062 CodeEmitterNV50::emitSET(const Instruction *i)
1063 {
1064 code[0] = 0x30000000;
1065 code[1] = 0x60000000;
1066
1067 emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1068
1069 switch (i->sType) {
1070 case TYPE_F32: code[0] |= 0x80000000; break;
1071 case TYPE_S32: code[1] |= 0x0c000000; break;
1072 case TYPE_U32: code[1] |= 0x04000000; break;
1073 case TYPE_S16: code[1] |= 0x08000000; break;
1074 case TYPE_U16: break;
1075 default:
1076 assert(0);
1077 break;
1078 }
1079 if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1080 if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1081 if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1082 if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1083
1084 emitForm_MAD(i);
1085 }
1086
1087 void
1088 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1089 {
1090 switch (rnd) {
1091 case ROUND_NI: code[1] |= 0x08000000; break;
1092 case ROUND_M: code[1] |= 0x00020000; break;
1093 case ROUND_MI: code[1] |= 0x08020000; break;
1094 case ROUND_P: code[1] |= 0x00040000; break;
1095 case ROUND_PI: code[1] |= 0x08040000; break;
1096 case ROUND_Z: code[1] |= 0x00060000; break;
1097 case ROUND_ZI: code[1] |= 0x08060000; break;
1098 default:
1099 assert(rnd == ROUND_N);
1100 break;
1101 }
1102 }
1103
1104 void
1105 CodeEmitterNV50::emitCVT(const Instruction *i)
1106 {
1107 const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1108 RoundMode rnd;
1109 DataType dType;
1110
1111 switch (i->op) {
1112 case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;
1113 case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1114 case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1115 default:
1116 rnd = i->rnd;
1117 break;
1118 }
1119
1120 if (i->op == OP_NEG && i->dType == TYPE_U32)
1121 dType = TYPE_S32;
1122 else
1123 dType = i->dType;
1124
1125 code[0] = 0xa0000000;
1126
1127 switch (dType) {
1128 case TYPE_F64:
1129 switch (i->sType) {
1130 case TYPE_F64: code[1] = 0xc4404000; break;
1131 case TYPE_S64: code[1] = 0x44414000; break;
1132 case TYPE_U64: code[1] = 0x44404000; break;
1133 case TYPE_F32: code[1] = 0xc4400000; break;
1134 case TYPE_S32: code[1] = 0x44410000; break;
1135 case TYPE_U32: code[1] = 0x44400000; break;
1136 default:
1137 assert(0);
1138 break;
1139 }
1140 break;
1141 case TYPE_S64:
1142 switch (i->sType) {
1143 case TYPE_F64: code[1] = 0x8c404000; break;
1144 case TYPE_F32: code[1] = 0x8c400000; break;
1145 default:
1146 assert(0);
1147 break;
1148 }
1149 break;
1150 case TYPE_U64:
1151 switch (i->sType) {
1152 case TYPE_F64: code[1] = 0x84404000; break;
1153 case TYPE_F32: code[1] = 0x84400000; break;
1154 default:
1155 assert(0);
1156 break;
1157 }
1158 break;
1159 case TYPE_F32:
1160 switch (i->sType) {
1161 case TYPE_F64: code[1] = 0xc0404000; break;
1162 case TYPE_S64: code[1] = 0x40414000; break;
1163 case TYPE_U64: code[1] = 0x40404000; break;
1164 case TYPE_F32: code[1] = 0xc4004000; break;
1165 case TYPE_S32: code[1] = 0x44014000; break;
1166 case TYPE_U32: code[1] = 0x44004000; break;
1167 case TYPE_F16: code[1] = 0xc4000000; break;
1168 default:
1169 assert(0);
1170 break;
1171 }
1172 break;
1173 case TYPE_S32:
1174 switch (i->sType) {
1175 case TYPE_F64: code[1] = 0x88404000; break;
1176 case TYPE_F32: code[1] = 0x8c004000; break;
1177 case TYPE_S32: code[1] = 0x0c014000; break;
1178 case TYPE_U32: code[1] = 0x0c004000; break;
1179 case TYPE_F16: code[1] = 0x8c000000; break;
1180 case TYPE_S16: code[1] = 0x0c010000; break;
1181 case TYPE_U16: code[1] = 0x0c000000; break;
1182 case TYPE_S8: code[1] = 0x0c018000; break;
1183 case TYPE_U8: code[1] = 0x0c008000; break;
1184 default:
1185 assert(0);
1186 break;
1187 }
1188 break;
1189 case TYPE_U32:
1190 switch (i->sType) {
1191 case TYPE_F64: code[1] = 0x80404000; break;
1192 case TYPE_F32: code[1] = 0x84004000; break;
1193 case TYPE_S32: code[1] = 0x04014000; break;
1194 case TYPE_U32: code[1] = 0x04004000; break;
1195 case TYPE_F16: code[1] = 0x84000000; break;
1196 case TYPE_S16: code[1] = 0x04010000; break;
1197 case TYPE_U16: code[1] = 0x04000000; break;
1198 case TYPE_S8: code[1] = 0x04018000; break;
1199 case TYPE_U8: code[1] = 0x04008000; break;
1200 default:
1201 assert(0);
1202 break;
1203 }
1204 break;
1205 case TYPE_S16:
1206 case TYPE_U16:
1207 case TYPE_S8:
1208 case TYPE_U8:
1209 default:
1210 assert(0);
1211 break;
1212 }
1213 if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1214 code[1] |= 0x00004000;
1215
1216 roundMode_CVT(rnd);
1217
1218 switch (i->op) {
1219 case OP_ABS: code[1] |= 1 << 20; break;
1220 case OP_SAT: code[1] |= 1 << 19; break;
1221 case OP_NEG: code[1] |= 1 << 29; break;
1222 default:
1223 break;
1224 }
1225 code[1] ^= i->src(0).mod.neg() << 29;
1226 code[1] |= i->src(0).mod.abs() << 20;
1227 if (i->saturate)
1228 code[1] |= 1 << 19;
1229
1230 assert(i->op != OP_ABS || !i->src(0).mod.neg());
1231
1232 emitForm_MAD(i);
1233 }
1234
1235 void
1236 CodeEmitterNV50::emitPreOp(const Instruction *i)
1237 {
1238 code[0] = 0xb0000000;
1239 code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1240
1241 code[1] |= i->src(0).mod.abs() << 20;
1242 code[1] |= i->src(0).mod.neg() << 26;
1243
1244 emitForm_MAD(i);
1245 }
1246
1247 void
1248 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1249 {
1250 code[0] = 0x90000000;
1251
1252 if (i->encSize == 4) {
1253 assert(i->op == OP_RCP);
1254 code[0] |= i->src(0).mod.abs() << 15;
1255 code[0] |= i->src(0).mod.neg() << 22;
1256 emitForm_MUL(i);
1257 } else {
1258 code[1] = subOp << 29;
1259 code[1] |= i->src(0).mod.abs() << 20;
1260 code[1] |= i->src(0).mod.neg() << 26;
1261 emitForm_MAD(i);
1262 }
1263 }
1264
1265 void
1266 CodeEmitterNV50::emitNOT(const Instruction *i)
1267 {
1268 code[0] = 0xd0000000;
1269 code[1] = 0x0002c000;
1270
1271 switch (i->sType) {
1272 case TYPE_U32:
1273 case TYPE_S32:
1274 code[1] |= 0x04000000;
1275 break;
1276 default:
1277 break;
1278 }
1279 emitForm_MAD(i);
1280 setSrc(i, 0, 1);
1281 }
1282
1283 void
1284 CodeEmitterNV50::emitLogicOp(const Instruction *i)
1285 {
1286 code[0] = 0xd0000000;
1287 code[1] = 0;
1288
1289 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1290 switch (i->op) {
1291 case OP_OR: code[0] |= 0x0100; break;
1292 case OP_XOR: code[0] |= 0x8000; break;
1293 default:
1294 assert(i->op == OP_AND);
1295 break;
1296 }
1297 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1298 code[0] |= 1 << 22;
1299
1300 emitForm_IMM(i);
1301 } else {
1302 switch (i->op) {
1303 case OP_AND: code[1] = 0x04000000; break;
1304 case OP_OR: code[1] = 0x04004000; break;
1305 case OP_XOR: code[1] = 0x04008000; break;
1306 default:
1307 assert(0);
1308 break;
1309 }
1310 if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1311 code[1] |= 1 << 16;
1312 if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1313 code[1] |= 1 << 17;
1314
1315 emitForm_MAD(i);
1316 }
1317 }
1318
1319 void
1320 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1321 {
1322 code[0] = 0x00000001 | (shl << 16);
1323 code[1] = 0xc0000000;
1324
1325 code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1326
1327 setSrcFileBits(i, NV50_OP_ENC_IMM);
1328 setSrc(i, 0, 0);
1329 emitFlagsRd(i);
1330 }
1331
1332 void
1333 CodeEmitterNV50::emitShift(const Instruction *i)
1334 {
1335 if (i->def(0).getFile() == FILE_ADDRESS) {
1336 assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1337 emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1338 } else {
1339 code[0] = 0x30000001;
1340 code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1341 if (i->op == OP_SHR && isSignedType(i->sType))
1342 code[1] |= 1 << 27;
1343
1344 if (i->src(1).getFile() == FILE_IMMEDIATE) {
1345 code[1] |= 1 << 20;
1346 code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1347 defId(i->def(0), 2);
1348 srcId(i->src(0), 9);
1349 emitFlagsRd(i);
1350 } else {
1351 emitForm_MAD(i);
1352 }
1353 }
1354 }
1355
1356 void
1357 CodeEmitterNV50::emitOUT(const Instruction *i)
1358 {
1359 code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400;
1360 code[1] = 0xc0000001;
1361
1362 emitFlagsRd(i);
1363 }
1364
1365 void
1366 CodeEmitterNV50::emitTEX(const TexInstruction *i)
1367 {
1368 code[0] = 0xf0000001;
1369 code[1] = 0x00000000;
1370
1371 switch (i->op) {
1372 case OP_TXB:
1373 code[1] = 0x20000000;
1374 break;
1375 case OP_TXL:
1376 code[1] = 0x40000000;
1377 break;
1378 case OP_TXF:
1379 code[0] |= 0x01000000;
1380 break;
1381 case OP_TXG:
1382 code[0] = 0x01000000;
1383 code[1] = 0x80000000;
1384 break;
1385 default:
1386 assert(i->op == OP_TEX);
1387 break;
1388 }
1389
1390 code[0] |= i->tex.r << 9;
1391 code[0] |= i->tex.s << 17;
1392
1393 int argc = i->tex.target.getArgCount();
1394
1395 if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1396 argc += 1;
1397 if (i->tex.target.isShadow())
1398 argc += 1;
1399 assert(argc <= 4);
1400
1401 code[0] |= (argc - 1) << 22;
1402
1403 if (i->tex.target.isCube()) {
1404 code[0] |= 0x08000000;
1405 } else
1406 if (i->tex.useOffsets) {
1407 code[1] |= (i->tex.offset[0][0] & 0xf) << 24;
1408 code[1] |= (i->tex.offset[0][1] & 0xf) << 20;
1409 code[1] |= (i->tex.offset[0][2] & 0xf) << 16;
1410 }
1411
1412 code[0] |= (i->tex.mask & 0x3) << 25;
1413 code[1] |= (i->tex.mask & 0xc) << 12;
1414
1415 if (i->tex.liveOnly)
1416 code[1] |= 4;
1417
1418 defId(i->def(0), 2);
1419
1420 emitFlagsRd(i);
1421 }
1422
1423 void
1424 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1425 {
1426 assert(i->tex.query == TXQ_DIMS);
1427
1428 code[0] = 0xf0000001;
1429 code[1] = 0x60000000;
1430
1431 code[0] |= i->tex.r << 9;
1432 code[0] |= i->tex.s << 17;
1433
1434 code[0] |= (i->tex.mask & 0x3) << 25;
1435 code[1] |= (i->tex.mask & 0xc) << 12;
1436
1437 defId(i->def(0), 2);
1438
1439 emitFlagsRd(i);
1440 }
1441
1442 void
1443 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1444 {
1445 code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1446 code[1] = 0x60010000;
1447
1448 code[0] |= (i->tex.mask & 0x3) << 25;
1449 code[1] |= (i->tex.mask & 0xc) << 12;
1450 defId(i->def(0), 2);
1451
1452 emitFlagsRd(i);
1453 }
1454
1455 void
1456 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1457 {
1458 uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1459
1460 code[0] = 0x10000003; // bra
1461 code[1] = 0x00000780; // always
1462
1463 switch (i->subOp) {
1464 case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1465 break;
1466 case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1467 pos += 8;
1468 break;
1469 default:
1470 assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1471 code[0] = 0x20000003; // call
1472 code[1] = 0x00000000; // no predicate
1473 break;
1474 }
1475 addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1476 addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1477 }
1478
1479 void
1480 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1481 {
1482 const FlowInstruction *f = i->asFlow();
1483 bool hasPred = false;
1484 bool hasTarg = false;
1485
1486 code[0] = 0x00000003 | (flowOp << 28);
1487 code[1] = 0x00000000;
1488
1489 switch (i->op) {
1490 case OP_BRA:
1491 hasPred = true;
1492 hasTarg = true;
1493 break;
1494 case OP_BREAK:
1495 case OP_BRKPT:
1496 case OP_DISCARD:
1497 case OP_RET:
1498 hasPred = true;
1499 break;
1500 case OP_CALL:
1501 case OP_PREBREAK:
1502 case OP_JOINAT:
1503 hasTarg = true;
1504 break;
1505 case OP_PRERET:
1506 hasTarg = true;
1507 if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1508 emitPRERETEmu(f);
1509 return;
1510 }
1511 break;
1512 default:
1513 break;
1514 }
1515
1516 if (hasPred)
1517 emitFlagsRd(i);
1518
1519 if (hasTarg && f) {
1520 uint32_t pos;
1521
1522 if (f->op == OP_CALL) {
1523 if (f->builtin) {
1524 pos = targNV50->getBuiltinOffset(f->target.builtin);
1525 } else {
1526 pos = f->target.fn->binPos;
1527 }
1528 } else {
1529 pos = f->target.bb->binPos;
1530 }
1531
1532 code[0] |= ((pos >> 2) & 0xffff) << 11;
1533 code[1] |= ((pos >> 18) & 0x003f) << 14;
1534
1535 RelocEntry::Type relocTy;
1536
1537 relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1538
1539 addReloc(relocTy, 0, pos, 0x07fff800, 9);
1540 addReloc(relocTy, 1, pos, 0x000fc000, -4);
1541 }
1542 }
1543
1544 void
1545 CodeEmitterNV50::emitBAR(const Instruction *i)
1546 {
1547 ImmediateValue *barId = i->getSrc(0)->asImm();
1548 assert(barId);
1549
1550 code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1551 code[1] = 0x00004000;
1552
1553 if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1554 code[0] |= 1 << 26;
1555 }
1556
1557 void
1558 CodeEmitterNV50::emitATOM(const Instruction *i)
1559 {
1560 uint8_t subOp;
1561 switch (i->subOp) {
1562 case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break;
1563 case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break;
1564 case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break;
1565 case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break;
1566 case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break;
1567 case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break;
1568 case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break;
1569 case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break;
1570 case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break;
1571 case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1572 default:
1573 assert(!"invalid subop");
1574 return;
1575 }
1576 code[0] = 0xd0000001;
1577 code[1] = 0xe0c00000 | (subOp << 2);
1578 if (isSignedType(i->dType))
1579 code[1] |= 1 << 21;
1580
1581 // args
1582 emitFlagsRd(i);
1583 setDst(i, 0);
1584 setSrc(i, 1, 1);
1585 if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1586 setSrc(i, 2, 2);
1587
1588 // g[] pointer
1589 code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1590 srcId(i->getIndirect(0, 0), 9);
1591 }
1592
1593 bool
1594 CodeEmitterNV50::emitInstruction(Instruction *insn)
1595 {
1596 if (!insn->encSize) {
1597 ERROR("skipping unencodable instruction: "); insn->print();
1598 return false;
1599 } else
1600 if (codeSize + insn->encSize > codeSizeLimit) {
1601 ERROR("code emitter output buffer too small\n");
1602 return false;
1603 }
1604
1605 if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1606 INFO("EMIT: "); insn->print();
1607 }
1608
1609 switch (insn->op) {
1610 case OP_MOV:
1611 emitMOV(insn);
1612 break;
1613 case OP_EXIT:
1614 case OP_NOP:
1615 case OP_JOIN:
1616 emitNOP();
1617 break;
1618 case OP_VFETCH:
1619 case OP_LOAD:
1620 emitLOAD(insn);
1621 break;
1622 case OP_EXPORT:
1623 case OP_STORE:
1624 emitSTORE(insn);
1625 break;
1626 case OP_PFETCH:
1627 emitPFETCH(insn);
1628 break;
1629 case OP_LINTERP:
1630 case OP_PINTERP:
1631 emitINTERP(insn);
1632 break;
1633 case OP_ADD:
1634 case OP_SUB:
1635 if (isFloatType(insn->dType))
1636 emitFADD(insn);
1637 else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1638 emitAADD(insn);
1639 else
1640 emitUADD(insn);
1641 break;
1642 case OP_MUL:
1643 if (isFloatType(insn->dType))
1644 emitFMUL(insn);
1645 else
1646 emitIMUL(insn);
1647 break;
1648 case OP_MAD:
1649 case OP_FMA:
1650 if (isFloatType(insn->dType))
1651 emitFMAD(insn);
1652 else
1653 emitIMAD(insn);
1654 break;
1655 case OP_SAD:
1656 emitISAD(insn);
1657 break;
1658 case OP_NOT:
1659 emitNOT(insn);
1660 break;
1661 case OP_AND:
1662 case OP_OR:
1663 case OP_XOR:
1664 emitLogicOp(insn);
1665 break;
1666 case OP_SHL:
1667 case OP_SHR:
1668 emitShift(insn);
1669 break;
1670 case OP_SET:
1671 emitSET(insn);
1672 break;
1673 case OP_MIN:
1674 case OP_MAX:
1675 emitMINMAX(insn);
1676 break;
1677 case OP_CEIL:
1678 case OP_FLOOR:
1679 case OP_TRUNC:
1680 case OP_ABS:
1681 case OP_NEG:
1682 case OP_SAT:
1683 emitCVT(insn);
1684 break;
1685 case OP_CVT:
1686 if (insn->def(0).getFile() == FILE_ADDRESS)
1687 emitARL(insn, 0);
1688 else
1689 if (insn->def(0).getFile() == FILE_FLAGS ||
1690 insn->src(0).getFile() == FILE_FLAGS ||
1691 insn->src(0).getFile() == FILE_ADDRESS)
1692 emitMOV(insn);
1693 else
1694 emitCVT(insn);
1695 break;
1696 case OP_RCP:
1697 emitSFnOp(insn, 0);
1698 break;
1699 case OP_RSQ:
1700 emitSFnOp(insn, 2);
1701 break;
1702 case OP_LG2:
1703 emitSFnOp(insn, 3);
1704 break;
1705 case OP_SIN:
1706 emitSFnOp(insn, 4);
1707 break;
1708 case OP_COS:
1709 emitSFnOp(insn, 5);
1710 break;
1711 case OP_EX2:
1712 emitSFnOp(insn, 6);
1713 break;
1714 case OP_PRESIN:
1715 case OP_PREEX2:
1716 emitPreOp(insn);
1717 break;
1718 case OP_TEX:
1719 case OP_TXB:
1720 case OP_TXL:
1721 case OP_TXF:
1722 emitTEX(insn->asTex());
1723 break;
1724 case OP_TXQ:
1725 emitTXQ(insn->asTex());
1726 break;
1727 case OP_TEXPREP:
1728 emitTEXPREP(insn->asTex());
1729 break;
1730 case OP_EMIT:
1731 case OP_RESTART:
1732 emitOUT(insn);
1733 break;
1734 case OP_DISCARD:
1735 emitFlow(insn, 0x0);
1736 break;
1737 case OP_BRA:
1738 emitFlow(insn, 0x1);
1739 break;
1740 case OP_CALL:
1741 emitFlow(insn, 0x2);
1742 break;
1743 case OP_RET:
1744 emitFlow(insn, 0x3);
1745 break;
1746 case OP_PREBREAK:
1747 emitFlow(insn, 0x4);
1748 break;
1749 case OP_BREAK:
1750 emitFlow(insn, 0x5);
1751 break;
1752 case OP_QUADON:
1753 emitFlow(insn, 0x6);
1754 break;
1755 case OP_QUADPOP:
1756 emitFlow(insn, 0x7);
1757 break;
1758 case OP_JOINAT:
1759 emitFlow(insn, 0xa);
1760 break;
1761 case OP_PRERET:
1762 emitFlow(insn, 0xd);
1763 break;
1764 case OP_QUADOP:
1765 emitQUADOP(insn, insn->lanes, insn->subOp);
1766 break;
1767 case OP_DFDX:
1768 emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
1769 break;
1770 case OP_DFDY:
1771 emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
1772 break;
1773 case OP_ATOM:
1774 emitATOM(insn);
1775 break;
1776 case OP_BAR:
1777 emitBAR(insn);
1778 break;
1779 case OP_PHI:
1780 case OP_UNION:
1781 case OP_CONSTRAINT:
1782 ERROR("operation should have been eliminated\n");
1783 return false;
1784 case OP_EXP:
1785 case OP_LOG:
1786 case OP_SQRT:
1787 case OP_POW:
1788 case OP_SELP:
1789 case OP_SLCT:
1790 case OP_TXD:
1791 case OP_PRECONT:
1792 case OP_CONT:
1793 case OP_POPCNT:
1794 case OP_INSBF:
1795 case OP_EXTBF:
1796 ERROR("operation should have been lowered\n");
1797 return false;
1798 default:
1799 ERROR("unknown op: %u\n", insn->op);
1800 return false;
1801 }
1802 if (insn->join || insn->op == OP_JOIN)
1803 code[1] |= 0x2;
1804 else
1805 if (insn->exit || insn->op == OP_EXIT)
1806 code[1] |= 0x1;
1807
1808 assert((insn->encSize == 8) == (code[0] & 1));
1809
1810 code += insn->encSize / 4;
1811 codeSize += insn->encSize;
1812 return true;
1813 }
1814
1815 uint32_t
1816 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
1817 {
1818 const Target::OpInfo &info = targ->getOpInfo(i);
1819
1820 if (info.minEncSize > 4)
1821 return 8;
1822
1823 // check constraints on dst and src operands
1824 for (int d = 0; i->defExists(d); ++d) {
1825 if (i->def(d).rep()->reg.data.id > 63 ||
1826 i->def(d).rep()->reg.file != FILE_GPR)
1827 return 8;
1828 }
1829
1830 for (int s = 0; i->srcExists(s); ++s) {
1831 DataFile sf = i->src(s).getFile();
1832 if (sf != FILE_GPR)
1833 if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
1834 return 8;
1835 if (i->src(s).rep()->reg.data.id > 63)
1836 return 8;
1837 }
1838
1839 // check modifiers & rounding
1840 if (i->join || i->lanes != 0xf || i->exit)
1841 return 8;
1842 if (i->op == OP_MUL && i->rnd != ROUND_N)
1843 return 8;
1844
1845 if (i->asTex())
1846 return 8; // TODO: short tex encoding
1847
1848 // check constraints on short MAD
1849 if (info.srcNr >= 2 && i->srcExists(2)) {
1850 if (i->saturate || i->src(2).mod)
1851 return 8;
1852 if ((i->src(0).mod ^ i->src(1).mod) ||
1853 (i->src(0).mod | i->src(1).mod).abs())
1854 return 8;
1855 if (!i->defExists(0) ||
1856 i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
1857 return 8;
1858 }
1859
1860 return info.minEncSize;
1861 }
1862
1863 // Change the encoding size of an instruction after BBs have been scheduled.
1864 static void
1865 makeInstructionLong(Instruction *insn)
1866 {
1867 if (insn->encSize == 8)
1868 return;
1869 Function *fn = insn->bb->getFunction();
1870 int n = 0;
1871 int adj = 4;
1872
1873 for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
1874
1875 if (n & 1) {
1876 adj = 8;
1877 insn->next->encSize = 8;
1878 } else
1879 if (insn->prev && insn->prev->encSize == 4) {
1880 adj = 8;
1881 insn->prev->encSize = 8;
1882 }
1883 insn->encSize = 8;
1884
1885 for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
1886 fn->bbArray[i]->binPos += 4;
1887 }
1888 fn->binSize += adj;
1889 insn->bb->binSize += adj;
1890 }
1891
1892 static bool
1893 trySetExitModifier(Instruction *insn)
1894 {
1895 if (insn->op == OP_DISCARD ||
1896 insn->op == OP_QUADON ||
1897 insn->op == OP_QUADPOP)
1898 return false;
1899 for (int s = 0; insn->srcExists(s); ++s)
1900 if (insn->src(s).getFile() == FILE_IMMEDIATE)
1901 return false;
1902 if (insn->asFlow()) {
1903 if (insn->op == OP_CALL) // side effects !
1904 return false;
1905 if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
1906 return false;
1907 insn->op = OP_EXIT;
1908 }
1909 insn->exit = 1;
1910 makeInstructionLong(insn);
1911 return true;
1912 }
1913
1914 static void
1915 replaceExitWithModifier(Function *func)
1916 {
1917 BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
1918
1919 if (!epilogue->getExit() ||
1920 epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
1921 return;
1922
1923 if (epilogue->getEntry()->op != OP_EXIT) {
1924 Instruction *insn = epilogue->getExit()->prev;
1925 if (!insn || !trySetExitModifier(insn))
1926 return;
1927 insn->exit = 1;
1928 } else {
1929 for (Graph::EdgeIterator ei = func->cfgExit->incident();
1930 !ei.end(); ei.next()) {
1931 BasicBlock *bb = BasicBlock::get(ei.getNode());
1932 Instruction *i = bb->getExit();
1933
1934 if (!i || !trySetExitModifier(i))
1935 return;
1936 }
1937 }
1938 epilogue->binSize -= 8;
1939 func->binSize -= 8;
1940 delete_Instruction(func->getProgram(), epilogue->getExit());
1941 }
1942
1943 void
1944 CodeEmitterNV50::prepareEmission(Function *func)
1945 {
1946 CodeEmitter::prepareEmission(func);
1947
1948 replaceExitWithModifier(func);
1949 }
1950
1951 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
1952 CodeEmitter(target), targNV50(target)
1953 {
1954 targ = target; // specialized
1955 code = NULL;
1956 codeSize = codeSizeLimit = 0;
1957 relocInfo = NULL;
1958 }
1959
1960 CodeEmitter *
1961 TargetNV50::getCodeEmitter(Program::Type type)
1962 {
1963 CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
1964 emit->setProgramType(type);
1965 return emit;
1966 }
1967
1968 } // namespace nv50_ir