nvc0/ir: add support for emitting partial min/max ops for int64
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gm107.cpp
1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "codegen/nv50_ir_target_gm107.h"
26
27 //#define GM107_DEBUG_SCHED_DATA
28
29 namespace nv50_ir {
30
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34 CodeEmitterGM107(const TargetGM107 *);
35
36 virtual bool emitInstruction(Instruction *);
37 virtual uint32_t getMinEncodingSize(const Instruction *) const;
38
39 virtual void prepareEmission(Program *);
40 virtual void prepareEmission(Function *);
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 private:
45 const TargetGM107 *targGM107;
46
47 Program::Type progType;
48
49 const Instruction *insn;
50 const bool writeIssueDelays;
51 uint32_t *data;
52
53 private:
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value *);
61 inline void emitGPR(int pos) {
62 emitGPR(pos, (const Value *)NULL);
63 }
64 inline void emitGPR(int pos, const ValueRef &ref) {
65 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66 }
67 inline void emitGPR(int pos, const ValueRef *ref) {
68 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69 }
70 inline void emitGPR(int pos, const ValueDef &def) {
71 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72 }
73 inline void emitSYS(int, const Value *);
74 inline void emitSYS(int pos, const ValueRef &ref) {
75 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76 }
77 inline void emitPRED(int, const Value *);
78 inline void emitPRED(int pos) {
79 emitPRED(pos, (const Value *)NULL);
80 }
81 inline void emitPRED(int pos, const ValueRef &ref) {
82 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83 }
84 inline void emitPRED(int pos, const ValueDef &def) {
85 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86 }
87 inline void emitADDR(int, int, int, int, const ValueRef &);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89 inline bool longIMMD(const ValueRef &);
90 inline void emitIMMD(int, int, const ValueRef &);
91
92 void emitCond3(int, CondCode);
93 void emitCond4(int, CondCode);
94 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef &);
101 inline void emitNEG(int, const ValueRef &);
102 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode, int);
105 inline void emitRND(int pos) {
106 emitRND(pos, insn->rnd, -1);
107 }
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef &);
110
111 void emitEXIT();
112 void emitBRA();
113 void emitCAL();
114 void emitPCNT();
115 void emitCONT();
116 void emitPBK();
117 void emitBRK();
118 void emitPRET();
119 void emitRET();
120 void emitSSY();
121 void emitSYNC();
122 void emitSAM();
123 void emitRAM();
124
125 void emitMOV();
126 void emitS2R();
127 void emitF2F();
128 void emitF2I();
129 void emitI2F();
130 void emitI2I();
131 void emitSEL();
132 void emitSHFL();
133
134 void emitDADD();
135 void emitDMUL();
136 void emitDFMA();
137 void emitDMNMX();
138 void emitDSET();
139 void emitDSETP();
140
141 void emitFADD();
142 void emitFMUL();
143 void emitFFMA();
144 void emitMUFU();
145 void emitFMNMX();
146 void emitRRO();
147 void emitFCMP();
148 void emitFSET();
149 void emitFSETP();
150 void emitFSWZADD();
151
152 void emitLOP();
153 void emitNOT();
154 void emitIADD();
155 void emitIMUL();
156 void emitIMAD();
157 void emitISCADD();
158 void emitIMNMX();
159 void emitICMP();
160 void emitISET();
161 void emitISETP();
162 void emitSHL();
163 void emitSHR();
164 void emitPOPC();
165 void emitBFI();
166 void emitBFE();
167 void emitFLO();
168
169 void emitLDSTs(int, DataType);
170 void emitLDSTc(int);
171 void emitLDC();
172 void emitLDL();
173 void emitLDS();
174 void emitLD();
175 void emitSTL();
176 void emitSTS();
177 void emitST();
178 void emitALD();
179 void emitAST();
180 void emitISBERD();
181 void emitAL2P();
182 void emitIPA();
183 void emitATOM();
184 void emitATOMS();
185 void emitRED();
186 void emitCCTL();
187
188 void emitPIXLD();
189
190 void emitTEXs(int);
191 void emitTEX();
192 void emitTLD();
193 void emitTLD4();
194 void emitTXD();
195 void emitTXQ();
196 void emitTMML();
197 void emitDEPBAR();
198
199 void emitNOP();
200 void emitKIL();
201 void emitOUT();
202
203 void emitBAR();
204 void emitMEMBAR();
205
206 void emitVOTE();
207
208 void emitSUTarget();
209 void emitSUHandle(const int s);
210 void emitSUSTx();
211 void emitSULDx();
212 void emitSUREDx();
213 };
214
215 /*******************************************************************************
216 * general instruction layout/fields
217 ******************************************************************************/
218
219 void
220 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
221 {
222 if (b >= 0) {
223 uint32_t m = ((1ULL << s) - 1);
224 uint64_t d = (uint64_t)(v & m) << b;
225 assert(!(v & ~m) || (v & ~m) == ~m);
226 data[1] |= d >> 32;
227 data[0] |= d;
228 }
229 }
230
231 void
232 CodeEmitterGM107::emitPred()
233 {
234 if (insn->predSrc >= 0) {
235 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
236 emitField(19, 1, insn->cc == CC_NOT_P);
237 } else {
238 emitField(16, 3, 7);
239 }
240 }
241
242 void
243 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
244 {
245 code[0] = 0x00000000;
246 code[1] = hi;
247 if (pred)
248 emitPred();
249 }
250
251 void
252 CodeEmitterGM107::emitGPR(int pos, const Value *val)
253 {
254 emitField(pos, 8, val ? val->reg.data.id : 255);
255 }
256
257 void
258 CodeEmitterGM107::emitSYS(int pos, const Value *val)
259 {
260 int id = val ? val->reg.data.id : -1;
261
262 switch (id) {
263 case SV_LANEID : id = 0x00; break;
264 case SV_VERTEX_COUNT : id = 0x10; break;
265 case SV_INVOCATION_ID : id = 0x11; break;
266 case SV_THREAD_KILL : id = 0x13; break;
267 case SV_INVOCATION_INFO: id = 0x1d; break;
268 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
269 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
270 default:
271 assert(!"invalid system value");
272 id = 0;
273 break;
274 }
275
276 emitField(pos, 8, id);
277 }
278
279 void
280 CodeEmitterGM107::emitPRED(int pos, const Value *val)
281 {
282 emitField(pos, 3, val ? val->reg.data.id : 7);
283 }
284
285 void
286 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
287 const ValueRef &ref)
288 {
289 const Value *v = ref.get();
290 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
291 if (gpr >= 0)
292 emitGPR(gpr, ref.getIndirect(0));
293 emitField(off, len, v->reg.data.offset >> shr);
294 }
295
296 void
297 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
298 const ValueRef &ref)
299 {
300 const Value *v = ref.get();
301 const Symbol *s = v->asSym();
302
303 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
304
305 emitField(buf, 5, v->reg.fileIndex);
306 if (gpr >= 0)
307 emitGPR(gpr, ref.getIndirect(0));
308 emitField(off, 16, s->reg.data.offset >> shr);
309 }
310
311 bool
312 CodeEmitterGM107::longIMMD(const ValueRef &ref)
313 {
314 if (ref.getFile() == FILE_IMMEDIATE) {
315 const ImmediateValue *imm = ref.get()->asImm();
316 if (isFloatType(insn->sType)) {
317 if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
318 return true;
319 } else {
320 if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
321 (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
322 return true;
323 }
324 }
325 return false;
326 }
327
328 void
329 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
330 {
331 const ImmediateValue *imm = ref.get()->asImm();
332 uint32_t val = imm->reg.data.u32;
333
334 if (len == 19) {
335 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
336 assert(!(val & 0x00000fff));
337 val >>= 12;
338 } else if (insn->sType == TYPE_F64) {
339 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
340 val = imm->reg.data.u64 >> 44;
341 }
342 assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
343 emitField( 56, 1, (val & 0x80000) >> 19);
344 emitField(pos, len, (val & 0x7ffff));
345 } else {
346 emitField(pos, len, val);
347 }
348 }
349
350 /*******************************************************************************
351 * modifiers
352 ******************************************************************************/
353
354 void
355 CodeEmitterGM107::emitCond3(int pos, CondCode code)
356 {
357 int data = 0;
358
359 switch (code) {
360 case CC_FL : data = 0x00; break;
361 case CC_LTU:
362 case CC_LT : data = 0x01; break;
363 case CC_EQU:
364 case CC_EQ : data = 0x02; break;
365 case CC_LEU:
366 case CC_LE : data = 0x03; break;
367 case CC_GTU:
368 case CC_GT : data = 0x04; break;
369 case CC_NEU:
370 case CC_NE : data = 0x05; break;
371 case CC_GEU:
372 case CC_GE : data = 0x06; break;
373 case CC_TR : data = 0x07; break;
374 default:
375 assert(!"invalid cond3");
376 break;
377 }
378
379 emitField(pos, 3, data);
380 }
381
382 void
383 CodeEmitterGM107::emitCond4(int pos, CondCode code)
384 {
385 int data = 0;
386
387 switch (code) {
388 case CC_FL: data = 0x00; break;
389 case CC_LT: data = 0x01; break;
390 case CC_EQ: data = 0x02; break;
391 case CC_LE: data = 0x03; break;
392 case CC_GT: data = 0x04; break;
393 case CC_NE: data = 0x05; break;
394 case CC_GE: data = 0x06; break;
395 // case CC_NUM: data = 0x07; break;
396 // case CC_NAN: data = 0x08; break;
397 case CC_LTU: data = 0x09; break;
398 case CC_EQU: data = 0x0a; break;
399 case CC_LEU: data = 0x0b; break;
400 case CC_GTU: data = 0x0c; break;
401 case CC_NEU: data = 0x0d; break;
402 case CC_GEU: data = 0x0e; break;
403 case CC_TR: data = 0x0f; break;
404 default:
405 assert(!"invalid cond4");
406 break;
407 }
408
409 emitField(pos, 4, data);
410 }
411
412 void
413 CodeEmitterGM107::emitO(int pos)
414 {
415 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
416 }
417
418 void
419 CodeEmitterGM107::emitP(int pos)
420 {
421 emitField(pos, 1, insn->perPatch);
422 }
423
424 void
425 CodeEmitterGM107::emitSAT(int pos)
426 {
427 emitField(pos, 1, insn->saturate);
428 }
429
430 void
431 CodeEmitterGM107::emitCC(int pos)
432 {
433 emitField(pos, 1, insn->flagsDef >= 0);
434 }
435
436 void
437 CodeEmitterGM107::emitX(int pos)
438 {
439 emitField(pos, 1, insn->flagsSrc >= 0);
440 }
441
442 void
443 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
444 {
445 emitField(pos, 1, ref.mod.abs());
446 }
447
448 void
449 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
450 {
451 emitField(pos, 1, ref.mod.neg());
452 }
453
454 void
455 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
456 {
457 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
458 }
459
460 void
461 CodeEmitterGM107::emitFMZ(int pos, int len)
462 {
463 emitField(pos, len, insn->dnz << 1 | insn->ftz);
464 }
465
466 void
467 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
468 {
469 int rm = 0, ri = 0;
470 switch (rnd) {
471 case ROUND_NI: ri = 1;
472 case ROUND_N : rm = 0; break;
473 case ROUND_MI: ri = 1;
474 case ROUND_M : rm = 1; break;
475 case ROUND_PI: ri = 1;
476 case ROUND_P : rm = 2; break;
477 case ROUND_ZI: ri = 1;
478 case ROUND_Z : rm = 3; break;
479 default:
480 assert(!"invalid round mode");
481 break;
482 }
483 emitField(rip, 1, ri);
484 emitField(rmp, 2, rm);
485 }
486
487 void
488 CodeEmitterGM107::emitPDIV(int pos)
489 {
490 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
491 if (insn->postFactor > 0)
492 emitField(pos, 3, 7 - insn->postFactor);
493 else
494 emitField(pos, 3, 0 - insn->postFactor);
495 }
496
497 void
498 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
499 {
500 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
501 }
502
503 /*******************************************************************************
504 * control flow
505 ******************************************************************************/
506
507 void
508 CodeEmitterGM107::emitEXIT()
509 {
510 emitInsn (0xe3000000);
511 emitCond5(0x00, CC_TR);
512 }
513
514 void
515 CodeEmitterGM107::emitBRA()
516 {
517 const FlowInstruction *insn = this->insn->asFlow();
518 int gpr = -1;
519
520 if (insn->indirect) {
521 if (insn->absolute)
522 emitInsn(0xe2000000); // JMX
523 else
524 emitInsn(0xe2500000); // BRX
525 gpr = 0x08;
526 } else {
527 if (insn->absolute)
528 emitInsn(0xe2100000); // JMP
529 else
530 emitInsn(0xe2400000); // BRA
531 emitField(0x07, 1, insn->allWarp);
532 }
533
534 emitField(0x06, 1, insn->limit);
535 emitCond5(0x00, CC_TR);
536
537 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
538 int32_t pos = insn->target.bb->binPos;
539 if (writeIssueDelays && !(pos & 0x1f))
540 pos += 8;
541 if (!insn->absolute)
542 emitField(0x14, 24, pos - (codeSize + 8));
543 else
544 emitField(0x14, 32, pos);
545 } else {
546 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
547 emitField(0x05, 1, 1);
548 }
549 }
550
551 void
552 CodeEmitterGM107::emitCAL()
553 {
554 const FlowInstruction *insn = this->insn->asFlow();
555
556 if (insn->absolute) {
557 emitInsn(0xe2200000, 0); // JCAL
558 } else {
559 emitInsn(0xe2600000, 0); // CAL
560 }
561
562 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
563 if (!insn->absolute)
564 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
565 else {
566 if (insn->builtin) {
567 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
568 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
569 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
570 } else {
571 emitField(0x14, 32, insn->target.bb->binPos);
572 }
573 }
574 } else {
575 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
576 emitField(0x05, 1, 1);
577 }
578 }
579
580 void
581 CodeEmitterGM107::emitPCNT()
582 {
583 const FlowInstruction *insn = this->insn->asFlow();
584
585 emitInsn(0xe2b00000, 0);
586
587 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
588 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
589 } else {
590 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
591 emitField(0x05, 1, 1);
592 }
593 }
594
595 void
596 CodeEmitterGM107::emitCONT()
597 {
598 emitInsn (0xe3500000);
599 emitCond5(0x00, CC_TR);
600 }
601
602 void
603 CodeEmitterGM107::emitPBK()
604 {
605 const FlowInstruction *insn = this->insn->asFlow();
606
607 emitInsn(0xe2a00000, 0);
608
609 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
610 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
611 } else {
612 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
613 emitField(0x05, 1, 1);
614 }
615 }
616
617 void
618 CodeEmitterGM107::emitBRK()
619 {
620 emitInsn (0xe3400000);
621 emitCond5(0x00, CC_TR);
622 }
623
624 void
625 CodeEmitterGM107::emitPRET()
626 {
627 const FlowInstruction *insn = this->insn->asFlow();
628
629 emitInsn(0xe2700000, 0);
630
631 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
632 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
633 } else {
634 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
635 emitField(0x05, 1, 1);
636 }
637 }
638
639 void
640 CodeEmitterGM107::emitRET()
641 {
642 emitInsn (0xe3200000);
643 emitCond5(0x00, CC_TR);
644 }
645
646 void
647 CodeEmitterGM107::emitSSY()
648 {
649 const FlowInstruction *insn = this->insn->asFlow();
650
651 emitInsn(0xe2900000, 0);
652
653 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
654 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
655 } else {
656 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
657 emitField(0x05, 1, 1);
658 }
659 }
660
661 void
662 CodeEmitterGM107::emitSYNC()
663 {
664 emitInsn (0xf0f80000);
665 emitCond5(0x00, CC_TR);
666 }
667
668 void
669 CodeEmitterGM107::emitSAM()
670 {
671 emitInsn(0xe3700000, 0);
672 }
673
674 void
675 CodeEmitterGM107::emitRAM()
676 {
677 emitInsn(0xe3800000, 0);
678 }
679
680 /*******************************************************************************
681 * predicate/cc
682 ******************************************************************************/
683
684 /*******************************************************************************
685 * movement / conversion
686 ******************************************************************************/
687
688 void
689 CodeEmitterGM107::emitMOV()
690 {
691 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
692 switch (insn->src(0).getFile()) {
693 case FILE_GPR:
694 if (insn->def(0).getFile() == FILE_PREDICATE) {
695 emitInsn(0x5b6a0000);
696 emitGPR (0x08);
697 } else {
698 emitInsn(0x5c980000);
699 }
700 emitGPR (0x14, insn->src(0));
701 break;
702 case FILE_MEMORY_CONST:
703 emitInsn(0x4c980000);
704 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
705 break;
706 case FILE_IMMEDIATE:
707 emitInsn(0x38980000);
708 emitIMMD(0x14, 19, insn->src(0));
709 break;
710 case FILE_PREDICATE:
711 emitInsn(0x50880000);
712 emitPRED(0x0c, insn->src(0));
713 emitPRED(0x1d);
714 emitPRED(0x27);
715 break;
716 default:
717 assert(!"bad src file");
718 break;
719 }
720 if (insn->def(0).getFile() != FILE_PREDICATE &&
721 insn->src(0).getFile() != FILE_PREDICATE)
722 emitField(0x27, 4, insn->lanes);
723 } else {
724 emitInsn (0x01000000);
725 emitIMMD (0x14, 32, insn->src(0));
726 emitField(0x0c, 4, insn->lanes);
727 }
728
729 if (insn->def(0).getFile() == FILE_PREDICATE) {
730 emitPRED(0x27);
731 emitPRED(0x03, insn->def(0));
732 emitPRED(0x00);
733 } else {
734 emitGPR(0x00, insn->def(0));
735 }
736 }
737
738 void
739 CodeEmitterGM107::emitS2R()
740 {
741 emitInsn(0xf0c80000);
742 emitSYS (0x14, insn->src(0));
743 emitGPR (0x00, insn->def(0));
744 }
745
746 void
747 CodeEmitterGM107::emitF2F()
748 {
749 RoundMode rnd = insn->rnd;
750
751 switch (insn->op) {
752 case OP_FLOOR: rnd = ROUND_MI; break;
753 case OP_CEIL : rnd = ROUND_PI; break;
754 case OP_TRUNC: rnd = ROUND_ZI; break;
755 default:
756 break;
757 }
758
759 switch (insn->src(0).getFile()) {
760 case FILE_GPR:
761 emitInsn(0x5ca80000);
762 emitGPR (0x14, insn->src(0));
763 break;
764 case FILE_MEMORY_CONST:
765 emitInsn(0x4ca80000);
766 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
767 break;
768 case FILE_IMMEDIATE:
769 emitInsn(0x38a80000);
770 emitIMMD(0x14, 19, insn->src(0));
771 break;
772 default:
773 assert(!"bad src0 file");
774 break;
775 }
776
777 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
778 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
779 emitCC (0x2f);
780 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
781 emitFMZ (0x2c, 1);
782 emitField(0x29, 1, insn->subOp);
783 emitRND (0x27, rnd, 0x2a);
784 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
785 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
786 emitGPR (0x00, insn->def(0));
787 }
788
789 void
790 CodeEmitterGM107::emitF2I()
791 {
792 RoundMode rnd = insn->rnd;
793
794 switch (insn->op) {
795 case OP_FLOOR: rnd = ROUND_M; break;
796 case OP_CEIL : rnd = ROUND_P; break;
797 case OP_TRUNC: rnd = ROUND_Z; break;
798 default:
799 break;
800 }
801
802 switch (insn->src(0).getFile()) {
803 case FILE_GPR:
804 emitInsn(0x5cb00000);
805 emitGPR (0x14, insn->src(0));
806 break;
807 case FILE_MEMORY_CONST:
808 emitInsn(0x4cb00000);
809 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
810 break;
811 case FILE_IMMEDIATE:
812 emitInsn(0x38b00000);
813 emitIMMD(0x14, 19, insn->src(0));
814 break;
815 default:
816 assert(!"bad src0 file");
817 break;
818 }
819
820 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
821 emitCC (0x2f);
822 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
823 emitFMZ (0x2c, 1);
824 emitRND (0x27, rnd, 0x2a);
825 emitField(0x0c, 1, isSignedType(insn->dType));
826 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
827 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
828 emitGPR (0x00, insn->def(0));
829 }
830
831 void
832 CodeEmitterGM107::emitI2F()
833 {
834 RoundMode rnd = insn->rnd;
835
836 switch (insn->op) {
837 case OP_FLOOR: rnd = ROUND_M; break;
838 case OP_CEIL : rnd = ROUND_P; break;
839 case OP_TRUNC: rnd = ROUND_Z; break;
840 default:
841 break;
842 }
843
844 switch (insn->src(0).getFile()) {
845 case FILE_GPR:
846 emitInsn(0x5cb80000);
847 emitGPR (0x14, insn->src(0));
848 break;
849 case FILE_MEMORY_CONST:
850 emitInsn(0x4cb80000);
851 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
852 break;
853 case FILE_IMMEDIATE:
854 emitInsn(0x38b80000);
855 emitIMMD(0x14, 19, insn->src(0));
856 break;
857 default:
858 assert(!"bad src0 file");
859 break;
860 }
861
862 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
863 emitCC (0x2f);
864 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
865 emitField(0x29, 2, insn->subOp);
866 emitRND (0x27, rnd, -1);
867 emitField(0x0d, 1, isSignedType(insn->sType));
868 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
869 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
870 emitGPR (0x00, insn->def(0));
871 }
872
873 void
874 CodeEmitterGM107::emitI2I()
875 {
876 switch (insn->src(0).getFile()) {
877 case FILE_GPR:
878 emitInsn(0x5ce00000);
879 emitGPR (0x14, insn->src(0));
880 break;
881 case FILE_MEMORY_CONST:
882 emitInsn(0x4ce00000);
883 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
884 break;
885 case FILE_IMMEDIATE:
886 emitInsn(0x38e00000);
887 emitIMMD(0x14, 19, insn->src(0));
888 break;
889 default:
890 assert(!"bad src0 file");
891 break;
892 }
893
894 emitSAT (0x32);
895 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
896 emitCC (0x2f);
897 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
898 emitField(0x29, 2, insn->subOp);
899 emitField(0x0d, 1, isSignedType(insn->sType));
900 emitField(0x0c, 1, isSignedType(insn->dType));
901 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
902 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
903 emitGPR (0x00, insn->def(0));
904 }
905
906 static void
907 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
908 {
909 int loc = entry->loc;
910 if (data.force_persample_interp)
911 code[loc + 1] |= 1 << 10;
912 else
913 code[loc + 1] &= ~(1 << 10);
914 }
915
916 void
917 CodeEmitterGM107::emitSEL()
918 {
919 switch (insn->src(1).getFile()) {
920 case FILE_GPR:
921 emitInsn(0x5ca00000);
922 emitGPR (0x14, insn->src(1));
923 break;
924 case FILE_MEMORY_CONST:
925 emitInsn(0x4ca00000);
926 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
927 break;
928 case FILE_IMMEDIATE:
929 emitInsn(0x38a00000);
930 emitIMMD(0x14, 19, insn->src(1));
931 break;
932 default:
933 assert(!"bad src1 file");
934 break;
935 }
936
937 emitINV (0x2a, insn->src(2));
938 emitPRED(0x27, insn->src(2));
939 emitGPR (0x08, insn->src(0));
940 emitGPR (0x00, insn->def(0));
941
942 if (insn->subOp == 1) {
943 addInterp(0, 0, selpFlip);
944 }
945 }
946
947 void
948 CodeEmitterGM107::emitSHFL()
949 {
950 int type = 0;
951
952 emitInsn (0xef100000);
953
954 switch (insn->src(1).getFile()) {
955 case FILE_GPR:
956 emitGPR(0x14, insn->src(1));
957 break;
958 case FILE_IMMEDIATE:
959 emitIMMD(0x14, 5, insn->src(1));
960 type |= 1;
961 break;
962 default:
963 assert(!"invalid src1 file");
964 break;
965 }
966
967 /*XXX: what is this arg? hardcode immediate for now */
968 emitField(0x22, 13, 0x1c03);
969 type |= 2;
970
971 emitPRED (0x30);
972 emitField(0x1e, 2, insn->subOp);
973 emitField(0x1c, 2, type);
974 emitGPR (0x08, insn->src(0));
975 emitGPR (0x00, insn->def(0));
976 }
977
978 /*******************************************************************************
979 * double
980 ******************************************************************************/
981
982 void
983 CodeEmitterGM107::emitDADD()
984 {
985 switch (insn->src(1).getFile()) {
986 case FILE_GPR:
987 emitInsn(0x5c700000);
988 emitGPR (0x14, insn->src(1));
989 break;
990 case FILE_MEMORY_CONST:
991 emitInsn(0x4c700000);
992 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
993 break;
994 case FILE_IMMEDIATE:
995 emitInsn(0x38700000);
996 emitIMMD(0x14, 19, insn->src(1));
997 break;
998 default:
999 assert(!"bad src1 file");
1000 break;
1001 }
1002 emitABS(0x31, insn->src(1));
1003 emitNEG(0x30, insn->src(0));
1004 emitCC (0x2f);
1005 emitABS(0x2e, insn->src(0));
1006 emitNEG(0x2d, insn->src(1));
1007
1008 if (insn->op == OP_SUB)
1009 code[1] ^= 0x00002000;
1010
1011 emitGPR(0x08, insn->src(0));
1012 emitGPR(0x00, insn->def(0));
1013 }
1014
1015 void
1016 CodeEmitterGM107::emitDMUL()
1017 {
1018 switch (insn->src(1).getFile()) {
1019 case FILE_GPR:
1020 emitInsn(0x5c800000);
1021 emitGPR (0x14, insn->src(1));
1022 break;
1023 case FILE_MEMORY_CONST:
1024 emitInsn(0x4c800000);
1025 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1026 break;
1027 case FILE_IMMEDIATE:
1028 emitInsn(0x38800000);
1029 emitIMMD(0x14, 19, insn->src(1));
1030 break;
1031 default:
1032 assert(!"bad src1 file");
1033 break;
1034 }
1035
1036 emitNEG2(0x30, insn->src(0), insn->src(1));
1037 emitCC (0x2f);
1038 emitRND (0x27);
1039 emitGPR (0x08, insn->src(0));
1040 emitGPR (0x00, insn->def(0));
1041 }
1042
1043 void
1044 CodeEmitterGM107::emitDFMA()
1045 {
1046 switch(insn->src(2).getFile()) {
1047 case FILE_GPR:
1048 switch (insn->src(1).getFile()) {
1049 case FILE_GPR:
1050 emitInsn(0x5b700000);
1051 emitGPR (0x14, insn->src(1));
1052 break;
1053 case FILE_MEMORY_CONST:
1054 emitInsn(0x4b700000);
1055 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1056 break;
1057 case FILE_IMMEDIATE:
1058 emitInsn(0x36700000);
1059 emitIMMD(0x14, 19, insn->src(1));
1060 break;
1061 default:
1062 assert(!"bad src1 file");
1063 break;
1064 }
1065 emitGPR (0x27, insn->src(2));
1066 break;
1067 case FILE_MEMORY_CONST:
1068 emitInsn(0x53700000);
1069 emitGPR (0x27, insn->src(1));
1070 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1071 break;
1072 default:
1073 assert(!"bad src2 file");
1074 break;
1075 }
1076
1077 emitRND (0x32);
1078 emitNEG (0x31, insn->src(2));
1079 emitNEG2(0x30, insn->src(0), insn->src(1));
1080 emitCC (0x2f);
1081 emitGPR (0x08, insn->src(0));
1082 emitGPR (0x00, insn->def(0));
1083 }
1084
1085 void
1086 CodeEmitterGM107::emitDMNMX()
1087 {
1088 switch (insn->src(1).getFile()) {
1089 case FILE_GPR:
1090 emitInsn(0x5c500000);
1091 emitGPR (0x14, insn->src(1));
1092 break;
1093 case FILE_MEMORY_CONST:
1094 emitInsn(0x4c500000);
1095 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1096 break;
1097 case FILE_IMMEDIATE:
1098 emitInsn(0x38500000);
1099 emitIMMD(0x14, 19, insn->src(1));
1100 break;
1101 default:
1102 assert(!"bad src1 file");
1103 break;
1104 }
1105
1106 emitABS (0x31, insn->src(1));
1107 emitNEG (0x30, insn->src(0));
1108 emitCC (0x2f);
1109 emitABS (0x2e, insn->src(0));
1110 emitNEG (0x2d, insn->src(1));
1111 emitField(0x2a, 1, insn->op == OP_MAX);
1112 emitPRED (0x27);
1113 emitGPR (0x08, insn->src(0));
1114 emitGPR (0x00, insn->def(0));
1115 }
1116
1117 void
1118 CodeEmitterGM107::emitDSET()
1119 {
1120 const CmpInstruction *insn = this->insn->asCmp();
1121
1122 switch (insn->src(1).getFile()) {
1123 case FILE_GPR:
1124 emitInsn(0x59000000);
1125 emitGPR (0x14, insn->src(1));
1126 break;
1127 case FILE_MEMORY_CONST:
1128 emitInsn(0x49000000);
1129 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1130 break;
1131 case FILE_IMMEDIATE:
1132 emitInsn(0x32000000);
1133 emitIMMD(0x14, 19, insn->src(1));
1134 break;
1135 default:
1136 assert(!"bad src1 file");
1137 break;
1138 }
1139
1140 if (insn->op != OP_SET) {
1141 switch (insn->op) {
1142 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1143 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1144 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1145 default:
1146 assert(!"invalid set op");
1147 break;
1148 }
1149 emitPRED(0x27, insn->src(2));
1150 } else {
1151 emitPRED(0x27);
1152 }
1153
1154 emitABS (0x36, insn->src(0));
1155 emitNEG (0x35, insn->src(1));
1156 emitField(0x34, 1, insn->dType == TYPE_F32);
1157 emitCond4(0x30, insn->setCond);
1158 emitCC (0x2f);
1159 emitABS (0x2c, insn->src(1));
1160 emitNEG (0x2b, insn->src(0));
1161 emitGPR (0x08, insn->src(0));
1162 emitGPR (0x00, insn->def(0));
1163 }
1164
1165 void
1166 CodeEmitterGM107::emitDSETP()
1167 {
1168 const CmpInstruction *insn = this->insn->asCmp();
1169
1170 switch (insn->src(1).getFile()) {
1171 case FILE_GPR:
1172 emitInsn(0x5b800000);
1173 emitGPR (0x14, insn->src(1));
1174 break;
1175 case FILE_MEMORY_CONST:
1176 emitInsn(0x4b800000);
1177 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1178 break;
1179 case FILE_IMMEDIATE:
1180 emitInsn(0x36800000);
1181 emitIMMD(0x14, 19, insn->src(1));
1182 break;
1183 default:
1184 assert(!"bad src1 file");
1185 break;
1186 }
1187
1188 if (insn->op != OP_SET) {
1189 switch (insn->op) {
1190 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1191 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1192 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1193 default:
1194 assert(!"invalid set op");
1195 break;
1196 }
1197 emitPRED(0x27, insn->src(2));
1198 } else {
1199 emitPRED(0x27);
1200 }
1201
1202 emitCond4(0x30, insn->setCond);
1203 emitABS (0x2c, insn->src(1));
1204 emitNEG (0x2b, insn->src(0));
1205 emitGPR (0x08, insn->src(0));
1206 emitABS (0x07, insn->src(0));
1207 emitNEG (0x06, insn->src(1));
1208 emitPRED (0x03, insn->def(0));
1209 if (insn->defExists(1))
1210 emitPRED(0x00, insn->def(1));
1211 else
1212 emitPRED(0x00);
1213 }
1214
1215 /*******************************************************************************
1216 * float
1217 ******************************************************************************/
1218
1219 void
1220 CodeEmitterGM107::emitFADD()
1221 {
1222 if (!longIMMD(insn->src(1))) {
1223 switch (insn->src(1).getFile()) {
1224 case FILE_GPR:
1225 emitInsn(0x5c580000);
1226 emitGPR (0x14, insn->src(1));
1227 break;
1228 case FILE_MEMORY_CONST:
1229 emitInsn(0x4c580000);
1230 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1231 break;
1232 case FILE_IMMEDIATE:
1233 emitInsn(0x38580000);
1234 emitIMMD(0x14, 19, insn->src(1));
1235 break;
1236 default:
1237 assert(!"bad src1 file");
1238 break;
1239 }
1240 emitSAT(0x32);
1241 emitABS(0x31, insn->src(1));
1242 emitNEG(0x30, insn->src(0));
1243 emitCC (0x2f);
1244 emitABS(0x2e, insn->src(0));
1245 emitNEG(0x2d, insn->src(1));
1246 emitFMZ(0x2c, 1);
1247
1248 if (insn->op == OP_SUB)
1249 code[1] ^= 0x00002000;
1250 } else {
1251 emitInsn(0x08000000);
1252 emitABS(0x39, insn->src(1));
1253 emitNEG(0x38, insn->src(0));
1254 emitFMZ(0x37, 1);
1255 emitABS(0x36, insn->src(0));
1256 emitNEG(0x35, insn->src(1));
1257 emitCC (0x34);
1258 emitIMMD(0x14, 32, insn->src(1));
1259
1260 if (insn->op == OP_SUB)
1261 code[1] ^= 0x00080000;
1262 }
1263
1264 emitGPR(0x08, insn->src(0));
1265 emitGPR(0x00, insn->def(0));
1266 }
1267
1268 void
1269 CodeEmitterGM107::emitFMUL()
1270 {
1271 if (!longIMMD(insn->src(1))) {
1272 switch (insn->src(1).getFile()) {
1273 case FILE_GPR:
1274 emitInsn(0x5c680000);
1275 emitGPR (0x14, insn->src(1));
1276 break;
1277 case FILE_MEMORY_CONST:
1278 emitInsn(0x4c680000);
1279 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1280 break;
1281 case FILE_IMMEDIATE:
1282 emitInsn(0x38680000);
1283 emitIMMD(0x14, 19, insn->src(1));
1284 break;
1285 default:
1286 assert(!"bad src1 file");
1287 break;
1288 }
1289 emitSAT (0x32);
1290 emitNEG2(0x30, insn->src(0), insn->src(1));
1291 emitCC (0x2f);
1292 emitFMZ (0x2c, 2);
1293 emitPDIV(0x29);
1294 emitRND (0x27);
1295 } else {
1296 emitInsn(0x1e000000);
1297 emitSAT (0x37);
1298 emitFMZ (0x35, 2);
1299 emitCC (0x34);
1300 emitIMMD(0x14, 32, insn->src(1));
1301 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1302 code[1] ^= 0x00080000; /* flip immd sign bit */
1303 }
1304
1305 emitGPR(0x08, insn->src(0));
1306 emitGPR(0x00, insn->def(0));
1307 }
1308
1309 void
1310 CodeEmitterGM107::emitFFMA()
1311 {
1312 /*XXX: ffma32i exists, but not using it as third src overlaps dst */
1313 switch(insn->src(2).getFile()) {
1314 case FILE_GPR:
1315 switch (insn->src(1).getFile()) {
1316 case FILE_GPR:
1317 emitInsn(0x59800000);
1318 emitGPR (0x14, insn->src(1));
1319 break;
1320 case FILE_MEMORY_CONST:
1321 emitInsn(0x49800000);
1322 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1323 break;
1324 case FILE_IMMEDIATE:
1325 emitInsn(0x32800000);
1326 emitIMMD(0x14, 19, insn->src(1));
1327 break;
1328 default:
1329 assert(!"bad src1 file");
1330 break;
1331 }
1332 emitGPR (0x27, insn->src(2));
1333 break;
1334 case FILE_MEMORY_CONST:
1335 emitInsn(0x51800000);
1336 emitGPR (0x27, insn->src(1));
1337 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1338 break;
1339 default:
1340 assert(!"bad src2 file");
1341 break;
1342 }
1343 emitRND (0x33);
1344 emitSAT (0x32);
1345 emitNEG (0x31, insn->src(2));
1346 emitNEG2(0x30, insn->src(0), insn->src(1));
1347 emitCC (0x2f);
1348
1349 emitFMZ(0x35, 2);
1350 emitGPR(0x08, insn->src(0));
1351 emitGPR(0x00, insn->def(0));
1352 }
1353
1354 void
1355 CodeEmitterGM107::emitMUFU()
1356 {
1357 int mufu = 0;
1358
1359 switch (insn->op) {
1360 case OP_COS: mufu = 0; break;
1361 case OP_SIN: mufu = 1; break;
1362 case OP_EX2: mufu = 2; break;
1363 case OP_LG2: mufu = 3; break;
1364 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1365 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1366 default:
1367 assert(!"invalid mufu");
1368 break;
1369 }
1370
1371 emitInsn (0x50800000);
1372 emitSAT (0x32);
1373 emitNEG (0x30, insn->src(0));
1374 emitABS (0x2e, insn->src(0));
1375 emitField(0x14, 3, mufu);
1376 emitGPR (0x08, insn->src(0));
1377 emitGPR (0x00, insn->def(0));
1378 }
1379
1380 void
1381 CodeEmitterGM107::emitFMNMX()
1382 {
1383 switch (insn->src(1).getFile()) {
1384 case FILE_GPR:
1385 emitInsn(0x5c600000);
1386 emitGPR (0x14, insn->src(1));
1387 break;
1388 case FILE_MEMORY_CONST:
1389 emitInsn(0x4c600000);
1390 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1391 break;
1392 case FILE_IMMEDIATE:
1393 emitInsn(0x38600000);
1394 emitIMMD(0x14, 19, insn->src(1));
1395 break;
1396 default:
1397 assert(!"bad src1 file");
1398 break;
1399 }
1400
1401 emitField(0x2a, 1, insn->op == OP_MAX);
1402 emitPRED (0x27);
1403
1404 emitABS(0x31, insn->src(1));
1405 emitNEG(0x30, insn->src(0));
1406 emitCC (0x2f);
1407 emitABS(0x2e, insn->src(0));
1408 emitNEG(0x2d, insn->src(1));
1409 emitFMZ(0x2c, 1);
1410 emitGPR(0x08, insn->src(0));
1411 emitGPR(0x00, insn->def(0));
1412 }
1413
1414 void
1415 CodeEmitterGM107::emitRRO()
1416 {
1417 switch (insn->src(0).getFile()) {
1418 case FILE_GPR:
1419 emitInsn(0x5c900000);
1420 emitGPR (0x14, insn->src(0));
1421 break;
1422 case FILE_MEMORY_CONST:
1423 emitInsn(0x4c900000);
1424 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1425 break;
1426 case FILE_IMMEDIATE:
1427 emitInsn(0x38900000);
1428 emitIMMD(0x14, 19, insn->src(0));
1429 break;
1430 default:
1431 assert(!"bad src file");
1432 break;
1433 }
1434
1435 emitABS (0x31, insn->src(0));
1436 emitNEG (0x2d, insn->src(0));
1437 emitField(0x27, 1, insn->op == OP_PREEX2);
1438 emitGPR (0x00, insn->def(0));
1439 }
1440
1441 void
1442 CodeEmitterGM107::emitFCMP()
1443 {
1444 const CmpInstruction *insn = this->insn->asCmp();
1445 CondCode cc = insn->setCond;
1446
1447 if (insn->src(2).mod.neg())
1448 cc = reverseCondCode(cc);
1449
1450 switch(insn->src(2).getFile()) {
1451 case FILE_GPR:
1452 switch (insn->src(1).getFile()) {
1453 case FILE_GPR:
1454 emitInsn(0x5ba00000);
1455 emitGPR (0x14, insn->src(1));
1456 break;
1457 case FILE_MEMORY_CONST:
1458 emitInsn(0x4ba00000);
1459 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1460 break;
1461 case FILE_IMMEDIATE:
1462 emitInsn(0x36a00000);
1463 emitIMMD(0x14, 19, insn->src(1));
1464 break;
1465 default:
1466 assert(!"bad src1 file");
1467 break;
1468 }
1469 emitGPR (0x27, insn->src(2));
1470 break;
1471 case FILE_MEMORY_CONST:
1472 emitInsn(0x53a00000);
1473 emitGPR (0x27, insn->src(1));
1474 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1475 break;
1476 default:
1477 assert(!"bad src2 file");
1478 break;
1479 }
1480
1481 emitCond4(0x30, cc);
1482 emitFMZ (0x2f, 1);
1483 emitGPR (0x08, insn->src(0));
1484 emitGPR (0x00, insn->def(0));
1485 }
1486
1487 void
1488 CodeEmitterGM107::emitFSET()
1489 {
1490 const CmpInstruction *insn = this->insn->asCmp();
1491
1492 switch (insn->src(1).getFile()) {
1493 case FILE_GPR:
1494 emitInsn(0x58000000);
1495 emitGPR (0x14, insn->src(1));
1496 break;
1497 case FILE_MEMORY_CONST:
1498 emitInsn(0x48000000);
1499 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1500 break;
1501 case FILE_IMMEDIATE:
1502 emitInsn(0x30000000);
1503 emitIMMD(0x14, 19, insn->src(1));
1504 break;
1505 default:
1506 assert(!"bad src1 file");
1507 break;
1508 }
1509
1510 if (insn->op != OP_SET) {
1511 switch (insn->op) {
1512 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1513 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1514 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1515 default:
1516 assert(!"invalid set op");
1517 break;
1518 }
1519 emitPRED(0x27, insn->src(2));
1520 } else {
1521 emitPRED(0x27);
1522 }
1523
1524 emitFMZ (0x37, 1);
1525 emitABS (0x36, insn->src(0));
1526 emitNEG (0x35, insn->src(1));
1527 emitField(0x34, 1, insn->dType == TYPE_F32);
1528 emitCond4(0x30, insn->setCond);
1529 emitCC (0x2f);
1530 emitABS (0x2c, insn->src(1));
1531 emitNEG (0x2b, insn->src(0));
1532 emitGPR (0x08, insn->src(0));
1533 emitGPR (0x00, insn->def(0));
1534 }
1535
1536 void
1537 CodeEmitterGM107::emitFSETP()
1538 {
1539 const CmpInstruction *insn = this->insn->asCmp();
1540
1541 switch (insn->src(1).getFile()) {
1542 case FILE_GPR:
1543 emitInsn(0x5bb00000);
1544 emitGPR (0x14, insn->src(1));
1545 break;
1546 case FILE_MEMORY_CONST:
1547 emitInsn(0x4bb00000);
1548 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1549 break;
1550 case FILE_IMMEDIATE:
1551 emitInsn(0x36b00000);
1552 emitIMMD(0x14, 19, insn->src(1));
1553 break;
1554 default:
1555 assert(!"bad src1 file");
1556 break;
1557 }
1558
1559 if (insn->op != OP_SET) {
1560 switch (insn->op) {
1561 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1562 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1563 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1564 default:
1565 assert(!"invalid set op");
1566 break;
1567 }
1568 emitPRED(0x27, insn->src(2));
1569 } else {
1570 emitPRED(0x27);
1571 }
1572
1573 emitCond4(0x30, insn->setCond);
1574 emitFMZ (0x2f, 1);
1575 emitABS (0x2c, insn->src(1));
1576 emitNEG (0x2b, insn->src(0));
1577 emitGPR (0x08, insn->src(0));
1578 emitABS (0x07, insn->src(0));
1579 emitNEG (0x06, insn->src(1));
1580 emitPRED (0x03, insn->def(0));
1581 if (insn->defExists(1))
1582 emitPRED(0x00, insn->def(1));
1583 else
1584 emitPRED(0x00);
1585 }
1586
1587 void
1588 CodeEmitterGM107::emitFSWZADD()
1589 {
1590 emitInsn (0x50f80000);
1591 emitCC (0x2f);
1592 emitFMZ (0x2c, 1);
1593 emitRND (0x27);
1594 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1595 emitField(0x1c, 8, insn->subOp);
1596 if (insn->predSrc != 1)
1597 emitGPR (0x14, insn->src(1));
1598 else
1599 emitGPR (0x14);
1600 emitGPR (0x08, insn->src(0));
1601 emitGPR (0x00, insn->def(0));
1602 }
1603
1604 /*******************************************************************************
1605 * integer
1606 ******************************************************************************/
1607
1608 void
1609 CodeEmitterGM107::emitLOP()
1610 {
1611 int lop = 0;
1612
1613 switch (insn->op) {
1614 case OP_AND: lop = 0; break;
1615 case OP_OR : lop = 1; break;
1616 case OP_XOR: lop = 2; break;
1617 default:
1618 assert(!"invalid lop");
1619 break;
1620 }
1621
1622 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1623 switch (insn->src(1).getFile()) {
1624 case FILE_GPR:
1625 emitInsn(0x5c400000);
1626 emitGPR (0x14, insn->src(1));
1627 break;
1628 case FILE_MEMORY_CONST:
1629 emitInsn(0x4c400000);
1630 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1631 break;
1632 case FILE_IMMEDIATE:
1633 emitInsn(0x38400000);
1634 emitIMMD(0x14, 19, insn->src(1));
1635 break;
1636 default:
1637 assert(!"bad src1 file");
1638 break;
1639 }
1640 emitPRED (0x30);
1641 emitCC (0x2f);
1642 emitX (0x2b);
1643 emitField(0x29, 2, lop);
1644 emitINV (0x28, insn->src(1));
1645 emitINV (0x27, insn->src(0));
1646 } else {
1647 emitInsn (0x04000000);
1648 emitX (0x39);
1649 emitINV (0x38, insn->src(1));
1650 emitINV (0x37, insn->src(0));
1651 emitField(0x35, 2, lop);
1652 emitCC (0x34);
1653 emitIMMD (0x14, 32, insn->src(1));
1654 }
1655
1656 emitGPR (0x08, insn->src(0));
1657 emitGPR (0x00, insn->def(0));
1658 }
1659
1660 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1661 void
1662 CodeEmitterGM107::emitNOT()
1663 {
1664 if (!longIMMD(insn->src(0))) {
1665 switch (insn->src(0).getFile()) {
1666 case FILE_GPR:
1667 emitInsn(0x5c400700);
1668 emitGPR (0x14, insn->src(0));
1669 break;
1670 case FILE_MEMORY_CONST:
1671 emitInsn(0x4c400700);
1672 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1673 break;
1674 case FILE_IMMEDIATE:
1675 emitInsn(0x38400700);
1676 emitIMMD(0x14, 19, insn->src(0));
1677 break;
1678 default:
1679 assert(!"bad src1 file");
1680 break;
1681 }
1682 emitPRED (0x30);
1683 } else {
1684 emitInsn (0x05600000);
1685 emitIMMD (0x14, 32, insn->src(1));
1686 }
1687
1688 emitGPR(0x08);
1689 emitGPR(0x00, insn->def(0));
1690 }
1691
1692 void
1693 CodeEmitterGM107::emitIADD()
1694 {
1695 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1696 switch (insn->src(1).getFile()) {
1697 case FILE_GPR:
1698 emitInsn(0x5c100000);
1699 emitGPR (0x14, insn->src(1));
1700 break;
1701 case FILE_MEMORY_CONST:
1702 emitInsn(0x4c100000);
1703 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1704 break;
1705 case FILE_IMMEDIATE:
1706 emitInsn(0x38100000);
1707 emitIMMD(0x14, 19, insn->src(1));
1708 break;
1709 default:
1710 assert(!"bad src1 file");
1711 break;
1712 }
1713 emitSAT(0x32);
1714 emitNEG(0x31, insn->src(0));
1715 emitNEG(0x30, insn->src(1));
1716 emitCC (0x2f);
1717 emitX (0x2b);
1718 } else {
1719 emitInsn(0x1c000000);
1720 emitNEG (0x38, insn->src(0));
1721 emitSAT (0x36);
1722 emitX (0x35);
1723 emitCC (0x34);
1724 emitIMMD(0x14, 32, insn->src(1));
1725 }
1726
1727 if (insn->op == OP_SUB)
1728 code[1] ^= 0x00010000;
1729
1730 emitGPR(0x08, insn->src(0));
1731 emitGPR(0x00, insn->def(0));
1732 }
1733
1734 void
1735 CodeEmitterGM107::emitIMUL()
1736 {
1737 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1738 switch (insn->src(1).getFile()) {
1739 case FILE_GPR:
1740 emitInsn(0x5c380000);
1741 emitGPR (0x14, insn->src(1));
1742 break;
1743 case FILE_MEMORY_CONST:
1744 emitInsn(0x4c380000);
1745 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1746 break;
1747 case FILE_IMMEDIATE:
1748 emitInsn(0x38380000);
1749 emitIMMD(0x14, 19, insn->src(1));
1750 break;
1751 default:
1752 assert(!"bad src1 file");
1753 break;
1754 }
1755 emitCC (0x2f);
1756 emitField(0x29, 1, isSignedType(insn->sType));
1757 emitField(0x28, 1, isSignedType(insn->dType));
1758 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1759 } else {
1760 emitInsn (0x1f000000);
1761 emitField(0x37, 1, isSignedType(insn->sType));
1762 emitField(0x36, 1, isSignedType(insn->dType));
1763 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1764 emitCC (0x34);
1765 emitIMMD (0x14, 32, insn->src(1));
1766 }
1767
1768 emitGPR(0x08, insn->src(0));
1769 emitGPR(0x00, insn->def(0));
1770 }
1771
1772 void
1773 CodeEmitterGM107::emitIMAD()
1774 {
1775 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1776 switch(insn->src(2).getFile()) {
1777 case FILE_GPR:
1778 switch (insn->src(1).getFile()) {
1779 case FILE_GPR:
1780 emitInsn(0x5a000000);
1781 emitGPR (0x14, insn->src(1));
1782 break;
1783 case FILE_MEMORY_CONST:
1784 emitInsn(0x4a000000);
1785 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1786 break;
1787 case FILE_IMMEDIATE:
1788 emitInsn(0x34000000);
1789 emitIMMD(0x14, 19, insn->src(1));
1790 break;
1791 default:
1792 assert(!"bad src1 file");
1793 break;
1794 }
1795 emitGPR (0x27, insn->src(2));
1796 break;
1797 case FILE_MEMORY_CONST:
1798 emitInsn(0x52000000);
1799 emitGPR (0x27, insn->src(1));
1800 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1801 break;
1802 default:
1803 assert(!"bad src2 file");
1804 break;
1805 }
1806
1807 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1808 emitField(0x35, 1, isSignedType(insn->sType));
1809 emitNEG (0x34, insn->src(2));
1810 emitNEG2 (0x33, insn->src(0), insn->src(1));
1811 emitSAT (0x32);
1812 emitX (0x31);
1813 emitField(0x30, 1, isSignedType(insn->dType));
1814 emitCC (0x2f);
1815 emitGPR (0x08, insn->src(0));
1816 emitGPR (0x00, insn->def(0));
1817 }
1818
1819 void
1820 CodeEmitterGM107::emitISCADD()
1821 {
1822 switch (insn->src(2).getFile()) {
1823 case FILE_GPR:
1824 emitInsn(0x5c180000);
1825 emitGPR (0x14, insn->src(2));
1826 break;
1827 case FILE_MEMORY_CONST:
1828 emitInsn(0x4c180000);
1829 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1830 break;
1831 case FILE_IMMEDIATE:
1832 emitInsn(0x38180000);
1833 emitIMMD(0x14, 19, insn->src(2));
1834 break;
1835 default:
1836 assert(!"bad src1 file");
1837 break;
1838 }
1839 emitNEG (0x31, insn->src(0));
1840 emitNEG (0x30, insn->src(2));
1841 emitCC (0x2f);
1842 emitIMMD(0x27, 5, insn->src(1));
1843 emitGPR (0x08, insn->src(0));
1844 emitGPR (0x00, insn->def(0));
1845 }
1846
1847 void
1848 CodeEmitterGM107::emitIMNMX()
1849 {
1850 switch (insn->src(1).getFile()) {
1851 case FILE_GPR:
1852 emitInsn(0x5c200000);
1853 emitGPR (0x14, insn->src(1));
1854 break;
1855 case FILE_MEMORY_CONST:
1856 emitInsn(0x4c200000);
1857 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1858 break;
1859 case FILE_IMMEDIATE:
1860 emitInsn(0x38200000);
1861 emitIMMD(0x14, 19, insn->src(1));
1862 break;
1863 default:
1864 assert(!"bad src1 file");
1865 break;
1866 }
1867
1868 emitField(0x30, 1, isSignedType(insn->dType));
1869 emitCC (0x2f);
1870 emitField(0x2b, 2, insn->subOp);
1871 emitField(0x2a, 1, insn->op == OP_MAX);
1872 emitPRED (0x27);
1873 emitGPR (0x08, insn->src(0));
1874 emitGPR (0x00, insn->def(0));
1875 }
1876
1877 void
1878 CodeEmitterGM107::emitICMP()
1879 {
1880 const CmpInstruction *insn = this->insn->asCmp();
1881 CondCode cc = insn->setCond;
1882
1883 if (insn->src(2).mod.neg())
1884 cc = reverseCondCode(cc);
1885
1886 switch(insn->src(2).getFile()) {
1887 case FILE_GPR:
1888 switch (insn->src(1).getFile()) {
1889 case FILE_GPR:
1890 emitInsn(0x5b400000);
1891 emitGPR (0x14, insn->src(1));
1892 break;
1893 case FILE_MEMORY_CONST:
1894 emitInsn(0x4b400000);
1895 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1896 break;
1897 case FILE_IMMEDIATE:
1898 emitInsn(0x36400000);
1899 emitIMMD(0x14, 19, insn->src(1));
1900 break;
1901 default:
1902 assert(!"bad src1 file");
1903 break;
1904 }
1905 emitGPR (0x27, insn->src(2));
1906 break;
1907 case FILE_MEMORY_CONST:
1908 emitInsn(0x53400000);
1909 emitGPR (0x27, insn->src(1));
1910 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1911 break;
1912 default:
1913 assert(!"bad src2 file");
1914 break;
1915 }
1916
1917 emitCond3(0x31, cc);
1918 emitField(0x30, 1, isSignedType(insn->sType));
1919 emitGPR (0x08, insn->src(0));
1920 emitGPR (0x00, insn->def(0));
1921 }
1922
1923 void
1924 CodeEmitterGM107::emitISET()
1925 {
1926 const CmpInstruction *insn = this->insn->asCmp();
1927
1928 switch (insn->src(1).getFile()) {
1929 case FILE_GPR:
1930 emitInsn(0x5b500000);
1931 emitGPR (0x14, insn->src(1));
1932 break;
1933 case FILE_MEMORY_CONST:
1934 emitInsn(0x4b500000);
1935 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1936 break;
1937 case FILE_IMMEDIATE:
1938 emitInsn(0x36500000);
1939 emitIMMD(0x14, 19, insn->src(1));
1940 break;
1941 default:
1942 assert(!"bad src1 file");
1943 break;
1944 }
1945
1946 if (insn->op != OP_SET) {
1947 switch (insn->op) {
1948 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1949 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1950 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1951 default:
1952 assert(!"invalid set op");
1953 break;
1954 }
1955 emitPRED(0x27, insn->src(2));
1956 } else {
1957 emitPRED(0x27);
1958 }
1959
1960 emitCond3(0x31, insn->setCond);
1961 emitField(0x30, 1, isSignedType(insn->sType));
1962 emitCC (0x2f);
1963 emitField(0x2c, 1, insn->dType == TYPE_F32);
1964 emitX (0x2b);
1965 emitGPR (0x08, insn->src(0));
1966 emitGPR (0x00, insn->def(0));
1967 }
1968
1969 void
1970 CodeEmitterGM107::emitISETP()
1971 {
1972 const CmpInstruction *insn = this->insn->asCmp();
1973
1974 switch (insn->src(1).getFile()) {
1975 case FILE_GPR:
1976 emitInsn(0x5b600000);
1977 emitGPR (0x14, insn->src(1));
1978 break;
1979 case FILE_MEMORY_CONST:
1980 emitInsn(0x4b600000);
1981 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1982 break;
1983 case FILE_IMMEDIATE:
1984 emitInsn(0x36600000);
1985 emitIMMD(0x14, 19, insn->src(1));
1986 break;
1987 default:
1988 assert(!"bad src1 file");
1989 break;
1990 }
1991
1992 if (insn->op != OP_SET) {
1993 switch (insn->op) {
1994 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1995 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1996 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1997 default:
1998 assert(!"invalid set op");
1999 break;
2000 }
2001 emitPRED(0x27, insn->src(2));
2002 } else {
2003 emitPRED(0x27);
2004 }
2005
2006 emitCond3(0x31, insn->setCond);
2007 emitField(0x30, 1, isSignedType(insn->sType));
2008 emitX (0x2b);
2009 emitGPR (0x08, insn->src(0));
2010 emitPRED (0x03, insn->def(0));
2011 if (insn->defExists(1))
2012 emitPRED(0x00, insn->def(1));
2013 else
2014 emitPRED(0x00);
2015 }
2016
2017 void
2018 CodeEmitterGM107::emitSHL()
2019 {
2020 switch (insn->src(1).getFile()) {
2021 case FILE_GPR:
2022 emitInsn(0x5c480000);
2023 emitGPR (0x14, insn->src(1));
2024 break;
2025 case FILE_MEMORY_CONST:
2026 emitInsn(0x4c480000);
2027 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2028 break;
2029 case FILE_IMMEDIATE:
2030 emitInsn(0x38480000);
2031 emitIMMD(0x14, 19, insn->src(1));
2032 break;
2033 default:
2034 assert(!"bad src1 file");
2035 break;
2036 }
2037
2038 emitCC (0x2f);
2039 emitX (0x2b);
2040 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2041 emitGPR (0x08, insn->src(0));
2042 emitGPR (0x00, insn->def(0));
2043 }
2044
2045 void
2046 CodeEmitterGM107::emitSHR()
2047 {
2048 switch (insn->src(1).getFile()) {
2049 case FILE_GPR:
2050 emitInsn(0x5c280000);
2051 emitGPR (0x14, insn->src(1));
2052 break;
2053 case FILE_MEMORY_CONST:
2054 emitInsn(0x4c280000);
2055 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2056 break;
2057 case FILE_IMMEDIATE:
2058 emitInsn(0x38280000);
2059 emitIMMD(0x14, 19, insn->src(1));
2060 break;
2061 default:
2062 assert(!"bad src1 file");
2063 break;
2064 }
2065
2066 emitField(0x30, 1, isSignedType(insn->dType));
2067 emitCC (0x2f);
2068 emitX (0x2c);
2069 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2070 emitGPR (0x08, insn->src(0));
2071 emitGPR (0x00, insn->def(0));
2072 }
2073
2074 void
2075 CodeEmitterGM107::emitPOPC()
2076 {
2077 switch (insn->src(0).getFile()) {
2078 case FILE_GPR:
2079 emitInsn(0x5c080000);
2080 emitGPR (0x14, insn->src(0));
2081 break;
2082 case FILE_MEMORY_CONST:
2083 emitInsn(0x4c080000);
2084 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2085 break;
2086 case FILE_IMMEDIATE:
2087 emitInsn(0x38080000);
2088 emitIMMD(0x14, 19, insn->src(0));
2089 break;
2090 default:
2091 assert(!"bad src1 file");
2092 break;
2093 }
2094
2095 emitINV(0x28, insn->src(0));
2096 emitGPR(0x00, insn->def(0));
2097 }
2098
2099 void
2100 CodeEmitterGM107::emitBFI()
2101 {
2102 switch(insn->src(2).getFile()) {
2103 case FILE_GPR:
2104 switch (insn->src(1).getFile()) {
2105 case FILE_GPR:
2106 emitInsn(0x5bf00000);
2107 emitGPR (0x14, insn->src(1));
2108 break;
2109 case FILE_MEMORY_CONST:
2110 emitInsn(0x4bf00000);
2111 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2112 break;
2113 case FILE_IMMEDIATE:
2114 emitInsn(0x36f00000);
2115 emitIMMD(0x14, 19, insn->src(1));
2116 break;
2117 default:
2118 assert(!"bad src1 file");
2119 break;
2120 }
2121 emitGPR (0x27, insn->src(2));
2122 break;
2123 case FILE_MEMORY_CONST:
2124 emitInsn(0x53f00000);
2125 emitGPR (0x27, insn->src(1));
2126 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2127 break;
2128 default:
2129 assert(!"bad src2 file");
2130 break;
2131 }
2132
2133 emitCC (0x2f);
2134 emitGPR (0x08, insn->src(0));
2135 emitGPR (0x00, insn->def(0));
2136 }
2137
2138 void
2139 CodeEmitterGM107::emitBFE()
2140 {
2141 switch (insn->src(1).getFile()) {
2142 case FILE_GPR:
2143 emitInsn(0x5c000000);
2144 emitGPR (0x14, insn->src(1));
2145 break;
2146 case FILE_MEMORY_CONST:
2147 emitInsn(0x4c000000);
2148 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2149 break;
2150 case FILE_IMMEDIATE:
2151 emitInsn(0x38000000);
2152 emitIMMD(0x14, 19, insn->src(1));
2153 break;
2154 default:
2155 assert(!"bad src1 file");
2156 break;
2157 }
2158
2159 emitField(0x30, 1, isSignedType(insn->dType));
2160 emitCC (0x2f);
2161 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2162 emitGPR (0x08, insn->src(0));
2163 emitGPR (0x00, insn->def(0));
2164 }
2165
2166 void
2167 CodeEmitterGM107::emitFLO()
2168 {
2169 switch (insn->src(0).getFile()) {
2170 case FILE_GPR:
2171 emitInsn(0x5c300000);
2172 emitGPR (0x14, insn->src(0));
2173 break;
2174 case FILE_MEMORY_CONST:
2175 emitInsn(0x4c300000);
2176 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2177 break;
2178 case FILE_IMMEDIATE:
2179 emitInsn(0x38300000);
2180 emitIMMD(0x14, 19, insn->src(0));
2181 break;
2182 default:
2183 assert(!"bad src1 file");
2184 break;
2185 }
2186
2187 emitField(0x30, 1, isSignedType(insn->dType));
2188 emitCC (0x2f);
2189 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2190 emitINV (0x28, insn->src(0));
2191 emitGPR (0x00, insn->def(0));
2192 }
2193
2194 /*******************************************************************************
2195 * memory
2196 ******************************************************************************/
2197
2198 void
2199 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2200 {
2201 int data = 0;
2202
2203 switch (typeSizeof(type)) {
2204 case 1: data = isSignedType(type) ? 1 : 0; break;
2205 case 2: data = isSignedType(type) ? 3 : 2; break;
2206 case 4: data = 4; break;
2207 case 8: data = 5; break;
2208 case 16: data = 6; break;
2209 default:
2210 assert(!"bad type");
2211 break;
2212 }
2213
2214 emitField(pos, 3, data);
2215 }
2216
2217 void
2218 CodeEmitterGM107::emitLDSTc(int pos)
2219 {
2220 int mode = 0;
2221
2222 switch (insn->cache) {
2223 case CACHE_CA: mode = 0; break;
2224 case CACHE_CG: mode = 1; break;
2225 case CACHE_CS: mode = 2; break;
2226 case CACHE_CV: mode = 3; break;
2227 default:
2228 assert(!"invalid caching mode");
2229 break;
2230 }
2231
2232 emitField(pos, 2, mode);
2233 }
2234
2235 void
2236 CodeEmitterGM107::emitLDC()
2237 {
2238 emitInsn (0xef900000);
2239 emitLDSTs(0x30, insn->dType);
2240 emitField(0x2c, 2, insn->subOp);
2241 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2242 emitGPR (0x00, insn->def(0));
2243 }
2244
2245 void
2246 CodeEmitterGM107::emitLDL()
2247 {
2248 emitInsn (0xef400000);
2249 emitLDSTs(0x30, insn->dType);
2250 emitLDSTc(0x2c);
2251 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2252 emitGPR (0x00, insn->def(0));
2253 }
2254
2255 void
2256 CodeEmitterGM107::emitLDS()
2257 {
2258 emitInsn (0xef480000);
2259 emitLDSTs(0x30, insn->dType);
2260 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2261 emitGPR (0x00, insn->def(0));
2262 }
2263
2264 void
2265 CodeEmitterGM107::emitLD()
2266 {
2267 emitInsn (0x80000000);
2268 emitPRED (0x3a);
2269 emitLDSTc(0x38);
2270 emitLDSTs(0x35, insn->dType);
2271 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2272 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2273 emitGPR (0x00, insn->def(0));
2274 }
2275
2276 void
2277 CodeEmitterGM107::emitSTL()
2278 {
2279 emitInsn (0xef500000);
2280 emitLDSTs(0x30, insn->dType);
2281 emitLDSTc(0x2c);
2282 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2283 emitGPR (0x00, insn->src(1));
2284 }
2285
2286 void
2287 CodeEmitterGM107::emitSTS()
2288 {
2289 emitInsn (0xef580000);
2290 emitLDSTs(0x30, insn->dType);
2291 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2292 emitGPR (0x00, insn->src(1));
2293 }
2294
2295 void
2296 CodeEmitterGM107::emitST()
2297 {
2298 emitInsn (0xa0000000);
2299 emitPRED (0x3a);
2300 emitLDSTc(0x38);
2301 emitLDSTs(0x35, insn->dType);
2302 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2303 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2304 emitGPR (0x00, insn->src(1));
2305 }
2306
2307 void
2308 CodeEmitterGM107::emitALD()
2309 {
2310 emitInsn (0xefd80000);
2311 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2312 emitGPR (0x27, insn->src(0).getIndirect(1));
2313 emitO (0x20);
2314 emitP (0x1f);
2315 emitADDR (0x08, 20, 10, 0, insn->src(0));
2316 emitGPR (0x00, insn->def(0));
2317 }
2318
2319 void
2320 CodeEmitterGM107::emitAST()
2321 {
2322 emitInsn (0xeff00000);
2323 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2324 emitGPR (0x27, insn->src(0).getIndirect(1));
2325 emitP (0x1f);
2326 emitADDR (0x08, 20, 10, 0, insn->src(0));
2327 emitGPR (0x00, insn->src(1));
2328 }
2329
2330 void
2331 CodeEmitterGM107::emitISBERD()
2332 {
2333 emitInsn(0xefd00000);
2334 emitGPR (0x08, insn->src(0));
2335 emitGPR (0x00, insn->def(0));
2336 }
2337
2338 void
2339 CodeEmitterGM107::emitAL2P()
2340 {
2341 emitInsn (0xefa00000);
2342 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2343 emitPRED (0x2c);
2344 emitO (0x20);
2345 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2346 emitGPR (0x08, insn->src(0).getIndirect(0));
2347 emitGPR (0x00, insn->def(0));
2348 }
2349
2350 static void
2351 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2352 {
2353 int ipa = entry->ipa;
2354 int reg = entry->reg;
2355 int loc = entry->loc;
2356
2357 if (data.flatshade &&
2358 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2359 ipa = NV50_IR_INTERP_FLAT;
2360 reg = 0xff;
2361 } else if (data.force_persample_interp &&
2362 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2363 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2364 ipa |= NV50_IR_INTERP_CENTROID;
2365 }
2366 code[loc + 1] &= ~(0xf << 0x14);
2367 code[loc + 1] |= (ipa & 0x3) << 0x16;
2368 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2369 code[loc + 0] &= ~(0xff << 0x14);
2370 code[loc + 0] |= reg << 0x14;
2371 }
2372
2373 void
2374 CodeEmitterGM107::emitIPA()
2375 {
2376 int ipam = 0, ipas = 0;
2377
2378 switch (insn->getInterpMode()) {
2379 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2380 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2381 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2382 case NV50_IR_INTERP_SC : ipam = 3; break;
2383 default:
2384 assert(!"invalid ipa mode");
2385 break;
2386 }
2387
2388 switch (insn->getSampleMode()) {
2389 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2390 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2391 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2392 default:
2393 assert(!"invalid ipa sample mode");
2394 break;
2395 }
2396
2397 emitInsn (0xe0000000);
2398 emitField(0x36, 2, ipam);
2399 emitField(0x34, 2, ipas);
2400 emitSAT (0x33);
2401 emitField(0x2f, 3, 7);
2402 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2403 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2404 code[1] |= 0x00000040; /* .idx */
2405 emitGPR(0x00, insn->def(0));
2406
2407 if (insn->op == OP_PINTERP) {
2408 emitGPR(0x14, insn->src(1));
2409 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2410 emitGPR(0x27, insn->src(2));
2411 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2412 } else {
2413 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2414 emitGPR(0x27, insn->src(1));
2415 emitGPR(0x14);
2416 addInterp(insn->ipa, 0xff, interpApply);
2417 }
2418
2419 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2420 emitGPR(0x27);
2421 }
2422
2423 void
2424 CodeEmitterGM107::emitATOM()
2425 {
2426 unsigned dType, subOp;
2427
2428 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2429 switch (insn->dType) {
2430 case TYPE_U32: dType = 0; break;
2431 case TYPE_U64: dType = 1; break;
2432 default: assert(!"unexpected dType"); dType = 0; break;
2433 }
2434 subOp = 15;
2435
2436 emitInsn (0xee000000);
2437 } else {
2438 switch (insn->dType) {
2439 case TYPE_U32: dType = 0; break;
2440 case TYPE_S32: dType = 1; break;
2441 case TYPE_U64: dType = 2; break;
2442 case TYPE_F32: dType = 3; break;
2443 case TYPE_B128: dType = 4; break;
2444 case TYPE_S64: dType = 5; break;
2445 default: assert(!"unexpected dType"); dType = 0; break;
2446 }
2447 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2448 subOp = 8;
2449 else
2450 subOp = insn->subOp;
2451
2452 emitInsn (0xed000000);
2453 }
2454
2455 emitField(0x34, 4, subOp);
2456 emitField(0x31, 3, dType);
2457 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2458 emitGPR (0x14, insn->src(1));
2459 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2460 emitGPR (0x00, insn->def(0));
2461 }
2462
2463 void
2464 CodeEmitterGM107::emitATOMS()
2465 {
2466 unsigned dType, subOp;
2467
2468 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2469 switch (insn->dType) {
2470 case TYPE_U32: dType = 0; break;
2471 case TYPE_U64: dType = 1; break;
2472 default: assert(!"unexpected dType"); dType = 0; break;
2473 }
2474 subOp = 4;
2475
2476 emitInsn (0xee000000);
2477 emitField(0x34, 1, dType);
2478 } else {
2479 switch (insn->dType) {
2480 case TYPE_U32: dType = 0; break;
2481 case TYPE_S32: dType = 1; break;
2482 case TYPE_U64: dType = 2; break;
2483 case TYPE_S64: dType = 3; break;
2484 default: assert(!"unexpected dType"); dType = 0; break;
2485 }
2486
2487 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2488 subOp = 8;
2489 else
2490 subOp = insn->subOp;
2491
2492 emitInsn (0xec000000);
2493 emitField(0x1c, 3, dType);
2494 }
2495
2496 emitField(0x34, 4, subOp);
2497 emitGPR (0x14, insn->src(1));
2498 emitADDR (0x08, 0x12, 22, 0, insn->src(0));
2499 emitGPR (0x00, insn->def(0));
2500 }
2501
2502 void
2503 CodeEmitterGM107::emitRED()
2504 {
2505 unsigned dType;
2506
2507 switch (insn->dType) {
2508 case TYPE_U32: dType = 0; break;
2509 case TYPE_S32: dType = 1; break;
2510 case TYPE_U64: dType = 2; break;
2511 case TYPE_F32: dType = 3; break;
2512 case TYPE_B128: dType = 4; break;
2513 case TYPE_S64: dType = 5; break;
2514 default: assert(!"unexpected dType"); dType = 0; break;
2515 }
2516
2517 emitInsn (0xebf80000);
2518 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2519 emitField(0x17, 3, insn->subOp);
2520 emitField(0x14, 3, dType);
2521 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2522 emitGPR (0x00, insn->src(1));
2523 }
2524
2525 void
2526 CodeEmitterGM107::emitCCTL()
2527 {
2528 unsigned width;
2529 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2530 emitInsn(0xef600000);
2531 width = 30;
2532 } else {
2533 emitInsn(0xef800000);
2534 width = 22;
2535 }
2536 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2537 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2538 emitField(0x00, 4, insn->subOp);
2539 }
2540
2541 /*******************************************************************************
2542 * surface
2543 ******************************************************************************/
2544
2545 void
2546 CodeEmitterGM107::emitPIXLD()
2547 {
2548 emitInsn (0xefe80000);
2549 emitPRED (0x2d);
2550 emitField(0x1f, 3, insn->subOp);
2551 emitGPR (0x08, insn->src(0));
2552 emitGPR (0x00, insn->def(0));
2553 }
2554
2555 /*******************************************************************************
2556 * texture
2557 ******************************************************************************/
2558
2559 void
2560 CodeEmitterGM107::emitTEXs(int pos)
2561 {
2562 int src1 = insn->predSrc == 1 ? 2 : 1;
2563 if (insn->srcExists(src1))
2564 emitGPR(pos, insn->src(src1));
2565 else
2566 emitGPR(pos);
2567 }
2568
2569 void
2570 CodeEmitterGM107::emitTEX()
2571 {
2572 const TexInstruction *insn = this->insn->asTex();
2573 int lodm = 0;
2574
2575 if (!insn->tex.levelZero) {
2576 switch (insn->op) {
2577 case OP_TEX: lodm = 0; break;
2578 case OP_TXB: lodm = 2; break;
2579 case OP_TXL: lodm = 3; break;
2580 default:
2581 assert(!"invalid tex op");
2582 break;
2583 }
2584 } else {
2585 lodm = 1;
2586 }
2587
2588 if (insn->tex.rIndirectSrc >= 0) {
2589 emitInsn (0xdeb80000);
2590 emitField(0x25, 2, lodm);
2591 emitField(0x24, 1, insn->tex.useOffsets == 1);
2592 } else {
2593 emitInsn (0xc0380000);
2594 emitField(0x37, 2, lodm);
2595 emitField(0x36, 1, insn->tex.useOffsets == 1);
2596 emitField(0x24, 13, insn->tex.r);
2597 }
2598
2599 emitField(0x32, 1, insn->tex.target.isShadow());
2600 emitField(0x31, 1, insn->tex.liveOnly);
2601 emitField(0x23, 1, insn->tex.derivAll);
2602 emitField(0x1f, 4, insn->tex.mask);
2603 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2604 insn->tex.target.getDim() - 1);
2605 emitField(0x1c, 1, insn->tex.target.isArray());
2606 emitTEXs (0x14);
2607 emitGPR (0x08, insn->src(0));
2608 emitGPR (0x00, insn->def(0));
2609 }
2610
2611 void
2612 CodeEmitterGM107::emitTLD()
2613 {
2614 const TexInstruction *insn = this->insn->asTex();
2615
2616 if (insn->tex.rIndirectSrc >= 0) {
2617 emitInsn (0xdd380000);
2618 } else {
2619 emitInsn (0xdc380000);
2620 emitField(0x24, 13, insn->tex.r);
2621 }
2622
2623 emitField(0x37, 1, insn->tex.levelZero == 0);
2624 emitField(0x32, 1, insn->tex.target.isMS());
2625 emitField(0x31, 1, insn->tex.liveOnly);
2626 emitField(0x23, 1, insn->tex.useOffsets == 1);
2627 emitField(0x1f, 4, insn->tex.mask);
2628 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2629 insn->tex.target.getDim() - 1);
2630 emitField(0x1c, 1, insn->tex.target.isArray());
2631 emitTEXs (0x14);
2632 emitGPR (0x08, insn->src(0));
2633 emitGPR (0x00, insn->def(0));
2634 }
2635
2636 void
2637 CodeEmitterGM107::emitTLD4()
2638 {
2639 const TexInstruction *insn = this->insn->asTex();
2640
2641 if (insn->tex.rIndirectSrc >= 0) {
2642 emitInsn (0xdef80000);
2643 emitField(0x26, 2, insn->tex.gatherComp);
2644 emitField(0x25, 2, insn->tex.useOffsets == 4);
2645 emitField(0x24, 2, insn->tex.useOffsets == 1);
2646 } else {
2647 emitInsn (0xc8380000);
2648 emitField(0x38, 2, insn->tex.gatherComp);
2649 emitField(0x37, 2, insn->tex.useOffsets == 4);
2650 emitField(0x36, 2, insn->tex.useOffsets == 1);
2651 emitField(0x24, 13, insn->tex.r);
2652 }
2653
2654 emitField(0x32, 1, insn->tex.target.isShadow());
2655 emitField(0x31, 1, insn->tex.liveOnly);
2656 emitField(0x23, 1, insn->tex.derivAll);
2657 emitField(0x1f, 4, insn->tex.mask);
2658 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2659 insn->tex.target.getDim() - 1);
2660 emitField(0x1c, 1, insn->tex.target.isArray());
2661 emitTEXs (0x14);
2662 emitGPR (0x08, insn->src(0));
2663 emitGPR (0x00, insn->def(0));
2664 }
2665
2666 void
2667 CodeEmitterGM107::emitTXD()
2668 {
2669 const TexInstruction *insn = this->insn->asTex();
2670
2671 if (insn->tex.rIndirectSrc >= 0) {
2672 emitInsn (0xde780000);
2673 } else {
2674 emitInsn (0xde380000);
2675 emitField(0x24, 13, insn->tex.r);
2676 }
2677
2678 emitField(0x31, 1, insn->tex.liveOnly);
2679 emitField(0x23, 1, insn->tex.useOffsets == 1);
2680 emitField(0x1f, 4, insn->tex.mask);
2681 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2682 insn->tex.target.getDim() - 1);
2683 emitField(0x1c, 1, insn->tex.target.isArray());
2684 emitTEXs (0x14);
2685 emitGPR (0x08, insn->src(0));
2686 emitGPR (0x00, insn->def(0));
2687 }
2688
2689 void
2690 CodeEmitterGM107::emitTMML()
2691 {
2692 const TexInstruction *insn = this->insn->asTex();
2693
2694 if (insn->tex.rIndirectSrc >= 0) {
2695 emitInsn (0xdf600000);
2696 } else {
2697 emitInsn (0xdf580000);
2698 emitField(0x24, 13, insn->tex.r);
2699 }
2700
2701 emitField(0x31, 1, insn->tex.liveOnly);
2702 emitField(0x23, 1, insn->tex.derivAll);
2703 emitField(0x1f, 4, insn->tex.mask);
2704 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2705 insn->tex.target.getDim() - 1);
2706 emitField(0x1c, 1, insn->tex.target.isArray());
2707 emitTEXs (0x14);
2708 emitGPR (0x08, insn->src(0));
2709 emitGPR (0x00, insn->def(0));
2710 }
2711
2712 void
2713 CodeEmitterGM107::emitTXQ()
2714 {
2715 const TexInstruction *insn = this->insn->asTex();
2716 int type = 0;
2717
2718 switch (insn->tex.query) {
2719 case TXQ_DIMS : type = 0x01; break;
2720 case TXQ_TYPE : type = 0x02; break;
2721 case TXQ_SAMPLE_POSITION: type = 0x05; break;
2722 case TXQ_FILTER : type = 0x10; break;
2723 case TXQ_LOD : type = 0x12; break;
2724 case TXQ_WRAP : type = 0x14; break;
2725 case TXQ_BORDER_COLOUR : type = 0x16; break;
2726 default:
2727 assert(!"invalid txq query");
2728 break;
2729 }
2730
2731 if (insn->tex.rIndirectSrc >= 0) {
2732 emitInsn (0xdf500000);
2733 } else {
2734 emitInsn (0xdf480000);
2735 emitField(0x24, 13, insn->tex.r);
2736 }
2737
2738 emitField(0x31, 1, insn->tex.liveOnly);
2739 emitField(0x1f, 4, insn->tex.mask);
2740 emitField(0x16, 6, type);
2741 emitGPR (0x08, insn->src(0));
2742 emitGPR (0x00, insn->def(0));
2743 }
2744
2745 void
2746 CodeEmitterGM107::emitDEPBAR()
2747 {
2748 emitInsn (0xf0f00000);
2749 emitField(0x1d, 1, 1); /* le */
2750 emitField(0x1a, 3, 5);
2751 emitField(0x14, 6, insn->subOp);
2752 emitField(0x00, 6, insn->subOp);
2753 }
2754
2755 /*******************************************************************************
2756 * misc
2757 ******************************************************************************/
2758
2759 void
2760 CodeEmitterGM107::emitNOP()
2761 {
2762 emitInsn(0x50b00000);
2763 }
2764
2765 void
2766 CodeEmitterGM107::emitKIL()
2767 {
2768 emitInsn (0xe3300000);
2769 emitCond5(0x00, CC_TR);
2770 }
2771
2772 void
2773 CodeEmitterGM107::emitOUT()
2774 {
2775 const int cut = insn->op == OP_RESTART || insn->subOp;
2776 const int emit = insn->op == OP_EMIT;
2777
2778 switch (insn->src(1).getFile()) {
2779 case FILE_GPR:
2780 emitInsn(0xfbe00000);
2781 emitGPR (0x14, insn->src(1));
2782 break;
2783 case FILE_IMMEDIATE:
2784 emitInsn(0xf6e00000);
2785 emitIMMD(0x14, 19, insn->src(1));
2786 break;
2787 case FILE_MEMORY_CONST:
2788 emitInsn(0xebe00000);
2789 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2790 break;
2791 default:
2792 assert(!"bad src1 file");
2793 break;
2794 }
2795
2796 emitField(0x27, 2, (cut << 1) | emit);
2797 emitGPR (0x08, insn->src(0));
2798 emitGPR (0x00, insn->def(0));
2799 }
2800
2801 void
2802 CodeEmitterGM107::emitBAR()
2803 {
2804 uint8_t subop;
2805
2806 emitInsn (0xf0a80000);
2807
2808 switch (insn->subOp) {
2809 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
2810 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
2811 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
2812 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
2813 default:
2814 subop = 0x80;
2815 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
2816 break;
2817 }
2818
2819 emitField(0x20, 8, subop);
2820
2821 // barrier id
2822 if (insn->src(0).getFile() == FILE_GPR) {
2823 emitGPR(0x08, insn->src(0));
2824 } else {
2825 ImmediateValue *imm = insn->getSrc(0)->asImm();
2826 assert(imm);
2827 emitField(0x08, 8, imm->reg.data.u32);
2828 emitField(0x2b, 1, 1);
2829 }
2830
2831 // thread count
2832 if (insn->src(1).getFile() == FILE_GPR) {
2833 emitGPR(0x14, insn->src(1));
2834 } else {
2835 ImmediateValue *imm = insn->getSrc(0)->asImm();
2836 assert(imm);
2837 emitField(0x14, 12, imm->reg.data.u32);
2838 emitField(0x2c, 1, 1);
2839 }
2840
2841 if (insn->srcExists(2) && (insn->predSrc != 2)) {
2842 emitPRED (0x27, insn->src(2));
2843 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
2844 } else {
2845 emitField(0x27, 3, 7);
2846 }
2847 }
2848
2849 void
2850 CodeEmitterGM107::emitMEMBAR()
2851 {
2852 emitInsn (0xef980000);
2853 emitField(0x08, 2, insn->subOp >> 2);
2854 }
2855
2856 void
2857 CodeEmitterGM107::emitVOTE()
2858 {
2859 assert(insn->src(0).getFile() == FILE_PREDICATE);
2860
2861 int r = -1, p = -1;
2862 for (int i = 0; insn->defExists(i); i++) {
2863 if (insn->def(i).getFile() == FILE_GPR)
2864 r = i;
2865 else if (insn->def(i).getFile() == FILE_PREDICATE)
2866 p = i;
2867 }
2868
2869 emitInsn (0x50d80000);
2870 emitField(0x30, 2, insn->subOp);
2871 if (r >= 0)
2872 emitGPR (0x00, insn->def(r));
2873 else
2874 emitGPR (0x00);
2875 if (p >= 0)
2876 emitPRED (0x2d, insn->def(p));
2877 else
2878 emitPRED (0x2d);
2879 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
2880 emitPRED (0x27, insn->src(0));
2881 }
2882
2883 void
2884 CodeEmitterGM107::emitSUTarget()
2885 {
2886 const TexInstruction *insn = this->insn->asTex();
2887 int target = 0;
2888
2889 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2890
2891 if (insn->tex.target == TEX_TARGET_BUFFER) {
2892 target = 2;
2893 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
2894 target = 4;
2895 } else if (insn->tex.target == TEX_TARGET_2D ||
2896 insn->tex.target == TEX_TARGET_RECT) {
2897 target = 6;
2898 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
2899 insn->tex.target == TEX_TARGET_CUBE ||
2900 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
2901 target = 8;
2902 } else if (insn->tex.target == TEX_TARGET_3D) {
2903 target = 10;
2904 } else {
2905 assert(insn->tex.target == TEX_TARGET_1D);
2906 }
2907 emitField(0x20, 4, target);
2908 }
2909
2910 void
2911 CodeEmitterGM107::emitSUHandle(const int s)
2912 {
2913 const TexInstruction *insn = this->insn->asTex();
2914
2915 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2916
2917 if (insn->src(s).getFile() == FILE_GPR) {
2918 emitGPR(0x27, insn->src(s));
2919 } else {
2920 ImmediateValue *imm = insn->getSrc(s)->asImm();
2921 assert(imm);
2922 emitField(0x33, 1, 1);
2923 emitField(0x24, 13, imm->reg.data.u32);
2924 }
2925 }
2926
2927 void
2928 CodeEmitterGM107::emitSUSTx()
2929 {
2930 const TexInstruction *insn = this->insn->asTex();
2931
2932 emitInsn(0xeb200000);
2933 if (insn->op == OP_SUSTB)
2934 emitField(0x34, 1, 1);
2935 emitSUTarget();
2936
2937 emitLDSTc(0x18);
2938 emitField(0x14, 4, 0xf); // rgba
2939 emitGPR (0x08, insn->src(0));
2940 emitGPR (0x00, insn->src(1));
2941
2942 emitSUHandle(2);
2943 }
2944
2945 void
2946 CodeEmitterGM107::emitSULDx()
2947 {
2948 const TexInstruction *insn = this->insn->asTex();
2949 int type = 0;
2950
2951 emitInsn(0xeb000000);
2952 if (insn->op == OP_SULDB)
2953 emitField(0x34, 1, 1);
2954 emitSUTarget();
2955
2956 switch (insn->dType) {
2957 case TYPE_S8: type = 1; break;
2958 case TYPE_U16: type = 2; break;
2959 case TYPE_S16: type = 3; break;
2960 case TYPE_U32: type = 4; break;
2961 case TYPE_U64: type = 5; break;
2962 case TYPE_B128: type = 6; break;
2963 default:
2964 assert(insn->dType == TYPE_U8);
2965 break;
2966 }
2967 emitLDSTc(0x18);
2968 emitField(0x14, 3, type);
2969 emitGPR (0x00, insn->def(0));
2970 emitGPR (0x08, insn->src(0));
2971
2972 emitSUHandle(1);
2973 }
2974
2975 void
2976 CodeEmitterGM107::emitSUREDx()
2977 {
2978 const TexInstruction *insn = this->insn->asTex();
2979 uint8_t type = 0, subOp;
2980
2981 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
2982 emitInsn(0xeac00000);
2983 else
2984 emitInsn(0xea600000);
2985
2986 if (insn->op == OP_SUREDB)
2987 emitField(0x34, 1, 1);
2988 emitSUTarget();
2989
2990 // destination type
2991 switch (insn->dType) {
2992 case TYPE_S32: type = 1; break;
2993 case TYPE_U64: type = 2; break;
2994 case TYPE_F32: type = 3; break;
2995 case TYPE_S64: type = 5; break;
2996 default:
2997 assert(insn->dType == TYPE_U32);
2998 break;
2999 }
3000
3001 // atomic operation
3002 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3003 subOp = 0;
3004 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3005 subOp = 8;
3006 } else {
3007 subOp = insn->subOp;
3008 }
3009
3010 emitField(0x24, 3, type);
3011 emitField(0x1d, 4, subOp);
3012 emitGPR (0x14, insn->src(1));
3013 emitGPR (0x08, insn->src(0));
3014 emitGPR (0x00, insn->def(0));
3015
3016 emitSUHandle(2);
3017 }
3018
3019 /*******************************************************************************
3020 * assembler front-end
3021 ******************************************************************************/
3022
3023 bool
3024 CodeEmitterGM107::emitInstruction(Instruction *i)
3025 {
3026 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3027 bool ret = true;
3028
3029 insn = i;
3030
3031 if (insn->encSize != 8) {
3032 ERROR("skipping undecodable instruction: "); insn->print();
3033 return false;
3034 } else
3035 if (codeSize + size > codeSizeLimit) {
3036 ERROR("code emitter output buffer too small\n");
3037 return false;
3038 }
3039
3040 if (writeIssueDelays) {
3041 int n = ((codeSize & 0x1f) / 8) - 1;
3042 if (n < 0) {
3043 data = code;
3044 data[0] = 0x00000000;
3045 data[1] = 0x00000000;
3046 code += 2;
3047 codeSize += 8;
3048 n++;
3049 }
3050
3051 emitField(data, n * 21, 21, insn->sched);
3052 }
3053
3054 switch (insn->op) {
3055 case OP_EXIT:
3056 emitEXIT();
3057 break;
3058 case OP_BRA:
3059 emitBRA();
3060 break;
3061 case OP_CALL:
3062 emitCAL();
3063 break;
3064 case OP_PRECONT:
3065 emitPCNT();
3066 break;
3067 case OP_CONT:
3068 emitCONT();
3069 break;
3070 case OP_PREBREAK:
3071 emitPBK();
3072 break;
3073 case OP_BREAK:
3074 emitBRK();
3075 break;
3076 case OP_PRERET:
3077 emitPRET();
3078 break;
3079 case OP_RET:
3080 emitRET();
3081 break;
3082 case OP_JOINAT:
3083 emitSSY();
3084 break;
3085 case OP_JOIN:
3086 emitSYNC();
3087 break;
3088 case OP_QUADON:
3089 emitSAM();
3090 break;
3091 case OP_QUADPOP:
3092 emitRAM();
3093 break;
3094 case OP_MOV:
3095 emitMOV();
3096 break;
3097 case OP_RDSV:
3098 emitS2R();
3099 break;
3100 case OP_ABS:
3101 case OP_NEG:
3102 case OP_SAT:
3103 case OP_FLOOR:
3104 case OP_CEIL:
3105 case OP_TRUNC:
3106 case OP_CVT:
3107 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3108 insn->src(0).getFile() == FILE_PREDICATE)) {
3109 emitMOV();
3110 } else if (isFloatType(insn->dType)) {
3111 if (isFloatType(insn->sType))
3112 emitF2F();
3113 else
3114 emitI2F();
3115 } else {
3116 if (isFloatType(insn->sType))
3117 emitF2I();
3118 else
3119 emitI2I();
3120 }
3121 break;
3122 case OP_SHFL:
3123 emitSHFL();
3124 break;
3125 case OP_ADD:
3126 case OP_SUB:
3127 if (isFloatType(insn->dType)) {
3128 if (insn->dType == TYPE_F64)
3129 emitDADD();
3130 else
3131 emitFADD();
3132 } else {
3133 emitIADD();
3134 }
3135 break;
3136 case OP_MUL:
3137 if (isFloatType(insn->dType)) {
3138 if (insn->dType == TYPE_F64)
3139 emitDMUL();
3140 else
3141 emitFMUL();
3142 } else {
3143 emitIMUL();
3144 }
3145 break;
3146 case OP_MAD:
3147 case OP_FMA:
3148 if (isFloatType(insn->dType)) {
3149 if (insn->dType == TYPE_F64)
3150 emitDFMA();
3151 else
3152 emitFFMA();
3153 } else {
3154 emitIMAD();
3155 }
3156 break;
3157 case OP_SHLADD:
3158 emitISCADD();
3159 break;
3160 case OP_MIN:
3161 case OP_MAX:
3162 if (isFloatType(insn->dType)) {
3163 if (insn->dType == TYPE_F64)
3164 emitDMNMX();
3165 else
3166 emitFMNMX();
3167 } else {
3168 emitIMNMX();
3169 }
3170 break;
3171 case OP_SHL:
3172 emitSHL();
3173 break;
3174 case OP_SHR:
3175 emitSHR();
3176 break;
3177 case OP_POPCNT:
3178 emitPOPC();
3179 break;
3180 case OP_INSBF:
3181 emitBFI();
3182 break;
3183 case OP_EXTBF:
3184 emitBFE();
3185 break;
3186 case OP_BFIND:
3187 emitFLO();
3188 break;
3189 case OP_SLCT:
3190 if (isFloatType(insn->dType))
3191 emitFCMP();
3192 else
3193 emitICMP();
3194 break;
3195 case OP_SET:
3196 case OP_SET_AND:
3197 case OP_SET_OR:
3198 case OP_SET_XOR:
3199 if (insn->def(0).getFile() != FILE_PREDICATE) {
3200 if (isFloatType(insn->sType))
3201 if (insn->sType == TYPE_F64)
3202 emitDSET();
3203 else
3204 emitFSET();
3205 else
3206 emitISET();
3207 } else {
3208 if (isFloatType(insn->sType))
3209 if (insn->sType == TYPE_F64)
3210 emitDSETP();
3211 else
3212 emitFSETP();
3213 else
3214 emitISETP();
3215 }
3216 break;
3217 case OP_SELP:
3218 emitSEL();
3219 break;
3220 case OP_PRESIN:
3221 case OP_PREEX2:
3222 emitRRO();
3223 break;
3224 case OP_COS:
3225 case OP_SIN:
3226 case OP_EX2:
3227 case OP_LG2:
3228 case OP_RCP:
3229 case OP_RSQ:
3230 emitMUFU();
3231 break;
3232 case OP_AND:
3233 case OP_OR:
3234 case OP_XOR:
3235 emitLOP();
3236 break;
3237 case OP_NOT:
3238 emitNOT();
3239 break;
3240 case OP_LOAD:
3241 switch (insn->src(0).getFile()) {
3242 case FILE_MEMORY_CONST : emitLDC(); break;
3243 case FILE_MEMORY_LOCAL : emitLDL(); break;
3244 case FILE_MEMORY_SHARED: emitLDS(); break;
3245 case FILE_MEMORY_GLOBAL: emitLD(); break;
3246 default:
3247 assert(!"invalid load");
3248 emitNOP();
3249 break;
3250 }
3251 break;
3252 case OP_STORE:
3253 switch (insn->src(0).getFile()) {
3254 case FILE_MEMORY_LOCAL : emitSTL(); break;
3255 case FILE_MEMORY_SHARED: emitSTS(); break;
3256 case FILE_MEMORY_GLOBAL: emitST(); break;
3257 default:
3258 assert(!"invalid store");
3259 emitNOP();
3260 break;
3261 }
3262 break;
3263 case OP_ATOM:
3264 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3265 emitATOMS();
3266 else
3267 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3268 emitRED();
3269 else
3270 emitATOM();
3271 break;
3272 case OP_CCTL:
3273 emitCCTL();
3274 break;
3275 case OP_VFETCH:
3276 emitALD();
3277 break;
3278 case OP_EXPORT:
3279 emitAST();
3280 break;
3281 case OP_PFETCH:
3282 emitISBERD();
3283 break;
3284 case OP_AFETCH:
3285 emitAL2P();
3286 break;
3287 case OP_LINTERP:
3288 case OP_PINTERP:
3289 emitIPA();
3290 break;
3291 case OP_PIXLD:
3292 emitPIXLD();
3293 break;
3294 case OP_TEX:
3295 case OP_TXB:
3296 case OP_TXL:
3297 emitTEX();
3298 break;
3299 case OP_TXF:
3300 emitTLD();
3301 break;
3302 case OP_TXG:
3303 emitTLD4();
3304 break;
3305 case OP_TXD:
3306 emitTXD();
3307 break;
3308 case OP_TXQ:
3309 emitTXQ();
3310 break;
3311 case OP_TXLQ:
3312 emitTMML();
3313 break;
3314 case OP_TEXBAR:
3315 emitDEPBAR();
3316 break;
3317 case OP_QUADOP:
3318 emitFSWZADD();
3319 break;
3320 case OP_NOP:
3321 emitNOP();
3322 break;
3323 case OP_DISCARD:
3324 emitKIL();
3325 break;
3326 case OP_EMIT:
3327 case OP_RESTART:
3328 emitOUT();
3329 break;
3330 case OP_BAR:
3331 emitBAR();
3332 break;
3333 case OP_MEMBAR:
3334 emitMEMBAR();
3335 break;
3336 case OP_VOTE:
3337 emitVOTE();
3338 break;
3339 case OP_SUSTB:
3340 case OP_SUSTP:
3341 emitSUSTx();
3342 break;
3343 case OP_SULDB:
3344 case OP_SULDP:
3345 emitSULDx();
3346 break;
3347 case OP_SUREDB:
3348 case OP_SUREDP:
3349 emitSUREDx();
3350 break;
3351 default:
3352 assert(!"invalid opcode");
3353 emitNOP();
3354 ret = false;
3355 break;
3356 }
3357
3358 if (insn->join) {
3359 /*XXX*/
3360 }
3361
3362 code += 2;
3363 codeSize += 8;
3364 return ret;
3365 }
3366
3367 uint32_t
3368 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3369 {
3370 return 8;
3371 }
3372
3373 /*******************************************************************************
3374 * sched data calculator
3375 ******************************************************************************/
3376
3377 class SchedDataCalculatorGM107 : public Pass
3378 {
3379 public:
3380 SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3381
3382 private:
3383 struct RegScores
3384 {
3385 struct ScoreData {
3386 int r[256];
3387 int p[8];
3388 int c;
3389 } rd, wr;
3390 int base;
3391
3392 void rebase(const int base)
3393 {
3394 const int delta = this->base - base;
3395 if (!delta)
3396 return;
3397 this->base = 0;
3398
3399 for (int i = 0; i < 256; ++i) {
3400 rd.r[i] += delta;
3401 wr.r[i] += delta;
3402 }
3403 for (int i = 0; i < 8; ++i) {
3404 rd.p[i] += delta;
3405 wr.p[i] += delta;
3406 }
3407 rd.c += delta;
3408 wr.c += delta;
3409 }
3410 void wipe()
3411 {
3412 memset(&rd, 0, sizeof(rd));
3413 memset(&wr, 0, sizeof(wr));
3414 }
3415 int getLatest(const ScoreData& d) const
3416 {
3417 int max = 0;
3418 for (int i = 0; i < 256; ++i)
3419 if (d.r[i] > max)
3420 max = d.r[i];
3421 for (int i = 0; i < 8; ++i)
3422 if (d.p[i] > max)
3423 max = d.p[i];
3424 if (d.c > max)
3425 max = d.c;
3426 return max;
3427 }
3428 inline int getLatestRd() const
3429 {
3430 return getLatest(rd);
3431 }
3432 inline int getLatestWr() const
3433 {
3434 return getLatest(wr);
3435 }
3436 inline int getLatest() const
3437 {
3438 return MAX2(getLatestRd(), getLatestWr());
3439 }
3440 void setMax(const RegScores *that)
3441 {
3442 for (int i = 0; i < 256; ++i) {
3443 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3444 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3445 }
3446 for (int i = 0; i < 8; ++i) {
3447 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3448 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3449 }
3450 rd.c = MAX2(rd.c, that->rd.c);
3451 wr.c = MAX2(wr.c, that->wr.c);
3452 }
3453 void print(int cycle)
3454 {
3455 for (int i = 0; i < 256; ++i) {
3456 if (rd.r[i] > cycle)
3457 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3458 if (wr.r[i] > cycle)
3459 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3460 }
3461 for (int i = 0; i < 8; ++i) {
3462 if (rd.p[i] > cycle)
3463 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3464 if (wr.p[i] > cycle)
3465 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3466 }
3467 if (rd.c > cycle)
3468 INFO("rd $c @ %i\n", rd.c);
3469 if (wr.c > cycle)
3470 INFO("wr $c @ %i\n", wr.c);
3471 }
3472 };
3473
3474 RegScores *score; // for current BB
3475 std::vector<RegScores> scoreBoards;
3476
3477 const TargetGM107 *targ;
3478 bool visit(Function *);
3479 bool visit(BasicBlock *);
3480
3481 void commitInsn(const Instruction *, int);
3482 int calcDelay(const Instruction *, int) const;
3483 void setDelay(Instruction *, int, const Instruction *);
3484 void recordWr(const Value *, int, int);
3485 void checkRd(const Value *, int, int&) const;
3486
3487 inline void emitYield(Instruction *);
3488 inline void emitStall(Instruction *, uint8_t);
3489 inline void emitReuse(Instruction *, uint8_t);
3490 inline void emitWrDepBar(Instruction *, uint8_t);
3491 inline void emitRdDepBar(Instruction *, uint8_t);
3492 inline void emitWtDepBar(Instruction *, uint8_t);
3493
3494 inline int getStall(const Instruction *) const;
3495 inline int getWrDepBar(const Instruction *) const;
3496 inline int getRdDepBar(const Instruction *) const;
3497 inline int getWtDepBar(const Instruction *) const;
3498
3499 void setReuseFlag(Instruction *);
3500
3501 inline void printSchedInfo(int, const Instruction *) const;
3502
3503 struct LiveBarUse {
3504 LiveBarUse(Instruction *insn, Instruction *usei)
3505 : insn(insn), usei(usei) { }
3506 Instruction *insn;
3507 Instruction *usei;
3508 };
3509
3510 struct LiveBarDef {
3511 LiveBarDef(Instruction *insn, Instruction *defi)
3512 : insn(insn), defi(defi) { }
3513 Instruction *insn;
3514 Instruction *defi;
3515 };
3516
3517 bool insertBarriers(BasicBlock *);
3518
3519 Instruction *findFirstUse(const Instruction *) const;
3520 Instruction *findFirstDef(const Instruction *) const;
3521
3522 bool needRdDepBar(const Instruction *) const;
3523 bool needWrDepBar(const Instruction *) const;
3524 };
3525
3526 inline void
3527 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3528 {
3529 assert(cnt < 16);
3530 insn->sched |= cnt;
3531 }
3532
3533 inline void
3534 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3535 {
3536 insn->sched |= 1 << 4;
3537 }
3538
3539 inline void
3540 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3541 {
3542 assert(id < 6);
3543 if ((insn->sched & 0xe0) == 0xe0)
3544 insn->sched ^= 0xe0;
3545 insn->sched |= id << 5;
3546 }
3547
3548 inline void
3549 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3550 {
3551 assert(id < 6);
3552 if ((insn->sched & 0x700) == 0x700)
3553 insn->sched ^= 0x700;
3554 insn->sched |= id << 8;
3555 }
3556
3557 inline void
3558 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3559 {
3560 assert(id < 6);
3561 insn->sched |= 1 << (11 + id);
3562 }
3563
3564 inline void
3565 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3566 {
3567 assert(id < 4);
3568 insn->sched |= 1 << (17 + id);
3569 }
3570
3571 inline void
3572 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3573 const Instruction *insn) const
3574 {
3575 uint8_t st, yl, wr, rd, wt, ru;
3576
3577 st = (insn->sched & 0x00000f) >> 0;
3578 yl = (insn->sched & 0x000010) >> 4;
3579 wr = (insn->sched & 0x0000e0) >> 5;
3580 rd = (insn->sched & 0x000700) >> 8;
3581 wt = (insn->sched & 0x01f800) >> 11;
3582 ru = (insn->sched & 0x1e0000) >> 17;
3583
3584 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3585 cycle, st, yl, wr, rd, wt, ru);
3586 }
3587
3588 inline int
3589 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3590 {
3591 return insn->sched & 0xf;
3592 }
3593
3594 inline int
3595 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3596 {
3597 return (insn->sched & 0x0000e0) >> 5;
3598 }
3599
3600 inline int
3601 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3602 {
3603 return (insn->sched & 0x000700) >> 8;
3604 }
3605
3606 inline int
3607 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3608 {
3609 return (insn->sched & 0x01f800) >> 11;
3610 }
3611
3612 // Emit the reuse flag which allows to make use of the new memory hierarchy
3613 // introduced since Maxwell, the operand reuse cache.
3614 //
3615 // It allows to reduce bank conflicts by caching operands. Each time you issue
3616 // an instruction, that flag can tell the hw which operands are going to be
3617 // re-used by the next instruction. Note that the next instruction has to use
3618 // the same GPR id in the same operand slot.
3619 void
3620 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3621 {
3622 Instruction *next = insn->next;
3623 BitSet defs(255, 1);
3624
3625 if (!targ->isReuseSupported(insn))
3626 return;
3627
3628 for (int d = 0; insn->defExists(d); ++d) {
3629 const Value *def = insn->def(d).rep();
3630 if (insn->def(d).getFile() != FILE_GPR)
3631 continue;
3632 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3633 continue;
3634 defs.set(def->reg.data.id);
3635 }
3636
3637 for (int s = 0; insn->srcExists(s); s++) {
3638 const Value *src = insn->src(s).rep();
3639 if (insn->src(s).getFile() != FILE_GPR)
3640 continue;
3641 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3642 continue;
3643 if (defs.test(src->reg.data.id))
3644 continue;
3645 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3646 continue;
3647 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3648 continue;
3649 assert(s < 4);
3650 emitReuse(insn, s);
3651 }
3652 }
3653
3654 void
3655 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3656 {
3657 int a = v->reg.data.id, b;
3658
3659 switch (v->reg.file) {
3660 case FILE_GPR:
3661 b = a + v->reg.size / 4;
3662 for (int r = a; r < b; ++r)
3663 score->rd.r[r] = ready;
3664 break;
3665 case FILE_PREDICATE:
3666 // To immediately use a predicate set by any instructions, the minimum
3667 // number of stall counts is 13.
3668 score->rd.p[a] = cycle + 13;
3669 break;
3670 case FILE_FLAGS:
3671 score->rd.c = ready;
3672 break;
3673 default:
3674 break;
3675 }
3676 }
3677
3678 void
3679 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3680 {
3681 int a = v->reg.data.id, b;
3682 int ready = cycle;
3683
3684 switch (v->reg.file) {
3685 case FILE_GPR:
3686 b = a + v->reg.size / 4;
3687 for (int r = a; r < b; ++r)
3688 ready = MAX2(ready, score->rd.r[r]);
3689 break;
3690 case FILE_PREDICATE:
3691 ready = MAX2(ready, score->rd.p[a]);
3692 break;
3693 case FILE_FLAGS:
3694 ready = MAX2(ready, score->rd.c);
3695 break;
3696 default:
3697 break;
3698 }
3699 if (cycle < ready)
3700 delay = MAX2(delay, ready - cycle);
3701 }
3702
3703 void
3704 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3705 {
3706 const int ready = cycle + targ->getLatency(insn);
3707
3708 for (int d = 0; insn->defExists(d); ++d)
3709 recordWr(insn->getDef(d), cycle, ready);
3710
3711 #ifdef GM107_DEBUG_SCHED_DATA
3712 score->print(cycle);
3713 #endif
3714 }
3715
3716 #define GM107_MIN_ISSUE_DELAY 0x1
3717 #define GM107_MAX_ISSUE_DELAY 0xf
3718
3719 int
3720 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3721 {
3722 int delay = 0, ready = cycle;
3723
3724 for (int s = 0; insn->srcExists(s); ++s)
3725 checkRd(insn->getSrc(s), cycle, delay);
3726
3727 // TODO: make use of getReadLatency()!
3728
3729 return MAX2(delay, ready - cycle);
3730 }
3731
3732 void
3733 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3734 const Instruction *next)
3735 {
3736 const OpClass cl = targ->getOpClass(insn->op);
3737 int wr, rd;
3738
3739 if (insn->op == OP_EXIT ||
3740 insn->op == OP_BAR ||
3741 insn->op == OP_MEMBAR) {
3742 delay = GM107_MAX_ISSUE_DELAY;
3743 } else
3744 if (insn->op == OP_QUADON ||
3745 insn->op == OP_QUADPOP) {
3746 delay = 0xd;
3747 } else
3748 if (cl == OPCLASS_FLOW || insn->join) {
3749 delay = 0xd;
3750 }
3751
3752 if (!next || !targ->canDualIssue(insn, next)) {
3753 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
3754 } else {
3755 delay = 0x0; // dual-issue
3756 }
3757
3758 wr = getWrDepBar(insn);
3759 rd = getRdDepBar(insn);
3760
3761 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
3762 // Barriers take one additional clock cycle to become active on top of
3763 // the clock consumed by the instruction producing it.
3764 if (!next || insn->bb != next->bb) {
3765 delay = 0x2;
3766 } else {
3767 int wt = getWtDepBar(next);
3768 if ((wt & (1 << wr)) | (wt & (1 << rd)))
3769 delay = 0x2;
3770 }
3771 }
3772
3773 emitStall(insn, delay);
3774 }
3775
3776
3777 // Return true when the given instruction needs to emit a read dependency
3778 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3779 // setting the maximum number of stall counts is not enough.
3780 bool
3781 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
3782 {
3783 BitSet srcs(255, 1), defs(255, 1);
3784 int a, b;
3785
3786 if (!targ->isBarrierRequired(insn))
3787 return false;
3788
3789 // Do not emit a read dependency barrier when the instruction doesn't use
3790 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3791 for (int s = 0; insn->srcExists(s); ++s) {
3792 const Value *src = insn->src(s).rep();
3793 if (insn->src(s).getFile() != FILE_GPR)
3794 continue;
3795 if (src->reg.data.id == 255)
3796 continue;
3797
3798 a = src->reg.data.id;
3799 b = a + src->reg.size / 4;
3800 for (int r = a; r < b; ++r)
3801 srcs.set(r);
3802 }
3803
3804 if (!srcs.popCount())
3805 return false;
3806
3807 // Do not emit a read dependency barrier when the output GPRs are equal to
3808 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3809 // be produced and WaR hazards are prevented.
3810 for (int d = 0; insn->defExists(d); ++d) {
3811 const Value *def = insn->def(d).rep();
3812 if (insn->def(d).getFile() != FILE_GPR)
3813 continue;
3814 if (def->reg.data.id == 255)
3815 continue;
3816
3817 a = def->reg.data.id;
3818 b = a + def->reg.size / 4;
3819 for (int r = a; r < b; ++r)
3820 defs.set(r);
3821 }
3822
3823 srcs.andNot(defs);
3824 if (!srcs.popCount())
3825 return false;
3826
3827 return true;
3828 }
3829
3830 // Return true when the given instruction needs to emit a write dependency
3831 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3832 // setting the maximum number of stall counts is not enough. This is only legal
3833 // if the instruction output something.
3834 bool
3835 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
3836 {
3837 if (!targ->isBarrierRequired(insn))
3838 return false;
3839
3840 for (int d = 0; insn->defExists(d); ++d) {
3841 if (insn->def(d).getFile() == FILE_GPR ||
3842 insn->def(d).getFile() == FILE_PREDICATE)
3843 return true;
3844 }
3845 return false;
3846 }
3847
3848 // Find the next instruction inside the same basic block which uses the output
3849 // of the given instruction in order to avoid RaW hazards.
3850 Instruction *
3851 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
3852 {
3853 Instruction *insn, *next;
3854 int minGPR, maxGPR;
3855
3856 if (!bari->defExists(0))
3857 return NULL;
3858
3859 minGPR = bari->def(0).rep()->reg.data.id;
3860 maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
3861
3862 for (insn = bari->next; insn != NULL; insn = next) {
3863 next = insn->next;
3864
3865 for (int s = 0; insn->srcExists(s); ++s) {
3866 const Value *src = insn->src(s).rep();
3867 if (bari->def(0).getFile() == FILE_GPR) {
3868 if (insn->src(s).getFile() != FILE_GPR ||
3869 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3870 src->reg.data.id > maxGPR)
3871 continue;
3872 return insn;
3873 } else
3874 if (bari->def(0).getFile() == FILE_PREDICATE) {
3875 if (insn->src(s).getFile() != FILE_PREDICATE ||
3876 src->reg.data.id != minGPR)
3877 continue;
3878 return insn;
3879 }
3880 }
3881 }
3882 return NULL;
3883 }
3884
3885 // Find the next instruction inside the same basic block which overwrites, at
3886 // least, one source of the given instruction in order to avoid WaR hazards.
3887 Instruction *
3888 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
3889 {
3890 Instruction *insn, *next;
3891 int minGPR, maxGPR;
3892
3893 for (insn = bari->next; insn != NULL; insn = next) {
3894 next = insn->next;
3895
3896 for (int d = 0; insn->defExists(d); ++d) {
3897 const Value *def = insn->def(d).rep();
3898 if (insn->def(d).getFile() != FILE_GPR)
3899 continue;
3900
3901 minGPR = def->reg.data.id;
3902 maxGPR = minGPR + def->reg.size / 4 - 1;
3903
3904 for (int s = 0; bari->srcExists(s); ++s) {
3905 const Value *src = bari->src(s).rep();
3906 if (bari->src(s).getFile() != FILE_GPR ||
3907 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3908 src->reg.data.id > maxGPR)
3909 continue;
3910 return insn;
3911 }
3912 }
3913 }
3914 return NULL;
3915 }
3916
3917 // Dependency barriers:
3918 // This pass is a bit ugly and could probably be improved by performing a
3919 // better allocation.
3920 //
3921 // The main idea is to avoid WaR and RaW hazards by emitting read/write
3922 // dependency barriers using the control codes.
3923 bool
3924 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
3925 {
3926 std::list<LiveBarUse> live_uses;
3927 std::list<LiveBarDef> live_defs;
3928 Instruction *insn, *next;
3929 BitSet bars(6, 1);
3930 int bar_id;
3931
3932 for (insn = bb->getEntry(); insn != NULL; insn = next) {
3933 Instruction *usei = NULL, *defi = NULL;
3934 bool need_wr_bar, need_rd_bar;
3935
3936 next = insn->next;
3937
3938 // Expire old barrier uses.
3939 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
3940 it != live_uses.end();) {
3941 if (insn->serial >= it->usei->serial) {
3942 int wr = getWrDepBar(it->insn);
3943 emitWtDepBar(insn, wr);
3944 bars.clr(wr); // free barrier
3945 it = live_uses.erase(it);
3946 continue;
3947 }
3948 ++it;
3949 }
3950
3951 // Expire old barrier defs.
3952 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
3953 it != live_defs.end();) {
3954 if (insn->serial >= it->defi->serial) {
3955 int rd = getRdDepBar(it->insn);
3956 emitWtDepBar(insn, rd);
3957 bars.clr(rd); // free barrier
3958 it = live_defs.erase(it);
3959 continue;
3960 }
3961 ++it;
3962 }
3963
3964 need_wr_bar = needWrDepBar(insn);
3965 need_rd_bar = needRdDepBar(insn);
3966
3967 if (need_wr_bar) {
3968 // When the instruction requires to emit a write dependency barrier
3969 // (all which write something at a variable latency), find the next
3970 // instruction which reads the outputs.
3971 usei = findFirstUse(insn);
3972
3973 // Allocate and emit a new barrier.
3974 bar_id = bars.findFreeRange(1);
3975 if (bar_id == -1)
3976 bar_id = 5;
3977 bars.set(bar_id);
3978 emitWrDepBar(insn, bar_id);
3979 if (usei)
3980 live_uses.push_back(LiveBarUse(insn, usei));
3981 }
3982
3983 if (need_rd_bar) {
3984 // When the instruction requires to emit a read dependency barrier
3985 // (all which read something at a variable latency), find the next
3986 // instruction which will write the inputs.
3987 defi = findFirstDef(insn);
3988
3989 if (usei && defi && usei->serial <= defi->serial)
3990 continue;
3991
3992 // Allocate and emit a new barrier.
3993 bar_id = bars.findFreeRange(1);
3994 if (bar_id == -1)
3995 bar_id = 5;
3996 bars.set(bar_id);
3997 emitRdDepBar(insn, bar_id);
3998 if (defi)
3999 live_defs.push_back(LiveBarDef(insn, defi));
4000 }
4001 }
4002
4003 // Remove unnecessary barrier waits.
4004 BitSet alive_bars(6, 1);
4005 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4006 int wr, rd, wt;
4007
4008 next = insn->next;
4009
4010 wr = getWrDepBar(insn);
4011 rd = getRdDepBar(insn);
4012 wt = getWtDepBar(insn);
4013
4014 for (int idx = 0; idx < 6; ++idx) {
4015 if (!(wt & (1 << idx)))
4016 continue;
4017 if (!alive_bars.test(idx)) {
4018 insn->sched &= ~(1 << (11 + idx));
4019 } else {
4020 alive_bars.clr(idx);
4021 }
4022 }
4023
4024 if (wr < 6)
4025 alive_bars.set(wr);
4026 if (rd < 6)
4027 alive_bars.set(rd);
4028 }
4029
4030 return true;
4031 }
4032
4033 bool
4034 SchedDataCalculatorGM107::visit(Function *func)
4035 {
4036 ArrayList insns;
4037
4038 func->orderInstructions(insns);
4039
4040 scoreBoards.resize(func->cfg.getSize());
4041 for (size_t i = 0; i < scoreBoards.size(); ++i)
4042 scoreBoards[i].wipe();
4043 return true;
4044 }
4045
4046 bool
4047 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4048 {
4049 Instruction *insn, *next = NULL;
4050 int cycle = 0;
4051
4052 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4053 /*XXX*/
4054 insn->sched = 0x7e0;
4055 }
4056
4057 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4058 return true;
4059
4060 // Insert read/write dependency barriers for instructions which don't
4061 // operate at a fixed latency.
4062 insertBarriers(bb);
4063
4064 score = &scoreBoards.at(bb->getId());
4065
4066 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4067 // back branches will wait until all target dependencies are satisfied
4068 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4069 continue;
4070 BasicBlock *in = BasicBlock::get(ei.getNode());
4071 score->setMax(&scoreBoards.at(in->getId()));
4072 }
4073
4074 #ifdef GM107_DEBUG_SCHED_DATA
4075 INFO("=== BB:%i initial scores\n", bb->getId());
4076 score->print(cycle);
4077 #endif
4078
4079 // Because barriers are allocated locally (intra-BB), we have to make sure
4080 // that all produced barriers have been consumed before entering inside a
4081 // new basic block. The best way is to do a global allocation pre RA but
4082 // it's really more difficult, especially because of the phi nodes. Anyways,
4083 // it seems like that waiting on a barrier which has already been consumed
4084 // doesn't add any additional cost, it's just not elegant!
4085 Instruction *start = bb->getEntry();
4086 if (start && bb->cfg.incidentCount() > 0) {
4087 for (int b = 0; b < 6; b++)
4088 emitWtDepBar(start, b);
4089 }
4090
4091 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4092 next = insn->next;
4093
4094 commitInsn(insn, cycle);
4095 int delay = calcDelay(next, cycle);
4096 setDelay(insn, delay, next);
4097 cycle += getStall(insn);
4098
4099 setReuseFlag(insn);
4100
4101 // XXX: The yield flag seems to destroy a bunch of things when it is
4102 // set on every instruction, need investigation.
4103 //emitYield(insn);
4104
4105 #ifdef GM107_DEBUG_SCHED_DATA
4106 printSchedInfo(cycle, insn);
4107 insn->print();
4108 next->print();
4109 #endif
4110 }
4111
4112 if (!insn)
4113 return true;
4114 commitInsn(insn, cycle);
4115
4116 int bbDelay = -1;
4117
4118 #ifdef GM107_DEBUG_SCHED_DATA
4119 fprintf(stderr, "last instruction is : ");
4120 insn->print();
4121 fprintf(stderr, "cycle=%d\n", cycle);
4122 #endif
4123
4124 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4125 BasicBlock *out = BasicBlock::get(ei.getNode());
4126
4127 if (ei.getType() != Graph::Edge::BACK) {
4128 // Only test the first instruction of the outgoing block.
4129 next = out->getEntry();
4130 if (next) {
4131 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4132 } else {
4133 // When the outgoing BB is empty, make sure to set the number of
4134 // stall counts needed by the instruction because we don't know the
4135 // next instruction.
4136 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4137 }
4138 } else {
4139 // Wait until all dependencies are satisfied.
4140 const int regsFree = score->getLatest();
4141 next = out->getFirst();
4142 for (int c = cycle; next && c < regsFree; next = next->next) {
4143 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4144 c += getStall(next);
4145 }
4146 next = NULL;
4147 }
4148 }
4149 if (bb->cfg.outgoingCount() != 1)
4150 next = NULL;
4151 setDelay(insn, bbDelay, next);
4152 cycle += getStall(insn);
4153
4154 score->rebase(cycle); // common base for initializing out blocks' scores
4155 return true;
4156 }
4157
4158 /*******************************************************************************
4159 * main
4160 ******************************************************************************/
4161
4162 void
4163 CodeEmitterGM107::prepareEmission(Function *func)
4164 {
4165 SchedDataCalculatorGM107 sched(targGM107);
4166 CodeEmitter::prepareEmission(func);
4167 sched.run(func, true, true);
4168 }
4169
// Number of 24-byte groups needed to hold `size` bytes, i.e. size / 24
// rounded up.  The caller in prepareEmission(Program *) adds 8 bytes per
// returned group to a block's size.
//
// The ceiling is computed from quotient and remainder rather than as
// (size + 23) / 24, because that sum wraps around for sizes above
// UINT32_MAX - 23 and would then yield a bogus, near-zero count.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   return size / 24 + (size % 24 != 0 ? 1 : 0);
}
4174
4175 void
4176 CodeEmitterGM107::prepareEmission(Program *prog)
4177 {
4178 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4179 !fi.end(); fi.next()) {
4180 Function *func = reinterpret_cast<Function *>(fi.get());
4181 func->binPos = prog->binSize;
4182 prepareEmission(func);
4183
4184 // adjust sizes & positions for schedulding info:
4185 if (prog->getTarget()->hasSWSched) {
4186 uint32_t adjPos = func->binPos;
4187 BasicBlock *bb = NULL;
4188 for (int i = 0; i < func->bbCount; ++i) {
4189 bb = func->bbArray[i];
4190 int32_t adjSize = bb->binSize;
4191 if (adjPos % 32) {
4192 adjSize -= 32 - adjPos % 32;
4193 if (adjSize < 0)
4194 adjSize = 0;
4195 }
4196 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4197 bb->binPos = adjPos;
4198 bb->binSize = adjSize;
4199 adjPos += adjSize;
4200 }
4201 if (bb)
4202 func->binSize = adjPos - func->binPos;
4203 }
4204
4205 prog->binSize += func->binSize;
4206 }
4207 }
4208
4209 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4210 : CodeEmitter(target),
4211 targGM107(target),
4212 writeIssueDelays(target->hasSWSched)
4213 {
4214 code = NULL;
4215 codeSize = codeSizeLimit = 0;
4216 relocInfo = NULL;
4217 }
4218
4219 CodeEmitter *
4220 TargetGM107::createCodeEmitterGM107(Program::Type type)
4221 {
4222 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4223 emit->setProgramType(type);
4224 return emit;
4225 }
4226
4227 } // namespace nv50_ir