f3ebd2b0be8f7b52a54d0300ea130d251cf537e1
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gm107.cpp
1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "codegen/nv50_ir_target_gm107.h"
26
27 //#define GM107_DEBUG_SCHED_DATA
28
29 namespace nv50_ir {
30
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34 CodeEmitterGM107(const TargetGM107 *);
35
36 virtual bool emitInstruction(Instruction *);
37 virtual uint32_t getMinEncodingSize(const Instruction *) const;
38
39 virtual void prepareEmission(Program *);
40 virtual void prepareEmission(Function *);
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 private:
45 const TargetGM107 *targGM107;
46
47 Program::Type progType;
48
49 const Instruction *insn;
50 const bool writeIssueDelays;
51 uint32_t *data;
52
53 private:
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value *);
61 inline void emitGPR(int pos) {
62 emitGPR(pos, (const Value *)NULL);
63 }
64 inline void emitGPR(int pos, const ValueRef &ref) {
65 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66 }
67 inline void emitGPR(int pos, const ValueRef *ref) {
68 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69 }
70 inline void emitGPR(int pos, const ValueDef &def) {
71 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72 }
73 inline void emitSYS(int, const Value *);
74 inline void emitSYS(int pos, const ValueRef &ref) {
75 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76 }
77 inline void emitPRED(int, const Value *);
78 inline void emitPRED(int pos) {
79 emitPRED(pos, (const Value *)NULL);
80 }
81 inline void emitPRED(int pos, const ValueRef &ref) {
82 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83 }
84 inline void emitPRED(int pos, const ValueDef &def) {
85 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86 }
87 inline void emitADDR(int, int, int, int, const ValueRef &);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89 inline bool longIMMD(const ValueRef &);
90 inline void emitIMMD(int, int, const ValueRef &);
91
92 void emitCond3(int, CondCode);
93 void emitCond4(int, CondCode);
94 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef &);
101 inline void emitNEG(int, const ValueRef &);
102 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode, int);
105 inline void emitRND(int pos) {
106 emitRND(pos, insn->rnd, -1);
107 }
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef &);
110
111 void emitEXIT();
112 void emitBRA();
113 void emitCAL();
114 void emitPCNT();
115 void emitCONT();
116 void emitPBK();
117 void emitBRK();
118 void emitPRET();
119 void emitRET();
120 void emitSSY();
121 void emitSYNC();
122 void emitSAM();
123 void emitRAM();
124
125 void emitMOV();
126 void emitS2R();
127 void emitF2F();
128 void emitF2I();
129 void emitI2F();
130 void emitI2I();
131 void emitSEL();
132 void emitSHFL();
133
134 void emitDADD();
135 void emitDMUL();
136 void emitDFMA();
137 void emitDMNMX();
138 void emitDSET();
139 void emitDSETP();
140
141 void emitFADD();
142 void emitFMUL();
143 void emitFFMA();
144 void emitMUFU();
145 void emitFMNMX();
146 void emitRRO();
147 void emitFCMP();
148 void emitFSET();
149 void emitFSETP();
150 void emitFSWZADD();
151
152 void emitLOP();
153 void emitNOT();
154 void emitIADD();
155 void emitIMUL();
156 void emitIMAD();
157 void emitISCADD();
158 void emitIMNMX();
159 void emitICMP();
160 void emitISET();
161 void emitISETP();
162 void emitSHL();
163 void emitSHR();
164 void emitPOPC();
165 void emitBFI();
166 void emitBFE();
167 void emitFLO();
168
169 void emitLDSTs(int, DataType);
170 void emitLDSTc(int);
171 void emitLDC();
172 void emitLDL();
173 void emitLDS();
174 void emitLD();
175 void emitSTL();
176 void emitSTS();
177 void emitST();
178 void emitALD();
179 void emitAST();
180 void emitISBERD();
181 void emitAL2P();
182 void emitIPA();
183 void emitATOM();
184 void emitATOMS();
185 void emitRED();
186 void emitCCTL();
187
188 void emitPIXLD();
189
190 void emitTEXs(int);
191 void emitTEX();
192 void emitTLD();
193 void emitTLD4();
194 void emitTXD();
195 void emitTXQ();
196 void emitTMML();
197 void emitDEPBAR();
198
199 void emitNOP();
200 void emitKIL();
201 void emitOUT();
202
203 void emitBAR();
204 void emitMEMBAR();
205
206 void emitVOTE();
207
208 void emitSUTarget();
209 void emitSUHandle(const int s);
210 void emitSUSTx();
211 void emitSULDx();
212 void emitSUREDx();
213 };
214
215 /*******************************************************************************
216 * general instruction layout/fields
217 ******************************************************************************/
218
219 void
220 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
221 {
222 if (b >= 0) {
223 uint32_t m = ((1ULL << s) - 1);
224 uint64_t d = (uint64_t)(v & m) << b;
225 assert(!(v & ~m) || (v & ~m) == ~m);
226 data[1] |= d >> 32;
227 data[0] |= d;
228 }
229 }
230
231 void
232 CodeEmitterGM107::emitPred()
233 {
234 if (insn->predSrc >= 0) {
235 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
236 emitField(19, 1, insn->cc == CC_NOT_P);
237 } else {
238 emitField(16, 3, 7);
239 }
240 }
241
242 void
243 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
244 {
245 code[0] = 0x00000000;
246 code[1] = hi;
247 if (pred)
248 emitPred();
249 }
250
251 void
252 CodeEmitterGM107::emitGPR(int pos, const Value *val)
253 {
254 emitField(pos, 8, val ? val->reg.data.id : 255);
255 }
256
257 void
258 CodeEmitterGM107::emitSYS(int pos, const Value *val)
259 {
260 int id = val ? val->reg.data.id : -1;
261
262 switch (id) {
263 case SV_LANEID : id = 0x00; break;
264 case SV_VERTEX_COUNT : id = 0x10; break;
265 case SV_INVOCATION_ID : id = 0x11; break;
266 case SV_THREAD_KILL : id = 0x13; break;
267 case SV_INVOCATION_INFO: id = 0x1d; break;
268 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
269 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
270 default:
271 assert(!"invalid system value");
272 id = 0;
273 break;
274 }
275
276 emitField(pos, 8, id);
277 }
278
279 void
280 CodeEmitterGM107::emitPRED(int pos, const Value *val)
281 {
282 emitField(pos, 3, val ? val->reg.data.id : 7);
283 }
284
285 void
286 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
287 const ValueRef &ref)
288 {
289 const Value *v = ref.get();
290 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
291 if (gpr >= 0)
292 emitGPR(gpr, ref.getIndirect(0));
293 emitField(off, len, v->reg.data.offset >> shr);
294 }
295
296 void
297 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
298 const ValueRef &ref)
299 {
300 const Value *v = ref.get();
301 const Symbol *s = v->asSym();
302
303 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
304
305 emitField(buf, 5, v->reg.fileIndex);
306 if (gpr >= 0)
307 emitGPR(gpr, ref.getIndirect(0));
308 emitField(off, 16, s->reg.data.offset >> shr);
309 }
310
311 bool
312 CodeEmitterGM107::longIMMD(const ValueRef &ref)
313 {
314 if (ref.getFile() == FILE_IMMEDIATE) {
315 const ImmediateValue *imm = ref.get()->asImm();
316 if (isFloatType(insn->sType)) {
317 if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
318 return true;
319 } else {
320 if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
321 (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
322 return true;
323 }
324 }
325 return false;
326 }
327
328 void
329 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
330 {
331 const ImmediateValue *imm = ref.get()->asImm();
332 uint32_t val = imm->reg.data.u32;
333
334 if (len == 19) {
335 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
336 assert(!(val & 0x00000fff));
337 val >>= 12;
338 } else if (insn->sType == TYPE_F64) {
339 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
340 val = imm->reg.data.u64 >> 44;
341 }
342 assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
343 emitField( 56, 1, (val & 0x80000) >> 19);
344 emitField(pos, len, (val & 0x7ffff));
345 } else {
346 emitField(pos, len, val);
347 }
348 }
349
350 /*******************************************************************************
351 * modifiers
352 ******************************************************************************/
353
354 void
355 CodeEmitterGM107::emitCond3(int pos, CondCode code)
356 {
357 int data = 0;
358
359 switch (code) {
360 case CC_FL : data = 0x00; break;
361 case CC_LTU:
362 case CC_LT : data = 0x01; break;
363 case CC_EQU:
364 case CC_EQ : data = 0x02; break;
365 case CC_LEU:
366 case CC_LE : data = 0x03; break;
367 case CC_GTU:
368 case CC_GT : data = 0x04; break;
369 case CC_NEU:
370 case CC_NE : data = 0x05; break;
371 case CC_GEU:
372 case CC_GE : data = 0x06; break;
373 case CC_TR : data = 0x07; break;
374 default:
375 assert(!"invalid cond3");
376 break;
377 }
378
379 emitField(pos, 3, data);
380 }
381
382 void
383 CodeEmitterGM107::emitCond4(int pos, CondCode code)
384 {
385 int data = 0;
386
387 switch (code) {
388 case CC_FL: data = 0x00; break;
389 case CC_LT: data = 0x01; break;
390 case CC_EQ: data = 0x02; break;
391 case CC_LE: data = 0x03; break;
392 case CC_GT: data = 0x04; break;
393 case CC_NE: data = 0x05; break;
394 case CC_GE: data = 0x06; break;
395 // case CC_NUM: data = 0x07; break;
396 // case CC_NAN: data = 0x08; break;
397 case CC_LTU: data = 0x09; break;
398 case CC_EQU: data = 0x0a; break;
399 case CC_LEU: data = 0x0b; break;
400 case CC_GTU: data = 0x0c; break;
401 case CC_NEU: data = 0x0d; break;
402 case CC_GEU: data = 0x0e; break;
403 case CC_TR: data = 0x0f; break;
404 default:
405 assert(!"invalid cond4");
406 break;
407 }
408
409 emitField(pos, 4, data);
410 }
411
412 void
413 CodeEmitterGM107::emitO(int pos)
414 {
415 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
416 }
417
418 void
419 CodeEmitterGM107::emitP(int pos)
420 {
421 emitField(pos, 1, insn->perPatch);
422 }
423
424 void
425 CodeEmitterGM107::emitSAT(int pos)
426 {
427 emitField(pos, 1, insn->saturate);
428 }
429
430 void
431 CodeEmitterGM107::emitCC(int pos)
432 {
433 emitField(pos, 1, insn->flagsDef >= 0);
434 }
435
436 void
437 CodeEmitterGM107::emitX(int pos)
438 {
439 emitField(pos, 1, insn->flagsSrc >= 0);
440 }
441
442 void
443 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
444 {
445 emitField(pos, 1, ref.mod.abs());
446 }
447
448 void
449 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
450 {
451 emitField(pos, 1, ref.mod.neg());
452 }
453
454 void
455 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
456 {
457 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
458 }
459
460 void
461 CodeEmitterGM107::emitFMZ(int pos, int len)
462 {
463 emitField(pos, len, insn->dnz << 1 | insn->ftz);
464 }
465
466 void
467 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
468 {
469 int rm = 0, ri = 0;
470 switch (rnd) {
471 case ROUND_NI: ri = 1;
472 case ROUND_N : rm = 0; break;
473 case ROUND_MI: ri = 1;
474 case ROUND_M : rm = 1; break;
475 case ROUND_PI: ri = 1;
476 case ROUND_P : rm = 2; break;
477 case ROUND_ZI: ri = 1;
478 case ROUND_Z : rm = 3; break;
479 default:
480 assert(!"invalid round mode");
481 break;
482 }
483 emitField(rip, 1, ri);
484 emitField(rmp, 2, rm);
485 }
486
487 void
488 CodeEmitterGM107::emitPDIV(int pos)
489 {
490 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
491 if (insn->postFactor > 0)
492 emitField(pos, 3, 7 - insn->postFactor);
493 else
494 emitField(pos, 3, 0 - insn->postFactor);
495 }
496
497 void
498 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
499 {
500 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
501 }
502
503 /*******************************************************************************
504 * control flow
505 ******************************************************************************/
506
507 void
508 CodeEmitterGM107::emitEXIT()
509 {
510 emitInsn (0xe3000000);
511 emitCond5(0x00, CC_TR);
512 }
513
514 void
515 CodeEmitterGM107::emitBRA()
516 {
517 const FlowInstruction *insn = this->insn->asFlow();
518 int gpr = -1;
519
520 if (insn->indirect) {
521 if (insn->absolute)
522 emitInsn(0xe2000000); // JMX
523 else
524 emitInsn(0xe2500000); // BRX
525 gpr = 0x08;
526 } else {
527 if (insn->absolute)
528 emitInsn(0xe2100000); // JMP
529 else
530 emitInsn(0xe2400000); // BRA
531 emitField(0x07, 1, insn->allWarp);
532 }
533
534 emitField(0x06, 1, insn->limit);
535 emitCond5(0x00, CC_TR);
536
537 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
538 int32_t pos = insn->target.bb->binPos;
539 if (writeIssueDelays && !(pos & 0x1f))
540 pos += 8;
541 if (!insn->absolute)
542 emitField(0x14, 24, pos - (codeSize + 8));
543 else
544 emitField(0x14, 32, pos);
545 } else {
546 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
547 emitField(0x05, 1, 1);
548 }
549 }
550
551 void
552 CodeEmitterGM107::emitCAL()
553 {
554 const FlowInstruction *insn = this->insn->asFlow();
555
556 if (insn->absolute) {
557 emitInsn(0xe2200000, 0); // JCAL
558 } else {
559 emitInsn(0xe2600000, 0); // CAL
560 }
561
562 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
563 if (!insn->absolute)
564 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
565 else {
566 if (insn->builtin) {
567 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
568 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
569 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
570 } else {
571 emitField(0x14, 32, insn->target.bb->binPos);
572 }
573 }
574 } else {
575 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
576 emitField(0x05, 1, 1);
577 }
578 }
579
580 void
581 CodeEmitterGM107::emitPCNT()
582 {
583 const FlowInstruction *insn = this->insn->asFlow();
584
585 emitInsn(0xe2b00000, 0);
586
587 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
588 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
589 } else {
590 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
591 emitField(0x05, 1, 1);
592 }
593 }
594
595 void
596 CodeEmitterGM107::emitCONT()
597 {
598 emitInsn (0xe3500000);
599 emitCond5(0x00, CC_TR);
600 }
601
602 void
603 CodeEmitterGM107::emitPBK()
604 {
605 const FlowInstruction *insn = this->insn->asFlow();
606
607 emitInsn(0xe2a00000, 0);
608
609 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
610 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
611 } else {
612 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
613 emitField(0x05, 1, 1);
614 }
615 }
616
617 void
618 CodeEmitterGM107::emitBRK()
619 {
620 emitInsn (0xe3400000);
621 emitCond5(0x00, CC_TR);
622 }
623
624 void
625 CodeEmitterGM107::emitPRET()
626 {
627 const FlowInstruction *insn = this->insn->asFlow();
628
629 emitInsn(0xe2700000, 0);
630
631 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
632 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
633 } else {
634 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
635 emitField(0x05, 1, 1);
636 }
637 }
638
639 void
640 CodeEmitterGM107::emitRET()
641 {
642 emitInsn (0xe3200000);
643 emitCond5(0x00, CC_TR);
644 }
645
646 void
647 CodeEmitterGM107::emitSSY()
648 {
649 const FlowInstruction *insn = this->insn->asFlow();
650
651 emitInsn(0xe2900000, 0);
652
653 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
654 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
655 } else {
656 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
657 emitField(0x05, 1, 1);
658 }
659 }
660
661 void
662 CodeEmitterGM107::emitSYNC()
663 {
664 emitInsn (0xf0f80000);
665 emitCond5(0x00, CC_TR);
666 }
667
668 void
669 CodeEmitterGM107::emitSAM()
670 {
671 emitInsn(0xe3700000, 0);
672 }
673
674 void
675 CodeEmitterGM107::emitRAM()
676 {
677 emitInsn(0xe3800000, 0);
678 }
679
680 /*******************************************************************************
681 * predicate/cc
682 ******************************************************************************/
683
684 /*******************************************************************************
685 * movement / conversion
686 ******************************************************************************/
687
688 void
689 CodeEmitterGM107::emitMOV()
690 {
691 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
692 switch (insn->src(0).getFile()) {
693 case FILE_GPR:
694 if (insn->def(0).getFile() == FILE_PREDICATE) {
695 emitInsn(0x5b6a0000);
696 emitGPR (0x08);
697 } else {
698 emitInsn(0x5c980000);
699 }
700 emitGPR (0x14, insn->src(0));
701 break;
702 case FILE_MEMORY_CONST:
703 emitInsn(0x4c980000);
704 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
705 break;
706 case FILE_IMMEDIATE:
707 emitInsn(0x38980000);
708 emitIMMD(0x14, 19, insn->src(0));
709 break;
710 case FILE_PREDICATE:
711 emitInsn(0x50880000);
712 emitPRED(0x0c, insn->src(0));
713 emitPRED(0x1d);
714 emitPRED(0x27);
715 break;
716 default:
717 assert(!"bad src file");
718 break;
719 }
720 if (insn->def(0).getFile() != FILE_PREDICATE &&
721 insn->src(0).getFile() != FILE_PREDICATE)
722 emitField(0x27, 4, insn->lanes);
723 } else {
724 emitInsn (0x01000000);
725 emitIMMD (0x14, 32, insn->src(0));
726 emitField(0x0c, 4, insn->lanes);
727 }
728
729 if (insn->def(0).getFile() == FILE_PREDICATE) {
730 emitPRED(0x27);
731 emitPRED(0x03, insn->def(0));
732 emitPRED(0x00);
733 } else {
734 emitGPR(0x00, insn->def(0));
735 }
736 }
737
738 void
739 CodeEmitterGM107::emitS2R()
740 {
741 emitInsn(0xf0c80000);
742 emitSYS (0x14, insn->src(0));
743 emitGPR (0x00, insn->def(0));
744 }
745
746 void
747 CodeEmitterGM107::emitF2F()
748 {
749 RoundMode rnd = insn->rnd;
750
751 switch (insn->op) {
752 case OP_FLOOR: rnd = ROUND_MI; break;
753 case OP_CEIL : rnd = ROUND_PI; break;
754 case OP_TRUNC: rnd = ROUND_ZI; break;
755 default:
756 break;
757 }
758
759 switch (insn->src(0).getFile()) {
760 case FILE_GPR:
761 emitInsn(0x5ca80000);
762 emitGPR (0x14, insn->src(0));
763 break;
764 case FILE_MEMORY_CONST:
765 emitInsn(0x4ca80000);
766 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
767 break;
768 case FILE_IMMEDIATE:
769 emitInsn(0x38a80000);
770 emitIMMD(0x14, 19, insn->src(0));
771 break;
772 default:
773 assert(!"bad src0 file");
774 break;
775 }
776
777 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
778 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
779 emitCC (0x2f);
780 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
781 emitFMZ (0x2c, 1);
782 emitField(0x29, 1, insn->subOp);
783 emitRND (0x27, rnd, 0x2a);
784 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
785 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
786 emitGPR (0x00, insn->def(0));
787 }
788
789 void
790 CodeEmitterGM107::emitF2I()
791 {
792 RoundMode rnd = insn->rnd;
793
794 switch (insn->op) {
795 case OP_FLOOR: rnd = ROUND_M; break;
796 case OP_CEIL : rnd = ROUND_P; break;
797 case OP_TRUNC: rnd = ROUND_Z; break;
798 default:
799 break;
800 }
801
802 switch (insn->src(0).getFile()) {
803 case FILE_GPR:
804 emitInsn(0x5cb00000);
805 emitGPR (0x14, insn->src(0));
806 break;
807 case FILE_MEMORY_CONST:
808 emitInsn(0x4cb00000);
809 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
810 break;
811 case FILE_IMMEDIATE:
812 emitInsn(0x38b00000);
813 emitIMMD(0x14, 19, insn->src(0));
814 break;
815 default:
816 assert(!"bad src0 file");
817 break;
818 }
819
820 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
821 emitCC (0x2f);
822 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
823 emitFMZ (0x2c, 1);
824 emitRND (0x27, rnd, 0x2a);
825 emitField(0x0c, 1, isSignedType(insn->dType));
826 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
827 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
828 emitGPR (0x00, insn->def(0));
829 }
830
831 void
832 CodeEmitterGM107::emitI2F()
833 {
834 RoundMode rnd = insn->rnd;
835
836 switch (insn->op) {
837 case OP_FLOOR: rnd = ROUND_M; break;
838 case OP_CEIL : rnd = ROUND_P; break;
839 case OP_TRUNC: rnd = ROUND_Z; break;
840 default:
841 break;
842 }
843
844 switch (insn->src(0).getFile()) {
845 case FILE_GPR:
846 emitInsn(0x5cb80000);
847 emitGPR (0x14, insn->src(0));
848 break;
849 case FILE_MEMORY_CONST:
850 emitInsn(0x4cb80000);
851 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
852 break;
853 case FILE_IMMEDIATE:
854 emitInsn(0x38b80000);
855 emitIMMD(0x14, 19, insn->src(0));
856 break;
857 default:
858 assert(!"bad src0 file");
859 break;
860 }
861
862 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
863 emitCC (0x2f);
864 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
865 emitField(0x29, 2, insn->subOp);
866 emitRND (0x27, rnd, -1);
867 emitField(0x0d, 1, isSignedType(insn->sType));
868 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
869 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
870 emitGPR (0x00, insn->def(0));
871 }
872
873 void
874 CodeEmitterGM107::emitI2I()
875 {
876 switch (insn->src(0).getFile()) {
877 case FILE_GPR:
878 emitInsn(0x5ce00000);
879 emitGPR (0x14, insn->src(0));
880 break;
881 case FILE_MEMORY_CONST:
882 emitInsn(0x4ce00000);
883 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
884 break;
885 case FILE_IMMEDIATE:
886 emitInsn(0x38e00000);
887 emitIMMD(0x14, 19, insn->src(0));
888 break;
889 default:
890 assert(!"bad src0 file");
891 break;
892 }
893
894 emitSAT (0x32);
895 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
896 emitCC (0x2f);
897 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
898 emitField(0x29, 2, insn->subOp);
899 emitField(0x0d, 1, isSignedType(insn->sType));
900 emitField(0x0c, 1, isSignedType(insn->dType));
901 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
902 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
903 emitGPR (0x00, insn->def(0));
904 }
905
906 static void
907 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
908 {
909 int loc = entry->loc;
910 if (data.force_persample_interp)
911 code[loc + 1] |= 1 << 10;
912 else
913 code[loc + 1] &= ~(1 << 10);
914 }
915
916 void
917 CodeEmitterGM107::emitSEL()
918 {
919 switch (insn->src(1).getFile()) {
920 case FILE_GPR:
921 emitInsn(0x5ca00000);
922 emitGPR (0x14, insn->src(1));
923 break;
924 case FILE_MEMORY_CONST:
925 emitInsn(0x4ca00000);
926 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
927 break;
928 case FILE_IMMEDIATE:
929 emitInsn(0x38a00000);
930 emitIMMD(0x14, 19, insn->src(1));
931 break;
932 default:
933 assert(!"bad src1 file");
934 break;
935 }
936
937 emitINV (0x2a, insn->src(2));
938 emitPRED(0x27, insn->src(2));
939 emitGPR (0x08, insn->src(0));
940 emitGPR (0x00, insn->def(0));
941
942 if (insn->subOp == 1) {
943 addInterp(0, 0, selpFlip);
944 }
945 }
946
947 void
948 CodeEmitterGM107::emitSHFL()
949 {
950 int type = 0;
951
952 emitInsn (0xef100000);
953
954 switch (insn->src(1).getFile()) {
955 case FILE_GPR:
956 emitGPR(0x14, insn->src(1));
957 break;
958 case FILE_IMMEDIATE:
959 emitIMMD(0x14, 5, insn->src(1));
960 type |= 1;
961 break;
962 default:
963 assert(!"invalid src1 file");
964 break;
965 }
966
967 /*XXX: what is this arg? hardcode immediate for now */
968 emitField(0x22, 13, 0x1c03);
969 type |= 2;
970
971 emitPRED (0x30);
972 emitField(0x1e, 2, insn->subOp);
973 emitField(0x1c, 2, type);
974 emitGPR (0x08, insn->src(0));
975 emitGPR (0x00, insn->def(0));
976 }
977
978 /*******************************************************************************
979 * double
980 ******************************************************************************/
981
982 void
983 CodeEmitterGM107::emitDADD()
984 {
985 switch (insn->src(1).getFile()) {
986 case FILE_GPR:
987 emitInsn(0x5c700000);
988 emitGPR (0x14, insn->src(1));
989 break;
990 case FILE_MEMORY_CONST:
991 emitInsn(0x4c700000);
992 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
993 break;
994 case FILE_IMMEDIATE:
995 emitInsn(0x38700000);
996 emitIMMD(0x14, 19, insn->src(1));
997 break;
998 default:
999 assert(!"bad src1 file");
1000 break;
1001 }
1002 emitABS(0x31, insn->src(1));
1003 emitNEG(0x30, insn->src(0));
1004 emitCC (0x2f);
1005 emitABS(0x2e, insn->src(0));
1006 emitNEG(0x2d, insn->src(1));
1007
1008 if (insn->op == OP_SUB)
1009 code[1] ^= 0x00002000;
1010
1011 emitGPR(0x08, insn->src(0));
1012 emitGPR(0x00, insn->def(0));
1013 }
1014
1015 void
1016 CodeEmitterGM107::emitDMUL()
1017 {
1018 switch (insn->src(1).getFile()) {
1019 case FILE_GPR:
1020 emitInsn(0x5c800000);
1021 emitGPR (0x14, insn->src(1));
1022 break;
1023 case FILE_MEMORY_CONST:
1024 emitInsn(0x4c800000);
1025 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1026 break;
1027 case FILE_IMMEDIATE:
1028 emitInsn(0x38800000);
1029 emitIMMD(0x14, 19, insn->src(1));
1030 break;
1031 default:
1032 assert(!"bad src1 file");
1033 break;
1034 }
1035
1036 emitNEG2(0x30, insn->src(0), insn->src(1));
1037 emitCC (0x2f);
1038 emitRND (0x27);
1039 emitGPR (0x08, insn->src(0));
1040 emitGPR (0x00, insn->def(0));
1041 }
1042
1043 void
1044 CodeEmitterGM107::emitDFMA()
1045 {
1046 switch(insn->src(2).getFile()) {
1047 case FILE_GPR:
1048 switch (insn->src(1).getFile()) {
1049 case FILE_GPR:
1050 emitInsn(0x5b700000);
1051 emitGPR (0x14, insn->src(1));
1052 break;
1053 case FILE_MEMORY_CONST:
1054 emitInsn(0x4b700000);
1055 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1056 break;
1057 case FILE_IMMEDIATE:
1058 emitInsn(0x36700000);
1059 emitIMMD(0x14, 19, insn->src(1));
1060 break;
1061 default:
1062 assert(!"bad src1 file");
1063 break;
1064 }
1065 emitGPR (0x27, insn->src(2));
1066 break;
1067 case FILE_MEMORY_CONST:
1068 emitInsn(0x53700000);
1069 emitGPR (0x27, insn->src(1));
1070 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1071 break;
1072 default:
1073 assert(!"bad src2 file");
1074 break;
1075 }
1076
1077 emitRND (0x32);
1078 emitNEG (0x31, insn->src(2));
1079 emitNEG2(0x30, insn->src(0), insn->src(1));
1080 emitCC (0x2f);
1081 emitGPR (0x08, insn->src(0));
1082 emitGPR (0x00, insn->def(0));
1083 }
1084
1085 void
1086 CodeEmitterGM107::emitDMNMX()
1087 {
1088 switch (insn->src(1).getFile()) {
1089 case FILE_GPR:
1090 emitInsn(0x5c500000);
1091 emitGPR (0x14, insn->src(1));
1092 break;
1093 case FILE_MEMORY_CONST:
1094 emitInsn(0x4c500000);
1095 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1096 break;
1097 case FILE_IMMEDIATE:
1098 emitInsn(0x38500000);
1099 emitIMMD(0x14, 19, insn->src(1));
1100 break;
1101 default:
1102 assert(!"bad src1 file");
1103 break;
1104 }
1105
1106 emitABS (0x31, insn->src(1));
1107 emitNEG (0x30, insn->src(0));
1108 emitCC (0x2f);
1109 emitABS (0x2e, insn->src(0));
1110 emitNEG (0x2d, insn->src(1));
1111 emitField(0x2a, 1, insn->op == OP_MAX);
1112 emitPRED (0x27);
1113 emitGPR (0x08, insn->src(0));
1114 emitGPR (0x00, insn->def(0));
1115 }
1116
1117 void
1118 CodeEmitterGM107::emitDSET()
1119 {
1120 const CmpInstruction *insn = this->insn->asCmp();
1121
1122 switch (insn->src(1).getFile()) {
1123 case FILE_GPR:
1124 emitInsn(0x59000000);
1125 emitGPR (0x14, insn->src(1));
1126 break;
1127 case FILE_MEMORY_CONST:
1128 emitInsn(0x49000000);
1129 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1130 break;
1131 case FILE_IMMEDIATE:
1132 emitInsn(0x32000000);
1133 emitIMMD(0x14, 19, insn->src(1));
1134 break;
1135 default:
1136 assert(!"bad src1 file");
1137 break;
1138 }
1139
1140 if (insn->op != OP_SET) {
1141 switch (insn->op) {
1142 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1143 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1144 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1145 default:
1146 assert(!"invalid set op");
1147 break;
1148 }
1149 emitPRED(0x27, insn->src(2));
1150 } else {
1151 emitPRED(0x27);
1152 }
1153
1154 emitABS (0x36, insn->src(0));
1155 emitNEG (0x35, insn->src(1));
1156 emitField(0x34, 1, insn->dType == TYPE_F32);
1157 emitCond4(0x30, insn->setCond);
1158 emitCC (0x2f);
1159 emitABS (0x2c, insn->src(1));
1160 emitNEG (0x2b, insn->src(0));
1161 emitGPR (0x08, insn->src(0));
1162 emitGPR (0x00, insn->def(0));
1163 }
1164
1165 void
1166 CodeEmitterGM107::emitDSETP()
1167 {
1168 const CmpInstruction *insn = this->insn->asCmp();
1169
1170 switch (insn->src(1).getFile()) {
1171 case FILE_GPR:
1172 emitInsn(0x5b800000);
1173 emitGPR (0x14, insn->src(1));
1174 break;
1175 case FILE_MEMORY_CONST:
1176 emitInsn(0x4b800000);
1177 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1178 break;
1179 case FILE_IMMEDIATE:
1180 emitInsn(0x36800000);
1181 emitIMMD(0x14, 19, insn->src(1));
1182 break;
1183 default:
1184 assert(!"bad src1 file");
1185 break;
1186 }
1187
1188 if (insn->op != OP_SET) {
1189 switch (insn->op) {
1190 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1191 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1192 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1193 default:
1194 assert(!"invalid set op");
1195 break;
1196 }
1197 emitPRED(0x27, insn->src(2));
1198 } else {
1199 emitPRED(0x27);
1200 }
1201
1202 emitCond4(0x30, insn->setCond);
1203 emitABS (0x2c, insn->src(1));
1204 emitNEG (0x2b, insn->src(0));
1205 emitGPR (0x08, insn->src(0));
1206 emitABS (0x07, insn->src(0));
1207 emitNEG (0x06, insn->src(1));
1208 emitPRED (0x03, insn->def(0));
1209 if (insn->defExists(1))
1210 emitPRED(0x00, insn->def(1));
1211 else
1212 emitPRED(0x00);
1213 }
1214
1215 /*******************************************************************************
1216 * float
1217 ******************************************************************************/
1218
1219 void
1220 CodeEmitterGM107::emitFADD()
1221 {
1222 if (!longIMMD(insn->src(1))) {
1223 switch (insn->src(1).getFile()) {
1224 case FILE_GPR:
1225 emitInsn(0x5c580000);
1226 emitGPR (0x14, insn->src(1));
1227 break;
1228 case FILE_MEMORY_CONST:
1229 emitInsn(0x4c580000);
1230 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1231 break;
1232 case FILE_IMMEDIATE:
1233 emitInsn(0x38580000);
1234 emitIMMD(0x14, 19, insn->src(1));
1235 break;
1236 default:
1237 assert(!"bad src1 file");
1238 break;
1239 }
1240 emitSAT(0x32);
1241 emitABS(0x31, insn->src(1));
1242 emitNEG(0x30, insn->src(0));
1243 emitCC (0x2f);
1244 emitABS(0x2e, insn->src(0));
1245 emitNEG(0x2d, insn->src(1));
1246 emitFMZ(0x2c, 1);
1247
1248 if (insn->op == OP_SUB)
1249 code[1] ^= 0x00002000;
1250 } else {
1251 emitInsn(0x08000000);
1252 emitABS(0x39, insn->src(1));
1253 emitNEG(0x38, insn->src(0));
1254 emitFMZ(0x37, 1);
1255 emitABS(0x36, insn->src(0));
1256 emitNEG(0x35, insn->src(1));
1257 emitCC (0x34);
1258 emitIMMD(0x14, 32, insn->src(1));
1259
1260 if (insn->op == OP_SUB)
1261 code[1] ^= 0x00080000;
1262 }
1263
1264 emitGPR(0x08, insn->src(0));
1265 emitGPR(0x00, insn->def(0));
1266 }
1267
1268 void
1269 CodeEmitterGM107::emitFMUL()
1270 {
1271 if (!longIMMD(insn->src(1))) {
1272 switch (insn->src(1).getFile()) {
1273 case FILE_GPR:
1274 emitInsn(0x5c680000);
1275 emitGPR (0x14, insn->src(1));
1276 break;
1277 case FILE_MEMORY_CONST:
1278 emitInsn(0x4c680000);
1279 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1280 break;
1281 case FILE_IMMEDIATE:
1282 emitInsn(0x38680000);
1283 emitIMMD(0x14, 19, insn->src(1));
1284 break;
1285 default:
1286 assert(!"bad src1 file");
1287 break;
1288 }
1289 emitSAT (0x32);
1290 emitNEG2(0x30, insn->src(0), insn->src(1));
1291 emitCC (0x2f);
1292 emitFMZ (0x2c, 2);
1293 emitPDIV(0x29);
1294 emitRND (0x27);
1295 } else {
1296 emitInsn(0x1e000000);
1297 emitSAT (0x37);
1298 emitFMZ (0x35, 2);
1299 emitCC (0x34);
1300 emitIMMD(0x14, 32, insn->src(1));
1301 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1302 code[1] ^= 0x00080000; /* flip immd sign bit */
1303 }
1304
1305 emitGPR(0x08, insn->src(0));
1306 emitGPR(0x00, insn->def(0));
1307 }
1308
1309 void
1310 CodeEmitterGM107::emitFFMA()
1311 {
1312 /*XXX: ffma32i exists, but not using it as third src overlaps dst */
1313 switch(insn->src(2).getFile()) {
1314 case FILE_GPR:
1315 switch (insn->src(1).getFile()) {
1316 case FILE_GPR:
1317 emitInsn(0x59800000);
1318 emitGPR (0x14, insn->src(1));
1319 break;
1320 case FILE_MEMORY_CONST:
1321 emitInsn(0x49800000);
1322 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1323 break;
1324 case FILE_IMMEDIATE:
1325 emitInsn(0x32800000);
1326 emitIMMD(0x14, 19, insn->src(1));
1327 break;
1328 default:
1329 assert(!"bad src1 file");
1330 break;
1331 }
1332 emitGPR (0x27, insn->src(2));
1333 break;
1334 case FILE_MEMORY_CONST:
1335 emitInsn(0x51800000);
1336 emitGPR (0x27, insn->src(1));
1337 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1338 break;
1339 default:
1340 assert(!"bad src2 file");
1341 break;
1342 }
1343 emitRND (0x33);
1344 emitSAT (0x32);
1345 emitNEG (0x31, insn->src(2));
1346 emitNEG2(0x30, insn->src(0), insn->src(1));
1347 emitCC (0x2f);
1348
1349 emitFMZ(0x35, 2);
1350 emitGPR(0x08, insn->src(0));
1351 emitGPR(0x00, insn->def(0));
1352 }
1353
1354 void
1355 CodeEmitterGM107::emitMUFU()
1356 {
1357 int mufu = 0;
1358
1359 switch (insn->op) {
1360 case OP_COS: mufu = 0; break;
1361 case OP_SIN: mufu = 1; break;
1362 case OP_EX2: mufu = 2; break;
1363 case OP_LG2: mufu = 3; break;
1364 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1365 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1366 default:
1367 assert(!"invalid mufu");
1368 break;
1369 }
1370
1371 emitInsn (0x50800000);
1372 emitSAT (0x32);
1373 emitNEG (0x30, insn->src(0));
1374 emitABS (0x2e, insn->src(0));
1375 emitField(0x14, 3, mufu);
1376 emitGPR (0x08, insn->src(0));
1377 emitGPR (0x00, insn->def(0));
1378 }
1379
1380 void
1381 CodeEmitterGM107::emitFMNMX()
1382 {
1383 switch (insn->src(1).getFile()) {
1384 case FILE_GPR:
1385 emitInsn(0x5c600000);
1386 emitGPR (0x14, insn->src(1));
1387 break;
1388 case FILE_MEMORY_CONST:
1389 emitInsn(0x4c600000);
1390 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1391 break;
1392 case FILE_IMMEDIATE:
1393 emitInsn(0x38600000);
1394 emitIMMD(0x14, 19, insn->src(1));
1395 break;
1396 default:
1397 assert(!"bad src1 file");
1398 break;
1399 }
1400
1401 emitField(0x2a, 1, insn->op == OP_MAX);
1402 emitPRED (0x27);
1403
1404 emitABS(0x31, insn->src(1));
1405 emitNEG(0x30, insn->src(0));
1406 emitCC (0x2f);
1407 emitABS(0x2e, insn->src(0));
1408 emitNEG(0x2d, insn->src(1));
1409 emitFMZ(0x2c, 1);
1410 emitGPR(0x08, insn->src(0));
1411 emitGPR(0x00, insn->def(0));
1412 }
1413
1414 void
1415 CodeEmitterGM107::emitRRO()
1416 {
1417 switch (insn->src(0).getFile()) {
1418 case FILE_GPR:
1419 emitInsn(0x5c900000);
1420 emitGPR (0x14, insn->src(0));
1421 break;
1422 case FILE_MEMORY_CONST:
1423 emitInsn(0x4c900000);
1424 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1425 break;
1426 case FILE_IMMEDIATE:
1427 emitInsn(0x38900000);
1428 emitIMMD(0x14, 19, insn->src(0));
1429 break;
1430 default:
1431 assert(!"bad src file");
1432 break;
1433 }
1434
1435 emitABS (0x31, insn->src(0));
1436 emitNEG (0x2d, insn->src(0));
1437 emitField(0x27, 1, insn->op == OP_PREEX2);
1438 emitGPR (0x00, insn->def(0));
1439 }
1440
1441 void
1442 CodeEmitterGM107::emitFCMP()
1443 {
1444 const CmpInstruction *insn = this->insn->asCmp();
1445 CondCode cc = insn->setCond;
1446
1447 if (insn->src(2).mod.neg())
1448 cc = reverseCondCode(cc);
1449
1450 switch(insn->src(2).getFile()) {
1451 case FILE_GPR:
1452 switch (insn->src(1).getFile()) {
1453 case FILE_GPR:
1454 emitInsn(0x5ba00000);
1455 emitGPR (0x14, insn->src(1));
1456 break;
1457 case FILE_MEMORY_CONST:
1458 emitInsn(0x4ba00000);
1459 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1460 break;
1461 case FILE_IMMEDIATE:
1462 emitInsn(0x36a00000);
1463 emitIMMD(0x14, 19, insn->src(1));
1464 break;
1465 default:
1466 assert(!"bad src1 file");
1467 break;
1468 }
1469 emitGPR (0x27, insn->src(2));
1470 break;
1471 case FILE_MEMORY_CONST:
1472 emitInsn(0x53a00000);
1473 emitGPR (0x27, insn->src(1));
1474 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1475 break;
1476 default:
1477 assert(!"bad src2 file");
1478 break;
1479 }
1480
1481 emitCond4(0x30, cc);
1482 emitFMZ (0x2f, 1);
1483 emitGPR (0x08, insn->src(0));
1484 emitGPR (0x00, insn->def(0));
1485 }
1486
1487 void
1488 CodeEmitterGM107::emitFSET()
1489 {
1490 const CmpInstruction *insn = this->insn->asCmp();
1491
1492 switch (insn->src(1).getFile()) {
1493 case FILE_GPR:
1494 emitInsn(0x58000000);
1495 emitGPR (0x14, insn->src(1));
1496 break;
1497 case FILE_MEMORY_CONST:
1498 emitInsn(0x48000000);
1499 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1500 break;
1501 case FILE_IMMEDIATE:
1502 emitInsn(0x30000000);
1503 emitIMMD(0x14, 19, insn->src(1));
1504 break;
1505 default:
1506 assert(!"bad src1 file");
1507 break;
1508 }
1509
1510 if (insn->op != OP_SET) {
1511 switch (insn->op) {
1512 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1513 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1514 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1515 default:
1516 assert(!"invalid set op");
1517 break;
1518 }
1519 emitPRED(0x27, insn->src(2));
1520 } else {
1521 emitPRED(0x27);
1522 }
1523
1524 emitFMZ (0x37, 1);
1525 emitABS (0x36, insn->src(0));
1526 emitNEG (0x35, insn->src(1));
1527 emitField(0x34, 1, insn->dType == TYPE_F32);
1528 emitCond4(0x30, insn->setCond);
1529 emitCC (0x2f);
1530 emitABS (0x2c, insn->src(1));
1531 emitNEG (0x2b, insn->src(0));
1532 emitGPR (0x08, insn->src(0));
1533 emitGPR (0x00, insn->def(0));
1534 }
1535
1536 void
1537 CodeEmitterGM107::emitFSETP()
1538 {
1539 const CmpInstruction *insn = this->insn->asCmp();
1540
1541 switch (insn->src(1).getFile()) {
1542 case FILE_GPR:
1543 emitInsn(0x5bb00000);
1544 emitGPR (0x14, insn->src(1));
1545 break;
1546 case FILE_MEMORY_CONST:
1547 emitInsn(0x4bb00000);
1548 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1549 break;
1550 case FILE_IMMEDIATE:
1551 emitInsn(0x36b00000);
1552 emitIMMD(0x14, 19, insn->src(1));
1553 break;
1554 default:
1555 assert(!"bad src1 file");
1556 break;
1557 }
1558
1559 if (insn->op != OP_SET) {
1560 switch (insn->op) {
1561 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1562 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1563 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1564 default:
1565 assert(!"invalid set op");
1566 break;
1567 }
1568 emitPRED(0x27, insn->src(2));
1569 } else {
1570 emitPRED(0x27);
1571 }
1572
1573 emitCond4(0x30, insn->setCond);
1574 emitFMZ (0x2f, 1);
1575 emitABS (0x2c, insn->src(1));
1576 emitNEG (0x2b, insn->src(0));
1577 emitGPR (0x08, insn->src(0));
1578 emitABS (0x07, insn->src(0));
1579 emitNEG (0x06, insn->src(1));
1580 emitPRED (0x03, insn->def(0));
1581 if (insn->defExists(1))
1582 emitPRED(0x00, insn->def(1));
1583 else
1584 emitPRED(0x00);
1585 }
1586
1587 void
1588 CodeEmitterGM107::emitFSWZADD()
1589 {
1590 emitInsn (0x50f80000);
1591 emitCC (0x2f);
1592 emitFMZ (0x2c, 1);
1593 emitRND (0x27);
1594 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1595 emitField(0x1c, 8, insn->subOp);
1596 if (insn->predSrc != 1)
1597 emitGPR (0x14, insn->src(1));
1598 else
1599 emitGPR (0x14);
1600 emitGPR (0x08, insn->src(0));
1601 emitGPR (0x00, insn->def(0));
1602 }
1603
1604 /*******************************************************************************
1605 * integer
1606 ******************************************************************************/
1607
1608 void
1609 CodeEmitterGM107::emitLOP()
1610 {
1611 int lop = 0;
1612
1613 switch (insn->op) {
1614 case OP_AND: lop = 0; break;
1615 case OP_OR : lop = 1; break;
1616 case OP_XOR: lop = 2; break;
1617 default:
1618 assert(!"invalid lop");
1619 break;
1620 }
1621
1622 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1623 switch (insn->src(1).getFile()) {
1624 case FILE_GPR:
1625 emitInsn(0x5c400000);
1626 emitGPR (0x14, insn->src(1));
1627 break;
1628 case FILE_MEMORY_CONST:
1629 emitInsn(0x4c400000);
1630 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1631 break;
1632 case FILE_IMMEDIATE:
1633 emitInsn(0x38400000);
1634 emitIMMD(0x14, 19, insn->src(1));
1635 break;
1636 default:
1637 assert(!"bad src1 file");
1638 break;
1639 }
1640 emitPRED (0x30);
1641 emitCC (0x2f);
1642 emitX (0x2b);
1643 emitField(0x29, 2, lop);
1644 emitINV (0x28, insn->src(1));
1645 emitINV (0x27, insn->src(0));
1646 } else {
1647 emitInsn (0x04000000);
1648 emitX (0x39);
1649 emitINV (0x38, insn->src(1));
1650 emitINV (0x37, insn->src(0));
1651 emitField(0x35, 2, lop);
1652 emitCC (0x34);
1653 emitIMMD (0x14, 32, insn->src(1));
1654 }
1655
1656 emitGPR (0x08, insn->src(0));
1657 emitGPR (0x00, insn->def(0));
1658 }
1659
1660 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1661 void
1662 CodeEmitterGM107::emitNOT()
1663 {
1664 if (!longIMMD(insn->src(0))) {
1665 switch (insn->src(0).getFile()) {
1666 case FILE_GPR:
1667 emitInsn(0x5c400700);
1668 emitGPR (0x14, insn->src(0));
1669 break;
1670 case FILE_MEMORY_CONST:
1671 emitInsn(0x4c400700);
1672 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1673 break;
1674 case FILE_IMMEDIATE:
1675 emitInsn(0x38400700);
1676 emitIMMD(0x14, 19, insn->src(0));
1677 break;
1678 default:
1679 assert(!"bad src1 file");
1680 break;
1681 }
1682 emitPRED (0x30);
1683 } else {
1684 emitInsn (0x05600000);
1685 emitIMMD (0x14, 32, insn->src(1));
1686 }
1687
1688 emitGPR(0x08);
1689 emitGPR(0x00, insn->def(0));
1690 }
1691
1692 void
1693 CodeEmitterGM107::emitIADD()
1694 {
1695 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1696 switch (insn->src(1).getFile()) {
1697 case FILE_GPR:
1698 emitInsn(0x5c100000);
1699 emitGPR (0x14, insn->src(1));
1700 break;
1701 case FILE_MEMORY_CONST:
1702 emitInsn(0x4c100000);
1703 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1704 break;
1705 case FILE_IMMEDIATE:
1706 emitInsn(0x38100000);
1707 emitIMMD(0x14, 19, insn->src(1));
1708 break;
1709 default:
1710 assert(!"bad src1 file");
1711 break;
1712 }
1713 emitSAT(0x32);
1714 emitNEG(0x31, insn->src(0));
1715 emitNEG(0x30, insn->src(1));
1716 emitCC (0x2f);
1717 emitX (0x2b);
1718 } else {
1719 emitInsn(0x1c000000);
1720 emitNEG (0x38, insn->src(0));
1721 emitSAT (0x36);
1722 emitX (0x35);
1723 emitCC (0x34);
1724 emitIMMD(0x14, 32, insn->src(1));
1725 }
1726
1727 if (insn->op == OP_SUB)
1728 code[1] ^= 0x00010000;
1729
1730 emitGPR(0x08, insn->src(0));
1731 emitGPR(0x00, insn->def(0));
1732 }
1733
1734 void
1735 CodeEmitterGM107::emitIMUL()
1736 {
1737 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1738 switch (insn->src(1).getFile()) {
1739 case FILE_GPR:
1740 emitInsn(0x5c380000);
1741 emitGPR (0x14, insn->src(1));
1742 break;
1743 case FILE_MEMORY_CONST:
1744 emitInsn(0x4c380000);
1745 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1746 break;
1747 case FILE_IMMEDIATE:
1748 emitInsn(0x38380000);
1749 emitIMMD(0x14, 19, insn->src(1));
1750 break;
1751 default:
1752 assert(!"bad src1 file");
1753 break;
1754 }
1755 emitCC (0x2f);
1756 emitField(0x29, 1, isSignedType(insn->sType));
1757 emitField(0x28, 1, isSignedType(insn->dType));
1758 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1759 } else {
1760 emitInsn (0x1f000000);
1761 emitField(0x37, 1, isSignedType(insn->sType));
1762 emitField(0x36, 1, isSignedType(insn->dType));
1763 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1764 emitCC (0x34);
1765 emitIMMD (0x14, 32, insn->src(1));
1766 }
1767
1768 emitGPR(0x08, insn->src(0));
1769 emitGPR(0x00, insn->def(0));
1770 }
1771
1772 void
1773 CodeEmitterGM107::emitIMAD()
1774 {
1775 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1776 switch(insn->src(2).getFile()) {
1777 case FILE_GPR:
1778 switch (insn->src(1).getFile()) {
1779 case FILE_GPR:
1780 emitInsn(0x5a000000);
1781 emitGPR (0x14, insn->src(1));
1782 break;
1783 case FILE_MEMORY_CONST:
1784 emitInsn(0x4a000000);
1785 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1786 break;
1787 case FILE_IMMEDIATE:
1788 emitInsn(0x34000000);
1789 emitIMMD(0x14, 19, insn->src(1));
1790 break;
1791 default:
1792 assert(!"bad src1 file");
1793 break;
1794 }
1795 emitGPR (0x27, insn->src(2));
1796 break;
1797 case FILE_MEMORY_CONST:
1798 emitInsn(0x52000000);
1799 emitGPR (0x27, insn->src(1));
1800 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1801 break;
1802 default:
1803 assert(!"bad src2 file");
1804 break;
1805 }
1806
1807 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1808 emitField(0x35, 1, isSignedType(insn->sType));
1809 emitNEG (0x34, insn->src(2));
1810 emitNEG2 (0x33, insn->src(0), insn->src(1));
1811 emitSAT (0x32);
1812 emitX (0x31);
1813 emitField(0x30, 1, isSignedType(insn->dType));
1814 emitCC (0x2f);
1815 emitGPR (0x08, insn->src(0));
1816 emitGPR (0x00, insn->def(0));
1817 }
1818
1819 void
1820 CodeEmitterGM107::emitISCADD()
1821 {
1822 switch (insn->src(2).getFile()) {
1823 case FILE_GPR:
1824 emitInsn(0x5c180000);
1825 emitGPR (0x14, insn->src(2));
1826 break;
1827 case FILE_MEMORY_CONST:
1828 emitInsn(0x4c180000);
1829 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1830 break;
1831 case FILE_IMMEDIATE:
1832 emitInsn(0x38180000);
1833 emitIMMD(0x14, 19, insn->src(2));
1834 break;
1835 default:
1836 assert(!"bad src1 file");
1837 break;
1838 }
1839 emitNEG (0x31, insn->src(0));
1840 emitNEG (0x30, insn->src(2));
1841 emitCC (0x2f);
1842 emitIMMD(0x27, 5, insn->src(1));
1843 emitGPR (0x08, insn->src(0));
1844 emitGPR (0x00, insn->def(0));
1845 }
1846
1847 void
1848 CodeEmitterGM107::emitIMNMX()
1849 {
1850 switch (insn->src(1).getFile()) {
1851 case FILE_GPR:
1852 emitInsn(0x5c200000);
1853 emitGPR (0x14, insn->src(1));
1854 break;
1855 case FILE_MEMORY_CONST:
1856 emitInsn(0x4c200000);
1857 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1858 break;
1859 case FILE_IMMEDIATE:
1860 emitInsn(0x38200000);
1861 emitIMMD(0x14, 19, insn->src(1));
1862 break;
1863 default:
1864 assert(!"bad src1 file");
1865 break;
1866 }
1867
1868 emitField(0x30, 1, isSignedType(insn->dType));
1869 emitCC (0x2f);
1870 emitField(0x2a, 1, insn->op == OP_MAX);
1871 emitPRED (0x27);
1872 emitGPR (0x08, insn->src(0));
1873 emitGPR (0x00, insn->def(0));
1874 }
1875
1876 void
1877 CodeEmitterGM107::emitICMP()
1878 {
1879 const CmpInstruction *insn = this->insn->asCmp();
1880 CondCode cc = insn->setCond;
1881
1882 if (insn->src(2).mod.neg())
1883 cc = reverseCondCode(cc);
1884
1885 switch(insn->src(2).getFile()) {
1886 case FILE_GPR:
1887 switch (insn->src(1).getFile()) {
1888 case FILE_GPR:
1889 emitInsn(0x5b400000);
1890 emitGPR (0x14, insn->src(1));
1891 break;
1892 case FILE_MEMORY_CONST:
1893 emitInsn(0x4b400000);
1894 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1895 break;
1896 case FILE_IMMEDIATE:
1897 emitInsn(0x36400000);
1898 emitIMMD(0x14, 19, insn->src(1));
1899 break;
1900 default:
1901 assert(!"bad src1 file");
1902 break;
1903 }
1904 emitGPR (0x27, insn->src(2));
1905 break;
1906 case FILE_MEMORY_CONST:
1907 emitInsn(0x53400000);
1908 emitGPR (0x27, insn->src(1));
1909 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1910 break;
1911 default:
1912 assert(!"bad src2 file");
1913 break;
1914 }
1915
1916 emitCond3(0x31, cc);
1917 emitField(0x30, 1, isSignedType(insn->sType));
1918 emitGPR (0x08, insn->src(0));
1919 emitGPR (0x00, insn->def(0));
1920 }
1921
1922 void
1923 CodeEmitterGM107::emitISET()
1924 {
1925 const CmpInstruction *insn = this->insn->asCmp();
1926
1927 switch (insn->src(1).getFile()) {
1928 case FILE_GPR:
1929 emitInsn(0x5b500000);
1930 emitGPR (0x14, insn->src(1));
1931 break;
1932 case FILE_MEMORY_CONST:
1933 emitInsn(0x4b500000);
1934 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1935 break;
1936 case FILE_IMMEDIATE:
1937 emitInsn(0x36500000);
1938 emitIMMD(0x14, 19, insn->src(1));
1939 break;
1940 default:
1941 assert(!"bad src1 file");
1942 break;
1943 }
1944
1945 if (insn->op != OP_SET) {
1946 switch (insn->op) {
1947 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1948 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1949 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1950 default:
1951 assert(!"invalid set op");
1952 break;
1953 }
1954 emitPRED(0x27, insn->src(2));
1955 } else {
1956 emitPRED(0x27);
1957 }
1958
1959 emitCond3(0x31, insn->setCond);
1960 emitField(0x30, 1, isSignedType(insn->sType));
1961 emitCC (0x2f);
1962 emitField(0x2c, 1, insn->dType == TYPE_F32);
1963 emitX (0x2b);
1964 emitGPR (0x08, insn->src(0));
1965 emitGPR (0x00, insn->def(0));
1966 }
1967
1968 void
1969 CodeEmitterGM107::emitISETP()
1970 {
1971 const CmpInstruction *insn = this->insn->asCmp();
1972
1973 switch (insn->src(1).getFile()) {
1974 case FILE_GPR:
1975 emitInsn(0x5b600000);
1976 emitGPR (0x14, insn->src(1));
1977 break;
1978 case FILE_MEMORY_CONST:
1979 emitInsn(0x4b600000);
1980 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1981 break;
1982 case FILE_IMMEDIATE:
1983 emitInsn(0x36600000);
1984 emitIMMD(0x14, 19, insn->src(1));
1985 break;
1986 default:
1987 assert(!"bad src1 file");
1988 break;
1989 }
1990
1991 if (insn->op != OP_SET) {
1992 switch (insn->op) {
1993 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1994 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1995 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1996 default:
1997 assert(!"invalid set op");
1998 break;
1999 }
2000 emitPRED(0x27, insn->src(2));
2001 } else {
2002 emitPRED(0x27);
2003 }
2004
2005 emitCond3(0x31, insn->setCond);
2006 emitField(0x30, 1, isSignedType(insn->sType));
2007 emitX (0x2b);
2008 emitGPR (0x08, insn->src(0));
2009 emitPRED (0x03, insn->def(0));
2010 if (insn->defExists(1))
2011 emitPRED(0x00, insn->def(1));
2012 else
2013 emitPRED(0x00);
2014 }
2015
2016 void
2017 CodeEmitterGM107::emitSHL()
2018 {
2019 switch (insn->src(1).getFile()) {
2020 case FILE_GPR:
2021 emitInsn(0x5c480000);
2022 emitGPR (0x14, insn->src(1));
2023 break;
2024 case FILE_MEMORY_CONST:
2025 emitInsn(0x4c480000);
2026 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2027 break;
2028 case FILE_IMMEDIATE:
2029 emitInsn(0x38480000);
2030 emitIMMD(0x14, 19, insn->src(1));
2031 break;
2032 default:
2033 assert(!"bad src1 file");
2034 break;
2035 }
2036
2037 emitCC (0x2f);
2038 emitX (0x2b);
2039 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2040 emitGPR (0x08, insn->src(0));
2041 emitGPR (0x00, insn->def(0));
2042 }
2043
2044 void
2045 CodeEmitterGM107::emitSHR()
2046 {
2047 switch (insn->src(1).getFile()) {
2048 case FILE_GPR:
2049 emitInsn(0x5c280000);
2050 emitGPR (0x14, insn->src(1));
2051 break;
2052 case FILE_MEMORY_CONST:
2053 emitInsn(0x4c280000);
2054 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2055 break;
2056 case FILE_IMMEDIATE:
2057 emitInsn(0x38280000);
2058 emitIMMD(0x14, 19, insn->src(1));
2059 break;
2060 default:
2061 assert(!"bad src1 file");
2062 break;
2063 }
2064
2065 emitField(0x30, 1, isSignedType(insn->dType));
2066 emitCC (0x2f);
2067 emitX (0x2c);
2068 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2069 emitGPR (0x08, insn->src(0));
2070 emitGPR (0x00, insn->def(0));
2071 }
2072
2073 void
2074 CodeEmitterGM107::emitPOPC()
2075 {
2076 switch (insn->src(0).getFile()) {
2077 case FILE_GPR:
2078 emitInsn(0x5c080000);
2079 emitGPR (0x14, insn->src(0));
2080 break;
2081 case FILE_MEMORY_CONST:
2082 emitInsn(0x4c080000);
2083 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2084 break;
2085 case FILE_IMMEDIATE:
2086 emitInsn(0x38080000);
2087 emitIMMD(0x14, 19, insn->src(0));
2088 break;
2089 default:
2090 assert(!"bad src1 file");
2091 break;
2092 }
2093
2094 emitINV(0x28, insn->src(0));
2095 emitGPR(0x00, insn->def(0));
2096 }
2097
2098 void
2099 CodeEmitterGM107::emitBFI()
2100 {
2101 switch(insn->src(2).getFile()) {
2102 case FILE_GPR:
2103 switch (insn->src(1).getFile()) {
2104 case FILE_GPR:
2105 emitInsn(0x5bf00000);
2106 emitGPR (0x14, insn->src(1));
2107 break;
2108 case FILE_MEMORY_CONST:
2109 emitInsn(0x4bf00000);
2110 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2111 break;
2112 case FILE_IMMEDIATE:
2113 emitInsn(0x36f00000);
2114 emitIMMD(0x14, 19, insn->src(1));
2115 break;
2116 default:
2117 assert(!"bad src1 file");
2118 break;
2119 }
2120 emitGPR (0x27, insn->src(2));
2121 break;
2122 case FILE_MEMORY_CONST:
2123 emitInsn(0x53f00000);
2124 emitGPR (0x27, insn->src(1));
2125 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2126 break;
2127 default:
2128 assert(!"bad src2 file");
2129 break;
2130 }
2131
2132 emitCC (0x2f);
2133 emitGPR (0x08, insn->src(0));
2134 emitGPR (0x00, insn->def(0));
2135 }
2136
2137 void
2138 CodeEmitterGM107::emitBFE()
2139 {
2140 switch (insn->src(1).getFile()) {
2141 case FILE_GPR:
2142 emitInsn(0x5c000000);
2143 emitGPR (0x14, insn->src(1));
2144 break;
2145 case FILE_MEMORY_CONST:
2146 emitInsn(0x4c000000);
2147 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2148 break;
2149 case FILE_IMMEDIATE:
2150 emitInsn(0x38000000);
2151 emitIMMD(0x14, 19, insn->src(1));
2152 break;
2153 default:
2154 assert(!"bad src1 file");
2155 break;
2156 }
2157
2158 emitField(0x30, 1, isSignedType(insn->dType));
2159 emitCC (0x2f);
2160 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2161 emitGPR (0x08, insn->src(0));
2162 emitGPR (0x00, insn->def(0));
2163 }
2164
2165 void
2166 CodeEmitterGM107::emitFLO()
2167 {
2168 switch (insn->src(0).getFile()) {
2169 case FILE_GPR:
2170 emitInsn(0x5c300000);
2171 emitGPR (0x14, insn->src(0));
2172 break;
2173 case FILE_MEMORY_CONST:
2174 emitInsn(0x4c300000);
2175 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2176 break;
2177 case FILE_IMMEDIATE:
2178 emitInsn(0x38300000);
2179 emitIMMD(0x14, 19, insn->src(0));
2180 break;
2181 default:
2182 assert(!"bad src1 file");
2183 break;
2184 }
2185
2186 emitField(0x30, 1, isSignedType(insn->dType));
2187 emitCC (0x2f);
2188 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2189 emitINV (0x28, insn->src(0));
2190 emitGPR (0x00, insn->def(0));
2191 }
2192
2193 /*******************************************************************************
2194 * memory
2195 ******************************************************************************/
2196
2197 void
2198 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2199 {
2200 int data = 0;
2201
2202 switch (typeSizeof(type)) {
2203 case 1: data = isSignedType(type) ? 1 : 0; break;
2204 case 2: data = isSignedType(type) ? 3 : 2; break;
2205 case 4: data = 4; break;
2206 case 8: data = 5; break;
2207 case 16: data = 6; break;
2208 default:
2209 assert(!"bad type");
2210 break;
2211 }
2212
2213 emitField(pos, 3, data);
2214 }
2215
2216 void
2217 CodeEmitterGM107::emitLDSTc(int pos)
2218 {
2219 int mode = 0;
2220
2221 switch (insn->cache) {
2222 case CACHE_CA: mode = 0; break;
2223 case CACHE_CG: mode = 1; break;
2224 case CACHE_CS: mode = 2; break;
2225 case CACHE_CV: mode = 3; break;
2226 default:
2227 assert(!"invalid caching mode");
2228 break;
2229 }
2230
2231 emitField(pos, 2, mode);
2232 }
2233
2234 void
2235 CodeEmitterGM107::emitLDC()
2236 {
2237 emitInsn (0xef900000);
2238 emitLDSTs(0x30, insn->dType);
2239 emitField(0x2c, 2, insn->subOp);
2240 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2241 emitGPR (0x00, insn->def(0));
2242 }
2243
2244 void
2245 CodeEmitterGM107::emitLDL()
2246 {
2247 emitInsn (0xef400000);
2248 emitLDSTs(0x30, insn->dType);
2249 emitLDSTc(0x2c);
2250 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2251 emitGPR (0x00, insn->def(0));
2252 }
2253
2254 void
2255 CodeEmitterGM107::emitLDS()
2256 {
2257 emitInsn (0xef480000);
2258 emitLDSTs(0x30, insn->dType);
2259 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2260 emitGPR (0x00, insn->def(0));
2261 }
2262
2263 void
2264 CodeEmitterGM107::emitLD()
2265 {
2266 emitInsn (0x80000000);
2267 emitPRED (0x3a);
2268 emitLDSTc(0x38);
2269 emitLDSTs(0x35, insn->dType);
2270 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2271 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2272 emitGPR (0x00, insn->def(0));
2273 }
2274
2275 void
2276 CodeEmitterGM107::emitSTL()
2277 {
2278 emitInsn (0xef500000);
2279 emitLDSTs(0x30, insn->dType);
2280 emitLDSTc(0x2c);
2281 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2282 emitGPR (0x00, insn->src(1));
2283 }
2284
2285 void
2286 CodeEmitterGM107::emitSTS()
2287 {
2288 emitInsn (0xef580000);
2289 emitLDSTs(0x30, insn->dType);
2290 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2291 emitGPR (0x00, insn->src(1));
2292 }
2293
2294 void
2295 CodeEmitterGM107::emitST()
2296 {
2297 emitInsn (0xa0000000);
2298 emitPRED (0x3a);
2299 emitLDSTc(0x38);
2300 emitLDSTs(0x35, insn->dType);
2301 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2302 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2303 emitGPR (0x00, insn->src(1));
2304 }
2305
2306 void
2307 CodeEmitterGM107::emitALD()
2308 {
2309 emitInsn (0xefd80000);
2310 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2311 emitGPR (0x27, insn->src(0).getIndirect(1));
2312 emitO (0x20);
2313 emitP (0x1f);
2314 emitADDR (0x08, 20, 10, 0, insn->src(0));
2315 emitGPR (0x00, insn->def(0));
2316 }
2317
2318 void
2319 CodeEmitterGM107::emitAST()
2320 {
2321 emitInsn (0xeff00000);
2322 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2323 emitGPR (0x27, insn->src(0).getIndirect(1));
2324 emitP (0x1f);
2325 emitADDR (0x08, 20, 10, 0, insn->src(0));
2326 emitGPR (0x00, insn->src(1));
2327 }
2328
2329 void
2330 CodeEmitterGM107::emitISBERD()
2331 {
2332 emitInsn(0xefd00000);
2333 emitGPR (0x08, insn->src(0));
2334 emitGPR (0x00, insn->def(0));
2335 }
2336
2337 void
2338 CodeEmitterGM107::emitAL2P()
2339 {
2340 emitInsn (0xefa00000);
2341 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2342 emitPRED (0x2c);
2343 emitO (0x20);
2344 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2345 emitGPR (0x08, insn->src(0).getIndirect(0));
2346 emitGPR (0x00, insn->def(0));
2347 }
2348
2349 static void
2350 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2351 {
2352 int ipa = entry->ipa;
2353 int reg = entry->reg;
2354 int loc = entry->loc;
2355
2356 if (data.flatshade &&
2357 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2358 ipa = NV50_IR_INTERP_FLAT;
2359 reg = 0xff;
2360 } else if (data.force_persample_interp &&
2361 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2362 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2363 ipa |= NV50_IR_INTERP_CENTROID;
2364 }
2365 code[loc + 1] &= ~(0xf << 0x14);
2366 code[loc + 1] |= (ipa & 0x3) << 0x16;
2367 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2368 code[loc + 0] &= ~(0xff << 0x14);
2369 code[loc + 0] |= reg << 0x14;
2370 }
2371
2372 void
2373 CodeEmitterGM107::emitIPA()
2374 {
2375 int ipam = 0, ipas = 0;
2376
2377 switch (insn->getInterpMode()) {
2378 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2379 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2380 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2381 case NV50_IR_INTERP_SC : ipam = 3; break;
2382 default:
2383 assert(!"invalid ipa mode");
2384 break;
2385 }
2386
2387 switch (insn->getSampleMode()) {
2388 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2389 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2390 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2391 default:
2392 assert(!"invalid ipa sample mode");
2393 break;
2394 }
2395
2396 emitInsn (0xe0000000);
2397 emitField(0x36, 2, ipam);
2398 emitField(0x34, 2, ipas);
2399 emitSAT (0x33);
2400 emitField(0x2f, 3, 7);
2401 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2402 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2403 code[1] |= 0x00000040; /* .idx */
2404 emitGPR(0x00, insn->def(0));
2405
2406 if (insn->op == OP_PINTERP) {
2407 emitGPR(0x14, insn->src(1));
2408 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2409 emitGPR(0x27, insn->src(2));
2410 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2411 } else {
2412 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2413 emitGPR(0x27, insn->src(1));
2414 emitGPR(0x14);
2415 addInterp(insn->ipa, 0xff, interpApply);
2416 }
2417
2418 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2419 emitGPR(0x27);
2420 }
2421
2422 void
2423 CodeEmitterGM107::emitATOM()
2424 {
2425 unsigned dType, subOp;
2426
2427 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2428 switch (insn->dType) {
2429 case TYPE_U32: dType = 0; break;
2430 case TYPE_U64: dType = 1; break;
2431 default: assert(!"unexpected dType"); dType = 0; break;
2432 }
2433 subOp = 15;
2434
2435 emitInsn (0xee000000);
2436 } else {
2437 switch (insn->dType) {
2438 case TYPE_U32: dType = 0; break;
2439 case TYPE_S32: dType = 1; break;
2440 case TYPE_U64: dType = 2; break;
2441 case TYPE_F32: dType = 3; break;
2442 case TYPE_B128: dType = 4; break;
2443 case TYPE_S64: dType = 5; break;
2444 default: assert(!"unexpected dType"); dType = 0; break;
2445 }
2446 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2447 subOp = 8;
2448 else
2449 subOp = insn->subOp;
2450
2451 emitInsn (0xed000000);
2452 }
2453
2454 emitField(0x34, 4, subOp);
2455 emitField(0x31, 3, dType);
2456 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2457 emitGPR (0x14, insn->src(1));
2458 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2459 emitGPR (0x00, insn->def(0));
2460 }
2461
2462 void
2463 CodeEmitterGM107::emitATOMS()
2464 {
2465 unsigned dType, subOp;
2466
2467 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2468 switch (insn->dType) {
2469 case TYPE_U32: dType = 0; break;
2470 case TYPE_U64: dType = 1; break;
2471 default: assert(!"unexpected dType"); dType = 0; break;
2472 }
2473 subOp = 4;
2474
2475 emitInsn (0xee000000);
2476 emitField(0x34, 1, dType);
2477 } else {
2478 switch (insn->dType) {
2479 case TYPE_U32: dType = 0; break;
2480 case TYPE_S32: dType = 1; break;
2481 case TYPE_U64: dType = 2; break;
2482 case TYPE_S64: dType = 3; break;
2483 default: assert(!"unexpected dType"); dType = 0; break;
2484 }
2485
2486 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2487 subOp = 8;
2488 else
2489 subOp = insn->subOp;
2490
2491 emitInsn (0xec000000);
2492 emitField(0x1c, 3, dType);
2493 }
2494
2495 emitField(0x34, 4, subOp);
2496 emitGPR (0x14, insn->src(1));
2497 emitADDR (0x08, 0x12, 22, 0, insn->src(0));
2498 emitGPR (0x00, insn->def(0));
2499 }
2500
2501 void
2502 CodeEmitterGM107::emitRED()
2503 {
2504 unsigned dType;
2505
2506 switch (insn->dType) {
2507 case TYPE_U32: dType = 0; break;
2508 case TYPE_S32: dType = 1; break;
2509 case TYPE_U64: dType = 2; break;
2510 case TYPE_F32: dType = 3; break;
2511 case TYPE_B128: dType = 4; break;
2512 case TYPE_S64: dType = 5; break;
2513 default: assert(!"unexpected dType"); dType = 0; break;
2514 }
2515
2516 emitInsn (0xebf80000);
2517 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2518 emitField(0x17, 3, insn->subOp);
2519 emitField(0x14, 3, dType);
2520 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2521 emitGPR (0x00, insn->src(1));
2522 }
2523
2524 void
2525 CodeEmitterGM107::emitCCTL()
2526 {
2527 unsigned width;
2528 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2529 emitInsn(0xef600000);
2530 width = 30;
2531 } else {
2532 emitInsn(0xef800000);
2533 width = 22;
2534 }
2535 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2536 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2537 emitField(0x00, 4, insn->subOp);
2538 }
2539
2540 /*******************************************************************************
2541 * surface
2542 ******************************************************************************/
2543
2544 void
2545 CodeEmitterGM107::emitPIXLD()
2546 {
2547 emitInsn (0xefe80000);
2548 emitPRED (0x2d);
2549 emitField(0x1f, 3, insn->subOp);
2550 emitGPR (0x08, insn->src(0));
2551 emitGPR (0x00, insn->def(0));
2552 }
2553
2554 /*******************************************************************************
2555 * texture
2556 ******************************************************************************/
2557
2558 void
2559 CodeEmitterGM107::emitTEXs(int pos)
2560 {
2561 int src1 = insn->predSrc == 1 ? 2 : 1;
2562 if (insn->srcExists(src1))
2563 emitGPR(pos, insn->src(src1));
2564 else
2565 emitGPR(pos);
2566 }
2567
2568 void
2569 CodeEmitterGM107::emitTEX()
2570 {
2571 const TexInstruction *insn = this->insn->asTex();
2572 int lodm = 0;
2573
2574 if (!insn->tex.levelZero) {
2575 switch (insn->op) {
2576 case OP_TEX: lodm = 0; break;
2577 case OP_TXB: lodm = 2; break;
2578 case OP_TXL: lodm = 3; break;
2579 default:
2580 assert(!"invalid tex op");
2581 break;
2582 }
2583 } else {
2584 lodm = 1;
2585 }
2586
2587 if (insn->tex.rIndirectSrc >= 0) {
2588 emitInsn (0xdeb80000);
2589 emitField(0x25, 2, lodm);
2590 emitField(0x24, 1, insn->tex.useOffsets == 1);
2591 } else {
2592 emitInsn (0xc0380000);
2593 emitField(0x37, 2, lodm);
2594 emitField(0x36, 1, insn->tex.useOffsets == 1);
2595 emitField(0x24, 13, insn->tex.r);
2596 }
2597
2598 emitField(0x32, 1, insn->tex.target.isShadow());
2599 emitField(0x31, 1, insn->tex.liveOnly);
2600 emitField(0x23, 1, insn->tex.derivAll);
2601 emitField(0x1f, 4, insn->tex.mask);
2602 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2603 insn->tex.target.getDim() - 1);
2604 emitField(0x1c, 1, insn->tex.target.isArray());
2605 emitTEXs (0x14);
2606 emitGPR (0x08, insn->src(0));
2607 emitGPR (0x00, insn->def(0));
2608 }
2609
2610 void
2611 CodeEmitterGM107::emitTLD()
2612 {
2613 const TexInstruction *insn = this->insn->asTex();
2614
2615 if (insn->tex.rIndirectSrc >= 0) {
2616 emitInsn (0xdd380000);
2617 } else {
2618 emitInsn (0xdc380000);
2619 emitField(0x24, 13, insn->tex.r);
2620 }
2621
2622 emitField(0x37, 1, insn->tex.levelZero == 0);
2623 emitField(0x32, 1, insn->tex.target.isMS());
2624 emitField(0x31, 1, insn->tex.liveOnly);
2625 emitField(0x23, 1, insn->tex.useOffsets == 1);
2626 emitField(0x1f, 4, insn->tex.mask);
2627 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2628 insn->tex.target.getDim() - 1);
2629 emitField(0x1c, 1, insn->tex.target.isArray());
2630 emitTEXs (0x14);
2631 emitGPR (0x08, insn->src(0));
2632 emitGPR (0x00, insn->def(0));
2633 }
2634
2635 void
2636 CodeEmitterGM107::emitTLD4()
2637 {
2638 const TexInstruction *insn = this->insn->asTex();
2639
2640 if (insn->tex.rIndirectSrc >= 0) {
2641 emitInsn (0xdef80000);
2642 emitField(0x26, 2, insn->tex.gatherComp);
2643 emitField(0x25, 2, insn->tex.useOffsets == 4);
2644 emitField(0x24, 2, insn->tex.useOffsets == 1);
2645 } else {
2646 emitInsn (0xc8380000);
2647 emitField(0x38, 2, insn->tex.gatherComp);
2648 emitField(0x37, 2, insn->tex.useOffsets == 4);
2649 emitField(0x36, 2, insn->tex.useOffsets == 1);
2650 emitField(0x24, 13, insn->tex.r);
2651 }
2652
2653 emitField(0x32, 1, insn->tex.target.isShadow());
2654 emitField(0x31, 1, insn->tex.liveOnly);
2655 emitField(0x23, 1, insn->tex.derivAll);
2656 emitField(0x1f, 4, insn->tex.mask);
2657 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2658 insn->tex.target.getDim() - 1);
2659 emitField(0x1c, 1, insn->tex.target.isArray());
2660 emitTEXs (0x14);
2661 emitGPR (0x08, insn->src(0));
2662 emitGPR (0x00, insn->def(0));
2663 }
2664
2665 void
2666 CodeEmitterGM107::emitTXD()
2667 {
2668 const TexInstruction *insn = this->insn->asTex();
2669
2670 if (insn->tex.rIndirectSrc >= 0) {
2671 emitInsn (0xde780000);
2672 } else {
2673 emitInsn (0xde380000);
2674 emitField(0x24, 13, insn->tex.r);
2675 }
2676
2677 emitField(0x31, 1, insn->tex.liveOnly);
2678 emitField(0x23, 1, insn->tex.useOffsets == 1);
2679 emitField(0x1f, 4, insn->tex.mask);
2680 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2681 insn->tex.target.getDim() - 1);
2682 emitField(0x1c, 1, insn->tex.target.isArray());
2683 emitTEXs (0x14);
2684 emitGPR (0x08, insn->src(0));
2685 emitGPR (0x00, insn->def(0));
2686 }
2687
2688 void
2689 CodeEmitterGM107::emitTMML()
2690 {
2691 const TexInstruction *insn = this->insn->asTex();
2692
2693 if (insn->tex.rIndirectSrc >= 0) {
2694 emitInsn (0xdf600000);
2695 } else {
2696 emitInsn (0xdf580000);
2697 emitField(0x24, 13, insn->tex.r);
2698 }
2699
2700 emitField(0x31, 1, insn->tex.liveOnly);
2701 emitField(0x23, 1, insn->tex.derivAll);
2702 emitField(0x1f, 4, insn->tex.mask);
2703 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2704 insn->tex.target.getDim() - 1);
2705 emitField(0x1c, 1, insn->tex.target.isArray());
2706 emitTEXs (0x14);
2707 emitGPR (0x08, insn->src(0));
2708 emitGPR (0x00, insn->def(0));
2709 }
2710
2711 void
2712 CodeEmitterGM107::emitTXQ()
2713 {
2714 const TexInstruction *insn = this->insn->asTex();
2715 int type = 0;
2716
2717 switch (insn->tex.query) {
2718 case TXQ_DIMS : type = 0x01; break;
2719 case TXQ_TYPE : type = 0x02; break;
2720 case TXQ_SAMPLE_POSITION: type = 0x05; break;
2721 case TXQ_FILTER : type = 0x10; break;
2722 case TXQ_LOD : type = 0x12; break;
2723 case TXQ_WRAP : type = 0x14; break;
2724 case TXQ_BORDER_COLOUR : type = 0x16; break;
2725 default:
2726 assert(!"invalid txq query");
2727 break;
2728 }
2729
2730 if (insn->tex.rIndirectSrc >= 0) {
2731 emitInsn (0xdf500000);
2732 } else {
2733 emitInsn (0xdf480000);
2734 emitField(0x24, 13, insn->tex.r);
2735 }
2736
2737 emitField(0x31, 1, insn->tex.liveOnly);
2738 emitField(0x1f, 4, insn->tex.mask);
2739 emitField(0x16, 6, type);
2740 emitGPR (0x08, insn->src(0));
2741 emitGPR (0x00, insn->def(0));
2742 }
2743
2744 void
2745 CodeEmitterGM107::emitDEPBAR()
2746 {
2747 emitInsn (0xf0f00000);
2748 emitField(0x1d, 1, 1); /* le */
2749 emitField(0x1a, 3, 5);
2750 emitField(0x14, 6, insn->subOp);
2751 emitField(0x00, 6, insn->subOp);
2752 }
2753
2754 /*******************************************************************************
2755 * misc
2756 ******************************************************************************/
2757
2758 void
2759 CodeEmitterGM107::emitNOP()
2760 {
2761 emitInsn(0x50b00000);
2762 }
2763
2764 void
2765 CodeEmitterGM107::emitKIL()
2766 {
2767 emitInsn (0xe3300000);
2768 emitCond5(0x00, CC_TR);
2769 }
2770
2771 void
2772 CodeEmitterGM107::emitOUT()
2773 {
2774 const int cut = insn->op == OP_RESTART || insn->subOp;
2775 const int emit = insn->op == OP_EMIT;
2776
2777 switch (insn->src(1).getFile()) {
2778 case FILE_GPR:
2779 emitInsn(0xfbe00000);
2780 emitGPR (0x14, insn->src(1));
2781 break;
2782 case FILE_IMMEDIATE:
2783 emitInsn(0xf6e00000);
2784 emitIMMD(0x14, 19, insn->src(1));
2785 break;
2786 case FILE_MEMORY_CONST:
2787 emitInsn(0xebe00000);
2788 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2789 break;
2790 default:
2791 assert(!"bad src1 file");
2792 break;
2793 }
2794
2795 emitField(0x27, 2, (cut << 1) | emit);
2796 emitGPR (0x08, insn->src(0));
2797 emitGPR (0x00, insn->def(0));
2798 }
2799
2800 void
2801 CodeEmitterGM107::emitBAR()
2802 {
2803 uint8_t subop;
2804
2805 emitInsn (0xf0a80000);
2806
2807 switch (insn->subOp) {
2808 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
2809 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
2810 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
2811 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
2812 default:
2813 subop = 0x80;
2814 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
2815 break;
2816 }
2817
2818 emitField(0x20, 8, subop);
2819
2820 // barrier id
2821 if (insn->src(0).getFile() == FILE_GPR) {
2822 emitGPR(0x08, insn->src(0));
2823 } else {
2824 ImmediateValue *imm = insn->getSrc(0)->asImm();
2825 assert(imm);
2826 emitField(0x08, 8, imm->reg.data.u32);
2827 emitField(0x2b, 1, 1);
2828 }
2829
2830 // thread count
2831 if (insn->src(1).getFile() == FILE_GPR) {
2832 emitGPR(0x14, insn->src(1));
2833 } else {
2834 ImmediateValue *imm = insn->getSrc(0)->asImm();
2835 assert(imm);
2836 emitField(0x14, 12, imm->reg.data.u32);
2837 emitField(0x2c, 1, 1);
2838 }
2839
2840 if (insn->srcExists(2) && (insn->predSrc != 2)) {
2841 emitPRED (0x27, insn->src(2));
2842 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
2843 } else {
2844 emitField(0x27, 3, 7);
2845 }
2846 }
2847
2848 void
2849 CodeEmitterGM107::emitMEMBAR()
2850 {
2851 emitInsn (0xef980000);
2852 emitField(0x08, 2, insn->subOp >> 2);
2853 }
2854
2855 void
2856 CodeEmitterGM107::emitVOTE()
2857 {
2858 assert(insn->src(0).getFile() == FILE_PREDICATE);
2859
2860 int r = -1, p = -1;
2861 for (int i = 0; insn->defExists(i); i++) {
2862 if (insn->def(i).getFile() == FILE_GPR)
2863 r = i;
2864 else if (insn->def(i).getFile() == FILE_PREDICATE)
2865 p = i;
2866 }
2867
2868 emitInsn (0x50d80000);
2869 emitField(0x30, 2, insn->subOp);
2870 if (r >= 0)
2871 emitGPR (0x00, insn->def(r));
2872 else
2873 emitGPR (0x00);
2874 if (p >= 0)
2875 emitPRED (0x2d, insn->def(p));
2876 else
2877 emitPRED (0x2d);
2878 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
2879 emitPRED (0x27, insn->src(0));
2880 }
2881
2882 void
2883 CodeEmitterGM107::emitSUTarget()
2884 {
2885 const TexInstruction *insn = this->insn->asTex();
2886 int target = 0;
2887
2888 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2889
2890 if (insn->tex.target == TEX_TARGET_BUFFER) {
2891 target = 2;
2892 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
2893 target = 4;
2894 } else if (insn->tex.target == TEX_TARGET_2D ||
2895 insn->tex.target == TEX_TARGET_RECT) {
2896 target = 6;
2897 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
2898 insn->tex.target == TEX_TARGET_CUBE ||
2899 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
2900 target = 8;
2901 } else if (insn->tex.target == TEX_TARGET_3D) {
2902 target = 10;
2903 } else {
2904 assert(insn->tex.target == TEX_TARGET_1D);
2905 }
2906 emitField(0x20, 4, target);
2907 }
2908
2909 void
2910 CodeEmitterGM107::emitSUHandle(const int s)
2911 {
2912 const TexInstruction *insn = this->insn->asTex();
2913
2914 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2915
2916 if (insn->src(s).getFile() == FILE_GPR) {
2917 emitGPR(0x27, insn->src(s));
2918 } else {
2919 ImmediateValue *imm = insn->getSrc(s)->asImm();
2920 assert(imm);
2921 emitField(0x33, 1, 1);
2922 emitField(0x24, 13, imm->reg.data.u32);
2923 }
2924 }
2925
2926 void
2927 CodeEmitterGM107::emitSUSTx()
2928 {
2929 const TexInstruction *insn = this->insn->asTex();
2930
2931 emitInsn(0xeb200000);
2932 if (insn->op == OP_SUSTB)
2933 emitField(0x34, 1, 1);
2934 emitSUTarget();
2935
2936 emitLDSTc(0x18);
2937 emitField(0x14, 4, 0xf); // rgba
2938 emitGPR (0x08, insn->src(0));
2939 emitGPR (0x00, insn->src(1));
2940
2941 emitSUHandle(2);
2942 }
2943
2944 void
2945 CodeEmitterGM107::emitSULDx()
2946 {
2947 const TexInstruction *insn = this->insn->asTex();
2948 int type = 0;
2949
2950 emitInsn(0xeb000000);
2951 if (insn->op == OP_SULDB)
2952 emitField(0x34, 1, 1);
2953 emitSUTarget();
2954
2955 switch (insn->dType) {
2956 case TYPE_S8: type = 1; break;
2957 case TYPE_U16: type = 2; break;
2958 case TYPE_S16: type = 3; break;
2959 case TYPE_U32: type = 4; break;
2960 case TYPE_U64: type = 5; break;
2961 case TYPE_B128: type = 6; break;
2962 default:
2963 assert(insn->dType == TYPE_U8);
2964 break;
2965 }
2966 emitLDSTc(0x18);
2967 emitField(0x14, 3, type);
2968 emitGPR (0x00, insn->def(0));
2969 emitGPR (0x08, insn->src(0));
2970
2971 emitSUHandle(1);
2972 }
2973
2974 void
2975 CodeEmitterGM107::emitSUREDx()
2976 {
2977 const TexInstruction *insn = this->insn->asTex();
2978 uint8_t type = 0, subOp;
2979
2980 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
2981 emitInsn(0xeac00000);
2982 else
2983 emitInsn(0xea600000);
2984
2985 if (insn->op == OP_SUREDB)
2986 emitField(0x34, 1, 1);
2987 emitSUTarget();
2988
2989 // destination type
2990 switch (insn->dType) {
2991 case TYPE_S32: type = 1; break;
2992 case TYPE_U64: type = 2; break;
2993 case TYPE_F32: type = 3; break;
2994 case TYPE_S64: type = 5; break;
2995 default:
2996 assert(insn->dType == TYPE_U32);
2997 break;
2998 }
2999
3000 // atomic operation
3001 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3002 subOp = 0;
3003 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3004 subOp = 8;
3005 } else {
3006 subOp = insn->subOp;
3007 }
3008
3009 emitField(0x24, 3, type);
3010 emitField(0x1d, 4, subOp);
3011 emitGPR (0x14, insn->src(1));
3012 emitGPR (0x08, insn->src(0));
3013 emitGPR (0x00, insn->def(0));
3014
3015 emitSUHandle(2);
3016 }
3017
3018 /*******************************************************************************
3019 * assembler front-end
3020 ******************************************************************************/
3021
3022 bool
3023 CodeEmitterGM107::emitInstruction(Instruction *i)
3024 {
3025 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3026 bool ret = true;
3027
3028 insn = i;
3029
3030 if (insn->encSize != 8) {
3031 ERROR("skipping undecodable instruction: "); insn->print();
3032 return false;
3033 } else
3034 if (codeSize + size > codeSizeLimit) {
3035 ERROR("code emitter output buffer too small\n");
3036 return false;
3037 }
3038
3039 if (writeIssueDelays) {
3040 int n = ((codeSize & 0x1f) / 8) - 1;
3041 if (n < 0) {
3042 data = code;
3043 data[0] = 0x00000000;
3044 data[1] = 0x00000000;
3045 code += 2;
3046 codeSize += 8;
3047 n++;
3048 }
3049
3050 emitField(data, n * 21, 21, insn->sched);
3051 }
3052
3053 switch (insn->op) {
3054 case OP_EXIT:
3055 emitEXIT();
3056 break;
3057 case OP_BRA:
3058 emitBRA();
3059 break;
3060 case OP_CALL:
3061 emitCAL();
3062 break;
3063 case OP_PRECONT:
3064 emitPCNT();
3065 break;
3066 case OP_CONT:
3067 emitCONT();
3068 break;
3069 case OP_PREBREAK:
3070 emitPBK();
3071 break;
3072 case OP_BREAK:
3073 emitBRK();
3074 break;
3075 case OP_PRERET:
3076 emitPRET();
3077 break;
3078 case OP_RET:
3079 emitRET();
3080 break;
3081 case OP_JOINAT:
3082 emitSSY();
3083 break;
3084 case OP_JOIN:
3085 emitSYNC();
3086 break;
3087 case OP_QUADON:
3088 emitSAM();
3089 break;
3090 case OP_QUADPOP:
3091 emitRAM();
3092 break;
3093 case OP_MOV:
3094 emitMOV();
3095 break;
3096 case OP_RDSV:
3097 emitS2R();
3098 break;
3099 case OP_ABS:
3100 case OP_NEG:
3101 case OP_SAT:
3102 case OP_FLOOR:
3103 case OP_CEIL:
3104 case OP_TRUNC:
3105 case OP_CVT:
3106 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3107 insn->src(0).getFile() == FILE_PREDICATE)) {
3108 emitMOV();
3109 } else if (isFloatType(insn->dType)) {
3110 if (isFloatType(insn->sType))
3111 emitF2F();
3112 else
3113 emitI2F();
3114 } else {
3115 if (isFloatType(insn->sType))
3116 emitF2I();
3117 else
3118 emitI2I();
3119 }
3120 break;
3121 case OP_SHFL:
3122 emitSHFL();
3123 break;
3124 case OP_ADD:
3125 case OP_SUB:
3126 if (isFloatType(insn->dType)) {
3127 if (insn->dType == TYPE_F64)
3128 emitDADD();
3129 else
3130 emitFADD();
3131 } else {
3132 emitIADD();
3133 }
3134 break;
3135 case OP_MUL:
3136 if (isFloatType(insn->dType)) {
3137 if (insn->dType == TYPE_F64)
3138 emitDMUL();
3139 else
3140 emitFMUL();
3141 } else {
3142 emitIMUL();
3143 }
3144 break;
3145 case OP_MAD:
3146 case OP_FMA:
3147 if (isFloatType(insn->dType)) {
3148 if (insn->dType == TYPE_F64)
3149 emitDFMA();
3150 else
3151 emitFFMA();
3152 } else {
3153 emitIMAD();
3154 }
3155 break;
3156 case OP_SHLADD:
3157 emitISCADD();
3158 break;
3159 case OP_MIN:
3160 case OP_MAX:
3161 if (isFloatType(insn->dType)) {
3162 if (insn->dType == TYPE_F64)
3163 emitDMNMX();
3164 else
3165 emitFMNMX();
3166 } else {
3167 emitIMNMX();
3168 }
3169 break;
3170 case OP_SHL:
3171 emitSHL();
3172 break;
3173 case OP_SHR:
3174 emitSHR();
3175 break;
3176 case OP_POPCNT:
3177 emitPOPC();
3178 break;
3179 case OP_INSBF:
3180 emitBFI();
3181 break;
3182 case OP_EXTBF:
3183 emitBFE();
3184 break;
3185 case OP_BFIND:
3186 emitFLO();
3187 break;
3188 case OP_SLCT:
3189 if (isFloatType(insn->dType))
3190 emitFCMP();
3191 else
3192 emitICMP();
3193 break;
3194 case OP_SET:
3195 case OP_SET_AND:
3196 case OP_SET_OR:
3197 case OP_SET_XOR:
3198 if (insn->def(0).getFile() != FILE_PREDICATE) {
3199 if (isFloatType(insn->sType))
3200 if (insn->sType == TYPE_F64)
3201 emitDSET();
3202 else
3203 emitFSET();
3204 else
3205 emitISET();
3206 } else {
3207 if (isFloatType(insn->sType))
3208 if (insn->sType == TYPE_F64)
3209 emitDSETP();
3210 else
3211 emitFSETP();
3212 else
3213 emitISETP();
3214 }
3215 break;
3216 case OP_SELP:
3217 emitSEL();
3218 break;
3219 case OP_PRESIN:
3220 case OP_PREEX2:
3221 emitRRO();
3222 break;
3223 case OP_COS:
3224 case OP_SIN:
3225 case OP_EX2:
3226 case OP_LG2:
3227 case OP_RCP:
3228 case OP_RSQ:
3229 emitMUFU();
3230 break;
3231 case OP_AND:
3232 case OP_OR:
3233 case OP_XOR:
3234 emitLOP();
3235 break;
3236 case OP_NOT:
3237 emitNOT();
3238 break;
3239 case OP_LOAD:
3240 switch (insn->src(0).getFile()) {
3241 case FILE_MEMORY_CONST : emitLDC(); break;
3242 case FILE_MEMORY_LOCAL : emitLDL(); break;
3243 case FILE_MEMORY_SHARED: emitLDS(); break;
3244 case FILE_MEMORY_GLOBAL: emitLD(); break;
3245 default:
3246 assert(!"invalid load");
3247 emitNOP();
3248 break;
3249 }
3250 break;
3251 case OP_STORE:
3252 switch (insn->src(0).getFile()) {
3253 case FILE_MEMORY_LOCAL : emitSTL(); break;
3254 case FILE_MEMORY_SHARED: emitSTS(); break;
3255 case FILE_MEMORY_GLOBAL: emitST(); break;
3256 default:
3257 assert(!"invalid store");
3258 emitNOP();
3259 break;
3260 }
3261 break;
3262 case OP_ATOM:
3263 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3264 emitATOMS();
3265 else
3266 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3267 emitRED();
3268 else
3269 emitATOM();
3270 break;
3271 case OP_CCTL:
3272 emitCCTL();
3273 break;
3274 case OP_VFETCH:
3275 emitALD();
3276 break;
3277 case OP_EXPORT:
3278 emitAST();
3279 break;
3280 case OP_PFETCH:
3281 emitISBERD();
3282 break;
3283 case OP_AFETCH:
3284 emitAL2P();
3285 break;
3286 case OP_LINTERP:
3287 case OP_PINTERP:
3288 emitIPA();
3289 break;
3290 case OP_PIXLD:
3291 emitPIXLD();
3292 break;
3293 case OP_TEX:
3294 case OP_TXB:
3295 case OP_TXL:
3296 emitTEX();
3297 break;
3298 case OP_TXF:
3299 emitTLD();
3300 break;
3301 case OP_TXG:
3302 emitTLD4();
3303 break;
3304 case OP_TXD:
3305 emitTXD();
3306 break;
3307 case OP_TXQ:
3308 emitTXQ();
3309 break;
3310 case OP_TXLQ:
3311 emitTMML();
3312 break;
3313 case OP_TEXBAR:
3314 emitDEPBAR();
3315 break;
3316 case OP_QUADOP:
3317 emitFSWZADD();
3318 break;
3319 case OP_NOP:
3320 emitNOP();
3321 break;
3322 case OP_DISCARD:
3323 emitKIL();
3324 break;
3325 case OP_EMIT:
3326 case OP_RESTART:
3327 emitOUT();
3328 break;
3329 case OP_BAR:
3330 emitBAR();
3331 break;
3332 case OP_MEMBAR:
3333 emitMEMBAR();
3334 break;
3335 case OP_VOTE:
3336 emitVOTE();
3337 break;
3338 case OP_SUSTB:
3339 case OP_SUSTP:
3340 emitSUSTx();
3341 break;
3342 case OP_SULDB:
3343 case OP_SULDP:
3344 emitSULDx();
3345 break;
3346 case OP_SUREDB:
3347 case OP_SUREDP:
3348 emitSUREDx();
3349 break;
3350 default:
3351 assert(!"invalid opcode");
3352 emitNOP();
3353 ret = false;
3354 break;
3355 }
3356
3357 if (insn->join) {
3358 /*XXX*/
3359 }
3360
3361 code += 2;
3362 codeSize += 8;
3363 return ret;
3364 }
3365
3366 uint32_t
3367 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3368 {
3369 return 8;
3370 }
3371
3372 /*******************************************************************************
3373 * sched data calculator
3374 ******************************************************************************/
3375
3376 class SchedDataCalculatorGM107 : public Pass
3377 {
3378 public:
3379 SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3380
3381 private:
3382 struct RegScores
3383 {
3384 struct ScoreData {
3385 int r[256];
3386 int p[8];
3387 int c;
3388 } rd, wr;
3389 int base;
3390
3391 void rebase(const int base)
3392 {
3393 const int delta = this->base - base;
3394 if (!delta)
3395 return;
3396 this->base = 0;
3397
3398 for (int i = 0; i < 256; ++i) {
3399 rd.r[i] += delta;
3400 wr.r[i] += delta;
3401 }
3402 for (int i = 0; i < 8; ++i) {
3403 rd.p[i] += delta;
3404 wr.p[i] += delta;
3405 }
3406 rd.c += delta;
3407 wr.c += delta;
3408 }
3409 void wipe()
3410 {
3411 memset(&rd, 0, sizeof(rd));
3412 memset(&wr, 0, sizeof(wr));
3413 }
3414 int getLatest(const ScoreData& d) const
3415 {
3416 int max = 0;
3417 for (int i = 0; i < 256; ++i)
3418 if (d.r[i] > max)
3419 max = d.r[i];
3420 for (int i = 0; i < 8; ++i)
3421 if (d.p[i] > max)
3422 max = d.p[i];
3423 if (d.c > max)
3424 max = d.c;
3425 return max;
3426 }
3427 inline int getLatestRd() const
3428 {
3429 return getLatest(rd);
3430 }
3431 inline int getLatestWr() const
3432 {
3433 return getLatest(wr);
3434 }
3435 inline int getLatest() const
3436 {
3437 return MAX2(getLatestRd(), getLatestWr());
3438 }
3439 void setMax(const RegScores *that)
3440 {
3441 for (int i = 0; i < 256; ++i) {
3442 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3443 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3444 }
3445 for (int i = 0; i < 8; ++i) {
3446 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3447 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3448 }
3449 rd.c = MAX2(rd.c, that->rd.c);
3450 wr.c = MAX2(wr.c, that->wr.c);
3451 }
3452 void print(int cycle)
3453 {
3454 for (int i = 0; i < 256; ++i) {
3455 if (rd.r[i] > cycle)
3456 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3457 if (wr.r[i] > cycle)
3458 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3459 }
3460 for (int i = 0; i < 8; ++i) {
3461 if (rd.p[i] > cycle)
3462 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3463 if (wr.p[i] > cycle)
3464 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3465 }
3466 if (rd.c > cycle)
3467 INFO("rd $c @ %i\n", rd.c);
3468 if (wr.c > cycle)
3469 INFO("wr $c @ %i\n", wr.c);
3470 }
3471 };
3472
3473 RegScores *score; // for current BB
3474 std::vector<RegScores> scoreBoards;
3475
3476 const TargetGM107 *targ;
3477 bool visit(Function *);
3478 bool visit(BasicBlock *);
3479
3480 void commitInsn(const Instruction *, int);
3481 int calcDelay(const Instruction *, int) const;
3482 void setDelay(Instruction *, int, const Instruction *);
3483 void recordWr(const Value *, int, int);
3484 void checkRd(const Value *, int, int&) const;
3485
3486 inline void emitYield(Instruction *);
3487 inline void emitStall(Instruction *, uint8_t);
3488 inline void emitReuse(Instruction *, uint8_t);
3489 inline void emitWrDepBar(Instruction *, uint8_t);
3490 inline void emitRdDepBar(Instruction *, uint8_t);
3491 inline void emitWtDepBar(Instruction *, uint8_t);
3492
3493 inline int getStall(const Instruction *) const;
3494 inline int getWrDepBar(const Instruction *) const;
3495 inline int getRdDepBar(const Instruction *) const;
3496 inline int getWtDepBar(const Instruction *) const;
3497
3498 void setReuseFlag(Instruction *);
3499
3500 inline void printSchedInfo(int, const Instruction *) const;
3501
3502 struct LiveBarUse {
3503 LiveBarUse(Instruction *insn, Instruction *usei)
3504 : insn(insn), usei(usei) { }
3505 Instruction *insn;
3506 Instruction *usei;
3507 };
3508
3509 struct LiveBarDef {
3510 LiveBarDef(Instruction *insn, Instruction *defi)
3511 : insn(insn), defi(defi) { }
3512 Instruction *insn;
3513 Instruction *defi;
3514 };
3515
3516 bool insertBarriers(BasicBlock *);
3517
3518 Instruction *findFirstUse(const Instruction *) const;
3519 Instruction *findFirstDef(const Instruction *) const;
3520
3521 bool needRdDepBar(const Instruction *) const;
3522 bool needWrDepBar(const Instruction *) const;
3523 };
3524
3525 inline void
3526 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3527 {
3528 assert(cnt < 16);
3529 insn->sched |= cnt;
3530 }
3531
3532 inline void
3533 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3534 {
3535 insn->sched |= 1 << 4;
3536 }
3537
3538 inline void
3539 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3540 {
3541 assert(id < 6);
3542 if ((insn->sched & 0xe0) == 0xe0)
3543 insn->sched ^= 0xe0;
3544 insn->sched |= id << 5;
3545 }
3546
3547 inline void
3548 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3549 {
3550 assert(id < 6);
3551 if ((insn->sched & 0x700) == 0x700)
3552 insn->sched ^= 0x700;
3553 insn->sched |= id << 8;
3554 }
3555
3556 inline void
3557 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3558 {
3559 assert(id < 6);
3560 insn->sched |= 1 << (11 + id);
3561 }
3562
3563 inline void
3564 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3565 {
3566 assert(id < 4);
3567 insn->sched |= 1 << (17 + id);
3568 }
3569
3570 inline void
3571 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3572 const Instruction *insn) const
3573 {
3574 uint8_t st, yl, wr, rd, wt, ru;
3575
3576 st = (insn->sched & 0x00000f) >> 0;
3577 yl = (insn->sched & 0x000010) >> 4;
3578 wr = (insn->sched & 0x0000e0) >> 5;
3579 rd = (insn->sched & 0x000700) >> 8;
3580 wt = (insn->sched & 0x01f800) >> 11;
3581 ru = (insn->sched & 0x1e0000) >> 17;
3582
3583 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3584 cycle, st, yl, wr, rd, wt, ru);
3585 }
3586
3587 inline int
3588 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3589 {
3590 return insn->sched & 0xf;
3591 }
3592
3593 inline int
3594 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3595 {
3596 return (insn->sched & 0x0000e0) >> 5;
3597 }
3598
3599 inline int
3600 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3601 {
3602 return (insn->sched & 0x000700) >> 8;
3603 }
3604
3605 inline int
3606 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3607 {
3608 return (insn->sched & 0x01f800) >> 11;
3609 }
3610
3611 // Emit the reuse flag which allows to make use of the new memory hierarchy
3612 // introduced since Maxwell, the operand reuse cache.
3613 //
3614 // It allows to reduce bank conflicts by caching operands. Each time you issue
3615 // an instruction, that flag can tell the hw which operands are going to be
3616 // re-used by the next instruction. Note that the next instruction has to use
3617 // the same GPR id in the same operand slot.
3618 void
3619 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3620 {
3621 Instruction *next = insn->next;
3622 BitSet defs(255, 1);
3623
3624 if (!targ->isReuseSupported(insn))
3625 return;
3626
3627 for (int d = 0; insn->defExists(d); ++d) {
3628 const Value *def = insn->def(d).rep();
3629 if (insn->def(d).getFile() != FILE_GPR)
3630 continue;
3631 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3632 continue;
3633 defs.set(def->reg.data.id);
3634 }
3635
3636 for (int s = 0; insn->srcExists(s); s++) {
3637 const Value *src = insn->src(s).rep();
3638 if (insn->src(s).getFile() != FILE_GPR)
3639 continue;
3640 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3641 continue;
3642 if (defs.test(src->reg.data.id))
3643 continue;
3644 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3645 continue;
3646 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3647 continue;
3648 assert(s < 4);
3649 emitReuse(insn, s);
3650 }
3651 }
3652
3653 void
3654 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3655 {
3656 int a = v->reg.data.id, b;
3657
3658 switch (v->reg.file) {
3659 case FILE_GPR:
3660 b = a + v->reg.size / 4;
3661 for (int r = a; r < b; ++r)
3662 score->rd.r[r] = ready;
3663 break;
3664 case FILE_PREDICATE:
3665 // To immediately use a predicate set by any instructions, the minimum
3666 // number of stall counts is 13.
3667 score->rd.p[a] = cycle + 13;
3668 break;
3669 case FILE_FLAGS:
3670 score->rd.c = ready;
3671 break;
3672 default:
3673 break;
3674 }
3675 }
3676
3677 void
3678 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3679 {
3680 int a = v->reg.data.id, b;
3681 int ready = cycle;
3682
3683 switch (v->reg.file) {
3684 case FILE_GPR:
3685 b = a + v->reg.size / 4;
3686 for (int r = a; r < b; ++r)
3687 ready = MAX2(ready, score->rd.r[r]);
3688 break;
3689 case FILE_PREDICATE:
3690 ready = MAX2(ready, score->rd.p[a]);
3691 break;
3692 case FILE_FLAGS:
3693 ready = MAX2(ready, score->rd.c);
3694 break;
3695 default:
3696 break;
3697 }
3698 if (cycle < ready)
3699 delay = MAX2(delay, ready - cycle);
3700 }
3701
3702 void
3703 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3704 {
3705 const int ready = cycle + targ->getLatency(insn);
3706
3707 for (int d = 0; insn->defExists(d); ++d)
3708 recordWr(insn->getDef(d), cycle, ready);
3709
3710 #ifdef GM107_DEBUG_SCHED_DATA
3711 score->print(cycle);
3712 #endif
3713 }
3714
3715 #define GM107_MIN_ISSUE_DELAY 0x1
3716 #define GM107_MAX_ISSUE_DELAY 0xf
3717
3718 int
3719 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3720 {
3721 int delay = 0, ready = cycle;
3722
3723 for (int s = 0; insn->srcExists(s); ++s)
3724 checkRd(insn->getSrc(s), cycle, delay);
3725
3726 // TODO: make use of getReadLatency()!
3727
3728 return MAX2(delay, ready - cycle);
3729 }
3730
3731 void
3732 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3733 const Instruction *next)
3734 {
3735 const OpClass cl = targ->getOpClass(insn->op);
3736 int wr, rd;
3737
3738 if (insn->op == OP_EXIT ||
3739 insn->op == OP_BAR ||
3740 insn->op == OP_MEMBAR) {
3741 delay = GM107_MAX_ISSUE_DELAY;
3742 } else
3743 if (insn->op == OP_QUADON ||
3744 insn->op == OP_QUADPOP) {
3745 delay = 0xd;
3746 } else
3747 if (cl == OPCLASS_FLOW || insn->join) {
3748 delay = 0xd;
3749 }
3750
3751 if (!next || !targ->canDualIssue(insn, next)) {
3752 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
3753 } else {
3754 delay = 0x0; // dual-issue
3755 }
3756
3757 wr = getWrDepBar(insn);
3758 rd = getRdDepBar(insn);
3759
3760 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
3761 // Barriers take one additional clock cycle to become active on top of
3762 // the clock consumed by the instruction producing it.
3763 if (!next || insn->bb != next->bb) {
3764 delay = 0x2;
3765 } else {
3766 int wt = getWtDepBar(next);
3767 if ((wt & (1 << wr)) | (wt & (1 << rd)))
3768 delay = 0x2;
3769 }
3770 }
3771
3772 emitStall(insn, delay);
3773 }
3774
3775
3776 // Return true when the given instruction needs to emit a read dependency
3777 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3778 // setting the maximum number of stall counts is not enough.
3779 bool
3780 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
3781 {
3782 BitSet srcs(255, 1), defs(255, 1);
3783 int a, b;
3784
3785 if (!targ->isBarrierRequired(insn))
3786 return false;
3787
3788 // Do not emit a read dependency barrier when the instruction doesn't use
3789 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3790 for (int s = 0; insn->srcExists(s); ++s) {
3791 const Value *src = insn->src(s).rep();
3792 if (insn->src(s).getFile() != FILE_GPR)
3793 continue;
3794 if (src->reg.data.id == 255)
3795 continue;
3796
3797 a = src->reg.data.id;
3798 b = a + src->reg.size / 4;
3799 for (int r = a; r < b; ++r)
3800 srcs.set(r);
3801 }
3802
3803 if (!srcs.popCount())
3804 return false;
3805
3806 // Do not emit a read dependency barrier when the output GPRs are equal to
3807 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3808 // be produced and WaR hazards are prevented.
3809 for (int d = 0; insn->defExists(d); ++d) {
3810 const Value *def = insn->def(d).rep();
3811 if (insn->def(d).getFile() != FILE_GPR)
3812 continue;
3813 if (def->reg.data.id == 255)
3814 continue;
3815
3816 a = def->reg.data.id;
3817 b = a + def->reg.size / 4;
3818 for (int r = a; r < b; ++r)
3819 defs.set(r);
3820 }
3821
3822 srcs.andNot(defs);
3823 if (!srcs.popCount())
3824 return false;
3825
3826 return true;
3827 }
3828
3829 // Return true when the given instruction needs to emit a write dependency
3830 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3831 // setting the maximum number of stall counts is not enough. This is only legal
3832 // if the instruction output something.
3833 bool
3834 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
3835 {
3836 if (!targ->isBarrierRequired(insn))
3837 return false;
3838
3839 for (int d = 0; insn->defExists(d); ++d) {
3840 if (insn->def(d).getFile() == FILE_GPR ||
3841 insn->def(d).getFile() == FILE_PREDICATE)
3842 return true;
3843 }
3844 return false;
3845 }
3846
3847 // Find the next instruction inside the same basic block which uses the output
3848 // of the given instruction in order to avoid RaW hazards.
3849 Instruction *
3850 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
3851 {
3852 Instruction *insn, *next;
3853 int minGPR, maxGPR;
3854
3855 if (!bari->defExists(0))
3856 return NULL;
3857
3858 minGPR = bari->def(0).rep()->reg.data.id;
3859 maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
3860
3861 for (insn = bari->next; insn != NULL; insn = next) {
3862 next = insn->next;
3863
3864 for (int s = 0; insn->srcExists(s); ++s) {
3865 const Value *src = insn->src(s).rep();
3866 if (bari->def(0).getFile() == FILE_GPR) {
3867 if (insn->src(s).getFile() != FILE_GPR ||
3868 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3869 src->reg.data.id > maxGPR)
3870 continue;
3871 return insn;
3872 } else
3873 if (bari->def(0).getFile() == FILE_PREDICATE) {
3874 if (insn->src(s).getFile() != FILE_PREDICATE ||
3875 src->reg.data.id != minGPR)
3876 continue;
3877 return insn;
3878 }
3879 }
3880 }
3881 return NULL;
3882 }
3883
3884 // Find the next instruction inside the same basic block which overwrites, at
3885 // least, one source of the given instruction in order to avoid WaR hazards.
3886 Instruction *
3887 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
3888 {
3889 Instruction *insn, *next;
3890 int minGPR, maxGPR;
3891
3892 for (insn = bari->next; insn != NULL; insn = next) {
3893 next = insn->next;
3894
3895 for (int d = 0; insn->defExists(d); ++d) {
3896 const Value *def = insn->def(d).rep();
3897 if (insn->def(d).getFile() != FILE_GPR)
3898 continue;
3899
3900 minGPR = def->reg.data.id;
3901 maxGPR = minGPR + def->reg.size / 4 - 1;
3902
3903 for (int s = 0; bari->srcExists(s); ++s) {
3904 const Value *src = bari->src(s).rep();
3905 if (bari->src(s).getFile() != FILE_GPR ||
3906 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3907 src->reg.data.id > maxGPR)
3908 continue;
3909 return insn;
3910 }
3911 }
3912 }
3913 return NULL;
3914 }
3915
3916 // Dependency barriers:
3917 // This pass is a bit ugly and could probably be improved by performing a
3918 // better allocation.
3919 //
3920 // The main idea is to avoid WaR and RaW hazards by emitting read/write
3921 // dependency barriers using the control codes.
3922 bool
3923 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
3924 {
3925 std::list<LiveBarUse> live_uses;
3926 std::list<LiveBarDef> live_defs;
3927 Instruction *insn, *next;
3928 BitSet bars(6, 1);
3929 int bar_id;
3930
3931 for (insn = bb->getEntry(); insn != NULL; insn = next) {
3932 Instruction *usei = NULL, *defi = NULL;
3933 bool need_wr_bar, need_rd_bar;
3934
3935 next = insn->next;
3936
3937 // Expire old barrier uses.
3938 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
3939 it != live_uses.end();) {
3940 if (insn->serial >= it->usei->serial) {
3941 int wr = getWrDepBar(it->insn);
3942 emitWtDepBar(insn, wr);
3943 bars.clr(wr); // free barrier
3944 it = live_uses.erase(it);
3945 continue;
3946 }
3947 ++it;
3948 }
3949
3950 // Expire old barrier defs.
3951 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
3952 it != live_defs.end();) {
3953 if (insn->serial >= it->defi->serial) {
3954 int rd = getRdDepBar(it->insn);
3955 emitWtDepBar(insn, rd);
3956 bars.clr(rd); // free barrier
3957 it = live_defs.erase(it);
3958 continue;
3959 }
3960 ++it;
3961 }
3962
3963 need_wr_bar = needWrDepBar(insn);
3964 need_rd_bar = needRdDepBar(insn);
3965
3966 if (need_wr_bar) {
3967 // When the instruction requires to emit a write dependency barrier
3968 // (all which write something at a variable latency), find the next
3969 // instruction which reads the outputs.
3970 usei = findFirstUse(insn);
3971
3972 // Allocate and emit a new barrier.
3973 bar_id = bars.findFreeRange(1);
3974 if (bar_id == -1)
3975 bar_id = 5;
3976 bars.set(bar_id);
3977 emitWrDepBar(insn, bar_id);
3978 if (usei)
3979 live_uses.push_back(LiveBarUse(insn, usei));
3980 }
3981
3982 if (need_rd_bar) {
3983 // When the instruction requires to emit a read dependency barrier
3984 // (all which read something at a variable latency), find the next
3985 // instruction which will write the inputs.
3986 defi = findFirstDef(insn);
3987
3988 if (usei && defi && usei->serial <= defi->serial)
3989 continue;
3990
3991 // Allocate and emit a new barrier.
3992 bar_id = bars.findFreeRange(1);
3993 if (bar_id == -1)
3994 bar_id = 5;
3995 bars.set(bar_id);
3996 emitRdDepBar(insn, bar_id);
3997 if (defi)
3998 live_defs.push_back(LiveBarDef(insn, defi));
3999 }
4000 }
4001
4002 // Remove unnecessary barrier waits.
4003 BitSet alive_bars(6, 1);
4004 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4005 int wr, rd, wt;
4006
4007 next = insn->next;
4008
4009 wr = getWrDepBar(insn);
4010 rd = getRdDepBar(insn);
4011 wt = getWtDepBar(insn);
4012
4013 for (int idx = 0; idx < 6; ++idx) {
4014 if (!(wt & (1 << idx)))
4015 continue;
4016 if (!alive_bars.test(idx)) {
4017 insn->sched &= ~(1 << (11 + idx));
4018 } else {
4019 alive_bars.clr(idx);
4020 }
4021 }
4022
4023 if (wr < 6)
4024 alive_bars.set(wr);
4025 if (rd < 6)
4026 alive_bars.set(rd);
4027 }
4028
4029 return true;
4030 }
4031
4032 bool
4033 SchedDataCalculatorGM107::visit(Function *func)
4034 {
4035 ArrayList insns;
4036
4037 func->orderInstructions(insns);
4038
4039 scoreBoards.resize(func->cfg.getSize());
4040 for (size_t i = 0; i < scoreBoards.size(); ++i)
4041 scoreBoards[i].wipe();
4042 return true;
4043 }
4044
4045 bool
4046 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4047 {
4048 Instruction *insn, *next = NULL;
4049 int cycle = 0;
4050
4051 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4052 /*XXX*/
4053 insn->sched = 0x7e0;
4054 }
4055
4056 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4057 return true;
4058
4059 // Insert read/write dependency barriers for instructions which don't
4060 // operate at a fixed latency.
4061 insertBarriers(bb);
4062
4063 score = &scoreBoards.at(bb->getId());
4064
4065 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4066 // back branches will wait until all target dependencies are satisfied
4067 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4068 continue;
4069 BasicBlock *in = BasicBlock::get(ei.getNode());
4070 score->setMax(&scoreBoards.at(in->getId()));
4071 }
4072
4073 #ifdef GM107_DEBUG_SCHED_DATA
4074 INFO("=== BB:%i initial scores\n", bb->getId());
4075 score->print(cycle);
4076 #endif
4077
4078 // Because barriers are allocated locally (intra-BB), we have to make sure
4079 // that all produced barriers have been consumed before entering inside a
4080 // new basic block. The best way is to do a global allocation pre RA but
4081 // it's really more difficult, especially because of the phi nodes. Anyways,
4082 // it seems like that waiting on a barrier which has already been consumed
4083 // doesn't add any additional cost, it's just not elegant!
4084 Instruction *start = bb->getEntry();
4085 if (start && bb->cfg.incidentCount() > 0) {
4086 for (int b = 0; b < 6; b++)
4087 emitWtDepBar(start, b);
4088 }
4089
4090 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4091 next = insn->next;
4092
4093 commitInsn(insn, cycle);
4094 int delay = calcDelay(next, cycle);
4095 setDelay(insn, delay, next);
4096 cycle += getStall(insn);
4097
4098 setReuseFlag(insn);
4099
4100 // XXX: The yield flag seems to destroy a bunch of things when it is
4101 // set on every instruction, need investigation.
4102 //emitYield(insn);
4103
4104 #ifdef GM107_DEBUG_SCHED_DATA
4105 printSchedInfo(cycle, insn);
4106 insn->print();
4107 next->print();
4108 #endif
4109 }
4110
4111 if (!insn)
4112 return true;
4113 commitInsn(insn, cycle);
4114
4115 int bbDelay = -1;
4116
4117 #ifdef GM107_DEBUG_SCHED_DATA
4118 fprintf(stderr, "last instruction is : ");
4119 insn->print();
4120 fprintf(stderr, "cycle=%d\n", cycle);
4121 #endif
4122
4123 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4124 BasicBlock *out = BasicBlock::get(ei.getNode());
4125
4126 if (ei.getType() != Graph::Edge::BACK) {
4127 // Only test the first instruction of the outgoing block.
4128 next = out->getEntry();
4129 if (next) {
4130 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4131 } else {
4132 // When the outgoing BB is empty, make sure to set the number of
4133 // stall counts needed by the instruction because we don't know the
4134 // next instruction.
4135 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4136 }
4137 } else {
4138 // Wait until all dependencies are satisfied.
4139 const int regsFree = score->getLatest();
4140 next = out->getFirst();
4141 for (int c = cycle; next && c < regsFree; next = next->next) {
4142 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4143 c += getStall(next);
4144 }
4145 next = NULL;
4146 }
4147 }
4148 if (bb->cfg.outgoingCount() != 1)
4149 next = NULL;
4150 setDelay(insn, bbDelay, next);
4151 cycle += getStall(insn);
4152
4153 score->rebase(cycle); // common base for initializing out blocks' scores
4154 return true;
4155 }
4156
4157 /*******************************************************************************
4158 * main
4159 ******************************************************************************/
4160
4161 void
4162 CodeEmitterGM107::prepareEmission(Function *func)
4163 {
4164 SchedDataCalculatorGM107 sched(targGM107);
4165 CodeEmitter::prepareEmission(func);
4166 sched.run(func, true, true);
4167 }
4168
// Return the number of 24-byte groups needed to hold 'size' bytes, rounding
// up (0 for an empty size).
//
// The division is written so that it cannot wrap around: adding 23 before
// dividing would overflow for sizes within 23 of UINT32_MAX and yield 0.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   return size / 24 + (size % 24 != 0 ? 1 : 0);
}
4173
4174 void
4175 CodeEmitterGM107::prepareEmission(Program *prog)
4176 {
4177 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4178 !fi.end(); fi.next()) {
4179 Function *func = reinterpret_cast<Function *>(fi.get());
4180 func->binPos = prog->binSize;
4181 prepareEmission(func);
4182
4183 // adjust sizes & positions for schedulding info:
4184 if (prog->getTarget()->hasSWSched) {
4185 uint32_t adjPos = func->binPos;
4186 BasicBlock *bb = NULL;
4187 for (int i = 0; i < func->bbCount; ++i) {
4188 bb = func->bbArray[i];
4189 int32_t adjSize = bb->binSize;
4190 if (adjPos % 32) {
4191 adjSize -= 32 - adjPos % 32;
4192 if (adjSize < 0)
4193 adjSize = 0;
4194 }
4195 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4196 bb->binPos = adjPos;
4197 bb->binSize = adjSize;
4198 adjPos += adjSize;
4199 }
4200 if (bb)
4201 func->binSize = adjPos - func->binPos;
4202 }
4203
4204 prog->binSize += func->binSize;
4205 }
4206 }
4207
4208 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4209 : CodeEmitter(target),
4210 targGM107(target),
4211 writeIssueDelays(target->hasSWSched)
4212 {
4213 code = NULL;
4214 codeSize = codeSizeLimit = 0;
4215 relocInfo = NULL;
4216 }
4217
4218 CodeEmitter *
4219 TargetGM107::createCodeEmitterGM107(Program::Type type)
4220 {
4221 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4222 emit->setProgramType(type);
4223 return emit;
4224 }
4225
4226 } // namespace nv50_ir