nvc0/ir: make it possible to have the flags def in def0
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gm107.cpp
1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "codegen/nv50_ir_target_gm107.h"
26
27 //#define GM107_DEBUG_SCHED_DATA
28
29 namespace nv50_ir {
30
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34 CodeEmitterGM107(const TargetGM107 *);
35
36 virtual bool emitInstruction(Instruction *);
37 virtual uint32_t getMinEncodingSize(const Instruction *) const;
38
39 virtual void prepareEmission(Program *);
40 virtual void prepareEmission(Function *);
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 private:
45 const TargetGM107 *targGM107;
46
47 Program::Type progType;
48
49 const Instruction *insn;
50 const bool writeIssueDelays;
51 uint32_t *data;
52
53 private:
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value *);
61 inline void emitGPR(int pos) {
62 emitGPR(pos, (const Value *)NULL);
63 }
64 inline void emitGPR(int pos, const ValueRef &ref) {
65 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66 }
67 inline void emitGPR(int pos, const ValueRef *ref) {
68 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69 }
70 inline void emitGPR(int pos, const ValueDef &def) {
71 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72 }
73 inline void emitSYS(int, const Value *);
74 inline void emitSYS(int pos, const ValueRef &ref) {
75 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76 }
77 inline void emitPRED(int, const Value *);
78 inline void emitPRED(int pos) {
79 emitPRED(pos, (const Value *)NULL);
80 }
81 inline void emitPRED(int pos, const ValueRef &ref) {
82 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83 }
84 inline void emitPRED(int pos, const ValueDef &def) {
85 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86 }
87 inline void emitADDR(int, int, int, int, const ValueRef &);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89 inline bool longIMMD(const ValueRef &);
90 inline void emitIMMD(int, int, const ValueRef &);
91
92 void emitCond3(int, CondCode);
93 void emitCond4(int, CondCode);
94 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef &);
101 inline void emitNEG(int, const ValueRef &);
102 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode, int);
105 inline void emitRND(int pos) {
106 emitRND(pos, insn->rnd, -1);
107 }
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef &);
110
111 void emitEXIT();
112 void emitBRA();
113 void emitCAL();
114 void emitPCNT();
115 void emitCONT();
116 void emitPBK();
117 void emitBRK();
118 void emitPRET();
119 void emitRET();
120 void emitSSY();
121 void emitSYNC();
122 void emitSAM();
123 void emitRAM();
124
125 void emitMOV();
126 void emitS2R();
127 void emitF2F();
128 void emitF2I();
129 void emitI2F();
130 void emitI2I();
131 void emitSEL();
132 void emitSHFL();
133
134 void emitDADD();
135 void emitDMUL();
136 void emitDFMA();
137 void emitDMNMX();
138 void emitDSET();
139 void emitDSETP();
140
141 void emitFADD();
142 void emitFMUL();
143 void emitFFMA();
144 void emitMUFU();
145 void emitFMNMX();
146 void emitRRO();
147 void emitFCMP();
148 void emitFSET();
149 void emitFSETP();
150 void emitFSWZADD();
151
152 void emitLOP();
153 void emitNOT();
154 void emitIADD();
155 void emitIMUL();
156 void emitIMAD();
157 void emitISCADD();
158 void emitIMNMX();
159 void emitICMP();
160 void emitISET();
161 void emitISETP();
162 void emitSHL();
163 void emitSHR();
164 void emitSHF();
165 void emitPOPC();
166 void emitBFI();
167 void emitBFE();
168 void emitFLO();
169
170 void emitLDSTs(int, DataType);
171 void emitLDSTc(int);
172 void emitLDC();
173 void emitLDL();
174 void emitLDS();
175 void emitLD();
176 void emitSTL();
177 void emitSTS();
178 void emitST();
179 void emitALD();
180 void emitAST();
181 void emitISBERD();
182 void emitAL2P();
183 void emitIPA();
184 void emitATOM();
185 void emitATOMS();
186 void emitRED();
187 void emitCCTL();
188
189 void emitPIXLD();
190
191 void emitTEXs(int);
192 void emitTEX();
193 void emitTLD();
194 void emitTLD4();
195 void emitTXD();
196 void emitTXQ();
197 void emitTMML();
198 void emitDEPBAR();
199
200 void emitNOP();
201 void emitKIL();
202 void emitOUT();
203
204 void emitBAR();
205 void emitMEMBAR();
206
207 void emitVOTE();
208
209 void emitSUTarget();
210 void emitSUHandle(const int s);
211 void emitSUSTx();
212 void emitSULDx();
213 void emitSUREDx();
214 };
215
216 /*******************************************************************************
217 * general instruction layout/fields
218 ******************************************************************************/
219
220 void
221 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
222 {
223 if (b >= 0) {
224 uint32_t m = ((1ULL << s) - 1);
225 uint64_t d = (uint64_t)(v & m) << b;
226 assert(!(v & ~m) || (v & ~m) == ~m);
227 data[1] |= d >> 32;
228 data[0] |= d;
229 }
230 }
231
232 void
233 CodeEmitterGM107::emitPred()
234 {
235 if (insn->predSrc >= 0) {
236 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
237 emitField(19, 1, insn->cc == CC_NOT_P);
238 } else {
239 emitField(16, 3, 7);
240 }
241 }
242
243 void
244 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
245 {
246 code[0] = 0x00000000;
247 code[1] = hi;
248 if (pred)
249 emitPred();
250 }
251
252 void
253 CodeEmitterGM107::emitGPR(int pos, const Value *val)
254 {
255 emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
256 val->reg.data.id : 255);
257 }
258
259 void
260 CodeEmitterGM107::emitSYS(int pos, const Value *val)
261 {
262 int id = val ? val->reg.data.id : -1;
263
264 switch (id) {
265 case SV_LANEID : id = 0x00; break;
266 case SV_VERTEX_COUNT : id = 0x10; break;
267 case SV_INVOCATION_ID : id = 0x11; break;
268 case SV_THREAD_KILL : id = 0x13; break;
269 case SV_INVOCATION_INFO: id = 0x1d; break;
270 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
271 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
272 default:
273 assert(!"invalid system value");
274 id = 0;
275 break;
276 }
277
278 emitField(pos, 8, id);
279 }
280
281 void
282 CodeEmitterGM107::emitPRED(int pos, const Value *val)
283 {
284 emitField(pos, 3, val ? val->reg.data.id : 7);
285 }
286
287 void
288 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
289 const ValueRef &ref)
290 {
291 const Value *v = ref.get();
292 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
293 if (gpr >= 0)
294 emitGPR(gpr, ref.getIndirect(0));
295 emitField(off, len, v->reg.data.offset >> shr);
296 }
297
298 void
299 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
300 const ValueRef &ref)
301 {
302 const Value *v = ref.get();
303 const Symbol *s = v->asSym();
304
305 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
306
307 emitField(buf, 5, v->reg.fileIndex);
308 if (gpr >= 0)
309 emitGPR(gpr, ref.getIndirect(0));
310 emitField(off, 16, s->reg.data.offset >> shr);
311 }
312
313 bool
314 CodeEmitterGM107::longIMMD(const ValueRef &ref)
315 {
316 if (ref.getFile() == FILE_IMMEDIATE) {
317 const ImmediateValue *imm = ref.get()->asImm();
318 if (isFloatType(insn->sType)) {
319 if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
320 return true;
321 } else {
322 if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
323 (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
324 return true;
325 }
326 }
327 return false;
328 }
329
330 void
331 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
332 {
333 const ImmediateValue *imm = ref.get()->asImm();
334 uint32_t val = imm->reg.data.u32;
335
336 if (len == 19) {
337 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
338 assert(!(val & 0x00000fff));
339 val >>= 12;
340 } else if (insn->sType == TYPE_F64) {
341 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
342 val = imm->reg.data.u64 >> 44;
343 }
344 assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
345 emitField( 56, 1, (val & 0x80000) >> 19);
346 emitField(pos, len, (val & 0x7ffff));
347 } else {
348 emitField(pos, len, val);
349 }
350 }
351
352 /*******************************************************************************
353 * modifiers
354 ******************************************************************************/
355
356 void
357 CodeEmitterGM107::emitCond3(int pos, CondCode code)
358 {
359 int data = 0;
360
361 switch (code) {
362 case CC_FL : data = 0x00; break;
363 case CC_LTU:
364 case CC_LT : data = 0x01; break;
365 case CC_EQU:
366 case CC_EQ : data = 0x02; break;
367 case CC_LEU:
368 case CC_LE : data = 0x03; break;
369 case CC_GTU:
370 case CC_GT : data = 0x04; break;
371 case CC_NEU:
372 case CC_NE : data = 0x05; break;
373 case CC_GEU:
374 case CC_GE : data = 0x06; break;
375 case CC_TR : data = 0x07; break;
376 default:
377 assert(!"invalid cond3");
378 break;
379 }
380
381 emitField(pos, 3, data);
382 }
383
384 void
385 CodeEmitterGM107::emitCond4(int pos, CondCode code)
386 {
387 int data = 0;
388
389 switch (code) {
390 case CC_FL: data = 0x00; break;
391 case CC_LT: data = 0x01; break;
392 case CC_EQ: data = 0x02; break;
393 case CC_LE: data = 0x03; break;
394 case CC_GT: data = 0x04; break;
395 case CC_NE: data = 0x05; break;
396 case CC_GE: data = 0x06; break;
397 // case CC_NUM: data = 0x07; break;
398 // case CC_NAN: data = 0x08; break;
399 case CC_LTU: data = 0x09; break;
400 case CC_EQU: data = 0x0a; break;
401 case CC_LEU: data = 0x0b; break;
402 case CC_GTU: data = 0x0c; break;
403 case CC_NEU: data = 0x0d; break;
404 case CC_GEU: data = 0x0e; break;
405 case CC_TR: data = 0x0f; break;
406 default:
407 assert(!"invalid cond4");
408 break;
409 }
410
411 emitField(pos, 4, data);
412 }
413
414 void
415 CodeEmitterGM107::emitO(int pos)
416 {
417 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
418 }
419
420 void
421 CodeEmitterGM107::emitP(int pos)
422 {
423 emitField(pos, 1, insn->perPatch);
424 }
425
426 void
427 CodeEmitterGM107::emitSAT(int pos)
428 {
429 emitField(pos, 1, insn->saturate);
430 }
431
432 void
433 CodeEmitterGM107::emitCC(int pos)
434 {
435 emitField(pos, 1, insn->flagsDef >= 0);
436 }
437
438 void
439 CodeEmitterGM107::emitX(int pos)
440 {
441 emitField(pos, 1, insn->flagsSrc >= 0);
442 }
443
444 void
445 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
446 {
447 emitField(pos, 1, ref.mod.abs());
448 }
449
450 void
451 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
452 {
453 emitField(pos, 1, ref.mod.neg());
454 }
455
456 void
457 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
458 {
459 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
460 }
461
462 void
463 CodeEmitterGM107::emitFMZ(int pos, int len)
464 {
465 emitField(pos, len, insn->dnz << 1 | insn->ftz);
466 }
467
468 void
469 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
470 {
471 int rm = 0, ri = 0;
472 switch (rnd) {
473 case ROUND_NI: ri = 1;
474 case ROUND_N : rm = 0; break;
475 case ROUND_MI: ri = 1;
476 case ROUND_M : rm = 1; break;
477 case ROUND_PI: ri = 1;
478 case ROUND_P : rm = 2; break;
479 case ROUND_ZI: ri = 1;
480 case ROUND_Z : rm = 3; break;
481 default:
482 assert(!"invalid round mode");
483 break;
484 }
485 emitField(rip, 1, ri);
486 emitField(rmp, 2, rm);
487 }
488
489 void
490 CodeEmitterGM107::emitPDIV(int pos)
491 {
492 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
493 if (insn->postFactor > 0)
494 emitField(pos, 3, 7 - insn->postFactor);
495 else
496 emitField(pos, 3, 0 - insn->postFactor);
497 }
498
499 void
500 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
501 {
502 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
503 }
504
505 /*******************************************************************************
506 * control flow
507 ******************************************************************************/
508
509 void
510 CodeEmitterGM107::emitEXIT()
511 {
512 emitInsn (0xe3000000);
513 emitCond5(0x00, CC_TR);
514 }
515
516 void
517 CodeEmitterGM107::emitBRA()
518 {
519 const FlowInstruction *insn = this->insn->asFlow();
520 int gpr = -1;
521
522 if (insn->indirect) {
523 if (insn->absolute)
524 emitInsn(0xe2000000); // JMX
525 else
526 emitInsn(0xe2500000); // BRX
527 gpr = 0x08;
528 } else {
529 if (insn->absolute)
530 emitInsn(0xe2100000); // JMP
531 else
532 emitInsn(0xe2400000); // BRA
533 emitField(0x07, 1, insn->allWarp);
534 }
535
536 emitField(0x06, 1, insn->limit);
537 emitCond5(0x00, CC_TR);
538
539 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
540 int32_t pos = insn->target.bb->binPos;
541 if (writeIssueDelays && !(pos & 0x1f))
542 pos += 8;
543 if (!insn->absolute)
544 emitField(0x14, 24, pos - (codeSize + 8));
545 else
546 emitField(0x14, 32, pos);
547 } else {
548 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
549 emitField(0x05, 1, 1);
550 }
551 }
552
553 void
554 CodeEmitterGM107::emitCAL()
555 {
556 const FlowInstruction *insn = this->insn->asFlow();
557
558 if (insn->absolute) {
559 emitInsn(0xe2200000, 0); // JCAL
560 } else {
561 emitInsn(0xe2600000, 0); // CAL
562 }
563
564 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
565 if (!insn->absolute)
566 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
567 else {
568 if (insn->builtin) {
569 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
570 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
571 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
572 } else {
573 emitField(0x14, 32, insn->target.bb->binPos);
574 }
575 }
576 } else {
577 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
578 emitField(0x05, 1, 1);
579 }
580 }
581
582 void
583 CodeEmitterGM107::emitPCNT()
584 {
585 const FlowInstruction *insn = this->insn->asFlow();
586
587 emitInsn(0xe2b00000, 0);
588
589 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
590 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
591 } else {
592 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
593 emitField(0x05, 1, 1);
594 }
595 }
596
597 void
598 CodeEmitterGM107::emitCONT()
599 {
600 emitInsn (0xe3500000);
601 emitCond5(0x00, CC_TR);
602 }
603
604 void
605 CodeEmitterGM107::emitPBK()
606 {
607 const FlowInstruction *insn = this->insn->asFlow();
608
609 emitInsn(0xe2a00000, 0);
610
611 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
612 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
613 } else {
614 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
615 emitField(0x05, 1, 1);
616 }
617 }
618
619 void
620 CodeEmitterGM107::emitBRK()
621 {
622 emitInsn (0xe3400000);
623 emitCond5(0x00, CC_TR);
624 }
625
626 void
627 CodeEmitterGM107::emitPRET()
628 {
629 const FlowInstruction *insn = this->insn->asFlow();
630
631 emitInsn(0xe2700000, 0);
632
633 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
634 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
635 } else {
636 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
637 emitField(0x05, 1, 1);
638 }
639 }
640
641 void
642 CodeEmitterGM107::emitRET()
643 {
644 emitInsn (0xe3200000);
645 emitCond5(0x00, CC_TR);
646 }
647
648 void
649 CodeEmitterGM107::emitSSY()
650 {
651 const FlowInstruction *insn = this->insn->asFlow();
652
653 emitInsn(0xe2900000, 0);
654
655 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
656 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
657 } else {
658 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
659 emitField(0x05, 1, 1);
660 }
661 }
662
663 void
664 CodeEmitterGM107::emitSYNC()
665 {
666 emitInsn (0xf0f80000);
667 emitCond5(0x00, CC_TR);
668 }
669
670 void
671 CodeEmitterGM107::emitSAM()
672 {
673 emitInsn(0xe3700000, 0);
674 }
675
676 void
677 CodeEmitterGM107::emitRAM()
678 {
679 emitInsn(0xe3800000, 0);
680 }
681
682 /*******************************************************************************
683 * predicate/cc
684 ******************************************************************************/
685
686 /*******************************************************************************
687 * movement / conversion
688 ******************************************************************************/
689
690 void
691 CodeEmitterGM107::emitMOV()
692 {
693 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
694 switch (insn->src(0).getFile()) {
695 case FILE_GPR:
696 if (insn->def(0).getFile() == FILE_PREDICATE) {
697 emitInsn(0x5b6a0000);
698 emitGPR (0x08);
699 } else {
700 emitInsn(0x5c980000);
701 }
702 emitGPR (0x14, insn->src(0));
703 break;
704 case FILE_MEMORY_CONST:
705 emitInsn(0x4c980000);
706 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
707 break;
708 case FILE_IMMEDIATE:
709 emitInsn(0x38980000);
710 emitIMMD(0x14, 19, insn->src(0));
711 break;
712 case FILE_PREDICATE:
713 emitInsn(0x50880000);
714 emitPRED(0x0c, insn->src(0));
715 emitPRED(0x1d);
716 emitPRED(0x27);
717 break;
718 default:
719 assert(!"bad src file");
720 break;
721 }
722 if (insn->def(0).getFile() != FILE_PREDICATE &&
723 insn->src(0).getFile() != FILE_PREDICATE)
724 emitField(0x27, 4, insn->lanes);
725 } else {
726 emitInsn (0x01000000);
727 emitIMMD (0x14, 32, insn->src(0));
728 emitField(0x0c, 4, insn->lanes);
729 }
730
731 if (insn->def(0).getFile() == FILE_PREDICATE) {
732 emitPRED(0x27);
733 emitPRED(0x03, insn->def(0));
734 emitPRED(0x00);
735 } else {
736 emitGPR(0x00, insn->def(0));
737 }
738 }
739
740 void
741 CodeEmitterGM107::emitS2R()
742 {
743 emitInsn(0xf0c80000);
744 emitSYS (0x14, insn->src(0));
745 emitGPR (0x00, insn->def(0));
746 }
747
748 void
749 CodeEmitterGM107::emitF2F()
750 {
751 RoundMode rnd = insn->rnd;
752
753 switch (insn->op) {
754 case OP_FLOOR: rnd = ROUND_MI; break;
755 case OP_CEIL : rnd = ROUND_PI; break;
756 case OP_TRUNC: rnd = ROUND_ZI; break;
757 default:
758 break;
759 }
760
761 switch (insn->src(0).getFile()) {
762 case FILE_GPR:
763 emitInsn(0x5ca80000);
764 emitGPR (0x14, insn->src(0));
765 break;
766 case FILE_MEMORY_CONST:
767 emitInsn(0x4ca80000);
768 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
769 break;
770 case FILE_IMMEDIATE:
771 emitInsn(0x38a80000);
772 emitIMMD(0x14, 19, insn->src(0));
773 break;
774 default:
775 assert(!"bad src0 file");
776 break;
777 }
778
779 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
780 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
781 emitCC (0x2f);
782 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
783 emitFMZ (0x2c, 1);
784 emitField(0x29, 1, insn->subOp);
785 emitRND (0x27, rnd, 0x2a);
786 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
787 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
788 emitGPR (0x00, insn->def(0));
789 }
790
791 void
792 CodeEmitterGM107::emitF2I()
793 {
794 RoundMode rnd = insn->rnd;
795
796 switch (insn->op) {
797 case OP_FLOOR: rnd = ROUND_M; break;
798 case OP_CEIL : rnd = ROUND_P; break;
799 case OP_TRUNC: rnd = ROUND_Z; break;
800 default:
801 break;
802 }
803
804 switch (insn->src(0).getFile()) {
805 case FILE_GPR:
806 emitInsn(0x5cb00000);
807 emitGPR (0x14, insn->src(0));
808 break;
809 case FILE_MEMORY_CONST:
810 emitInsn(0x4cb00000);
811 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
812 break;
813 case FILE_IMMEDIATE:
814 emitInsn(0x38b00000);
815 emitIMMD(0x14, 19, insn->src(0));
816 break;
817 default:
818 assert(!"bad src0 file");
819 break;
820 }
821
822 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
823 emitCC (0x2f);
824 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
825 emitFMZ (0x2c, 1);
826 emitRND (0x27, rnd, 0x2a);
827 emitField(0x0c, 1, isSignedType(insn->dType));
828 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
829 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
830 emitGPR (0x00, insn->def(0));
831 }
832
833 void
834 CodeEmitterGM107::emitI2F()
835 {
836 RoundMode rnd = insn->rnd;
837
838 switch (insn->op) {
839 case OP_FLOOR: rnd = ROUND_M; break;
840 case OP_CEIL : rnd = ROUND_P; break;
841 case OP_TRUNC: rnd = ROUND_Z; break;
842 default:
843 break;
844 }
845
846 switch (insn->src(0).getFile()) {
847 case FILE_GPR:
848 emitInsn(0x5cb80000);
849 emitGPR (0x14, insn->src(0));
850 break;
851 case FILE_MEMORY_CONST:
852 emitInsn(0x4cb80000);
853 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
854 break;
855 case FILE_IMMEDIATE:
856 emitInsn(0x38b80000);
857 emitIMMD(0x14, 19, insn->src(0));
858 break;
859 default:
860 assert(!"bad src0 file");
861 break;
862 }
863
864 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
865 emitCC (0x2f);
866 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
867 emitField(0x29, 2, insn->subOp);
868 emitRND (0x27, rnd, -1);
869 emitField(0x0d, 1, isSignedType(insn->sType));
870 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
871 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
872 emitGPR (0x00, insn->def(0));
873 }
874
875 void
876 CodeEmitterGM107::emitI2I()
877 {
878 switch (insn->src(0).getFile()) {
879 case FILE_GPR:
880 emitInsn(0x5ce00000);
881 emitGPR (0x14, insn->src(0));
882 break;
883 case FILE_MEMORY_CONST:
884 emitInsn(0x4ce00000);
885 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
886 break;
887 case FILE_IMMEDIATE:
888 emitInsn(0x38e00000);
889 emitIMMD(0x14, 19, insn->src(0));
890 break;
891 default:
892 assert(!"bad src0 file");
893 break;
894 }
895
896 emitSAT (0x32);
897 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
898 emitCC (0x2f);
899 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
900 emitField(0x29, 2, insn->subOp);
901 emitField(0x0d, 1, isSignedType(insn->sType));
902 emitField(0x0c, 1, isSignedType(insn->dType));
903 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
904 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
905 emitGPR (0x00, insn->def(0));
906 }
907
908 static void
909 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
910 {
911 int loc = entry->loc;
912 if (data.force_persample_interp)
913 code[loc + 1] |= 1 << 10;
914 else
915 code[loc + 1] &= ~(1 << 10);
916 }
917
918 void
919 CodeEmitterGM107::emitSEL()
920 {
921 switch (insn->src(1).getFile()) {
922 case FILE_GPR:
923 emitInsn(0x5ca00000);
924 emitGPR (0x14, insn->src(1));
925 break;
926 case FILE_MEMORY_CONST:
927 emitInsn(0x4ca00000);
928 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
929 break;
930 case FILE_IMMEDIATE:
931 emitInsn(0x38a00000);
932 emitIMMD(0x14, 19, insn->src(1));
933 break;
934 default:
935 assert(!"bad src1 file");
936 break;
937 }
938
939 emitINV (0x2a, insn->src(2));
940 emitPRED(0x27, insn->src(2));
941 emitGPR (0x08, insn->src(0));
942 emitGPR (0x00, insn->def(0));
943
944 if (insn->subOp == 1) {
945 addInterp(0, 0, selpFlip);
946 }
947 }
948
949 void
950 CodeEmitterGM107::emitSHFL()
951 {
952 int type = 0;
953
954 emitInsn (0xef100000);
955
956 switch (insn->src(1).getFile()) {
957 case FILE_GPR:
958 emitGPR(0x14, insn->src(1));
959 break;
960 case FILE_IMMEDIATE:
961 emitIMMD(0x14, 5, insn->src(1));
962 type |= 1;
963 break;
964 default:
965 assert(!"invalid src1 file");
966 break;
967 }
968
969 /*XXX: what is this arg? hardcode immediate for now */
970 emitField(0x22, 13, 0x1c03);
971 type |= 2;
972
973 emitPRED (0x30);
974 emitField(0x1e, 2, insn->subOp);
975 emitField(0x1c, 2, type);
976 emitGPR (0x08, insn->src(0));
977 emitGPR (0x00, insn->def(0));
978 }
979
980 /*******************************************************************************
981 * double
982 ******************************************************************************/
983
984 void
985 CodeEmitterGM107::emitDADD()
986 {
987 switch (insn->src(1).getFile()) {
988 case FILE_GPR:
989 emitInsn(0x5c700000);
990 emitGPR (0x14, insn->src(1));
991 break;
992 case FILE_MEMORY_CONST:
993 emitInsn(0x4c700000);
994 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
995 break;
996 case FILE_IMMEDIATE:
997 emitInsn(0x38700000);
998 emitIMMD(0x14, 19, insn->src(1));
999 break;
1000 default:
1001 assert(!"bad src1 file");
1002 break;
1003 }
1004 emitABS(0x31, insn->src(1));
1005 emitNEG(0x30, insn->src(0));
1006 emitCC (0x2f);
1007 emitABS(0x2e, insn->src(0));
1008 emitNEG(0x2d, insn->src(1));
1009
1010 if (insn->op == OP_SUB)
1011 code[1] ^= 0x00002000;
1012
1013 emitGPR(0x08, insn->src(0));
1014 emitGPR(0x00, insn->def(0));
1015 }
1016
1017 void
1018 CodeEmitterGM107::emitDMUL()
1019 {
1020 switch (insn->src(1).getFile()) {
1021 case FILE_GPR:
1022 emitInsn(0x5c800000);
1023 emitGPR (0x14, insn->src(1));
1024 break;
1025 case FILE_MEMORY_CONST:
1026 emitInsn(0x4c800000);
1027 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1028 break;
1029 case FILE_IMMEDIATE:
1030 emitInsn(0x38800000);
1031 emitIMMD(0x14, 19, insn->src(1));
1032 break;
1033 default:
1034 assert(!"bad src1 file");
1035 break;
1036 }
1037
1038 emitNEG2(0x30, insn->src(0), insn->src(1));
1039 emitCC (0x2f);
1040 emitRND (0x27);
1041 emitGPR (0x08, insn->src(0));
1042 emitGPR (0x00, insn->def(0));
1043 }
1044
1045 void
1046 CodeEmitterGM107::emitDFMA()
1047 {
1048 switch(insn->src(2).getFile()) {
1049 case FILE_GPR:
1050 switch (insn->src(1).getFile()) {
1051 case FILE_GPR:
1052 emitInsn(0x5b700000);
1053 emitGPR (0x14, insn->src(1));
1054 break;
1055 case FILE_MEMORY_CONST:
1056 emitInsn(0x4b700000);
1057 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1058 break;
1059 case FILE_IMMEDIATE:
1060 emitInsn(0x36700000);
1061 emitIMMD(0x14, 19, insn->src(1));
1062 break;
1063 default:
1064 assert(!"bad src1 file");
1065 break;
1066 }
1067 emitGPR (0x27, insn->src(2));
1068 break;
1069 case FILE_MEMORY_CONST:
1070 emitInsn(0x53700000);
1071 emitGPR (0x27, insn->src(1));
1072 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1073 break;
1074 default:
1075 assert(!"bad src2 file");
1076 break;
1077 }
1078
1079 emitRND (0x32);
1080 emitNEG (0x31, insn->src(2));
1081 emitNEG2(0x30, insn->src(0), insn->src(1));
1082 emitCC (0x2f);
1083 emitGPR (0x08, insn->src(0));
1084 emitGPR (0x00, insn->def(0));
1085 }
1086
1087 void
1088 CodeEmitterGM107::emitDMNMX()
1089 {
1090 switch (insn->src(1).getFile()) {
1091 case FILE_GPR:
1092 emitInsn(0x5c500000);
1093 emitGPR (0x14, insn->src(1));
1094 break;
1095 case FILE_MEMORY_CONST:
1096 emitInsn(0x4c500000);
1097 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1098 break;
1099 case FILE_IMMEDIATE:
1100 emitInsn(0x38500000);
1101 emitIMMD(0x14, 19, insn->src(1));
1102 break;
1103 default:
1104 assert(!"bad src1 file");
1105 break;
1106 }
1107
1108 emitABS (0x31, insn->src(1));
1109 emitNEG (0x30, insn->src(0));
1110 emitCC (0x2f);
1111 emitABS (0x2e, insn->src(0));
1112 emitNEG (0x2d, insn->src(1));
1113 emitField(0x2a, 1, insn->op == OP_MAX);
1114 emitPRED (0x27);
1115 emitGPR (0x08, insn->src(0));
1116 emitGPR (0x00, insn->def(0));
1117 }
1118
1119 void
1120 CodeEmitterGM107::emitDSET()
1121 {
1122 const CmpInstruction *insn = this->insn->asCmp();
1123
1124 switch (insn->src(1).getFile()) {
1125 case FILE_GPR:
1126 emitInsn(0x59000000);
1127 emitGPR (0x14, insn->src(1));
1128 break;
1129 case FILE_MEMORY_CONST:
1130 emitInsn(0x49000000);
1131 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1132 break;
1133 case FILE_IMMEDIATE:
1134 emitInsn(0x32000000);
1135 emitIMMD(0x14, 19, insn->src(1));
1136 break;
1137 default:
1138 assert(!"bad src1 file");
1139 break;
1140 }
1141
1142 if (insn->op != OP_SET) {
1143 switch (insn->op) {
1144 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1145 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1146 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1147 default:
1148 assert(!"invalid set op");
1149 break;
1150 }
1151 emitPRED(0x27, insn->src(2));
1152 } else {
1153 emitPRED(0x27);
1154 }
1155
1156 emitABS (0x36, insn->src(0));
1157 emitNEG (0x35, insn->src(1));
1158 emitField(0x34, 1, insn->dType == TYPE_F32);
1159 emitCond4(0x30, insn->setCond);
1160 emitCC (0x2f);
1161 emitABS (0x2c, insn->src(1));
1162 emitNEG (0x2b, insn->src(0));
1163 emitGPR (0x08, insn->src(0));
1164 emitGPR (0x00, insn->def(0));
1165 }
1166
1167 void
1168 CodeEmitterGM107::emitDSETP()
1169 {
1170 const CmpInstruction *insn = this->insn->asCmp();
1171
1172 switch (insn->src(1).getFile()) {
1173 case FILE_GPR:
1174 emitInsn(0x5b800000);
1175 emitGPR (0x14, insn->src(1));
1176 break;
1177 case FILE_MEMORY_CONST:
1178 emitInsn(0x4b800000);
1179 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1180 break;
1181 case FILE_IMMEDIATE:
1182 emitInsn(0x36800000);
1183 emitIMMD(0x14, 19, insn->src(1));
1184 break;
1185 default:
1186 assert(!"bad src1 file");
1187 break;
1188 }
1189
1190 if (insn->op != OP_SET) {
1191 switch (insn->op) {
1192 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1193 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1194 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1195 default:
1196 assert(!"invalid set op");
1197 break;
1198 }
1199 emitPRED(0x27, insn->src(2));
1200 } else {
1201 emitPRED(0x27);
1202 }
1203
1204 emitCond4(0x30, insn->setCond);
1205 emitABS (0x2c, insn->src(1));
1206 emitNEG (0x2b, insn->src(0));
1207 emitGPR (0x08, insn->src(0));
1208 emitABS (0x07, insn->src(0));
1209 emitNEG (0x06, insn->src(1));
1210 emitPRED (0x03, insn->def(0));
1211 if (insn->defExists(1))
1212 emitPRED(0x00, insn->def(1));
1213 else
1214 emitPRED(0x00);
1215 }
1216
1217 /*******************************************************************************
1218 * float
1219 ******************************************************************************/
1220
1221 void
1222 CodeEmitterGM107::emitFADD()
1223 {
1224 if (!longIMMD(insn->src(1))) {
1225 switch (insn->src(1).getFile()) {
1226 case FILE_GPR:
1227 emitInsn(0x5c580000);
1228 emitGPR (0x14, insn->src(1));
1229 break;
1230 case FILE_MEMORY_CONST:
1231 emitInsn(0x4c580000);
1232 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1233 break;
1234 case FILE_IMMEDIATE:
1235 emitInsn(0x38580000);
1236 emitIMMD(0x14, 19, insn->src(1));
1237 break;
1238 default:
1239 assert(!"bad src1 file");
1240 break;
1241 }
1242 emitSAT(0x32);
1243 emitABS(0x31, insn->src(1));
1244 emitNEG(0x30, insn->src(0));
1245 emitCC (0x2f);
1246 emitABS(0x2e, insn->src(0));
1247 emitNEG(0x2d, insn->src(1));
1248 emitFMZ(0x2c, 1);
1249
1250 if (insn->op == OP_SUB)
1251 code[1] ^= 0x00002000;
1252 } else {
1253 emitInsn(0x08000000);
1254 emitABS(0x39, insn->src(1));
1255 emitNEG(0x38, insn->src(0));
1256 emitFMZ(0x37, 1);
1257 emitABS(0x36, insn->src(0));
1258 emitNEG(0x35, insn->src(1));
1259 emitCC (0x34);
1260 emitIMMD(0x14, 32, insn->src(1));
1261
1262 if (insn->op == OP_SUB)
1263 code[1] ^= 0x00080000;
1264 }
1265
1266 emitGPR(0x08, insn->src(0));
1267 emitGPR(0x00, insn->def(0));
1268 }
1269
1270 void
1271 CodeEmitterGM107::emitFMUL()
1272 {
1273 if (!longIMMD(insn->src(1))) {
1274 switch (insn->src(1).getFile()) {
1275 case FILE_GPR:
1276 emitInsn(0x5c680000);
1277 emitGPR (0x14, insn->src(1));
1278 break;
1279 case FILE_MEMORY_CONST:
1280 emitInsn(0x4c680000);
1281 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1282 break;
1283 case FILE_IMMEDIATE:
1284 emitInsn(0x38680000);
1285 emitIMMD(0x14, 19, insn->src(1));
1286 break;
1287 default:
1288 assert(!"bad src1 file");
1289 break;
1290 }
1291 emitSAT (0x32);
1292 emitNEG2(0x30, insn->src(0), insn->src(1));
1293 emitCC (0x2f);
1294 emitFMZ (0x2c, 2);
1295 emitPDIV(0x29);
1296 emitRND (0x27);
1297 } else {
1298 emitInsn(0x1e000000);
1299 emitSAT (0x37);
1300 emitFMZ (0x35, 2);
1301 emitCC (0x34);
1302 emitIMMD(0x14, 32, insn->src(1));
1303 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1304 code[1] ^= 0x00080000; /* flip immd sign bit */
1305 }
1306
1307 emitGPR(0x08, insn->src(0));
1308 emitGPR(0x00, insn->def(0));
1309 }
1310
1311 void
1312 CodeEmitterGM107::emitFFMA()
1313 {
1314 /*XXX: ffma32i exists, but not using it as third src overlaps dst */
1315 switch(insn->src(2).getFile()) {
1316 case FILE_GPR:
1317 switch (insn->src(1).getFile()) {
1318 case FILE_GPR:
1319 emitInsn(0x59800000);
1320 emitGPR (0x14, insn->src(1));
1321 break;
1322 case FILE_MEMORY_CONST:
1323 emitInsn(0x49800000);
1324 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1325 break;
1326 case FILE_IMMEDIATE:
1327 emitInsn(0x32800000);
1328 emitIMMD(0x14, 19, insn->src(1));
1329 break;
1330 default:
1331 assert(!"bad src1 file");
1332 break;
1333 }
1334 emitGPR (0x27, insn->src(2));
1335 break;
1336 case FILE_MEMORY_CONST:
1337 emitInsn(0x51800000);
1338 emitGPR (0x27, insn->src(1));
1339 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1340 break;
1341 default:
1342 assert(!"bad src2 file");
1343 break;
1344 }
1345 emitRND (0x33);
1346 emitSAT (0x32);
1347 emitNEG (0x31, insn->src(2));
1348 emitNEG2(0x30, insn->src(0), insn->src(1));
1349 emitCC (0x2f);
1350
1351 emitFMZ(0x35, 2);
1352 emitGPR(0x08, insn->src(0));
1353 emitGPR(0x00, insn->def(0));
1354 }
1355
1356 void
1357 CodeEmitterGM107::emitMUFU()
1358 {
1359 int mufu = 0;
1360
1361 switch (insn->op) {
1362 case OP_COS: mufu = 0; break;
1363 case OP_SIN: mufu = 1; break;
1364 case OP_EX2: mufu = 2; break;
1365 case OP_LG2: mufu = 3; break;
1366 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1367 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1368 default:
1369 assert(!"invalid mufu");
1370 break;
1371 }
1372
1373 emitInsn (0x50800000);
1374 emitSAT (0x32);
1375 emitNEG (0x30, insn->src(0));
1376 emitABS (0x2e, insn->src(0));
1377 emitField(0x14, 3, mufu);
1378 emitGPR (0x08, insn->src(0));
1379 emitGPR (0x00, insn->def(0));
1380 }
1381
1382 void
1383 CodeEmitterGM107::emitFMNMX()
1384 {
1385 switch (insn->src(1).getFile()) {
1386 case FILE_GPR:
1387 emitInsn(0x5c600000);
1388 emitGPR (0x14, insn->src(1));
1389 break;
1390 case FILE_MEMORY_CONST:
1391 emitInsn(0x4c600000);
1392 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1393 break;
1394 case FILE_IMMEDIATE:
1395 emitInsn(0x38600000);
1396 emitIMMD(0x14, 19, insn->src(1));
1397 break;
1398 default:
1399 assert(!"bad src1 file");
1400 break;
1401 }
1402
1403 emitField(0x2a, 1, insn->op == OP_MAX);
1404 emitPRED (0x27);
1405
1406 emitABS(0x31, insn->src(1));
1407 emitNEG(0x30, insn->src(0));
1408 emitCC (0x2f);
1409 emitABS(0x2e, insn->src(0));
1410 emitNEG(0x2d, insn->src(1));
1411 emitFMZ(0x2c, 1);
1412 emitGPR(0x08, insn->src(0));
1413 emitGPR(0x00, insn->def(0));
1414 }
1415
1416 void
1417 CodeEmitterGM107::emitRRO()
1418 {
1419 switch (insn->src(0).getFile()) {
1420 case FILE_GPR:
1421 emitInsn(0x5c900000);
1422 emitGPR (0x14, insn->src(0));
1423 break;
1424 case FILE_MEMORY_CONST:
1425 emitInsn(0x4c900000);
1426 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1427 break;
1428 case FILE_IMMEDIATE:
1429 emitInsn(0x38900000);
1430 emitIMMD(0x14, 19, insn->src(0));
1431 break;
1432 default:
1433 assert(!"bad src file");
1434 break;
1435 }
1436
1437 emitABS (0x31, insn->src(0));
1438 emitNEG (0x2d, insn->src(0));
1439 emitField(0x27, 1, insn->op == OP_PREEX2);
1440 emitGPR (0x00, insn->def(0));
1441 }
1442
1443 void
1444 CodeEmitterGM107::emitFCMP()
1445 {
1446 const CmpInstruction *insn = this->insn->asCmp();
1447 CondCode cc = insn->setCond;
1448
1449 if (insn->src(2).mod.neg())
1450 cc = reverseCondCode(cc);
1451
1452 switch(insn->src(2).getFile()) {
1453 case FILE_GPR:
1454 switch (insn->src(1).getFile()) {
1455 case FILE_GPR:
1456 emitInsn(0x5ba00000);
1457 emitGPR (0x14, insn->src(1));
1458 break;
1459 case FILE_MEMORY_CONST:
1460 emitInsn(0x4ba00000);
1461 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1462 break;
1463 case FILE_IMMEDIATE:
1464 emitInsn(0x36a00000);
1465 emitIMMD(0x14, 19, insn->src(1));
1466 break;
1467 default:
1468 assert(!"bad src1 file");
1469 break;
1470 }
1471 emitGPR (0x27, insn->src(2));
1472 break;
1473 case FILE_MEMORY_CONST:
1474 emitInsn(0x53a00000);
1475 emitGPR (0x27, insn->src(1));
1476 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1477 break;
1478 default:
1479 assert(!"bad src2 file");
1480 break;
1481 }
1482
1483 emitCond4(0x30, cc);
1484 emitFMZ (0x2f, 1);
1485 emitGPR (0x08, insn->src(0));
1486 emitGPR (0x00, insn->def(0));
1487 }
1488
1489 void
1490 CodeEmitterGM107::emitFSET()
1491 {
1492 const CmpInstruction *insn = this->insn->asCmp();
1493
1494 switch (insn->src(1).getFile()) {
1495 case FILE_GPR:
1496 emitInsn(0x58000000);
1497 emitGPR (0x14, insn->src(1));
1498 break;
1499 case FILE_MEMORY_CONST:
1500 emitInsn(0x48000000);
1501 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1502 break;
1503 case FILE_IMMEDIATE:
1504 emitInsn(0x30000000);
1505 emitIMMD(0x14, 19, insn->src(1));
1506 break;
1507 default:
1508 assert(!"bad src1 file");
1509 break;
1510 }
1511
1512 if (insn->op != OP_SET) {
1513 switch (insn->op) {
1514 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1515 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1516 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1517 default:
1518 assert(!"invalid set op");
1519 break;
1520 }
1521 emitPRED(0x27, insn->src(2));
1522 } else {
1523 emitPRED(0x27);
1524 }
1525
1526 emitFMZ (0x37, 1);
1527 emitABS (0x36, insn->src(0));
1528 emitNEG (0x35, insn->src(1));
1529 emitField(0x34, 1, insn->dType == TYPE_F32);
1530 emitCond4(0x30, insn->setCond);
1531 emitCC (0x2f);
1532 emitABS (0x2c, insn->src(1));
1533 emitNEG (0x2b, insn->src(0));
1534 emitGPR (0x08, insn->src(0));
1535 emitGPR (0x00, insn->def(0));
1536 }
1537
1538 void
1539 CodeEmitterGM107::emitFSETP()
1540 {
1541 const CmpInstruction *insn = this->insn->asCmp();
1542
1543 switch (insn->src(1).getFile()) {
1544 case FILE_GPR:
1545 emitInsn(0x5bb00000);
1546 emitGPR (0x14, insn->src(1));
1547 break;
1548 case FILE_MEMORY_CONST:
1549 emitInsn(0x4bb00000);
1550 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1551 break;
1552 case FILE_IMMEDIATE:
1553 emitInsn(0x36b00000);
1554 emitIMMD(0x14, 19, insn->src(1));
1555 break;
1556 default:
1557 assert(!"bad src1 file");
1558 break;
1559 }
1560
1561 if (insn->op != OP_SET) {
1562 switch (insn->op) {
1563 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1564 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1565 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1566 default:
1567 assert(!"invalid set op");
1568 break;
1569 }
1570 emitPRED(0x27, insn->src(2));
1571 } else {
1572 emitPRED(0x27);
1573 }
1574
1575 emitCond4(0x30, insn->setCond);
1576 emitFMZ (0x2f, 1);
1577 emitABS (0x2c, insn->src(1));
1578 emitNEG (0x2b, insn->src(0));
1579 emitGPR (0x08, insn->src(0));
1580 emitABS (0x07, insn->src(0));
1581 emitNEG (0x06, insn->src(1));
1582 emitPRED (0x03, insn->def(0));
1583 if (insn->defExists(1))
1584 emitPRED(0x00, insn->def(1));
1585 else
1586 emitPRED(0x00);
1587 }
1588
1589 void
1590 CodeEmitterGM107::emitFSWZADD()
1591 {
1592 emitInsn (0x50f80000);
1593 emitCC (0x2f);
1594 emitFMZ (0x2c, 1);
1595 emitRND (0x27);
1596 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1597 emitField(0x1c, 8, insn->subOp);
1598 if (insn->predSrc != 1)
1599 emitGPR (0x14, insn->src(1));
1600 else
1601 emitGPR (0x14);
1602 emitGPR (0x08, insn->src(0));
1603 emitGPR (0x00, insn->def(0));
1604 }
1605
1606 /*******************************************************************************
1607 * integer
1608 ******************************************************************************/
1609
1610 void
1611 CodeEmitterGM107::emitLOP()
1612 {
1613 int lop = 0;
1614
1615 switch (insn->op) {
1616 case OP_AND: lop = 0; break;
1617 case OP_OR : lop = 1; break;
1618 case OP_XOR: lop = 2; break;
1619 default:
1620 assert(!"invalid lop");
1621 break;
1622 }
1623
1624 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1625 switch (insn->src(1).getFile()) {
1626 case FILE_GPR:
1627 emitInsn(0x5c400000);
1628 emitGPR (0x14, insn->src(1));
1629 break;
1630 case FILE_MEMORY_CONST:
1631 emitInsn(0x4c400000);
1632 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1633 break;
1634 case FILE_IMMEDIATE:
1635 emitInsn(0x38400000);
1636 emitIMMD(0x14, 19, insn->src(1));
1637 break;
1638 default:
1639 assert(!"bad src1 file");
1640 break;
1641 }
1642 emitPRED (0x30);
1643 emitCC (0x2f);
1644 emitX (0x2b);
1645 emitField(0x29, 2, lop);
1646 emitINV (0x28, insn->src(1));
1647 emitINV (0x27, insn->src(0));
1648 } else {
1649 emitInsn (0x04000000);
1650 emitX (0x39);
1651 emitINV (0x38, insn->src(1));
1652 emitINV (0x37, insn->src(0));
1653 emitField(0x35, 2, lop);
1654 emitCC (0x34);
1655 emitIMMD (0x14, 32, insn->src(1));
1656 }
1657
1658 emitGPR (0x08, insn->src(0));
1659 emitGPR (0x00, insn->def(0));
1660 }
1661
1662 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1663 void
1664 CodeEmitterGM107::emitNOT()
1665 {
1666 if (!longIMMD(insn->src(0))) {
1667 switch (insn->src(0).getFile()) {
1668 case FILE_GPR:
1669 emitInsn(0x5c400700);
1670 emitGPR (0x14, insn->src(0));
1671 break;
1672 case FILE_MEMORY_CONST:
1673 emitInsn(0x4c400700);
1674 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1675 break;
1676 case FILE_IMMEDIATE:
1677 emitInsn(0x38400700);
1678 emitIMMD(0x14, 19, insn->src(0));
1679 break;
1680 default:
1681 assert(!"bad src1 file");
1682 break;
1683 }
1684 emitPRED (0x30);
1685 } else {
1686 emitInsn (0x05600000);
1687 emitIMMD (0x14, 32, insn->src(1));
1688 }
1689
1690 emitGPR(0x08);
1691 emitGPR(0x00, insn->def(0));
1692 }
1693
1694 void
1695 CodeEmitterGM107::emitIADD()
1696 {
1697 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1698 switch (insn->src(1).getFile()) {
1699 case FILE_GPR:
1700 emitInsn(0x5c100000);
1701 emitGPR (0x14, insn->src(1));
1702 break;
1703 case FILE_MEMORY_CONST:
1704 emitInsn(0x4c100000);
1705 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1706 break;
1707 case FILE_IMMEDIATE:
1708 emitInsn(0x38100000);
1709 emitIMMD(0x14, 19, insn->src(1));
1710 break;
1711 default:
1712 assert(!"bad src1 file");
1713 break;
1714 }
1715 emitSAT(0x32);
1716 emitNEG(0x31, insn->src(0));
1717 emitNEG(0x30, insn->src(1));
1718 emitCC (0x2f);
1719 emitX (0x2b);
1720 } else {
1721 emitInsn(0x1c000000);
1722 emitNEG (0x38, insn->src(0));
1723 emitSAT (0x36);
1724 emitX (0x35);
1725 emitCC (0x34);
1726 emitIMMD(0x14, 32, insn->src(1));
1727 }
1728
1729 if (insn->op == OP_SUB)
1730 code[1] ^= 0x00010000;
1731
1732 emitGPR(0x08, insn->src(0));
1733 emitGPR(0x00, insn->def(0));
1734 }
1735
1736 void
1737 CodeEmitterGM107::emitIMUL()
1738 {
1739 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1740 switch (insn->src(1).getFile()) {
1741 case FILE_GPR:
1742 emitInsn(0x5c380000);
1743 emitGPR (0x14, insn->src(1));
1744 break;
1745 case FILE_MEMORY_CONST:
1746 emitInsn(0x4c380000);
1747 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1748 break;
1749 case FILE_IMMEDIATE:
1750 emitInsn(0x38380000);
1751 emitIMMD(0x14, 19, insn->src(1));
1752 break;
1753 default:
1754 assert(!"bad src1 file");
1755 break;
1756 }
1757 emitCC (0x2f);
1758 emitField(0x29, 1, isSignedType(insn->sType));
1759 emitField(0x28, 1, isSignedType(insn->dType));
1760 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1761 } else {
1762 emitInsn (0x1f000000);
1763 emitField(0x37, 1, isSignedType(insn->sType));
1764 emitField(0x36, 1, isSignedType(insn->dType));
1765 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1766 emitCC (0x34);
1767 emitIMMD (0x14, 32, insn->src(1));
1768 }
1769
1770 emitGPR(0x08, insn->src(0));
1771 emitGPR(0x00, insn->def(0));
1772 }
1773
1774 void
1775 CodeEmitterGM107::emitIMAD()
1776 {
1777 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1778 switch(insn->src(2).getFile()) {
1779 case FILE_GPR:
1780 switch (insn->src(1).getFile()) {
1781 case FILE_GPR:
1782 emitInsn(0x5a000000);
1783 emitGPR (0x14, insn->src(1));
1784 break;
1785 case FILE_MEMORY_CONST:
1786 emitInsn(0x4a000000);
1787 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1788 break;
1789 case FILE_IMMEDIATE:
1790 emitInsn(0x34000000);
1791 emitIMMD(0x14, 19, insn->src(1));
1792 break;
1793 default:
1794 assert(!"bad src1 file");
1795 break;
1796 }
1797 emitGPR (0x27, insn->src(2));
1798 break;
1799 case FILE_MEMORY_CONST:
1800 emitInsn(0x52000000);
1801 emitGPR (0x27, insn->src(1));
1802 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1803 break;
1804 default:
1805 assert(!"bad src2 file");
1806 break;
1807 }
1808
1809 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1810 emitField(0x35, 1, isSignedType(insn->sType));
1811 emitNEG (0x34, insn->src(2));
1812 emitNEG2 (0x33, insn->src(0), insn->src(1));
1813 emitSAT (0x32);
1814 emitX (0x31);
1815 emitField(0x30, 1, isSignedType(insn->dType));
1816 emitCC (0x2f);
1817 emitGPR (0x08, insn->src(0));
1818 emitGPR (0x00, insn->def(0));
1819 }
1820
1821 void
1822 CodeEmitterGM107::emitISCADD()
1823 {
1824 switch (insn->src(2).getFile()) {
1825 case FILE_GPR:
1826 emitInsn(0x5c180000);
1827 emitGPR (0x14, insn->src(2));
1828 break;
1829 case FILE_MEMORY_CONST:
1830 emitInsn(0x4c180000);
1831 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1832 break;
1833 case FILE_IMMEDIATE:
1834 emitInsn(0x38180000);
1835 emitIMMD(0x14, 19, insn->src(2));
1836 break;
1837 default:
1838 assert(!"bad src1 file");
1839 break;
1840 }
1841 emitNEG (0x31, insn->src(0));
1842 emitNEG (0x30, insn->src(2));
1843 emitCC (0x2f);
1844 emitIMMD(0x27, 5, insn->src(1));
1845 emitGPR (0x08, insn->src(0));
1846 emitGPR (0x00, insn->def(0));
1847 }
1848
1849 void
1850 CodeEmitterGM107::emitIMNMX()
1851 {
1852 switch (insn->src(1).getFile()) {
1853 case FILE_GPR:
1854 emitInsn(0x5c200000);
1855 emitGPR (0x14, insn->src(1));
1856 break;
1857 case FILE_MEMORY_CONST:
1858 emitInsn(0x4c200000);
1859 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1860 break;
1861 case FILE_IMMEDIATE:
1862 emitInsn(0x38200000);
1863 emitIMMD(0x14, 19, insn->src(1));
1864 break;
1865 default:
1866 assert(!"bad src1 file");
1867 break;
1868 }
1869
1870 emitField(0x30, 1, isSignedType(insn->dType));
1871 emitCC (0x2f);
1872 emitField(0x2b, 2, insn->subOp);
1873 emitField(0x2a, 1, insn->op == OP_MAX);
1874 emitPRED (0x27);
1875 emitGPR (0x08, insn->src(0));
1876 emitGPR (0x00, insn->def(0));
1877 }
1878
1879 void
1880 CodeEmitterGM107::emitICMP()
1881 {
1882 const CmpInstruction *insn = this->insn->asCmp();
1883 CondCode cc = insn->setCond;
1884
1885 if (insn->src(2).mod.neg())
1886 cc = reverseCondCode(cc);
1887
1888 switch(insn->src(2).getFile()) {
1889 case FILE_GPR:
1890 switch (insn->src(1).getFile()) {
1891 case FILE_GPR:
1892 emitInsn(0x5b400000);
1893 emitGPR (0x14, insn->src(1));
1894 break;
1895 case FILE_MEMORY_CONST:
1896 emitInsn(0x4b400000);
1897 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1898 break;
1899 case FILE_IMMEDIATE:
1900 emitInsn(0x36400000);
1901 emitIMMD(0x14, 19, insn->src(1));
1902 break;
1903 default:
1904 assert(!"bad src1 file");
1905 break;
1906 }
1907 emitGPR (0x27, insn->src(2));
1908 break;
1909 case FILE_MEMORY_CONST:
1910 emitInsn(0x53400000);
1911 emitGPR (0x27, insn->src(1));
1912 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1913 break;
1914 default:
1915 assert(!"bad src2 file");
1916 break;
1917 }
1918
1919 emitCond3(0x31, cc);
1920 emitField(0x30, 1, isSignedType(insn->sType));
1921 emitGPR (0x08, insn->src(0));
1922 emitGPR (0x00, insn->def(0));
1923 }
1924
1925 void
1926 CodeEmitterGM107::emitISET()
1927 {
1928 const CmpInstruction *insn = this->insn->asCmp();
1929
1930 switch (insn->src(1).getFile()) {
1931 case FILE_GPR:
1932 emitInsn(0x5b500000);
1933 emitGPR (0x14, insn->src(1));
1934 break;
1935 case FILE_MEMORY_CONST:
1936 emitInsn(0x4b500000);
1937 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1938 break;
1939 case FILE_IMMEDIATE:
1940 emitInsn(0x36500000);
1941 emitIMMD(0x14, 19, insn->src(1));
1942 break;
1943 default:
1944 assert(!"bad src1 file");
1945 break;
1946 }
1947
1948 if (insn->op != OP_SET) {
1949 switch (insn->op) {
1950 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1951 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1952 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1953 default:
1954 assert(!"invalid set op");
1955 break;
1956 }
1957 emitPRED(0x27, insn->src(2));
1958 } else {
1959 emitPRED(0x27);
1960 }
1961
1962 emitCond3(0x31, insn->setCond);
1963 emitField(0x30, 1, isSignedType(insn->sType));
1964 emitCC (0x2f);
1965 emitField(0x2c, 1, insn->dType == TYPE_F32);
1966 emitX (0x2b);
1967 emitGPR (0x08, insn->src(0));
1968 emitGPR (0x00, insn->def(0));
1969 }
1970
1971 void
1972 CodeEmitterGM107::emitISETP()
1973 {
1974 const CmpInstruction *insn = this->insn->asCmp();
1975
1976 switch (insn->src(1).getFile()) {
1977 case FILE_GPR:
1978 emitInsn(0x5b600000);
1979 emitGPR (0x14, insn->src(1));
1980 break;
1981 case FILE_MEMORY_CONST:
1982 emitInsn(0x4b600000);
1983 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1984 break;
1985 case FILE_IMMEDIATE:
1986 emitInsn(0x36600000);
1987 emitIMMD(0x14, 19, insn->src(1));
1988 break;
1989 default:
1990 assert(!"bad src1 file");
1991 break;
1992 }
1993
1994 if (insn->op != OP_SET) {
1995 switch (insn->op) {
1996 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1997 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1998 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1999 default:
2000 assert(!"invalid set op");
2001 break;
2002 }
2003 emitPRED(0x27, insn->src(2));
2004 } else {
2005 emitPRED(0x27);
2006 }
2007
2008 emitCond3(0x31, insn->setCond);
2009 emitField(0x30, 1, isSignedType(insn->sType));
2010 emitX (0x2b);
2011 emitGPR (0x08, insn->src(0));
2012 emitPRED (0x03, insn->def(0));
2013 if (insn->defExists(1))
2014 emitPRED(0x00, insn->def(1));
2015 else
2016 emitPRED(0x00);
2017 }
2018
2019 void
2020 CodeEmitterGM107::emitSHL()
2021 {
2022 switch (insn->src(1).getFile()) {
2023 case FILE_GPR:
2024 emitInsn(0x5c480000);
2025 emitGPR (0x14, insn->src(1));
2026 break;
2027 case FILE_MEMORY_CONST:
2028 emitInsn(0x4c480000);
2029 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2030 break;
2031 case FILE_IMMEDIATE:
2032 emitInsn(0x38480000);
2033 emitIMMD(0x14, 19, insn->src(1));
2034 break;
2035 default:
2036 assert(!"bad src1 file");
2037 break;
2038 }
2039
2040 emitCC (0x2f);
2041 emitX (0x2b);
2042 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2043 emitGPR (0x08, insn->src(0));
2044 emitGPR (0x00, insn->def(0));
2045 }
2046
2047 void
2048 CodeEmitterGM107::emitSHR()
2049 {
2050 switch (insn->src(1).getFile()) {
2051 case FILE_GPR:
2052 emitInsn(0x5c280000);
2053 emitGPR (0x14, insn->src(1));
2054 break;
2055 case FILE_MEMORY_CONST:
2056 emitInsn(0x4c280000);
2057 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2058 break;
2059 case FILE_IMMEDIATE:
2060 emitInsn(0x38280000);
2061 emitIMMD(0x14, 19, insn->src(1));
2062 break;
2063 default:
2064 assert(!"bad src1 file");
2065 break;
2066 }
2067
2068 emitField(0x30, 1, isSignedType(insn->dType));
2069 emitCC (0x2f);
2070 emitX (0x2c);
2071 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2072 emitGPR (0x08, insn->src(0));
2073 emitGPR (0x00, insn->def(0));
2074 }
2075
2076 void
2077 CodeEmitterGM107::emitSHF()
2078 {
2079 unsigned type;
2080
2081 switch (insn->src(1).getFile()) {
2082 case FILE_GPR:
2083 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2084 emitGPR(0x14, insn->src(1));
2085 break;
2086 case FILE_IMMEDIATE:
2087 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2088 emitIMMD(0x14, 19, insn->src(1));
2089 break;
2090 default:
2091 assert(!"bad src1 file");
2092 break;
2093 }
2094
2095 switch (insn->sType) {
2096 case TYPE_U64:
2097 type = 2;
2098 break;
2099 case TYPE_S64:
2100 type = 3;
2101 break;
2102 default:
2103 type = 0;
2104 break;
2105 }
2106
2107 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2108 emitX (0x31);
2109 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2110 emitCC (0x2f);
2111 emitGPR (0x27, insn->src(2));
2112 emitField(0x25, 2, type);
2113 emitGPR (0x08, insn->src(0));
2114 emitGPR (0x00, insn->def(0));
2115 }
2116
2117 void
2118 CodeEmitterGM107::emitPOPC()
2119 {
2120 switch (insn->src(0).getFile()) {
2121 case FILE_GPR:
2122 emitInsn(0x5c080000);
2123 emitGPR (0x14, insn->src(0));
2124 break;
2125 case FILE_MEMORY_CONST:
2126 emitInsn(0x4c080000);
2127 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2128 break;
2129 case FILE_IMMEDIATE:
2130 emitInsn(0x38080000);
2131 emitIMMD(0x14, 19, insn->src(0));
2132 break;
2133 default:
2134 assert(!"bad src1 file");
2135 break;
2136 }
2137
2138 emitINV(0x28, insn->src(0));
2139 emitGPR(0x00, insn->def(0));
2140 }
2141
2142 void
2143 CodeEmitterGM107::emitBFI()
2144 {
2145 switch(insn->src(2).getFile()) {
2146 case FILE_GPR:
2147 switch (insn->src(1).getFile()) {
2148 case FILE_GPR:
2149 emitInsn(0x5bf00000);
2150 emitGPR (0x14, insn->src(1));
2151 break;
2152 case FILE_MEMORY_CONST:
2153 emitInsn(0x4bf00000);
2154 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2155 break;
2156 case FILE_IMMEDIATE:
2157 emitInsn(0x36f00000);
2158 emitIMMD(0x14, 19, insn->src(1));
2159 break;
2160 default:
2161 assert(!"bad src1 file");
2162 break;
2163 }
2164 emitGPR (0x27, insn->src(2));
2165 break;
2166 case FILE_MEMORY_CONST:
2167 emitInsn(0x53f00000);
2168 emitGPR (0x27, insn->src(1));
2169 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2170 break;
2171 default:
2172 assert(!"bad src2 file");
2173 break;
2174 }
2175
2176 emitCC (0x2f);
2177 emitGPR (0x08, insn->src(0));
2178 emitGPR (0x00, insn->def(0));
2179 }
2180
2181 void
2182 CodeEmitterGM107::emitBFE()
2183 {
2184 switch (insn->src(1).getFile()) {
2185 case FILE_GPR:
2186 emitInsn(0x5c000000);
2187 emitGPR (0x14, insn->src(1));
2188 break;
2189 case FILE_MEMORY_CONST:
2190 emitInsn(0x4c000000);
2191 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2192 break;
2193 case FILE_IMMEDIATE:
2194 emitInsn(0x38000000);
2195 emitIMMD(0x14, 19, insn->src(1));
2196 break;
2197 default:
2198 assert(!"bad src1 file");
2199 break;
2200 }
2201
2202 emitField(0x30, 1, isSignedType(insn->dType));
2203 emitCC (0x2f);
2204 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2205 emitGPR (0x08, insn->src(0));
2206 emitGPR (0x00, insn->def(0));
2207 }
2208
2209 void
2210 CodeEmitterGM107::emitFLO()
2211 {
2212 switch (insn->src(0).getFile()) {
2213 case FILE_GPR:
2214 emitInsn(0x5c300000);
2215 emitGPR (0x14, insn->src(0));
2216 break;
2217 case FILE_MEMORY_CONST:
2218 emitInsn(0x4c300000);
2219 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2220 break;
2221 case FILE_IMMEDIATE:
2222 emitInsn(0x38300000);
2223 emitIMMD(0x14, 19, insn->src(0));
2224 break;
2225 default:
2226 assert(!"bad src1 file");
2227 break;
2228 }
2229
2230 emitField(0x30, 1, isSignedType(insn->dType));
2231 emitCC (0x2f);
2232 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2233 emitINV (0x28, insn->src(0));
2234 emitGPR (0x00, insn->def(0));
2235 }
2236
2237 /*******************************************************************************
2238 * memory
2239 ******************************************************************************/
2240
2241 void
2242 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2243 {
2244 int data = 0;
2245
2246 switch (typeSizeof(type)) {
2247 case 1: data = isSignedType(type) ? 1 : 0; break;
2248 case 2: data = isSignedType(type) ? 3 : 2; break;
2249 case 4: data = 4; break;
2250 case 8: data = 5; break;
2251 case 16: data = 6; break;
2252 default:
2253 assert(!"bad type");
2254 break;
2255 }
2256
2257 emitField(pos, 3, data);
2258 }
2259
2260 void
2261 CodeEmitterGM107::emitLDSTc(int pos)
2262 {
2263 int mode = 0;
2264
2265 switch (insn->cache) {
2266 case CACHE_CA: mode = 0; break;
2267 case CACHE_CG: mode = 1; break;
2268 case CACHE_CS: mode = 2; break;
2269 case CACHE_CV: mode = 3; break;
2270 default:
2271 assert(!"invalid caching mode");
2272 break;
2273 }
2274
2275 emitField(pos, 2, mode);
2276 }
2277
2278 void
2279 CodeEmitterGM107::emitLDC()
2280 {
2281 emitInsn (0xef900000);
2282 emitLDSTs(0x30, insn->dType);
2283 emitField(0x2c, 2, insn->subOp);
2284 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2285 emitGPR (0x00, insn->def(0));
2286 }
2287
2288 void
2289 CodeEmitterGM107::emitLDL()
2290 {
2291 emitInsn (0xef400000);
2292 emitLDSTs(0x30, insn->dType);
2293 emitLDSTc(0x2c);
2294 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2295 emitGPR (0x00, insn->def(0));
2296 }
2297
2298 void
2299 CodeEmitterGM107::emitLDS()
2300 {
2301 emitInsn (0xef480000);
2302 emitLDSTs(0x30, insn->dType);
2303 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2304 emitGPR (0x00, insn->def(0));
2305 }
2306
2307 void
2308 CodeEmitterGM107::emitLD()
2309 {
2310 emitInsn (0x80000000);
2311 emitPRED (0x3a);
2312 emitLDSTc(0x38);
2313 emitLDSTs(0x35, insn->dType);
2314 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2315 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2316 emitGPR (0x00, insn->def(0));
2317 }
2318
2319 void
2320 CodeEmitterGM107::emitSTL()
2321 {
2322 emitInsn (0xef500000);
2323 emitLDSTs(0x30, insn->dType);
2324 emitLDSTc(0x2c);
2325 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2326 emitGPR (0x00, insn->src(1));
2327 }
2328
2329 void
2330 CodeEmitterGM107::emitSTS()
2331 {
2332 emitInsn (0xef580000);
2333 emitLDSTs(0x30, insn->dType);
2334 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2335 emitGPR (0x00, insn->src(1));
2336 }
2337
2338 void
2339 CodeEmitterGM107::emitST()
2340 {
2341 emitInsn (0xa0000000);
2342 emitPRED (0x3a);
2343 emitLDSTc(0x38);
2344 emitLDSTs(0x35, insn->dType);
2345 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2346 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2347 emitGPR (0x00, insn->src(1));
2348 }
2349
2350 void
2351 CodeEmitterGM107::emitALD()
2352 {
2353 emitInsn (0xefd80000);
2354 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2355 emitGPR (0x27, insn->src(0).getIndirect(1));
2356 emitO (0x20);
2357 emitP (0x1f);
2358 emitADDR (0x08, 20, 10, 0, insn->src(0));
2359 emitGPR (0x00, insn->def(0));
2360 }
2361
2362 void
2363 CodeEmitterGM107::emitAST()
2364 {
2365 emitInsn (0xeff00000);
2366 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2367 emitGPR (0x27, insn->src(0).getIndirect(1));
2368 emitP (0x1f);
2369 emitADDR (0x08, 20, 10, 0, insn->src(0));
2370 emitGPR (0x00, insn->src(1));
2371 }
2372
2373 void
2374 CodeEmitterGM107::emitISBERD()
2375 {
2376 emitInsn(0xefd00000);
2377 emitGPR (0x08, insn->src(0));
2378 emitGPR (0x00, insn->def(0));
2379 }
2380
2381 void
2382 CodeEmitterGM107::emitAL2P()
2383 {
2384 emitInsn (0xefa00000);
2385 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2386 emitPRED (0x2c);
2387 emitO (0x20);
2388 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2389 emitGPR (0x08, insn->src(0).getIndirect(0));
2390 emitGPR (0x00, insn->def(0));
2391 }
2392
2393 static void
2394 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2395 {
2396 int ipa = entry->ipa;
2397 int reg = entry->reg;
2398 int loc = entry->loc;
2399
2400 if (data.flatshade &&
2401 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2402 ipa = NV50_IR_INTERP_FLAT;
2403 reg = 0xff;
2404 } else if (data.force_persample_interp &&
2405 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2406 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2407 ipa |= NV50_IR_INTERP_CENTROID;
2408 }
2409 code[loc + 1] &= ~(0xf << 0x14);
2410 code[loc + 1] |= (ipa & 0x3) << 0x16;
2411 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2412 code[loc + 0] &= ~(0xff << 0x14);
2413 code[loc + 0] |= reg << 0x14;
2414 }
2415
2416 void
2417 CodeEmitterGM107::emitIPA()
2418 {
2419 int ipam = 0, ipas = 0;
2420
2421 switch (insn->getInterpMode()) {
2422 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2423 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2424 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2425 case NV50_IR_INTERP_SC : ipam = 3; break;
2426 default:
2427 assert(!"invalid ipa mode");
2428 break;
2429 }
2430
2431 switch (insn->getSampleMode()) {
2432 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2433 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2434 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2435 default:
2436 assert(!"invalid ipa sample mode");
2437 break;
2438 }
2439
2440 emitInsn (0xe0000000);
2441 emitField(0x36, 2, ipam);
2442 emitField(0x34, 2, ipas);
2443 emitSAT (0x33);
2444 emitField(0x2f, 3, 7);
2445 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2446 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2447 code[1] |= 0x00000040; /* .idx */
2448 emitGPR(0x00, insn->def(0));
2449
2450 if (insn->op == OP_PINTERP) {
2451 emitGPR(0x14, insn->src(1));
2452 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2453 emitGPR(0x27, insn->src(2));
2454 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2455 } else {
2456 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2457 emitGPR(0x27, insn->src(1));
2458 emitGPR(0x14);
2459 addInterp(insn->ipa, 0xff, interpApply);
2460 }
2461
2462 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2463 emitGPR(0x27);
2464 }
2465
2466 void
2467 CodeEmitterGM107::emitATOM()
2468 {
2469 unsigned dType, subOp;
2470
2471 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2472 switch (insn->dType) {
2473 case TYPE_U32: dType = 0; break;
2474 case TYPE_U64: dType = 1; break;
2475 default: assert(!"unexpected dType"); dType = 0; break;
2476 }
2477 subOp = 15;
2478
2479 emitInsn (0xee000000);
2480 } else {
2481 switch (insn->dType) {
2482 case TYPE_U32: dType = 0; break;
2483 case TYPE_S32: dType = 1; break;
2484 case TYPE_U64: dType = 2; break;
2485 case TYPE_F32: dType = 3; break;
2486 case TYPE_B128: dType = 4; break;
2487 case TYPE_S64: dType = 5; break;
2488 default: assert(!"unexpected dType"); dType = 0; break;
2489 }
2490 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2491 subOp = 8;
2492 else
2493 subOp = insn->subOp;
2494
2495 emitInsn (0xed000000);
2496 }
2497
2498 emitField(0x34, 4, subOp);
2499 emitField(0x31, 3, dType);
2500 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2501 emitGPR (0x14, insn->src(1));
2502 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2503 emitGPR (0x00, insn->def(0));
2504 }
2505
2506 void
2507 CodeEmitterGM107::emitATOMS()
2508 {
2509 unsigned dType, subOp;
2510
2511 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2512 switch (insn->dType) {
2513 case TYPE_U32: dType = 0; break;
2514 case TYPE_U64: dType = 1; break;
2515 default: assert(!"unexpected dType"); dType = 0; break;
2516 }
2517 subOp = 4;
2518
2519 emitInsn (0xee000000);
2520 emitField(0x34, 1, dType);
2521 } else {
2522 switch (insn->dType) {
2523 case TYPE_U32: dType = 0; break;
2524 case TYPE_S32: dType = 1; break;
2525 case TYPE_U64: dType = 2; break;
2526 case TYPE_S64: dType = 3; break;
2527 default: assert(!"unexpected dType"); dType = 0; break;
2528 }
2529
2530 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2531 subOp = 8;
2532 else
2533 subOp = insn->subOp;
2534
2535 emitInsn (0xec000000);
2536 emitField(0x1c, 3, dType);
2537 }
2538
2539 emitField(0x34, 4, subOp);
2540 emitGPR (0x14, insn->src(1));
2541 emitADDR (0x08, 0x12, 22, 0, insn->src(0));
2542 emitGPR (0x00, insn->def(0));
2543 }
2544
2545 void
2546 CodeEmitterGM107::emitRED()
2547 {
2548 unsigned dType;
2549
2550 switch (insn->dType) {
2551 case TYPE_U32: dType = 0; break;
2552 case TYPE_S32: dType = 1; break;
2553 case TYPE_U64: dType = 2; break;
2554 case TYPE_F32: dType = 3; break;
2555 case TYPE_B128: dType = 4; break;
2556 case TYPE_S64: dType = 5; break;
2557 default: assert(!"unexpected dType"); dType = 0; break;
2558 }
2559
2560 emitInsn (0xebf80000);
2561 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2562 emitField(0x17, 3, insn->subOp);
2563 emitField(0x14, 3, dType);
2564 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2565 emitGPR (0x00, insn->src(1));
2566 }
2567
2568 void
2569 CodeEmitterGM107::emitCCTL()
2570 {
2571 unsigned width;
2572 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2573 emitInsn(0xef600000);
2574 width = 30;
2575 } else {
2576 emitInsn(0xef800000);
2577 width = 22;
2578 }
2579 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2580 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2581 emitField(0x00, 4, insn->subOp);
2582 }
2583
2584 /*******************************************************************************
2585 * surface
2586 ******************************************************************************/
2587
2588 void
2589 CodeEmitterGM107::emitPIXLD()
2590 {
2591 emitInsn (0xefe80000);
2592 emitPRED (0x2d);
2593 emitField(0x1f, 3, insn->subOp);
2594 emitGPR (0x08, insn->src(0));
2595 emitGPR (0x00, insn->def(0));
2596 }
2597
2598 /*******************************************************************************
2599 * texture
2600 ******************************************************************************/
2601
2602 void
2603 CodeEmitterGM107::emitTEXs(int pos)
2604 {
2605 int src1 = insn->predSrc == 1 ? 2 : 1;
2606 if (insn->srcExists(src1))
2607 emitGPR(pos, insn->src(src1));
2608 else
2609 emitGPR(pos);
2610 }
2611
2612 void
2613 CodeEmitterGM107::emitTEX()
2614 {
2615 const TexInstruction *insn = this->insn->asTex();
2616 int lodm = 0;
2617
2618 if (!insn->tex.levelZero) {
2619 switch (insn->op) {
2620 case OP_TEX: lodm = 0; break;
2621 case OP_TXB: lodm = 2; break;
2622 case OP_TXL: lodm = 3; break;
2623 default:
2624 assert(!"invalid tex op");
2625 break;
2626 }
2627 } else {
2628 lodm = 1;
2629 }
2630
2631 if (insn->tex.rIndirectSrc >= 0) {
2632 emitInsn (0xdeb80000);
2633 emitField(0x25, 2, lodm);
2634 emitField(0x24, 1, insn->tex.useOffsets == 1);
2635 } else {
2636 emitInsn (0xc0380000);
2637 emitField(0x37, 2, lodm);
2638 emitField(0x36, 1, insn->tex.useOffsets == 1);
2639 emitField(0x24, 13, insn->tex.r);
2640 }
2641
2642 emitField(0x32, 1, insn->tex.target.isShadow());
2643 emitField(0x31, 1, insn->tex.liveOnly);
2644 emitField(0x23, 1, insn->tex.derivAll);
2645 emitField(0x1f, 4, insn->tex.mask);
2646 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2647 insn->tex.target.getDim() - 1);
2648 emitField(0x1c, 1, insn->tex.target.isArray());
2649 emitTEXs (0x14);
2650 emitGPR (0x08, insn->src(0));
2651 emitGPR (0x00, insn->def(0));
2652 }
2653
2654 void
2655 CodeEmitterGM107::emitTLD()
2656 {
2657 const TexInstruction *insn = this->insn->asTex();
2658
2659 if (insn->tex.rIndirectSrc >= 0) {
2660 emitInsn (0xdd380000);
2661 } else {
2662 emitInsn (0xdc380000);
2663 emitField(0x24, 13, insn->tex.r);
2664 }
2665
2666 emitField(0x37, 1, insn->tex.levelZero == 0);
2667 emitField(0x32, 1, insn->tex.target.isMS());
2668 emitField(0x31, 1, insn->tex.liveOnly);
2669 emitField(0x23, 1, insn->tex.useOffsets == 1);
2670 emitField(0x1f, 4, insn->tex.mask);
2671 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2672 insn->tex.target.getDim() - 1);
2673 emitField(0x1c, 1, insn->tex.target.isArray());
2674 emitTEXs (0x14);
2675 emitGPR (0x08, insn->src(0));
2676 emitGPR (0x00, insn->def(0));
2677 }
2678
2679 void
2680 CodeEmitterGM107::emitTLD4()
2681 {
2682 const TexInstruction *insn = this->insn->asTex();
2683
2684 if (insn->tex.rIndirectSrc >= 0) {
2685 emitInsn (0xdef80000);
2686 emitField(0x26, 2, insn->tex.gatherComp);
2687 emitField(0x25, 2, insn->tex.useOffsets == 4);
2688 emitField(0x24, 2, insn->tex.useOffsets == 1);
2689 } else {
2690 emitInsn (0xc8380000);
2691 emitField(0x38, 2, insn->tex.gatherComp);
2692 emitField(0x37, 2, insn->tex.useOffsets == 4);
2693 emitField(0x36, 2, insn->tex.useOffsets == 1);
2694 emitField(0x24, 13, insn->tex.r);
2695 }
2696
2697 emitField(0x32, 1, insn->tex.target.isShadow());
2698 emitField(0x31, 1, insn->tex.liveOnly);
2699 emitField(0x23, 1, insn->tex.derivAll);
2700 emitField(0x1f, 4, insn->tex.mask);
2701 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2702 insn->tex.target.getDim() - 1);
2703 emitField(0x1c, 1, insn->tex.target.isArray());
2704 emitTEXs (0x14);
2705 emitGPR (0x08, insn->src(0));
2706 emitGPR (0x00, insn->def(0));
2707 }
2708
2709 void
2710 CodeEmitterGM107::emitTXD()
2711 {
2712 const TexInstruction *insn = this->insn->asTex();
2713
2714 if (insn->tex.rIndirectSrc >= 0) {
2715 emitInsn (0xde780000);
2716 } else {
2717 emitInsn (0xde380000);
2718 emitField(0x24, 13, insn->tex.r);
2719 }
2720
2721 emitField(0x31, 1, insn->tex.liveOnly);
2722 emitField(0x23, 1, insn->tex.useOffsets == 1);
2723 emitField(0x1f, 4, insn->tex.mask);
2724 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2725 insn->tex.target.getDim() - 1);
2726 emitField(0x1c, 1, insn->tex.target.isArray());
2727 emitTEXs (0x14);
2728 emitGPR (0x08, insn->src(0));
2729 emitGPR (0x00, insn->def(0));
2730 }
2731
2732 void
2733 CodeEmitterGM107::emitTMML()
2734 {
2735 const TexInstruction *insn = this->insn->asTex();
2736
2737 if (insn->tex.rIndirectSrc >= 0) {
2738 emitInsn (0xdf600000);
2739 } else {
2740 emitInsn (0xdf580000);
2741 emitField(0x24, 13, insn->tex.r);
2742 }
2743
2744 emitField(0x31, 1, insn->tex.liveOnly);
2745 emitField(0x23, 1, insn->tex.derivAll);
2746 emitField(0x1f, 4, insn->tex.mask);
2747 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2748 insn->tex.target.getDim() - 1);
2749 emitField(0x1c, 1, insn->tex.target.isArray());
2750 emitTEXs (0x14);
2751 emitGPR (0x08, insn->src(0));
2752 emitGPR (0x00, insn->def(0));
2753 }
2754
2755 void
2756 CodeEmitterGM107::emitTXQ()
2757 {
2758 const TexInstruction *insn = this->insn->asTex();
2759 int type = 0;
2760
2761 switch (insn->tex.query) {
2762 case TXQ_DIMS : type = 0x01; break;
2763 case TXQ_TYPE : type = 0x02; break;
2764 case TXQ_SAMPLE_POSITION: type = 0x05; break;
2765 case TXQ_FILTER : type = 0x10; break;
2766 case TXQ_LOD : type = 0x12; break;
2767 case TXQ_WRAP : type = 0x14; break;
2768 case TXQ_BORDER_COLOUR : type = 0x16; break;
2769 default:
2770 assert(!"invalid txq query");
2771 break;
2772 }
2773
2774 if (insn->tex.rIndirectSrc >= 0) {
2775 emitInsn (0xdf500000);
2776 } else {
2777 emitInsn (0xdf480000);
2778 emitField(0x24, 13, insn->tex.r);
2779 }
2780
2781 emitField(0x31, 1, insn->tex.liveOnly);
2782 emitField(0x1f, 4, insn->tex.mask);
2783 emitField(0x16, 6, type);
2784 emitGPR (0x08, insn->src(0));
2785 emitGPR (0x00, insn->def(0));
2786 }
2787
2788 void
2789 CodeEmitterGM107::emitDEPBAR()
2790 {
2791 emitInsn (0xf0f00000);
2792 emitField(0x1d, 1, 1); /* le */
2793 emitField(0x1a, 3, 5);
2794 emitField(0x14, 6, insn->subOp);
2795 emitField(0x00, 6, insn->subOp);
2796 }
2797
2798 /*******************************************************************************
2799 * misc
2800 ******************************************************************************/
2801
2802 void
2803 CodeEmitterGM107::emitNOP()
2804 {
2805 emitInsn(0x50b00000);
2806 }
2807
2808 void
2809 CodeEmitterGM107::emitKIL()
2810 {
2811 emitInsn (0xe3300000);
2812 emitCond5(0x00, CC_TR);
2813 }
2814
2815 void
2816 CodeEmitterGM107::emitOUT()
2817 {
2818 const int cut = insn->op == OP_RESTART || insn->subOp;
2819 const int emit = insn->op == OP_EMIT;
2820
2821 switch (insn->src(1).getFile()) {
2822 case FILE_GPR:
2823 emitInsn(0xfbe00000);
2824 emitGPR (0x14, insn->src(1));
2825 break;
2826 case FILE_IMMEDIATE:
2827 emitInsn(0xf6e00000);
2828 emitIMMD(0x14, 19, insn->src(1));
2829 break;
2830 case FILE_MEMORY_CONST:
2831 emitInsn(0xebe00000);
2832 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2833 break;
2834 default:
2835 assert(!"bad src1 file");
2836 break;
2837 }
2838
2839 emitField(0x27, 2, (cut << 1) | emit);
2840 emitGPR (0x08, insn->src(0));
2841 emitGPR (0x00, insn->def(0));
2842 }
2843
2844 void
2845 CodeEmitterGM107::emitBAR()
2846 {
2847 uint8_t subop;
2848
2849 emitInsn (0xf0a80000);
2850
2851 switch (insn->subOp) {
2852 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
2853 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
2854 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
2855 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
2856 default:
2857 subop = 0x80;
2858 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
2859 break;
2860 }
2861
2862 emitField(0x20, 8, subop);
2863
2864 // barrier id
2865 if (insn->src(0).getFile() == FILE_GPR) {
2866 emitGPR(0x08, insn->src(0));
2867 } else {
2868 ImmediateValue *imm = insn->getSrc(0)->asImm();
2869 assert(imm);
2870 emitField(0x08, 8, imm->reg.data.u32);
2871 emitField(0x2b, 1, 1);
2872 }
2873
2874 // thread count
2875 if (insn->src(1).getFile() == FILE_GPR) {
2876 emitGPR(0x14, insn->src(1));
2877 } else {
2878 ImmediateValue *imm = insn->getSrc(0)->asImm();
2879 assert(imm);
2880 emitField(0x14, 12, imm->reg.data.u32);
2881 emitField(0x2c, 1, 1);
2882 }
2883
2884 if (insn->srcExists(2) && (insn->predSrc != 2)) {
2885 emitPRED (0x27, insn->src(2));
2886 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
2887 } else {
2888 emitField(0x27, 3, 7);
2889 }
2890 }
2891
2892 void
2893 CodeEmitterGM107::emitMEMBAR()
2894 {
2895 emitInsn (0xef980000);
2896 emitField(0x08, 2, insn->subOp >> 2);
2897 }
2898
2899 void
2900 CodeEmitterGM107::emitVOTE()
2901 {
2902 assert(insn->src(0).getFile() == FILE_PREDICATE);
2903
2904 int r = -1, p = -1;
2905 for (int i = 0; insn->defExists(i); i++) {
2906 if (insn->def(i).getFile() == FILE_GPR)
2907 r = i;
2908 else if (insn->def(i).getFile() == FILE_PREDICATE)
2909 p = i;
2910 }
2911
2912 emitInsn (0x50d80000);
2913 emitField(0x30, 2, insn->subOp);
2914 if (r >= 0)
2915 emitGPR (0x00, insn->def(r));
2916 else
2917 emitGPR (0x00);
2918 if (p >= 0)
2919 emitPRED (0x2d, insn->def(p));
2920 else
2921 emitPRED (0x2d);
2922 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
2923 emitPRED (0x27, insn->src(0));
2924 }
2925
2926 void
2927 CodeEmitterGM107::emitSUTarget()
2928 {
2929 const TexInstruction *insn = this->insn->asTex();
2930 int target = 0;
2931
2932 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2933
2934 if (insn->tex.target == TEX_TARGET_BUFFER) {
2935 target = 2;
2936 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
2937 target = 4;
2938 } else if (insn->tex.target == TEX_TARGET_2D ||
2939 insn->tex.target == TEX_TARGET_RECT) {
2940 target = 6;
2941 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
2942 insn->tex.target == TEX_TARGET_CUBE ||
2943 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
2944 target = 8;
2945 } else if (insn->tex.target == TEX_TARGET_3D) {
2946 target = 10;
2947 } else {
2948 assert(insn->tex.target == TEX_TARGET_1D);
2949 }
2950 emitField(0x20, 4, target);
2951 }
2952
2953 void
2954 CodeEmitterGM107::emitSUHandle(const int s)
2955 {
2956 const TexInstruction *insn = this->insn->asTex();
2957
2958 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2959
2960 if (insn->src(s).getFile() == FILE_GPR) {
2961 emitGPR(0x27, insn->src(s));
2962 } else {
2963 ImmediateValue *imm = insn->getSrc(s)->asImm();
2964 assert(imm);
2965 emitField(0x33, 1, 1);
2966 emitField(0x24, 13, imm->reg.data.u32);
2967 }
2968 }
2969
2970 void
2971 CodeEmitterGM107::emitSUSTx()
2972 {
2973 const TexInstruction *insn = this->insn->asTex();
2974
2975 emitInsn(0xeb200000);
2976 if (insn->op == OP_SUSTB)
2977 emitField(0x34, 1, 1);
2978 emitSUTarget();
2979
2980 emitLDSTc(0x18);
2981 emitField(0x14, 4, 0xf); // rgba
2982 emitGPR (0x08, insn->src(0));
2983 emitGPR (0x00, insn->src(1));
2984
2985 emitSUHandle(2);
2986 }
2987
2988 void
2989 CodeEmitterGM107::emitSULDx()
2990 {
2991 const TexInstruction *insn = this->insn->asTex();
2992 int type = 0;
2993
2994 emitInsn(0xeb000000);
2995 if (insn->op == OP_SULDB)
2996 emitField(0x34, 1, 1);
2997 emitSUTarget();
2998
2999 switch (insn->dType) {
3000 case TYPE_S8: type = 1; break;
3001 case TYPE_U16: type = 2; break;
3002 case TYPE_S16: type = 3; break;
3003 case TYPE_U32: type = 4; break;
3004 case TYPE_U64: type = 5; break;
3005 case TYPE_B128: type = 6; break;
3006 default:
3007 assert(insn->dType == TYPE_U8);
3008 break;
3009 }
3010 emitLDSTc(0x18);
3011 emitField(0x14, 3, type);
3012 emitGPR (0x00, insn->def(0));
3013 emitGPR (0x08, insn->src(0));
3014
3015 emitSUHandle(1);
3016 }
3017
3018 void
3019 CodeEmitterGM107::emitSUREDx()
3020 {
3021 const TexInstruction *insn = this->insn->asTex();
3022 uint8_t type = 0, subOp;
3023
3024 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3025 emitInsn(0xeac00000);
3026 else
3027 emitInsn(0xea600000);
3028
3029 if (insn->op == OP_SUREDB)
3030 emitField(0x34, 1, 1);
3031 emitSUTarget();
3032
3033 // destination type
3034 switch (insn->dType) {
3035 case TYPE_S32: type = 1; break;
3036 case TYPE_U64: type = 2; break;
3037 case TYPE_F32: type = 3; break;
3038 case TYPE_S64: type = 5; break;
3039 default:
3040 assert(insn->dType == TYPE_U32);
3041 break;
3042 }
3043
3044 // atomic operation
3045 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3046 subOp = 0;
3047 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3048 subOp = 8;
3049 } else {
3050 subOp = insn->subOp;
3051 }
3052
3053 emitField(0x24, 3, type);
3054 emitField(0x1d, 4, subOp);
3055 emitGPR (0x14, insn->src(1));
3056 emitGPR (0x08, insn->src(0));
3057 emitGPR (0x00, insn->def(0));
3058
3059 emitSUHandle(2);
3060 }
3061
3062 /*******************************************************************************
3063 * assembler front-end
3064 ******************************************************************************/
3065
3066 bool
3067 CodeEmitterGM107::emitInstruction(Instruction *i)
3068 {
3069 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3070 bool ret = true;
3071
3072 insn = i;
3073
3074 if (insn->encSize != 8) {
3075 ERROR("skipping undecodable instruction: "); insn->print();
3076 return false;
3077 } else
3078 if (codeSize + size > codeSizeLimit) {
3079 ERROR("code emitter output buffer too small\n");
3080 return false;
3081 }
3082
3083 if (writeIssueDelays) {
3084 int n = ((codeSize & 0x1f) / 8) - 1;
3085 if (n < 0) {
3086 data = code;
3087 data[0] = 0x00000000;
3088 data[1] = 0x00000000;
3089 code += 2;
3090 codeSize += 8;
3091 n++;
3092 }
3093
3094 emitField(data, n * 21, 21, insn->sched);
3095 }
3096
3097 switch (insn->op) {
3098 case OP_EXIT:
3099 emitEXIT();
3100 break;
3101 case OP_BRA:
3102 emitBRA();
3103 break;
3104 case OP_CALL:
3105 emitCAL();
3106 break;
3107 case OP_PRECONT:
3108 emitPCNT();
3109 break;
3110 case OP_CONT:
3111 emitCONT();
3112 break;
3113 case OP_PREBREAK:
3114 emitPBK();
3115 break;
3116 case OP_BREAK:
3117 emitBRK();
3118 break;
3119 case OP_PRERET:
3120 emitPRET();
3121 break;
3122 case OP_RET:
3123 emitRET();
3124 break;
3125 case OP_JOINAT:
3126 emitSSY();
3127 break;
3128 case OP_JOIN:
3129 emitSYNC();
3130 break;
3131 case OP_QUADON:
3132 emitSAM();
3133 break;
3134 case OP_QUADPOP:
3135 emitRAM();
3136 break;
3137 case OP_MOV:
3138 emitMOV();
3139 break;
3140 case OP_RDSV:
3141 emitS2R();
3142 break;
3143 case OP_ABS:
3144 case OP_NEG:
3145 case OP_SAT:
3146 case OP_FLOOR:
3147 case OP_CEIL:
3148 case OP_TRUNC:
3149 case OP_CVT:
3150 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3151 insn->src(0).getFile() == FILE_PREDICATE)) {
3152 emitMOV();
3153 } else if (isFloatType(insn->dType)) {
3154 if (isFloatType(insn->sType))
3155 emitF2F();
3156 else
3157 emitI2F();
3158 } else {
3159 if (isFloatType(insn->sType))
3160 emitF2I();
3161 else
3162 emitI2I();
3163 }
3164 break;
3165 case OP_SHFL:
3166 emitSHFL();
3167 break;
3168 case OP_ADD:
3169 case OP_SUB:
3170 if (isFloatType(insn->dType)) {
3171 if (insn->dType == TYPE_F64)
3172 emitDADD();
3173 else
3174 emitFADD();
3175 } else {
3176 emitIADD();
3177 }
3178 break;
3179 case OP_MUL:
3180 if (isFloatType(insn->dType)) {
3181 if (insn->dType == TYPE_F64)
3182 emitDMUL();
3183 else
3184 emitFMUL();
3185 } else {
3186 emitIMUL();
3187 }
3188 break;
3189 case OP_MAD:
3190 case OP_FMA:
3191 if (isFloatType(insn->dType)) {
3192 if (insn->dType == TYPE_F64)
3193 emitDFMA();
3194 else
3195 emitFFMA();
3196 } else {
3197 emitIMAD();
3198 }
3199 break;
3200 case OP_SHLADD:
3201 emitISCADD();
3202 break;
3203 case OP_MIN:
3204 case OP_MAX:
3205 if (isFloatType(insn->dType)) {
3206 if (insn->dType == TYPE_F64)
3207 emitDMNMX();
3208 else
3209 emitFMNMX();
3210 } else {
3211 emitIMNMX();
3212 }
3213 break;
3214 case OP_SHL:
3215 if (typeSizeof(insn->sType) == 8)
3216 emitSHF();
3217 else
3218 emitSHL();
3219 break;
3220 case OP_SHR:
3221 if (typeSizeof(insn->sType) == 8)
3222 emitSHF();
3223 else
3224 emitSHR();
3225 break;
3226 case OP_POPCNT:
3227 emitPOPC();
3228 break;
3229 case OP_INSBF:
3230 emitBFI();
3231 break;
3232 case OP_EXTBF:
3233 emitBFE();
3234 break;
3235 case OP_BFIND:
3236 emitFLO();
3237 break;
3238 case OP_SLCT:
3239 if (isFloatType(insn->dType))
3240 emitFCMP();
3241 else
3242 emitICMP();
3243 break;
3244 case OP_SET:
3245 case OP_SET_AND:
3246 case OP_SET_OR:
3247 case OP_SET_XOR:
3248 if (insn->def(0).getFile() != FILE_PREDICATE) {
3249 if (isFloatType(insn->sType))
3250 if (insn->sType == TYPE_F64)
3251 emitDSET();
3252 else
3253 emitFSET();
3254 else
3255 emitISET();
3256 } else {
3257 if (isFloatType(insn->sType))
3258 if (insn->sType == TYPE_F64)
3259 emitDSETP();
3260 else
3261 emitFSETP();
3262 else
3263 emitISETP();
3264 }
3265 break;
3266 case OP_SELP:
3267 emitSEL();
3268 break;
3269 case OP_PRESIN:
3270 case OP_PREEX2:
3271 emitRRO();
3272 break;
3273 case OP_COS:
3274 case OP_SIN:
3275 case OP_EX2:
3276 case OP_LG2:
3277 case OP_RCP:
3278 case OP_RSQ:
3279 emitMUFU();
3280 break;
3281 case OP_AND:
3282 case OP_OR:
3283 case OP_XOR:
3284 emitLOP();
3285 break;
3286 case OP_NOT:
3287 emitNOT();
3288 break;
3289 case OP_LOAD:
3290 switch (insn->src(0).getFile()) {
3291 case FILE_MEMORY_CONST : emitLDC(); break;
3292 case FILE_MEMORY_LOCAL : emitLDL(); break;
3293 case FILE_MEMORY_SHARED: emitLDS(); break;
3294 case FILE_MEMORY_GLOBAL: emitLD(); break;
3295 default:
3296 assert(!"invalid load");
3297 emitNOP();
3298 break;
3299 }
3300 break;
3301 case OP_STORE:
3302 switch (insn->src(0).getFile()) {
3303 case FILE_MEMORY_LOCAL : emitSTL(); break;
3304 case FILE_MEMORY_SHARED: emitSTS(); break;
3305 case FILE_MEMORY_GLOBAL: emitST(); break;
3306 default:
3307 assert(!"invalid store");
3308 emitNOP();
3309 break;
3310 }
3311 break;
3312 case OP_ATOM:
3313 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3314 emitATOMS();
3315 else
3316 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3317 emitRED();
3318 else
3319 emitATOM();
3320 break;
3321 case OP_CCTL:
3322 emitCCTL();
3323 break;
3324 case OP_VFETCH:
3325 emitALD();
3326 break;
3327 case OP_EXPORT:
3328 emitAST();
3329 break;
3330 case OP_PFETCH:
3331 emitISBERD();
3332 break;
3333 case OP_AFETCH:
3334 emitAL2P();
3335 break;
3336 case OP_LINTERP:
3337 case OP_PINTERP:
3338 emitIPA();
3339 break;
3340 case OP_PIXLD:
3341 emitPIXLD();
3342 break;
3343 case OP_TEX:
3344 case OP_TXB:
3345 case OP_TXL:
3346 emitTEX();
3347 break;
3348 case OP_TXF:
3349 emitTLD();
3350 break;
3351 case OP_TXG:
3352 emitTLD4();
3353 break;
3354 case OP_TXD:
3355 emitTXD();
3356 break;
3357 case OP_TXQ:
3358 emitTXQ();
3359 break;
3360 case OP_TXLQ:
3361 emitTMML();
3362 break;
3363 case OP_TEXBAR:
3364 emitDEPBAR();
3365 break;
3366 case OP_QUADOP:
3367 emitFSWZADD();
3368 break;
3369 case OP_NOP:
3370 emitNOP();
3371 break;
3372 case OP_DISCARD:
3373 emitKIL();
3374 break;
3375 case OP_EMIT:
3376 case OP_RESTART:
3377 emitOUT();
3378 break;
3379 case OP_BAR:
3380 emitBAR();
3381 break;
3382 case OP_MEMBAR:
3383 emitMEMBAR();
3384 break;
3385 case OP_VOTE:
3386 emitVOTE();
3387 break;
3388 case OP_SUSTB:
3389 case OP_SUSTP:
3390 emitSUSTx();
3391 break;
3392 case OP_SULDB:
3393 case OP_SULDP:
3394 emitSULDx();
3395 break;
3396 case OP_SUREDB:
3397 case OP_SUREDP:
3398 emitSUREDx();
3399 break;
3400 default:
3401 assert(!"invalid opcode");
3402 emitNOP();
3403 ret = false;
3404 break;
3405 }
3406
3407 if (insn->join) {
3408 /*XXX*/
3409 }
3410
3411 code += 2;
3412 codeSize += 8;
3413 return ret;
3414 }
3415
3416 uint32_t
3417 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3418 {
3419 return 8;
3420 }
3421
3422 /*******************************************************************************
3423 * sched data calculator
3424 ******************************************************************************/
3425
3426 class SchedDataCalculatorGM107 : public Pass
3427 {
3428 public:
3429 SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3430
3431 private:
3432 struct RegScores
3433 {
3434 struct ScoreData {
3435 int r[256];
3436 int p[8];
3437 int c;
3438 } rd, wr;
3439 int base;
3440
3441 void rebase(const int base)
3442 {
3443 const int delta = this->base - base;
3444 if (!delta)
3445 return;
3446 this->base = 0;
3447
3448 for (int i = 0; i < 256; ++i) {
3449 rd.r[i] += delta;
3450 wr.r[i] += delta;
3451 }
3452 for (int i = 0; i < 8; ++i) {
3453 rd.p[i] += delta;
3454 wr.p[i] += delta;
3455 }
3456 rd.c += delta;
3457 wr.c += delta;
3458 }
3459 void wipe()
3460 {
3461 memset(&rd, 0, sizeof(rd));
3462 memset(&wr, 0, sizeof(wr));
3463 }
3464 int getLatest(const ScoreData& d) const
3465 {
3466 int max = 0;
3467 for (int i = 0; i < 256; ++i)
3468 if (d.r[i] > max)
3469 max = d.r[i];
3470 for (int i = 0; i < 8; ++i)
3471 if (d.p[i] > max)
3472 max = d.p[i];
3473 if (d.c > max)
3474 max = d.c;
3475 return max;
3476 }
3477 inline int getLatestRd() const
3478 {
3479 return getLatest(rd);
3480 }
3481 inline int getLatestWr() const
3482 {
3483 return getLatest(wr);
3484 }
3485 inline int getLatest() const
3486 {
3487 return MAX2(getLatestRd(), getLatestWr());
3488 }
3489 void setMax(const RegScores *that)
3490 {
3491 for (int i = 0; i < 256; ++i) {
3492 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3493 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3494 }
3495 for (int i = 0; i < 8; ++i) {
3496 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3497 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3498 }
3499 rd.c = MAX2(rd.c, that->rd.c);
3500 wr.c = MAX2(wr.c, that->wr.c);
3501 }
3502 void print(int cycle)
3503 {
3504 for (int i = 0; i < 256; ++i) {
3505 if (rd.r[i] > cycle)
3506 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3507 if (wr.r[i] > cycle)
3508 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3509 }
3510 for (int i = 0; i < 8; ++i) {
3511 if (rd.p[i] > cycle)
3512 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3513 if (wr.p[i] > cycle)
3514 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3515 }
3516 if (rd.c > cycle)
3517 INFO("rd $c @ %i\n", rd.c);
3518 if (wr.c > cycle)
3519 INFO("wr $c @ %i\n", wr.c);
3520 }
3521 };
3522
3523 RegScores *score; // for current BB
3524 std::vector<RegScores> scoreBoards;
3525
3526 const TargetGM107 *targ;
3527 bool visit(Function *);
3528 bool visit(BasicBlock *);
3529
3530 void commitInsn(const Instruction *, int);
3531 int calcDelay(const Instruction *, int) const;
3532 void setDelay(Instruction *, int, const Instruction *);
3533 void recordWr(const Value *, int, int);
3534 void checkRd(const Value *, int, int&) const;
3535
3536 inline void emitYield(Instruction *);
3537 inline void emitStall(Instruction *, uint8_t);
3538 inline void emitReuse(Instruction *, uint8_t);
3539 inline void emitWrDepBar(Instruction *, uint8_t);
3540 inline void emitRdDepBar(Instruction *, uint8_t);
3541 inline void emitWtDepBar(Instruction *, uint8_t);
3542
3543 inline int getStall(const Instruction *) const;
3544 inline int getWrDepBar(const Instruction *) const;
3545 inline int getRdDepBar(const Instruction *) const;
3546 inline int getWtDepBar(const Instruction *) const;
3547
3548 void setReuseFlag(Instruction *);
3549
3550 inline void printSchedInfo(int, const Instruction *) const;
3551
3552 struct LiveBarUse {
3553 LiveBarUse(Instruction *insn, Instruction *usei)
3554 : insn(insn), usei(usei) { }
3555 Instruction *insn;
3556 Instruction *usei;
3557 };
3558
3559 struct LiveBarDef {
3560 LiveBarDef(Instruction *insn, Instruction *defi)
3561 : insn(insn), defi(defi) { }
3562 Instruction *insn;
3563 Instruction *defi;
3564 };
3565
3566 bool insertBarriers(BasicBlock *);
3567
3568 Instruction *findFirstUse(const Instruction *) const;
3569 Instruction *findFirstDef(const Instruction *) const;
3570
3571 bool needRdDepBar(const Instruction *) const;
3572 bool needWrDepBar(const Instruction *) const;
3573 };
3574
3575 inline void
3576 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3577 {
3578 assert(cnt < 16);
3579 insn->sched |= cnt;
3580 }
3581
3582 inline void
3583 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3584 {
3585 insn->sched |= 1 << 4;
3586 }
3587
3588 inline void
3589 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3590 {
3591 assert(id < 6);
3592 if ((insn->sched & 0xe0) == 0xe0)
3593 insn->sched ^= 0xe0;
3594 insn->sched |= id << 5;
3595 }
3596
3597 inline void
3598 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3599 {
3600 assert(id < 6);
3601 if ((insn->sched & 0x700) == 0x700)
3602 insn->sched ^= 0x700;
3603 insn->sched |= id << 8;
3604 }
3605
3606 inline void
3607 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3608 {
3609 assert(id < 6);
3610 insn->sched |= 1 << (11 + id);
3611 }
3612
3613 inline void
3614 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3615 {
3616 assert(id < 4);
3617 insn->sched |= 1 << (17 + id);
3618 }
3619
3620 inline void
3621 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3622 const Instruction *insn) const
3623 {
3624 uint8_t st, yl, wr, rd, wt, ru;
3625
3626 st = (insn->sched & 0x00000f) >> 0;
3627 yl = (insn->sched & 0x000010) >> 4;
3628 wr = (insn->sched & 0x0000e0) >> 5;
3629 rd = (insn->sched & 0x000700) >> 8;
3630 wt = (insn->sched & 0x01f800) >> 11;
3631 ru = (insn->sched & 0x1e0000) >> 17;
3632
3633 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3634 cycle, st, yl, wr, rd, wt, ru);
3635 }
3636
3637 inline int
3638 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3639 {
3640 return insn->sched & 0xf;
3641 }
3642
3643 inline int
3644 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3645 {
3646 return (insn->sched & 0x0000e0) >> 5;
3647 }
3648
3649 inline int
3650 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3651 {
3652 return (insn->sched & 0x000700) >> 8;
3653 }
3654
3655 inline int
3656 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3657 {
3658 return (insn->sched & 0x01f800) >> 11;
3659 }
3660
3661 // Emit the reuse flag which allows to make use of the new memory hierarchy
3662 // introduced since Maxwell, the operand reuse cache.
3663 //
3664 // It allows to reduce bank conflicts by caching operands. Each time you issue
3665 // an instruction, that flag can tell the hw which operands are going to be
3666 // re-used by the next instruction. Note that the next instruction has to use
3667 // the same GPR id in the same operand slot.
3668 void
3669 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3670 {
3671 Instruction *next = insn->next;
3672 BitSet defs(255, 1);
3673
3674 if (!targ->isReuseSupported(insn))
3675 return;
3676
3677 for (int d = 0; insn->defExists(d); ++d) {
3678 const Value *def = insn->def(d).rep();
3679 if (insn->def(d).getFile() != FILE_GPR)
3680 continue;
3681 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3682 continue;
3683 defs.set(def->reg.data.id);
3684 }
3685
3686 for (int s = 0; insn->srcExists(s); s++) {
3687 const Value *src = insn->src(s).rep();
3688 if (insn->src(s).getFile() != FILE_GPR)
3689 continue;
3690 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3691 continue;
3692 if (defs.test(src->reg.data.id))
3693 continue;
3694 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3695 continue;
3696 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3697 continue;
3698 assert(s < 4);
3699 emitReuse(insn, s);
3700 }
3701 }
3702
3703 void
3704 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3705 {
3706 int a = v->reg.data.id, b;
3707
3708 switch (v->reg.file) {
3709 case FILE_GPR:
3710 b = a + v->reg.size / 4;
3711 for (int r = a; r < b; ++r)
3712 score->rd.r[r] = ready;
3713 break;
3714 case FILE_PREDICATE:
3715 // To immediately use a predicate set by any instructions, the minimum
3716 // number of stall counts is 13.
3717 score->rd.p[a] = cycle + 13;
3718 break;
3719 case FILE_FLAGS:
3720 score->rd.c = ready;
3721 break;
3722 default:
3723 break;
3724 }
3725 }
3726
3727 void
3728 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3729 {
3730 int a = v->reg.data.id, b;
3731 int ready = cycle;
3732
3733 switch (v->reg.file) {
3734 case FILE_GPR:
3735 b = a + v->reg.size / 4;
3736 for (int r = a; r < b; ++r)
3737 ready = MAX2(ready, score->rd.r[r]);
3738 break;
3739 case FILE_PREDICATE:
3740 ready = MAX2(ready, score->rd.p[a]);
3741 break;
3742 case FILE_FLAGS:
3743 ready = MAX2(ready, score->rd.c);
3744 break;
3745 default:
3746 break;
3747 }
3748 if (cycle < ready)
3749 delay = MAX2(delay, ready - cycle);
3750 }
3751
3752 void
3753 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3754 {
3755 const int ready = cycle + targ->getLatency(insn);
3756
3757 for (int d = 0; insn->defExists(d); ++d)
3758 recordWr(insn->getDef(d), cycle, ready);
3759
3760 #ifdef GM107_DEBUG_SCHED_DATA
3761 score->print(cycle);
3762 #endif
3763 }
3764
// Bounds of the stall count written by setDelay() when the instruction is
// not dual-issued.
#define GM107_MIN_ISSUE_DELAY 0x01
#define GM107_MAX_ISSUE_DELAY 0x0f
3767
3768 int
3769 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3770 {
3771 int delay = 0, ready = cycle;
3772
3773 for (int s = 0; insn->srcExists(s); ++s)
3774 checkRd(insn->getSrc(s), cycle, delay);
3775
3776 // TODO: make use of getReadLatency()!
3777
3778 return MAX2(delay, ready - cycle);
3779 }
3780
3781 void
3782 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3783 const Instruction *next)
3784 {
3785 const OpClass cl = targ->getOpClass(insn->op);
3786 int wr, rd;
3787
3788 if (insn->op == OP_EXIT ||
3789 insn->op == OP_BAR ||
3790 insn->op == OP_MEMBAR) {
3791 delay = GM107_MAX_ISSUE_DELAY;
3792 } else
3793 if (insn->op == OP_QUADON ||
3794 insn->op == OP_QUADPOP) {
3795 delay = 0xd;
3796 } else
3797 if (cl == OPCLASS_FLOW || insn->join) {
3798 delay = 0xd;
3799 }
3800
3801 if (!next || !targ->canDualIssue(insn, next)) {
3802 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
3803 } else {
3804 delay = 0x0; // dual-issue
3805 }
3806
3807 wr = getWrDepBar(insn);
3808 rd = getRdDepBar(insn);
3809
3810 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
3811 // Barriers take one additional clock cycle to become active on top of
3812 // the clock consumed by the instruction producing it.
3813 if (!next || insn->bb != next->bb) {
3814 delay = 0x2;
3815 } else {
3816 int wt = getWtDepBar(next);
3817 if ((wt & (1 << wr)) | (wt & (1 << rd)))
3818 delay = 0x2;
3819 }
3820 }
3821
3822 emitStall(insn, delay);
3823 }
3824
3825
3826 // Return true when the given instruction needs to emit a read dependency
3827 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3828 // setting the maximum number of stall counts is not enough.
3829 bool
3830 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
3831 {
3832 BitSet srcs(255, 1), defs(255, 1);
3833 int a, b;
3834
3835 if (!targ->isBarrierRequired(insn))
3836 return false;
3837
3838 // Do not emit a read dependency barrier when the instruction doesn't use
3839 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3840 for (int s = 0; insn->srcExists(s); ++s) {
3841 const Value *src = insn->src(s).rep();
3842 if (insn->src(s).getFile() != FILE_GPR)
3843 continue;
3844 if (src->reg.data.id == 255)
3845 continue;
3846
3847 a = src->reg.data.id;
3848 b = a + src->reg.size / 4;
3849 for (int r = a; r < b; ++r)
3850 srcs.set(r);
3851 }
3852
3853 if (!srcs.popCount())
3854 return false;
3855
3856 // Do not emit a read dependency barrier when the output GPRs are equal to
3857 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3858 // be produced and WaR hazards are prevented.
3859 for (int d = 0; insn->defExists(d); ++d) {
3860 const Value *def = insn->def(d).rep();
3861 if (insn->def(d).getFile() != FILE_GPR)
3862 continue;
3863 if (def->reg.data.id == 255)
3864 continue;
3865
3866 a = def->reg.data.id;
3867 b = a + def->reg.size / 4;
3868 for (int r = a; r < b; ++r)
3869 defs.set(r);
3870 }
3871
3872 srcs.andNot(defs);
3873 if (!srcs.popCount())
3874 return false;
3875
3876 return true;
3877 }
3878
3879 // Return true when the given instruction needs to emit a write dependency
3880 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3881 // setting the maximum number of stall counts is not enough. This is only legal
3882 // if the instruction output something.
3883 bool
3884 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
3885 {
3886 if (!targ->isBarrierRequired(insn))
3887 return false;
3888
3889 for (int d = 0; insn->defExists(d); ++d) {
3890 if (insn->def(d).getFile() == FILE_GPR ||
3891 insn->def(d).getFile() == FILE_PREDICATE)
3892 return true;
3893 }
3894 return false;
3895 }
3896
3897 // Find the next instruction inside the same basic block which uses the output
3898 // of the given instruction in order to avoid RaW hazards.
3899 Instruction *
3900 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
3901 {
3902 Instruction *insn, *next;
3903 int minGPR, maxGPR;
3904
3905 if (!bari->defExists(0))
3906 return NULL;
3907
3908 minGPR = bari->def(0).rep()->reg.data.id;
3909 maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
3910
3911 for (insn = bari->next; insn != NULL; insn = next) {
3912 next = insn->next;
3913
3914 for (int s = 0; insn->srcExists(s); ++s) {
3915 const Value *src = insn->src(s).rep();
3916 if (bari->def(0).getFile() == FILE_GPR) {
3917 if (insn->src(s).getFile() != FILE_GPR ||
3918 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3919 src->reg.data.id > maxGPR)
3920 continue;
3921 return insn;
3922 } else
3923 if (bari->def(0).getFile() == FILE_PREDICATE) {
3924 if (insn->src(s).getFile() != FILE_PREDICATE ||
3925 src->reg.data.id != minGPR)
3926 continue;
3927 return insn;
3928 }
3929 }
3930 }
3931 return NULL;
3932 }
3933
3934 // Find the next instruction inside the same basic block which overwrites, at
3935 // least, one source of the given instruction in order to avoid WaR hazards.
3936 Instruction *
3937 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
3938 {
3939 Instruction *insn, *next;
3940 int minGPR, maxGPR;
3941
3942 for (insn = bari->next; insn != NULL; insn = next) {
3943 next = insn->next;
3944
3945 for (int d = 0; insn->defExists(d); ++d) {
3946 const Value *def = insn->def(d).rep();
3947 if (insn->def(d).getFile() != FILE_GPR)
3948 continue;
3949
3950 minGPR = def->reg.data.id;
3951 maxGPR = minGPR + def->reg.size / 4 - 1;
3952
3953 for (int s = 0; bari->srcExists(s); ++s) {
3954 const Value *src = bari->src(s).rep();
3955 if (bari->src(s).getFile() != FILE_GPR ||
3956 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3957 src->reg.data.id > maxGPR)
3958 continue;
3959 return insn;
3960 }
3961 }
3962 }
3963 return NULL;
3964 }
3965
3966 // Dependency barriers:
3967 // This pass is a bit ugly and could probably be improved by performing a
3968 // better allocation.
3969 //
3970 // The main idea is to avoid WaR and RaW hazards by emitting read/write
3971 // dependency barriers using the control codes.
3972 bool
3973 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
3974 {
3975 std::list<LiveBarUse> live_uses;
3976 std::list<LiveBarDef> live_defs;
3977 Instruction *insn, *next;
3978 BitSet bars(6, 1);
3979 int bar_id;
3980
3981 for (insn = bb->getEntry(); insn != NULL; insn = next) {
3982 Instruction *usei = NULL, *defi = NULL;
3983 bool need_wr_bar, need_rd_bar;
3984
3985 next = insn->next;
3986
3987 // Expire old barrier uses.
3988 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
3989 it != live_uses.end();) {
3990 if (insn->serial >= it->usei->serial) {
3991 int wr = getWrDepBar(it->insn);
3992 emitWtDepBar(insn, wr);
3993 bars.clr(wr); // free barrier
3994 it = live_uses.erase(it);
3995 continue;
3996 }
3997 ++it;
3998 }
3999
4000 // Expire old barrier defs.
4001 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4002 it != live_defs.end();) {
4003 if (insn->serial >= it->defi->serial) {
4004 int rd = getRdDepBar(it->insn);
4005 emitWtDepBar(insn, rd);
4006 bars.clr(rd); // free barrier
4007 it = live_defs.erase(it);
4008 continue;
4009 }
4010 ++it;
4011 }
4012
4013 need_wr_bar = needWrDepBar(insn);
4014 need_rd_bar = needRdDepBar(insn);
4015
4016 if (need_wr_bar) {
4017 // When the instruction requires to emit a write dependency barrier
4018 // (all which write something at a variable latency), find the next
4019 // instruction which reads the outputs.
4020 usei = findFirstUse(insn);
4021
4022 // Allocate and emit a new barrier.
4023 bar_id = bars.findFreeRange(1);
4024 if (bar_id == -1)
4025 bar_id = 5;
4026 bars.set(bar_id);
4027 emitWrDepBar(insn, bar_id);
4028 if (usei)
4029 live_uses.push_back(LiveBarUse(insn, usei));
4030 }
4031
4032 if (need_rd_bar) {
4033 // When the instruction requires to emit a read dependency barrier
4034 // (all which read something at a variable latency), find the next
4035 // instruction which will write the inputs.
4036 defi = findFirstDef(insn);
4037
4038 if (usei && defi && usei->serial <= defi->serial)
4039 continue;
4040
4041 // Allocate and emit a new barrier.
4042 bar_id = bars.findFreeRange(1);
4043 if (bar_id == -1)
4044 bar_id = 5;
4045 bars.set(bar_id);
4046 emitRdDepBar(insn, bar_id);
4047 if (defi)
4048 live_defs.push_back(LiveBarDef(insn, defi));
4049 }
4050 }
4051
4052 // Remove unnecessary barrier waits.
4053 BitSet alive_bars(6, 1);
4054 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4055 int wr, rd, wt;
4056
4057 next = insn->next;
4058
4059 wr = getWrDepBar(insn);
4060 rd = getRdDepBar(insn);
4061 wt = getWtDepBar(insn);
4062
4063 for (int idx = 0; idx < 6; ++idx) {
4064 if (!(wt & (1 << idx)))
4065 continue;
4066 if (!alive_bars.test(idx)) {
4067 insn->sched &= ~(1 << (11 + idx));
4068 } else {
4069 alive_bars.clr(idx);
4070 }
4071 }
4072
4073 if (wr < 6)
4074 alive_bars.set(wr);
4075 if (rd < 6)
4076 alive_bars.set(rd);
4077 }
4078
4079 return true;
4080 }
4081
4082 bool
4083 SchedDataCalculatorGM107::visit(Function *func)
4084 {
4085 ArrayList insns;
4086
4087 func->orderInstructions(insns);
4088
4089 scoreBoards.resize(func->cfg.getSize());
4090 for (size_t i = 0; i < scoreBoards.size(); ++i)
4091 scoreBoards[i].wipe();
4092 return true;
4093 }
4094
4095 bool
4096 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4097 {
4098 Instruction *insn, *next = NULL;
4099 int cycle = 0;
4100
4101 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4102 /*XXX*/
4103 insn->sched = 0x7e0;
4104 }
4105
4106 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4107 return true;
4108
4109 // Insert read/write dependency barriers for instructions which don't
4110 // operate at a fixed latency.
4111 insertBarriers(bb);
4112
4113 score = &scoreBoards.at(bb->getId());
4114
4115 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4116 // back branches will wait until all target dependencies are satisfied
4117 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4118 continue;
4119 BasicBlock *in = BasicBlock::get(ei.getNode());
4120 score->setMax(&scoreBoards.at(in->getId()));
4121 }
4122
4123 #ifdef GM107_DEBUG_SCHED_DATA
4124 INFO("=== BB:%i initial scores\n", bb->getId());
4125 score->print(cycle);
4126 #endif
4127
4128 // Because barriers are allocated locally (intra-BB), we have to make sure
4129 // that all produced barriers have been consumed before entering inside a
4130 // new basic block. The best way is to do a global allocation pre RA but
4131 // it's really more difficult, especially because of the phi nodes. Anyways,
4132 // it seems like that waiting on a barrier which has already been consumed
4133 // doesn't add any additional cost, it's just not elegant!
4134 Instruction *start = bb->getEntry();
4135 if (start && bb->cfg.incidentCount() > 0) {
4136 for (int b = 0; b < 6; b++)
4137 emitWtDepBar(start, b);
4138 }
4139
4140 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4141 next = insn->next;
4142
4143 commitInsn(insn, cycle);
4144 int delay = calcDelay(next, cycle);
4145 setDelay(insn, delay, next);
4146 cycle += getStall(insn);
4147
4148 setReuseFlag(insn);
4149
4150 // XXX: The yield flag seems to destroy a bunch of things when it is
4151 // set on every instruction, need investigation.
4152 //emitYield(insn);
4153
4154 #ifdef GM107_DEBUG_SCHED_DATA
4155 printSchedInfo(cycle, insn);
4156 insn->print();
4157 next->print();
4158 #endif
4159 }
4160
4161 if (!insn)
4162 return true;
4163 commitInsn(insn, cycle);
4164
4165 int bbDelay = -1;
4166
4167 #ifdef GM107_DEBUG_SCHED_DATA
4168 fprintf(stderr, "last instruction is : ");
4169 insn->print();
4170 fprintf(stderr, "cycle=%d\n", cycle);
4171 #endif
4172
4173 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4174 BasicBlock *out = BasicBlock::get(ei.getNode());
4175
4176 if (ei.getType() != Graph::Edge::BACK) {
4177 // Only test the first instruction of the outgoing block.
4178 next = out->getEntry();
4179 if (next) {
4180 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4181 } else {
4182 // When the outgoing BB is empty, make sure to set the number of
4183 // stall counts needed by the instruction because we don't know the
4184 // next instruction.
4185 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4186 }
4187 } else {
4188 // Wait until all dependencies are satisfied.
4189 const int regsFree = score->getLatest();
4190 next = out->getFirst();
4191 for (int c = cycle; next && c < regsFree; next = next->next) {
4192 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4193 c += getStall(next);
4194 }
4195 next = NULL;
4196 }
4197 }
4198 if (bb->cfg.outgoingCount() != 1)
4199 next = NULL;
4200 setDelay(insn, bbDelay, next);
4201 cycle += getStall(insn);
4202
4203 score->rebase(cycle); // common base for initializing out blocks' scores
4204 return true;
4205 }
4206
4207 /*******************************************************************************
4208 * main
4209 ******************************************************************************/
4210
4211 void
4212 CodeEmitterGM107::prepareEmission(Function *func)
4213 {
4214 SchedDataCalculatorGM107 sched(targGM107);
4215 CodeEmitter::prepareEmission(func);
4216 sched.run(func, true, true);
4217 }
4218
// Number of 24-byte groups needed to hold `size` bytes, rounding up.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bytesPerBundle = 24;
   const uint32_t roundedUp = size + (bytesPerBundle - 1);

   return roundedUp / bytesPerBundle;
}
4223
4224 void
4225 CodeEmitterGM107::prepareEmission(Program *prog)
4226 {
4227 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4228 !fi.end(); fi.next()) {
4229 Function *func = reinterpret_cast<Function *>(fi.get());
4230 func->binPos = prog->binSize;
4231 prepareEmission(func);
4232
4233 // adjust sizes & positions for schedulding info:
4234 if (prog->getTarget()->hasSWSched) {
4235 uint32_t adjPos = func->binPos;
4236 BasicBlock *bb = NULL;
4237 for (int i = 0; i < func->bbCount; ++i) {
4238 bb = func->bbArray[i];
4239 int32_t adjSize = bb->binSize;
4240 if (adjPos % 32) {
4241 adjSize -= 32 - adjPos % 32;
4242 if (adjSize < 0)
4243 adjSize = 0;
4244 }
4245 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4246 bb->binPos = adjPos;
4247 bb->binSize = adjSize;
4248 adjPos += adjSize;
4249 }
4250 if (bb)
4251 func->binSize = adjPos - func->binPos;
4252 }
4253
4254 prog->binSize += func->binSize;
4255 }
4256 }
4257
4258 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4259 : CodeEmitter(target),
4260 targGM107(target),
4261 writeIssueDelays(target->hasSWSched)
4262 {
4263 code = NULL;
4264 codeSize = codeSizeLimit = 0;
4265 relocInfo = NULL;
4266 }
4267
4268 CodeEmitter *
4269 TargetGM107::createCodeEmitterGM107(Program::Type type)
4270 {
4271 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4272 emit->setProgramType(type);
4273 return emit;
4274 }
4275
4276 } // namespace nv50_ir