8b58df49c2782542b8cf4d4423cf685a62f0e48c
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_emit_gm107.cpp
1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "codegen/nv50_ir_target_gm107.h"
26
27 //#define GM107_DEBUG_SCHED_DATA
28
29 namespace nv50_ir {
30
31 class CodeEmitterGM107 : public CodeEmitter
32 {
33 public:
34 CodeEmitterGM107(const TargetGM107 *);
35
36 virtual bool emitInstruction(Instruction *);
37 virtual uint32_t getMinEncodingSize(const Instruction *) const;
38
39 virtual void prepareEmission(Program *);
40 virtual void prepareEmission(Function *);
41
42 inline void setProgramType(Program::Type pType) { progType = pType; }
43
44 private:
45 const TargetGM107 *targGM107;
46
47 Program::Type progType;
48
49 const Instruction *insn;
50 const bool writeIssueDelays;
51 uint32_t *data;
52
53 private:
54 inline void emitField(uint32_t *, int, int, uint32_t);
55 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
56
57 inline void emitInsn(uint32_t, bool);
58 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
59 inline void emitPred();
60 inline void emitGPR(int, const Value *);
61 inline void emitGPR(int pos) {
62 emitGPR(pos, (const Value *)NULL);
63 }
64 inline void emitGPR(int pos, const ValueRef &ref) {
65 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
66 }
67 inline void emitGPR(int pos, const ValueRef *ref) {
68 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
69 }
70 inline void emitGPR(int pos, const ValueDef &def) {
71 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
72 }
73 inline void emitSYS(int, const Value *);
74 inline void emitSYS(int pos, const ValueRef &ref) {
75 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
76 }
77 inline void emitPRED(int, const Value *);
78 inline void emitPRED(int pos) {
79 emitPRED(pos, (const Value *)NULL);
80 }
81 inline void emitPRED(int pos, const ValueRef &ref) {
82 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
83 }
84 inline void emitPRED(int pos, const ValueDef &def) {
85 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
86 }
87 inline void emitADDR(int, int, int, int, const ValueRef &);
88 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
89 inline bool longIMMD(const ValueRef &);
90 inline void emitIMMD(int, int, const ValueRef &);
91
92 void emitCond3(int, CondCode);
93 void emitCond4(int, CondCode);
94 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
95 inline void emitO(int);
96 inline void emitP(int);
97 inline void emitSAT(int);
98 inline void emitCC(int);
99 inline void emitX(int);
100 inline void emitABS(int, const ValueRef &);
101 inline void emitNEG(int, const ValueRef &);
102 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
103 inline void emitFMZ(int, int);
104 inline void emitRND(int, RoundMode, int);
105 inline void emitRND(int pos) {
106 emitRND(pos, insn->rnd, -1);
107 }
108 inline void emitPDIV(int);
109 inline void emitINV(int, const ValueRef &);
110
111 void emitEXIT();
112 void emitBRA();
113 void emitCAL();
114 void emitPCNT();
115 void emitCONT();
116 void emitPBK();
117 void emitBRK();
118 void emitPRET();
119 void emitRET();
120 void emitSSY();
121 void emitSYNC();
122 void emitSAM();
123 void emitRAM();
124
125 void emitMOV();
126 void emitS2R();
127 void emitF2F();
128 void emitF2I();
129 void emitI2F();
130 void emitI2I();
131 void emitSEL();
132 void emitSHFL();
133
134 void emitDADD();
135 void emitDMUL();
136 void emitDFMA();
137 void emitDMNMX();
138 void emitDSET();
139 void emitDSETP();
140
141 void emitFADD();
142 void emitFMUL();
143 void emitFFMA();
144 void emitMUFU();
145 void emitFMNMX();
146 void emitRRO();
147 void emitFCMP();
148 void emitFSET();
149 void emitFSETP();
150 void emitFSWZADD();
151
152 void emitLOP();
153 void emitNOT();
154 void emitIADD();
155 void emitIMUL();
156 void emitIMAD();
157 void emitISCADD();
158 void emitIMNMX();
159 void emitICMP();
160 void emitISET();
161 void emitISETP();
162 void emitSHL();
163 void emitSHR();
164 void emitSHF();
165 void emitPOPC();
166 void emitBFI();
167 void emitBFE();
168 void emitFLO();
169
170 void emitLDSTs(int, DataType);
171 void emitLDSTc(int);
172 void emitLDC();
173 void emitLDL();
174 void emitLDS();
175 void emitLD();
176 void emitSTL();
177 void emitSTS();
178 void emitST();
179 void emitALD();
180 void emitAST();
181 void emitISBERD();
182 void emitAL2P();
183 void emitIPA();
184 void emitATOM();
185 void emitATOMS();
186 void emitRED();
187 void emitCCTL();
188
189 void emitPIXLD();
190
191 void emitTEXs(int);
192 void emitTEX();
193 void emitTLD();
194 void emitTLD4();
195 void emitTXD();
196 void emitTXQ();
197 void emitTMML();
198 void emitDEPBAR();
199
200 void emitNOP();
201 void emitKIL();
202 void emitOUT();
203
204 void emitBAR();
205 void emitMEMBAR();
206
207 void emitVOTE();
208
209 void emitSUTarget();
210 void emitSUHandle(const int s);
211 void emitSUSTx();
212 void emitSULDx();
213 void emitSUREDx();
214 };
215
216 /*******************************************************************************
217 * general instruction layout/fields
218 ******************************************************************************/
219
220 void
221 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
222 {
223 if (b >= 0) {
224 uint32_t m = ((1ULL << s) - 1);
225 uint64_t d = (uint64_t)(v & m) << b;
226 assert(!(v & ~m) || (v & ~m) == ~m);
227 data[1] |= d >> 32;
228 data[0] |= d;
229 }
230 }
231
232 void
233 CodeEmitterGM107::emitPred()
234 {
235 if (insn->predSrc >= 0) {
236 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
237 emitField(19, 1, insn->cc == CC_NOT_P);
238 } else {
239 emitField(16, 3, 7);
240 }
241 }
242
243 void
244 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
245 {
246 code[0] = 0x00000000;
247 code[1] = hi;
248 if (pred)
249 emitPred();
250 }
251
252 void
253 CodeEmitterGM107::emitGPR(int pos, const Value *val)
254 {
255 emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
256 val->reg.data.id : 255);
257 }
258
259 void
260 CodeEmitterGM107::emitSYS(int pos, const Value *val)
261 {
262 int id = val ? val->reg.data.id : -1;
263
264 switch (id) {
265 case SV_LANEID : id = 0x00; break;
266 case SV_VERTEX_COUNT : id = 0x10; break;
267 case SV_INVOCATION_ID : id = 0x11; break;
268 case SV_THREAD_KILL : id = 0x13; break;
269 case SV_INVOCATION_INFO: id = 0x1d; break;
270 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
271 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
272 case SV_LANEMASK_EQ : id = 0x38; break;
273 case SV_LANEMASK_LT : id = 0x39; break;
274 case SV_LANEMASK_LE : id = 0x3a; break;
275 case SV_LANEMASK_GT : id = 0x3b; break;
276 case SV_LANEMASK_GE : id = 0x3c; break;
277 case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
278 default:
279 assert(!"invalid system value");
280 id = 0;
281 break;
282 }
283
284 emitField(pos, 8, id);
285 }
286
287 void
288 CodeEmitterGM107::emitPRED(int pos, const Value *val)
289 {
290 emitField(pos, 3, val ? val->reg.data.id : 7);
291 }
292
293 void
294 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
295 const ValueRef &ref)
296 {
297 const Value *v = ref.get();
298 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
299 if (gpr >= 0)
300 emitGPR(gpr, ref.getIndirect(0));
301 emitField(off, len, v->reg.data.offset >> shr);
302 }
303
304 void
305 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
306 const ValueRef &ref)
307 {
308 const Value *v = ref.get();
309 const Symbol *s = v->asSym();
310
311 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
312
313 emitField(buf, 5, v->reg.fileIndex);
314 if (gpr >= 0)
315 emitGPR(gpr, ref.getIndirect(0));
316 emitField(off, 16, s->reg.data.offset >> shr);
317 }
318
319 bool
320 CodeEmitterGM107::longIMMD(const ValueRef &ref)
321 {
322 if (ref.getFile() == FILE_IMMEDIATE) {
323 const ImmediateValue *imm = ref.get()->asImm();
324 if (isFloatType(insn->sType)) {
325 if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000)
326 return true;
327 } else {
328 if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 &&
329 (imm->reg.data.u32 & 0xfff00000) != 0xfff00000)
330 return true;
331 }
332 }
333 return false;
334 }
335
336 void
337 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
338 {
339 const ImmediateValue *imm = ref.get()->asImm();
340 uint32_t val = imm->reg.data.u32;
341
342 if (len == 19) {
343 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
344 assert(!(val & 0x00000fff));
345 val >>= 12;
346 } else if (insn->sType == TYPE_F64) {
347 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
348 val = imm->reg.data.u64 >> 44;
349 }
350 assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
351 emitField( 56, 1, (val & 0x80000) >> 19);
352 emitField(pos, len, (val & 0x7ffff));
353 } else {
354 emitField(pos, len, val);
355 }
356 }
357
358 /*******************************************************************************
359 * modifiers
360 ******************************************************************************/
361
362 void
363 CodeEmitterGM107::emitCond3(int pos, CondCode code)
364 {
365 int data = 0;
366
367 switch (code) {
368 case CC_FL : data = 0x00; break;
369 case CC_LTU:
370 case CC_LT : data = 0x01; break;
371 case CC_EQU:
372 case CC_EQ : data = 0x02; break;
373 case CC_LEU:
374 case CC_LE : data = 0x03; break;
375 case CC_GTU:
376 case CC_GT : data = 0x04; break;
377 case CC_NEU:
378 case CC_NE : data = 0x05; break;
379 case CC_GEU:
380 case CC_GE : data = 0x06; break;
381 case CC_TR : data = 0x07; break;
382 default:
383 assert(!"invalid cond3");
384 break;
385 }
386
387 emitField(pos, 3, data);
388 }
389
390 void
391 CodeEmitterGM107::emitCond4(int pos, CondCode code)
392 {
393 int data = 0;
394
395 switch (code) {
396 case CC_FL: data = 0x00; break;
397 case CC_LT: data = 0x01; break;
398 case CC_EQ: data = 0x02; break;
399 case CC_LE: data = 0x03; break;
400 case CC_GT: data = 0x04; break;
401 case CC_NE: data = 0x05; break;
402 case CC_GE: data = 0x06; break;
403 // case CC_NUM: data = 0x07; break;
404 // case CC_NAN: data = 0x08; break;
405 case CC_LTU: data = 0x09; break;
406 case CC_EQU: data = 0x0a; break;
407 case CC_LEU: data = 0x0b; break;
408 case CC_GTU: data = 0x0c; break;
409 case CC_NEU: data = 0x0d; break;
410 case CC_GEU: data = 0x0e; break;
411 case CC_TR: data = 0x0f; break;
412 default:
413 assert(!"invalid cond4");
414 break;
415 }
416
417 emitField(pos, 4, data);
418 }
419
420 void
421 CodeEmitterGM107::emitO(int pos)
422 {
423 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
424 }
425
426 void
427 CodeEmitterGM107::emitP(int pos)
428 {
429 emitField(pos, 1, insn->perPatch);
430 }
431
432 void
433 CodeEmitterGM107::emitSAT(int pos)
434 {
435 emitField(pos, 1, insn->saturate);
436 }
437
438 void
439 CodeEmitterGM107::emitCC(int pos)
440 {
441 emitField(pos, 1, insn->flagsDef >= 0);
442 }
443
444 void
445 CodeEmitterGM107::emitX(int pos)
446 {
447 emitField(pos, 1, insn->flagsSrc >= 0);
448 }
449
450 void
451 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
452 {
453 emitField(pos, 1, ref.mod.abs());
454 }
455
456 void
457 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
458 {
459 emitField(pos, 1, ref.mod.neg());
460 }
461
462 void
463 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
464 {
465 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
466 }
467
468 void
469 CodeEmitterGM107::emitFMZ(int pos, int len)
470 {
471 emitField(pos, len, insn->dnz << 1 | insn->ftz);
472 }
473
474 void
475 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
476 {
477 int rm = 0, ri = 0;
478 switch (rnd) {
479 case ROUND_NI: ri = 1;
480 case ROUND_N : rm = 0; break;
481 case ROUND_MI: ri = 1;
482 case ROUND_M : rm = 1; break;
483 case ROUND_PI: ri = 1;
484 case ROUND_P : rm = 2; break;
485 case ROUND_ZI: ri = 1;
486 case ROUND_Z : rm = 3; break;
487 default:
488 assert(!"invalid round mode");
489 break;
490 }
491 emitField(rip, 1, ri);
492 emitField(rmp, 2, rm);
493 }
494
495 void
496 CodeEmitterGM107::emitPDIV(int pos)
497 {
498 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
499 if (insn->postFactor > 0)
500 emitField(pos, 3, 7 - insn->postFactor);
501 else
502 emitField(pos, 3, 0 - insn->postFactor);
503 }
504
505 void
506 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
507 {
508 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
509 }
510
511 /*******************************************************************************
512 * control flow
513 ******************************************************************************/
514
515 void
516 CodeEmitterGM107::emitEXIT()
517 {
518 emitInsn (0xe3000000);
519 emitCond5(0x00, CC_TR);
520 }
521
522 void
523 CodeEmitterGM107::emitBRA()
524 {
525 const FlowInstruction *insn = this->insn->asFlow();
526 int gpr = -1;
527
528 if (insn->indirect) {
529 if (insn->absolute)
530 emitInsn(0xe2000000); // JMX
531 else
532 emitInsn(0xe2500000); // BRX
533 gpr = 0x08;
534 } else {
535 if (insn->absolute)
536 emitInsn(0xe2100000); // JMP
537 else
538 emitInsn(0xe2400000); // BRA
539 emitField(0x07, 1, insn->allWarp);
540 }
541
542 emitField(0x06, 1, insn->limit);
543 emitCond5(0x00, CC_TR);
544
545 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
546 int32_t pos = insn->target.bb->binPos;
547 if (writeIssueDelays && !(pos & 0x1f))
548 pos += 8;
549 if (!insn->absolute)
550 emitField(0x14, 24, pos - (codeSize + 8));
551 else
552 emitField(0x14, 32, pos);
553 } else {
554 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
555 emitField(0x05, 1, 1);
556 }
557 }
558
559 void
560 CodeEmitterGM107::emitCAL()
561 {
562 const FlowInstruction *insn = this->insn->asFlow();
563
564 if (insn->absolute) {
565 emitInsn(0xe2200000, 0); // JCAL
566 } else {
567 emitInsn(0xe2600000, 0); // CAL
568 }
569
570 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
571 if (!insn->absolute)
572 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
573 else {
574 if (insn->builtin) {
575 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
576 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
577 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
578 } else {
579 emitField(0x14, 32, insn->target.bb->binPos);
580 }
581 }
582 } else {
583 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
584 emitField(0x05, 1, 1);
585 }
586 }
587
588 void
589 CodeEmitterGM107::emitPCNT()
590 {
591 const FlowInstruction *insn = this->insn->asFlow();
592
593 emitInsn(0xe2b00000, 0);
594
595 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
596 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
597 } else {
598 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
599 emitField(0x05, 1, 1);
600 }
601 }
602
603 void
604 CodeEmitterGM107::emitCONT()
605 {
606 emitInsn (0xe3500000);
607 emitCond5(0x00, CC_TR);
608 }
609
610 void
611 CodeEmitterGM107::emitPBK()
612 {
613 const FlowInstruction *insn = this->insn->asFlow();
614
615 emitInsn(0xe2a00000, 0);
616
617 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
618 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
619 } else {
620 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
621 emitField(0x05, 1, 1);
622 }
623 }
624
625 void
626 CodeEmitterGM107::emitBRK()
627 {
628 emitInsn (0xe3400000);
629 emitCond5(0x00, CC_TR);
630 }
631
632 void
633 CodeEmitterGM107::emitPRET()
634 {
635 const FlowInstruction *insn = this->insn->asFlow();
636
637 emitInsn(0xe2700000, 0);
638
639 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
640 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
641 } else {
642 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
643 emitField(0x05, 1, 1);
644 }
645 }
646
647 void
648 CodeEmitterGM107::emitRET()
649 {
650 emitInsn (0xe3200000);
651 emitCond5(0x00, CC_TR);
652 }
653
654 void
655 CodeEmitterGM107::emitSSY()
656 {
657 const FlowInstruction *insn = this->insn->asFlow();
658
659 emitInsn(0xe2900000, 0);
660
661 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
662 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
663 } else {
664 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
665 emitField(0x05, 1, 1);
666 }
667 }
668
669 void
670 CodeEmitterGM107::emitSYNC()
671 {
672 emitInsn (0xf0f80000);
673 emitCond5(0x00, CC_TR);
674 }
675
676 void
677 CodeEmitterGM107::emitSAM()
678 {
679 emitInsn(0xe3700000, 0);
680 }
681
682 void
683 CodeEmitterGM107::emitRAM()
684 {
685 emitInsn(0xe3800000, 0);
686 }
687
688 /*******************************************************************************
689 * predicate/cc
690 ******************************************************************************/
691
692 /*******************************************************************************
693 * movement / conversion
694 ******************************************************************************/
695
696 void
697 CodeEmitterGM107::emitMOV()
698 {
699 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
700 switch (insn->src(0).getFile()) {
701 case FILE_GPR:
702 if (insn->def(0).getFile() == FILE_PREDICATE) {
703 emitInsn(0x5b6a0000);
704 emitGPR (0x08);
705 } else {
706 emitInsn(0x5c980000);
707 }
708 emitGPR (0x14, insn->src(0));
709 break;
710 case FILE_MEMORY_CONST:
711 emitInsn(0x4c980000);
712 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
713 break;
714 case FILE_IMMEDIATE:
715 emitInsn(0x38980000);
716 emitIMMD(0x14, 19, insn->src(0));
717 break;
718 case FILE_PREDICATE:
719 emitInsn(0x50880000);
720 emitPRED(0x0c, insn->src(0));
721 emitPRED(0x1d);
722 emitPRED(0x27);
723 break;
724 default:
725 assert(!"bad src file");
726 break;
727 }
728 if (insn->def(0).getFile() != FILE_PREDICATE &&
729 insn->src(0).getFile() != FILE_PREDICATE)
730 emitField(0x27, 4, insn->lanes);
731 } else {
732 emitInsn (0x01000000);
733 emitIMMD (0x14, 32, insn->src(0));
734 emitField(0x0c, 4, insn->lanes);
735 }
736
737 if (insn->def(0).getFile() == FILE_PREDICATE) {
738 emitPRED(0x27);
739 emitPRED(0x03, insn->def(0));
740 emitPRED(0x00);
741 } else {
742 emitGPR(0x00, insn->def(0));
743 }
744 }
745
746 void
747 CodeEmitterGM107::emitS2R()
748 {
749 emitInsn(0xf0c80000);
750 emitSYS (0x14, insn->src(0));
751 emitGPR (0x00, insn->def(0));
752 }
753
754 void
755 CodeEmitterGM107::emitF2F()
756 {
757 RoundMode rnd = insn->rnd;
758
759 switch (insn->op) {
760 case OP_FLOOR: rnd = ROUND_MI; break;
761 case OP_CEIL : rnd = ROUND_PI; break;
762 case OP_TRUNC: rnd = ROUND_ZI; break;
763 default:
764 break;
765 }
766
767 switch (insn->src(0).getFile()) {
768 case FILE_GPR:
769 emitInsn(0x5ca80000);
770 emitGPR (0x14, insn->src(0));
771 break;
772 case FILE_MEMORY_CONST:
773 emitInsn(0x4ca80000);
774 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
775 break;
776 case FILE_IMMEDIATE:
777 emitInsn(0x38a80000);
778 emitIMMD(0x14, 19, insn->src(0));
779 break;
780 default:
781 assert(!"bad src0 file");
782 break;
783 }
784
785 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
786 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
787 emitCC (0x2f);
788 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
789 emitFMZ (0x2c, 1);
790 emitField(0x29, 1, insn->subOp);
791 emitRND (0x27, rnd, 0x2a);
792 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
793 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
794 emitGPR (0x00, insn->def(0));
795 }
796
797 void
798 CodeEmitterGM107::emitF2I()
799 {
800 RoundMode rnd = insn->rnd;
801
802 switch (insn->op) {
803 case OP_FLOOR: rnd = ROUND_M; break;
804 case OP_CEIL : rnd = ROUND_P; break;
805 case OP_TRUNC: rnd = ROUND_Z; break;
806 default:
807 break;
808 }
809
810 switch (insn->src(0).getFile()) {
811 case FILE_GPR:
812 emitInsn(0x5cb00000);
813 emitGPR (0x14, insn->src(0));
814 break;
815 case FILE_MEMORY_CONST:
816 emitInsn(0x4cb00000);
817 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
818 break;
819 case FILE_IMMEDIATE:
820 emitInsn(0x38b00000);
821 emitIMMD(0x14, 19, insn->src(0));
822 break;
823 default:
824 assert(!"bad src0 file");
825 break;
826 }
827
828 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
829 emitCC (0x2f);
830 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
831 emitFMZ (0x2c, 1);
832 emitRND (0x27, rnd, 0x2a);
833 emitField(0x0c, 1, isSignedType(insn->dType));
834 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
835 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
836 emitGPR (0x00, insn->def(0));
837 }
838
839 void
840 CodeEmitterGM107::emitI2F()
841 {
842 RoundMode rnd = insn->rnd;
843
844 switch (insn->op) {
845 case OP_FLOOR: rnd = ROUND_M; break;
846 case OP_CEIL : rnd = ROUND_P; break;
847 case OP_TRUNC: rnd = ROUND_Z; break;
848 default:
849 break;
850 }
851
852 switch (insn->src(0).getFile()) {
853 case FILE_GPR:
854 emitInsn(0x5cb80000);
855 emitGPR (0x14, insn->src(0));
856 break;
857 case FILE_MEMORY_CONST:
858 emitInsn(0x4cb80000);
859 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
860 break;
861 case FILE_IMMEDIATE:
862 emitInsn(0x38b80000);
863 emitIMMD(0x14, 19, insn->src(0));
864 break;
865 default:
866 assert(!"bad src0 file");
867 break;
868 }
869
870 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
871 emitCC (0x2f);
872 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
873 emitField(0x29, 2, insn->subOp);
874 emitRND (0x27, rnd, -1);
875 emitField(0x0d, 1, isSignedType(insn->sType));
876 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
877 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
878 emitGPR (0x00, insn->def(0));
879 }
880
881 void
882 CodeEmitterGM107::emitI2I()
883 {
884 switch (insn->src(0).getFile()) {
885 case FILE_GPR:
886 emitInsn(0x5ce00000);
887 emitGPR (0x14, insn->src(0));
888 break;
889 case FILE_MEMORY_CONST:
890 emitInsn(0x4ce00000);
891 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
892 break;
893 case FILE_IMMEDIATE:
894 emitInsn(0x38e00000);
895 emitIMMD(0x14, 19, insn->src(0));
896 break;
897 default:
898 assert(!"bad src0 file");
899 break;
900 }
901
902 emitSAT (0x32);
903 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
904 emitCC (0x2f);
905 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
906 emitField(0x29, 2, insn->subOp);
907 emitField(0x0d, 1, isSignedType(insn->sType));
908 emitField(0x0c, 1, isSignedType(insn->dType));
909 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
910 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
911 emitGPR (0x00, insn->def(0));
912 }
913
914 static void
915 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
916 {
917 int loc = entry->loc;
918 if (data.force_persample_interp)
919 code[loc + 1] |= 1 << 10;
920 else
921 code[loc + 1] &= ~(1 << 10);
922 }
923
924 void
925 CodeEmitterGM107::emitSEL()
926 {
927 switch (insn->src(1).getFile()) {
928 case FILE_GPR:
929 emitInsn(0x5ca00000);
930 emitGPR (0x14, insn->src(1));
931 break;
932 case FILE_MEMORY_CONST:
933 emitInsn(0x4ca00000);
934 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
935 break;
936 case FILE_IMMEDIATE:
937 emitInsn(0x38a00000);
938 emitIMMD(0x14, 19, insn->src(1));
939 break;
940 default:
941 assert(!"bad src1 file");
942 break;
943 }
944
945 emitINV (0x2a, insn->src(2));
946 emitPRED(0x27, insn->src(2));
947 emitGPR (0x08, insn->src(0));
948 emitGPR (0x00, insn->def(0));
949
950 if (insn->subOp == 1) {
951 addInterp(0, 0, selpFlip);
952 }
953 }
954
955 void
956 CodeEmitterGM107::emitSHFL()
957 {
958 int type = 0;
959
960 emitInsn (0xef100000);
961
962 switch (insn->src(1).getFile()) {
963 case FILE_GPR:
964 emitGPR(0x14, insn->src(1));
965 break;
966 case FILE_IMMEDIATE:
967 emitIMMD(0x14, 5, insn->src(1));
968 type |= 1;
969 break;
970 default:
971 assert(!"invalid src1 file");
972 break;
973 }
974
975 switch (insn->src(2).getFile()) {
976 case FILE_GPR:
977 emitGPR(0x27, insn->src(2));
978 break;
979 case FILE_IMMEDIATE:
980 emitIMMD(0x22, 13, insn->src(2));
981 type |= 2;
982 break;
983 default:
984 assert(!"invalid src2 file");
985 break;
986 }
987
988 if (!insn->defExists(1))
989 emitPRED(0x30);
990 else {
991 assert(insn->def(1).getFile() == FILE_PREDICATE);
992 emitPRED(0x30, insn->def(1));
993 }
994
995 emitField(0x1e, 2, insn->subOp);
996 emitField(0x1c, 2, type);
997 emitGPR (0x08, insn->src(0));
998 emitGPR (0x00, insn->def(0));
999 }
1000
1001 /*******************************************************************************
1002 * double
1003 ******************************************************************************/
1004
1005 void
1006 CodeEmitterGM107::emitDADD()
1007 {
1008 switch (insn->src(1).getFile()) {
1009 case FILE_GPR:
1010 emitInsn(0x5c700000);
1011 emitGPR (0x14, insn->src(1));
1012 break;
1013 case FILE_MEMORY_CONST:
1014 emitInsn(0x4c700000);
1015 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1016 break;
1017 case FILE_IMMEDIATE:
1018 emitInsn(0x38700000);
1019 emitIMMD(0x14, 19, insn->src(1));
1020 break;
1021 default:
1022 assert(!"bad src1 file");
1023 break;
1024 }
1025 emitABS(0x31, insn->src(1));
1026 emitNEG(0x30, insn->src(0));
1027 emitCC (0x2f);
1028 emitABS(0x2e, insn->src(0));
1029 emitNEG(0x2d, insn->src(1));
1030
1031 if (insn->op == OP_SUB)
1032 code[1] ^= 0x00002000;
1033
1034 emitGPR(0x08, insn->src(0));
1035 emitGPR(0x00, insn->def(0));
1036 }
1037
1038 void
1039 CodeEmitterGM107::emitDMUL()
1040 {
1041 switch (insn->src(1).getFile()) {
1042 case FILE_GPR:
1043 emitInsn(0x5c800000);
1044 emitGPR (0x14, insn->src(1));
1045 break;
1046 case FILE_MEMORY_CONST:
1047 emitInsn(0x4c800000);
1048 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1049 break;
1050 case FILE_IMMEDIATE:
1051 emitInsn(0x38800000);
1052 emitIMMD(0x14, 19, insn->src(1));
1053 break;
1054 default:
1055 assert(!"bad src1 file");
1056 break;
1057 }
1058
1059 emitNEG2(0x30, insn->src(0), insn->src(1));
1060 emitCC (0x2f);
1061 emitRND (0x27);
1062 emitGPR (0x08, insn->src(0));
1063 emitGPR (0x00, insn->def(0));
1064 }
1065
1066 void
1067 CodeEmitterGM107::emitDFMA()
1068 {
1069 switch(insn->src(2).getFile()) {
1070 case FILE_GPR:
1071 switch (insn->src(1).getFile()) {
1072 case FILE_GPR:
1073 emitInsn(0x5b700000);
1074 emitGPR (0x14, insn->src(1));
1075 break;
1076 case FILE_MEMORY_CONST:
1077 emitInsn(0x4b700000);
1078 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1079 break;
1080 case FILE_IMMEDIATE:
1081 emitInsn(0x36700000);
1082 emitIMMD(0x14, 19, insn->src(1));
1083 break;
1084 default:
1085 assert(!"bad src1 file");
1086 break;
1087 }
1088 emitGPR (0x27, insn->src(2));
1089 break;
1090 case FILE_MEMORY_CONST:
1091 emitInsn(0x53700000);
1092 emitGPR (0x27, insn->src(1));
1093 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1094 break;
1095 default:
1096 assert(!"bad src2 file");
1097 break;
1098 }
1099
1100 emitRND (0x32);
1101 emitNEG (0x31, insn->src(2));
1102 emitNEG2(0x30, insn->src(0), insn->src(1));
1103 emitCC (0x2f);
1104 emitGPR (0x08, insn->src(0));
1105 emitGPR (0x00, insn->def(0));
1106 }
1107
1108 void
1109 CodeEmitterGM107::emitDMNMX()
1110 {
1111 switch (insn->src(1).getFile()) {
1112 case FILE_GPR:
1113 emitInsn(0x5c500000);
1114 emitGPR (0x14, insn->src(1));
1115 break;
1116 case FILE_MEMORY_CONST:
1117 emitInsn(0x4c500000);
1118 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1119 break;
1120 case FILE_IMMEDIATE:
1121 emitInsn(0x38500000);
1122 emitIMMD(0x14, 19, insn->src(1));
1123 break;
1124 default:
1125 assert(!"bad src1 file");
1126 break;
1127 }
1128
1129 emitABS (0x31, insn->src(1));
1130 emitNEG (0x30, insn->src(0));
1131 emitCC (0x2f);
1132 emitABS (0x2e, insn->src(0));
1133 emitNEG (0x2d, insn->src(1));
1134 emitField(0x2a, 1, insn->op == OP_MAX);
1135 emitPRED (0x27);
1136 emitGPR (0x08, insn->src(0));
1137 emitGPR (0x00, insn->def(0));
1138 }
1139
1140 void
1141 CodeEmitterGM107::emitDSET()
1142 {
1143 const CmpInstruction *insn = this->insn->asCmp();
1144
1145 switch (insn->src(1).getFile()) {
1146 case FILE_GPR:
1147 emitInsn(0x59000000);
1148 emitGPR (0x14, insn->src(1));
1149 break;
1150 case FILE_MEMORY_CONST:
1151 emitInsn(0x49000000);
1152 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1153 break;
1154 case FILE_IMMEDIATE:
1155 emitInsn(0x32000000);
1156 emitIMMD(0x14, 19, insn->src(1));
1157 break;
1158 default:
1159 assert(!"bad src1 file");
1160 break;
1161 }
1162
1163 if (insn->op != OP_SET) {
1164 switch (insn->op) {
1165 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1166 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1167 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1168 default:
1169 assert(!"invalid set op");
1170 break;
1171 }
1172 emitPRED(0x27, insn->src(2));
1173 } else {
1174 emitPRED(0x27);
1175 }
1176
1177 emitABS (0x36, insn->src(0));
1178 emitNEG (0x35, insn->src(1));
1179 emitField(0x34, 1, insn->dType == TYPE_F32);
1180 emitCond4(0x30, insn->setCond);
1181 emitCC (0x2f);
1182 emitABS (0x2c, insn->src(1));
1183 emitNEG (0x2b, insn->src(0));
1184 emitGPR (0x08, insn->src(0));
1185 emitGPR (0x00, insn->def(0));
1186 }
1187
1188 void
1189 CodeEmitterGM107::emitDSETP()
1190 {
1191 const CmpInstruction *insn = this->insn->asCmp();
1192
1193 switch (insn->src(1).getFile()) {
1194 case FILE_GPR:
1195 emitInsn(0x5b800000);
1196 emitGPR (0x14, insn->src(1));
1197 break;
1198 case FILE_MEMORY_CONST:
1199 emitInsn(0x4b800000);
1200 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1201 break;
1202 case FILE_IMMEDIATE:
1203 emitInsn(0x36800000);
1204 emitIMMD(0x14, 19, insn->src(1));
1205 break;
1206 default:
1207 assert(!"bad src1 file");
1208 break;
1209 }
1210
1211 if (insn->op != OP_SET) {
1212 switch (insn->op) {
1213 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1214 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1215 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1216 default:
1217 assert(!"invalid set op");
1218 break;
1219 }
1220 emitPRED(0x27, insn->src(2));
1221 } else {
1222 emitPRED(0x27);
1223 }
1224
1225 emitCond4(0x30, insn->setCond);
1226 emitABS (0x2c, insn->src(1));
1227 emitNEG (0x2b, insn->src(0));
1228 emitGPR (0x08, insn->src(0));
1229 emitABS (0x07, insn->src(0));
1230 emitNEG (0x06, insn->src(1));
1231 emitPRED (0x03, insn->def(0));
1232 if (insn->defExists(1))
1233 emitPRED(0x00, insn->def(1));
1234 else
1235 emitPRED(0x00);
1236 }
1237
1238 /*******************************************************************************
1239 * float
1240 ******************************************************************************/
1241
1242 void
1243 CodeEmitterGM107::emitFADD()
1244 {
1245 if (!longIMMD(insn->src(1))) {
1246 switch (insn->src(1).getFile()) {
1247 case FILE_GPR:
1248 emitInsn(0x5c580000);
1249 emitGPR (0x14, insn->src(1));
1250 break;
1251 case FILE_MEMORY_CONST:
1252 emitInsn(0x4c580000);
1253 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1254 break;
1255 case FILE_IMMEDIATE:
1256 emitInsn(0x38580000);
1257 emitIMMD(0x14, 19, insn->src(1));
1258 break;
1259 default:
1260 assert(!"bad src1 file");
1261 break;
1262 }
1263 emitSAT(0x32);
1264 emitABS(0x31, insn->src(1));
1265 emitNEG(0x30, insn->src(0));
1266 emitCC (0x2f);
1267 emitABS(0x2e, insn->src(0));
1268 emitNEG(0x2d, insn->src(1));
1269 emitFMZ(0x2c, 1);
1270
1271 if (insn->op == OP_SUB)
1272 code[1] ^= 0x00002000;
1273 } else {
1274 emitInsn(0x08000000);
1275 emitABS(0x39, insn->src(1));
1276 emitNEG(0x38, insn->src(0));
1277 emitFMZ(0x37, 1);
1278 emitABS(0x36, insn->src(0));
1279 emitNEG(0x35, insn->src(1));
1280 emitCC (0x34);
1281 emitIMMD(0x14, 32, insn->src(1));
1282
1283 if (insn->op == OP_SUB)
1284 code[1] ^= 0x00080000;
1285 }
1286
1287 emitGPR(0x08, insn->src(0));
1288 emitGPR(0x00, insn->def(0));
1289 }
1290
1291 void
1292 CodeEmitterGM107::emitFMUL()
1293 {
1294 if (!longIMMD(insn->src(1))) {
1295 switch (insn->src(1).getFile()) {
1296 case FILE_GPR:
1297 emitInsn(0x5c680000);
1298 emitGPR (0x14, insn->src(1));
1299 break;
1300 case FILE_MEMORY_CONST:
1301 emitInsn(0x4c680000);
1302 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1303 break;
1304 case FILE_IMMEDIATE:
1305 emitInsn(0x38680000);
1306 emitIMMD(0x14, 19, insn->src(1));
1307 break;
1308 default:
1309 assert(!"bad src1 file");
1310 break;
1311 }
1312 emitSAT (0x32);
1313 emitNEG2(0x30, insn->src(0), insn->src(1));
1314 emitCC (0x2f);
1315 emitFMZ (0x2c, 2);
1316 emitPDIV(0x29);
1317 emitRND (0x27);
1318 } else {
1319 emitInsn(0x1e000000);
1320 emitSAT (0x37);
1321 emitFMZ (0x35, 2);
1322 emitCC (0x34);
1323 emitIMMD(0x14, 32, insn->src(1));
1324 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1325 code[1] ^= 0x00080000; /* flip immd sign bit */
1326 }
1327
1328 emitGPR(0x08, insn->src(0));
1329 emitGPR(0x00, insn->def(0));
1330 }
1331
1332 void
1333 CodeEmitterGM107::emitFFMA()
1334 {
1335 bool isLongIMMD = false;
1336 switch(insn->src(2).getFile()) {
1337 case FILE_GPR:
1338 switch (insn->src(1).getFile()) {
1339 case FILE_GPR:
1340 emitInsn(0x59800000);
1341 emitGPR (0x14, insn->src(1));
1342 break;
1343 case FILE_MEMORY_CONST:
1344 emitInsn(0x49800000);
1345 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1346 break;
1347 case FILE_IMMEDIATE:
1348 if (longIMMD(insn->getSrc(1))) {
1349 assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1350 isLongIMMD = true;
1351 emitInsn(0x0c000000);
1352 emitIMMD(0x14, 32, insn->src(1));
1353 } else {
1354 emitInsn(0x32800000);
1355 emitIMMD(0x14, 19, insn->src(1));
1356 }
1357 break;
1358 default:
1359 assert(!"bad src1 file");
1360 break;
1361 }
1362 if (!isLongIMMD)
1363 emitGPR (0x27, insn->src(2));
1364 break;
1365 case FILE_MEMORY_CONST:
1366 emitInsn(0x51800000);
1367 emitGPR (0x27, insn->src(1));
1368 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1369 break;
1370 default:
1371 assert(!"bad src2 file");
1372 break;
1373 }
1374
1375 if (isLongIMMD) {
1376 emitNEG (0x39, insn->src(2));
1377 emitNEG2(0x38, insn->src(0), insn->src(1));
1378 emitSAT (0x37);
1379 emitCC (0x34);
1380 } else {
1381 emitRND (0x33);
1382 emitSAT (0x32);
1383 emitNEG (0x31, insn->src(2));
1384 emitNEG2(0x30, insn->src(0), insn->src(1));
1385 emitCC (0x2f);
1386 }
1387
1388 emitFMZ(0x35, 2);
1389 emitGPR(0x08, insn->src(0));
1390 emitGPR(0x00, insn->def(0));
1391 }
1392
1393 void
1394 CodeEmitterGM107::emitMUFU()
1395 {
1396 int mufu = 0;
1397
1398 switch (insn->op) {
1399 case OP_COS: mufu = 0; break;
1400 case OP_SIN: mufu = 1; break;
1401 case OP_EX2: mufu = 2; break;
1402 case OP_LG2: mufu = 3; break;
1403 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1404 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1405 default:
1406 assert(!"invalid mufu");
1407 break;
1408 }
1409
1410 emitInsn (0x50800000);
1411 emitSAT (0x32);
1412 emitNEG (0x30, insn->src(0));
1413 emitABS (0x2e, insn->src(0));
1414 emitField(0x14, 3, mufu);
1415 emitGPR (0x08, insn->src(0));
1416 emitGPR (0x00, insn->def(0));
1417 }
1418
1419 void
1420 CodeEmitterGM107::emitFMNMX()
1421 {
1422 switch (insn->src(1).getFile()) {
1423 case FILE_GPR:
1424 emitInsn(0x5c600000);
1425 emitGPR (0x14, insn->src(1));
1426 break;
1427 case FILE_MEMORY_CONST:
1428 emitInsn(0x4c600000);
1429 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1430 break;
1431 case FILE_IMMEDIATE:
1432 emitInsn(0x38600000);
1433 emitIMMD(0x14, 19, insn->src(1));
1434 break;
1435 default:
1436 assert(!"bad src1 file");
1437 break;
1438 }
1439
1440 emitField(0x2a, 1, insn->op == OP_MAX);
1441 emitPRED (0x27);
1442
1443 emitABS(0x31, insn->src(1));
1444 emitNEG(0x30, insn->src(0));
1445 emitCC (0x2f);
1446 emitABS(0x2e, insn->src(0));
1447 emitNEG(0x2d, insn->src(1));
1448 emitFMZ(0x2c, 1);
1449 emitGPR(0x08, insn->src(0));
1450 emitGPR(0x00, insn->def(0));
1451 }
1452
1453 void
1454 CodeEmitterGM107::emitRRO()
1455 {
1456 switch (insn->src(0).getFile()) {
1457 case FILE_GPR:
1458 emitInsn(0x5c900000);
1459 emitGPR (0x14, insn->src(0));
1460 break;
1461 case FILE_MEMORY_CONST:
1462 emitInsn(0x4c900000);
1463 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1464 break;
1465 case FILE_IMMEDIATE:
1466 emitInsn(0x38900000);
1467 emitIMMD(0x14, 19, insn->src(0));
1468 break;
1469 default:
1470 assert(!"bad src file");
1471 break;
1472 }
1473
1474 emitABS (0x31, insn->src(0));
1475 emitNEG (0x2d, insn->src(0));
1476 emitField(0x27, 1, insn->op == OP_PREEX2);
1477 emitGPR (0x00, insn->def(0));
1478 }
1479
1480 void
1481 CodeEmitterGM107::emitFCMP()
1482 {
1483 const CmpInstruction *insn = this->insn->asCmp();
1484 CondCode cc = insn->setCond;
1485
1486 if (insn->src(2).mod.neg())
1487 cc = reverseCondCode(cc);
1488
1489 switch(insn->src(2).getFile()) {
1490 case FILE_GPR:
1491 switch (insn->src(1).getFile()) {
1492 case FILE_GPR:
1493 emitInsn(0x5ba00000);
1494 emitGPR (0x14, insn->src(1));
1495 break;
1496 case FILE_MEMORY_CONST:
1497 emitInsn(0x4ba00000);
1498 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1499 break;
1500 case FILE_IMMEDIATE:
1501 emitInsn(0x36a00000);
1502 emitIMMD(0x14, 19, insn->src(1));
1503 break;
1504 default:
1505 assert(!"bad src1 file");
1506 break;
1507 }
1508 emitGPR (0x27, insn->src(2));
1509 break;
1510 case FILE_MEMORY_CONST:
1511 emitInsn(0x53a00000);
1512 emitGPR (0x27, insn->src(1));
1513 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1514 break;
1515 default:
1516 assert(!"bad src2 file");
1517 break;
1518 }
1519
1520 emitCond4(0x30, cc);
1521 emitFMZ (0x2f, 1);
1522 emitGPR (0x08, insn->src(0));
1523 emitGPR (0x00, insn->def(0));
1524 }
1525
1526 void
1527 CodeEmitterGM107::emitFSET()
1528 {
1529 const CmpInstruction *insn = this->insn->asCmp();
1530
1531 switch (insn->src(1).getFile()) {
1532 case FILE_GPR:
1533 emitInsn(0x58000000);
1534 emitGPR (0x14, insn->src(1));
1535 break;
1536 case FILE_MEMORY_CONST:
1537 emitInsn(0x48000000);
1538 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1539 break;
1540 case FILE_IMMEDIATE:
1541 emitInsn(0x30000000);
1542 emitIMMD(0x14, 19, insn->src(1));
1543 break;
1544 default:
1545 assert(!"bad src1 file");
1546 break;
1547 }
1548
1549 if (insn->op != OP_SET) {
1550 switch (insn->op) {
1551 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1552 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1553 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1554 default:
1555 assert(!"invalid set op");
1556 break;
1557 }
1558 emitPRED(0x27, insn->src(2));
1559 } else {
1560 emitPRED(0x27);
1561 }
1562
1563 emitFMZ (0x37, 1);
1564 emitABS (0x36, insn->src(0));
1565 emitNEG (0x35, insn->src(1));
1566 emitField(0x34, 1, insn->dType == TYPE_F32);
1567 emitCond4(0x30, insn->setCond);
1568 emitCC (0x2f);
1569 emitABS (0x2c, insn->src(1));
1570 emitNEG (0x2b, insn->src(0));
1571 emitGPR (0x08, insn->src(0));
1572 emitGPR (0x00, insn->def(0));
1573 }
1574
1575 void
1576 CodeEmitterGM107::emitFSETP()
1577 {
1578 const CmpInstruction *insn = this->insn->asCmp();
1579
1580 switch (insn->src(1).getFile()) {
1581 case FILE_GPR:
1582 emitInsn(0x5bb00000);
1583 emitGPR (0x14, insn->src(1));
1584 break;
1585 case FILE_MEMORY_CONST:
1586 emitInsn(0x4bb00000);
1587 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1588 break;
1589 case FILE_IMMEDIATE:
1590 emitInsn(0x36b00000);
1591 emitIMMD(0x14, 19, insn->src(1));
1592 break;
1593 default:
1594 assert(!"bad src1 file");
1595 break;
1596 }
1597
1598 if (insn->op != OP_SET) {
1599 switch (insn->op) {
1600 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1601 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1602 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1603 default:
1604 assert(!"invalid set op");
1605 break;
1606 }
1607 emitPRED(0x27, insn->src(2));
1608 } else {
1609 emitPRED(0x27);
1610 }
1611
1612 emitCond4(0x30, insn->setCond);
1613 emitFMZ (0x2f, 1);
1614 emitABS (0x2c, insn->src(1));
1615 emitNEG (0x2b, insn->src(0));
1616 emitGPR (0x08, insn->src(0));
1617 emitABS (0x07, insn->src(0));
1618 emitNEG (0x06, insn->src(1));
1619 emitPRED (0x03, insn->def(0));
1620 if (insn->defExists(1))
1621 emitPRED(0x00, insn->def(1));
1622 else
1623 emitPRED(0x00);
1624 }
1625
1626 void
1627 CodeEmitterGM107::emitFSWZADD()
1628 {
1629 emitInsn (0x50f80000);
1630 emitCC (0x2f);
1631 emitFMZ (0x2c, 1);
1632 emitRND (0x27);
1633 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1634 emitField(0x1c, 8, insn->subOp);
1635 if (insn->predSrc != 1)
1636 emitGPR (0x14, insn->src(1));
1637 else
1638 emitGPR (0x14);
1639 emitGPR (0x08, insn->src(0));
1640 emitGPR (0x00, insn->def(0));
1641 }
1642
1643 /*******************************************************************************
1644 * integer
1645 ******************************************************************************/
1646
1647 void
1648 CodeEmitterGM107::emitLOP()
1649 {
1650 int lop = 0;
1651
1652 switch (insn->op) {
1653 case OP_AND: lop = 0; break;
1654 case OP_OR : lop = 1; break;
1655 case OP_XOR: lop = 2; break;
1656 default:
1657 assert(!"invalid lop");
1658 break;
1659 }
1660
1661 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1662 switch (insn->src(1).getFile()) {
1663 case FILE_GPR:
1664 emitInsn(0x5c400000);
1665 emitGPR (0x14, insn->src(1));
1666 break;
1667 case FILE_MEMORY_CONST:
1668 emitInsn(0x4c400000);
1669 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1670 break;
1671 case FILE_IMMEDIATE:
1672 emitInsn(0x38400000);
1673 emitIMMD(0x14, 19, insn->src(1));
1674 break;
1675 default:
1676 assert(!"bad src1 file");
1677 break;
1678 }
1679 emitPRED (0x30);
1680 emitCC (0x2f);
1681 emitX (0x2b);
1682 emitField(0x29, 2, lop);
1683 emitINV (0x28, insn->src(1));
1684 emitINV (0x27, insn->src(0));
1685 } else {
1686 emitInsn (0x04000000);
1687 emitX (0x39);
1688 emitINV (0x38, insn->src(1));
1689 emitINV (0x37, insn->src(0));
1690 emitField(0x35, 2, lop);
1691 emitCC (0x34);
1692 emitIMMD (0x14, 32, insn->src(1));
1693 }
1694
1695 emitGPR (0x08, insn->src(0));
1696 emitGPR (0x00, insn->def(0));
1697 }
1698
1699 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1700 void
1701 CodeEmitterGM107::emitNOT()
1702 {
1703 if (!longIMMD(insn->src(0))) {
1704 switch (insn->src(0).getFile()) {
1705 case FILE_GPR:
1706 emitInsn(0x5c400700);
1707 emitGPR (0x14, insn->src(0));
1708 break;
1709 case FILE_MEMORY_CONST:
1710 emitInsn(0x4c400700);
1711 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1712 break;
1713 case FILE_IMMEDIATE:
1714 emitInsn(0x38400700);
1715 emitIMMD(0x14, 19, insn->src(0));
1716 break;
1717 default:
1718 assert(!"bad src1 file");
1719 break;
1720 }
1721 emitPRED (0x30);
1722 } else {
1723 emitInsn (0x05600000);
1724 emitIMMD (0x14, 32, insn->src(1));
1725 }
1726
1727 emitGPR(0x08);
1728 emitGPR(0x00, insn->def(0));
1729 }
1730
1731 void
1732 CodeEmitterGM107::emitIADD()
1733 {
1734 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1735 switch (insn->src(1).getFile()) {
1736 case FILE_GPR:
1737 emitInsn(0x5c100000);
1738 emitGPR (0x14, insn->src(1));
1739 break;
1740 case FILE_MEMORY_CONST:
1741 emitInsn(0x4c100000);
1742 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1743 break;
1744 case FILE_IMMEDIATE:
1745 emitInsn(0x38100000);
1746 emitIMMD(0x14, 19, insn->src(1));
1747 break;
1748 default:
1749 assert(!"bad src1 file");
1750 break;
1751 }
1752 emitSAT(0x32);
1753 emitNEG(0x31, insn->src(0));
1754 emitNEG(0x30, insn->src(1));
1755 emitCC (0x2f);
1756 emitX (0x2b);
1757 } else {
1758 emitInsn(0x1c000000);
1759 emitNEG (0x38, insn->src(0));
1760 emitSAT (0x36);
1761 emitX (0x35);
1762 emitCC (0x34);
1763 emitIMMD(0x14, 32, insn->src(1));
1764 }
1765
1766 if (insn->op == OP_SUB)
1767 code[1] ^= 0x00010000;
1768
1769 emitGPR(0x08, insn->src(0));
1770 emitGPR(0x00, insn->def(0));
1771 }
1772
1773 void
1774 CodeEmitterGM107::emitIMUL()
1775 {
1776 if (insn->src(1).getFile() != FILE_IMMEDIATE) {
1777 switch (insn->src(1).getFile()) {
1778 case FILE_GPR:
1779 emitInsn(0x5c380000);
1780 emitGPR (0x14, insn->src(1));
1781 break;
1782 case FILE_MEMORY_CONST:
1783 emitInsn(0x4c380000);
1784 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1785 break;
1786 case FILE_IMMEDIATE:
1787 emitInsn(0x38380000);
1788 emitIMMD(0x14, 19, insn->src(1));
1789 break;
1790 default:
1791 assert(!"bad src1 file");
1792 break;
1793 }
1794 emitCC (0x2f);
1795 emitField(0x29, 1, isSignedType(insn->sType));
1796 emitField(0x28, 1, isSignedType(insn->dType));
1797 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1798 } else {
1799 emitInsn (0x1f000000);
1800 emitField(0x37, 1, isSignedType(insn->sType));
1801 emitField(0x36, 1, isSignedType(insn->dType));
1802 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1803 emitCC (0x34);
1804 emitIMMD (0x14, 32, insn->src(1));
1805 }
1806
1807 emitGPR(0x08, insn->src(0));
1808 emitGPR(0x00, insn->def(0));
1809 }
1810
1811 void
1812 CodeEmitterGM107::emitIMAD()
1813 {
1814 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1815 switch(insn->src(2).getFile()) {
1816 case FILE_GPR:
1817 switch (insn->src(1).getFile()) {
1818 case FILE_GPR:
1819 emitInsn(0x5a000000);
1820 emitGPR (0x14, insn->src(1));
1821 break;
1822 case FILE_MEMORY_CONST:
1823 emitInsn(0x4a000000);
1824 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1825 break;
1826 case FILE_IMMEDIATE:
1827 emitInsn(0x34000000);
1828 emitIMMD(0x14, 19, insn->src(1));
1829 break;
1830 default:
1831 assert(!"bad src1 file");
1832 break;
1833 }
1834 emitGPR (0x27, insn->src(2));
1835 break;
1836 case FILE_MEMORY_CONST:
1837 emitInsn(0x52000000);
1838 emitGPR (0x27, insn->src(1));
1839 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1840 break;
1841 default:
1842 assert(!"bad src2 file");
1843 break;
1844 }
1845
1846 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1847 emitField(0x35, 1, isSignedType(insn->sType));
1848 emitNEG (0x34, insn->src(2));
1849 emitNEG2 (0x33, insn->src(0), insn->src(1));
1850 emitSAT (0x32);
1851 emitX (0x31);
1852 emitField(0x30, 1, isSignedType(insn->dType));
1853 emitCC (0x2f);
1854 emitGPR (0x08, insn->src(0));
1855 emitGPR (0x00, insn->def(0));
1856 }
1857
1858 void
1859 CodeEmitterGM107::emitISCADD()
1860 {
1861 switch (insn->src(2).getFile()) {
1862 case FILE_GPR:
1863 emitInsn(0x5c180000);
1864 emitGPR (0x14, insn->src(2));
1865 break;
1866 case FILE_MEMORY_CONST:
1867 emitInsn(0x4c180000);
1868 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1869 break;
1870 case FILE_IMMEDIATE:
1871 emitInsn(0x38180000);
1872 emitIMMD(0x14, 19, insn->src(2));
1873 break;
1874 default:
1875 assert(!"bad src1 file");
1876 break;
1877 }
1878 emitNEG (0x31, insn->src(0));
1879 emitNEG (0x30, insn->src(2));
1880 emitCC (0x2f);
1881 emitIMMD(0x27, 5, insn->src(1));
1882 emitGPR (0x08, insn->src(0));
1883 emitGPR (0x00, insn->def(0));
1884 }
1885
1886 void
1887 CodeEmitterGM107::emitIMNMX()
1888 {
1889 switch (insn->src(1).getFile()) {
1890 case FILE_GPR:
1891 emitInsn(0x5c200000);
1892 emitGPR (0x14, insn->src(1));
1893 break;
1894 case FILE_MEMORY_CONST:
1895 emitInsn(0x4c200000);
1896 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1897 break;
1898 case FILE_IMMEDIATE:
1899 emitInsn(0x38200000);
1900 emitIMMD(0x14, 19, insn->src(1));
1901 break;
1902 default:
1903 assert(!"bad src1 file");
1904 break;
1905 }
1906
1907 emitField(0x30, 1, isSignedType(insn->dType));
1908 emitCC (0x2f);
1909 emitField(0x2b, 2, insn->subOp);
1910 emitField(0x2a, 1, insn->op == OP_MAX);
1911 emitPRED (0x27);
1912 emitGPR (0x08, insn->src(0));
1913 emitGPR (0x00, insn->def(0));
1914 }
1915
1916 void
1917 CodeEmitterGM107::emitICMP()
1918 {
1919 const CmpInstruction *insn = this->insn->asCmp();
1920 CondCode cc = insn->setCond;
1921
1922 if (insn->src(2).mod.neg())
1923 cc = reverseCondCode(cc);
1924
1925 switch(insn->src(2).getFile()) {
1926 case FILE_GPR:
1927 switch (insn->src(1).getFile()) {
1928 case FILE_GPR:
1929 emitInsn(0x5b400000);
1930 emitGPR (0x14, insn->src(1));
1931 break;
1932 case FILE_MEMORY_CONST:
1933 emitInsn(0x4b400000);
1934 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1935 break;
1936 case FILE_IMMEDIATE:
1937 emitInsn(0x36400000);
1938 emitIMMD(0x14, 19, insn->src(1));
1939 break;
1940 default:
1941 assert(!"bad src1 file");
1942 break;
1943 }
1944 emitGPR (0x27, insn->src(2));
1945 break;
1946 case FILE_MEMORY_CONST:
1947 emitInsn(0x53400000);
1948 emitGPR (0x27, insn->src(1));
1949 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1950 break;
1951 default:
1952 assert(!"bad src2 file");
1953 break;
1954 }
1955
1956 emitCond3(0x31, cc);
1957 emitField(0x30, 1, isSignedType(insn->sType));
1958 emitGPR (0x08, insn->src(0));
1959 emitGPR (0x00, insn->def(0));
1960 }
1961
1962 void
1963 CodeEmitterGM107::emitISET()
1964 {
1965 const CmpInstruction *insn = this->insn->asCmp();
1966
1967 switch (insn->src(1).getFile()) {
1968 case FILE_GPR:
1969 emitInsn(0x5b500000);
1970 emitGPR (0x14, insn->src(1));
1971 break;
1972 case FILE_MEMORY_CONST:
1973 emitInsn(0x4b500000);
1974 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1975 break;
1976 case FILE_IMMEDIATE:
1977 emitInsn(0x36500000);
1978 emitIMMD(0x14, 19, insn->src(1));
1979 break;
1980 default:
1981 assert(!"bad src1 file");
1982 break;
1983 }
1984
1985 if (insn->op != OP_SET) {
1986 switch (insn->op) {
1987 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1988 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1989 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1990 default:
1991 assert(!"invalid set op");
1992 break;
1993 }
1994 emitPRED(0x27, insn->src(2));
1995 } else {
1996 emitPRED(0x27);
1997 }
1998
1999 emitCond3(0x31, insn->setCond);
2000 emitField(0x30, 1, isSignedType(insn->sType));
2001 emitCC (0x2f);
2002 emitField(0x2c, 1, insn->dType == TYPE_F32);
2003 emitX (0x2b);
2004 emitGPR (0x08, insn->src(0));
2005 emitGPR (0x00, insn->def(0));
2006 }
2007
2008 void
2009 CodeEmitterGM107::emitISETP()
2010 {
2011 const CmpInstruction *insn = this->insn->asCmp();
2012
2013 switch (insn->src(1).getFile()) {
2014 case FILE_GPR:
2015 emitInsn(0x5b600000);
2016 emitGPR (0x14, insn->src(1));
2017 break;
2018 case FILE_MEMORY_CONST:
2019 emitInsn(0x4b600000);
2020 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2021 break;
2022 case FILE_IMMEDIATE:
2023 emitInsn(0x36600000);
2024 emitIMMD(0x14, 19, insn->src(1));
2025 break;
2026 default:
2027 assert(!"bad src1 file");
2028 break;
2029 }
2030
2031 if (insn->op != OP_SET) {
2032 switch (insn->op) {
2033 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2034 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2035 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2036 default:
2037 assert(!"invalid set op");
2038 break;
2039 }
2040 emitPRED(0x27, insn->src(2));
2041 } else {
2042 emitPRED(0x27);
2043 }
2044
2045 emitCond3(0x31, insn->setCond);
2046 emitField(0x30, 1, isSignedType(insn->sType));
2047 emitX (0x2b);
2048 emitGPR (0x08, insn->src(0));
2049 emitPRED (0x03, insn->def(0));
2050 if (insn->defExists(1))
2051 emitPRED(0x00, insn->def(1));
2052 else
2053 emitPRED(0x00);
2054 }
2055
2056 void
2057 CodeEmitterGM107::emitSHL()
2058 {
2059 switch (insn->src(1).getFile()) {
2060 case FILE_GPR:
2061 emitInsn(0x5c480000);
2062 emitGPR (0x14, insn->src(1));
2063 break;
2064 case FILE_MEMORY_CONST:
2065 emitInsn(0x4c480000);
2066 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2067 break;
2068 case FILE_IMMEDIATE:
2069 emitInsn(0x38480000);
2070 emitIMMD(0x14, 19, insn->src(1));
2071 break;
2072 default:
2073 assert(!"bad src1 file");
2074 break;
2075 }
2076
2077 emitCC (0x2f);
2078 emitX (0x2b);
2079 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2080 emitGPR (0x08, insn->src(0));
2081 emitGPR (0x00, insn->def(0));
2082 }
2083
2084 void
2085 CodeEmitterGM107::emitSHR()
2086 {
2087 switch (insn->src(1).getFile()) {
2088 case FILE_GPR:
2089 emitInsn(0x5c280000);
2090 emitGPR (0x14, insn->src(1));
2091 break;
2092 case FILE_MEMORY_CONST:
2093 emitInsn(0x4c280000);
2094 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2095 break;
2096 case FILE_IMMEDIATE:
2097 emitInsn(0x38280000);
2098 emitIMMD(0x14, 19, insn->src(1));
2099 break;
2100 default:
2101 assert(!"bad src1 file");
2102 break;
2103 }
2104
2105 emitField(0x30, 1, isSignedType(insn->dType));
2106 emitCC (0x2f);
2107 emitX (0x2c);
2108 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2109 emitGPR (0x08, insn->src(0));
2110 emitGPR (0x00, insn->def(0));
2111 }
2112
2113 void
2114 CodeEmitterGM107::emitSHF()
2115 {
2116 unsigned type;
2117
2118 switch (insn->src(1).getFile()) {
2119 case FILE_GPR:
2120 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2121 emitGPR(0x14, insn->src(1));
2122 break;
2123 case FILE_IMMEDIATE:
2124 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2125 emitIMMD(0x14, 19, insn->src(1));
2126 break;
2127 default:
2128 assert(!"bad src1 file");
2129 break;
2130 }
2131
2132 switch (insn->sType) {
2133 case TYPE_U64:
2134 type = 2;
2135 break;
2136 case TYPE_S64:
2137 type = 3;
2138 break;
2139 default:
2140 type = 0;
2141 break;
2142 }
2143
2144 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2145 emitX (0x31);
2146 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2147 emitCC (0x2f);
2148 emitGPR (0x27, insn->src(2));
2149 emitField(0x25, 2, type);
2150 emitGPR (0x08, insn->src(0));
2151 emitGPR (0x00, insn->def(0));
2152 }
2153
2154 void
2155 CodeEmitterGM107::emitPOPC()
2156 {
2157 switch (insn->src(0).getFile()) {
2158 case FILE_GPR:
2159 emitInsn(0x5c080000);
2160 emitGPR (0x14, insn->src(0));
2161 break;
2162 case FILE_MEMORY_CONST:
2163 emitInsn(0x4c080000);
2164 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2165 break;
2166 case FILE_IMMEDIATE:
2167 emitInsn(0x38080000);
2168 emitIMMD(0x14, 19, insn->src(0));
2169 break;
2170 default:
2171 assert(!"bad src1 file");
2172 break;
2173 }
2174
2175 emitINV(0x28, insn->src(0));
2176 emitGPR(0x00, insn->def(0));
2177 }
2178
2179 void
2180 CodeEmitterGM107::emitBFI()
2181 {
2182 switch(insn->src(2).getFile()) {
2183 case FILE_GPR:
2184 switch (insn->src(1).getFile()) {
2185 case FILE_GPR:
2186 emitInsn(0x5bf00000);
2187 emitGPR (0x14, insn->src(1));
2188 break;
2189 case FILE_MEMORY_CONST:
2190 emitInsn(0x4bf00000);
2191 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2192 break;
2193 case FILE_IMMEDIATE:
2194 emitInsn(0x36f00000);
2195 emitIMMD(0x14, 19, insn->src(1));
2196 break;
2197 default:
2198 assert(!"bad src1 file");
2199 break;
2200 }
2201 emitGPR (0x27, insn->src(2));
2202 break;
2203 case FILE_MEMORY_CONST:
2204 emitInsn(0x53f00000);
2205 emitGPR (0x27, insn->src(1));
2206 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2207 break;
2208 default:
2209 assert(!"bad src2 file");
2210 break;
2211 }
2212
2213 emitCC (0x2f);
2214 emitGPR (0x08, insn->src(0));
2215 emitGPR (0x00, insn->def(0));
2216 }
2217
2218 void
2219 CodeEmitterGM107::emitBFE()
2220 {
2221 switch (insn->src(1).getFile()) {
2222 case FILE_GPR:
2223 emitInsn(0x5c000000);
2224 emitGPR (0x14, insn->src(1));
2225 break;
2226 case FILE_MEMORY_CONST:
2227 emitInsn(0x4c000000);
2228 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2229 break;
2230 case FILE_IMMEDIATE:
2231 emitInsn(0x38000000);
2232 emitIMMD(0x14, 19, insn->src(1));
2233 break;
2234 default:
2235 assert(!"bad src1 file");
2236 break;
2237 }
2238
2239 emitField(0x30, 1, isSignedType(insn->dType));
2240 emitCC (0x2f);
2241 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2242 emitGPR (0x08, insn->src(0));
2243 emitGPR (0x00, insn->def(0));
2244 }
2245
2246 void
2247 CodeEmitterGM107::emitFLO()
2248 {
2249 switch (insn->src(0).getFile()) {
2250 case FILE_GPR:
2251 emitInsn(0x5c300000);
2252 emitGPR (0x14, insn->src(0));
2253 break;
2254 case FILE_MEMORY_CONST:
2255 emitInsn(0x4c300000);
2256 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2257 break;
2258 case FILE_IMMEDIATE:
2259 emitInsn(0x38300000);
2260 emitIMMD(0x14, 19, insn->src(0));
2261 break;
2262 default:
2263 assert(!"bad src1 file");
2264 break;
2265 }
2266
2267 emitField(0x30, 1, isSignedType(insn->dType));
2268 emitCC (0x2f);
2269 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2270 emitINV (0x28, insn->src(0));
2271 emitGPR (0x00, insn->def(0));
2272 }
2273
2274 /*******************************************************************************
2275 * memory
2276 ******************************************************************************/
2277
2278 void
2279 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2280 {
2281 int data = 0;
2282
2283 switch (typeSizeof(type)) {
2284 case 1: data = isSignedType(type) ? 1 : 0; break;
2285 case 2: data = isSignedType(type) ? 3 : 2; break;
2286 case 4: data = 4; break;
2287 case 8: data = 5; break;
2288 case 16: data = 6; break;
2289 default:
2290 assert(!"bad type");
2291 break;
2292 }
2293
2294 emitField(pos, 3, data);
2295 }
2296
2297 void
2298 CodeEmitterGM107::emitLDSTc(int pos)
2299 {
2300 int mode = 0;
2301
2302 switch (insn->cache) {
2303 case CACHE_CA: mode = 0; break;
2304 case CACHE_CG: mode = 1; break;
2305 case CACHE_CS: mode = 2; break;
2306 case CACHE_CV: mode = 3; break;
2307 default:
2308 assert(!"invalid caching mode");
2309 break;
2310 }
2311
2312 emitField(pos, 2, mode);
2313 }
2314
2315 void
2316 CodeEmitterGM107::emitLDC()
2317 {
2318 emitInsn (0xef900000);
2319 emitLDSTs(0x30, insn->dType);
2320 emitField(0x2c, 2, insn->subOp);
2321 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2322 emitGPR (0x00, insn->def(0));
2323 }
2324
2325 void
2326 CodeEmitterGM107::emitLDL()
2327 {
2328 emitInsn (0xef400000);
2329 emitLDSTs(0x30, insn->dType);
2330 emitLDSTc(0x2c);
2331 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2332 emitGPR (0x00, insn->def(0));
2333 }
2334
2335 void
2336 CodeEmitterGM107::emitLDS()
2337 {
2338 emitInsn (0xef480000);
2339 emitLDSTs(0x30, insn->dType);
2340 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2341 emitGPR (0x00, insn->def(0));
2342 }
2343
2344 void
2345 CodeEmitterGM107::emitLD()
2346 {
2347 emitInsn (0x80000000);
2348 emitPRED (0x3a);
2349 emitLDSTc(0x38);
2350 emitLDSTs(0x35, insn->dType);
2351 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2352 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2353 emitGPR (0x00, insn->def(0));
2354 }
2355
2356 void
2357 CodeEmitterGM107::emitSTL()
2358 {
2359 emitInsn (0xef500000);
2360 emitLDSTs(0x30, insn->dType);
2361 emitLDSTc(0x2c);
2362 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2363 emitGPR (0x00, insn->src(1));
2364 }
2365
2366 void
2367 CodeEmitterGM107::emitSTS()
2368 {
2369 emitInsn (0xef580000);
2370 emitLDSTs(0x30, insn->dType);
2371 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2372 emitGPR (0x00, insn->src(1));
2373 }
2374
2375 void
2376 CodeEmitterGM107::emitST()
2377 {
2378 emitInsn (0xa0000000);
2379 emitPRED (0x3a);
2380 emitLDSTc(0x38);
2381 emitLDSTs(0x35, insn->dType);
2382 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2383 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2384 emitGPR (0x00, insn->src(1));
2385 }
2386
2387 void
2388 CodeEmitterGM107::emitALD()
2389 {
2390 emitInsn (0xefd80000);
2391 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2392 emitGPR (0x27, insn->src(0).getIndirect(1));
2393 emitO (0x20);
2394 emitP (0x1f);
2395 emitADDR (0x08, 20, 10, 0, insn->src(0));
2396 emitGPR (0x00, insn->def(0));
2397 }
2398
2399 void
2400 CodeEmitterGM107::emitAST()
2401 {
2402 emitInsn (0xeff00000);
2403 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2404 emitGPR (0x27, insn->src(0).getIndirect(1));
2405 emitP (0x1f);
2406 emitADDR (0x08, 20, 10, 0, insn->src(0));
2407 emitGPR (0x00, insn->src(1));
2408 }
2409
2410 void
2411 CodeEmitterGM107::emitISBERD()
2412 {
2413 emitInsn(0xefd00000);
2414 emitGPR (0x08, insn->src(0));
2415 emitGPR (0x00, insn->def(0));
2416 }
2417
2418 void
2419 CodeEmitterGM107::emitAL2P()
2420 {
2421 emitInsn (0xefa00000);
2422 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2423 emitPRED (0x2c);
2424 emitO (0x20);
2425 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2426 emitGPR (0x08, insn->src(0).getIndirect(0));
2427 emitGPR (0x00, insn->def(0));
2428 }
2429
2430 static void
2431 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2432 {
2433 int ipa = entry->ipa;
2434 int reg = entry->reg;
2435 int loc = entry->loc;
2436
2437 if (data.flatshade &&
2438 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2439 ipa = NV50_IR_INTERP_FLAT;
2440 reg = 0xff;
2441 } else if (data.force_persample_interp &&
2442 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2443 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2444 ipa |= NV50_IR_INTERP_CENTROID;
2445 }
2446 code[loc + 1] &= ~(0xf << 0x14);
2447 code[loc + 1] |= (ipa & 0x3) << 0x16;
2448 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2449 code[loc + 0] &= ~(0xff << 0x14);
2450 code[loc + 0] |= reg << 0x14;
2451 }
2452
2453 void
2454 CodeEmitterGM107::emitIPA()
2455 {
2456 int ipam = 0, ipas = 0;
2457
2458 switch (insn->getInterpMode()) {
2459 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2460 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2461 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2462 case NV50_IR_INTERP_SC : ipam = 3; break;
2463 default:
2464 assert(!"invalid ipa mode");
2465 break;
2466 }
2467
2468 switch (insn->getSampleMode()) {
2469 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2470 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2471 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2472 default:
2473 assert(!"invalid ipa sample mode");
2474 break;
2475 }
2476
2477 emitInsn (0xe0000000);
2478 emitField(0x36, 2, ipam);
2479 emitField(0x34, 2, ipas);
2480 emitSAT (0x33);
2481 emitField(0x2f, 3, 7);
2482 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2483 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2484 code[1] |= 0x00000040; /* .idx */
2485 emitGPR(0x00, insn->def(0));
2486
2487 if (insn->op == OP_PINTERP) {
2488 emitGPR(0x14, insn->src(1));
2489 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2490 emitGPR(0x27, insn->src(2));
2491 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
2492 } else {
2493 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2494 emitGPR(0x27, insn->src(1));
2495 emitGPR(0x14);
2496 addInterp(insn->ipa, 0xff, interpApply);
2497 }
2498
2499 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2500 emitGPR(0x27);
2501 }
2502
2503 void
2504 CodeEmitterGM107::emitATOM()
2505 {
2506 unsigned dType, subOp;
2507
2508 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2509 switch (insn->dType) {
2510 case TYPE_U32: dType = 0; break;
2511 case TYPE_U64: dType = 1; break;
2512 default: assert(!"unexpected dType"); dType = 0; break;
2513 }
2514 subOp = 15;
2515
2516 emitInsn (0xee000000);
2517 } else {
2518 switch (insn->dType) {
2519 case TYPE_U32: dType = 0; break;
2520 case TYPE_S32: dType = 1; break;
2521 case TYPE_U64: dType = 2; break;
2522 case TYPE_F32: dType = 3; break;
2523 case TYPE_B128: dType = 4; break;
2524 case TYPE_S64: dType = 5; break;
2525 default: assert(!"unexpected dType"); dType = 0; break;
2526 }
2527 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2528 subOp = 8;
2529 else
2530 subOp = insn->subOp;
2531
2532 emitInsn (0xed000000);
2533 }
2534
2535 emitField(0x34, 4, subOp);
2536 emitField(0x31, 3, dType);
2537 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2538 emitGPR (0x14, insn->src(1));
2539 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2540 emitGPR (0x00, insn->def(0));
2541 }
2542
2543 void
2544 CodeEmitterGM107::emitATOMS()
2545 {
2546 unsigned dType, subOp;
2547
2548 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2549 switch (insn->dType) {
2550 case TYPE_U32: dType = 0; break;
2551 case TYPE_U64: dType = 1; break;
2552 default: assert(!"unexpected dType"); dType = 0; break;
2553 }
2554 subOp = 4;
2555
2556 emitInsn (0xee000000);
2557 emitField(0x34, 1, dType);
2558 } else {
2559 switch (insn->dType) {
2560 case TYPE_U32: dType = 0; break;
2561 case TYPE_S32: dType = 1; break;
2562 case TYPE_U64: dType = 2; break;
2563 case TYPE_S64: dType = 3; break;
2564 default: assert(!"unexpected dType"); dType = 0; break;
2565 }
2566
2567 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2568 subOp = 8;
2569 else
2570 subOp = insn->subOp;
2571
2572 emitInsn (0xec000000);
2573 emitField(0x1c, 3, dType);
2574 }
2575
2576 emitField(0x34, 4, subOp);
2577 emitGPR (0x14, insn->src(1));
2578 emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2579 emitGPR (0x00, insn->def(0));
2580 }
2581
2582 void
2583 CodeEmitterGM107::emitRED()
2584 {
2585 unsigned dType;
2586
2587 switch (insn->dType) {
2588 case TYPE_U32: dType = 0; break;
2589 case TYPE_S32: dType = 1; break;
2590 case TYPE_U64: dType = 2; break;
2591 case TYPE_F32: dType = 3; break;
2592 case TYPE_B128: dType = 4; break;
2593 case TYPE_S64: dType = 5; break;
2594 default: assert(!"unexpected dType"); dType = 0; break;
2595 }
2596
2597 emitInsn (0xebf80000);
2598 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2599 emitField(0x17, 3, insn->subOp);
2600 emitField(0x14, 3, dType);
2601 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2602 emitGPR (0x00, insn->src(1));
2603 }
2604
2605 void
2606 CodeEmitterGM107::emitCCTL()
2607 {
2608 unsigned width;
2609 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2610 emitInsn(0xef600000);
2611 width = 30;
2612 } else {
2613 emitInsn(0xef800000);
2614 width = 22;
2615 }
2616 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2617 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2618 emitField(0x00, 4, insn->subOp);
2619 }
2620
2621 /*******************************************************************************
2622 * surface
2623 ******************************************************************************/
2624
2625 void
2626 CodeEmitterGM107::emitPIXLD()
2627 {
2628 emitInsn (0xefe80000);
2629 emitPRED (0x2d);
2630 emitField(0x1f, 3, insn->subOp);
2631 emitGPR (0x08, insn->src(0));
2632 emitGPR (0x00, insn->def(0));
2633 }
2634
2635 /*******************************************************************************
2636 * texture
2637 ******************************************************************************/
2638
2639 void
2640 CodeEmitterGM107::emitTEXs(int pos)
2641 {
2642 int src1 = insn->predSrc == 1 ? 2 : 1;
2643 if (insn->srcExists(src1))
2644 emitGPR(pos, insn->src(src1));
2645 else
2646 emitGPR(pos);
2647 }
2648
2649 void
2650 CodeEmitterGM107::emitTEX()
2651 {
2652 const TexInstruction *insn = this->insn->asTex();
2653 int lodm = 0;
2654
2655 if (!insn->tex.levelZero) {
2656 switch (insn->op) {
2657 case OP_TEX: lodm = 0; break;
2658 case OP_TXB: lodm = 2; break;
2659 case OP_TXL: lodm = 3; break;
2660 default:
2661 assert(!"invalid tex op");
2662 break;
2663 }
2664 } else {
2665 lodm = 1;
2666 }
2667
2668 if (insn->tex.rIndirectSrc >= 0) {
2669 emitInsn (0xdeb80000);
2670 emitField(0x25, 2, lodm);
2671 emitField(0x24, 1, insn->tex.useOffsets == 1);
2672 } else {
2673 emitInsn (0xc0380000);
2674 emitField(0x37, 2, lodm);
2675 emitField(0x36, 1, insn->tex.useOffsets == 1);
2676 emitField(0x24, 13, insn->tex.r);
2677 }
2678
2679 emitField(0x32, 1, insn->tex.target.isShadow());
2680 emitField(0x31, 1, insn->tex.liveOnly);
2681 emitField(0x23, 1, insn->tex.derivAll);
2682 emitField(0x1f, 4, insn->tex.mask);
2683 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2684 insn->tex.target.getDim() - 1);
2685 emitField(0x1c, 1, insn->tex.target.isArray());
2686 emitTEXs (0x14);
2687 emitGPR (0x08, insn->src(0));
2688 emitGPR (0x00, insn->def(0));
2689 }
2690
2691 void
2692 CodeEmitterGM107::emitTLD()
2693 {
2694 const TexInstruction *insn = this->insn->asTex();
2695
2696 if (insn->tex.rIndirectSrc >= 0) {
2697 emitInsn (0xdd380000);
2698 } else {
2699 emitInsn (0xdc380000);
2700 emitField(0x24, 13, insn->tex.r);
2701 }
2702
2703 emitField(0x37, 1, insn->tex.levelZero == 0);
2704 emitField(0x32, 1, insn->tex.target.isMS());
2705 emitField(0x31, 1, insn->tex.liveOnly);
2706 emitField(0x23, 1, insn->tex.useOffsets == 1);
2707 emitField(0x1f, 4, insn->tex.mask);
2708 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2709 insn->tex.target.getDim() - 1);
2710 emitField(0x1c, 1, insn->tex.target.isArray());
2711 emitTEXs (0x14);
2712 emitGPR (0x08, insn->src(0));
2713 emitGPR (0x00, insn->def(0));
2714 }
2715
2716 void
2717 CodeEmitterGM107::emitTLD4()
2718 {
2719 const TexInstruction *insn = this->insn->asTex();
2720
2721 if (insn->tex.rIndirectSrc >= 0) {
2722 emitInsn (0xdef80000);
2723 emitField(0x26, 2, insn->tex.gatherComp);
2724 emitField(0x25, 2, insn->tex.useOffsets == 4);
2725 emitField(0x24, 2, insn->tex.useOffsets == 1);
2726 } else {
2727 emitInsn (0xc8380000);
2728 emitField(0x38, 2, insn->tex.gatherComp);
2729 emitField(0x37, 2, insn->tex.useOffsets == 4);
2730 emitField(0x36, 2, insn->tex.useOffsets == 1);
2731 emitField(0x24, 13, insn->tex.r);
2732 }
2733
2734 emitField(0x32, 1, insn->tex.target.isShadow());
2735 emitField(0x31, 1, insn->tex.liveOnly);
2736 emitField(0x23, 1, insn->tex.derivAll);
2737 emitField(0x1f, 4, insn->tex.mask);
2738 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2739 insn->tex.target.getDim() - 1);
2740 emitField(0x1c, 1, insn->tex.target.isArray());
2741 emitTEXs (0x14);
2742 emitGPR (0x08, insn->src(0));
2743 emitGPR (0x00, insn->def(0));
2744 }
2745
2746 void
2747 CodeEmitterGM107::emitTXD()
2748 {
2749 const TexInstruction *insn = this->insn->asTex();
2750
2751 if (insn->tex.rIndirectSrc >= 0) {
2752 emitInsn (0xde780000);
2753 } else {
2754 emitInsn (0xde380000);
2755 emitField(0x24, 13, insn->tex.r);
2756 }
2757
2758 emitField(0x31, 1, insn->tex.liveOnly);
2759 emitField(0x23, 1, insn->tex.useOffsets == 1);
2760 emitField(0x1f, 4, insn->tex.mask);
2761 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2762 insn->tex.target.getDim() - 1);
2763 emitField(0x1c, 1, insn->tex.target.isArray());
2764 emitTEXs (0x14);
2765 emitGPR (0x08, insn->src(0));
2766 emitGPR (0x00, insn->def(0));
2767 }
2768
2769 void
2770 CodeEmitterGM107::emitTMML()
2771 {
2772 const TexInstruction *insn = this->insn->asTex();
2773
2774 if (insn->tex.rIndirectSrc >= 0) {
2775 emitInsn (0xdf600000);
2776 } else {
2777 emitInsn (0xdf580000);
2778 emitField(0x24, 13, insn->tex.r);
2779 }
2780
2781 emitField(0x31, 1, insn->tex.liveOnly);
2782 emitField(0x23, 1, insn->tex.derivAll);
2783 emitField(0x1f, 4, insn->tex.mask);
2784 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2785 insn->tex.target.getDim() - 1);
2786 emitField(0x1c, 1, insn->tex.target.isArray());
2787 emitTEXs (0x14);
2788 emitGPR (0x08, insn->src(0));
2789 emitGPR (0x00, insn->def(0));
2790 }
2791
2792 void
2793 CodeEmitterGM107::emitTXQ()
2794 {
2795 const TexInstruction *insn = this->insn->asTex();
2796 int type = 0;
2797
2798 switch (insn->tex.query) {
2799 case TXQ_DIMS : type = 0x01; break;
2800 case TXQ_TYPE : type = 0x02; break;
2801 case TXQ_SAMPLE_POSITION: type = 0x05; break;
2802 case TXQ_FILTER : type = 0x10; break;
2803 case TXQ_LOD : type = 0x12; break;
2804 case TXQ_WRAP : type = 0x14; break;
2805 case TXQ_BORDER_COLOUR : type = 0x16; break;
2806 default:
2807 assert(!"invalid txq query");
2808 break;
2809 }
2810
2811 if (insn->tex.rIndirectSrc >= 0) {
2812 emitInsn (0xdf500000);
2813 } else {
2814 emitInsn (0xdf480000);
2815 emitField(0x24, 13, insn->tex.r);
2816 }
2817
2818 emitField(0x31, 1, insn->tex.liveOnly);
2819 emitField(0x1f, 4, insn->tex.mask);
2820 emitField(0x16, 6, type);
2821 emitGPR (0x08, insn->src(0));
2822 emitGPR (0x00, insn->def(0));
2823 }
2824
2825 void
2826 CodeEmitterGM107::emitDEPBAR()
2827 {
2828 emitInsn (0xf0f00000);
2829 emitField(0x1d, 1, 1); /* le */
2830 emitField(0x1a, 3, 5);
2831 emitField(0x14, 6, insn->subOp);
2832 emitField(0x00, 6, insn->subOp);
2833 }
2834
2835 /*******************************************************************************
2836 * misc
2837 ******************************************************************************/
2838
2839 void
2840 CodeEmitterGM107::emitNOP()
2841 {
2842 emitInsn(0x50b00000);
2843 }
2844
2845 void
2846 CodeEmitterGM107::emitKIL()
2847 {
2848 emitInsn (0xe3300000);
2849 emitCond5(0x00, CC_TR);
2850 }
2851
2852 void
2853 CodeEmitterGM107::emitOUT()
2854 {
2855 const int cut = insn->op == OP_RESTART || insn->subOp;
2856 const int emit = insn->op == OP_EMIT;
2857
2858 switch (insn->src(1).getFile()) {
2859 case FILE_GPR:
2860 emitInsn(0xfbe00000);
2861 emitGPR (0x14, insn->src(1));
2862 break;
2863 case FILE_IMMEDIATE:
2864 emitInsn(0xf6e00000);
2865 emitIMMD(0x14, 19, insn->src(1));
2866 break;
2867 case FILE_MEMORY_CONST:
2868 emitInsn(0xebe00000);
2869 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2870 break;
2871 default:
2872 assert(!"bad src1 file");
2873 break;
2874 }
2875
2876 emitField(0x27, 2, (cut << 1) | emit);
2877 emitGPR (0x08, insn->src(0));
2878 emitGPR (0x00, insn->def(0));
2879 }
2880
2881 void
2882 CodeEmitterGM107::emitBAR()
2883 {
2884 uint8_t subop;
2885
2886 emitInsn (0xf0a80000);
2887
2888 switch (insn->subOp) {
2889 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
2890 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
2891 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
2892 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
2893 default:
2894 subop = 0x80;
2895 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
2896 break;
2897 }
2898
2899 emitField(0x20, 8, subop);
2900
2901 // barrier id
2902 if (insn->src(0).getFile() == FILE_GPR) {
2903 emitGPR(0x08, insn->src(0));
2904 } else {
2905 ImmediateValue *imm = insn->getSrc(0)->asImm();
2906 assert(imm);
2907 emitField(0x08, 8, imm->reg.data.u32);
2908 emitField(0x2b, 1, 1);
2909 }
2910
2911 // thread count
2912 if (insn->src(1).getFile() == FILE_GPR) {
2913 emitGPR(0x14, insn->src(1));
2914 } else {
2915 ImmediateValue *imm = insn->getSrc(0)->asImm();
2916 assert(imm);
2917 emitField(0x14, 12, imm->reg.data.u32);
2918 emitField(0x2c, 1, 1);
2919 }
2920
2921 if (insn->srcExists(2) && (insn->predSrc != 2)) {
2922 emitPRED (0x27, insn->src(2));
2923 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
2924 } else {
2925 emitField(0x27, 3, 7);
2926 }
2927 }
2928
2929 void
2930 CodeEmitterGM107::emitMEMBAR()
2931 {
2932 emitInsn (0xef980000);
2933 emitField(0x08, 2, insn->subOp >> 2);
2934 }
2935
2936 void
2937 CodeEmitterGM107::emitVOTE()
2938 {
2939 const ImmediateValue *imm;
2940 uint32_t u32;
2941
2942 int r = -1, p = -1;
2943 for (int i = 0; insn->defExists(i); i++) {
2944 if (insn->def(i).getFile() == FILE_GPR)
2945 r = i;
2946 else if (insn->def(i).getFile() == FILE_PREDICATE)
2947 p = i;
2948 }
2949
2950 emitInsn (0x50d80000);
2951 emitField(0x30, 2, insn->subOp);
2952 if (r >= 0)
2953 emitGPR (0x00, insn->def(r));
2954 else
2955 emitGPR (0x00);
2956 if (p >= 0)
2957 emitPRED (0x2d, insn->def(p));
2958 else
2959 emitPRED (0x2d);
2960
2961 switch (insn->src(0).getFile()) {
2962 case FILE_PREDICATE:
2963 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
2964 emitPRED (0x27, insn->src(0));
2965 break;
2966 case FILE_IMMEDIATE:
2967 imm = insn->getSrc(0)->asImm();
2968 assert(imm);
2969 u32 = imm->reg.data.u32;
2970 assert(u32 == 0 || u32 == 1);
2971 emitPRED(0x27);
2972 emitField(0x2a, 1, u32 == 0);
2973 break;
2974 default:
2975 assert(!"Unhandled src");
2976 break;
2977 }
2978 }
2979
2980 void
2981 CodeEmitterGM107::emitSUTarget()
2982 {
2983 const TexInstruction *insn = this->insn->asTex();
2984 int target = 0;
2985
2986 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
2987
2988 if (insn->tex.target == TEX_TARGET_BUFFER) {
2989 target = 2;
2990 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
2991 target = 4;
2992 } else if (insn->tex.target == TEX_TARGET_2D ||
2993 insn->tex.target == TEX_TARGET_RECT) {
2994 target = 6;
2995 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
2996 insn->tex.target == TEX_TARGET_CUBE ||
2997 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
2998 target = 8;
2999 } else if (insn->tex.target == TEX_TARGET_3D) {
3000 target = 10;
3001 } else {
3002 assert(insn->tex.target == TEX_TARGET_1D);
3003 }
3004 emitField(0x20, 4, target);
3005 }
3006
3007 void
3008 CodeEmitterGM107::emitSUHandle(const int s)
3009 {
3010 const TexInstruction *insn = this->insn->asTex();
3011
3012 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3013
3014 if (insn->src(s).getFile() == FILE_GPR) {
3015 emitGPR(0x27, insn->src(s));
3016 } else {
3017 ImmediateValue *imm = insn->getSrc(s)->asImm();
3018 assert(imm);
3019 emitField(0x33, 1, 1);
3020 emitField(0x24, 13, imm->reg.data.u32);
3021 }
3022 }
3023
3024 void
3025 CodeEmitterGM107::emitSUSTx()
3026 {
3027 const TexInstruction *insn = this->insn->asTex();
3028
3029 emitInsn(0xeb200000);
3030 if (insn->op == OP_SUSTB)
3031 emitField(0x34, 1, 1);
3032 emitSUTarget();
3033
3034 emitLDSTc(0x18);
3035 emitField(0x14, 4, 0xf); // rgba
3036 emitGPR (0x08, insn->src(0));
3037 emitGPR (0x00, insn->src(1));
3038
3039 emitSUHandle(2);
3040 }
3041
3042 void
3043 CodeEmitterGM107::emitSULDx()
3044 {
3045 const TexInstruction *insn = this->insn->asTex();
3046 int type = 0;
3047
3048 emitInsn(0xeb000000);
3049 if (insn->op == OP_SULDB)
3050 emitField(0x34, 1, 1);
3051 emitSUTarget();
3052
3053 switch (insn->dType) {
3054 case TYPE_S8: type = 1; break;
3055 case TYPE_U16: type = 2; break;
3056 case TYPE_S16: type = 3; break;
3057 case TYPE_U32: type = 4; break;
3058 case TYPE_U64: type = 5; break;
3059 case TYPE_B128: type = 6; break;
3060 default:
3061 assert(insn->dType == TYPE_U8);
3062 break;
3063 }
3064 emitLDSTc(0x18);
3065 emitField(0x14, 3, type);
3066 emitGPR (0x00, insn->def(0));
3067 emitGPR (0x08, insn->src(0));
3068
3069 emitSUHandle(1);
3070 }
3071
3072 void
3073 CodeEmitterGM107::emitSUREDx()
3074 {
3075 const TexInstruction *insn = this->insn->asTex();
3076 uint8_t type = 0, subOp;
3077
3078 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3079 emitInsn(0xeac00000);
3080 else
3081 emitInsn(0xea600000);
3082
3083 if (insn->op == OP_SUREDB)
3084 emitField(0x34, 1, 1);
3085 emitSUTarget();
3086
3087 // destination type
3088 switch (insn->dType) {
3089 case TYPE_S32: type = 1; break;
3090 case TYPE_U64: type = 2; break;
3091 case TYPE_F32: type = 3; break;
3092 case TYPE_S64: type = 5; break;
3093 default:
3094 assert(insn->dType == TYPE_U32);
3095 break;
3096 }
3097
3098 // atomic operation
3099 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3100 subOp = 0;
3101 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3102 subOp = 8;
3103 } else {
3104 subOp = insn->subOp;
3105 }
3106
3107 emitField(0x24, 3, type);
3108 emitField(0x1d, 4, subOp);
3109 emitGPR (0x14, insn->src(1));
3110 emitGPR (0x08, insn->src(0));
3111 emitGPR (0x00, insn->def(0));
3112
3113 emitSUHandle(2);
3114 }
3115
3116 /*******************************************************************************
3117 * assembler front-end
3118 ******************************************************************************/
3119
3120 bool
3121 CodeEmitterGM107::emitInstruction(Instruction *i)
3122 {
3123 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3124 bool ret = true;
3125
3126 insn = i;
3127
3128 if (insn->encSize != 8) {
3129 ERROR("skipping undecodable instruction: "); insn->print();
3130 return false;
3131 } else
3132 if (codeSize + size > codeSizeLimit) {
3133 ERROR("code emitter output buffer too small\n");
3134 return false;
3135 }
3136
3137 if (writeIssueDelays) {
3138 int n = ((codeSize & 0x1f) / 8) - 1;
3139 if (n < 0) {
3140 data = code;
3141 data[0] = 0x00000000;
3142 data[1] = 0x00000000;
3143 code += 2;
3144 codeSize += 8;
3145 n++;
3146 }
3147
3148 emitField(data, n * 21, 21, insn->sched);
3149 }
3150
3151 switch (insn->op) {
3152 case OP_EXIT:
3153 emitEXIT();
3154 break;
3155 case OP_BRA:
3156 emitBRA();
3157 break;
3158 case OP_CALL:
3159 emitCAL();
3160 break;
3161 case OP_PRECONT:
3162 emitPCNT();
3163 break;
3164 case OP_CONT:
3165 emitCONT();
3166 break;
3167 case OP_PREBREAK:
3168 emitPBK();
3169 break;
3170 case OP_BREAK:
3171 emitBRK();
3172 break;
3173 case OP_PRERET:
3174 emitPRET();
3175 break;
3176 case OP_RET:
3177 emitRET();
3178 break;
3179 case OP_JOINAT:
3180 emitSSY();
3181 break;
3182 case OP_JOIN:
3183 emitSYNC();
3184 break;
3185 case OP_QUADON:
3186 emitSAM();
3187 break;
3188 case OP_QUADPOP:
3189 emitRAM();
3190 break;
3191 case OP_MOV:
3192 emitMOV();
3193 break;
3194 case OP_RDSV:
3195 emitS2R();
3196 break;
3197 case OP_ABS:
3198 case OP_NEG:
3199 case OP_SAT:
3200 case OP_FLOOR:
3201 case OP_CEIL:
3202 case OP_TRUNC:
3203 case OP_CVT:
3204 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3205 insn->src(0).getFile() == FILE_PREDICATE)) {
3206 emitMOV();
3207 } else if (isFloatType(insn->dType)) {
3208 if (isFloatType(insn->sType))
3209 emitF2F();
3210 else
3211 emitI2F();
3212 } else {
3213 if (isFloatType(insn->sType))
3214 emitF2I();
3215 else
3216 emitI2I();
3217 }
3218 break;
3219 case OP_SHFL:
3220 emitSHFL();
3221 break;
3222 case OP_ADD:
3223 case OP_SUB:
3224 if (isFloatType(insn->dType)) {
3225 if (insn->dType == TYPE_F64)
3226 emitDADD();
3227 else
3228 emitFADD();
3229 } else {
3230 emitIADD();
3231 }
3232 break;
3233 case OP_MUL:
3234 if (isFloatType(insn->dType)) {
3235 if (insn->dType == TYPE_F64)
3236 emitDMUL();
3237 else
3238 emitFMUL();
3239 } else {
3240 emitIMUL();
3241 }
3242 break;
3243 case OP_MAD:
3244 case OP_FMA:
3245 if (isFloatType(insn->dType)) {
3246 if (insn->dType == TYPE_F64)
3247 emitDFMA();
3248 else
3249 emitFFMA();
3250 } else {
3251 emitIMAD();
3252 }
3253 break;
3254 case OP_SHLADD:
3255 emitISCADD();
3256 break;
3257 case OP_MIN:
3258 case OP_MAX:
3259 if (isFloatType(insn->dType)) {
3260 if (insn->dType == TYPE_F64)
3261 emitDMNMX();
3262 else
3263 emitFMNMX();
3264 } else {
3265 emitIMNMX();
3266 }
3267 break;
3268 case OP_SHL:
3269 if (typeSizeof(insn->sType) == 8)
3270 emitSHF();
3271 else
3272 emitSHL();
3273 break;
3274 case OP_SHR:
3275 if (typeSizeof(insn->sType) == 8)
3276 emitSHF();
3277 else
3278 emitSHR();
3279 break;
3280 case OP_POPCNT:
3281 emitPOPC();
3282 break;
3283 case OP_INSBF:
3284 emitBFI();
3285 break;
3286 case OP_EXTBF:
3287 emitBFE();
3288 break;
3289 case OP_BFIND:
3290 emitFLO();
3291 break;
3292 case OP_SLCT:
3293 if (isFloatType(insn->dType))
3294 emitFCMP();
3295 else
3296 emitICMP();
3297 break;
3298 case OP_SET:
3299 case OP_SET_AND:
3300 case OP_SET_OR:
3301 case OP_SET_XOR:
3302 if (insn->def(0).getFile() != FILE_PREDICATE) {
3303 if (isFloatType(insn->sType))
3304 if (insn->sType == TYPE_F64)
3305 emitDSET();
3306 else
3307 emitFSET();
3308 else
3309 emitISET();
3310 } else {
3311 if (isFloatType(insn->sType))
3312 if (insn->sType == TYPE_F64)
3313 emitDSETP();
3314 else
3315 emitFSETP();
3316 else
3317 emitISETP();
3318 }
3319 break;
3320 case OP_SELP:
3321 emitSEL();
3322 break;
3323 case OP_PRESIN:
3324 case OP_PREEX2:
3325 emitRRO();
3326 break;
3327 case OP_COS:
3328 case OP_SIN:
3329 case OP_EX2:
3330 case OP_LG2:
3331 case OP_RCP:
3332 case OP_RSQ:
3333 emitMUFU();
3334 break;
3335 case OP_AND:
3336 case OP_OR:
3337 case OP_XOR:
3338 emitLOP();
3339 break;
3340 case OP_NOT:
3341 emitNOT();
3342 break;
3343 case OP_LOAD:
3344 switch (insn->src(0).getFile()) {
3345 case FILE_MEMORY_CONST : emitLDC(); break;
3346 case FILE_MEMORY_LOCAL : emitLDL(); break;
3347 case FILE_MEMORY_SHARED: emitLDS(); break;
3348 case FILE_MEMORY_GLOBAL: emitLD(); break;
3349 default:
3350 assert(!"invalid load");
3351 emitNOP();
3352 break;
3353 }
3354 break;
3355 case OP_STORE:
3356 switch (insn->src(0).getFile()) {
3357 case FILE_MEMORY_LOCAL : emitSTL(); break;
3358 case FILE_MEMORY_SHARED: emitSTS(); break;
3359 case FILE_MEMORY_GLOBAL: emitST(); break;
3360 default:
3361 assert(!"invalid store");
3362 emitNOP();
3363 break;
3364 }
3365 break;
3366 case OP_ATOM:
3367 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3368 emitATOMS();
3369 else
3370 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3371 emitRED();
3372 else
3373 emitATOM();
3374 break;
3375 case OP_CCTL:
3376 emitCCTL();
3377 break;
3378 case OP_VFETCH:
3379 emitALD();
3380 break;
3381 case OP_EXPORT:
3382 emitAST();
3383 break;
3384 case OP_PFETCH:
3385 emitISBERD();
3386 break;
3387 case OP_AFETCH:
3388 emitAL2P();
3389 break;
3390 case OP_LINTERP:
3391 case OP_PINTERP:
3392 emitIPA();
3393 break;
3394 case OP_PIXLD:
3395 emitPIXLD();
3396 break;
3397 case OP_TEX:
3398 case OP_TXB:
3399 case OP_TXL:
3400 emitTEX();
3401 break;
3402 case OP_TXF:
3403 emitTLD();
3404 break;
3405 case OP_TXG:
3406 emitTLD4();
3407 break;
3408 case OP_TXD:
3409 emitTXD();
3410 break;
3411 case OP_TXQ:
3412 emitTXQ();
3413 break;
3414 case OP_TXLQ:
3415 emitTMML();
3416 break;
3417 case OP_TEXBAR:
3418 emitDEPBAR();
3419 break;
3420 case OP_QUADOP:
3421 emitFSWZADD();
3422 break;
3423 case OP_NOP:
3424 emitNOP();
3425 break;
3426 case OP_DISCARD:
3427 emitKIL();
3428 break;
3429 case OP_EMIT:
3430 case OP_RESTART:
3431 emitOUT();
3432 break;
3433 case OP_BAR:
3434 emitBAR();
3435 break;
3436 case OP_MEMBAR:
3437 emitMEMBAR();
3438 break;
3439 case OP_VOTE:
3440 emitVOTE();
3441 break;
3442 case OP_SUSTB:
3443 case OP_SUSTP:
3444 emitSUSTx();
3445 break;
3446 case OP_SULDB:
3447 case OP_SULDP:
3448 emitSULDx();
3449 break;
3450 case OP_SUREDB:
3451 case OP_SUREDP:
3452 emitSUREDx();
3453 break;
3454 default:
3455 assert(!"invalid opcode");
3456 emitNOP();
3457 ret = false;
3458 break;
3459 }
3460
3461 if (insn->join) {
3462 /*XXX*/
3463 }
3464
3465 code += 2;
3466 codeSize += 8;
3467 return ret;
3468 }
3469
3470 uint32_t
3471 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3472 {
3473 return 8;
3474 }
3475
3476 /*******************************************************************************
3477 * sched data calculator
3478 ******************************************************************************/
3479
3480 class SchedDataCalculatorGM107 : public Pass
3481 {
3482 public:
3483 SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
3484
3485 private:
3486 struct RegScores
3487 {
3488 struct ScoreData {
3489 int r[256];
3490 int p[8];
3491 int c;
3492 } rd, wr;
3493 int base;
3494
3495 void rebase(const int base)
3496 {
3497 const int delta = this->base - base;
3498 if (!delta)
3499 return;
3500 this->base = 0;
3501
3502 for (int i = 0; i < 256; ++i) {
3503 rd.r[i] += delta;
3504 wr.r[i] += delta;
3505 }
3506 for (int i = 0; i < 8; ++i) {
3507 rd.p[i] += delta;
3508 wr.p[i] += delta;
3509 }
3510 rd.c += delta;
3511 wr.c += delta;
3512 }
3513 void wipe()
3514 {
3515 memset(&rd, 0, sizeof(rd));
3516 memset(&wr, 0, sizeof(wr));
3517 }
3518 int getLatest(const ScoreData& d) const
3519 {
3520 int max = 0;
3521 for (int i = 0; i < 256; ++i)
3522 if (d.r[i] > max)
3523 max = d.r[i];
3524 for (int i = 0; i < 8; ++i)
3525 if (d.p[i] > max)
3526 max = d.p[i];
3527 if (d.c > max)
3528 max = d.c;
3529 return max;
3530 }
3531 inline int getLatestRd() const
3532 {
3533 return getLatest(rd);
3534 }
3535 inline int getLatestWr() const
3536 {
3537 return getLatest(wr);
3538 }
3539 inline int getLatest() const
3540 {
3541 return MAX2(getLatestRd(), getLatestWr());
3542 }
3543 void setMax(const RegScores *that)
3544 {
3545 for (int i = 0; i < 256; ++i) {
3546 rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
3547 wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
3548 }
3549 for (int i = 0; i < 8; ++i) {
3550 rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
3551 wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
3552 }
3553 rd.c = MAX2(rd.c, that->rd.c);
3554 wr.c = MAX2(wr.c, that->wr.c);
3555 }
3556 void print(int cycle)
3557 {
3558 for (int i = 0; i < 256; ++i) {
3559 if (rd.r[i] > cycle)
3560 INFO("rd $r%i @ %i\n", i, rd.r[i]);
3561 if (wr.r[i] > cycle)
3562 INFO("wr $r%i @ %i\n", i, wr.r[i]);
3563 }
3564 for (int i = 0; i < 8; ++i) {
3565 if (rd.p[i] > cycle)
3566 INFO("rd $p%i @ %i\n", i, rd.p[i]);
3567 if (wr.p[i] > cycle)
3568 INFO("wr $p%i @ %i\n", i, wr.p[i]);
3569 }
3570 if (rd.c > cycle)
3571 INFO("rd $c @ %i\n", rd.c);
3572 if (wr.c > cycle)
3573 INFO("wr $c @ %i\n", wr.c);
3574 }
3575 };
3576
3577 RegScores *score; // for current BB
3578 std::vector<RegScores> scoreBoards;
3579
3580 const TargetGM107 *targ;
3581 bool visit(Function *);
3582 bool visit(BasicBlock *);
3583
3584 void commitInsn(const Instruction *, int);
3585 int calcDelay(const Instruction *, int) const;
3586 void setDelay(Instruction *, int, const Instruction *);
3587 void recordWr(const Value *, int, int);
3588 void checkRd(const Value *, int, int&) const;
3589
3590 inline void emitYield(Instruction *);
3591 inline void emitStall(Instruction *, uint8_t);
3592 inline void emitReuse(Instruction *, uint8_t);
3593 inline void emitWrDepBar(Instruction *, uint8_t);
3594 inline void emitRdDepBar(Instruction *, uint8_t);
3595 inline void emitWtDepBar(Instruction *, uint8_t);
3596
3597 inline int getStall(const Instruction *) const;
3598 inline int getWrDepBar(const Instruction *) const;
3599 inline int getRdDepBar(const Instruction *) const;
3600 inline int getWtDepBar(const Instruction *) const;
3601
3602 void setReuseFlag(Instruction *);
3603
3604 inline void printSchedInfo(int, const Instruction *) const;
3605
3606 struct LiveBarUse {
3607 LiveBarUse(Instruction *insn, Instruction *usei)
3608 : insn(insn), usei(usei) { }
3609 Instruction *insn;
3610 Instruction *usei;
3611 };
3612
3613 struct LiveBarDef {
3614 LiveBarDef(Instruction *insn, Instruction *defi)
3615 : insn(insn), defi(defi) { }
3616 Instruction *insn;
3617 Instruction *defi;
3618 };
3619
3620 bool insertBarriers(BasicBlock *);
3621
3622 Instruction *findFirstUse(const Instruction *) const;
3623 Instruction *findFirstDef(const Instruction *) const;
3624
3625 bool needRdDepBar(const Instruction *) const;
3626 bool needWrDepBar(const Instruction *) const;
3627 };
3628
3629 inline void
3630 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3631 {
3632 assert(cnt < 16);
3633 insn->sched |= cnt;
3634 }
3635
3636 inline void
3637 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3638 {
3639 insn->sched |= 1 << 4;
3640 }
3641
3642 inline void
3643 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3644 {
3645 assert(id < 6);
3646 if ((insn->sched & 0xe0) == 0xe0)
3647 insn->sched ^= 0xe0;
3648 insn->sched |= id << 5;
3649 }
3650
3651 inline void
3652 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3653 {
3654 assert(id < 6);
3655 if ((insn->sched & 0x700) == 0x700)
3656 insn->sched ^= 0x700;
3657 insn->sched |= id << 8;
3658 }
3659
3660 inline void
3661 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3662 {
3663 assert(id < 6);
3664 insn->sched |= 1 << (11 + id);
3665 }
3666
3667 inline void
3668 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3669 {
3670 assert(id < 4);
3671 insn->sched |= 1 << (17 + id);
3672 }
3673
3674 inline void
3675 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3676 const Instruction *insn) const
3677 {
3678 uint8_t st, yl, wr, rd, wt, ru;
3679
3680 st = (insn->sched & 0x00000f) >> 0;
3681 yl = (insn->sched & 0x000010) >> 4;
3682 wr = (insn->sched & 0x0000e0) >> 5;
3683 rd = (insn->sched & 0x000700) >> 8;
3684 wt = (insn->sched & 0x01f800) >> 11;
3685 ru = (insn->sched & 0x1e0000) >> 17;
3686
3687 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3688 cycle, st, yl, wr, rd, wt, ru);
3689 }
3690
3691 inline int
3692 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3693 {
3694 return insn->sched & 0xf;
3695 }
3696
3697 inline int
3698 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3699 {
3700 return (insn->sched & 0x0000e0) >> 5;
3701 }
3702
3703 inline int
3704 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3705 {
3706 return (insn->sched & 0x000700) >> 8;
3707 }
3708
3709 inline int
3710 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3711 {
3712 return (insn->sched & 0x01f800) >> 11;
3713 }
3714
3715 // Emit the reuse flag which allows to make use of the new memory hierarchy
3716 // introduced since Maxwell, the operand reuse cache.
3717 //
3718 // It allows to reduce bank conflicts by caching operands. Each time you issue
3719 // an instruction, that flag can tell the hw which operands are going to be
3720 // re-used by the next instruction. Note that the next instruction has to use
3721 // the same GPR id in the same operand slot.
3722 void
3723 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3724 {
3725 Instruction *next = insn->next;
3726 BitSet defs(255, 1);
3727
3728 if (!targ->isReuseSupported(insn))
3729 return;
3730
3731 for (int d = 0; insn->defExists(d); ++d) {
3732 const Value *def = insn->def(d).rep();
3733 if (insn->def(d).getFile() != FILE_GPR)
3734 continue;
3735 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3736 continue;
3737 defs.set(def->reg.data.id);
3738 }
3739
3740 for (int s = 0; insn->srcExists(s); s++) {
3741 const Value *src = insn->src(s).rep();
3742 if (insn->src(s).getFile() != FILE_GPR)
3743 continue;
3744 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3745 continue;
3746 if (defs.test(src->reg.data.id))
3747 continue;
3748 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3749 continue;
3750 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3751 continue;
3752 assert(s < 4);
3753 emitReuse(insn, s);
3754 }
3755 }
3756
3757 void
3758 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3759 {
3760 int a = v->reg.data.id, b;
3761
3762 switch (v->reg.file) {
3763 case FILE_GPR:
3764 b = a + v->reg.size / 4;
3765 for (int r = a; r < b; ++r)
3766 score->rd.r[r] = ready;
3767 break;
3768 case FILE_PREDICATE:
3769 // To immediately use a predicate set by any instructions, the minimum
3770 // number of stall counts is 13.
3771 score->rd.p[a] = cycle + 13;
3772 break;
3773 case FILE_FLAGS:
3774 score->rd.c = ready;
3775 break;
3776 default:
3777 break;
3778 }
3779 }
3780
3781 void
3782 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3783 {
3784 int a = v->reg.data.id, b;
3785 int ready = cycle;
3786
3787 switch (v->reg.file) {
3788 case FILE_GPR:
3789 b = a + v->reg.size / 4;
3790 for (int r = a; r < b; ++r)
3791 ready = MAX2(ready, score->rd.r[r]);
3792 break;
3793 case FILE_PREDICATE:
3794 ready = MAX2(ready, score->rd.p[a]);
3795 break;
3796 case FILE_FLAGS:
3797 ready = MAX2(ready, score->rd.c);
3798 break;
3799 default:
3800 break;
3801 }
3802 if (cycle < ready)
3803 delay = MAX2(delay, ready - cycle);
3804 }
3805
3806 void
3807 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3808 {
3809 const int ready = cycle + targ->getLatency(insn);
3810
3811 for (int d = 0; insn->defExists(d); ++d)
3812 recordWr(insn->getDef(d), cycle, ready);
3813
3814 #ifdef GM107_DEBUG_SCHED_DATA
3815 score->print(cycle);
3816 #endif
3817 }
3818
3819 #define GM107_MIN_ISSUE_DELAY 0x1
3820 #define GM107_MAX_ISSUE_DELAY 0xf
3821
3822 int
3823 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3824 {
3825 int delay = 0, ready = cycle;
3826
3827 for (int s = 0; insn->srcExists(s); ++s)
3828 checkRd(insn->getSrc(s), cycle, delay);
3829
3830 // TODO: make use of getReadLatency()!
3831
3832 return MAX2(delay, ready - cycle);
3833 }
3834
3835 void
3836 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3837 const Instruction *next)
3838 {
3839 const OpClass cl = targ->getOpClass(insn->op);
3840 int wr, rd;
3841
3842 if (insn->op == OP_EXIT ||
3843 insn->op == OP_BAR ||
3844 insn->op == OP_MEMBAR) {
3845 delay = GM107_MAX_ISSUE_DELAY;
3846 } else
3847 if (insn->op == OP_QUADON ||
3848 insn->op == OP_QUADPOP) {
3849 delay = 0xd;
3850 } else
3851 if (cl == OPCLASS_FLOW || insn->join) {
3852 delay = 0xd;
3853 }
3854
3855 if (!next || !targ->canDualIssue(insn, next)) {
3856 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
3857 } else {
3858 delay = 0x0; // dual-issue
3859 }
3860
3861 wr = getWrDepBar(insn);
3862 rd = getRdDepBar(insn);
3863
3864 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
3865 // Barriers take one additional clock cycle to become active on top of
3866 // the clock consumed by the instruction producing it.
3867 if (!next || insn->bb != next->bb) {
3868 delay = 0x2;
3869 } else {
3870 int wt = getWtDepBar(next);
3871 if ((wt & (1 << wr)) | (wt & (1 << rd)))
3872 delay = 0x2;
3873 }
3874 }
3875
3876 emitStall(insn, delay);
3877 }
3878
3879
3880 // Return true when the given instruction needs to emit a read dependency
3881 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
3882 // setting the maximum number of stall counts is not enough.
3883 bool
3884 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
3885 {
3886 BitSet srcs(255, 1), defs(255, 1);
3887 int a, b;
3888
3889 if (!targ->isBarrierRequired(insn))
3890 return false;
3891
3892 // Do not emit a read dependency barrier when the instruction doesn't use
3893 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
3894 for (int s = 0; insn->srcExists(s); ++s) {
3895 const Value *src = insn->src(s).rep();
3896 if (insn->src(s).getFile() != FILE_GPR)
3897 continue;
3898 if (src->reg.data.id == 255)
3899 continue;
3900
3901 a = src->reg.data.id;
3902 b = a + src->reg.size / 4;
3903 for (int r = a; r < b; ++r)
3904 srcs.set(r);
3905 }
3906
3907 if (!srcs.popCount())
3908 return false;
3909
3910 // Do not emit a read dependency barrier when the output GPRs are equal to
3911 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
3912 // be produced and WaR hazards are prevented.
3913 for (int d = 0; insn->defExists(d); ++d) {
3914 const Value *def = insn->def(d).rep();
3915 if (insn->def(d).getFile() != FILE_GPR)
3916 continue;
3917 if (def->reg.data.id == 255)
3918 continue;
3919
3920 a = def->reg.data.id;
3921 b = a + def->reg.size / 4;
3922 for (int r = a; r < b; ++r)
3923 defs.set(r);
3924 }
3925
3926 srcs.andNot(defs);
3927 if (!srcs.popCount())
3928 return false;
3929
3930 return true;
3931 }
3932
3933 // Return true when the given instruction needs to emit a write dependency
3934 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
3935 // setting the maximum number of stall counts is not enough. This is only legal
3936 // if the instruction output something.
3937 bool
3938 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
3939 {
3940 if (!targ->isBarrierRequired(insn))
3941 return false;
3942
3943 for (int d = 0; insn->defExists(d); ++d) {
3944 if (insn->def(d).getFile() == FILE_GPR ||
3945 insn->def(d).getFile() == FILE_PREDICATE)
3946 return true;
3947 }
3948 return false;
3949 }
3950
3951 // Find the next instruction inside the same basic block which uses the output
3952 // of the given instruction in order to avoid RaW hazards.
3953 Instruction *
3954 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
3955 {
3956 Instruction *insn, *next;
3957 int minGPR, maxGPR;
3958
3959 if (!bari->defExists(0))
3960 return NULL;
3961
3962 minGPR = bari->def(0).rep()->reg.data.id;
3963 maxGPR = minGPR + bari->def(0).rep()->reg.size / 4 - 1;
3964
3965 for (insn = bari->next; insn != NULL; insn = next) {
3966 next = insn->next;
3967
3968 for (int s = 0; insn->srcExists(s); ++s) {
3969 const Value *src = insn->src(s).rep();
3970 if (bari->def(0).getFile() == FILE_GPR) {
3971 if (insn->src(s).getFile() != FILE_GPR ||
3972 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
3973 src->reg.data.id > maxGPR)
3974 continue;
3975 return insn;
3976 } else
3977 if (bari->def(0).getFile() == FILE_PREDICATE) {
3978 if (insn->src(s).getFile() != FILE_PREDICATE ||
3979 src->reg.data.id != minGPR)
3980 continue;
3981 return insn;
3982 }
3983 }
3984 }
3985 return NULL;
3986 }
3987
3988 // Find the next instruction inside the same basic block which overwrites, at
3989 // least, one source of the given instruction in order to avoid WaR hazards.
3990 Instruction *
3991 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
3992 {
3993 Instruction *insn, *next;
3994 int minGPR, maxGPR;
3995
3996 for (insn = bari->next; insn != NULL; insn = next) {
3997 next = insn->next;
3998
3999 for (int d = 0; insn->defExists(d); ++d) {
4000 const Value *def = insn->def(d).rep();
4001 if (insn->def(d).getFile() != FILE_GPR)
4002 continue;
4003
4004 minGPR = def->reg.data.id;
4005 maxGPR = minGPR + def->reg.size / 4 - 1;
4006
4007 for (int s = 0; bari->srcExists(s); ++s) {
4008 const Value *src = bari->src(s).rep();
4009 if (bari->src(s).getFile() != FILE_GPR ||
4010 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
4011 src->reg.data.id > maxGPR)
4012 continue;
4013 return insn;
4014 }
4015 }
4016 }
4017 return NULL;
4018 }
4019
4020 // Dependency barriers:
4021 // This pass is a bit ugly and could probably be improved by performing a
4022 // better allocation.
4023 //
4024 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4025 // dependency barriers using the control codes.
4026 bool
4027 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4028 {
4029 std::list<LiveBarUse> live_uses;
4030 std::list<LiveBarDef> live_defs;
4031 Instruction *insn, *next;
4032 BitSet bars(6, 1);
4033 int bar_id;
4034
4035 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4036 Instruction *usei = NULL, *defi = NULL;
4037 bool need_wr_bar, need_rd_bar;
4038
4039 next = insn->next;
4040
4041 // Expire old barrier uses.
4042 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4043 it != live_uses.end();) {
4044 if (insn->serial >= it->usei->serial) {
4045 int wr = getWrDepBar(it->insn);
4046 emitWtDepBar(insn, wr);
4047 bars.clr(wr); // free barrier
4048 it = live_uses.erase(it);
4049 continue;
4050 }
4051 ++it;
4052 }
4053
4054 // Expire old barrier defs.
4055 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4056 it != live_defs.end();) {
4057 if (insn->serial >= it->defi->serial) {
4058 int rd = getRdDepBar(it->insn);
4059 emitWtDepBar(insn, rd);
4060 bars.clr(rd); // free barrier
4061 it = live_defs.erase(it);
4062 continue;
4063 }
4064 ++it;
4065 }
4066
4067 need_wr_bar = needWrDepBar(insn);
4068 need_rd_bar = needRdDepBar(insn);
4069
4070 if (need_wr_bar) {
4071 // When the instruction requires to emit a write dependency barrier
4072 // (all which write something at a variable latency), find the next
4073 // instruction which reads the outputs.
4074 usei = findFirstUse(insn);
4075
4076 // Allocate and emit a new barrier.
4077 bar_id = bars.findFreeRange(1);
4078 if (bar_id == -1)
4079 bar_id = 5;
4080 bars.set(bar_id);
4081 emitWrDepBar(insn, bar_id);
4082 if (usei)
4083 live_uses.push_back(LiveBarUse(insn, usei));
4084 }
4085
4086 if (need_rd_bar) {
4087 // When the instruction requires to emit a read dependency barrier
4088 // (all which read something at a variable latency), find the next
4089 // instruction which will write the inputs.
4090 defi = findFirstDef(insn);
4091
4092 if (usei && defi && usei->serial <= defi->serial)
4093 continue;
4094
4095 // Allocate and emit a new barrier.
4096 bar_id = bars.findFreeRange(1);
4097 if (bar_id == -1)
4098 bar_id = 5;
4099 bars.set(bar_id);
4100 emitRdDepBar(insn, bar_id);
4101 if (defi)
4102 live_defs.push_back(LiveBarDef(insn, defi));
4103 }
4104 }
4105
4106 // Remove unnecessary barrier waits.
4107 BitSet alive_bars(6, 1);
4108 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4109 int wr, rd, wt;
4110
4111 next = insn->next;
4112
4113 wr = getWrDepBar(insn);
4114 rd = getRdDepBar(insn);
4115 wt = getWtDepBar(insn);
4116
4117 for (int idx = 0; idx < 6; ++idx) {
4118 if (!(wt & (1 << idx)))
4119 continue;
4120 if (!alive_bars.test(idx)) {
4121 insn->sched &= ~(1 << (11 + idx));
4122 } else {
4123 alive_bars.clr(idx);
4124 }
4125 }
4126
4127 if (wr < 6)
4128 alive_bars.set(wr);
4129 if (rd < 6)
4130 alive_bars.set(rd);
4131 }
4132
4133 return true;
4134 }
4135
4136 bool
4137 SchedDataCalculatorGM107::visit(Function *func)
4138 {
4139 ArrayList insns;
4140
4141 func->orderInstructions(insns);
4142
4143 scoreBoards.resize(func->cfg.getSize());
4144 for (size_t i = 0; i < scoreBoards.size(); ++i)
4145 scoreBoards[i].wipe();
4146 return true;
4147 }
4148
4149 bool
4150 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4151 {
4152 Instruction *insn, *next = NULL;
4153 int cycle = 0;
4154
4155 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4156 /*XXX*/
4157 insn->sched = 0x7e0;
4158 }
4159
4160 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4161 return true;
4162
4163 // Insert read/write dependency barriers for instructions which don't
4164 // operate at a fixed latency.
4165 insertBarriers(bb);
4166
4167 score = &scoreBoards.at(bb->getId());
4168
4169 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4170 // back branches will wait until all target dependencies are satisfied
4171 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4172 continue;
4173 BasicBlock *in = BasicBlock::get(ei.getNode());
4174 score->setMax(&scoreBoards.at(in->getId()));
4175 }
4176
4177 #ifdef GM107_DEBUG_SCHED_DATA
4178 INFO("=== BB:%i initial scores\n", bb->getId());
4179 score->print(cycle);
4180 #endif
4181
4182 // Because barriers are allocated locally (intra-BB), we have to make sure
4183 // that all produced barriers have been consumed before entering inside a
4184 // new basic block. The best way is to do a global allocation pre RA but
4185 // it's really more difficult, especially because of the phi nodes. Anyways,
4186 // it seems like that waiting on a barrier which has already been consumed
4187 // doesn't add any additional cost, it's just not elegant!
4188 Instruction *start = bb->getEntry();
4189 if (start && bb->cfg.incidentCount() > 0) {
4190 for (int b = 0; b < 6; b++)
4191 emitWtDepBar(start, b);
4192 }
4193
4194 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4195 next = insn->next;
4196
4197 commitInsn(insn, cycle);
4198 int delay = calcDelay(next, cycle);
4199 setDelay(insn, delay, next);
4200 cycle += getStall(insn);
4201
4202 setReuseFlag(insn);
4203
4204 // XXX: The yield flag seems to destroy a bunch of things when it is
4205 // set on every instruction, need investigation.
4206 //emitYield(insn);
4207
4208 #ifdef GM107_DEBUG_SCHED_DATA
4209 printSchedInfo(cycle, insn);
4210 insn->print();
4211 next->print();
4212 #endif
4213 }
4214
4215 if (!insn)
4216 return true;
4217 commitInsn(insn, cycle);
4218
4219 int bbDelay = -1;
4220
4221 #ifdef GM107_DEBUG_SCHED_DATA
4222 fprintf(stderr, "last instruction is : ");
4223 insn->print();
4224 fprintf(stderr, "cycle=%d\n", cycle);
4225 #endif
4226
4227 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4228 BasicBlock *out = BasicBlock::get(ei.getNode());
4229
4230 if (ei.getType() != Graph::Edge::BACK) {
4231 // Only test the first instruction of the outgoing block.
4232 next = out->getEntry();
4233 if (next) {
4234 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4235 } else {
4236 // When the outgoing BB is empty, make sure to set the number of
4237 // stall counts needed by the instruction because we don't know the
4238 // next instruction.
4239 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4240 }
4241 } else {
4242 // Wait until all dependencies are satisfied.
4243 const int regsFree = score->getLatest();
4244 next = out->getFirst();
4245 for (int c = cycle; next && c < regsFree; next = next->next) {
4246 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4247 c += getStall(next);
4248 }
4249 next = NULL;
4250 }
4251 }
4252 if (bb->cfg.outgoingCount() != 1)
4253 next = NULL;
4254 setDelay(insn, bbDelay, next);
4255 cycle += getStall(insn);
4256
4257 score->rebase(cycle); // common base for initializing out blocks' scores
4258 return true;
4259 }
4260
4261 /*******************************************************************************
4262 * main
4263 ******************************************************************************/
4264
4265 void
4266 CodeEmitterGM107::prepareEmission(Function *func)
4267 {
4268 SchedDataCalculatorGM107 sched(targGM107);
4269 CodeEmitter::prepareEmission(func);
4270 sched.run(func, true, true);
4271 }
4272
// Number of 24-byte groups needed to hold size bytes, rounded up.
//
// Computed from quotient and remainder instead of (size + 23) / 24 so that
// the result stays correct for sizes close to UINT32_MAX, where the addition
// would wrap around.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   return size / 24 + (size % 24 != 0 ? 1 : 0);
}
4277
4278 void
4279 CodeEmitterGM107::prepareEmission(Program *prog)
4280 {
4281 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4282 !fi.end(); fi.next()) {
4283 Function *func = reinterpret_cast<Function *>(fi.get());
4284 func->binPos = prog->binSize;
4285 prepareEmission(func);
4286
4287 // adjust sizes & positions for schedulding info:
4288 if (prog->getTarget()->hasSWSched) {
4289 uint32_t adjPos = func->binPos;
4290 BasicBlock *bb = NULL;
4291 for (int i = 0; i < func->bbCount; ++i) {
4292 bb = func->bbArray[i];
4293 int32_t adjSize = bb->binSize;
4294 if (adjPos % 32) {
4295 adjSize -= 32 - adjPos % 32;
4296 if (adjSize < 0)
4297 adjSize = 0;
4298 }
4299 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4300 bb->binPos = adjPos;
4301 bb->binSize = adjSize;
4302 adjPos += adjSize;
4303 }
4304 if (bb)
4305 func->binSize = adjPos - func->binPos;
4306 }
4307
4308 prog->binSize += func->binSize;
4309 }
4310 }
4311
4312 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4313 : CodeEmitter(target),
4314 targGM107(target),
4315 writeIssueDelays(target->hasSWSched)
4316 {
4317 code = NULL;
4318 codeSize = codeSizeLimit = 0;
4319 relocInfo = NULL;
4320 }
4321
4322 CodeEmitter *
4323 TargetGM107::createCodeEmitterGM107(Program::Type type)
4324 {
4325 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4326 emit->setProgramType(type);
4327 return emit;
4328 }
4329
4330 } // namespace nv50_ir