nv50/ir/opt: Improve modifier handling.
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_ir.h"
24 #include "nv50_ir_target.h"
25 #include "nv50_ir_driver.h"
26
27 extern "C" {
28 #include "nv50/nv50_program.h"
29 #include "nv50/nv50_debug.h"
30 }
31
32 namespace nv50_ir {
33
34 Modifier::Modifier(operation op)
35 {
36 switch (op) {
37 case OP_NEG: bits = NV50_IR_MOD_NEG; break;
38 case OP_ABS: bits = NV50_IR_MOD_ABS; break;
39 case OP_SAT: bits = NV50_IR_MOD_SAT; break;
40 case OP_NOT: bits = NV50_IR_MOD_NOT; break;
41 default:
42 bits = 0;
43 break;
44 }
45 }
46
47 Modifier Modifier::operator*(const Modifier m) const
48 {
49 unsigned int a, b, c;
50
51 b = m.bits;
52 if (this->bits & NV50_IR_MOD_ABS)
53 b &= ~NV50_IR_MOD_NEG;
54
55 a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
56 c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
57
58 return Modifier(a | c);
59 }
60
61 ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
62 {
63 indirect[0] = -1;
64 indirect[1] = -1;
65 usedAsPtr = false;
66 set(v);
67 }
68
69 ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
70 {
71 set(ref);
72 usedAsPtr = ref.usedAsPtr;
73 }
74
75 ValueRef::~ValueRef()
76 {
77 this->set(NULL);
78 }
79
80 ImmediateValue *ValueRef::getImmediate() const
81 {
82 Value *src = value;
83
84 while (src) {
85 if (src->reg.file == FILE_IMMEDIATE)
86 return src->asImm();
87
88 Instruction *insn = src->getUniqueInsn();
89
90 src = (insn && insn->op == OP_MOV) ? insn->getSrc(0) : NULL;
91 }
92 return NULL;
93 }
94
95 ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
96 {
97 set(v);
98 }
99
100 ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
101 {
102 set(def.get());
103 }
104
105 ValueDef::~ValueDef()
106 {
107 this->set(NULL);
108 }
109
110 void
111 ValueRef::set(const ValueRef &ref)
112 {
113 this->set(ref.get());
114 mod = ref.mod;
115 indirect[0] = ref.indirect[0];
116 indirect[1] = ref.indirect[1];
117 }
118
119 void
120 ValueRef::set(Value *refVal)
121 {
122 if (value == refVal)
123 return;
124 if (value)
125 value->uses.remove(this);
126 if (refVal)
127 refVal->uses.push_back(this);
128
129 value = refVal;
130 }
131
132 void
133 ValueDef::set(Value *defVal)
134 {
135 if (value == defVal)
136 return;
137 if (value)
138 value->defs.remove(this);
139 if (defVal)
140 defVal->defs.push_back(this);
141
142 value = defVal;
143 }
144
145 // Check if we can replace this definition's value by the value in @rep,
146 // including the source modifiers, i.e. make sure that all uses support
147 // @rep.mod.
148 bool
149 ValueDef::mayReplace(const ValueRef &rep)
150 {
151 if (!rep.mod)
152 return true;
153
154 if (!insn || !insn->bb) // Unbound instruction ?
155 return false;
156
157 const Target *target = insn->bb->getProgram()->getTarget();
158
159 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
160 ++it) {
161 Instruction *insn = (*it)->getInsn();
162 int s = -1;
163
164 for (int i = 0; insn->srcExists(i); ++i) {
165 if (insn->src(i).get() == value) {
166 // If there are multiple references to us we'd have to check if the
167 // combination of mods is still supported, but just bail for now.
168 if (&insn->src(i) != (*it))
169 return false;
170 s = i;
171 }
172 }
173 assert(s >= 0); // integrity of uses list
174
175 if (!target->isModSupported(insn, s, rep.mod))
176 return false;
177 }
178 return true;
179 }
180
181 void
182 ValueDef::replace(const ValueRef &repVal, bool doSet)
183 {
184 assert(mayReplace(repVal));
185
186 if (value == repVal.get())
187 return;
188
189 while (!value->uses.empty()) {
190 ValueRef *ref = value->uses.front();
191 ref->set(repVal.get());
192 ref->mod *= repVal.mod;
193 }
194
195 if (doSet)
196 set(repVal.get());
197 }
198
199 Value::Value()
200 {
201 join = this;
202 memset(&reg, 0, sizeof(reg));
203 reg.size = 4;
204 }
205
206 bool
207 Value::coalesce(Value *jval, bool force)
208 {
209 Value *repr = this->join; // new representative
210 Value *jrep = jval->join;
211
212 if (reg.file != jval->reg.file || reg.size != jval->reg.size) {
213 if (!force)
214 return false;
215 ERROR("forced coalescing of values of different sizes/files");
216 }
217
218 if (!force && (repr->reg.data.id != jrep->reg.data.id)) {
219 if (repr->reg.data.id >= 0 &&
220 jrep->reg.data.id >= 0)
221 return false;
222 if (jrep->reg.data.id >= 0) {
223 repr = jval->join;
224 jrep = this->join;
225 jval = this;
226 }
227
228 // need to check all fixed register values of the program for overlap
229 Function *func = defs.front()->getInsn()->bb->getFunction();
230
231 // TODO: put values in by register-id bins per function
232 ArrayList::Iterator iter = func->allLValues.iterator();
233 for (; !iter.end(); iter.next()) {
234 Value *fixed = reinterpret_cast<Value *>(iter.get());
235 assert(fixed);
236 if (fixed->reg.data.id == repr->reg.data.id)
237 if (fixed->livei.overlaps(jrep->livei))
238 return false;
239 }
240 }
241 if (repr->livei.overlaps(jrep->livei)) {
242 if (!force)
243 return false;
244 // do we really want this ? if at all, only for constraint ops
245 INFO("NOTE: forced coalescing with live range overlap\n");
246 }
247
248 for (DefIterator it = jrep->defs.begin(); it != jrep->defs.end(); ++it)
249 (*it)->get()->join = repr;
250
251 repr->defs.insert(repr->defs.end(),
252 jrep->defs.begin(), jrep->defs.end());
253 repr->livei.unify(jrep->livei);
254
255 assert(repr->join == repr && jval->join == repr);
256 return true;
257 }
258
259 LValue::LValue(Function *fn, DataFile file)
260 {
261 reg.file = file;
262 reg.size = (file != FILE_PREDICATE) ? 4 : 1;
263 reg.data.id = -1;
264
265 affinity = -1;
266
267 fn->add(this, this->id);
268 }
269
270 LValue::LValue(Function *fn, LValue *lval)
271 {
272 assert(lval);
273
274 reg.file = lval->reg.file;
275 reg.size = lval->reg.size;
276 reg.data.id = -1;
277
278 affinity = -1;
279
280 fn->add(this, this->id);
281 }
282
283 LValue *
284 LValue::clone(ClonePolicy<Function>& pol) const
285 {
286 LValue *that = new_LValue(pol.context(), reg.file);
287
288 pol.set<Value>(this, that);
289
290 that->reg.size = this->reg.size;
291 that->reg.type = this->reg.type;
292 that->reg.data = this->reg.data;
293
294 return that;
295 }
296
297 Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
298 {
299 baseSym = NULL;
300
301 reg.file = f;
302 reg.fileIndex = fidx;
303 reg.data.offset = 0;
304
305 prog->add(this, this->id);
306 }
307
308 Symbol *
309 Symbol::clone(ClonePolicy<Function>& pol) const
310 {
311 Program *prog = pol.context()->getProgram();
312
313 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
314
315 pol.set<Value>(this, that);
316
317 that->reg.size = this->reg.size;
318 that->reg.type = this->reg.type;
319 that->reg.data = this->reg.data;
320
321 that->baseSym = this->baseSym;
322
323 return that;
324 }
325
326 ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
327 {
328 memset(&reg, 0, sizeof(reg));
329
330 reg.file = FILE_IMMEDIATE;
331 reg.size = 4;
332 reg.type = TYPE_U32;
333
334 reg.data.u32 = uval;
335
336 prog->add(this, this->id);
337 }
338
339 ImmediateValue::ImmediateValue(Program *prog, float fval)
340 {
341 memset(&reg, 0, sizeof(reg));
342
343 reg.file = FILE_IMMEDIATE;
344 reg.size = 4;
345 reg.type = TYPE_F32;
346
347 reg.data.f32 = fval;
348
349 prog->add(this, this->id);
350 }
351
352 ImmediateValue::ImmediateValue(Program *prog, double dval)
353 {
354 memset(&reg, 0, sizeof(reg));
355
356 reg.file = FILE_IMMEDIATE;
357 reg.size = 8;
358 reg.type = TYPE_F64;
359
360 reg.data.f64 = dval;
361
362 prog->add(this, this->id);
363 }
364
365 ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
366 {
367 reg = proto->reg;
368
369 reg.type = ty;
370 reg.size = typeSizeof(ty);
371 }
372
373 ImmediateValue *
374 ImmediateValue::clone(ClonePolicy<Function>& pol) const
375 {
376 Program *prog = pol.context()->getProgram();
377 ImmediateValue *that = new_ImmediateValue(prog, 0u);
378
379 pol.set<Value>(this, that);
380
381 that->reg.size = this->reg.size;
382 that->reg.type = this->reg.type;
383 that->reg.data = this->reg.data;
384
385 return that;
386 }
387
388 bool
389 ImmediateValue::isInteger(const int i) const
390 {
391 switch (reg.type) {
392 case TYPE_S8:
393 return reg.data.s8 == i;
394 case TYPE_U8:
395 return reg.data.u8 == i;
396 case TYPE_S16:
397 return reg.data.s16 == i;
398 case TYPE_U16:
399 return reg.data.u16 == i;
400 case TYPE_S32:
401 case TYPE_U32:
402 return reg.data.s32 == i; // as if ...
403 case TYPE_F32:
404 return reg.data.f32 == static_cast<float>(i);
405 case TYPE_F64:
406 return reg.data.f64 == static_cast<double>(i);
407 default:
408 return false;
409 }
410 }
411
412 bool
413 ImmediateValue::isNegative() const
414 {
415 switch (reg.type) {
416 case TYPE_S8: return reg.data.s8 < 0;
417 case TYPE_S16: return reg.data.s16 < 0;
418 case TYPE_S32:
419 case TYPE_U32: return reg.data.s32 < 0;
420 case TYPE_F32: return reg.data.u32 & (1 << 31);
421 case TYPE_F64: return reg.data.u64 & (1ULL << 63);
422 default:
423 return false;
424 }
425 }
426
427 bool
428 ImmediateValue::isPow2() const
429 {
430 switch (reg.type) {
431 case TYPE_U8:
432 case TYPE_U16:
433 case TYPE_U32: return util_is_power_of_two(reg.data.u32);
434 default:
435 return false;
436 }
437 }
438
439 void
440 ImmediateValue::applyLog2()
441 {
442 switch (reg.type) {
443 case TYPE_S8:
444 case TYPE_S16:
445 case TYPE_S32:
446 assert(!this->isNegative());
447 // fall through
448 case TYPE_U8:
449 case TYPE_U16:
450 case TYPE_U32:
451 reg.data.u32 = util_logbase2(reg.data.u32);
452 break;
453 case TYPE_F32:
454 reg.data.f32 = log2f(reg.data.f32);
455 break;
456 case TYPE_F64:
457 reg.data.f64 = log2(reg.data.f64);
458 break;
459 default:
460 assert(0);
461 break;
462 }
463 }
464
465 bool
466 ImmediateValue::compare(CondCode cc, float fval) const
467 {
468 if (reg.type != TYPE_F32)
469 ERROR("immediate value is not of type f32");
470
471 switch (static_cast<CondCode>(cc & 7)) {
472 case CC_TR: return true;
473 case CC_FL: return false;
474 case CC_LT: return reg.data.f32 < fval;
475 case CC_LE: return reg.data.f32 <= fval;
476 case CC_GT: return reg.data.f32 > fval;
477 case CC_GE: return reg.data.f32 >= fval;
478 case CC_EQ: return reg.data.f32 == fval;
479 case CC_NE: return reg.data.f32 != fval;
480 default:
481 assert(0);
482 return false;
483 }
484 }
485
486 bool
487 Value::interfers(const Value *that) const
488 {
489 uint32_t idA, idB;
490
491 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
492 return false;
493 if (this->asImm())
494 return false;
495
496 if (this->asSym()) {
497 idA = this->join->reg.data.offset;
498 idB = that->join->reg.data.offset;
499 } else {
500 idA = this->join->reg.data.id * this->reg.size;
501 idB = that->join->reg.data.id * that->reg.size;
502 }
503
504 if (idA < idB)
505 return (idA + this->reg.size > idB);
506 else
507 if (idA > idB)
508 return (idB + that->reg.size > idA);
509 else
510 return (idA == idB);
511 }
512
513 bool
514 Value::equals(const Value *that, bool strict) const
515 {
516 that = that->join;
517
518 if (strict)
519 return this == that;
520
521 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
522 return false;
523 if (that->reg.size != this->reg.size)
524 return false;
525
526 if (that->reg.data.id != this->reg.data.id)
527 return false;
528
529 return true;
530 }
531
532 bool
533 ImmediateValue::equals(const Value *that, bool strict) const
534 {
535 const ImmediateValue *imm = that->asImm();
536 if (!imm)
537 return false;
538 return reg.data.u64 == imm->reg.data.u64;
539 }
540
541 bool
542 Symbol::equals(const Value *that, bool strict) const
543 {
544 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
545 return false;
546 assert(that->asSym());
547
548 if (this->baseSym != that->asSym()->baseSym)
549 return false;
550
551 return this->reg.data.offset == that->reg.data.offset;
552 }
553
554 void Instruction::init()
555 {
556 next = prev = 0;
557
558 cc = CC_ALWAYS;
559 rnd = ROUND_N;
560 cache = CACHE_CA;
561 subOp = 0;
562
563 saturate = 0;
564 join = terminator = 0;
565 ftz = dnz = 0;
566 atomic = 0;
567 perPatch = 0;
568 fixed = 0;
569 encSize = 0;
570 ipa = 0;
571
572 lanes = 0xf;
573
574 postFactor = 0;
575
576 predSrc = -1;
577 flagsDef = -1;
578 flagsSrc = -1;
579 }
580
581 Instruction::Instruction()
582 {
583 init();
584
585 op = OP_NOP;
586 dType = sType = TYPE_F32;
587
588 id = -1;
589 bb = 0;
590 }
591
592 Instruction::Instruction(Function *fn, operation opr, DataType ty)
593 {
594 init();
595
596 op = opr;
597 dType = sType = ty;
598
599 fn->add(this, id);
600 }
601
602 Instruction::~Instruction()
603 {
604 if (bb) {
605 Function *fn = bb->getFunction();
606 bb->remove(this);
607 fn->allInsns.remove(id);
608 }
609
610 for (int s = 0; srcExists(s); ++s)
611 setSrc(s, NULL);
612 // must unlink defs too since the list pointers will get deallocated
613 for (int d = 0; defExists(d); ++d)
614 setDef(d, NULL);
615 }
616
617 void
618 Instruction::setDef(int i, Value *val)
619 {
620 int size = defs.size();
621 if (i >= size) {
622 defs.resize(i + 1);
623 while (size <= i)
624 defs[size++].setInsn(this);
625 }
626 defs[i].set(val);
627 }
628
629 void
630 Instruction::setSrc(int s, Value *val)
631 {
632 int size = srcs.size();
633 if (s >= size) {
634 srcs.resize(s + 1);
635 while (size <= s)
636 srcs[size++].setInsn(this);
637 }
638 srcs[s].set(val);
639 }
640
641 void
642 Instruction::setSrc(int s, const ValueRef& ref)
643 {
644 setSrc(s, ref.get());
645 srcs[s].mod = ref.mod;
646 }
647
648 void
649 Instruction::swapSources(int a, int b)
650 {
651 Value *value = srcs[a].get();
652 Modifier m = srcs[a].mod;
653
654 setSrc(a, srcs[b]);
655
656 srcs[b].set(value);
657 srcs[b].mod = m;
658 }
659
660 void
661 Instruction::takeExtraSources(int s, Value *values[3])
662 {
663 values[0] = getIndirect(s, 0);
664 if (values[0])
665 setIndirect(s, 0, NULL);
666
667 values[1] = getIndirect(s, 1);
668 if (values[1])
669 setIndirect(s, 1, NULL);
670
671 values[2] = getPredicate();
672 if (values[2])
673 setPredicate(cc, NULL);
674 }
675
676 void
677 Instruction::putExtraSources(int s, Value *values[3])
678 {
679 if (values[0])
680 setIndirect(s, 0, values[0]);
681 if (values[1])
682 setIndirect(s, 1, values[1]);
683 if (values[2])
684 setPredicate(cc, values[2]);
685 }
686
687 Instruction *
688 Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
689 {
690 if (!i)
691 i = new_Instruction(pol.context(), op, dType);
692 assert(typeid(*i) == typeid(*this));
693
694 pol.set<Instruction>(this, i);
695
696 i->sType = sType;
697
698 i->rnd = rnd;
699 i->cache = cache;
700 i->subOp = subOp;
701
702 i->saturate = saturate;
703 i->join = join;
704 i->exit = exit;
705 i->atomic = atomic;
706 i->ftz = ftz;
707 i->dnz = dnz;
708 i->ipa = ipa;
709 i->lanes = lanes;
710 i->perPatch = perPatch;
711
712 i->postFactor = postFactor;
713
714 for (int d = 0; defExists(d); ++d)
715 i->setDef(d, pol.get(getDef(d)));
716
717 for (int s = 0; srcExists(s); ++s) {
718 i->setSrc(s, pol.get(getSrc(s)));
719 i->src(s).mod = src(s).mod;
720 }
721
722 i->cc = cc;
723 i->predSrc = predSrc;
724 i->flagsDef = flagsDef;
725 i->flagsSrc = flagsSrc;
726
727 return i;
728 }
729
730 unsigned int
731 Instruction::defCount(unsigned int mask) const
732 {
733 unsigned int i, n;
734
735 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
736 n += mask & 1;
737 return n;
738 }
739
740 unsigned int
741 Instruction::srcCount(unsigned int mask) const
742 {
743 unsigned int i, n;
744
745 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
746 n += mask & 1;
747 return n;
748 }
749
750 bool
751 Instruction::setIndirect(int s, int dim, Value *value)
752 {
753 assert(this->srcExists(s));
754
755 int p = srcs[s].indirect[dim];
756 if (p < 0) {
757 if (!value)
758 return true;
759 p = srcs.size();
760 while (p > 0 && !srcExists(p - 1))
761 --p;
762 }
763 setSrc(p, value);
764 srcs[p].usedAsPtr = (value != 0);
765 srcs[s].indirect[dim] = value ? p : -1;
766 return true;
767 }
768
769 bool
770 Instruction::setPredicate(CondCode ccode, Value *value)
771 {
772 cc = ccode;
773
774 if (!value) {
775 if (predSrc >= 0) {
776 srcs[predSrc].set(NULL);
777 predSrc = -1;
778 }
779 return true;
780 }
781
782 if (predSrc < 0) {
783 predSrc = srcs.size();
784 while (predSrc > 0 && !srcExists(predSrc - 1))
785 --predSrc;
786 }
787
788 setSrc(predSrc, value);
789 return true;
790 }
791
792 bool
793 Instruction::writesPredicate() const
794 {
795 for (int d = 0; defExists(d); ++d)
796 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
797 return true;
798 return false;
799 }
800
801 static bool
802 insnCheckCommutation(const Instruction *a, const Instruction *b)
803 {
804 for (int d = 0; a->defExists(d); ++d)
805 for (int s = 0; b->srcExists(s); ++s)
806 if (a->getDef(d)->interfers(b->getSrc(s)))
807 return false;
808 return true;
809 }
810
811 bool
812 Instruction::isCommutationLegal(const Instruction *i) const
813 {
814 bool ret = true;
815 ret = ret && insnCheckCommutation(this, i);
816 ret = ret && insnCheckCommutation(i, this);
817 return ret;
818 }
819
820 TexInstruction::TexInstruction(Function *fn, operation op)
821 : Instruction(fn, op, TYPE_F32)
822 {
823 memset(&tex, 0, sizeof(tex));
824
825 tex.rIndirectSrc = -1;
826 tex.sIndirectSrc = -1;
827 }
828
829 TexInstruction::~TexInstruction()
830 {
831 for (int c = 0; c < 3; ++c) {
832 dPdx[c].set(NULL);
833 dPdy[c].set(NULL);
834 }
835 }
836
837 TexInstruction *
838 TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
839 {
840 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
841 new_TexInstruction(pol.context(), op));
842
843 Instruction::clone(pol, tex);
844
845 tex->tex = this->tex;
846
847 if (op == OP_TXD) {
848 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
849 tex->dPdx[c].set(dPdx[c]);
850 tex->dPdy[c].set(dPdy[c]);
851 }
852 }
853
854 return tex;
855 }
856
857 const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
858 {
859 { "1D", 1, 1, false, false, false },
860 { "2D", 2, 2, false, false, false },
861 { "2D_MS", 2, 2, false, false, false },
862 { "3D", 3, 3, false, false, false },
863 { "CUBE", 2, 3, false, true, false },
864 { "1D_SHADOW", 1, 1, false, false, true },
865 { "2D_SHADOW", 2, 2, false, false, true },
866 { "CUBE_SHADOW", 2, 3, false, true, true },
867 { "1D_ARRAY", 1, 2, true, false, false },
868 { "2D_ARRAY", 2, 3, true, false, false },
869 { "2D_MS_ARRAY", 2, 3, true, false, false },
870 { "CUBE_ARRAY", 2, 4, true, true, false },
871 { "1D_ARRAY_SHADOW", 1, 2, true, false, true },
872 { "2D_ARRAY_SHADOW", 2, 3, true, false, true },
873 { "RECT", 2, 2, false, false, false },
874 { "RECT_SHADOW", 2, 2, false, false, true },
875 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },
876 { "BUFFER", 1, 1, false, false, false },
877 };
878
879 CmpInstruction::CmpInstruction(Function *fn, operation op)
880 : Instruction(fn, op, TYPE_F32)
881 {
882 setCond = CC_ALWAYS;
883 }
884
885 CmpInstruction *
886 CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
887 {
888 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
889 new_CmpInstruction(pol.context(), op));
890 cmp->dType = dType;
891 Instruction::clone(pol, cmp);
892 cmp->setCond = setCond;
893 return cmp;
894 }
895
896 FlowInstruction::FlowInstruction(Function *fn, operation op,
897 BasicBlock *targ)
898 : Instruction(fn, op, TYPE_NONE)
899 {
900 target.bb = targ;
901
902 if (op == OP_BRA ||
903 op == OP_CONT || op == OP_BREAK ||
904 op == OP_RET || op == OP_EXIT)
905 terminator = 1;
906 else
907 if (op == OP_JOIN)
908 terminator = targ ? 1 : 0;
909
910 allWarp = absolute = limit = 0;
911 }
912
913 FlowInstruction *
914 FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
915 {
916 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
917 new_FlowInstruction(pol.context(), op, NULL));
918
919 Instruction::clone(pol, flow);
920 flow->allWarp = allWarp;
921 flow->absolute = absolute;
922 flow->limit = limit;
923 flow->builtin = builtin;
924
925 if (builtin)
926 flow->target.builtin = target.builtin;
927 else
928 if (op == OP_CALL)
929 flow->target.fn = target.fn;
930 else
931 if (target.bb)
932 flow->target.bb = pol.get<BasicBlock>(target.bb);
933
934 return flow;
935 }
936
937 Program::Program(Type type, Target *arch)
938 : progType(type),
939 target(arch),
940 mem_Instruction(sizeof(Instruction), 6),
941 mem_CmpInstruction(sizeof(CmpInstruction), 4),
942 mem_TexInstruction(sizeof(TexInstruction), 4),
943 mem_FlowInstruction(sizeof(FlowInstruction), 4),
944 mem_LValue(sizeof(LValue), 8),
945 mem_Symbol(sizeof(Symbol), 7),
946 mem_ImmediateValue(sizeof(ImmediateValue), 7)
947 {
948 code = NULL;
949 binSize = 0;
950
951 maxGPR = -1;
952
953 main = new Function(this, "MAIN");
954
955 dbgFlags = 0;
956 }
957
958 Program::~Program()
959 {
960 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
961 delete reinterpret_cast<Function *>(it.get());
962
963 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
964 releaseValue(reinterpret_cast<Value *>(it.get()));
965 }
966
967 void Program::releaseInstruction(Instruction *insn)
968 {
969 // TODO: make this not suck so much
970
971 insn->~Instruction();
972
973 if (insn->asCmp())
974 mem_CmpInstruction.release(insn);
975 else
976 if (insn->asTex())
977 mem_TexInstruction.release(insn);
978 else
979 if (insn->asFlow())
980 mem_FlowInstruction.release(insn);
981 else
982 mem_Instruction.release(insn);
983 }
984
985 void Program::releaseValue(Value *value)
986 {
987 value->~Value();
988
989 if (value->asLValue())
990 mem_LValue.release(value);
991 else
992 if (value->asImm())
993 mem_ImmediateValue.release(value);
994 else
995 if (value->asSym())
996 mem_Symbol.release(value);
997 }
998
999
1000 } // namespace nv50_ir
1001
1002 extern "C" {
1003
1004 static void
1005 nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
1006 {
1007 #if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN)
1008 if (info->type == PIPE_SHADER_HULL || info->type == PIPE_SHADER_DOMAIN) {
1009 info->prop.tp.domain = PIPE_PRIM_MAX;
1010 info->prop.tp.outputPrim = PIPE_PRIM_MAX;
1011 }
1012 #endif
1013 if (info->type == PIPE_SHADER_GEOMETRY) {
1014 info->prop.gp.instanceCount = 1;
1015 info->prop.gp.maxVertices = 1;
1016 }
1017 info->io.clipDistance = 0xff;
1018 info->io.pointSize = 0xff;
1019 info->io.vertexId = 0xff;
1020 info->io.edgeFlagIn = 0xff;
1021 info->io.edgeFlagOut = 0xff;
1022 info->io.fragDepth = 0xff;
1023 info->io.sampleMask = 0xff;
1024 info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
1025 }
1026
1027 int
1028 nv50_ir_generate_code(struct nv50_ir_prog_info *info)
1029 {
1030 int ret = 0;
1031
1032 nv50_ir::Program::Type type;
1033
1034 nv50_ir_init_prog_info(info);
1035
1036 #define PROG_TYPE_CASE(a, b) \
1037 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1038
1039 switch (info->type) {
1040 PROG_TYPE_CASE(VERTEX, VERTEX);
1041 // PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
1042 // PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
1043 PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1044 PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1045 default:
1046 type = nv50_ir::Program::TYPE_COMPUTE;
1047 break;
1048 }
1049 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1050
1051 nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1052 if (!targ)
1053 return -1;
1054
1055 nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1056 if (!prog)
1057 return -1;
1058 prog->dbgFlags = info->dbgFlags;
1059
1060 switch (info->bin.sourceRep) {
1061 #if 0
1062 case PIPE_IR_LLVM:
1063 case PIPE_IR_GLSL:
1064 return -1;
1065 case PIPE_IR_SM4:
1066 ret = prog->makeFromSM4(info) ? 0 : -2;
1067 break;
1068 case PIPE_IR_TGSI:
1069 #endif
1070 default:
1071 ret = prog->makeFromTGSI(info) ? 0 : -2;
1072 break;
1073 }
1074 if (ret < 0)
1075 goto out;
1076 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1077 prog->print();
1078
1079 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1080
1081 prog->convertToSSA();
1082
1083 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1084 prog->print();
1085
1086 prog->optimizeSSA(info->optLevel);
1087 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1088
1089 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1090 prog->print();
1091
1092 if (!prog->registerAllocation()) {
1093 ret = -4;
1094 goto out;
1095 }
1096 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1097
1098 prog->optimizePostRA(info->optLevel);
1099
1100 if (!prog->emitBinary(info)) {
1101 ret = -5;
1102 goto out;
1103 }
1104
1105 out:
1106 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1107
1108 info->bin.maxGPR = prog->maxGPR;
1109 info->bin.code = prog->code;
1110 info->bin.codeSize = prog->binSize;
1111
1112 delete prog;
1113 nv50_ir::Target::destroy(targ);
1114
1115 return ret;
1116 }
1117
1118 } // extern "C"