2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nv50_ir_target.h"
25 #include "nv50_ir_driver.h"
28 #include "nv50/nv50_program.h"
29 #include "nv50/nv50_debug.h"
// Construct the Modifier whose bits correspond to a unary operation:
// NEG, ABS, SAT and NOT each select the matching NV50_IR_MOD_* bit.
// NOTE(review): the switch header and any default case are not visible in
// this listing.
34 Modifier::Modifier(operation op
)
37 case OP_NEG
: bits
= NV50_IR_MOD_NEG
; break;
38 case OP_ABS
: bits
= NV50_IR_MOD_ABS
; break;
39 case OP_SAT
: bits
= NV50_IR_MOD_SAT
; break;
// The NOT modifier bit is selected by the NOT operation; a no-op must not
// produce a modifier.
// NOTE(review): assumes the `operation` enum provides OP_NOT - confirm.
40 case OP_NOT
: bits
= NV50_IR_MOD_NOT
; break;
// Combine the bits of *this and m into a new Modifier.
// NOTE(review): the declarations and initial values of a, b and c are not
// visible in this listing; b presumably starts as m.bits - confirm.
47 Modifier
Modifier::operator*(const Modifier m
) const
// When *this carries ABS, the NEG bit is removed from b.
52 if (this->bits
& NV50_IR_MOD_ABS
)
53 b
&= ~NV50_IR_MOD_NEG
;
// NOT and NEG are merged with XOR, so a bit present on both sides cancels.
55 a
= (this->bits
^ b
) & (NV50_IR_MOD_NOT
| NV50_IR_MOD_NEG
);
// ABS and SAT are merged with OR, so a bit present on either side is kept.
56 c
= (this->bits
| m
.bits
) & (NV50_IR_MOD_ABS
| NV50_IR_MOD_SAT
);
58 return Modifier(a
| c
);
61 ValueRef::ValueRef() : value(0), insn(0), next(this), prev(this)
// Look for an immediate behind this reference.
// NOTE(review): the initialisation of src, the enclosing loop and the return
// statements are not visible in this listing.
73 ImmediateValue
*ValueRef::getImmediate() const
// Test whether the value currently examined lives in the immediate file.
78 if (src
->reg
.file
== FILE_IMMEDIATE
)
81 Instruction
*insn
= src
->getUniqueInsn();
// Continue with source 0 of that instruction only when it is an OP_MOV;
// otherwise src becomes NULL.
83 src
= (insn
&& insn
->op
== OP_MOV
) ? insn
->getSrc(0) : NULL
;
88 ValueDef::ValueDef() : value(0), insn(0), next(this), prev(this)
99 ValueRef::set(const ValueRef
&ref
)
101 this->set(ref
.get());
103 indirect
[0] = ref
.indirect
[0];
104 indirect
[1] = ref
.indirect
[1];
// Point this reference at refVal, maintaining the per-value list of uses.
// NOTE(review): the guards around these statements and the assignment of
// `value` itself are not visible in this listing.
108 ValueRef::set(Value
*refVal
)
// If this reference is the head of the current value's use list, move the
// head to the next entry, or to NULL when this is the only entry
// (next == this, which is how the constructor initialises the links).
113 if (value
->uses
== this)
114 value
->uses
= (next
== this) ? NULL
: next
;
// Append this reference to the use list of the new value.
121 DLLIST_ADDTAIL(refVal
->uses
, this);
// Make this definition define defVal, maintaining the per-value list of defs.
// NOTE(review): the guards around these statements and the assignment of
// `value` itself are not visible in this listing.
130 ValueDef::set(Value
*defVal
)
132 assert(next
!= this || prev
== this); // check that SSA hack isn't active
// If this definition is the head of the current value's def list, move the
// head to the next entry, or to NULL when this is the only entry.
137 if (value
->defs
== this)
138 value
->defs
= (next
== this) ? NULL
: next
;
// Append this definition to the def list of the new value.
144 DLLIST_ADDTAIL(defVal
->defs
, this);
151 // TODO: make me faster by using a safe iterator
// Redirect the references to this definition's value so they refer to repVal.
// The references are first collected into a temporary array and only then
// re-pointed with ValueRef::set().
// NOTE(review): the declaration of n, the use of doSet and the release of
// `refs` are not visible in this listing.
153 ValueDef::replace(Value
*repVal
, bool doSet
)
// One slot per current reference to the value.
155 ValueRef
**refs
= new ValueRef
* [value
->refCount()];
// NOTE(review): a standard (throwing) new[] does not return null, so this
// check presumably only matters with a non-throwing allocator - confirm.
158 if (!refs
&& value
->refCount())
159 FATAL("memory allocation failed");
// Pass 1: record every reference found on the value's use list.
161 for (ValueRef::Iterator iter
= value
->uses
->iterator(); !iter
.end();
163 assert(n
< value
->refCount());
164 refs
[n
++] = iter
.get();
// Pass 2: re-point the recorded references, last recorded first.
167 refs
[--n
]->set(repVal
);
177 ValueDef::mergeDefs(ValueDef
*join
)
179 DLLIST_MERGE(this, join
, ValueDef
*);
// Zero-initialise the whole reg descriptor.
189 memset(&reg
, 0, sizeof(reg
));
// Try to merge jval into the same join group as this value, so that both
// share one representative (repr).
// NOTE(review): the return statements and several closing braces are not
// visible in this listing, so the exact result of each failing check cannot
// be stated here.
194 Value::coalesce(Value
*jval
, bool force
)
196 Value
*repr
= this->join
; // new representative
197 Value
*jrep
= jval
->join
;
// Values in different register files or of different sizes are rejected /
// reported.
199 if (reg
.file
!= jval
->reg
.file
|| reg
.size
!= jval
->reg
.size
) {
202 ERROR("forced coalescing of values of different sizes/files");
// Without `force`, differing register ids on the two representatives need
// extra checks.
205 if (!force
&& (repr
->reg
.data
.id
!= jrep
->reg
.data
.id
)) {
// Both representatives already have an id (>= 0) and the ids differ.
206 if (repr
->reg
.data
.id
>= 0 &&
207 jrep
->reg
.data
.id
>= 0)
// Only jrep has an id.
// NOTE(review): the statements inside this branch are not visible; the
// code below reads as if repr/jrep were exchanged here - confirm.
209 if (jrep
->reg
.data
.id
>= 0) {
215 // need to check all fixed register values of the program for overlap
216 Function
*func
= defs
->getInsn()->bb
->getFunction();
218 // TODO: put values in by register-id bins per function
219 ArrayList::Iterator iter
= func
->allLValues
.iterator();
220 for (; !iter
.end(); iter
.next()) {
221 Value
*fixed
= reinterpret_cast<Value
*>(iter
.get());
// A value with the same id as repr whose live interval overlaps jrep's is
// a conflict.
223 if (fixed
->reg
.data
.id
== repr
->reg
.data
.id
)
224 if (fixed
->livei
.overlaps(jrep
->livei
))
// The live intervals of the two representatives overlap.
228 if (repr
->livei
.overlaps(jrep
->livei
)) {
231 // do we really want this ? if at all, only for constraint ops
232 INFO("NOTE: forced coalescing with live range overlap\n");
// Commit: every value defined through jrep's def list now joins repr ...
235 ValueDef::Iterator iter
= jrep
->defs
->iterator();
236 for (; !iter
.end(); iter
.next())
237 iter
.get()->get()->join
= repr
;
// ... the def lists are merged and the live intervals unified.
239 repr
->defs
->mergeDefs(jrep
->defs
);
240 repr
->livei
.unify(jrep
->livei
);
242 assert(repr
->join
== repr
&& jval
->join
== repr
);
246 LValue::LValue(Function
*fn
, DataFile file
)
249 reg
.size
= (file
!= FILE_PREDICATE
) ? 4 : 1;
254 fn
->add(this, this->id
);
257 LValue::LValue(Function
*fn
, LValue
*lval
)
261 reg
.file
= lval
->reg
.file
;
262 reg
.size
= lval
->reg
.size
;
267 fn
->add(this, this->id
);
270 Value
*LValue::clone(Function
*func
) const
272 LValue
*that
= new_LValue(func
, reg
.file
);
274 that
->reg
.size
= this->reg
.size
;
275 that
->reg
.type
= this->reg
.type
;
276 that
->reg
.data
= this->reg
.data
;
281 Symbol::Symbol(Program
*prog
, DataFile f
, ubyte fidx
)
286 reg
.fileIndex
= fidx
;
289 prog
->add(this, this->id
);
293 Symbol::clone(Function
*func
) const
295 Program
*prog
= func
->getProgram();
297 Symbol
*that
= new_Symbol(prog
, reg
.file
, reg
.fileIndex
);
299 that
->reg
.size
= this->reg
.size
;
300 that
->reg
.type
= this->reg
.type
;
301 that
->reg
.data
= this->reg
.data
;
303 that
->baseSym
= this->baseSym
;
// Construct an immediate from a 32-bit unsigned value and add it to prog.
// NOTE(review): the statements that store uval and set the size/type fields
// are not visible in this listing.
308 ImmediateValue::ImmediateValue(Program
*prog
, uint32_t uval
)
// Zero the whole reg descriptor before individual fields are assigned.
310 memset(&reg
, 0, sizeof(reg
));
312 reg
.file
= FILE_IMMEDIATE
;
// Add the new value to the program under this->id.
318 prog
->add(this, this->id
);
// Construct an immediate from a float value and add it to prog.
// NOTE(review): the statements that store fval and set the size/type fields
// are not visible in this listing.
321 ImmediateValue::ImmediateValue(Program
*prog
, float fval
)
// Zero the whole reg descriptor before individual fields are assigned.
323 memset(&reg
, 0, sizeof(reg
));
325 reg
.file
= FILE_IMMEDIATE
;
// Add the new value to the program under this->id.
331 prog
->add(this, this->id
);
// Construct an immediate from a double value and add it to prog.
// NOTE(review): the statements that store dval and set the size/type fields
// are not visible in this listing.
334 ImmediateValue::ImmediateValue(Program
*prog
, double dval
)
// Zero the whole reg descriptor before individual fields are assigned.
336 memset(&reg
, 0, sizeof(reg
));
338 reg
.file
= FILE_IMMEDIATE
;
// Add the new value to the program under this->id.
344 prog
->add(this, this->id
);
347 ImmediateValue::ImmediateValue(const ImmediateValue
*proto
, DataType ty
)
352 reg
.size
= typeSizeof(ty
);
// Report whether the stored immediate equals the integer i.  Each return
// reads the union member matching one storage type.
// NOTE(review): the switch header and case labels selecting between these
// returns are not visible in this listing.
356 ImmediateValue::isInteger(const int i
) const
// 8-bit signed / unsigned.
360 return reg
.data
.s8
== i
;
362 return reg
.data
.u8
== i
;
// 16-bit signed / unsigned.
364 return reg
.data
.s16
== i
;
366 return reg
.data
.u16
== i
;
369 return reg
.data
.s32
== i
; // as if ...
// Floating-point members are compared against i converted to that type.
371 return reg
.data
.f32
== static_cast<float>(i
);
373 return reg
.data
.f64
== static_cast<double>(i
);
// Report whether the stored immediate is negative.  Integer types compare
// the signed member against 0; floating-point types test the top bit of the
// same-width unsigned member.
// NOTE(review): the switch header, any label falling through into the
// TYPE_U32 case, and the default case are not visible in this listing.
380 ImmediateValue::isNegative() const
383 case TYPE_S8
: return reg
.data
.s8
< 0;
384 case TYPE_S16
: return reg
.data
.s16
< 0;
386 case TYPE_U32
: return reg
.data
.s32
< 0;
// Unsigned literal: shifting a signed 1 into bit 31 is not portable.
387 case TYPE_F32
: return reg
.data
.u32
& (1U << 31);
388 case TYPE_F64
: return reg
.data
.u64
& (1ULL << 63);
395 ImmediateValue::isPow2() const
400 case TYPE_U32
: return util_is_power_of_two(reg
.data
.u32
);
407 ImmediateValue::applyLog2()
413 assert(!this->isNegative());
418 reg
.data
.u32
= util_logbase2(reg
.data
.u32
);
421 reg
.data
.f32
= log2f(reg
.data
.f32
);
424 reg
.data
.f64
= log2(reg
.data
.f64
);
// Evaluate the condition `cc` between the stored f32 value (left operand)
// and fval (right operand).
// NOTE(review): the default case / final return is not visible in this
// listing.
433 ImmediateValue::compare(CondCode cc
, float fval
) const
// A non-f32 immediate is reported; no early return is visible after the
// report.
435 if (reg
.type
!= TYPE_F32
)
436 ERROR("immediate value is not of type f32");
// Only the low three bits of cc select the comparison.
438 switch (static_cast<CondCode
>(cc
& 7)) {
439 case CC_TR
: return true;
440 case CC_FL
: return false;
441 case CC_LT
: return reg
.data
.f32
< fval
;
442 case CC_LE
: return reg
.data
.f32
<= fval
;
443 case CC_GT
: return reg
.data
.f32
> fval
;
444 case CC_GE
: return reg
.data
.f32
>= fval
;
445 case CC_EQ
: return reg
.data
.f32
== fval
;
446 case CC_NE
: return reg
.data
.f32
!= fval
;
// Test whether this value and `that` occupy overlapping storage.
// NOTE(review): the declarations of idA/idB, the conditions choosing between
// the branches below and the result of the first check are not visible in
// this listing.
454 Value::interfers(const Value
*that
) const
// Different register file or file index.
458 if (that
->reg
.file
!= reg
.file
|| that
->reg
.fileIndex
!= reg
.fileIndex
)
// Start positions taken directly from the representatives' offsets ...
464 idA
= this->join
->reg
.data
.offset
;
465 idB
= that
->join
->reg
.data
.offset
;
// ... or computed as the representatives' ids scaled by the value size.
467 idA
= this->join
->reg
.data
.id
* this->reg
.size
;
468 idB
= that
->join
->reg
.data
.id
* that
->reg
.size
;
// Range test: does [idA, idA + size) extend past idB ...
472 return (idA
+ this->reg
.size
> idB
);
// ... or does [idB, idB + size) extend past idA.
// NOTE(review): presumably selected by the relative order of idA and idB.
475 return (idB
+ that
->reg
.size
> idA
);
// Compare this value with `that` by register file, file index, size and id.
// NOTE(review): the return statements and the use of `strict` are not
// visible in this listing.
481 Value::equals(const Value
*that
, bool strict
) const
// Register file and file index must match.
488 if (that
->reg
.file
!= reg
.file
|| that
->reg
.fileIndex
!= reg
.fileIndex
)
// Sizes must match.
490 if (that
->reg
.size
!= this->reg
.size
)
// Register ids must match.
493 if (that
->reg
.data
.id
!= this->reg
.data
.id
)
// Compare two immediates by the raw 64-bit view of their stored data.
// NOTE(review): any check on the result of asImm() and the use of `strict`
// are not visible in this listing.
500 ImmediateValue::equals(const Value
*that
, bool strict
) const
502 const ImmediateValue
*imm
= that
->asImm();
505 return reg
.data
.u64
== imm
->reg
.data
.u64
;
// Compare two symbols: register file, file index, base symbol and offset.
// NOTE(review): the early-return statements and the use of `strict` are not
// visible in this listing.
509 Symbol::equals(const Value
*that
, bool strict
) const
// Register file and file index must match.
511 if (reg
.file
!= that
->reg
.file
|| reg
.fileIndex
!= that
->reg
.fileIndex
)
// `that` is expected to be a Symbol from here on.
513 assert(that
->asSym());
// The base symbols must be the same object.
515 if (this->baseSym
!= that
->asSym()->baseSym
)
// Finally the offsets decide.
518 return this->reg
.data
.offset
== that
->reg
.data
.offset
;
521 void Instruction::init()
531 join
= terminator
= 0;
543 for (int p
= 0; p
< NV50_IR_MAX_DEFS
; ++p
)
544 def
[p
].setInsn(this);
545 for (int p
= 0; p
< NV50_IR_MAX_SRCS
; ++p
)
546 src
[p
].setInsn(this);
553 Instruction::Instruction()
558 dType
= sType
= TYPE_F32
;
564 Instruction::Instruction(Function
*fn
, operation opr
, DataType ty
)
574 Instruction::~Instruction()
577 Function
*fn
= bb
->getFunction();
579 fn
->allInsns
.remove(id
);
582 for (int s
= 0; srcExists(s
); ++s
)
584 // must unlink defs too since the list pointers will get deallocated
585 for (int d
= 0; defExists(d
); ++d
)
590 Instruction::setSrc(int s
, ValueRef
& ref
)
592 setSrc(s
, ref
.get());
593 src
[s
].mod
= ref
.mod
;
597 Instruction::swapSources(int a
, int b
)
599 Value
*value
= src
[a
].get();
600 Modifier m
= src
[a
].mod
;
609 Instruction::takeExtraSources(int s
, Value
*values
[3])
611 values
[0] = getIndirect(s
, 0);
613 setIndirect(s
, 0, NULL
);
615 values
[1] = getIndirect(s
, 1);
617 setIndirect(s
, 1, NULL
);
619 values
[2] = getPredicate();
621 setPredicate(cc
, NULL
);
625 Instruction::putExtraSources(int s
, Value
*values
[3])
628 setIndirect(s
, 0, values
[0]);
630 setIndirect(s
, 1, values
[1]);
632 setPredicate(cc
, values
[2]);
636 Instruction::clone(bool deep
) const
638 Instruction
*insn
= new_Instruction(bb
->getFunction(), op
, dType
);
639 assert(!asCmp() && !asFlow());
640 cloneBase(insn
, deep
);
645 Instruction::cloneBase(Instruction
*insn
, bool deep
) const
647 insn
->sType
= this->sType
;
650 insn
->rnd
= this->rnd
;
651 insn
->cache
= this->cache
;
652 insn
->subOp
= this->subOp
;
654 insn
->saturate
= this->saturate
;
655 insn
->atomic
= this->atomic
;
656 insn
->ftz
= this->ftz
;
657 insn
->dnz
= this->dnz
;
658 insn
->ipa
= this->ipa
;
659 insn
->lanes
= this->lanes
;
660 insn
->perPatch
= this->perPatch
;
662 insn
->postFactor
= this->postFactor
;
667 Function
*fn
= bb
->getFunction();
668 for (int d
= 0; this->defExists(d
); ++d
)
669 insn
->setDef(d
, this->getDef(d
)->clone(fn
));
671 for (int d
= 0; this->defExists(d
); ++d
)
672 insn
->setDef(d
, this->getDef(d
));
675 for (int s
= 0; this->srcExists(s
); ++s
)
676 insn
->src
[s
].set(this->src
[s
]);
678 insn
->predSrc
= this->predSrc
;
679 insn
->flagsDef
= this->flagsDef
;
680 insn
->flagsSrc
= this->flagsSrc
;
684 Instruction::defCount(unsigned int mask
) const
688 for (n
= 0, i
= 0; this->defExists(i
); ++i
, mask
>>= 1)
694 Instruction::srcCount(unsigned int mask
) const
698 for (n
= 0, i
= 0; this->srcExists(i
); ++i
, mask
>>= 1)
// Attach (or, with a null value, detach) the indirect-address value for
// dimension `dim` of source `s`.  The address value occupies a source slot
// of its own, whose index is recorded in src[s].indirect[dim].
// NOTE(review): the conditions between these statements and the store of
// `value` into src[p] are not visible in this listing.
704 Instruction::setIndirect(int s
, int dim
, Value
*value
)
// Slot currently recorded for this source/dimension.  Note that src[s] is
// indexed before the assert below validates s.
706 int p
= src
[s
].indirect
[dim
];
708 assert(this->srcExists(s
));
// Scan forward from s + 1 to the first slot for which srcExists() is false
// (the loop body is intentionally empty).
712 for (p
= s
+ 1; this->srcExists(p
); ++p
);
714 assert(p
< NV50_IR_MAX_SRCS
);
// The slot is flagged as a pointer only when a value is attached.
717 src
[p
].usedAsPtr
= (value
!= 0);
// Record the slot index, or -1 when there is no indirect value.
718 src
[s
].indirect
[dim
] = value
? p
: -1;
723 Instruction::setPredicate(CondCode ccode
, Value
*value
)
737 for (s
= 0; this->srcExists(s
); ++s
)
738 assert(s
< NV50_IR_MAX_SRCS
);
741 src
[predSrc
] = value
;
// Check whether a definition of this instruction lives in the predicate or
// flags file.
// NOTE(review): the return statements are not visible in this listing.
746 Instruction::writesPredicate() const
// Only def slots 0 and 1 are examined, stopping at the first missing def.
748 for (int d
= 0; d
< 2 && def
[d
].exists(); ++d
)
// The exists() test here repeats the loop condition.
749 if (def
[d
].exists() &&
750 (getDef(d
)->inFile(FILE_PREDICATE
) || getDef(d
)->inFile(FILE_FLAGS
)))
// Test every definition of `a` against every source of `b` with
// Value::interfers().
// NOTE(review): the return statements are not visible in this listing.
756 insnCheckCommutation(const Instruction
*a
, const Instruction
*b
)
758 for (int d
= 0; a
->defExists(d
); ++d
)
759 for (int s
= 0; b
->srcExists(s
); ++s
)
// A def of `a` overlapping a source of `b`.
760 if (a
->getDef(d
)->interfers(b
->getSrc(s
)))
// Decide whether this instruction and `i` may be commuted by running
// insnCheckCommutation() in both directions.
// NOTE(review): the declaration/initial value of ret and the return are not
// visible in this listing.
766 Instruction::isCommutationLegal(const Instruction
*i
) const
// The && short-circuits: the second check is skipped once ret is false.
769 ret
= ret
&& insnCheckCommutation(this, i
);
770 ret
= ret
&& insnCheckCommutation(i
, this);
774 TexInstruction::TexInstruction(Function
*fn
, operation op
)
775 : Instruction(fn
, op
, TYPE_F32
)
777 memset(&tex
, 0, sizeof(tex
));
779 tex
.rIndirectSrc
= -1;
780 tex
.sIndirectSrc
= -1;
783 TexInstruction::~TexInstruction()
785 for (int c
= 0; c
< 3; ++c
) {
792 TexInstruction::clone(bool deep
) const
794 TexInstruction
*tex
= new_TexInstruction(bb
->getFunction(), op
);
795 cloneBase(tex
, deep
);
797 tex
->tex
= this->tex
;
800 for (unsigned int c
= 0; c
< tex
->tex
.target
.getDim(); ++c
) {
801 tex
->dPdx
[c
].set(dPdx
[c
]);
802 tex
->dPdy
[c
].set(dPdy
[c
]);
809 const struct TexInstruction::Target::Desc
TexInstruction::Target::descTable
[] =
811 { "1D", 1, 1, false, false, false },
812 { "2D", 2, 2, false, false, false },
813 { "2D_MS", 2, 2, false, false, false },
814 { "3D", 3, 3, false, false, false },
815 { "CUBE", 2, 3, false, true, false },
816 { "1D_SHADOW", 1, 1, false, false, true },
817 { "2D_SHADOW", 2, 2, false, false, true },
818 { "CUBE_SHADOW", 2, 3, false, true, true },
819 { "1D_ARRAY", 1, 2, true, false, false },
820 { "2D_ARRAY", 2, 3, true, false, false },
821 { "2D_MS_ARRAY", 2, 3, true, false, false },
822 { "CUBE_ARRAY", 2, 4, true, true, false },
823 { "1D_ARRAY_SHADOW", 1, 2, true, false, true },
824 { "2D_ARRAY_SHADOW", 2, 3, true, false, true },
825 { "RECT", 2, 2, false, false, false },
826 { "RECT_SHADOW", 2, 2, false, false, true },
827 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },
828 { "BUFFER", 1, 1, false, false, false },
831 CmpInstruction::CmpInstruction(Function
*fn
, operation op
)
832 : Instruction(fn
, op
, TYPE_F32
)
838 CmpInstruction::clone(bool deep
) const
840 CmpInstruction
*cmp
= new_CmpInstruction(bb
->getFunction(), op
);
841 cloneBase(cmp
, deep
);
842 cmp
->setCond
= setCond
;
847 FlowInstruction::FlowInstruction(Function
*fn
, operation op
,
849 : Instruction(fn
, op
, TYPE_NONE
)
854 op
== OP_CONT
|| op
== OP_BREAK
||
855 op
== OP_RET
|| op
== OP_EXIT
)
859 terminator
= targ
? 1 : 0;
861 allWarp
= absolute
= limit
= 0;
864 Program::Program(Type type
, Target
*arch
)
867 mem_Instruction(sizeof(Instruction
), 6),
868 mem_CmpInstruction(sizeof(CmpInstruction
), 4),
869 mem_TexInstruction(sizeof(TexInstruction
), 4),
870 mem_FlowInstruction(sizeof(FlowInstruction
), 4),
871 mem_LValue(sizeof(LValue
), 8),
872 mem_Symbol(sizeof(Symbol
), 7),
873 mem_ImmediateValue(sizeof(ImmediateValue
), 7)
880 main
= new Function(this, "MAIN");
// Destroy an instruction and give its storage back to one of the program's
// per-class pools.
// NOTE(review): the conditions selecting which pool receives the storage
// are not visible in this listing.
891 void Program::releaseInstruction(Instruction
*insn
)
893 // TODO: make this not suck so much
// Run the destructor explicitly; the storage itself is released below.
895 insn
->~Instruction();
898 mem_CmpInstruction
.release(insn
);
901 mem_TexInstruction
.release(insn
);
904 mem_FlowInstruction
.release(insn
);
906 mem_Instruction
.release(insn
);
// Give a value's storage back to the pool matching its concrete class.
// NOTE(review): only the LValue test is visible; the conditions for the
// other two pools, and any destructor call, are not visible in this listing.
909 void Program::releaseValue(Value
*value
)
911 if (value
->asLValue())
912 mem_LValue
.release(value
);
915 mem_ImmediateValue
.release(value
);
918 mem_Symbol
.release(value
);
922 } // namespace nv50_ir
// Fill in initial values of an nv50_ir_prog_info before translation.
// NOTE(review): the matching #endif and closing braces are not visible in
// this listing.
927 nv50_ir_init_prog_info(struct nv50_ir_prog_info
*info
)
// Tessellation properties are only touched when the pipe headers define both
// tessellation shader types.
929 #if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN)
930 if (info
->type
== PIPE_SHADER_HULL
|| info
->type
== PIPE_SHADER_DOMAIN
) {
931 info
->prop
.tp
.domain
= PIPE_PRIM_MAX
;
932 info
->prop
.tp
.outputPrim
= PIPE_PRIM_MAX
;
// Geometry programs start with one instance and one output vertex.
935 if (info
->type
== PIPE_SHADER_GEOMETRY
) {
936 info
->prop
.gp
.instanceCount
= 1;
937 info
->prop
.gp
.maxVertices
= 1;
// Special I/O slots all start at 0xff.
// NOTE(review): 0xff presumably means "not present" - confirm with users of
// these fields.
939 info
->io
.clipDistance
= 0xff;
940 info
->io
.pointSize
= 0xff;
941 info
->io
.edgeFlagIn
= 0xff;
942 info
->io
.edgeFlagOut
= 0xff;
943 info
->io
.fragDepth
= 0xff;
944 info
->io
.sampleMask
= 0xff;
945 info
->io
.backFaceColor
[0] = info
->io
.backFaceColor
[1] = 0xff;
// Entry point of the code generator: build an nv50_ir::Program from the
// shader described by `info`, run it through the compilation stages and
// store the resulting binary back into info->bin.
// NOTE(review): the declaration of ret, the case labels, the error paths
// and the return statement are not visible in this listing.
949 nv50_ir_generate_code(struct nv50_ir_prog_info
*info
)
953 nv50_ir::Program::Type type
;
// Apply the initial values to info first.
955 nv50_ir_init_prog_info(info
);
// Maps PIPE_SHADER_<a> to nv50_ir::Program::TYPE_<b>.
957 #define PROG_TYPE_CASE(a, b) \
958 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
960 switch (info
->type
) {
961 PROG_TYPE_CASE(VERTEX
, VERTEX
);
962 // PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
963 // PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
964 PROG_TYPE_CASE(GEOMETRY
, GEOMETRY
);
965 PROG_TYPE_CASE(FRAGMENT
, FRAGMENT
);
// NOTE(review): presumably the default case - its label is not visible.
967 type
= nv50_ir::Program::TYPE_COMPUTE
;
970 INFO_DBG(info
->dbgFlags
, VERBOSE
, "translating program of type %u\n", type
);
// The target object is created from info->target and destroyed at the end
// of this function.
972 nv50_ir::Target
*targ
= nv50_ir::Target::create(info
->target
);
976 nv50_ir::Program
*prog
= new nv50_ir::Program(type
, targ
);
979 prog
->dbgFlags
= info
->dbgFlags
;
// Front end: build the IR from the source representation; a failed
// translation yields ret = -2.
981 switch (info
->bin
.sourceRep
) {
987 ret
= prog
->makeFromSM4(info
) ? 0 : -2;
992 ret
= prog
->makeFromTGSI(info
) ? 0 : -2;
997 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
// Stage 1: target legalisation before SSA, then SSA construction.
1000 prog
->getTarget()->runLegalizePass(prog
, nv50_ir::CG_STAGE_PRE_SSA
);
1002 prog
->convertToSSA();
1004 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
// Stage 2: SSA optimisation at the requested level, then legalisation.
1007 prog
->optimizeSSA(info
->optLevel
);
1008 prog
->getTarget()->runLegalizePass(prog
, nv50_ir::CG_STAGE_SSA
);
1010 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
// Stage 3: register allocation; a false result takes the error branch.
1013 if (!prog
->registerAllocation()) {
// Stage 4: post-RA legalisation and optimisation.
1017 prog
->getTarget()->runLegalizePass(prog
, nv50_ir::CG_STAGE_POST_RA
);
1019 prog
->optimizePostRA(info
->optLevel
);
// Stage 5: binary emission; a false result takes the error branch.
1021 if (!prog
->emitBinary(info
)) {
1027 INFO_DBG(prog
->dbgFlags
, VERBOSE
, "nv50_ir_generate_code: ret = %i\n", ret
);
// Publish the results to the caller.
1029 info
->bin
.maxGPR
= prog
->maxGPR
;
1030 info
->bin
.code
= prog
->code
;
1031 info
->bin
.codeSize
= prog
->binSize
;
1034 nv50_ir::Target::destroy(targ
);