2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nv50_ir_target.h"
25 #include "nv50_ir_driver.h"
28 #include "nv50/nv50_program.h"
29 #include "nv50/nv50_debug.h"
34 Modifier::Modifier(operation op
)
37 case OP_NEG
: bits
= NV50_IR_MOD_NEG
; break;
38 case OP_ABS
: bits
= NV50_IR_MOD_ABS
; break;
39 case OP_SAT
: bits
= NV50_IR_MOD_SAT
; break;
40 case OP_NOT
: bits
= NV50_IR_MOD_NOT
; break;
47 Modifier
Modifier::operator*(const Modifier m
) const
52 if (this->bits
& NV50_IR_MOD_ABS
)
53 b
&= ~NV50_IR_MOD_NEG
;
55 a
= (this->bits
^ b
) & (NV50_IR_MOD_NOT
| NV50_IR_MOD_NEG
);
56 c
= (this->bits
| m
.bits
) & (NV50_IR_MOD_ABS
| NV50_IR_MOD_SAT
);
58 return Modifier(a
| c
);
61 ValueRef::ValueRef() : value(NULL
), insn(NULL
)
68 ValueRef::ValueRef(const ValueRef
& ref
) : value(NULL
), insn(ref
.insn
)
71 usedAsPtr
= ref
.usedAsPtr
;
79 ImmediateValue
*ValueRef::getImmediate() const
84 if (src
->reg
.file
== FILE_IMMEDIATE
)
87 Instruction
*insn
= src
->getUniqueInsn();
89 src
= (insn
&& insn
->op
== OP_MOV
) ? insn
->getSrc(0) : NULL
;
94 ValueDef::ValueDef() : value(NULL
), insn(NULL
)
99 ValueDef::ValueDef(const ValueDef
& def
) : value(NULL
), insn(NULL
)
104 ValueDef::~ValueDef()
110 ValueRef::set(const ValueRef
&ref
)
112 this->set(ref
.get());
114 indirect
[0] = ref
.indirect
[0];
115 indirect
[1] = ref
.indirect
[1];
119 ValueRef::set(Value
*refVal
)
124 value
->uses
.remove(this);
126 refVal
->uses
.push_back(this);
132 ValueDef::set(Value
*defVal
)
137 value
->defs
.remove(this);
139 defVal
->defs
.push_back(this);
145 ValueDef::replace(Value
*repVal
, bool doSet
)
150 while (value
->refCount())
151 value
->uses
.front()->set(repVal
);
160 memset(®
, 0, sizeof(reg
));
165 Value::coalesce(Value
*jval
, bool force
)
167 Value
*repr
= this->join
; // new representative
168 Value
*jrep
= jval
->join
;
170 if (reg
.file
!= jval
->reg
.file
|| reg
.size
!= jval
->reg
.size
) {
173 ERROR("forced coalescing of values of different sizes/files");
176 if (!force
&& (repr
->reg
.data
.id
!= jrep
->reg
.data
.id
)) {
177 if (repr
->reg
.data
.id
>= 0 &&
178 jrep
->reg
.data
.id
>= 0)
180 if (jrep
->reg
.data
.id
>= 0) {
186 // need to check all fixed register values of the program for overlap
187 Function
*func
= defs
.front()->getInsn()->bb
->getFunction();
189 // TODO: put values in by register-id bins per function
190 ArrayList::Iterator iter
= func
->allLValues
.iterator();
191 for (; !iter
.end(); iter
.next()) {
192 Value
*fixed
= reinterpret_cast<Value
*>(iter
.get());
194 if (fixed
->reg
.data
.id
== repr
->reg
.data
.id
)
195 if (fixed
->livei
.overlaps(jrep
->livei
))
199 if (repr
->livei
.overlaps(jrep
->livei
)) {
202 // do we really want this ? if at all, only for constraint ops
203 INFO("NOTE: forced coalescing with live range overlap\n");
206 for (DefIterator it
= jrep
->defs
.begin(); it
!= jrep
->defs
.end(); ++it
)
207 (*it
)->get()->join
= repr
;
209 repr
->defs
.insert(repr
->defs
.end(),
210 jrep
->defs
.begin(), jrep
->defs
.end());
211 repr
->livei
.unify(jrep
->livei
);
213 assert(repr
->join
== repr
&& jval
->join
== repr
);
217 LValue::LValue(Function
*fn
, DataFile file
)
220 reg
.size
= (file
!= FILE_PREDICATE
) ? 4 : 1;
225 fn
->add(this, this->id
);
228 LValue::LValue(Function
*fn
, LValue
*lval
)
232 reg
.file
= lval
->reg
.file
;
233 reg
.size
= lval
->reg
.size
;
238 fn
->add(this, this->id
);
241 Value
*LValue::clone(Function
*func
) const
243 LValue
*that
= new_LValue(func
, reg
.file
);
245 that
->reg
.size
= this->reg
.size
;
246 that
->reg
.type
= this->reg
.type
;
247 that
->reg
.data
= this->reg
.data
;
252 Symbol::Symbol(Program
*prog
, DataFile f
, ubyte fidx
)
257 reg
.fileIndex
= fidx
;
260 prog
->add(this, this->id
);
264 Symbol::clone(Function
*func
) const
266 Program
*prog
= func
->getProgram();
268 Symbol
*that
= new_Symbol(prog
, reg
.file
, reg
.fileIndex
);
270 that
->reg
.size
= this->reg
.size
;
271 that
->reg
.type
= this->reg
.type
;
272 that
->reg
.data
= this->reg
.data
;
274 that
->baseSym
= this->baseSym
;
279 ImmediateValue::ImmediateValue(Program
*prog
, uint32_t uval
)
281 memset(®
, 0, sizeof(reg
));
283 reg
.file
= FILE_IMMEDIATE
;
289 prog
->add(this, this->id
);
292 ImmediateValue::ImmediateValue(Program
*prog
, float fval
)
294 memset(®
, 0, sizeof(reg
));
296 reg
.file
= FILE_IMMEDIATE
;
302 prog
->add(this, this->id
);
305 ImmediateValue::ImmediateValue(Program
*prog
, double dval
)
307 memset(®
, 0, sizeof(reg
));
309 reg
.file
= FILE_IMMEDIATE
;
315 prog
->add(this, this->id
);
318 ImmediateValue::ImmediateValue(const ImmediateValue
*proto
, DataType ty
)
323 reg
.size
= typeSizeof(ty
);
327 ImmediateValue::isInteger(const int i
) const
331 return reg
.data
.s8
== i
;
333 return reg
.data
.u8
== i
;
335 return reg
.data
.s16
== i
;
337 return reg
.data
.u16
== i
;
340 return reg
.data
.s32
== i
; // as if ...
342 return reg
.data
.f32
== static_cast<float>(i
);
344 return reg
.data
.f64
== static_cast<double>(i
);
351 ImmediateValue::isNegative() const
354 case TYPE_S8
: return reg
.data
.s8
< 0;
355 case TYPE_S16
: return reg
.data
.s16
< 0;
357 case TYPE_U32
: return reg
.data
.s32
< 0;
358 case TYPE_F32
: return reg
.data
.u32
& (1 << 31);
359 case TYPE_F64
: return reg
.data
.u64
& (1ULL << 63);
366 ImmediateValue::isPow2() const
371 case TYPE_U32
: return util_is_power_of_two(reg
.data
.u32
);
378 ImmediateValue::applyLog2()
384 assert(!this->isNegative());
389 reg
.data
.u32
= util_logbase2(reg
.data
.u32
);
392 reg
.data
.f32
= log2f(reg
.data
.f32
);
395 reg
.data
.f64
= log2(reg
.data
.f64
);
404 ImmediateValue::compare(CondCode cc
, float fval
) const
406 if (reg
.type
!= TYPE_F32
)
407 ERROR("immediate value is not of type f32");
409 switch (static_cast<CondCode
>(cc
& 7)) {
410 case CC_TR
: return true;
411 case CC_FL
: return false;
412 case CC_LT
: return reg
.data
.f32
< fval
;
413 case CC_LE
: return reg
.data
.f32
<= fval
;
414 case CC_GT
: return reg
.data
.f32
> fval
;
415 case CC_GE
: return reg
.data
.f32
>= fval
;
416 case CC_EQ
: return reg
.data
.f32
== fval
;
417 case CC_NE
: return reg
.data
.f32
!= fval
;
425 Value::interfers(const Value
*that
) const
429 if (that
->reg
.file
!= reg
.file
|| that
->reg
.fileIndex
!= reg
.fileIndex
)
435 idA
= this->join
->reg
.data
.offset
;
436 idB
= that
->join
->reg
.data
.offset
;
438 idA
= this->join
->reg
.data
.id
* this->reg
.size
;
439 idB
= that
->join
->reg
.data
.id
* that
->reg
.size
;
443 return (idA
+ this->reg
.size
> idB
);
446 return (idB
+ that
->reg
.size
> idA
);
452 Value::equals(const Value
*that
, bool strict
) const
459 if (that
->reg
.file
!= reg
.file
|| that
->reg
.fileIndex
!= reg
.fileIndex
)
461 if (that
->reg
.size
!= this->reg
.size
)
464 if (that
->reg
.data
.id
!= this->reg
.data
.id
)
471 ImmediateValue::equals(const Value
*that
, bool strict
) const
473 const ImmediateValue
*imm
= that
->asImm();
476 return reg
.data
.u64
== imm
->reg
.data
.u64
;
480 Symbol::equals(const Value
*that
, bool strict
) const
482 if (reg
.file
!= that
->reg
.file
|| reg
.fileIndex
!= that
->reg
.fileIndex
)
484 assert(that
->asSym());
486 if (this->baseSym
!= that
->asSym()->baseSym
)
489 return this->reg
.data
.offset
== that
->reg
.data
.offset
;
492 void Instruction::init()
502 join
= terminator
= 0;
519 Instruction::Instruction()
524 dType
= sType
= TYPE_F32
;
530 Instruction::Instruction(Function
*fn
, operation opr
, DataType ty
)
540 Instruction::~Instruction()
543 Function
*fn
= bb
->getFunction();
545 fn
->allInsns
.remove(id
);
548 for (int s
= 0; srcExists(s
); ++s
)
550 // must unlink defs too since the list pointers will get deallocated
551 for (int d
= 0; defExists(d
); ++d
)
556 Instruction::setDef(int i
, Value
*val
)
558 int size
= defs
.size();
562 defs
[size
++].setInsn(this);
568 Instruction::setSrc(int s
, Value
*val
)
570 int size
= srcs
.size();
574 srcs
[size
++].setInsn(this);
580 Instruction::setSrc(int s
, const ValueRef
& ref
)
582 setSrc(s
, ref
.get());
583 srcs
[s
].mod
= ref
.mod
;
587 Instruction::swapSources(int a
, int b
)
589 Value
*value
= srcs
[a
].get();
590 Modifier m
= srcs
[a
].mod
;
599 Instruction::takeExtraSources(int s
, Value
*values
[3])
601 values
[0] = getIndirect(s
, 0);
603 setIndirect(s
, 0, NULL
);
605 values
[1] = getIndirect(s
, 1);
607 setIndirect(s
, 1, NULL
);
609 values
[2] = getPredicate();
611 setPredicate(cc
, NULL
);
615 Instruction::putExtraSources(int s
, Value
*values
[3])
618 setIndirect(s
, 0, values
[0]);
620 setIndirect(s
, 1, values
[1]);
622 setPredicate(cc
, values
[2]);
626 Instruction::clone(bool deep
) const
628 Instruction
*insn
= new_Instruction(bb
->getFunction(), op
, dType
);
629 assert(!asCmp() && !asFlow());
630 cloneBase(insn
, deep
);
635 Instruction::cloneBase(Instruction
*insn
, bool deep
) const
637 insn
->sType
= this->sType
;
640 insn
->rnd
= this->rnd
;
641 insn
->cache
= this->cache
;
642 insn
->subOp
= this->subOp
;
644 insn
->saturate
= this->saturate
;
645 insn
->atomic
= this->atomic
;
646 insn
->ftz
= this->ftz
;
647 insn
->dnz
= this->dnz
;
648 insn
->ipa
= this->ipa
;
649 insn
->lanes
= this->lanes
;
650 insn
->perPatch
= this->perPatch
;
652 insn
->postFactor
= this->postFactor
;
657 Function
*fn
= bb
->getFunction();
658 for (int d
= 0; this->defExists(d
); ++d
)
659 insn
->setDef(d
, this->getDef(d
)->clone(fn
));
661 for (int d
= 0; this->defExists(d
); ++d
)
662 insn
->setDef(d
, this->getDef(d
));
665 for (int s
= 0; this->srcExists(s
); ++s
)
666 insn
->setSrc(s
, this->srcs
[s
]);
668 insn
->predSrc
= this->predSrc
;
669 insn
->flagsDef
= this->flagsDef
;
670 insn
->flagsSrc
= this->flagsSrc
;
674 Instruction::defCount(unsigned int mask
) const
678 for (n
= 0, i
= 0; this->defExists(i
); ++i
, mask
>>= 1)
684 Instruction::srcCount(unsigned int mask
) const
688 for (n
= 0, i
= 0; this->srcExists(i
); ++i
, mask
>>= 1)
694 Instruction::setIndirect(int s
, int dim
, Value
*value
)
696 assert(this->srcExists(s
));
698 int p
= srcs
[s
].indirect
[dim
];
703 while (p
> 0 && !srcExists(p
- 1))
707 srcs
[p
].usedAsPtr
= (value
!= 0);
708 srcs
[s
].indirect
[dim
] = value
? p
: -1;
713 Instruction::setPredicate(CondCode ccode
, Value
*value
)
719 srcs
[predSrc
].set(NULL
);
726 predSrc
= srcs
.size();
727 while (predSrc
> 0 && !srcExists(predSrc
- 1))
731 setSrc(predSrc
, value
);
736 Instruction::writesPredicate() const
738 for (int d
= 0; defExists(d
); ++d
)
739 if (getDef(d
)->inFile(FILE_PREDICATE
) || getDef(d
)->inFile(FILE_FLAGS
))
745 insnCheckCommutation(const Instruction
*a
, const Instruction
*b
)
747 for (int d
= 0; a
->defExists(d
); ++d
)
748 for (int s
= 0; b
->srcExists(s
); ++s
)
749 if (a
->getDef(d
)->interfers(b
->getSrc(s
)))
755 Instruction::isCommutationLegal(const Instruction
*i
) const
758 ret
= ret
&& insnCheckCommutation(this, i
);
759 ret
= ret
&& insnCheckCommutation(i
, this);
763 TexInstruction::TexInstruction(Function
*fn
, operation op
)
764 : Instruction(fn
, op
, TYPE_F32
)
766 memset(&tex
, 0, sizeof(tex
));
768 tex
.rIndirectSrc
= -1;
769 tex
.sIndirectSrc
= -1;
772 TexInstruction::~TexInstruction()
774 for (int c
= 0; c
< 3; ++c
) {
781 TexInstruction::clone(bool deep
) const
783 TexInstruction
*tex
= new_TexInstruction(bb
->getFunction(), op
);
784 cloneBase(tex
, deep
);
786 tex
->tex
= this->tex
;
789 for (unsigned int c
= 0; c
< tex
->tex
.target
.getDim(); ++c
) {
790 tex
->dPdx
[c
].set(dPdx
[c
]);
791 tex
->dPdy
[c
].set(dPdy
[c
]);
798 const struct TexInstruction::Target::Desc
TexInstruction::Target::descTable
[] =
800 { "1D", 1, 1, false, false, false },
801 { "2D", 2, 2, false, false, false },
802 { "2D_MS", 2, 2, false, false, false },
803 { "3D", 3, 3, false, false, false },
804 { "CUBE", 2, 3, false, true, false },
805 { "1D_SHADOW", 1, 1, false, false, true },
806 { "2D_SHADOW", 2, 2, false, false, true },
807 { "CUBE_SHADOW", 2, 3, false, true, true },
808 { "1D_ARRAY", 1, 2, true, false, false },
809 { "2D_ARRAY", 2, 3, true, false, false },
810 { "2D_MS_ARRAY", 2, 3, true, false, false },
811 { "CUBE_ARRAY", 2, 4, true, true, false },
812 { "1D_ARRAY_SHADOW", 1, 2, true, false, true },
813 { "2D_ARRAY_SHADOW", 2, 3, true, false, true },
814 { "RECT", 2, 2, false, false, false },
815 { "RECT_SHADOW", 2, 2, false, false, true },
816 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },
817 { "BUFFER", 1, 1, false, false, false },
820 CmpInstruction::CmpInstruction(Function
*fn
, operation op
)
821 : Instruction(fn
, op
, TYPE_F32
)
827 CmpInstruction::clone(bool deep
) const
829 CmpInstruction
*cmp
= new_CmpInstruction(bb
->getFunction(), op
);
830 cloneBase(cmp
, deep
);
831 cmp
->setCond
= setCond
;
836 FlowInstruction::FlowInstruction(Function
*fn
, operation op
,
838 : Instruction(fn
, op
, TYPE_NONE
)
843 op
== OP_CONT
|| op
== OP_BREAK
||
844 op
== OP_RET
|| op
== OP_EXIT
)
848 terminator
= targ
? 1 : 0;
850 allWarp
= absolute
= limit
= 0;
853 Program::Program(Type type
, Target
*arch
)
856 mem_Instruction(sizeof(Instruction
), 6),
857 mem_CmpInstruction(sizeof(CmpInstruction
), 4),
858 mem_TexInstruction(sizeof(TexInstruction
), 4),
859 mem_FlowInstruction(sizeof(FlowInstruction
), 4),
860 mem_LValue(sizeof(LValue
), 8),
861 mem_Symbol(sizeof(Symbol
), 7),
862 mem_ImmediateValue(sizeof(ImmediateValue
), 7)
869 main
= new Function(this, "MAIN");
880 void Program::releaseInstruction(Instruction
*insn
)
882 // TODO: make this not suck so much
884 insn
->~Instruction();
887 mem_CmpInstruction
.release(insn
);
890 mem_TexInstruction
.release(insn
);
893 mem_FlowInstruction
.release(insn
);
895 mem_Instruction
.release(insn
);
898 void Program::releaseValue(Value
*value
)
900 if (value
->asLValue())
901 mem_LValue
.release(value
);
904 mem_ImmediateValue
.release(value
);
907 mem_Symbol
.release(value
);
911 } // namespace nv50_ir
916 nv50_ir_init_prog_info(struct nv50_ir_prog_info
*info
)
918 #if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN)
919 if (info
->type
== PIPE_SHADER_HULL
|| info
->type
== PIPE_SHADER_DOMAIN
) {
920 info
->prop
.tp
.domain
= PIPE_PRIM_MAX
;
921 info
->prop
.tp
.outputPrim
= PIPE_PRIM_MAX
;
924 if (info
->type
== PIPE_SHADER_GEOMETRY
) {
925 info
->prop
.gp
.instanceCount
= 1;
926 info
->prop
.gp
.maxVertices
= 1;
928 info
->io
.clipDistance
= 0xff;
929 info
->io
.pointSize
= 0xff;
930 info
->io
.vertexId
= 0xff;
931 info
->io
.edgeFlagIn
= 0xff;
932 info
->io
.edgeFlagOut
= 0xff;
933 info
->io
.fragDepth
= 0xff;
934 info
->io
.sampleMask
= 0xff;
935 info
->io
.backFaceColor
[0] = info
->io
.backFaceColor
[1] = 0xff;
939 nv50_ir_generate_code(struct nv50_ir_prog_info
*info
)
943 nv50_ir::Program::Type type
;
945 nv50_ir_init_prog_info(info
);
947 #define PROG_TYPE_CASE(a, b) \
948 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
950 switch (info
->type
) {
951 PROG_TYPE_CASE(VERTEX
, VERTEX
);
952 // PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
953 // PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
954 PROG_TYPE_CASE(GEOMETRY
, GEOMETRY
);
955 PROG_TYPE_CASE(FRAGMENT
, FRAGMENT
);
957 type
= nv50_ir::Program::TYPE_COMPUTE
;
960 INFO_DBG(info
->dbgFlags
, VERBOSE
, "translating program of type %u\n", type
);
962 nv50_ir::Target
*targ
= nv50_ir::Target::create(info
->target
);
966 nv50_ir::Program
*prog
= new nv50_ir::Program(type
, targ
);
969 prog
->dbgFlags
= info
->dbgFlags
;
971 switch (info
->bin
.sourceRep
) {
977 ret
= prog
->makeFromSM4(info
) ? 0 : -2;
982 ret
= prog
->makeFromTGSI(info
) ? 0 : -2;
987 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
990 prog
->getTarget()->runLegalizePass(prog
, nv50_ir::CG_STAGE_PRE_SSA
);
992 prog
->convertToSSA();
994 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
997 prog
->optimizeSSA(info
->optLevel
);
998 prog
->getTarget()->runLegalizePass(prog
, nv50_ir::CG_STAGE_SSA
);
1000 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
1003 if (!prog
->registerAllocation()) {
1007 prog
->getTarget()->runLegalizePass(prog
, nv50_ir::CG_STAGE_POST_RA
);
1009 prog
->optimizePostRA(info
->optLevel
);
1011 if (!prog
->emitBinary(info
)) {
1017 INFO_DBG(prog
->dbgFlags
, VERBOSE
, "nv50_ir_generate_code: ret = %i\n", ret
);
1019 info
->bin
.maxGPR
= prog
->maxGPR
;
1020 info
->bin
.code
= prog
->code
;
1021 info
->bin
.codeSize
= prog
->binSize
;
1024 nv50_ir::Target::destroy(targ
);