8e7fc31eced5c34f423f333fccd035c13b5e290b
2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nv50_ir_target.h"
25 #include "nv50_ir_driver.h"
28 #include "nv50/nv50_program.h"
29 #include "nv50/nv50_debug.h"
34 Modifier::Modifier(operation op
)
37 case OP_NEG
: bits
= NV50_IR_MOD_NEG
; break;
38 case OP_ABS
: bits
= NV50_IR_MOD_ABS
; break;
39 case OP_SAT
: bits
= NV50_IR_MOD_SAT
; break;
40 case OP_NOT
: bits
= NV50_IR_MOD_NOT
; break;
47 Modifier
Modifier::operator*(const Modifier m
) const
52 if (this->bits
& NV50_IR_MOD_ABS
)
53 b
&= ~NV50_IR_MOD_NEG
;
55 a
= (this->bits
^ b
) & (NV50_IR_MOD_NOT
| NV50_IR_MOD_NEG
);
56 c
= (this->bits
| m
.bits
) & (NV50_IR_MOD_ABS
| NV50_IR_MOD_SAT
);
58 return Modifier(a
| c
);
61 ValueRef::ValueRef() : value(NULL
), insn(NULL
)
68 ValueRef::ValueRef(const ValueRef
& ref
) : value(NULL
), insn(ref
.insn
)
71 usedAsPtr
= ref
.usedAsPtr
;
79 ImmediateValue
*ValueRef::getImmediate() const
84 if (src
->reg
.file
== FILE_IMMEDIATE
)
87 Instruction
*insn
= src
->getUniqueInsn();
89 src
= (insn
&& insn
->op
== OP_MOV
) ? insn
->getSrc(0) : NULL
;
94 ValueDef::ValueDef() : value(NULL
), insn(NULL
)
99 ValueDef::ValueDef(const ValueDef
& def
) : value(NULL
), insn(NULL
)
104 ValueDef::~ValueDef()
110 ValueRef::set(const ValueRef
&ref
)
112 this->set(ref
.get());
114 indirect
[0] = ref
.indirect
[0];
115 indirect
[1] = ref
.indirect
[1];
119 ValueRef::set(Value
*refVal
)
124 value
->uses
.remove(this);
126 refVal
->uses
.push_back(this);
132 ValueDef::set(Value
*defVal
)
137 value
->defs
.remove(this);
139 defVal
->defs
.push_back(this);
145 ValueDef::replace(Value
*repVal
, bool doSet
)
150 while (value
->refCount())
151 value
->uses
.front()->set(repVal
);
160 memset(®
, 0, sizeof(reg
));
165 Value::coalesce(Value
*jval
, bool force
)
167 Value
*repr
= this->join
; // new representative
168 Value
*jrep
= jval
->join
;
170 if (reg
.file
!= jval
->reg
.file
|| reg
.size
!= jval
->reg
.size
) {
173 ERROR("forced coalescing of values of different sizes/files");
176 if (!force
&& (repr
->reg
.data
.id
!= jrep
->reg
.data
.id
)) {
177 if (repr
->reg
.data
.id
>= 0 &&
178 jrep
->reg
.data
.id
>= 0)
180 if (jrep
->reg
.data
.id
>= 0) {
186 // need to check all fixed register values of the program for overlap
187 Function
*func
= defs
.front()->getInsn()->bb
->getFunction();
189 // TODO: put values in by register-id bins per function
190 ArrayList::Iterator iter
= func
->allLValues
.iterator();
191 for (; !iter
.end(); iter
.next()) {
192 Value
*fixed
= reinterpret_cast<Value
*>(iter
.get());
194 if (fixed
->reg
.data
.id
== repr
->reg
.data
.id
)
195 if (fixed
->livei
.overlaps(jrep
->livei
))
199 if (repr
->livei
.overlaps(jrep
->livei
)) {
202 // do we really want this ? if at all, only for constraint ops
203 INFO("NOTE: forced coalescing with live range overlap\n");
206 for (DefIterator it
= jrep
->defs
.begin(); it
!= jrep
->defs
.end(); ++it
)
207 (*it
)->get()->join
= repr
;
209 repr
->defs
.insert(repr
->defs
.end(),
210 jrep
->defs
.begin(), jrep
->defs
.end());
211 repr
->livei
.unify(jrep
->livei
);
213 assert(repr
->join
== repr
&& jval
->join
== repr
);
217 LValue::LValue(Function
*fn
, DataFile file
)
220 reg
.size
= (file
!= FILE_PREDICATE
) ? 4 : 1;
225 fn
->add(this, this->id
);
228 LValue::LValue(Function
*fn
, LValue
*lval
)
232 reg
.file
= lval
->reg
.file
;
233 reg
.size
= lval
->reg
.size
;
238 fn
->add(this, this->id
);
242 LValue::clone(ClonePolicy
<Function
>& pol
) const
244 LValue
*that
= new_LValue(pol
.context(), reg
.file
);
246 pol
.set
<Value
>(this, that
);
248 that
->reg
.size
= this->reg
.size
;
249 that
->reg
.type
= this->reg
.type
;
250 that
->reg
.data
= this->reg
.data
;
255 Symbol::Symbol(Program
*prog
, DataFile f
, ubyte fidx
)
260 reg
.fileIndex
= fidx
;
263 prog
->add(this, this->id
);
267 Symbol::clone(ClonePolicy
<Function
>& pol
) const
269 Program
*prog
= pol
.context()->getProgram();
271 Symbol
*that
= new_Symbol(prog
, reg
.file
, reg
.fileIndex
);
273 pol
.set
<Value
>(this, that
);
275 that
->reg
.size
= this->reg
.size
;
276 that
->reg
.type
= this->reg
.type
;
277 that
->reg
.data
= this->reg
.data
;
279 that
->baseSym
= this->baseSym
;
284 ImmediateValue::ImmediateValue(Program
*prog
, uint32_t uval
)
286 memset(®
, 0, sizeof(reg
));
288 reg
.file
= FILE_IMMEDIATE
;
294 prog
->add(this, this->id
);
297 ImmediateValue::ImmediateValue(Program
*prog
, float fval
)
299 memset(®
, 0, sizeof(reg
));
301 reg
.file
= FILE_IMMEDIATE
;
307 prog
->add(this, this->id
);
310 ImmediateValue::ImmediateValue(Program
*prog
, double dval
)
312 memset(®
, 0, sizeof(reg
));
314 reg
.file
= FILE_IMMEDIATE
;
320 prog
->add(this, this->id
);
323 ImmediateValue::ImmediateValue(const ImmediateValue
*proto
, DataType ty
)
328 reg
.size
= typeSizeof(ty
);
332 ImmediateValue::clone(ClonePolicy
<Function
>& pol
) const
334 Program
*prog
= pol
.context()->getProgram();
335 ImmediateValue
*that
= new_ImmediateValue(prog
, 0u);
337 pol
.set
<Value
>(this, that
);
339 that
->reg
.size
= this->reg
.size
;
340 that
->reg
.type
= this->reg
.type
;
341 that
->reg
.data
= this->reg
.data
;
347 ImmediateValue::isInteger(const int i
) const
351 return reg
.data
.s8
== i
;
353 return reg
.data
.u8
== i
;
355 return reg
.data
.s16
== i
;
357 return reg
.data
.u16
== i
;
360 return reg
.data
.s32
== i
; // as if ...
362 return reg
.data
.f32
== static_cast<float>(i
);
364 return reg
.data
.f64
== static_cast<double>(i
);
371 ImmediateValue::isNegative() const
374 case TYPE_S8
: return reg
.data
.s8
< 0;
375 case TYPE_S16
: return reg
.data
.s16
< 0;
377 case TYPE_U32
: return reg
.data
.s32
< 0;
378 case TYPE_F32
: return reg
.data
.u32
& (1 << 31);
379 case TYPE_F64
: return reg
.data
.u64
& (1ULL << 63);
386 ImmediateValue::isPow2() const
391 case TYPE_U32
: return util_is_power_of_two(reg
.data
.u32
);
398 ImmediateValue::applyLog2()
404 assert(!this->isNegative());
409 reg
.data
.u32
= util_logbase2(reg
.data
.u32
);
412 reg
.data
.f32
= log2f(reg
.data
.f32
);
415 reg
.data
.f64
= log2(reg
.data
.f64
);
424 ImmediateValue::compare(CondCode cc
, float fval
) const
426 if (reg
.type
!= TYPE_F32
)
427 ERROR("immediate value is not of type f32");
429 switch (static_cast<CondCode
>(cc
& 7)) {
430 case CC_TR
: return true;
431 case CC_FL
: return false;
432 case CC_LT
: return reg
.data
.f32
< fval
;
433 case CC_LE
: return reg
.data
.f32
<= fval
;
434 case CC_GT
: return reg
.data
.f32
> fval
;
435 case CC_GE
: return reg
.data
.f32
>= fval
;
436 case CC_EQ
: return reg
.data
.f32
== fval
;
437 case CC_NE
: return reg
.data
.f32
!= fval
;
445 Value::interfers(const Value
*that
) const
449 if (that
->reg
.file
!= reg
.file
|| that
->reg
.fileIndex
!= reg
.fileIndex
)
455 idA
= this->join
->reg
.data
.offset
;
456 idB
= that
->join
->reg
.data
.offset
;
458 idA
= this->join
->reg
.data
.id
* this->reg
.size
;
459 idB
= that
->join
->reg
.data
.id
* that
->reg
.size
;
463 return (idA
+ this->reg
.size
> idB
);
466 return (idB
+ that
->reg
.size
> idA
);
472 Value::equals(const Value
*that
, bool strict
) const
479 if (that
->reg
.file
!= reg
.file
|| that
->reg
.fileIndex
!= reg
.fileIndex
)
481 if (that
->reg
.size
!= this->reg
.size
)
484 if (that
->reg
.data
.id
!= this->reg
.data
.id
)
491 ImmediateValue::equals(const Value
*that
, bool strict
) const
493 const ImmediateValue
*imm
= that
->asImm();
496 return reg
.data
.u64
== imm
->reg
.data
.u64
;
500 Symbol::equals(const Value
*that
, bool strict
) const
502 if (reg
.file
!= that
->reg
.file
|| reg
.fileIndex
!= that
->reg
.fileIndex
)
504 assert(that
->asSym());
506 if (this->baseSym
!= that
->asSym()->baseSym
)
509 return this->reg
.data
.offset
== that
->reg
.data
.offset
;
512 void Instruction::init()
522 join
= terminator
= 0;
539 Instruction::Instruction()
544 dType
= sType
= TYPE_F32
;
550 Instruction::Instruction(Function
*fn
, operation opr
, DataType ty
)
560 Instruction::~Instruction()
563 Function
*fn
= bb
->getFunction();
565 fn
->allInsns
.remove(id
);
568 for (int s
= 0; srcExists(s
); ++s
)
570 // must unlink defs too since the list pointers will get deallocated
571 for (int d
= 0; defExists(d
); ++d
)
576 Instruction::setDef(int i
, Value
*val
)
578 int size
= defs
.size();
582 defs
[size
++].setInsn(this);
588 Instruction::setSrc(int s
, Value
*val
)
590 int size
= srcs
.size();
594 srcs
[size
++].setInsn(this);
600 Instruction::setSrc(int s
, const ValueRef
& ref
)
602 setSrc(s
, ref
.get());
603 srcs
[s
].mod
= ref
.mod
;
607 Instruction::swapSources(int a
, int b
)
609 Value
*value
= srcs
[a
].get();
610 Modifier m
= srcs
[a
].mod
;
619 Instruction::takeExtraSources(int s
, Value
*values
[3])
621 values
[0] = getIndirect(s
, 0);
623 setIndirect(s
, 0, NULL
);
625 values
[1] = getIndirect(s
, 1);
627 setIndirect(s
, 1, NULL
);
629 values
[2] = getPredicate();
631 setPredicate(cc
, NULL
);
635 Instruction::putExtraSources(int s
, Value
*values
[3])
638 setIndirect(s
, 0, values
[0]);
640 setIndirect(s
, 1, values
[1]);
642 setPredicate(cc
, values
[2]);
646 Instruction::clone(ClonePolicy
<Function
>& pol
, Instruction
*i
) const
649 i
= new_Instruction(pol
.context(), op
, dType
);
650 assert(typeid(*i
) == typeid(*this));
652 pol
.set
<Instruction
>(this, i
);
660 i
->saturate
= saturate
;
668 i
->perPatch
= perPatch
;
670 i
->postFactor
= postFactor
;
672 for (int d
= 0; defExists(d
); ++d
)
673 i
->setDef(d
, pol
.get(getDef(d
)));
675 for (int s
= 0; srcExists(s
); ++s
) {
676 i
->setSrc(s
, pol
.get(getSrc(s
)));
677 i
->src(s
).mod
= src(s
).mod
;
681 i
->predSrc
= predSrc
;
682 i
->flagsDef
= flagsDef
;
683 i
->flagsSrc
= flagsSrc
;
689 Instruction::defCount(unsigned int mask
) const
693 for (n
= 0, i
= 0; this->defExists(i
); ++i
, mask
>>= 1)
699 Instruction::srcCount(unsigned int mask
) const
703 for (n
= 0, i
= 0; this->srcExists(i
); ++i
, mask
>>= 1)
709 Instruction::setIndirect(int s
, int dim
, Value
*value
)
711 assert(this->srcExists(s
));
713 int p
= srcs
[s
].indirect
[dim
];
718 while (p
> 0 && !srcExists(p
- 1))
722 srcs
[p
].usedAsPtr
= (value
!= 0);
723 srcs
[s
].indirect
[dim
] = value
? p
: -1;
728 Instruction::setPredicate(CondCode ccode
, Value
*value
)
734 srcs
[predSrc
].set(NULL
);
741 predSrc
= srcs
.size();
742 while (predSrc
> 0 && !srcExists(predSrc
- 1))
746 setSrc(predSrc
, value
);
751 Instruction::writesPredicate() const
753 for (int d
= 0; defExists(d
); ++d
)
754 if (getDef(d
)->inFile(FILE_PREDICATE
) || getDef(d
)->inFile(FILE_FLAGS
))
760 insnCheckCommutation(const Instruction
*a
, const Instruction
*b
)
762 for (int d
= 0; a
->defExists(d
); ++d
)
763 for (int s
= 0; b
->srcExists(s
); ++s
)
764 if (a
->getDef(d
)->interfers(b
->getSrc(s
)))
770 Instruction::isCommutationLegal(const Instruction
*i
) const
773 ret
= ret
&& insnCheckCommutation(this, i
);
774 ret
= ret
&& insnCheckCommutation(i
, this);
778 TexInstruction::TexInstruction(Function
*fn
, operation op
)
779 : Instruction(fn
, op
, TYPE_F32
)
781 memset(&tex
, 0, sizeof(tex
));
783 tex
.rIndirectSrc
= -1;
784 tex
.sIndirectSrc
= -1;
787 TexInstruction::~TexInstruction()
789 for (int c
= 0; c
< 3; ++c
) {
796 TexInstruction::clone(ClonePolicy
<Function
>& pol
, Instruction
*i
) const
798 TexInstruction
*tex
= (i
? static_cast<TexInstruction
*>(i
) :
799 new_TexInstruction(pol
.context(), op
));
801 Instruction::clone(pol
, tex
);
803 tex
->tex
= this->tex
;
806 for (unsigned int c
= 0; c
< tex
->tex
.target
.getDim(); ++c
) {
807 tex
->dPdx
[c
].set(dPdx
[c
]);
808 tex
->dPdy
[c
].set(dPdy
[c
]);
815 const struct TexInstruction::Target::Desc
TexInstruction::Target::descTable
[] =
817 { "1D", 1, 1, false, false, false },
818 { "2D", 2, 2, false, false, false },
819 { "2D_MS", 2, 2, false, false, false },
820 { "3D", 3, 3, false, false, false },
821 { "CUBE", 2, 3, false, true, false },
822 { "1D_SHADOW", 1, 1, false, false, true },
823 { "2D_SHADOW", 2, 2, false, false, true },
824 { "CUBE_SHADOW", 2, 3, false, true, true },
825 { "1D_ARRAY", 1, 2, true, false, false },
826 { "2D_ARRAY", 2, 3, true, false, false },
827 { "2D_MS_ARRAY", 2, 3, true, false, false },
828 { "CUBE_ARRAY", 2, 4, true, true, false },
829 { "1D_ARRAY_SHADOW", 1, 2, true, false, true },
830 { "2D_ARRAY_SHADOW", 2, 3, true, false, true },
831 { "RECT", 2, 2, false, false, false },
832 { "RECT_SHADOW", 2, 2, false, false, true },
833 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },
834 { "BUFFER", 1, 1, false, false, false },
837 CmpInstruction::CmpInstruction(Function
*fn
, operation op
)
838 : Instruction(fn
, op
, TYPE_F32
)
844 CmpInstruction::clone(ClonePolicy
<Function
>& pol
, Instruction
*i
) const
846 CmpInstruction
*cmp
= (i
? static_cast<CmpInstruction
*>(i
) :
847 new_CmpInstruction(pol
.context(), op
));
849 Instruction::clone(pol
, cmp
);
850 cmp
->setCond
= setCond
;
854 FlowInstruction::FlowInstruction(Function
*fn
, operation op
,
856 : Instruction(fn
, op
, TYPE_NONE
)
861 op
== OP_CONT
|| op
== OP_BREAK
||
862 op
== OP_RET
|| op
== OP_EXIT
)
866 terminator
= targ
? 1 : 0;
868 allWarp
= absolute
= limit
= 0;
872 FlowInstruction::clone(ClonePolicy
<Function
>& pol
, Instruction
*i
) const
874 FlowInstruction
*flow
= (i
? static_cast<FlowInstruction
*>(i
) :
875 new_FlowInstruction(pol
.context(), op
, NULL
));
877 Instruction::clone(pol
, flow
);
878 flow
->allWarp
= allWarp
;
879 flow
->absolute
= absolute
;
881 flow
->builtin
= builtin
;
884 flow
->target
.builtin
= target
.builtin
;
887 flow
->target
.fn
= target
.fn
;
890 flow
->target
.bb
= pol
.get
<BasicBlock
>(target
.bb
);
895 Program::Program(Type type
, Target
*arch
)
898 mem_Instruction(sizeof(Instruction
), 6),
899 mem_CmpInstruction(sizeof(CmpInstruction
), 4),
900 mem_TexInstruction(sizeof(TexInstruction
), 4),
901 mem_FlowInstruction(sizeof(FlowInstruction
), 4),
902 mem_LValue(sizeof(LValue
), 8),
903 mem_Symbol(sizeof(Symbol
), 7),
904 mem_ImmediateValue(sizeof(ImmediateValue
), 7)
911 main
= new Function(this, "MAIN");
918 for (ArrayList::Iterator it
= allFuncs
.iterator(); !it
.end(); it
.next())
919 delete reinterpret_cast<Function
*>(it
.get());
921 for (ArrayList::Iterator it
= allRValues
.iterator(); !it
.end(); it
.next())
922 releaseValue(reinterpret_cast<Value
*>(it
.get()));
925 void Program::releaseInstruction(Instruction
*insn
)
927 // TODO: make this not suck so much
929 insn
->~Instruction();
932 mem_CmpInstruction
.release(insn
);
935 mem_TexInstruction
.release(insn
);
938 mem_FlowInstruction
.release(insn
);
940 mem_Instruction
.release(insn
);
943 void Program::releaseValue(Value
*value
)
947 if (value
->asLValue())
948 mem_LValue
.release(value
);
951 mem_ImmediateValue
.release(value
);
954 mem_Symbol
.release(value
);
958 } // namespace nv50_ir
963 nv50_ir_init_prog_info(struct nv50_ir_prog_info
*info
)
965 #if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN)
966 if (info
->type
== PIPE_SHADER_HULL
|| info
->type
== PIPE_SHADER_DOMAIN
) {
967 info
->prop
.tp
.domain
= PIPE_PRIM_MAX
;
968 info
->prop
.tp
.outputPrim
= PIPE_PRIM_MAX
;
971 if (info
->type
== PIPE_SHADER_GEOMETRY
) {
972 info
->prop
.gp
.instanceCount
= 1;
973 info
->prop
.gp
.maxVertices
= 1;
975 info
->io
.clipDistance
= 0xff;
976 info
->io
.pointSize
= 0xff;
977 info
->io
.vertexId
= 0xff;
978 info
->io
.edgeFlagIn
= 0xff;
979 info
->io
.edgeFlagOut
= 0xff;
980 info
->io
.fragDepth
= 0xff;
981 info
->io
.sampleMask
= 0xff;
982 info
->io
.backFaceColor
[0] = info
->io
.backFaceColor
[1] = 0xff;
986 nv50_ir_generate_code(struct nv50_ir_prog_info
*info
)
990 nv50_ir::Program::Type type
;
992 nv50_ir_init_prog_info(info
);
994 #define PROG_TYPE_CASE(a, b) \
995 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
997 switch (info
->type
) {
998 PROG_TYPE_CASE(VERTEX
, VERTEX
);
999 // PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
1000 // PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
1001 PROG_TYPE_CASE(GEOMETRY
, GEOMETRY
);
1002 PROG_TYPE_CASE(FRAGMENT
, FRAGMENT
);
1004 type
= nv50_ir::Program::TYPE_COMPUTE
;
1007 INFO_DBG(info
->dbgFlags
, VERBOSE
, "translating program of type %u\n", type
);
1009 nv50_ir::Target
*targ
= nv50_ir::Target::create(info
->target
);
1013 nv50_ir::Program
*prog
= new nv50_ir::Program(type
, targ
);
1016 prog
->dbgFlags
= info
->dbgFlags
;
1018 switch (info
->bin
.sourceRep
) {
1024 ret
= prog
->makeFromSM4(info
) ? 0 : -2;
1029 ret
= prog
->makeFromTGSI(info
) ? 0 : -2;
1034 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
1037 prog
->getTarget()->runLegalizePass(prog
, nv50_ir::CG_STAGE_PRE_SSA
);
1039 prog
->convertToSSA();
1041 if (prog
->dbgFlags
& NV50_IR_DEBUG_VERBOSE
)
1044 prog
->optimizeSSA(info
->optLevel
);
1045 prog
->getTarget()->runLegalizePass(prog
, nv50_ir::CG_STAGE_SSA
);
1047 if (prog
->dbgFlags
& NV50_IR_DEBUG_BASIC
)
1050 if (!prog
->registerAllocation()) {
1054 prog
->getTarget()->runLegalizePass(prog
, nv50_ir::CG_STAGE_POST_RA
);
1056 prog
->optimizePostRA(info
->optLevel
);
1058 if (!prog
->emitBinary(info
)) {
1064 INFO_DBG(prog
->dbgFlags
, VERBOSE
, "nv50_ir_generate_code: ret = %i\n", ret
);
1066 info
->bin
.maxGPR
= prog
->maxGPR
;
1067 info
->bin
.code
= prog
->code
;
1068 info
->bin
.codeSize
= prog
->binSize
;
1071 nv50_ir::Target::destroy(targ
);