2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include "nv50_ir_target.h"
25 #include "nv50_ir_build_util.h"
28 #include "util/u_math.h"
// Tests whether this instruction is a no-op, using its opcode, flags and
// definitions.
// NOTE(review): this listing omits the return statement attached to each
// test below, so the outcome of every branch must be confirmed upstream.
34 Instruction::isNop() const
// First test: the PHI / SPLIT / MERGE / CONSTRAINT pseudo-ops.
36 if (op
== OP_PHI
|| op
== OP_SPLIT
|| op
== OP_MERGE
|| op
== OP_CONSTRAINT
)
38 if (terminator
|| join
) // XXX: should terminator imply flow ?
42 if (!fixed
&& op
== OP_NOP
)
// First definition's representative has a negative register id: scan the
// remaining definitions and warn if any of them has a non-negative id.
45 if (defExists(0) && def(0).rep()->reg
.data
.id
< 0) {
46 for (int d
= 1; defExists(d
); ++d
)
47 if (def(d
).rep()->reg
.data
.id
>= 0)
48 WARN("part of vector result is unused !\n");
// MOV / UNION: compare the destination with source 0, and the destination's
// representative with source 1.
52 if (op
== OP_MOV
|| op
== OP_UNION
) {
53 if (!getDef(0)->equals(getSrc(0)))
56 if (!def(0).rep()->equals(getSrc(1)))
// Tests whether this instruction is dead.
// Visible checks: a list of opcodes (surface store/reduction ops among them),
// every definition's reference count and register id, and the
// terminator / flow status.
// NOTE(review): the start of the opcode condition and all return statements
// are missing from this listing.
64 bool Instruction::isDead() const
69 op
== OP_SUSTB
|| op
== OP_SUSTP
|| op
== OP_SUREDP
|| op
== OP_SUREDB
||
// A definition that is still referenced, or whose register id is
// non-negative, triggers the branch below.
73 for (int d
= 0; defExists(d
); ++d
)
74 if (getDef(d
)->refCount() || getDef(d
)->reg
.data
.id
>= 0)
77 if (terminator
|| asFlow())
85 // =============================================================================
// Pass that forwards MOV sources to the users of the MOV result; it
// overrides the per-basic-block visit hook of Pass.
87 class CopyPropagation
: public Pass
90 virtual bool visit(BasicBlock
*);
93 // Propagate all MOVs forward to make subsequent optimization easier, except if
94 // the sources stem from a phi, in which case we don't want to mess up potential
95 // swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def.
// Walks the instructions of @bb starting at its entry instruction.
// NOTE(review): the statements executed when the filter conditions below hold
// (presumably `continue`) and the update of `next` are not in this listing.
97 CopyPropagation::visit(BasicBlock
*bb
)
99 Instruction
*mov
, *si
, *next
;
101 for (mov
= bb
->getEntry(); mov
; mov
= next
) {
// Filter: a non-fixed OP_MOV whose source 0 is an LValue ...
103 if (mov
->op
!= OP_MOV
|| mov
->fixed
|| !mov
->getSrc(0)->asLValue())
// ... predicated MOVs ...
105 if (mov
->getPredicate())
// ... and MOVs whose destination and source register files differ.
107 if (mov
->def(0).getFile() != mov
->src(0).getFile())
// si: the instruction that defines the MOV's source.
109 si
= mov
->getSrc(0)->getInsn();
// Destination register id is negative and the source does not come from a
// PHI: redirect all uses of the destination to the source, then delete the MOV.
110 if (mov
->getDef(0)->reg
.data
.id
< 0 && si
&& si
->op
!= OP_PHI
) {
112 mov
->def(0).replace(mov
->getSrc(0), false);
113 delete_Instruction(prog
, mov
);
119 // =============================================================================
// Pass that folds loads / immediate MOVs into the instructions using them
// (see LoadPropagation::visit) and reorders the first two sources of
// commutative instructions (see checkSwapSrc01).
121 class LoadPropagation
: public Pass
124 virtual bool visit(BasicBlock
*);
// Source reordering helper for instructions with swappable sources 0 and 1.
126 void checkSwapSrc01(Instruction
*);
// Predicates classifying the instruction that defines a source value.
128 bool isCSpaceLoad(Instruction
*);
129 bool isImmd32Load(Instruction
*);
130 bool isAttribOrSharedLoad(Instruction
*);
// Returns true when @ld is non-null, is an OP_LOAD, and its source 0 lives in
// FILE_MEMORY_CONST.
134 LoadPropagation::isCSpaceLoad(Instruction
*ld
)
136 return ld
&& ld
->op
== OP_LOAD
&& ld
->src(0).getFile() == FILE_MEMORY_CONST
;
// Reports whether @ld is an OP_MOV with a 4-byte destination type whose
// source 0 is in FILE_IMMEDIATE.
// NOTE(review): the statement guarded by the first condition is not in this
// listing; presumably it returns false.
140 LoadPropagation::isImmd32Load(Instruction
*ld
)
// Guard: null instruction, opcode other than MOV, or type size other than 4.
142 if (!ld
|| (ld
->op
!= OP_MOV
) || (typeSizeof(ld
->dType
) != 4))
144 return ld
->src(0).getFile() == FILE_IMMEDIATE
;
// Matches an OP_VFETCH, or an OP_LOAD whose source 0 is in FILE_SHADER_INPUT
// or FILE_MEMORY_SHARED.
// NOTE(review): the beginning of the expression (original line 150) is not in
// this listing; presumably it is `return ld &&`.
148 LoadPropagation::isAttribOrSharedLoad(Instruction
*ld
)
151 (ld
->op
== OP_VFETCH
||
152 (ld
->op
== OP_LOAD
&&
153 (ld
->src(0).getFile() == FILE_SHADER_INPUT
||
154 ld
->src(0).getFile() == FILE_MEMORY_SHARED
)));
// Swaps sources 0 and 1 of @insn depending on what kind of instruction
// defines each of them, then adjusts the condition code of SET / SLCT.
// NOTE(review): the early-exit statements and the else branches between the
// tests are not in this listing.
158 LoadPropagation::checkSwapSrc01(Instruction
*insn
)
// Only commutative ops, plus OP_SET and OP_SLCT, are considered.
160 if (!prog
->getTarget()->getOpInfo(insn
).commutative
)
161 if (insn
->op
!= OP_SET
&& insn
->op
!= OP_SLCT
)
163 if (insn
->src(1).getFile() != FILE_GPR
)
// i0 / i1: the instructions defining source 0 and source 1.
166 Instruction
*i0
= insn
->getSrc(0)->getInsn();
167 Instruction
*i1
= insn
->getSrc(1)->getInsn();
// Source 0 is a constant-space load and source 1 is not: swap.
169 if (isCSpaceLoad(i0
)) {
170 if (!isCSpaceLoad(i1
))
171 insn
->swapSources(0, 1);
// Source 0 is a 32-bit immediate MOV and source 1 is neither a
// constant-space load nor an immediate MOV: swap.
175 if (isImmd32Load(i0
)) {
176 if (!isCSpaceLoad(i1
) && !isImmd32Load(i1
))
177 insn
->swapSources(0, 1);
// Source 1 is an attribute/shared load and source 0 is not: swap.
181 if (isAttribOrSharedLoad(i1
)) {
182 if (!isAttribOrSharedLoad(i0
))
183 insn
->swapSources(0, 1);
// The condition code is rewritten with reverseCondCode for OP_SET and with
// inverseCondCode for OP_SLCT.
190 if (insn
->op
== OP_SET
)
191 insn
->asCmp()->setCond
= reverseCondCode(insn
->asCmp()->setCond
);
193 if (insn
->op
== OP_SLCT
)
194 insn
->asCmp()->setCond
= inverseCondCode(insn
->asCmp()->setCond
);
// For every instruction in @bb, tries to replace each source that is defined
// by a non-fixed OP_LOAD / OP_MOV with that instruction's own source 0, when
// the target reports via insnCanLoad() that the instruction can load it.
// NOTE(review): the skip statements after each filter and the update of
// `next` are not in this listing.
198 LoadPropagation::visit(BasicBlock
*bb
)
200 const Target
*targ
= prog
->getTarget();
203 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
206 if (i
->op
== OP_CALL
) // calls have args as sources, they must be in regs
212 for (int s
= 0; i
->srcExists(s
); ++s
) {
// ld: the instruction defining source s.
213 Instruction
*ld
= i
->getSrc(s
)->getInsn();
215 if (!ld
|| ld
->fixed
|| (ld
->op
!= OP_LOAD
&& ld
->op
!= OP_MOV
))
217 if (!targ
->insnCanLoad(i
, s
, ld
))
// Fold: use the load's source directly and carry over its indirect address.
221 i
->setSrc(s
, ld
->getSrc(0));
222 if (ld
->src(0).isIndirect(0))
223 i
->setIndirect(s
, 0, ld
->getIndirect(0, 0));
// The load is deleted once its result has no remaining references.
225 if (ld
->getDef(0)->refCount() == 0)
226 delete_Instruction(prog
, ld
);
232 // =============================================================================
234 // Evaluate constant expressions.
235 class ConstantFolding
: public Pass
// Driver: repeats the pass over a whole program (see foldAll).
238 bool foldAll(Program
*);
241 virtual bool visit(BasicBlock
*);
// expr: both of the first two sources are immediates.
// opnd: a single immediate source; the int is that source's index.
243 void expr(Instruction
*, ImmediateValue
&, ImmediateValue
&);
244 void opnd(Instruction
*, ImmediateValue
&, int s
);
// unary: folds a one-operand op applied to an immediate.
246 void unary(Instruction
*, const ImmediateValue
&);
248 void tryCollapseChainedMULs(Instruction
*, const int s
, ImmediateValue
&);
250 // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
251 CmpInstruction
*findOriginForTestWithZero(Value
*);
// Tested as the loop condition in foldAll.
253 unsigned int foldCount
;
258 // TODO: remember generated immediates and only revisit these
// Repeats a do/while loop for as long as foldCount is non-zero and fewer than
// two iterations have completed.
// NOTE(review): the loop body is not in this listing.
260 ConstantFolding::foldAll(Program
*prog
)
262 unsigned int iterCount
= 0;
267 } while (foldCount
&& ++iterCount
< 2);
// Walks the instructions of @bb and checks which of sources 0 and 1 are
// immediates.
// NOTE(review): the statements selected by each test (presumably calls to
// expr() / opnd()) and the handling of MOV / CALL are not in this listing.
272 ConstantFolding::visit(BasicBlock
*bb
)
274 Instruction
*i
, *next
;
276 for (i
= bb
->getEntry(); i
; i
= next
) {
278 if (i
->op
== OP_MOV
|| i
->op
== OP_CALL
)
// Filled in by getImmediate() when the corresponding source is an immediate.
281 ImmediateValue src0
, src1
;
// Case 1: both source 0 and source 1 are immediates.
283 if (i
->srcExists(1) &&
284 i
->src(0).getImmediate(src0
) && i
->src(1).getImmediate(src1
))
// Case 2: source 0 is an immediate.
287 if (i
->srcExists(0) && i
->src(0).getImmediate(src0
))
// Case 3: source 1 is an immediate.
290 if (i
->srcExists(1) && i
->src(1).getImmediate(src1
))
// Follows the chain of instructions defining @value, through source 0, until
// an OP_SET is reached or the chain ends.
// Returns that instruction as a CmpInstruction, or NULL when none was found.
// NOTE(review): the switch deciding which opcodes may be traversed, and the
// loop's advance step, are not in this listing.
297 ConstantFolding::findOriginForTestWithZero(Value
*value
)
301 Instruction
*insn
= value
->getInsn();
303 while (insn
&& insn
->op
!= OP_SET
) {
304 Instruction
*next
= NULL
;
309 next
= insn
->getSrc(0)->getInsn();
// Compares this instruction's source type with the producer's result type.
310 if (insn
->sType
!= next
->dType
)
314 next
= insn
->getSrc(0)->getInsn();
321 return insn
? insn
->asCmp() : NULL
;
// Applies the modifier bits (ABS, NEG, SAT, NOT) to the immediate @imm in
// place, selecting the arithmetic by imm.reg.type.
// NOTE(review): most case labels and the break statements are not in this
// listing; the grouping below follows the data member used (f32, s32, f64).
325 Modifier::applyTo(ImmediateValue
& imm
) const
327 if (!bits
) // avoid failure if imm.reg.type is unhandled (e.g. b128)
329 switch (imm
.reg
.type
) {
// 32-bit float: absolute value, negation, then saturation to [0, 1].
331 if (bits
& NV50_IR_MOD_ABS
)
332 imm
.reg
.data
.f32
= fabsf(imm
.reg
.data
.f32
);
333 if (bits
& NV50_IR_MOD_NEG
)
334 imm
.reg
.data
.f32
= -imm
.reg
.data
.f32
;
335 if (bits
& NV50_IR_MOD_SAT
) {
336 if (imm
.reg
.data
.f32
< 0.0f
)
337 imm
.reg
.data
.f32
= 0.0f
;
339 if (imm
.reg
.data
.f32
> 1.0f
)
340 imm
.reg
.data
.f32
= 1.0f
;
// NOT is asserted absent on the float path.
342 assert(!(bits
& NV50_IR_MOD_NOT
));
// Integer types: ABS, NEG and NOT all operate on the s32 member.
345 case TYPE_S8
: // NOTE: will be extended
348 case TYPE_U8
: // NOTE: treated as signed
351 if (bits
& NV50_IR_MOD_ABS
)
352 imm
.reg
.data
.s32
= (imm
.reg
.data
.s32
>= 0) ?
353 imm
.reg
.data
.s32
: -imm
.reg
.data
.s32
;
354 if (bits
& NV50_IR_MOD_NEG
)
355 imm
.reg
.data
.s32
= -imm
.reg
.data
.s32
;
356 if (bits
& NV50_IR_MOD_NOT
)
357 imm
.reg
.data
.s32
= ~imm
.reg
.data
.s32
;
// 64-bit float: same sequence as the 32-bit float path.
361 if (bits
& NV50_IR_MOD_ABS
)
362 imm
.reg
.data
.f64
= fabs(imm
.reg
.data
.f64
);
363 if (bits
& NV50_IR_MOD_NEG
)
364 imm
.reg
.data
.f64
= -imm
.reg
.data
.f64
;
365 if (bits
& NV50_IR_MOD_SAT
) {
366 if (imm
.reg
.data
.f64
< 0.0)
367 imm
.reg
.data
.f64
= 0.0;
369 if (imm
.reg
.data
.f64
> 1.0)
370 imm
.reg
.data
.f64
= 1.0;
372 assert(!(bits
& NV50_IR_MOD_NOT
));
// Unhandled type: assert, and zero the value.
376 assert(!"invalid/unhandled type");
377 imm
.reg
.data
.u64
= 0;
// Maps a single modifier bit to the matching opcode: ABS, NEG, SAT or NOT.
// NOTE(review): the switch header and the default case are not in this
// listing.
383 Modifier::getOp() const
386 case NV50_IR_MOD_ABS
: return OP_ABS
;
387 case NV50_IR_MOD_NEG
: return OP_NEG
;
388 case NV50_IR_MOD_SAT
: return OP_SAT
;
389 case NV50_IR_MOD_NOT
: return OP_NOT
;
// Folds instruction @i whose first two sources are the immediates @imm0 and
// @imm1: the result is computed into `res` and installed as a new immediate
// in source 0.
// NOTE(review): the outer `switch (i->op)`, its case labels, the inner type
// switches and all return/break-out paths are not in this listing. The
// operation named in each comment below is inferred from the visible
// arithmetic only.
398 ConstantFolding::expr(Instruction
*i
,
399 ImmediateValue
&imm0
, ImmediateValue
&imm1
)
// a / b alias the register storage of the two immediates.
401 struct Storage
*const a
= &imm0
.reg
, *const b
= &imm1
.reg
;
404 memset(&res
.data
, 0, sizeof(res
.data
));
// Multiplication. With the dnz flag on an F32 result, both operands are
// tested with isfinite() first.
410 if (i
->dnz
&& i
->dType
== TYPE_F32
) {
411 if (!isfinite(a
->data
.f32
))
413 if (!isfinite(b
->data
.f32
))
417 case TYPE_F32
: res
.data
.f32
= a
->data
.f32
* b
->data
.f32
; break;
418 case TYPE_F64
: res
.data
.f64
= a
->data
.f64
* b
->data
.f64
; break;
420 case TYPE_U32
: res
.data
.u32
= a
->data
.u32
* b
->data
.u32
; break;
// Division; a zero divisor is tested on the u32 member first.
426 if (b
->data
.u32
== 0)
429 case TYPE_F32
: res
.data
.f32
= a
->data
.f32
/ b
->data
.f32
; break;
430 case TYPE_F64
: res
.data
.f64
= a
->data
.f64
/ b
->data
.f64
; break;
431 case TYPE_S32
: res
.data
.s32
= a
->data
.s32
/ b
->data
.s32
; break;
432 case TYPE_U32
: res
.data
.u32
= a
->data
.u32
/ b
->data
.u32
; break;
// Addition.
439 case TYPE_F32
: res
.data
.f32
= a
->data
.f32
+ b
->data
.f32
; break;
440 case TYPE_F64
: res
.data
.f64
= a
->data
.f64
+ b
->data
.f64
; break;
442 case TYPE_U32
: res
.data
.u32
= a
->data
.u32
+ b
->data
.u32
; break;
// Power.
449 case TYPE_F32
: res
.data
.f32
= pow(a
->data
.f32
, b
->data
.f32
); break;
450 case TYPE_F64
: res
.data
.f64
= pow(a
->data
.f64
, b
->data
.f64
); break;
// Maximum.
457 case TYPE_F32
: res
.data
.f32
= MAX2(a
->data
.f32
, b
->data
.f32
); break;
458 case TYPE_F64
: res
.data
.f64
= MAX2(a
->data
.f64
, b
->data
.f64
); break;
459 case TYPE_S32
: res
.data
.s32
= MAX2(a
->data
.s32
, b
->data
.s32
); break;
460 case TYPE_U32
: res
.data
.u32
= MAX2(a
->data
.u32
, b
->data
.u32
); break;
// Minimum.
467 case TYPE_F32
: res
.data
.f32
= MIN2(a
->data
.f32
, b
->data
.f32
); break;
468 case TYPE_F64
: res
.data
.f64
= MIN2(a
->data
.f64
, b
->data
.f64
); break;
469 case TYPE_S32
: res
.data
.s32
= MIN2(a
->data
.s32
, b
->data
.s32
); break;
470 case TYPE_U32
: res
.data
.u32
= MIN2(a
->data
.u32
, b
->data
.u32
); break;
// Bitwise AND / OR / XOR on the full 64-bit storage.
476 res
.data
.u64
= a
->data
.u64
& b
->data
.u64
;
479 res
.data
.u64
= a
->data
.u64
| b
->data
.u64
;
482 res
.data
.u64
= a
->data
.u64
^ b
->data
.u64
;
// Left shift, then right shift (signed or unsigned by type).
485 res
.data
.u32
= a
->data
.u32
<< b
->data
.u32
;
489 case TYPE_S32
: res
.data
.s32
= a
->data
.s32
>> b
->data
.u32
; break;
490 case TYPE_U32
: res
.data
.u32
= a
->data
.u32
>> b
->data
.u32
; break;
// Both operands are compared; the value of `a` becomes the result.
496 if (a
->data
.u32
!= b
->data
.u32
)
498 res
.data
.u32
= a
->data
.u32
;
// Install the result: clear both source modifiers, replace source 0 by a new
// immediate and copy the complete result storage into it.
505 i
->src(0).mod
= Modifier(0);
506 i
->src(1).mod
= Modifier(0);
508 i
->setSrc(0, new_ImmediateValue(i
->bb
->getProgram(), res
.data
.u32
));
511 i
->getSrc(0)->reg
.data
= res
.data
;
// MAD / FMA: the folded product moves to source 1 and the former source 2
// moves to source 0; if that is an immediate too, fold again recursively.
513 if (i
->op
== OP_MAD
|| i
->op
== OP_FMA
) {
516 i
->setSrc(1, i
->getSrc(0));
517 i
->src(1).mod
= i
->src(2).mod
;
518 i
->setSrc(0, i
->getSrc(2));
522 if (i
->src(0).getImmediate(src0
))
523 expr(i
, src0
, *i
->getSrc(1)->asImm());
// Folds a single-operand operation on the immediate @imm: the f32 result is
// computed with the matching C math function and installed as source 0 of @i
// with the modifier cleared.
// NOTE(review): the switch header, the case labels of the fall-through group
// before original line 548, and any opcode change are not in this listing.
530 ConstantFolding::unary(Instruction
*i
, const ImmediateValue
&imm
)
// Guard on the result type being F32.
534 if (i
->dType
!= TYPE_F32
)
537 case OP_NEG
: res
.data
.f32
= -imm
.reg
.data
.f32
; break;
538 case OP_ABS
: res
.data
.f32
= fabsf(imm
.reg
.data
.f32
); break;
539 case OP_RCP
: res
.data
.f32
= 1.0f
/ imm
.reg
.data
.f32
; break;
540 case OP_RSQ
: res
.data
.f32
= 1.0f
/ sqrtf(imm
.reg
.data
.f32
); break;
541 case OP_LG2
: res
.data
.f32
= log2f(imm
.reg
.data
.f32
); break;
542 case OP_EX2
: res
.data
.f32
= exp2f(imm
.reg
.data
.f32
); break;
543 case OP_SIN
: res
.data
.f32
= sinf(imm
.reg
.data
.f32
); break;
544 case OP_COS
: res
.data
.f32
= cosf(imm
.reg
.data
.f32
); break;
545 case OP_SQRT
: res
.data
.f32
= sqrtf(imm
.reg
.data
.f32
); break;
// Pass-through group: the value is copied unchanged.
548 // these should be handled in subsequent OP_SIN/COS/EX2
549 res
.data
.f32
= imm
.reg
.data
.f32
;
555 i
->setSrc(0, new_ImmediateValue(i
->bb
->getProgram(), res
.data
.f32
));
556 i
->src(0).mod
= Modifier(0);
// Tries to merge an F32 multiply-by-immediate (@mul2, immediate @imm2 in
// source @s) with a neighbouring F32 multiply: either the multiply producing
// its other operand, or the single multiply consuming its result.
// NOTE(review): several declarations (insn, imm1, s1, s2, t2, e, bld), the
// else branches and some assignments to mul1/mul2 are not in this listing.
560 ConstantFolding::tryCollapseChainedMULs(Instruction
*mul2
,
561 const int s
, ImmediateValue
& imm2
)
// t: index of the non-immediate source of mul2.
563 const int t
= s
? 0 : 1;
565 Instruction
*mul1
= NULL
; // mul1 before mul2
567 float f
= imm2
.reg
.data
.f32
;
570 assert(mul2
->op
== OP_MUL
&& mul2
->dType
== TYPE_F32
);
// Case A: the other operand has exactly one reference and is produced by an
// F32 multiply.
572 if (mul2
->getSrc(t
)->refCount() == 1) {
573 insn
= mul2
->getSrc(t
)->getInsn();
574 if (!mul2
->src(t
).mod
&& insn
->op
== OP_MUL
&& insn
->dType
== TYPE_F32
)
576 if (mul1
&& !mul1
->saturate
) {
// mul1 has an immediate operand too: replace it by the product of both
// immediates and redirect the users of mul2's result to mul1's result.
579 if (mul1
->src(s1
= 0).getImmediate(imm1
) ||
580 mul1
->src(s1
= 1).getImmediate(imm1
)) {
581 bld
.setPosition(mul1
, false);
583 // d = mul a, imm2 -> d = mul r, (imm1 * imm2)
584 mul1
->setSrc(s1
, bld
.loadImm(NULL
, f
* imm1
.reg
.data
.f32
));
585 mul1
->src(s1
).mod
= Modifier(0);
586 mul2
->def(0).replace(mul1
->getDef(0), false);
// Otherwise, if the target supports f as a post-multiply factor, record it on
// mul1 instead.
588 if (prog
->getTarget()->isPostMultiplySupported(OP_MUL
, f
, e
)) {
590 // d = mul c, imm -> d = mul_x_imm a, b
591 mul1
->postFactor
= e
;
592 mul2
->def(0).replace(mul1
->getDef(0), false);
594 mul1
->src(0).mod
*= Modifier(NV50_IR_MOD_NEG
);
596 mul1
->saturate
= mul2
->saturate
;
// Case B: mul2's result has exactly one use and mul2 does not saturate; look
// at the instruction consuming it.
600 if (mul2
->getDef(0)->refCount() == 1 && !mul2
->saturate
) {
602 // d = mul b, c -> d = mul_x_imm a, c
604 insn
= mul2
->getDef(0)->uses
.front()->getInsn();
609 s2
= insn
->getSrc(0) == mul1
->getDef(0) ? 0 : 1;
611 if (insn
->op
== OP_MUL
&& insn
->dType
== TYPE_F32
)
612 if (!insn
->src(s2
).mod
&& !insn
->src(t2
).getImmediate(imm1
))
614 if (mul2
&& prog
->getTarget()->isPostMultiplySupported(OP_MUL
, f
, e
)) {
615 mul2
->postFactor
= e
;
616 mul2
->setSrc(s2
, mul1
->src(t
));
618 mul2
->src(s2
).mod
*= Modifier(NV50_IR_MOD_NEG
);
// Simplifies instruction @i when exactly one of its sources, index @s, is the
// immediate @imm0 (identities such as x*0, x*1, x+0, division by a constant,
// compare-with-zero and shift concatenation).
// NOTE(review): the `switch (op)`, most case labels, the opcode assignments,
// several declarations (t, bld, imm1, m, r, tA, tB, tD, mul, cc, ccZ) and the
// break/return statements are not in this listing. The operation named in
// each comment below is inferred from the visible statements only.
624 ConstantFolding::opnd(Instruction
*i
, ImmediateValue
&imm0
, int s
)
627 const operation op
= i
->op
;
// Multiplication: F32 multiplies first try to merge with a chained multiply.
631 if (i
->dType
== TYPE_F32
)
632 tryCollapseChainedMULs(i
, s
, imm0
);
// Immediate is 0: source 0 becomes the constant 0.
634 if (imm0
.isInteger(0)) {
636 i
->setSrc(0, new_ImmediateValue(prog
, 0u));
637 i
->src(0).mod
= Modifier(0);
// Immediate is +1 or -1: a negative immediate toggles NEG on the other
// source, and the opcode is derived from that source's modifier.
640 if (imm0
.isInteger(1) || imm0
.isInteger(-1)) {
641 if (imm0
.isNegative())
642 i
->src(t
).mod
= i
->src(t
).mod
^ Modifier(NV50_IR_MOD_NEG
);
643 i
->op
= i
->src(t
).mod
.getOp();
645 i
->setSrc(0, i
->getSrc(1));
646 i
->src(0).mod
= i
->src(1).mod
;
// Immediate is +2 or -2: the immediate slot receives a copy of the other
// source and its modifier.
653 if (imm0
.isInteger(2) || imm0
.isInteger(-2)) {
654 if (imm0
.isNegative())
655 i
->src(t
).mod
= i
->src(t
).mod
^ Modifier(NV50_IR_MOD_NEG
);
657 i
->setSrc(s
, i
->getSrc(t
));
658 i
->src(s
).mod
= i
->src(t
).mod
;
// Non-float type with a non-negative power-of-two immediate: the value moves
// to source 0 and a new immediate is placed in source 1.
660 if (!isFloatType(i
->sType
) && !imm0
.isNegative() && imm0
.isPow2()) {
663 i
->setSrc(0, i
->getSrc(t
));
664 i
->src(0).mod
= i
->src(t
).mod
;
665 i
->setSrc(1, new_ImmediateValue(prog
, imm0
.reg
.data
.u32
));
// Addition with 0: the remaining source moves to slot 0 and the opcode is
// derived from its modifier, which is then cleared.
672 if (imm0
.isInteger(0)) {
674 i
->setSrc(0, i
->getSrc(1));
675 i
->src(0).mod
= i
->src(1).mod
;
678 i
->op
= i
->src(0).mod
.getOp();
680 i
->src(0).mod
= Modifier(0);
// Division by a constant: only when the immediate is source 1 and the type is
// S32 or U32.
685 if (s
!= 1 || (i
->dType
!= TYPE_S32
&& i
->dType
!= TYPE_U32
))
687 bld
.setPosition(i
, false);
688 if (imm0
.reg
.data
.u32
== 0) {
691 if (imm0
.reg
.data
.u32
== 1) {
// Unsigned power-of-two divisor: source 1 becomes log2 of the divisor.
695 if (i
->dType
== TYPE_U32
&& imm0
.isPow2()) {
697 i
->setSrc(1, bld
.mkImm(util_logbase2(imm0
.reg
.data
.u32
)));
// General unsigned divisor: emits a multiply-high by the constant m followed
// by SUB / SHR / ADD / SHR, then deletes the original instruction.
699 if (i
->dType
== TYPE_U32
) {
702 const uint32_t d
= imm0
.reg
.data
.u32
;
// l: log2 of d, tested against d below (rounding step not in this listing).
705 uint32_t l
= util_logbase2(d
);
706 if (((uint32_t)1 << l
) < d
)
708 m
= (((uint64_t)1 << 32) * (((uint64_t)1 << l
) - d
)) / d
+ 1;
714 mul
= bld
.mkOp2(OP_MUL
, TYPE_U32
, tA
, i
->getSrc(0),
715 bld
.loadImm(NULL
, m
));
716 mul
->subOp
= NV50_IR_SUBOP_MUL_HIGH
;
717 bld
.mkOp2(OP_SUB
, TYPE_U32
, tB
, i
->getSrc(0), tA
);
720 bld
.mkOp2(OP_SHR
, TYPE_U32
, tA
, tB
, bld
.mkImm(r
));
// NOTE(review): `s` here appears to be a local shift count shadowing the
// parameter; its declaration is not in this listing.
723 tB
= s
? bld
.getSSA() : i
->getDef(0);
724 bld
.mkOp2(OP_ADD
, TYPE_U32
, tB
, mul
->getDef(0), tA
);
726 bld
.mkOp2(OP_SHR
, TYPE_U32
, i
->getDef(0), tB
, bld
.mkImm(s
));
728 delete_Instruction(prog
, i
);
730 if (imm0
.reg
.data
.s32
== -1) {
// General signed divisor: emits a multiply-high MAD by the constant m, an
// arithmetic shift, a sign test and subtraction, plus a NEG writing the
// original destination, then deletes the original instruction.
736 const int32_t d
= imm0
.reg
.data
.s32
;
738 int32_t l
= util_logbase2(static_cast<unsigned>(abs(d
)));
739 if ((1 << l
) < abs(d
))
743 m
= ((uint64_t)1 << (32 + l
- 1)) / abs(d
) + 1 - ((uint64_t)1 << 32);
747 bld
.mkOp3(OP_MAD
, TYPE_S32
, tA
, i
->getSrc(0), bld
.loadImm(NULL
, m
),
748 i
->getSrc(0))->subOp
= NV50_IR_SUBOP_MUL_HIGH
;
750 bld
.mkOp2(OP_SHR
, TYPE_S32
, tB
, tA
, bld
.mkImm(l
- 1));
754 bld
.mkCmp(OP_SET
, CC_LT
, TYPE_S32
, tA
, i
->getSrc(0), bld
.mkImm(0));
// For a negative divisor the result goes to a fresh SSA value first.
755 tD
= (d
< 0) ? bld
.getSSA() : i
->getDef(0)->asLValue();
756 bld
.mkOp2(OP_SUB
, TYPE_U32
, tD
, tB
, tA
);
758 bld
.mkOp1(OP_NEG
, TYPE_S32
, i
->getDef(0), tB
);
760 delete_Instruction(prog
, i
);
// Unsigned source type with a power-of-two immediate: source 1 becomes the
// immediate minus one.
765 if (i
->sType
== TYPE_U32
&& imm0
.isPow2()) {
766 bld
.setPosition(i
, false);
768 i
->setSrc(1, bld
.loadImm(NULL
, imm0
.reg
.data
.u32
- 1));
// Comparison against zero of a value that stems from an earlier SET: derive a
// condition code and compare the earlier SET's operands directly.
772 case OP_SET
: // TODO: SET_AND,OR,XOR
774 CmpInstruction
*si
= findOriginForTestWithZero(i
->getSrc(t
));
776 if (i
->src(t
).mod
!= Modifier(0))
778 if (imm0
.reg
.data
.u32
!= 0 || !si
|| si
->op
!= OP_SET
)
// ccZ: this comparison's condition code with the CC_U bit masked off.
781 ccZ
= (CondCode
)((unsigned int)i
->asCmp()->setCond
& ~CC_U
);
783 ccZ
= reverseCondCode(ccZ
);
785 case CC_LT
: cc
= CC_FL
; break;
786 case CC_GE
: cc
= CC_TR
; break;
787 case CC_EQ
: cc
= inverseCondCode(cc
); break;
788 case CC_LE
: cc
= inverseCondCode(cc
); break;
794 i
->asCmp()->setCond
= cc
;
795 i
->setSrc(0, si
->src(0));
796 i
->setSrc(1, si
->src(1));
797 i
->sType
= si
->sType
;
// Shift by a constant: only when the immediate is source 1 and source 0 has
// no modifier.
803 if (s
!= 1 || i
->src(0).mod
!= Modifier(0))
805 // try to concatenate shifts
806 Instruction
*si
= i
->getSrc(0)->getInsn();
807 if (!si
|| si
->op
!= OP_SHL
)
// The inner shift amount is an immediate as well: shift the inner operand by
// the sum of both amounts.
810 if (si
->src(1).getImmediate(imm1
)) {
811 bld
.setPosition(i
, false);
812 i
->setSrc(0, si
->getSrc(0));
813 i
->setSrc(1, bld
.loadImm(NULL
, imm0
.reg
.data
.u32
+ imm1
.reg
.data
.u32
));
838 // =============================================================================
840 // Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed.
// Implemented as a Pass that overrides the per-basic-block visit hook.
841 class ModifierFolding
: public Pass
844 virtual bool visit(BasicBlock
*);
// For each instruction in @bb, looks at the instruction `mi` defining each of
// its first three sources. If `mi` is itself a modifier operation that the
// target supports as a source modifier here, the source is replaced by mi's
// operand and the modifier is merged in. An OP_SAT is folded into its
// producer when the target supports saturation on it.
// NOTE(review): the skip statements after each filter, the start of two
// conditions (original lines 866 and 906) and the else branches are not in
// this listing.
848 ModifierFolding::visit(BasicBlock
*bb
)
850 const Target
*target
= prog
->getTarget();
852 Instruction
*i
, *next
, *mi
;
855 for (i
= bb
->getEntry(); i
; i
= next
) {
// Disabled by the constant 0 in the condition.
858 if (0 && i
->op
== OP_SUB
) {
859 // turn "sub" into "add neg" (do we really want this ?)
861 i
->src(0).mod
= i
->src(0).mod
^ Modifier(NV50_IR_MOD_NEG
);
864 for (int s
= 0; s
< 3 && i
->srcExists(s
); ++s
) {
865 mi
= i
->getSrc(s
)->getInsn();
// Filter on predicated producers and on results with more than 8 references.
867 mi
->predSrc
>= 0 || mi
->getDef(0)->refCount() > 8)
// Type compatibility checks between the user and the producer.
869 if (i
->sType
== TYPE_U32
&& mi
->dType
== TYPE_S32
) {
870 if ((i
->op
!= OP_ADD
&&
876 if (i
->sType
!= mi
->dType
) {
// mod: the modifier equivalent of mi's opcode combined with mi's own source
// modifier; an empty modifier means mi is not a modifier operation.
879 if ((mod
= Modifier(mi
->op
)) == Modifier(0))
881 mod
*= mi
->src(0).mod
;
883 if ((i
->op
== OP_ABS
) || i
->src(s
).mod
.abs()) {
884 // abs neg [abs] = abs
885 mod
= mod
& Modifier(~(NV50_IR_MOD_NEG
| NV50_IR_MOD_ABS
));
887 if ((i
->op
== OP_NEG
) && mod
.neg()) {
889 // neg as both opcode and modifier on same insn is prohibited
890 // neg neg abs = abs, neg neg = identity
891 mod
= mod
& Modifier(~NV50_IR_MOD_NEG
);
893 mod
= mod
& Modifier(~NV50_IR_MOD_ABS
);
894 if (mod
== Modifier(0))
// Fold: bypass mi and merge the modifier into this source.
898 if (target
->isModSupported(i
, s
, mod
)) {
899 i
->setSrc(s
, mi
->getSrc(0));
900 i
->src(s
).mod
*= mod
;
// OP_SAT: the producer writes the SAT's destination directly and the SAT is
// deleted. The visible conditions require at most one reference to the
// producer's result and target support.
904 if (i
->op
== OP_SAT
) {
905 mi
= i
->getSrc(0)->getInsn();
907 mi
->getDef(0)->refCount() <= 1 && target
->isSatSupported(mi
)) {
909 mi
->setDef(0, i
->getDef(0));
910 delete_Instruction(prog
, i
);
918 // =============================================================================
920 // MUL + ADD -> MAD/FMA
921 // MIN/MAX(a, a) -> a, etc.
922 // SLCT(a, b, const) -> cc(const) ? a : b
924 // MUL(MUL(a, b), const) -> MUL_Xconst(a, b)
// Pass with one handler per opcode family; each handler rewrites a single
// instruction.
925 class AlgebraicOpt
: public Pass
928 virtual bool visit(BasicBlock
*);
930 void handleABS(Instruction
*);
931 bool handleADD(Instruction
*);
// toOp selects the fused opcode to try (OP_MAD or OP_SAD, see handleADD).
932 bool tryADDToMADOrSAD(Instruction
*, operation toOp
);
933 void handleMINMAX(Instruction
*);
934 void handleRCP(Instruction
*);
935 void handleSLCT(Instruction
*);
936 void handleLOGOP(Instruction
*);
937 void handleCVT(Instruction
*);
938 void handleSUCLAMP(Instruction
*);
// Rewrites ABS of a difference so that the ABS instruction takes both
// operands of the difference as sources 0 and 1 plus a zero immediate as
// source 2. This applies when the target supports OP_SAD for the type.
// NOTE(review): the assignment of the new opcode, the start of the first
// condition (original line 948) and the early returns are not in this
// listing.
944 AlgebraicOpt::handleABS(Instruction
*abs
)
// sub: the instruction producing the ABS operand.
946 Instruction
*sub
= abs
->getSrc(0)->getInsn();
949 !prog
->getTarget()->isOpSupported(OP_SAD
, abs
->dType
))
951 // expect not to have mods yet, if we do, bail
952 if (sub
->src(0).mod
|| sub
->src(1).mod
)
954 // hidden conversion ?
955 ty
= intTypeToSigned(sub
->dType
);
956 if (abs
->dType
!= abs
->sType
|| ty
!= abs
->sType
)
// The producer must be ADD or SUB with two unmodified GPR sources.
959 if ((sub
->op
!= OP_ADD
&& sub
->op
!= OP_SUB
) ||
960 sub
->src(0).getFile() != FILE_GPR
|| sub
->src(0).mod
||
961 sub
->src(1).getFile() != FILE_GPR
|| sub
->src(1).mod
)
964 Value
*src0
= sub
->getSrc(0);
965 Value
*src1
= sub
->getSrc(1);
// ADD form: one of the addends must come from an OP_NEG of matching type; the
// operand of that NEG becomes src1.
967 if (sub
->op
== OP_ADD
) {
968 Instruction
*neg
= sub
->getSrc(1)->getInsn();
969 if (neg
&& neg
->op
!= OP_NEG
) {
970 neg
= sub
->getSrc(0)->getInsn();
971 src0
= sub
->getSrc(1);
973 if (!neg
|| neg
->op
!= OP_NEG
||
974 neg
->dType
!= neg
->sType
|| neg
->sType
!= ty
)
976 src1
= neg
->getSrc(0);
980 abs
->moveSources(1, 2); // move sources >=1 up by 2
982 abs
->setType(sub
->dType
);
983 abs
->setSrc(0, src0
);
984 abs
->setSrc(1, src1
);
985 bld
.setPosition(abs
, false);
// Source 2 is an immediate 0.
986 abs
->setSrc(2, bld
.loadImm(bld
.getSSA(typeSizeof(ty
)), 0));
// Tries to fuse @add with the instruction producing one of its operands:
// first into OP_MAD, then into OP_SAD, each only if the target supports that
// op for the result type.
// NOTE(review): the return statements are not in this listing; presumably
// `changed` is returned.
990 AlgebraicOpt::handleADD(Instruction
*add
)
992 Value
*src0
= add
->getSrc(0);
993 Value
*src1
= add
->getSrc(1);
// Test that both operands are in general-purpose registers.
995 if (src0
->reg
.file
!= FILE_GPR
|| src1
->reg
.file
!= FILE_GPR
)
998 bool changed
= false;
999 if (!changed
&& prog
->getTarget()->isOpSupported(OP_MAD
, add
->dType
))
1000 changed
= tryADDToMADOrSAD(add
, OP_MAD
);
1001 if (!changed
&& prog
->getTarget()->isOpSupported(OP_SAD
, add
->dType
))
1002 changed
= tryADDToMADOrSAD(add
, OP_SAD
);
1006 // ADD(SAD(a,b,0), c) -> SAD(a,b,c)
1007 // ADD(MUL(a,b), c) -> MAD(a,b,c)
// Fuses @add with a MUL (for @toOp == OP_MAD) or SAD (for OP_SAD) that
// produces one of its operands: the producer's two sources become sources 0
// and 1 of @add and the other addend becomes source 2.
// NOTE(review): the selection of `s`, the declarations of src / imm / mod,
// the opcode assignment and all return statements are not in this listing.
1009 AlgebraicOpt::tryADDToMADOrSAD(Instruction
*add
, operation toOp
)
1011 Value
*src0
= add
->getSrc(0);
1012 Value
*src1
= add
->getSrc(1);
// srcOp: opcode the producer must have. modBad: modifier bits that block the
// fusion (everything except NEG when fusing into MAD).
1015 const operation srcOp
= toOp
== OP_SAD
? OP_SAD
: OP_MUL
;
1016 const Modifier modBad
= Modifier(~((toOp
== OP_MAD
) ? NV50_IR_MOD_NEG
: 0));
// Candidate test on each operand: referenced once, unique producer with
// opcode srcOp.
1019 if (src0
->refCount() == 1 &&
1020 src0
->getUniqueInsn() && src0
->getUniqueInsn()->op
== srcOp
)
1023 if (src1
->refCount() == 1 &&
1024 src1
->getUniqueInsn() && src1
->getUniqueInsn()->op
== srcOp
)
// Test for a producer located in a different basic block than @add.
1029 if ((src0
->getUniqueInsn() && src0
->getUniqueInsn()->bb
!= add
->bb
) ||
1030 (src1
->getUniqueInsn() && src1
->getUniqueInsn()->bb
!= add
->bb
))
1033 src
= add
->getSrc(s
);
1035 if (src
->getInsn()->postFactor
)
// For SAD, the producer's third source is tested for being the immediate 0.
1037 if (toOp
== OP_SAD
) {
1039 if (!src
->getInsn()->src(2).getImmediate(imm
))
1041 if (!imm
.isInteger(0))
// mod[0..1]: modifiers on the two sources of @add; mod[2..3]: modifiers on
// the two sources of the producer.
1045 mod
[0] = add
->src(0).mod
;
1046 mod
[1] = add
->src(1).mod
;
1047 mod
[2] = src
->getUniqueInsn()->src(0).mod
;
1048 mod
[3] = src
->getUniqueInsn()->src(1).mod
;
1050 if (((mod
[0] | mod
[1]) | (mod
[2] | mod
[3])) & modBad
)
1054 add
->subOp
= src
->getInsn()->subOp
; // potentially mul-high
// The addend that is not the producer's result moves to source 2.
1056 add
->setSrc(2, add
->src(s
? 0 : 1));
// The modifier that was on the fused operand is combined into source 0.
1058 add
->setSrc(0, src
->getInsn()->getSrc(0));
1059 add
->src(0).mod
= mod
[2] ^ mod
[s
];
1060 add
->setSrc(1, src
->getInsn()->getSrc(1));
1061 add
->src(1).mod
= mod
[3];
// Simplifies MIN/MAX whose two sources are the same GPR value with equal
// modifiers: either the result is replaced by the source and the instruction
// removed from its block, or the instruction is turned into a one-source
// OP_CVT.
// NOTE(review): the early return and the else between the two rewrites are
// not in this listing.
1067 AlgebraicOpt::handleMINMAX(Instruction
*minmax
)
1069 Value
*src0
= minmax
->getSrc(0);
1070 Value
*src1
= minmax
->getSrc(1);
1072 if (src0
!= src1
|| src0
->reg
.file
!= FILE_GPR
)
1074 if (minmax
->src(0).mod
== minmax
->src(1).mod
) {
1075 if (minmax
->def(0).mayReplace(minmax
->src(0))) {
1076 minmax
->def(0).replace(minmax
->src(0), false);
1077 minmax
->bb
->remove(minmax
);
1079 minmax
->op
= OP_CVT
;
1080 minmax
->setSrc(1, NULL
);
// Identities listed for the case of differing modifiers; no code for them is
// visible in this listing.
1084 // min(x, -x) = -abs(x)
1085 // min(x, -abs(x)) = -abs(x)
1086 // min(x, abs(x)) = x
1087 // max(x, -abs(x)) = x
1088 // max(x, abs(x)) = abs(x)
1089 // max(x, -x) = abs(x)
// Collapses RCP(RCP(x)): when the operand's unique producer is itself an
// OP_RCP, the opcode becomes the one matching the combined source modifiers
// and the source becomes the inner operand.
1094 AlgebraicOpt::handleRCP(Instruction
*rcp
)
1096 Instruction
*si
= rcp
->getSrc(0)->getUniqueInsn();
1098 if (si
&& si
->op
== OP_RCP
) {
// Combined modifier of the outer and inner source.
1099 Modifier mod
= rcp
->src(0).mod
* si
->src(0).mod
;
1100 rcp
->op
= mod
.getOp();
1101 rcp
->setSrc(0, si
->getSrc(0));
// Resolves a select whose condition operand (source 2) is an immediate by
// evaluating the comparison against 0.0f at compile time, and handles the
// case where both selectable sources are identical. Afterwards sources 1 and
// 2 are dropped.
// NOTE(review): the else / return statements and the opcode assignment are
// not in this listing.
1106 AlgebraicOpt::handleSLCT(Instruction
*slct
)
1108 if (slct
->getSrc(2)->reg
.file
== FILE_IMMEDIATE
) {
// When the comparison holds, source 1 is moved into slot 0.
1109 if (slct
->getSrc(2)->asImm()->compare(slct
->asCmp()->setCond
, 0.0f
))
1110 slct
->setSrc(0, slct
->getSrc(1));
1112 if (slct
->getSrc(0) != slct
->getSrc(1)) {
1116 slct
->setSrc(1, NULL
);
1117 slct
->setSrc(2, NULL
);
// Simplifies a logic op on two GPR values: AND/OR whose result may be
// replaced by source 0 is removed, and a logic op combining the results of
// two SET instructions is rewritten as a chained SET_AND / SET_OR / SET_XOR.
// NOTE(review): the condition selecting the first rewrite (original line
// 1129), the swap of set0/set1, the assignment of redOp to the clone and all
// return statements are not in this listing.
1121 AlgebraicOpt::handleLOGOP(Instruction
*logop
)
1123 Value
*src0
= logop
->getSrc(0);
1124 Value
*src1
= logop
->getSrc(1);
1126 if (src0
->reg
.file
!= FILE_GPR
|| src1
->reg
.file
!= FILE_GPR
)
1130 if ((logop
->op
== OP_AND
|| logop
->op
== OP_OR
) &&
1131 logop
->def(0).mayReplace(logop
->src(0))) {
1132 logop
->def(0).replace(logop
->src(0), false);
1133 delete_Instruction(prog
, logop
);
1136 // try AND(SET, SET) -> SET_AND(SET)
1137 Instruction
*set0
= src0
->getInsn();
1138 Instruction
*set1
= src1
->getInsn();
1140 if (!set0
|| set0
->fixed
|| !set1
|| set1
->fixed
)
// set1 is expected to be a plain OP_SET; the operands are exchanged first if
// it is not, and the test is repeated.
1142 if (set1
->op
!= OP_SET
) {
1143 Instruction
*xchg
= set0
;
1146 if (set1
->op
!= OP_SET
)
// redOp: the combined opcode matching the logic op.
1149 operation redOp
= (logop
->op
== OP_AND
? OP_SET_AND
:
1150 logop
->op
== OP_XOR
? OP_SET_XOR
: OP_SET_OR
);
1151 if (!prog
->getTarget()->isOpSupported(redOp
, set1
->sType
))
1153 if (set0
->op
!= OP_SET
&&
1154 set0
->op
!= OP_SET_AND
&&
1155 set0
->op
!= OP_SET_OR
&&
1156 set0
->op
!= OP_SET_XOR
)
// Test for both SET results having more than one reference.
1158 if (set0
->getDef(0)->refCount() > 1 &&
1159 set1
->getDef(0)->refCount() > 1)
1161 if (set0
->getPredicate() || set1
->getPredicate())
1163 // check that they don't source each other
1164 for (int s
= 0; s
< 2; ++s
)
1165 if (set0
->getSrc(s
) == set1
->getDef(0) ||
1166 set1
->getSrc(s
) == set0
->getDef(0))
// Both SETs are cloned and inserted after the logic op. Each insertion is
// made directly after logop, so set0 ends up before set1.
1169 set0
= cloneForward(func
, set0
);
1170 set1
= cloneShallow(func
, set1
);
1171 logop
->bb
->insertAfter(logop
, set1
);
1172 logop
->bb
->insertAfter(logop
, set0
);
// The first clone produces a 1-byte predicate that feeds the second clone as
// source 2; the second clone takes over the logic op's destination.
1174 set0
->dType
= TYPE_U8
;
1175 set0
->getDef(0)->reg
.file
= FILE_PREDICATE
;
1176 set0
->getDef(0)->reg
.size
= 1;
1177 set1
->setSrc(2, set0
->getDef(0));
1179 set1
->setDef(0, logop
->getDef(0));
1180 delete_Instruction(prog
, logop
);
1184 // F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0
1186 // F2I(NEG(I2F(ABS(SET))))
// Matches an F32 -> S32 conversion whose operand chain ends in an OP_SET and
// replaces it with a clone of that SET producing a U32 result directly in the
// conversion's destination.
// NOTE(review): the early returns, the end of the ABS condition (original
// lines 1206-1207) and the else joining the two chain forms are not in this
// listing.
1188 AlgebraicOpt::handleCVT(Instruction
*cvt
)
1190 if (cvt
->sType
!= TYPE_F32
||
1191 cvt
->dType
!= TYPE_S32
|| cvt
->src(0).mod
!= Modifier(0))
// The operand must come from an unmodified F32 NEG.
1193 Instruction
*insn
= cvt
->getSrc(0)->getInsn();
1194 if (!insn
|| insn
->op
!= OP_NEG
|| insn
->dType
!= TYPE_F32
)
1196 if (insn
->src(0).mod
!= Modifier(0))
1198 insn
= insn
->getSrc(0)->getInsn();
1200 // check for nv50 SET(-1,0) -> SET(1.0f/0.0f) chain and nvc0's f32 SET
// Form 1: S32 -> F32 conversion of ABS of a U32 SET.
1201 if (insn
&& insn
->op
== OP_CVT
&&
1202 insn
->dType
== TYPE_F32
&&
1203 insn
->sType
== TYPE_S32
) {
1204 insn
= insn
->getSrc(0)->getInsn();
1205 if (!insn
|| insn
->op
!= OP_ABS
|| insn
->sType
!= TYPE_S32
||
1208 insn
= insn
->getSrc(0)->getInsn();
1209 if (!insn
|| insn
->op
!= OP_SET
|| insn
->dType
!= TYPE_U32
)
// Form 2: a SET with F32 result.
1212 if (!insn
|| insn
->op
!= OP_SET
|| insn
->dType
!= TYPE_F32
) {
// Clone the SET with a U32 result type, let it write the conversion's
// destination, insert it after the conversion and delete the conversion.
1216 Instruction
*bset
= cloneShallow(func
, insn
);
1217 bset
->dType
= TYPE_U32
;
1218 bset
->setDef(0, cvt
->getDef(0));
1219 cvt
->bb
->insertAfter(cvt
, bset
);
1220 delete_Instruction(prog
, cvt
);
1223 // SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6)
// Folds the immediate addend of the ADD feeding source 0 into the
// instruction's immediate source 2, provided the sum stays within [-32, 31].
// NOTE(review): the declarations of imm / add / s, the loop exit on finding
// the immediate, the flip of `s` to the other addend and the early returns
// are not in this listing.
1225 AlgebraicOpt::handleSUCLAMP(Instruction
*insn
)
// val: current value of the immediate in source 2.
1228 int32_t val
= insn
->getSrc(2)->asImm()->reg
.data
.s32
;
1232 assert(insn
->srcExists(0) && insn
->src(0).getFile() == FILE_GPR
);
1234 // look for ADD (TODO: only count references by non-SUCLAMP)
1235 if (insn
->getSrc(0)->refCount() > 1)
1237 add
= insn
->getSrc(0)->getInsn();
1238 if (!add
|| add
->op
!= OP_ADD
||
1239 (add
->dType
!= TYPE_U32
&&
1240 add
->dType
!= TYPE_S32
))
1243 // look for immediate
1244 for (s
= 0; s
< 2; ++s
)
1245 if (add
->src(s
).getImmediate(imm
))
1250 // determine if immediate fits
1251 val
+= imm
.reg
.data
.s32
;
1252 if (val
> 31 || val
< -32)
1254 // determine if other addend fits
1255 if (add
->src(s
).getFile() != FILE_GPR
|| add
->src(s
).mod
!= Modifier(0))
1258 bld
.setPosition(insn
, false); // make sure bld is init'ed
// Install the combined immediate and bypass the ADD.
1260 insn
->setSrc(2, bld
.mkImm(val
));
1261 insn
->setSrc(0, add
->getSrc(s
));
// Iterates over the instructions of @bb starting at its entry instruction.
// NOTE(review): the loop body (presumably the dispatch to the handle*
// methods by opcode) is not in this listing.
1265 AlgebraicOpt::visit(BasicBlock
*bb
)
1268 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
1306 // =============================================================================
// Sets the offset of the address operand (source 0) of load/store @ldst to
// @offset. Nothing happens when the offset is already equal. If the operand
// is referenced more than once, it is shallow-cloned via @fn before being
// modified.
1309 updateLdStOffset(Instruction
*ldst
, int32_t offset
, Function
*fn
)
1311 if (offset
!= ldst
->getSrc(0)->reg
.data
.offset
) {
// Clone first so that the other references are not affected.
1312 if (ldst
->getSrc(0)->refCount() > 1)
1313 ldst
->setSrc(0, cloneShallow(fn
, ldst
->getSrc(0)));
1314 ldst
->getSrc(0)->reg
.data
.offset
= offset
;
1318 // Combine loads and stores, forward stores to loads where possible.
// Keeps one list of load records and one list of store records per data file.
// NOTE(review): the declaration of the nested Record class, most of its
// fields and the access specifiers are not in this listing.
1319 class MemoryOpt
: public Pass
// Record: up to two indirect-address values of the access.
1327 const Value
*rel
[2];
1335 bool overlaps(const Instruction
*ldst
) const;
1337 inline void link(Record
**);
1338 inline void unlink(Record
**);
// Fills the record from a load/store instruction.
1339 inline void set(const Instruction
*ldst
);
// Record lists, indexed by data file.
1345 Record
*loads
[DATA_FILE_COUNT
];
1346 Record
*stores
[DATA_FILE_COUNT
];
// Pool from which Record objects are allocated and to which they are
// released.
1348 MemoryPool recordPool
;
1351 virtual bool visit(BasicBlock
*);
1352 bool runOpt(BasicBlock
*);
// Returns the list (loads or stores) matching the instruction.
1354 Record
**getList(const Instruction
*);
1356 Record
*findRecord(const Instruction
*, bool load
, bool& isAdjacent
) const;
1358 // merge @insn into load/store instruction from @rec
1359 bool combineLd(Record
*rec
, Instruction
*ld
);
1360 bool combineSt(Record
*rec
, Instruction
*st
);
1362 bool replaceLdFromLd(Instruction
*ld
, Record
*ldRec
);
1363 bool replaceLdFromSt(Instruction
*ld
, Record
*stRec
);
1364 bool replaceStFromSt(Instruction
*restrict st
, Record
*stRec
);
1366 void addRecord(Instruction
*ldst
);
1367 void purgeRecords(Instruction
*const st
, DataFile
);
1368 void lockStores(Instruction
*const ld
);
// Constructs the pass: the record pool is set up for objects of
// sizeof(Record) (second pool argument: 6), then every data file index is
// visited.
// NOTE(review): the loop body is not in this listing; presumably it clears
// loads[i] and stores[i].
1375 MemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record
), 6)
1377 for (int i
= 0; i
< DATA_FILE_COUNT
; ++i
) {
1387 for (unsigned int i
= 0; i
< DATA_FILE_COUNT
; ++i
) {
1389 for (it
= loads
[i
]; it
; it
= next
) {
1391 recordPool
.release(it
);
1394 for (it
= stores
[i
]; it
; it
= next
) {
1396 recordPool
.release(it
);
// Merges load @ld into the load recorded in @rec so that one wider load
// covers both. The definitions of @ld are appended to, or inserted before,
// those of rec->insn depending on which offset is lower, and @ld is deleted.
// NOTE(review): the declarations of j / d / sz, the else branch for
// offLd > offRc and the return statements are not in this listing.
1403 MemoryOpt::combineLd(Record
*rec
, Instruction
*ld
)
1405 int32_t offRc
= rec
->offset
;
1406 int32_t offLd
= ld
->getSrc(0)->reg
.data
.offset
;
1407 int sizeRc
= rec
->size
;
1408 int sizeLd
= typeSizeof(ld
->dType
);
// size: byte size of the combined access.
1409 int size
= sizeRc
+ sizeLd
;
// The target must support an access of the combined size in this file.
1412 if (!prog
->getTarget()->
1413 isAccessSupported(ld
->getSrc(0)->reg
.file
, typeOfSize(size
)))
1415 // no unaligned loads
1416 if (((size
== 0x8) && (MIN2(offLd
, offRc
) & 0x7)) ||
1417 ((size
== 0xc) && (MIN2(offLd
, offRc
) & 0xf)))
1420 assert(sizeRc
+ sizeLd
<= 16 && offRc
!= offLd
);
// Count the definitions of the recorded load (j).
1422 for (j
= 0; sizeRc
; sizeRc
-= rec
->insn
->getDef(j
)->reg
.size
, ++j
);
// @ld lies below the recorded load: shift the recorded definitions up to make
// room and move the record's offset down to offLd.
1424 if (offLd
< offRc
) {
1426 for (sz
= 0, d
= 0; sz
< sizeLd
; sz
+= ld
->getDef(d
)->reg
.size
, ++d
);
1427 // d: nr of definitions in ld
1428 // j: nr of definitions in rec->insn, move:
1429 for (d
= d
+ j
- 1; j
> 0; --j
, --d
)
1430 rec
->insn
->setDef(d
, rec
->insn
->getDef(j
- 1));
// Clone the address operand if it is shared before changing its offset.
1432 if (rec
->insn
->getSrc(0)->refCount() > 1)
1433 rec
->insn
->setSrc(0, cloneShallow(func
, rec
->insn
->getSrc(0)));
1434 rec
->offset
= rec
->insn
->getSrc(0)->reg
.data
.offset
= offLd
;
1440 // move definitions of @ld to @rec->insn
1441 for (j
= 0; sizeLd
; ++j
, ++d
) {
1442 sizeLd
-= ld
->getDef(j
)->reg
.size
;
1443 rec
->insn
->setDef(d
, ld
->getDef(j
));
// Widen the recorded load to the combined size and drop @ld.
1447 rec
->insn
->getSrc(0)->reg
.size
= size
;
1448 rec
->insn
->setType(typeOfSize(size
));
1450 delete_Instruction(prog
, ld
);
// Merges the store recorded in @rec into store @st so that @st writes both
// sets of values. The value sources are ordered by offset, the recorded store
// is deleted, and the combined size and type are then written through
// rec->insn.
// NOTE(review): the declarations of s / j / k / extra, the else branch for
// offRc > offSt, the reassignment of rec->insn to @st and the return
// statements are not in this listing.
1456 MemoryOpt::combineSt(Record
*rec
, Instruction
*st
)
1458 int32_t offRc
= rec
->offset
;
1459 int32_t offSt
= st
->getSrc(0)->reg
.data
.offset
;
1460 int sizeRc
= rec
->size
;
1461 int sizeSt
= typeSizeof(st
->dType
);
// size: byte size of the combined access.
1463 int size
= sizeRc
+ sizeSt
;
1465 Value
*src
[4]; // no modifiers in ValueRef allowed for st
1468 if (!prog
->getTarget()->
1469 isAccessSupported(st
->getSrc(0)->reg
.file
, typeOfSize(size
)))
// Alignment test for an 8-byte combined store.
1471 if (size
== 8 && MIN2(offRc
, offSt
) & 0x7)
1474 st
->takeExtraSources(0, extra
); // save predicate and indirect address
// The recorded store lies below @st: its values become the low sources.
1476 if (offRc
< offSt
) {
1477 // save values from @st
1478 for (s
= 0; sizeSt
; ++s
) {
1479 sizeSt
-= st
->getSrc(s
+ 1)->reg
.size
;
1480 src
[s
] = st
->getSrc(s
+ 1);
1482 // set record's values as low sources of @st
1483 for (j
= 1; sizeRc
; ++j
) {
1484 sizeRc
-= rec
->insn
->getSrc(j
)->reg
.size
;
1485 st
->setSrc(j
, rec
->insn
->getSrc(j
));
1487 // set saved values as high sources of @st
1488 for (k
= j
, j
= 0; j
< s
; ++j
)
1489 st
->setSrc(k
++, src
[j
]);
1491 updateLdStOffset(st
, offRc
, func
);
// Other ordering: skip over @st's own value sources, then append the recorded
// store's values after them.
1493 for (j
= 1; sizeSt
; ++j
)
1494 sizeSt
-= st
->getSrc(j
)->reg
.size
;
1495 for (s
= 1; sizeRc
; ++j
, ++s
) {
1496 sizeRc
-= rec
->insn
->getSrc(s
)->reg
.size
;
1497 st
->setSrc(j
, rec
->insn
->getSrc(s
));
1499 rec
->offset
= offSt
;
1501 st
->putExtraSources(0, extra
); // restore pointer and predicate
1503 delete_Instruction(prog
, rec
->insn
);
1506 rec
->insn
->getSrc(0)->reg
.size
= size
;
1507 rec
->insn
->setType(typeOfSize(size
));
// Fill in the address description of this record from the memory operand
// (source 0, taken as a Symbol) of @ldst: file index, the two indirect
// address values of source 0, offset, base, and the access size derived
// from the instruction's sType.
1512 MemoryOpt::Record::set(const Instruction
*ldst
)
1514 const Symbol
*mem
= ldst
->getSrc(0)->asSym();
1515 fileIndex
= mem
->reg
.fileIndex
;
1516 rel
[0] = ldst
->getIndirect(0, 0);
1517 rel
[1] = ldst
->getIndirect(0, 1);
1518 offset
= mem
->reg
.data
.offset
;
1519 base
= mem
->getBase();
1520 size
= typeSizeof(ldst
->sType
);
// @list: address of the head pointer of a record list.
// NOTE(review): presumably inserts this record into that list - confirm
// against the function body.
1524 MemoryOpt::Record::link(Record
**list
)
// @list: address of the head pointer of a record list.
// NOTE(review): presumably removes this record from that list - confirm
// against the function body.
1534 MemoryOpt::Record::unlink(Record
**list
)
// Return the address of the list head that tracks accesses like @insn:
// the loads[] entry for OP_LOAD / OP_VFETCH, the stores[] entry for any
// other opcode. Both tables are indexed by the file of source 0.
1544 MemoryOpt::Record
**
1545 MemoryOpt::getList(const Instruction
*insn
)
1547 if (insn
->op
== OP_LOAD
|| insn
->op
== OP_VFETCH
)
1548 return &loads
[insn
->src(0).getFile()];
1549 return &stores
[insn
->src(0).getFile()];
// Start tracking the load/store @i: looks up the list matching @i via
// getList() and takes storage for a new Record from recordPool.
// NOTE(review): the allocation is only reinterpret_cast to Record here, no
// constructor call is involved - the fields presumably get initialized
// explicitly afterwards; confirm.
1553 MemoryOpt::addRecord(Instruction
*i
)
1555 Record
**list
= getList(i
);
1556 Record
*it
= reinterpret_cast<Record
*>(recordPool
.allocate());
// Search the tracked loads (@load == true) or stores (@load == false) of the
// memory file accessed by @insn for a record that overlaps or adjoins the
// access.
//
// @insn:  load/store whose source 0 is the memory symbol to look up
// @load:  selects the loads[] or the stores[] list
// @isAdj: output; set by the checks below to tell an adjoining record
//         (different offset) apart from one at/over the accessed location
//
// A candidate has to lie in the same 16 byte window (offset >> 4) and has to
// match both indirect address values and the file index of the access.
// Locked records are only considered when @insn is an OP_LOAD.
1565 MemoryOpt::findRecord(const Instruction
*insn
, bool load
, bool& isAdj
) const
1567 const Symbol
*sym
= insn
->getSrc(0)->asSym();
1568 const int size
= typeSizeof(insn
->sType
);
1570 Record
*it
= load
? loads
[sym
->reg
.file
] : stores
[sym
->reg
.file
];
1572 for (; it
; it
= it
->next
) {
1573 if (it
->locked
&& insn
->op
!= OP_LOAD
)
1575 if ((it
->offset
>> 4) != (sym
->reg
.data
.offset
>> 4) ||
1576 it
->rel
[0] != insn
->getIndirect(0, 0) ||
1577 it
->fileIndex
!= sym
->reg
.fileIndex
||
1578 it
->rel
[1] != insn
->getIndirect(0, 1))
// record starts below the access: it is adjacent if it ends exactly where
// the access begins
1581 if (it
->offset
< sym
->reg
.data
.offset
) {
1582 if (it
->offset
+ it
->size
>= sym
->reg
.data
.offset
) {
1583 isAdj
= (it
->offset
+ it
->size
== sym
->reg
.data
.offset
);
1586 if (!(it
->offset
& 0x7))
// record starts at or above the access
1590 isAdj
= it
->offset
!= sym
->reg
.data
.offset
;
1591 if (size
<= it
->size
&& !isAdj
)
1594 if (!(sym
->reg
.data
.offset
& 0x7))
1595 if (it
->offset
- size
<= sym
->reg
.data
.offset
)
// Forward the values written by the store tracked in @rec to the users of the
// load @ld.
//
// The value sources of the store (sources 1..) are walked, adding up their
// sizes, until the running offset reaches the offset of @ld. From there each
// definition of @ld is replaced by the corresponding store source; both need
// to have the same size and the store source has to be in FILE_GPR.
1603 MemoryOpt::replaceLdFromSt(Instruction
*ld
, Record
*rec
)
1605 Instruction
*st
= rec
->insn
;
1606 int32_t offSt
= rec
->offset
;
1607 int32_t offLd
= ld
->getSrc(0)->reg
.data
.offset
;
// find the first store source located at the load's offset
1610 for (s
= 1; offSt
!= offLd
&& st
->srcExists(s
); ++s
)
1611 offSt
+= st
->getSrc(s
)->reg
.size
;
1615 for (d
= 0; ld
->defExists(d
) && st
->srcExists(s
); ++d
, ++s
) {
1616 if (ld
->getDef(d
)->reg
.size
!= st
->getSrc(s
)->reg
.size
)
1618 if (st
->getSrc(s
)->reg
.file
!= FILE_GPR
)
1620 ld
->def(d
).replace(st
->src(s
), false);
// Replace the results of the load @ldE by results of the earlier load tracked
// in @rec, then delete @ldE.
//
// The recorded load must not start above @ldE (asserted). Its definitions
// are walked, adding up their sizes, until the running offset reaches the
// offset of @ldE; from there the definitions of @ldE are replaced pairwise
// by those of the recorded load, which have to be of equal size.
1627 MemoryOpt::replaceLdFromLd(Instruction
*ldE
, Record
*rec
)
1629 Instruction
*ldR
= rec
->insn
;
1630 int32_t offR
= rec
->offset
;
1631 int32_t offE
= ldE
->getSrc(0)->reg
.data
.offset
;
1634 assert(offR
<= offE
);
// skip the definitions of ldR that lie below the offset of ldE
1635 for (dR
= 0; offR
< offE
&& ldR
->defExists(dR
); ++dR
)
1636 offR
+= ldR
->getDef(dR
)->reg
.size
;
1640 for (dE
= 0; ldE
->defExists(dE
) && ldR
->defExists(dR
); ++dE
, ++dR
) {
1641 if (ldE
->getDef(dE
)->reg
.size
!= ldR
->getDef(dR
)->reg
.size
)
1643 ldE
->def(dE
).replace(ldR
->getDef(dR
), false);
1646 delete_Instruction(prog
, ldE
);
// Merge the store @st with the overlapping earlier store tracked in @rec.
//
// Values of the earlier store (ri) that are not overwritten by @st are kept,
// values covered by @st are taken from @st. The resulting value list is put
// on @st, the earlier instruction is deleted, and the record / the type of
// @st are updated to the merged range (from the lower start to the higher
// end of the two stores).
// Predicate and indirect address are taken off @st while its sources are
// rewritten and put back afterwards.
1651 MemoryOpt::replaceStFromSt(Instruction
*restrict st
, Record
*rec
)
1653 const Instruction
*const ri
= rec
->insn
;
1656 int32_t offS
= st
->getSrc(0)->reg
.data
.offset
;
1657 int32_t offR
= rec
->offset
;
1658 int32_t endS
= offS
+ typeSizeof(st
->dType
);
1659 int32_t endR
= offR
+ typeSizeof(ri
->dType
);
1661 rec
->size
= MAX2(endS
, endR
) - MIN2(offS
, offR
);
1663 st
->takeExtraSources(0, extra
);
1669 // get non-replaced sources of ri
1670 for (s
= 1; offR
< offS
; offR
+= ri
->getSrc(s
)->reg
.size
, ++s
)
1671 vals
[k
++] = ri
->getSrc(s
);
1673 // get replaced sources of st
1674 for (s
= 1; st
->srcExists(s
); offS
+= st
->getSrc(s
)->reg
.size
, ++s
)
1675 vals
[k
++] = st
->getSrc(s
);
1676 // skip replaced sources of ri
1677 for (s
= n
; offR
< endS
; offR
+= ri
->getSrc(s
)->reg
.size
, ++s
);
1678 // get non-replaced sources after values covered by st
1679 for (; offR
< endR
; offR
+= ri
->getSrc(s
)->reg
.size
, ++s
)
1680 vals
[k
++] = ri
->getSrc(s
);
// NOTE(review): this bound on k is only checked after vals[] has been written
1681 assert((unsigned int)k
<= Elements(vals
));
1682 for (s
= 0; s
< k
; ++s
)
1683 st
->setSrc(s
+ 1, vals
[s
]);
1684 st
->setSrc(0, ri
->getSrc(0));
// other case: advance past the ri values covered by @st and past the values
// of @st itself, then append the ri values located behind the end of @st
1688 for (j
= 1; offR
< endS
; offR
+= ri
->getSrc(j
++)->reg
.size
);
1689 for (s
= 1; offS
< endS
; offS
+= st
->getSrc(s
++)->reg
.size
);
1690 for (; offR
< endR
; offR
+= ri
->getSrc(j
++)->reg
.size
)
1691 st
->setSrc(s
++, ri
->getSrc(j
));
1693 st
->putExtraSources(0, extra
);
1695 delete_Instruction(prog
, rec
->insn
);
1698 rec
->offset
= st
->getSrc(0)->reg
.data
.offset
;
1700 st
->setType(typeOfSize(rec
->size
));
// Tell whether the memory access of @ldst may touch the location described by
// this record.
//
// Records with different file indices are distinguished first. If either
// side is indirectly addressed (rel[0] set), the comparison falls back to
// comparing the bases. Otherwise the byte ranges
// [offset, offset + size) of both sides are tested for intersection.
1706 MemoryOpt::Record::overlaps(const Instruction
*ldst
) const
1711 if (this->fileIndex
!= that
.fileIndex
)
1714 if (this->rel
[0] || that
.rel
[0])
1715 return this->base
== that
.base
;
1717 (this->offset
< that
.offset
+ that
.size
) &&
1718 (this->offset
+ this->size
> that
.offset
);
1721 // We must not eliminate stores that affect the result of @ld if
1722 // we find later stores to the same location, and we may no longer
1723 // merge them with later stores.
1724 // The stored value can, however, still be used to determine the value
1725 // returned by future loads.
//
// Walks the store records of the memory file read by @ld and selects those
// that are not locked yet and overlap the load.
1727 MemoryOpt::lockStores(Instruction
*const ld
)
1729 for (Record
*r
= stores
[ld
->src(0).getFile()]; r
; r
= r
->next
)
1730 if (!r
->locked
&& r
->overlaps(ld
))
1734 // Prior loads from the location of @st are no longer valid.
1735 // Stores to the location of @st may no longer be used to derive
1736 // the value at it nor be coalesced into later stores.
//
// @st: the store that invalidates records; with NULL every record of the
//      file is unlinked
// @f:  memory file whose load and store lists are purged; it is replaced by
//      the file of source 0 of @st in the assignment below (NOTE(review):
//      presumably only when @st is non-NULL - confirm)
//
// NOTE(review): the loops read r->next after r->unlink(); this relies on
// unlink() leaving the record's next pointer usable - confirm.
1738 MemoryOpt::purgeRecords(Instruction
*const st
, DataFile f
)
1741 f
= st
->src(0).getFile();
1743 for (Record
*r
= loads
[f
]; r
; r
= r
->next
)
1744 if (!st
|| r
->overlaps(st
))
1745 r
->unlink(&loads
[f
]);
1747 for (Record
*r
= stores
[f
]; r
; r
= r
->next
)
1748 if (!st
|| r
->overlaps(st
))
1749 r
->unlink(&stores
[f
]);
// Per-block entry point of the pass: runs runOpt() on @bb.
// As the comment below explains, a single run is not always enough to reach
// the widest possible access, so the optimization is meant to be run again.
1753 MemoryOpt::visit(BasicBlock
*bb
)
1755 bool ret
= runOpt(bb
);
1756 // Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st
1757 // where 96 bit memory operations are forbidden.
// Walk the instructions of @bb once and try to eliminate or combine memory
// accesses using the records of previously seen loads and stores.
//
// - dead OP_LOAD / OP_VFETCH instructions are deleted
// - OP_CALL, OP_BAR and OP_MEMBAR drop all records for local, global and
//   shared memory and for shader outputs
// - OP_ATOM / OP_CCTL drop records as well: local, global and shared memory
//   for an access to global memory, the accessed file in the other visible
//   call (NOTE(review): presumably the else branch - confirm)
// - OP_EMIT / OP_RESTART drop the shader output records
// - predicated loads/stores are not handled yet (see TODO)
// - loads: for global/local memory a previous store is looked up so the
//   reload can be replaced by the stored values; otherwise an earlier load
//   is looked up to either reuse its results or to be combined with this one
// - stores: an earlier store is looked up to be replaced by or combined with
//   this one, and records invalidated by the store are purged
1764 MemoryOpt::runOpt(BasicBlock
*bb
)
1766 Instruction
*ldst
, *next
;
1768 bool isAdjacent
= true;
1770 for (ldst
= bb
->getEntry(); ldst
; ldst
= next
) {
1775 if (ldst
->op
== OP_LOAD
|| ldst
->op
== OP_VFETCH
) {
1776 if (ldst
->isDead()) {
1777 // might have been produced by earlier optimization
1778 delete_Instruction(prog
, ldst
);
1782 if (ldst
->op
== OP_STORE
|| ldst
->op
== OP_EXPORT
) {
1785 // TODO: maybe have all fixed ops act as barrier ?
1786 if (ldst
->op
== OP_CALL
||
1787 ldst
->op
== OP_BAR
||
1788 ldst
->op
== OP_MEMBAR
) {
1789 purgeRecords(NULL
, FILE_MEMORY_LOCAL
);
1790 purgeRecords(NULL
, FILE_MEMORY_GLOBAL
);
1791 purgeRecords(NULL
, FILE_MEMORY_SHARED
);
1792 purgeRecords(NULL
, FILE_SHADER_OUTPUT
);
1794 if (ldst
->op
== OP_ATOM
|| ldst
->op
== OP_CCTL
) {
1795 if (ldst
->src(0).getFile() == FILE_MEMORY_GLOBAL
) {
1796 purgeRecords(NULL
, FILE_MEMORY_LOCAL
);
1797 purgeRecords(NULL
, FILE_MEMORY_GLOBAL
);
1798 purgeRecords(NULL
, FILE_MEMORY_SHARED
);
1800 purgeRecords(NULL
, ldst
->src(0).getFile());
1803 if (ldst
->op
== OP_EMIT
|| ldst
->op
== OP_RESTART
) {
1804 purgeRecords(NULL
, FILE_SHADER_OUTPUT
);
1808 if (ldst
->getPredicate()) // TODO: handle predicated ld/st
1812 DataFile file
= ldst
->src(0).getFile();
1814 // if ld l[]/g[] look for previous store to eliminate the reload
1815 if (file
== FILE_MEMORY_GLOBAL
|| file
== FILE_MEMORY_LOCAL
) {
1816 // TODO: shared memory ?
1817 rec
= findRecord(ldst
, false, isAdjacent
);
1818 if (rec
&& !isAdjacent
)
1819 keep
= !replaceLdFromSt(ldst
, rec
);
1822 // or look for ld from the same location and replace this one
1823 rec
= keep
? findRecord(ldst
, true, isAdjacent
) : NULL
;
1826 keep
= !replaceLdFromLd(ldst
, rec
);
1828 // or combine a previous load with this one
1829 keep
= !combineLd(rec
, ldst
);
// store path: look up an earlier store record for this location
1834 rec
= findRecord(ldst
, false, isAdjacent
);
1837 keep
= !replaceStFromSt(ldst
, rec
);
1839 keep
= !combineSt(rec
, ldst
);
1842 purgeRecords(ldst
, DATA_FILE_COUNT
);
1852 // =============================================================================
1854 // Turn control flow into predicated instructions (after register allocation !).
1856 // Could move this to before register allocation on NVC0 and also handle nested
// Pass working on basic blocks (visit) that replaces simple conditionals by
// predicated instructions and propagates branches; the helpers declared here
// are defined below.
1858 class FlatteningPass
: public Pass
1861 virtual bool visit(BasicBlock
*);
1863 bool tryPredicateConditional(BasicBlock
*);
1864 void predicateInstructions(BasicBlock
*, Value
*pred
, CondCode cc
);
1865 void tryPropagateBranch(BasicBlock
*);
1866 inline bool isConstantCondition(Value
*pred
);
1867 inline bool mayPredicate(const Instruction
*, const Value
*pred
) const;
1868 inline void removeFlow(Instruction
*);
// Examine the instruction computing @pred: it has to be an OP_SET without a
// third source, and the register file of each of its (up to two) operands is
// determined.
//
// If the operand is produced by an OP_MOV or OP_LOAD without indirect
// addressing, the file of that instruction's source 0 is used; the other
// visible assignment takes the file of the operand itself. A GPR with an id
// above prog->maxGPR is treated like an immediate ($r63 on NVC0).
// Operands from files other than FILE_IMMEDIATE / FILE_MEMORY_CONST are
// singled out by the final check (NOTE(review): presumably making the
// condition non-constant - confirm).
1872 FlatteningPass::isConstantCondition(Value
*pred
)
1874 Instruction
*insn
= pred
->getUniqueInsn();
1876 if (insn
->op
!= OP_SET
|| insn
->srcExists(2))
1879 for (int s
= 0; s
< 2 && insn
->srcExists(s
); ++s
) {
1880 Instruction
*ld
= insn
->getSrc(s
)->getUniqueInsn();
1883 if (ld
->op
!= OP_MOV
&& ld
->op
!= OP_LOAD
)
1885 if (ld
->src(0).isIndirect(0))
1887 file
= ld
->src(0).getFile();
1889 file
= insn
->src(s
).getFile();
1890 // catch $r63 on NVC0
1891 if (file
== FILE_GPR
&& insn
->getSrc(s
)->reg
.data
.id
> prog
->maxGPR
)
1892 file
= FILE_IMMEDIATE
;
1894 if (file
!= FILE_IMMEDIATE
&& file
!= FILE_MEMORY_CONST
)
// Delete the flow instruction @insn (NULL is accepted and converted to a NULL
// FlowInstruction) if it is a removable OP_BRA or OP_JOIN.
//
// For OP_BRA the type of the block's outgoing edge is inspected (CROSS and
// BACK edges are singled out); opcodes other than OP_BRA are checked against
// OP_JOIN.
// After deleting the instruction, a predicate that is left without
// references has its register id reset to -1 and the instruction computing
// it is deleted as well.
1901 FlatteningPass::removeFlow(Instruction
*insn
)
1903 FlowInstruction
*term
= insn
? insn
->asFlow() : NULL
;
1906 Graph::Edge::Type ty
= term
->bb
->cfg
.outgoing().getType();
1908 if (term
->op
== OP_BRA
) {
1909 // TODO: this might get more difficult when we get arbitrary BRAs
1910 if (ty
== Graph::Edge::CROSS
|| ty
== Graph::Edge::BACK
)
1913 if (term
->op
!= OP_JOIN
)
// fetch the predicate before the instruction is deleted
1916 Value
*pred
= term
->getPredicate();
1918 delete_Instruction(prog
, term
);
1920 if (pred
&& pred
->refCount() == 0) {
1921 Instruction
*pSet
= pred
->getUniqueInsn();
1922 pred
->join
->reg
.data
.id
= -1; // deallocate
1924 delete_Instruction(prog
, pSet
);
// Make the instructions of @bb conditional on @pred with condition code @cc
// and remove the flow instruction at the exit of the block.
// Instructions that receive the predicate must not be predicated already
// (asserted).
1929 FlatteningPass::predicateInstructions(BasicBlock
*bb
, Value
*pred
, CondCode cc
)
1931 for (Instruction
*i
= bb
->getEntry(); i
; i
= i
->next
) {
1934 assert(!i
->getPredicate());
1935 i
->setPredicate(cc
, pred
);
1937 removeFlow(bb
->getExit());
// Decide whether @insn can be executed under the predicate @pred.
// Checks made here: whether @insn is a pseudo instruction, whether the target
// allows predicating @insn with @pred, and whether @insn itself defines a
// value equal to @pred.
1941 FlatteningPass::mayPredicate(const Instruction
*insn
, const Value
*pred
) const
1943 if (insn
->isPseudo())
1945 // TODO: calls where we don't know which registers are modified
1947 if (!prog
->getTarget()->mayPredicate(insn
, pred
))
1949 for (int d
= 0; insn
->defExists(d
); ++d
)
1950 if (insn
->getDef(d
)->equals(pred
))
1955 // If we jump to BRA/RET/EXIT, replace the jump with it.
1956 // NOTE: We do not update the CFG anymore here !
1958 // TODO: Handle cases where we skip over a branch (maybe do that elsewhere ?):
1960 // @p0 bra BB:2 -> @!p0 bra BB:3 iff (!) BB:2 immediately adjoins BB:1
//
// Walks backwards over the OP_BRA instructions at the end of @bb. A branch
// whose target block holds exactly one instruction, an unpredicated flow
// instruction of an accepted opcode, is retargeted to that instruction's own
// target.
1968 FlatteningPass::tryPropagateBranch(BasicBlock
*bb
)
1970 for (Instruction
*i
= bb
->getExit(); i
&& i
->op
== OP_BRA
; i
= i
->prev
) {
1971 BasicBlock
*bf
= i
->asFlow()->target
.bb
;
1973 if (bf
->getInsnCount() != 1)
1976 FlowInstruction
*bra
= i
->asFlow();
1977 FlowInstruction
*rep
= bf
->getExit()->asFlow();
1979 if (!rep
|| rep
->getPredicate())
1981 if (rep
->op
!= OP_BRA
&&
1982 rep
->op
!= OP_JOIN
&&
1986 // TODO: If there are multiple branches to @rep, only the first would
1987 // be replaced, so only remove them after this pass is done ?
1988 // Also, need to check all incident blocks for fall-through exits and
1989 // add the branch there.
1991 bra
->target
.bb
= rep
->target
.bb
;
1992 if (bf
->cfg
.incidentCount() == 1)
// Per-block entry point of the flattening pass.
//
// First tries to turn a conditional starting at @bb into predicated code.
// Then, if the block ends in an unpredicated OP_JOIN, checks whether the join
// can be attached to the previous instruction: that instruction must not be
// predicated and must not be one of the excluded operations listed in the
// condition below (texture / surface ops, OP_TEXBAR, OP_LINTERP, OP_PINTERP,
// loads and stores wider than 4 bytes); in that case the exit instruction is
// removed from the block.
// Finally branch propagation is attempted for the block.
1998 FlatteningPass::visit(BasicBlock
*bb
)
2000 if (tryPredicateConditional(bb
))
2003 // try to attach join to previous instruction
2004 Instruction
*insn
= bb
->getExit();
2005 if (insn
&& insn
->op
== OP_JOIN
&& !insn
->getPredicate()) {
2007 if (insn
&& !insn
->getPredicate() &&
2009 insn
->op
!= OP_TEXBAR
&&
2010 !isTextureOp(insn
->op
) && // probably just nve4
2011 !isSurfaceOp(insn
->op
) && // not confirmed
2012 insn
->op
!= OP_LINTERP
&& // probably just nve4
2013 insn
->op
!= OP_PINTERP
&& // probably just nve4
2014 ((insn
->op
!= OP_LOAD
&& insn
->op
!= OP_STORE
) ||
2015 typeSizeof(insn
->dType
) <= 4) &&
2018 bb
->remove(bb
->getExit());
2023 tryPropagateBranch(bb
);
// Try to replace the simple conditional that starts at @bb by predicated
// instructions.
//
// The shape of the conditional comes from bb->initiatesSimpleConditional();
// the predicate is the one of the exit instruction of @bb (which must exist).
// Every instruction of the left (bL) and right (bR) block has to pass
// mayPredicate(); the instructions are counted in nL / nR and a conditional
// that is too long (limit is initialized to 12) is kept as a real branch.
// On success bL is predicated with the condition code of the exit instruction
// and bR with its inverse, the join point and the branch/join at the fork are
// removed, and on targets with joinAnterior an OP_JOIN at the entry of the
// block following the conditional is removed as well.
2029 FlatteningPass::tryPredicateConditional(BasicBlock
*bb
)
2031 BasicBlock
*bL
= NULL
, *bR
= NULL
;
2032 unsigned int nL
= 0, nR
= 0, limit
= 12;
2036 mask
= bb
->initiatesSimpleConditional();
2040 assert(bb
->getExit());
2041 Value
*pred
= bb
->getExit()->getPredicate();
2044 if (isConstantCondition(pred
))
2047 Graph::EdgeIterator ei
= bb
->cfg
.outgoing();
2050 bL
= BasicBlock::get(ei
.getNode());
2051 for (insn
= bL
->getEntry(); insn
; insn
= insn
->next
, ++nL
)
2052 if (!mayPredicate(insn
, pred
))
2055 return false; // too long, do a real branch
2060 bR
= BasicBlock::get(ei
.getNode());
2061 for (insn
= bR
->getEntry(); insn
; insn
= insn
->next
, ++nR
)
2062 if (!mayPredicate(insn
, pred
))
2065 return false; // too long, do a real branch
2069 predicateInstructions(bL
, pred
, bb
->getExit()->cc
);
2071 predicateInstructions(bR
, pred
, inverseCondCode(bb
->getExit()->cc
));
2074 bb
->remove(bb
->joinAt
);
2077 removeFlow(bb
->getExit()); // delete the branch/join at the fork point
2079 // remove potential join operations at the end of the conditional
2080 if (prog
->getTarget()->joinAnterior
) {
2081 bb
= BasicBlock::get((bL
? bL
: bR
)->cfg
.outgoing().getNode());
2082 if (bb
->getEntry() && bb
->getEntry()->op
== OP_JOIN
)
2083 removeFlow(bb
->getEntry());
2089 // =============================================================================
2091 // Common subexpression elimination. Stupid O^2 implementation.
// Block-local pass; keeps one list of instructions per opcode (ops[], indexed
// up to and including OP_LAST) and uses tryReplace() to substitute an
// instruction by an equivalent one.
2092 class LocalCSE
: public Pass
2095 virtual bool visit(BasicBlock
*);
2097 inline bool tryReplace(Instruction
**, Instruction
*);
2099 DLList ops
[OP_LAST
+ 1];
// Pass that works on basic blocks through visit(); its implementation pulls
// expressions that are common to all sources of a phi into the block.
2102 class GlobalCSE
: public Pass
2105 virtual bool visit(BasicBlock
*);
// Compare the operation performed by this instruction with the one of @that,
// without looking at the operands.
//
// Compared are: opcode, destination and source type, condition code, and,
// depending on the instruction kind, the texture description (bytewise),
// the set condition of compare instructions, or the interpolation / lanes /
// perPatch / postFactor settings. For all instructions subOp, saturate,
// rnd, ftz, dnz, cache and mask are compared.
2109 Instruction::isActionEqual(const Instruction
*that
) const
2111 if (this->op
!= that
->op
||
2112 this->dType
!= that
->dType
||
2113 this->sType
!= that
->sType
)
2115 if (this->cc
!= that
->cc
)
2118 if (this->asTex()) {
2119 if (memcmp(&this->asTex()->tex
,
2120 &that
->asTex()->tex
,
2121 sizeof(this->asTex()->tex
)))
2124 if (this->asCmp()) {
2125 if (this->asCmp()->setCond
!= that
->asCmp()->setCond
)
2128 if (this->asFlow()) {
2131 if (this->ipa
!= that
->ipa
||
2132 this->lanes
!= that
->lanes
||
2133 this->perPatch
!= that
->perPatch
)
2135 if (this->postFactor
!= that
->postFactor
)
2139 if (this->subOp
!= that
->subOp
||
2140 this->saturate
!= that
->saturate
||
2141 this->rnd
!= that
->rnd
||
2142 this->ftz
!= that
->ftz
||
2143 this->dnz
!= that
->dnz
||
2144 this->cache
!= that
->cache
||
2145 this->mask
!= that
->mask
)
// Compare this instruction with @that with respect to the values produced.
//
// Checks, in this order: presence of a result (OP_DISCARD being the
// exception), the operation itself via isActionEqual(), the predicate source
// index, the definitions (same count, pairwise equals() in non-strict mode),
// and the sources (same count, same modifiers, pairwise equals() in strict
// mode). Loads and vertex fetches are additionally distinguished by the
// memory file of source 0 (FILE_MEMORY_CONST and FILE_SHADER_INPUT are the
// cases visible here).
2152 Instruction::isResultEqual(const Instruction
*that
) const
2156 // NOTE: location of discard only affects tex with liveOnly and quadops
2157 if (!this->defExists(0) && this->op
!= OP_DISCARD
)
2160 if (!isActionEqual(that
))
2163 if (this->predSrc
!= that
->predSrc
)
2166 for (d
= 0; this->defExists(d
); ++d
) {
2167 if (!that
->defExists(d
) ||
2168 !this->getDef(d
)->equals(that
->getDef(d
), false))
// @that must not have more definitions than this instruction
2171 if (that
->defExists(d
))
2174 for (s
= 0; this->srcExists(s
); ++s
) {
2175 if (!that
->srcExists(s
))
2177 if (this->src(s
).mod
!= that
->src(s
).mod
)
2179 if (!this->getSrc(s
)->equals(that
->getSrc(s
), true))
// @that must not have more sources than this instruction
2182 if (that
->srcExists(s
))
2185 if (op
== OP_LOAD
|| op
== OP_VFETCH
) {
2186 switch (src(0).getFile()) {
2187 case FILE_MEMORY_CONST
:
2188 case FILE_SHADER_INPUT
:
2198 // pull through common expressions from different in-blocks
//
// For every OP_PHI of @bb: the instruction defining the first phi source (ik)
// is compared with the instructions defining the remaining sources using
// isResultEqual(); sources with more than one reference are singled out by
// the refCount() checks. If the loop ran through all sources, ik is inserted
// into @bb, takes over the definition of the phi, and the phi is deleted.
2200 GlobalCSE::visit(BasicBlock
*bb
)
2202 Instruction
*phi
, *next
, *ik
;
2205 // TODO: maybe do this with OP_UNION, too
2207 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= next
) {
2209 if (phi
->getSrc(0)->refCount() > 1)
2211 ik
= phi
->getSrc(0)->getInsn();
2213 continue; // probably a function input
2214 for (s
= 1; phi
->srcExists(s
); ++s
) {
2215 if (phi
->getSrc(s
)->refCount() > 1)
2217 if (!phi
->getSrc(s
)->getInsn() ||
2218 !phi
->getSrc(s
)->getInsn()->isResultEqual(ik
))
// the loop above reached the end of the source list
2221 if (!phi
->srcExists(s
)) {
2222 Instruction
*entry
= bb
->getEntry();
2224 if (!entry
|| entry
->op
!= OP_JOIN
)
2227 bb
->insertAfter(entry
, ik
);
2228 ik
->setDef(0, phi
->getDef(0));
2229 delete_Instruction(prog
, phi
);
// Try to substitute the instruction *@ptr by the equivalent instruction @i.
//
// @ptr: address of the pointer to the candidate for removal (called old here)
// @i:   instruction whose results are to be used instead
//
// Predicated @i are singled out by the first check (see TODO). If old and @i
// yield equal results, every definition of old is replaced by the matching
// definition of @i and old is deleted.
2237 LocalCSE::tryReplace(Instruction
**ptr
, Instruction
*i
)
2239 Instruction
*old
= *ptr
;
2241 // TODO: maybe relax this later (causes trouble with OP_UNION)
2242 if (i
->isPredicated())
2245 if (!old
->isResultEqual(i
))
2248 for (int d
= 0; old
->defExists(d
); ++d
)
2249 old
->def(d
).replace(i
->getDef(d
), false);
2250 delete_Instruction(prog
, old
);
// Eliminate common subexpressions within @bb.
//
// All instructions of the block are numbered (serial) first, since the order
// of instructions matters for the replacement. Then, for each instruction:
// among its LValue sources the one with the lowest reference count is picked
// and the instructions using that value, located in the same block with a
// lower serial, are tried as replacements. The per-opcode list ops[ir->op]
// is visited with tryReplace() as well, and instructions are inserted into
// that list.
// The last loop runs over all ops[] lists (NOTE(review): presumably to clear
// them - confirm).
2256 LocalCSE::visit(BasicBlock
*bb
)
2258 unsigned int replaced
;
2261 Instruction
*ir
, *next
;
2265 // will need to know the order of instructions
2267 for (ir
= bb
->getFirst(); ir
; ir
= ir
->next
)
2268 ir
->serial
= serial
++;
2270 for (ir
= bb
->getEntry(); ir
; ir
= next
) {
2277 ops
[ir
->op
].insert(ir
);
// pick the LValue source with the fewest references
2281 for (s
= 0; ir
->srcExists(s
); ++s
)
2282 if (ir
->getSrc(s
)->asLValue())
2283 if (!src
|| ir
->getSrc(s
)->refCount() < src
->refCount())
2284 src
= ir
->getSrc(s
);
2287 for (Value::UseIterator it
= src
->uses
.begin();
2288 it
!= src
->uses
.end(); ++it
) {
2289 Instruction
*ik
= (*it
)->getInsn();
2290 if (ik
&& ik
->bb
== ir
->bb
&& ik
->serial
< ir
->serial
)
2291 if (tryReplace(&ir
, ik
))
2295 DLLIST_FOR_EACH(&ops
[ir
->op
], iter
)
2297 Instruction
*ik
= reinterpret_cast<Instruction
*>(iter
.get());
2298 if (tryReplace(&ir
, ik
))
2304 ops
[ir
->op
].insert(ir
);
2308 for (unsigned int i
= 0; i
<= OP_LAST
; ++i
)
2316 // =============================================================================
2318 // Remove computations of unused values.
// Dead code elimination pass. buryAll() is the entry point used by the pass
// runner macro; visit() processes one basic block; checkSplitLoad() deals
// with loads of which only some results are used; deadCount is the counter
// that controls the repetition in buryAll().
2319 class DeadCodeElim
: public Pass
2322 bool buryAll(Program
*);
2325 virtual bool visit(BasicBlock
*);
2327 void checkSplitLoad(Instruction
*ld
); // for partially dead loads
2329 unsigned int deadCount
;
// Run the pass over @prog repeatedly, as long as deadCount is non-zero after
// a run. A failing run() is singled out by the check inside the loop.
2333 DeadCodeElim::buryAll(Program
*prog
)
2337 if (!this->run(prog
, false, false))
2339 } while (deadCount
);
// Process the instructions of @bb, starting at the first one: instructions
// are deleted here, OP_VFETCH / OP_LOAD with more than one definition are
// handled separately, and instructions whose first definition has no
// references get a special treatment when they are OP_ATOM, OP_SUREDP, ...
// NOTE(review): presumably deletion is guarded by isDead() and the multi-def
// loads are passed to checkSplitLoad() - confirm.
2345 DeadCodeElim::visit(BasicBlock
*bb
)
2349 for (Instruction
*i
= bb
->getFirst(); i
; i
= next
) {
2353 delete_Instruction(prog
, i
);
2355 if (i
->defExists(1) && (i
->op
== OP_VFETCH
|| i
->op
== OP_LOAD
)) {
2358 if (i
->defExists(0) && !i
->getDef(0)->refCount()) {
2359 if (i
->op
== OP_ATOM
||
2360 i
->op
== OP_SUREDP
||
// Shrink the load @ld1 when some of its results are unused, splitting it into
// two loads if necessary.
//
// mask has one bit per definition of @ld1 and starts with all bits set;
// definitions without references and without an assigned register id are
// singled out in the first loop, and nothing is done if mask still is
// 0xffffffff afterwards.
// The definitions selected by mask are then collected into a first group
// (def1, size1 bytes at addr1) and, from the point where the first loop over
// the definitions stopped, a second group (def2, size2 bytes at addr2).
// @ld1 is rewritten to load the first group only; for the second group a
// shallow clone (ld2) with its own offset, type and definitions is inserted
// after @ld1.
2369 DeadCodeElim::checkSplitLoad(Instruction
*ld1
)
2371 Instruction
*ld2
= NULL
; // can get at most 2 loads
2374 int32_t addr1
, addr2
;
2375 int32_t size1
, size2
;
2377 uint32_t mask
= 0xffffffff;
2379 for (d
= 0; ld1
->defExists(d
); ++d
)
2380 if (!ld1
->getDef(d
)->refCount() && ld1
->getDef(d
)->reg
.data
.id
< 0)
2382 if (mask
== 0xffffffff)
2385 addr1
= ld1
->getSrc(0)->reg
.data
.offset
;
// first group of definitions selected by mask
2388 for (d
= 0; ld1
->defExists(d
); ++d
) {
2389 if (mask
& (1 << d
)) {
2390 if (size1
&& (addr1
& 0x7))
2392 def1
[n1
] = ld1
->getDef(d
);
2393 size1
+= def1
[n1
++]->reg
.size
;
2396 addr1
+= ld1
->getDef(d
)->reg
.size
;
// second group: continues with d where the loop above stopped
2401 for (addr2
= addr1
+ size1
; ld1
->defExists(d
); ++d
) {
2402 if (mask
& (1 << d
)) {
2403 def2
[n2
] = ld1
->getDef(d
);
2404 size2
+= def2
[n2
++]->reg
.size
;
2407 addr2
+= ld1
->getDef(d
)->reg
.size
;
2411 updateLdStOffset(ld1
, addr1
, func
);
2412 ld1
->setType(typeOfSize(size1
));
2413 for (d
= 0; d
< 4; ++d
)
2414 ld1
->setDef(d
, (d
< n1
) ? def1
[d
] : NULL
);
2419 ld2
= cloneShallow(func
, ld1
);
2420 updateLdStOffset(ld2
, addr2
, func
);
2421 ld2
->setType(typeOfSize(size2
));
2422 for (d
= 0; d
< 4; ++d
)
2423 ld2
->setDef(d
, (d
< n2
) ? def2
[d
] : NULL
);
2425 ld1
->bb
->insertAfter(ld1
, ld2
);
2428 // =============================================================================
// RUN_PASS(l, n, f): if the optimization level (variable `level` of the
// expanding function) is at least l, run pass n by calling its method f with
// the program; the pass name is logged when verbose debugging is enabled and
// a failing pass is singled out by the `!pass.f(this)` check.
2430 #define RUN_PASS(l, n, f) \
2431 if (level >= (l)) { \
2432 if (dbgFlags & NV50_IR_DEBUG_VERBOSE) \
2433 INFO("PEEPHOLE: %s\n", #n); \
2435 if (!pass.f(this)) \
// Run the peephole passes that operate on the program in SSA form, in the
// fixed order listed below.
//
// @level: optimization level; each pass only runs if @level is at least the
//         first argument of its RUN_PASS line. The final DeadCodeElim is
//         gated on level 0.
2440 Program::optimizeSSA(int level
)
2442 RUN_PASS(1, DeadCodeElim
, buryAll
);
2443 RUN_PASS(1, CopyPropagation
, run
);
2444 RUN_PASS(2, GlobalCSE
, run
);
2445 RUN_PASS(1, LocalCSE
, run
);
2446 RUN_PASS(2, AlgebraicOpt
, run
);
2447 RUN_PASS(2, ModifierFolding
, run
); // before load propagation -> less checks
2448 RUN_PASS(1, ConstantFolding
, foldAll
);
2449 RUN_PASS(1, LoadPropagation
, run
);
2450 RUN_PASS(2, MemoryOpt
, run
);
2451 RUN_PASS(2, LocalCSE
, run
);
2452 RUN_PASS(0, DeadCodeElim
, buryAll
);
// Run the peephole passes that operate on the program after register
// allocation; FlatteningPass is run when @level is at least 2.
2458 Program::optimizePostRA(int level
)
2460 RUN_PASS(2, FlatteningPass
, run
);