2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nv50_ir_target.h"
25 #include "nv50_ir_build_util.h"
28 #include "util/u_math.h"
34 Instruction::isNop() const
36 if (op
== OP_CONSTRAINT
|| op
== OP_PHI
)
38 if (terminator
|| join
) // XXX: should terminator imply flow ?
40 if (!fixed
&& op
== OP_NOP
)
43 if (def
[0].exists() && def
[0].rep()->reg
.data
.id
< 0) {
44 for (int d
= 1; defExists(d
); ++d
)
45 if (def
[d
].rep()->reg
.data
.id
>= 0)
46 WARN("part of vector result is unused !\n");
50 if (op
== OP_MOV
|| op
== OP_UNION
) {
51 if (!def
[0].rep()->equals(getSrc(0)))
54 if (!def
[0].rep()->equals(getSrc(1)))
62 bool Instruction::isDead() const
68 for (int d
= 0; defExists(d
); ++d
)
69 if (getDef(d
)->refCount() || getDef(d
)->reg
.data
.id
>= 0)
72 if (terminator
|| asFlow())
80 // =============================================================================
82 class CopyPropagation
: public Pass
85 virtual bool visit(BasicBlock
*);
88 // Propagate all MOVs forward to make subsequent optimization easier, except if
89 // the sources stem from a phi, in which case we don't want to mess up potential
90 // swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def.
92 CopyPropagation::visit(BasicBlock
*bb
)
94 Instruction
*mov
, *si
, *next
;
96 for (mov
= bb
->getEntry(); mov
; mov
= next
) {
98 if (mov
->op
!= OP_MOV
|| mov
->fixed
|| !mov
->getSrc(0)->asLValue())
100 si
= mov
->getSrc(0)->getInsn();
101 if (mov
->getDef(0)->reg
.data
.id
< 0 && si
&& si
->op
!= OP_PHI
) {
103 mov
->def
[0].replace(mov
->getSrc(0), false);
104 delete_Instruction(prog
, mov
);
110 // =============================================================================
112 class LoadPropagation
: public Pass
115 virtual bool visit(BasicBlock
*);
117 void checkSwapSrc01(Instruction
*);
119 bool isCSpaceLoad(Instruction
*);
120 bool isImmd32Load(Instruction
*);
124 LoadPropagation::isCSpaceLoad(Instruction
*ld
)
126 return ld
&& ld
->op
== OP_LOAD
&& ld
->src
[0].getFile() == FILE_MEMORY_CONST
;
130 LoadPropagation::isImmd32Load(Instruction
*ld
)
132 if (!ld
|| (ld
->op
!= OP_MOV
) || (typeSizeof(ld
->dType
) != 4))
134 return ld
->src
[0].getFile() == FILE_IMMEDIATE
;
138 LoadPropagation::checkSwapSrc01(Instruction
*insn
)
140 if (!prog
->getTarget()->getOpInfo(insn
).commutative
)
141 if (insn
->op
!= OP_SET
&& insn
->op
!= OP_SLCT
)
143 if (insn
->src
[1].getFile() != FILE_GPR
)
146 Instruction
*i0
= insn
->getSrc(0)->getInsn();
147 Instruction
*i1
= insn
->getSrc(1)->getInsn();
149 if (isCSpaceLoad(i0
)) {
150 if (!isCSpaceLoad(i1
))
151 insn
->swapSources(0, 1);
155 if (isImmd32Load(i0
)) {
156 if (!isCSpaceLoad(i1
) && !isImmd32Load(i1
))
157 insn
->swapSources(0, 1);
164 if (insn
->op
== OP_SET
)
165 insn
->asCmp()->setCond
= reverseCondCode(insn
->asCmp()->setCond
);
167 if (insn
->op
== OP_SLCT
)
168 insn
->asCmp()->setCond
= inverseCondCode(insn
->asCmp()->setCond
);
172 LoadPropagation::visit(BasicBlock
*bb
)
174 const Target
*targ
= prog
->getTarget();
177 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
183 for (int s
= 0; i
->srcExists(s
); ++s
) {
184 Instruction
*ld
= i
->getSrc(s
)->getInsn();
186 if (!ld
|| ld
->fixed
|| (ld
->op
!= OP_LOAD
&& ld
->op
!= OP_MOV
))
188 if (!targ
->insnCanLoad(i
, s
, ld
))
192 i
->setSrc(s
, ld
->getSrc(0));
193 if (ld
->src
[0].isIndirect(0))
194 i
->setIndirect(s
, 0, ld
->getIndirect(0, 0));
196 if (ld
->getDef(0)->refCount() == 0)
197 delete_Instruction(prog
, ld
);
203 // =============================================================================
205 // Evaluate constant expressions.
206 class ConstantFolding
: public Pass
209 bool foldAll(Program
*);
212 virtual bool visit(BasicBlock
*);
214 void expr(Instruction
*, ImmediateValue
*, ImmediateValue
*);
215 void opnd(Instruction
*, ImmediateValue
*, int s
);
217 void unary(Instruction
*, const ImmediateValue
&);
219 // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
220 CmpInstruction
*findOriginForTestWithZero(Value
*);
222 unsigned int foldCount
;
227 // TODO: remember generated immediates and only revisit these
229 ConstantFolding::foldAll(Program
*prog
)
231 unsigned int iterCount
= 0;
236 } while (foldCount
&& ++iterCount
< 2);
241 ConstantFolding::visit(BasicBlock
*bb
)
243 Instruction
*i
, *next
;
245 for (i
= bb
->getEntry(); i
; i
= next
) {
247 if (i
->op
== OP_MOV
) // continue early, MOV appears frequently
250 ImmediateValue
*src0
= i
->src
[0].getImmediate();
251 ImmediateValue
*src1
= i
->src
[1].getImmediate();
266 ConstantFolding::findOriginForTestWithZero(Value
*value
)
270 Instruction
*insn
= value
->getInsn();
272 while (insn
&& insn
->op
!= OP_SET
) {
273 Instruction
*next
= NULL
;
278 next
= insn
->getSrc(0)->getInsn();
279 if (insn
->sType
!= next
->dType
)
283 next
= insn
->getSrc(0)->getInsn();
290 return insn
? insn
->asCmp() : NULL
;
294 Modifier::applyTo(ImmediateValue
& imm
) const
296 switch (imm
.reg
.type
) {
298 if (bits
& NV50_IR_MOD_ABS
)
299 imm
.reg
.data
.f32
= fabsf(imm
.reg
.data
.f32
);
300 if (bits
& NV50_IR_MOD_NEG
)
301 imm
.reg
.data
.f32
= -imm
.reg
.data
.f32
;
302 if (bits
& NV50_IR_MOD_SAT
) {
303 if (imm
.reg
.data
.f32
< 0.0f
)
304 imm
.reg
.data
.f32
= 0.0f
;
306 if (imm
.reg
.data
.f32
> 1.0f
)
307 imm
.reg
.data
.f32
= 1.0f
;
309 assert(!(bits
& NV50_IR_MOD_NOT
));
312 case TYPE_S8
: // NOTE: will be extended
315 case TYPE_U8
: // NOTE: treated as signed
318 if (bits
& NV50_IR_MOD_ABS
)
319 imm
.reg
.data
.s32
= (imm
.reg
.data
.s32
>= 0) ?
320 imm
.reg
.data
.s32
: -imm
.reg
.data
.s32
;
321 if (bits
& NV50_IR_MOD_NEG
)
322 imm
.reg
.data
.s32
= -imm
.reg
.data
.s32
;
323 if (bits
& NV50_IR_MOD_NOT
)
324 imm
.reg
.data
.s32
= ~imm
.reg
.data
.s32
;
328 if (bits
& NV50_IR_MOD_ABS
)
329 imm
.reg
.data
.f64
= fabs(imm
.reg
.data
.f64
);
330 if (bits
& NV50_IR_MOD_NEG
)
331 imm
.reg
.data
.f64
= -imm
.reg
.data
.f64
;
332 if (bits
& NV50_IR_MOD_SAT
) {
333 if (imm
.reg
.data
.f64
< 0.0)
334 imm
.reg
.data
.f64
= 0.0;
336 if (imm
.reg
.data
.f64
> 1.0)
337 imm
.reg
.data
.f64
= 1.0;
339 assert(!(bits
& NV50_IR_MOD_NOT
));
343 assert(!"invalid/unhandled type");
344 imm
.reg
.data
.u64
= 0;
350 Modifier::getOp() const
353 case NV50_IR_MOD_ABS
: return OP_ABS
;
354 case NV50_IR_MOD_NEG
: return OP_NEG
;
355 case NV50_IR_MOD_SAT
: return OP_SAT
;
356 case NV50_IR_MOD_NOT
: return OP_NOT
;
365 ConstantFolding::expr(Instruction
*i
,
366 ImmediateValue
*src0
, ImmediateValue
*src1
)
368 ImmediateValue
imm0(src0
, i
->sType
);
369 ImmediateValue
imm1(src1
, i
->sType
);
371 struct Storage
*const a
= &imm0
.reg
, *const b
= &imm1
.reg
;
373 i
->src
[0].mod
.applyTo(imm0
);
374 i
->src
[1].mod
.applyTo(imm1
);
380 if (i
->dnz
&& i
->dType
== TYPE_F32
) {
381 if (!isfinite(a
->data
.f32
))
383 if (!isfinite(b
->data
.f32
))
387 case TYPE_F32
: res
.data
.f32
= a
->data
.f32
* b
->data
.f32
; break;
388 case TYPE_F64
: res
.data
.f64
= a
->data
.f64
* b
->data
.f64
; break;
390 case TYPE_U32
: res
.data
.u32
= a
->data
.u32
* b
->data
.u32
; break;
396 if (b
->data
.u32
== 0)
399 case TYPE_F32
: res
.data
.f32
= a
->data
.f32
/ b
->data
.f32
; break;
400 case TYPE_F64
: res
.data
.f64
= a
->data
.f64
/ b
->data
.f64
; break;
401 case TYPE_S32
: res
.data
.s32
= a
->data
.s32
/ b
->data
.s32
; break;
402 case TYPE_U32
: res
.data
.u32
= a
->data
.u32
/ b
->data
.u32
; break;
409 case TYPE_F32
: res
.data
.f32
= a
->data
.f32
+ b
->data
.f32
; break;
410 case TYPE_F64
: res
.data
.f64
= a
->data
.f64
+ b
->data
.f64
; break;
412 case TYPE_U32
: res
.data
.u32
= a
->data
.u32
+ b
->data
.u32
; break;
419 case TYPE_F32
: res
.data
.f32
= pow(a
->data
.f32
, b
->data
.f32
); break;
420 case TYPE_F64
: res
.data
.f64
= pow(a
->data
.f64
, b
->data
.f64
); break;
427 case TYPE_F32
: res
.data
.f32
= MAX2(a
->data
.f32
, b
->data
.f32
); break;
428 case TYPE_F64
: res
.data
.f64
= MAX2(a
->data
.f64
, b
->data
.f64
); break;
429 case TYPE_S32
: res
.data
.s32
= MAX2(a
->data
.s32
, b
->data
.s32
); break;
430 case TYPE_U32
: res
.data
.u32
= MAX2(a
->data
.u32
, b
->data
.u32
); break;
437 case TYPE_F32
: res
.data
.f32
= MIN2(a
->data
.f32
, b
->data
.f32
); break;
438 case TYPE_F64
: res
.data
.f64
= MIN2(a
->data
.f64
, b
->data
.f64
); break;
439 case TYPE_S32
: res
.data
.s32
= MIN2(a
->data
.s32
, b
->data
.s32
); break;
440 case TYPE_U32
: res
.data
.u32
= MIN2(a
->data
.u32
, b
->data
.u32
); break;
446 res
.data
.u64
= a
->data
.u64
& b
->data
.u64
;
449 res
.data
.u64
= a
->data
.u64
| b
->data
.u64
;
452 res
.data
.u64
= a
->data
.u64
^ b
->data
.u64
;
455 res
.data
.u32
= a
->data
.u32
<< b
->data
.u32
;
459 case TYPE_S32
: res
.data
.s32
= a
->data
.s32
>> b
->data
.u32
; break;
460 case TYPE_U32
: res
.data
.u32
= a
->data
.u32
>> b
->data
.u32
; break;
466 if (a
->data
.u32
!= b
->data
.u32
)
468 res
.data
.u32
= a
->data
.u32
;
475 i
->src
[0].mod
= Modifier(0);
476 i
->src
[1].mod
= Modifier(0);
478 i
->setSrc(0, new_ImmediateValue(i
->bb
->getProgram(), res
.data
.u32
));
481 i
->getSrc(0)->reg
.data
= res
.data
;
483 if (i
->op
== OP_MAD
|| i
->op
== OP_FMA
) {
486 i
->setSrc(1, i
->getSrc(0));
487 i
->setSrc(0, i
->getSrc(2));
490 i
->src
[1].mod
= i
->src
[2].mod
;
492 src0
= i
->src
[0].getImmediate();
494 expr(i
, src0
, i
->getSrc(1)->asImm());
501 ConstantFolding::unary(Instruction
*i
, const ImmediateValue
&imm
)
505 if (i
->dType
!= TYPE_F32
)
508 case OP_NEG
: res
.data
.f32
= -imm
.reg
.data
.f32
; break;
509 case OP_ABS
: res
.data
.f32
= fabsf(imm
.reg
.data
.f32
); break;
510 case OP_RCP
: res
.data
.f32
= 1.0f
/ imm
.reg
.data
.f32
; break;
511 case OP_RSQ
: res
.data
.f32
= 1.0f
/ sqrtf(imm
.reg
.data
.f32
); break;
512 case OP_LG2
: res
.data
.f32
= log2f(imm
.reg
.data
.f32
); break;
513 case OP_EX2
: res
.data
.f32
= exp2f(imm
.reg
.data
.f32
); break;
514 case OP_SIN
: res
.data
.f32
= sinf(imm
.reg
.data
.f32
); break;
515 case OP_COS
: res
.data
.f32
= cosf(imm
.reg
.data
.f32
); break;
516 case OP_SQRT
: res
.data
.f32
= sqrtf(imm
.reg
.data
.f32
); break;
519 // these should be handled in subsequent OP_SIN/COS/EX2
520 res
.data
.f32
= imm
.reg
.data
.f32
;
526 i
->setSrc(0, new_ImmediateValue(i
->bb
->getProgram(), res
.data
.f32
));
527 i
->src
[0].mod
= Modifier(0);
531 ConstantFolding::opnd(Instruction
*i
, ImmediateValue
*src
, int s
)
534 const operation op
= i
->op
;
536 ImmediateValue
imm(src
, i
->sType
);
538 i
->src
[s
].mod
.applyTo(imm
);
542 if (i
->dType
== TYPE_F32
&& i
->getSrc(t
)->refCount() == 1) {
543 Instruction
*si
= i
->getSrc(t
)->getUniqueInsn();
545 if (si
&& si
->op
== OP_MUL
) {
546 float f
= imm
.reg
.data
.f32
;
548 if (si
->src
[1].getImmediate()) {
549 f
*= si
->src
[1].getImmediate()->reg
.data
.f32
;
550 si
->setSrc(1, new_ImmediateValue(prog
, f
));
551 i
->def
[0].replace(i
->getSrc(t
), false);
555 if (f
== 0.125f
) fac
= -3;
557 if (f
== 0.250f
) fac
= -2;
559 if (f
== 0.500f
) fac
= -1;
561 if (f
== 2.000f
) fac
= +1;
563 if (f
== 4.000f
) fac
= +2;
565 if (f
== 8.000f
) fac
= +3;
569 // FIXME: allowed & modifier
570 si
->postFactor
= fac
;
571 i
->def
[0].replace(i
->getSrc(t
), false);
577 if (imm
.isInteger(0)) {
579 i
->setSrc(0, i
->getSrc(s
));
582 if (imm
.isInteger(1) || imm
.isInteger(-1)) {
583 if (imm
.isNegative())
584 i
->src
[t
].mod
= i
->src
[t
].mod
^ Modifier(NV50_IR_MOD_NEG
);
585 i
->op
= i
->src
[t
].mod
.getOp();
587 i
->setSrc(0, i
->getSrc(1));
588 i
->src
[0].mod
= i
->src
[1].mod
;
595 if (imm
.isInteger(2) || imm
.isInteger(-2)) {
596 if (imm
.isNegative())
597 i
->src
[t
].mod
= i
->src
[t
].mod
^ Modifier(NV50_IR_MOD_NEG
);
599 i
->setSrc(s
, i
->getSrc(t
));
600 i
->src
[s
].mod
= i
->src
[t
].mod
;
602 if (!isFloatType(i
->sType
) && !imm
.isNegative() && imm
.isPow2()) {
605 i
->setSrc(1, new_ImmediateValue(prog
, imm
.reg
.data
.u32
));
609 if (imm
.isInteger(0)) {
611 i
->setSrc(0, i
->getSrc(1));
612 i
->src
[0].mod
= i
->src
[1].mod
;
615 i
->op
= i
->src
[0].mod
.getOp();
617 i
->src
[0].mod
= Modifier(0);
622 if (s
!= 1 || (i
->dType
!= TYPE_S32
&& i
->dType
!= TYPE_U32
))
624 bld
.setPosition(i
, false);
625 if (imm
.reg
.data
.u32
== 0) {
628 if (imm
.reg
.data
.u32
== 1) {
632 if (i
->dType
== TYPE_U32
&& imm
.isPow2()) {
634 i
->setSrc(1, bld
.mkImm(util_logbase2(imm
.reg
.data
.u32
)));
636 if (i
->dType
== TYPE_U32
) {
639 const uint32_t d
= imm
.reg
.data
.u32
;
642 uint32_t l
= util_logbase2(d
);
643 if (((uint32_t)1 << l
) < d
)
645 m
= (((uint64_t)1 << 32) * (((uint64_t)1 << l
) - d
)) / d
+ 1;
651 mul
= bld
.mkOp2(OP_MUL
, TYPE_U32
, tA
, i
->getSrc(0),
652 bld
.loadImm(NULL
, m
));
653 mul
->subOp
= NV50_IR_SUBOP_MUL_HIGH
;
654 bld
.mkOp2(OP_SUB
, TYPE_U32
, tB
, i
->getSrc(0), tA
);
657 bld
.mkOp2(OP_SHR
, TYPE_U32
, tA
, tB
, bld
.mkImm(r
));
660 tB
= s
? bld
.getSSA() : i
->getDef(0);
661 bld
.mkOp2(OP_ADD
, TYPE_U32
, tB
, mul
->getDef(0), tA
);
663 bld
.mkOp2(OP_SHR
, TYPE_U32
, i
->getDef(0), tB
, bld
.mkImm(s
));
665 delete_Instruction(prog
, i
);
667 if (imm
.reg
.data
.s32
== -1) {
673 const int32_t d
= imm
.reg
.data
.s32
;
675 int32_t l
= util_logbase2(static_cast<unsigned>(abs(d
)));
676 if ((1 << l
) < abs(d
))
680 m
= ((uint64_t)1 << (32 + l
- 1)) / abs(d
) + 1 - ((uint64_t)1 << 32);
684 bld
.mkOp3(OP_MAD
, TYPE_S32
, tA
, i
->getSrc(0), bld
.loadImm(NULL
, m
),
685 i
->getSrc(0))->subOp
= NV50_IR_SUBOP_MUL_HIGH
;
687 bld
.mkOp2(OP_SHR
, TYPE_S32
, tB
, tA
, bld
.mkImm(l
- 1));
691 bld
.mkCmp(OP_SET
, CC_LT
, TYPE_S32
, tA
, i
->getSrc(0), bld
.mkImm(0));
692 tD
= (d
< 0) ? bld
.getSSA() : i
->getDef(0)->asLValue();
693 bld
.mkOp2(OP_SUB
, TYPE_U32
, tD
, tB
, tA
);
695 bld
.mkOp1(OP_NEG
, TYPE_S32
, i
->getDef(0), tB
);
697 delete_Instruction(prog
, i
);
702 if (i
->sType
== TYPE_U32
&& imm
.isPow2()) {
703 bld
.setPosition(i
, false);
705 i
->setSrc(1, bld
.loadImm(NULL
, imm
.reg
.data
.u32
- 1));
709 case OP_SET
: // TODO: SET_AND,OR,XOR
711 CmpInstruction
*si
= findOriginForTestWithZero(i
->getSrc(t
));
713 if (i
->src
[t
].mod
!= Modifier(0))
715 if (imm
.reg
.data
.u32
!= 0 || !si
|| si
->op
!= OP_SET
)
718 ccZ
= (CondCode
)((unsigned int)i
->asCmp()->setCond
& ~CC_U
);
720 ccZ
= reverseCondCode(ccZ
);
722 case CC_LT
: cc
= CC_FL
; break;
723 case CC_GE
: cc
= CC_TR
; break;
724 case CC_EQ
: cc
= inverseCondCode(cc
); break;
725 case CC_LE
: cc
= inverseCondCode(cc
); break;
731 i
->asCmp()->setCond
= cc
;
732 i
->setSrc(0, si
->src
[0]);
733 i
->setSrc(1, si
->src
[1]);
734 i
->sType
= si
->sType
;
740 if (s
!= 1 || i
->src
[0].mod
!= Modifier(0))
742 // try to concatenate shifts
743 Instruction
*si
= i
->getSrc(0)->getInsn();
745 si
->op
!= OP_SHL
|| si
->src
[1].mod
!= Modifier(0))
747 ImmediateValue
*siImm
= si
->src
[1].getImmediate();
749 bld
.setPosition(i
, false);
750 i
->setSrc(0, si
->getSrc(0));
751 i
->setSrc(1, bld
.loadImm(NULL
,
752 imm
.reg
.data
.u32
+ siImm
->reg
.data
.u32
));
777 // =============================================================================
779 // Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed.
780 class ModifierFolding
: public Pass
783 virtual bool visit(BasicBlock
*);
787 ModifierFolding::visit(BasicBlock
*bb
)
789 const Target
*target
= prog
->getTarget();
791 Instruction
*i
, *next
, *mi
;
794 for (i
= bb
->getEntry(); i
; i
= next
) {
797 if (0 && i
->op
== OP_SUB
) {
798 // turn "sub" into "add neg" (do we really want this ?)
800 i
->src
[0].mod
= i
->src
[0].mod
^ Modifier(NV50_IR_MOD_NEG
);
803 for (int s
= 0; s
< 3 && i
->srcExists(s
); ++s
) {
804 mi
= i
->getSrc(s
)->getInsn();
806 mi
->predSrc
>= 0 || mi
->getDef(0)->refCount() > 8)
808 if (i
->sType
== TYPE_U32
&& mi
->dType
== TYPE_S32
) {
809 if ((i
->op
!= OP_ADD
&&
815 if (i
->sType
!= mi
->dType
) {
818 if ((mod
= Modifier(mi
->op
)) == Modifier(0))
820 mod
= mod
* mi
->src
[0].mod
;
822 if ((i
->op
== OP_ABS
) || i
->src
[s
].mod
.abs()) {
823 // abs neg [abs] = abs
824 mod
= mod
& Modifier(~(NV50_IR_MOD_NEG
| NV50_IR_MOD_ABS
));
826 if ((i
->op
== OP_NEG
) && mod
.neg()) {
828 // neg as both opcode and modifier on same insn is prohibited
829 // neg neg abs = abs, neg neg = identity
830 mod
= mod
& Modifier(~NV50_IR_MOD_NEG
);
832 mod
= mod
& Modifier(~NV50_IR_MOD_ABS
);
833 if (mod
== Modifier(0))
837 if (target
->isModSupported(i
, s
, mod
)) {
838 i
->setSrc(s
, mi
->getSrc(0));
839 i
->src
[s
].mod
= i
->src
[s
].mod
* mod
;
843 if (i
->op
== OP_SAT
) {
844 mi
= i
->getSrc(0)->getInsn();
846 mi
->getDef(0)->refCount() <= 1 && target
->isSatSupported(mi
)) {
848 mi
->setDef(0, i
->getDef(0));
849 delete_Instruction(prog
, i
);
857 // =============================================================================
859 // MUL + ADD -> MAD/FMA
860 // MIN/MAX(a, a) -> a, etc.
861 // SLCT(a, b, const) -> cc(const) ? a : b
863 // MUL(MUL(a, b), const) -> MUL_Xconst(a, b)
864 class AlgebraicOpt
: public Pass
867 virtual bool visit(BasicBlock
*);
869 void handleADD(Instruction
*);
870 void handleMINMAX(Instruction
*);
871 void handleRCP(Instruction
*);
872 void handleSLCT(Instruction
*);
873 void handleLOGOP(Instruction
*);
874 void handleCVT(Instruction
*);
878 AlgebraicOpt::handleADD(Instruction
*add
)
880 Value
*src0
= add
->getSrc(0);
881 Value
*src1
= add
->getSrc(1);
886 if (!prog
->getTarget()->isOpSupported(OP_MAD
, add
->dType
))
889 if (src0
->reg
.file
!= FILE_GPR
|| src1
->reg
.file
!= FILE_GPR
)
892 if (src0
->refCount() == 1 &&
893 src0
->getUniqueInsn() && src0
->getUniqueInsn()->op
== OP_MUL
)
896 if (src1
->refCount() == 1 &&
897 src1
->getUniqueInsn() && src1
->getUniqueInsn()->op
== OP_MUL
)
902 if ((src0
->getUniqueInsn() && src0
->getUniqueInsn()->bb
!= add
->bb
) ||
903 (src1
->getUniqueInsn() && src1
->getUniqueInsn()->bb
!= add
->bb
))
906 src
= add
->getSrc(s
);
908 mod
[0] = add
->src
[0].mod
;
909 mod
[1] = add
->src
[1].mod
;
910 mod
[2] = src
->getUniqueInsn()->src
[0].mod
;
911 mod
[3] = src
->getUniqueInsn()->src
[1].mod
;
913 if (((mod
[0] | mod
[1]) | (mod
[2] | mod
[3])) & Modifier(~NV50_IR_MOD_NEG
))
917 add
->subOp
= src
->getInsn()->subOp
; // potentially mul-high
919 add
->setSrc(2, add
->src
[s
? 0 : 1]);
921 add
->setSrc(0, src
->getInsn()->getSrc(0));
922 add
->src
[0].mod
= mod
[2] ^ mod
[s
];
923 add
->setSrc(1, src
->getInsn()->getSrc(1));
924 add
->src
[1].mod
= mod
[3];
928 AlgebraicOpt::handleMINMAX(Instruction
*minmax
)
930 Value
*src0
= minmax
->getSrc(0);
931 Value
*src1
= minmax
->getSrc(1);
933 if (src0
!= src1
|| src0
->reg
.file
!= FILE_GPR
)
935 if (minmax
->src
[0].mod
== minmax
->src
[1].mod
) {
936 if (minmax
->src
[0].mod
) {
938 minmax
->setSrc(1, NULL
);
940 minmax
->def
[0].replace(minmax
->getSrc(0), false);
941 minmax
->bb
->remove(minmax
);
945 // min(x, -x) = -abs(x)
946 // min(x, -abs(x)) = -abs(x)
947 // min(x, abs(x)) = x
948 // max(x, -abs(x)) = x
949 // max(x, abs(x)) = abs(x)
950 // max(x, -x) = abs(x)
955 AlgebraicOpt::handleRCP(Instruction
*rcp
)
957 Instruction
*si
= rcp
->getSrc(0)->getUniqueInsn();
959 if (si
&& si
->op
== OP_RCP
) {
960 Modifier mod
= rcp
->src
[0].mod
* si
->src
[0].mod
;
961 rcp
->op
= mod
.getOp();
962 rcp
->setSrc(0, si
->getSrc(0));
967 AlgebraicOpt::handleSLCT(Instruction
*slct
)
969 if (slct
->getSrc(2)->reg
.file
== FILE_IMMEDIATE
) {
970 if (slct
->getSrc(2)->asImm()->compare(slct
->asCmp()->setCond
, 0.0f
))
971 slct
->setSrc(0, slct
->getSrc(1));
973 if (slct
->getSrc(0) != slct
->getSrc(1)) {
977 slct
->setSrc(1, NULL
);
978 slct
->setSrc(2, NULL
);
982 AlgebraicOpt::handleLOGOP(Instruction
*logop
)
984 Value
*src0
= logop
->getSrc(0);
985 Value
*src1
= logop
->getSrc(1);
987 if (src0
->reg
.file
!= FILE_GPR
|| src1
->reg
.file
!= FILE_GPR
)
991 if (logop
->src
[0].mod
!= Modifier(0) ||
992 logop
->src
[1].mod
!= Modifier(0))
994 if (logop
->op
== OP_AND
|| logop
->op
== OP_OR
) {
995 logop
->def
[0].replace(logop
->getSrc(0), false);
996 delete_Instruction(prog
, logop
);
999 // try AND(SET, SET) -> SET_AND(SET)
1000 Instruction
*set0
= src0
->getInsn();
1001 Instruction
*set1
= src1
->getInsn();
1003 if (!set0
|| set0
->fixed
|| !set1
|| set1
->fixed
)
1005 if (set1
->op
!= OP_SET
) {
1006 Instruction
*xchg
= set0
;
1009 if (set1
->op
!= OP_SET
)
1012 if (set0
->op
!= OP_SET
&&
1013 set0
->op
!= OP_SET_AND
&&
1014 set0
->op
!= OP_SET_OR
&&
1015 set0
->op
!= OP_SET_XOR
)
1017 if (set0
->getDef(0)->refCount() > 1 &&
1018 set1
->getDef(0)->refCount() > 1)
1020 if (set0
->getPredicate() || set1
->getPredicate())
1022 // check that they don't source each other
1023 for (int s
= 0; s
< 2; ++s
)
1024 if (set0
->getSrc(s
) == set1
->getDef(0) ||
1025 set1
->getSrc(s
) == set0
->getDef(0))
1028 set0
= set0
->clone(true);
1029 set1
= set1
->clone(false);
1030 logop
->bb
->insertAfter(logop
, set1
);
1031 logop
->bb
->insertAfter(logop
, set0
);
1033 set0
->dType
= TYPE_U8
;
1034 set0
->getDef(0)->reg
.file
= FILE_PREDICATE
;
1035 set0
->getDef(0)->reg
.size
= 1;
1036 set1
->setSrc(2, set0
->getDef(0));
1037 switch (logop
->op
) {
1038 case OP_AND
: set1
->op
= OP_SET_AND
; break;
1039 case OP_OR
: set1
->op
= OP_SET_OR
; break;
1040 case OP_XOR
: set1
->op
= OP_SET_XOR
; break;
1045 set1
->setDef(0, logop
->getDef(0));
1046 delete_Instruction(prog
, logop
);
1050 // F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0
1052 AlgebraicOpt::handleCVT(Instruction
*cvt
)
1054 if (cvt
->sType
!= TYPE_F32
||
1055 cvt
->dType
!= TYPE_S32
|| cvt
->src
[0].mod
!= Modifier(0))
1057 Instruction
*insn
= cvt
->getSrc(0)->getInsn();
1058 if (!insn
|| insn
->op
!= OP_NEG
|| insn
->dType
!= TYPE_F32
)
1060 if (insn
->src
[0].mod
!= Modifier(0))
1062 insn
= insn
->getSrc(0)->getInsn();
1063 if (!insn
|| insn
->op
!= OP_SET
|| insn
->dType
!= TYPE_F32
)
1066 Instruction
*bset
= insn
->clone(false);
1067 bset
->dType
= TYPE_U32
;
1068 bset
->setDef(0, cvt
->getDef(0));
1069 cvt
->bb
->insertAfter(cvt
, bset
);
1070 delete_Instruction(prog
, cvt
);
1074 AlgebraicOpt::visit(BasicBlock
*bb
)
1077 for (Instruction
*i
= bb
->getEntry(); i
; i
= next
) {
1109 // =============================================================================
1112 updateLdStOffset(Instruction
*ldst
, int32_t offset
, Function
*fn
)
1114 if (offset
!= ldst
->getSrc(0)->reg
.data
.offset
) {
1115 if (ldst
->getSrc(0)->refCount() > 1)
1116 ldst
->setSrc(0, ldst
->getSrc(0)->clone(fn
));
1117 ldst
->getSrc(0)->reg
.data
.offset
= offset
;
1121 // Combine loads and stores, forward stores to loads where possible.
1122 class MemoryOpt
: public Pass
1130 const Value
*rel
[2];
1138 bool overlaps(const Instruction
*ldst
) const;
1140 inline void link(Record
**);
1141 inline void unlink(Record
**);
1142 inline void set(const Instruction
*ldst
);
1148 Record
*loads
[DATA_FILE_COUNT
];
1149 Record
*stores
[DATA_FILE_COUNT
];
1151 MemoryPool recordPool
;
1154 virtual bool visit(BasicBlock
*);
1155 bool runOpt(BasicBlock
*);
1157 Record
**getList(const Instruction
*);
1159 Record
*findRecord(const Instruction
*, bool load
, bool& isAdjacent
) const;
1161 // merge @insn into load/store instruction from @rec
1162 bool combineLd(Record
*rec
, Instruction
*ld
);
1163 bool combineSt(Record
*rec
, Instruction
*st
);
1165 bool replaceLdFromLd(Instruction
*ld
, Record
*ldRec
);
1166 bool replaceLdFromSt(Instruction
*ld
, Record
*stRec
);
1167 bool replaceStFromSt(Instruction
*restrict st
, Record
*stRec
);
1169 void addRecord(Instruction
*ldst
);
1170 void purgeRecords(Instruction
*const st
, DataFile
);
1171 void lockStores(Instruction
*const ld
);
1178 MemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record
), 6)
1180 for (int i
= 0; i
< DATA_FILE_COUNT
; ++i
) {
1190 for (unsigned int i
= 0; i
< DATA_FILE_COUNT
; ++i
) {
1192 for (it
= loads
[i
]; it
; it
= next
) {
1194 recordPool
.release(it
);
1197 for (it
= stores
[i
]; it
; it
= next
) {
1199 recordPool
.release(it
);
1206 MemoryOpt::combineLd(Record
*rec
, Instruction
*ld
)
1208 int32_t offRc
= rec
->offset
;
1209 int32_t offLd
= ld
->getSrc(0)->reg
.data
.offset
;
1210 int sizeRc
= rec
->size
;
1211 int sizeLd
= typeSizeof(ld
->dType
);
1212 int size
= sizeRc
+ sizeLd
;
1215 // only VFETCH can do a 96 byte load
1216 if (ld
->op
!= OP_VFETCH
&& size
== 12)
1218 // no unaligned loads
1219 if (((size
== 0x8) && (MIN2(offLd
, offRc
) & 0x7)) ||
1220 ((size
== 0xc) && (MIN2(offLd
, offRc
) & 0xf)))
1223 assert(sizeRc
+ sizeLd
<= 16 && offRc
!= offLd
);
1225 for (j
= 0; sizeRc
; sizeRc
-= rec
->insn
->getDef(j
)->reg
.size
, ++j
);
1227 if (offLd
< offRc
) {
1229 for (sz
= 0, d
= 0; sz
< sizeLd
; sz
+= ld
->getDef(d
)->reg
.size
, ++d
);
1230 // d: nr of definitions in ld
1231 // j: nr of definitions in rec->insn, move:
1232 for (d
= d
+ j
- 1; j
> 0; --j
, --d
)
1233 rec
->insn
->setDef(d
, rec
->insn
->getDef(j
- 1));
1235 if (rec
->insn
->getSrc(0)->refCount() > 1)
1236 rec
->insn
->setSrc(0, rec
->insn
->getSrc(0)->clone(func
));
1237 rec
->offset
= rec
->insn
->getSrc(0)->reg
.data
.offset
= offLd
;
1243 // move definitions of @ld to @rec->insn
1244 for (j
= 0; sizeLd
; ++j
, ++d
) {
1245 sizeLd
-= ld
->getDef(j
)->reg
.size
;
1246 rec
->insn
->setDef(d
, ld
->getDef(j
));
1250 rec
->insn
->setType(typeOfSize(size
));
1252 delete_Instruction(prog
, ld
);
1258 MemoryOpt::combineSt(Record
*rec
, Instruction
*st
)
1260 int32_t offRc
= rec
->offset
;
1261 int32_t offSt
= st
->getSrc(0)->reg
.data
.offset
;
1262 int sizeRc
= rec
->size
;
1263 int sizeSt
= typeSizeof(st
->dType
);
1265 int size
= sizeRc
+ sizeSt
;
1267 Value
*src
[4]; // no modifiers in ValueRef allowed for st
1270 if (size
== 12) // XXX: check if EXPORT a[] can do this after all
1272 if (size
== 8 && MIN2(offRc
, offSt
) & 0x7)
1275 st
->takeExtraSources(0, extra
); // save predicate and indirect address
1277 if (offRc
< offSt
) {
1278 // save values from @st
1279 for (s
= 0; sizeSt
; ++s
) {
1280 sizeSt
-= st
->getSrc(s
+ 1)->reg
.size
;
1281 src
[s
] = st
->getSrc(s
+ 1);
1283 // set record's values as low sources of @st
1284 for (j
= 1; sizeRc
; ++j
) {
1285 sizeRc
-= st
->getSrc(j
)->reg
.size
;
1286 st
->setSrc(j
, rec
->insn
->getSrc(j
));
1288 // set saved values as high sources of @st
1289 for (k
= j
, j
= 0; j
< s
; ++j
)
1290 st
->setSrc(k
++, src
[j
]);
1292 updateLdStOffset(st
, offRc
, func
);
1294 for (j
= 1; sizeSt
; ++j
)
1295 sizeSt
-= st
->getSrc(j
)->reg
.size
;
1296 for (s
= 1; sizeRc
; ++j
, ++s
) {
1297 sizeRc
-= rec
->insn
->getSrc(s
)->reg
.size
;
1298 st
->setSrc(j
, rec
->insn
->getSrc(s
));
1300 rec
->offset
= offSt
;
1302 st
->putExtraSources(0, extra
); // restore pointer and predicate
1304 delete_Instruction(prog
, rec
->insn
);
1307 rec
->insn
->setType(typeOfSize(size
));
1312 MemoryOpt::Record::set(const Instruction
*ldst
)
1314 const Symbol
*mem
= ldst
->getSrc(0)->asSym();
1315 fileIndex
= mem
->reg
.fileIndex
;
1316 rel
[0] = ldst
->getIndirect(0, 0);
1317 rel
[1] = ldst
->getIndirect(0, 1);
1318 offset
= mem
->reg
.data
.offset
;
1319 base
= mem
->getBase();
1320 size
= typeSizeof(ldst
->sType
);
1324 MemoryOpt::Record::link(Record
**list
)
1334 MemoryOpt::Record::unlink(Record
**list
)
1344 MemoryOpt::Record
**
1345 MemoryOpt::getList(const Instruction
*insn
)
1347 if (insn
->op
== OP_LOAD
|| insn
->op
== OP_VFETCH
)
1348 return &loads
[insn
->src
[0].getFile()];
1349 return &stores
[insn
->src
[0].getFile()];
1353 MemoryOpt::addRecord(Instruction
*i
)
1355 Record
**list
= getList(i
);
1356 Record
*it
= reinterpret_cast<Record
*>(recordPool
.allocate());
1365 MemoryOpt::findRecord(const Instruction
*insn
, bool load
, bool& isAdj
) const
1367 const Symbol
*sym
= insn
->getSrc(0)->asSym();
1368 const int size
= typeSizeof(insn
->sType
);
1370 Record
*it
= load
? loads
[sym
->reg
.file
] : stores
[sym
->reg
.file
];
1372 for (; it
; it
= it
->next
) {
1373 if (it
->locked
&& insn
->op
!= OP_LOAD
)
1375 if ((it
->offset
>> 4) != (sym
->reg
.data
.offset
>> 4) ||
1376 it
->rel
[0] != insn
->getIndirect(0, 0) ||
1377 it
->fileIndex
!= sym
->reg
.fileIndex
||
1378 it
->rel
[1] != insn
->getIndirect(0, 1))
1381 if (it
->offset
< sym
->reg
.data
.offset
) {
1382 if (it
->offset
+ it
->size
>= sym
->reg
.data
.offset
) {
1383 isAdj
= (it
->offset
+ it
->size
== sym
->reg
.data
.offset
);
1386 if (!(it
->offset
& 0x7))
1390 isAdj
= it
->offset
!= sym
->reg
.data
.offset
;
1391 if (size
<= it
->size
&& !isAdj
)
1394 if (!(sym
->reg
.data
.offset
& 0x7))
1395 if (it
->offset
- size
<= sym
->reg
.data
.offset
)
1403 MemoryOpt::replaceLdFromSt(Instruction
*ld
, Record
*rec
)
1405 Instruction
*st
= rec
->insn
;
1406 int32_t offSt
= rec
->offset
;
1407 int32_t offLd
= ld
->getSrc(0)->reg
.data
.offset
;
1410 for (s
= 1; offSt
!= offLd
&& st
->srcExists(s
); ++s
)
1411 offSt
+= st
->getSrc(s
)->reg
.size
;
1415 for (d
= 0; ld
->defExists(d
) && st
->srcExists(s
); ++d
, ++s
) {
1416 if (ld
->getDef(d
)->reg
.size
!= st
->getSrc(s
)->reg
.size
)
1418 if (st
->getSrc(s
)->reg
.file
!= FILE_GPR
)
1420 ld
->def
[d
].replace(st
->getSrc(s
), false);
1427 MemoryOpt::replaceLdFromLd(Instruction
*ldE
, Record
*rec
)
1429 Instruction
*ldR
= rec
->insn
;
1430 int32_t offR
= rec
->offset
;
1431 int32_t offE
= ldE
->getSrc(0)->reg
.data
.offset
;
1434 assert(offR
<= offE
);
1435 for (dR
= 0; offR
< offE
&& ldR
->defExists(dR
); ++dR
)
1436 offR
+= ldR
->getDef(dR
)->reg
.size
;
1440 for (dE
= 0; ldE
->defExists(dE
) && ldR
->defExists(dR
); ++dE
, ++dR
) {
1441 if (ldE
->getDef(dE
)->reg
.size
!= ldR
->getDef(dR
)->reg
.size
)
1443 ldE
->def
[dE
].replace(ldR
->getDef(dR
), false);
1446 delete_Instruction(prog
, ldE
);
1451 MemoryOpt::replaceStFromSt(Instruction
*restrict st
, Record
*rec
)
1453 const Instruction
*const ri
= rec
->insn
;
1456 int32_t offS
= st
->getSrc(0)->reg
.data
.offset
;
1457 int32_t offR
= rec
->offset
;
1458 int32_t endS
= offS
+ typeSizeof(st
->dType
);
1459 int32_t endR
= offR
+ typeSizeof(ri
->dType
);
1461 rec
->size
= MAX2(endS
, endR
) - MIN2(offS
, offR
);
1463 st
->takeExtraSources(0, extra
);
1469 // get non-replaced sources of ri
1470 for (s
= 1; offR
< offS
; offR
+= ri
->getSrc(s
)->reg
.size
, ++s
)
1471 vals
[k
++] = ri
->getSrc(s
);
1473 // get replaced sources of st
1474 for (s
= 1; st
->srcExists(s
); offS
+= st
->getSrc(s
)->reg
.size
, ++s
)
1475 vals
[k
++] = st
->getSrc(s
);
1476 // skip replaced sources of ri
1477 for (s
= n
; offR
< endS
; offR
+= ri
->getSrc(s
)->reg
.size
, ++s
);
1478 // get non-replaced sources after values covered by st
1479 for (; offR
< endR
; offR
+= ri
->getSrc(s
)->reg
.size
, ++s
)
1480 vals
[k
++] = ri
->getSrc(s
);
1481 for (s
= 0; s
< k
; ++s
)
1482 st
->setSrc(s
+ 1, vals
[s
]);
1483 st
->setSrc(0, ri
->getSrc(0));
1487 for (j
= 1; offR
< endS
; offR
+= ri
->getSrc(j
++)->reg
.size
);
1488 for (s
= 1; offS
< endS
; offS
+= st
->getSrc(s
++)->reg
.size
);
1489 for (; offR
< endR
; offR
+= ri
->getSrc(j
++)->reg
.size
)
1490 st
->setSrc(s
++, ri
->getSrc(j
));
1492 st
->putExtraSources(0, extra
);
1494 delete_Instruction(prog
, rec
->insn
);
1497 rec
->offset
= st
->getSrc(0)->reg
.data
.offset
;
1499 st
->setType(typeOfSize(rec
->size
));
1505 MemoryOpt::Record::overlaps(const Instruction
*ldst
) const
1510 if (this->fileIndex
!= that
.fileIndex
)
1513 if (this->rel
[0] || that
.rel
[0])
1514 return this->base
== that
.base
;
1516 (this->offset
< that
.offset
+ that
.size
) &&
1517 (this->offset
+ this->size
> that
.offset
);
1520 // We must not eliminate stores that affect the result of @ld if
1521 // we find later stores to the same location, and we may no longer
1522 // merge them with later stores.
1523 // The stored value can, however, still be used to determine the value
1524 // returned by future loads.
1526 MemoryOpt::lockStores(Instruction
*const ld
)
1528 for (Record
*r
= stores
[ld
->src
[0].getFile()]; r
; r
= r
->next
)
1529 if (!r
->locked
&& r
->overlaps(ld
))
1533 // Prior loads from the location of @st are no longer valid.
1534 // Stores to the location of @st may no longer be used to derive
1535 // the value at it nor be coalesced into later stores.
1537 MemoryOpt::purgeRecords(Instruction
*const st
, DataFile f
)
1540 f
= st
->src
[0].getFile();
1542 for (Record
*r
= loads
[f
]; r
; r
= r
->next
)
1543 if (!st
|| r
->overlaps(st
))
1544 r
->unlink(&loads
[f
]);
1546 for (Record
*r
= stores
[f
]; r
; r
= r
->next
)
1547 if (!st
|| r
->overlaps(st
))
1548 r
->unlink(&stores
[f
]);
1552 MemoryOpt::visit(BasicBlock
*bb
)
1554 bool ret
= runOpt(bb
);
1555 // Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st
1556 // where 96 bit memory operations are forbidden.
1563 MemoryOpt::runOpt(BasicBlock
*bb
)
1565 Instruction
*ldst
, *next
;
1567 bool isAdjacent
= true;
1569 for (ldst
= bb
->getEntry(); ldst
; ldst
= next
) {
1574 if (ldst
->op
== OP_LOAD
|| ldst
->op
== OP_VFETCH
) {
1575 if (ldst
->isDead()) {
1576 // might have been produced by earlier optimization
1577 delete_Instruction(prog
, ldst
);
1581 if (ldst
->op
== OP_STORE
|| ldst
->op
== OP_EXPORT
) {
1584 // TODO: maybe have all fixed ops act as barrier ?
1585 if (ldst
->op
== OP_CALL
) {
1586 purgeRecords(NULL
, FILE_MEMORY_LOCAL
);
1587 purgeRecords(NULL
, FILE_MEMORY_GLOBAL
);
1588 purgeRecords(NULL
, FILE_MEMORY_SHARED
);
1589 purgeRecords(NULL
, FILE_SHADER_OUTPUT
);
1591 if (ldst
->op
== OP_EMIT
|| ldst
->op
== OP_RESTART
) {
1592 purgeRecords(NULL
, FILE_SHADER_OUTPUT
);
1596 if (ldst
->getPredicate()) // TODO: handle predicated ld/st
1600 DataFile file
= ldst
->src
[0].getFile();
1602 // if ld l[]/g[] look for previous store to eliminate the reload
1603 if (file
== FILE_MEMORY_GLOBAL
|| file
== FILE_MEMORY_LOCAL
) {
1604 // TODO: shared memory ?
1605 rec
= findRecord(ldst
, false, isAdjacent
);
1606 if (rec
&& !isAdjacent
)
1607 keep
= !replaceLdFromSt(ldst
, rec
);
1610 // or look for ld from the same location and replace this one
1611 rec
= keep
? findRecord(ldst
, true, isAdjacent
) : NULL
;
1614 keep
= !replaceLdFromLd(ldst
, rec
);
1616 // or combine a previous load with this one
1617 keep
= !combineLd(rec
, ldst
);
1622 rec
= findRecord(ldst
, false, isAdjacent
);
1625 keep
= !replaceStFromSt(ldst
, rec
);
1627 keep
= !combineSt(rec
, ldst
);
1630 purgeRecords(ldst
, DATA_FILE_COUNT
);
1640 // =============================================================================
1642 // Turn control flow into predicated instructions (after register allocation !).
1644 // Could move this to before register allocation on NVC0 and also handle nested
1646 class FlatteningPass
: public Pass
1649 virtual bool visit(BasicBlock
*);
1651 bool tryPredicateConditional(BasicBlock
*);
1652 void predicateInstructions(BasicBlock
*, Value
*pred
, CondCode cc
);
1653 void tryPropagateBranch(BasicBlock
*);
1654 inline bool isConstantCondition(Value
*pred
);
1655 inline bool mayPredicate(const Instruction
*, const Value
*pred
) const;
1656 inline void removeFlow(Instruction
*);
1660 FlatteningPass::isConstantCondition(Value
*pred
)
1662 Instruction
*insn
= pred
->getUniqueInsn();
1664 if (insn
->op
!= OP_SET
|| insn
->srcExists(2))
1667 for (int s
= 0; s
< 2 && insn
->srcExists(s
); ++s
) {
1668 Instruction
*ld
= insn
->getSrc(s
)->getUniqueInsn();
1671 if (ld
->op
!= OP_MOV
&& ld
->op
!= OP_LOAD
)
1673 if (ld
->src
[0].isIndirect(0))
1675 file
= ld
->src
[0].getFile();
1677 file
= insn
->src
[s
].getFile();
1678 // catch $r63 on NVC0
1679 if (file
== FILE_GPR
&& insn
->getSrc(s
)->reg
.data
.id
> prog
->maxGPR
)
1680 file
= FILE_IMMEDIATE
;
1682 if (file
!= FILE_IMMEDIATE
&& file
!= FILE_MEMORY_CONST
)
1689 FlatteningPass::removeFlow(Instruction
*insn
)
1691 FlowInstruction
*term
= insn
? insn
->asFlow() : NULL
;
1694 Graph::Edge::Type ty
= term
->bb
->cfg
.outgoing().getType();
1696 if (term
->op
== OP_BRA
) {
1697 // TODO: this might get more difficult when we get arbitrary BRAs
1698 if (ty
== Graph::Edge::CROSS
|| ty
== Graph::Edge::BACK
)
1701 if (term
->op
!= OP_JOIN
)
1704 delete_Instruction(prog
, term
);
1706 Value
*pred
= term
->getPredicate();
1708 if (pred
&& pred
->refCount() == 0) {
1709 Instruction
*pSet
= pred
->getUniqueInsn();
1710 pred
->join
->reg
.data
.id
= -1; // deallocate
1712 delete_Instruction(prog
, pSet
);
1717 FlatteningPass::predicateInstructions(BasicBlock
*bb
, Value
*pred
, CondCode cc
)
1719 for (Instruction
*i
= bb
->getEntry(); i
; i
= i
->next
) {
1722 assert(!i
->getPredicate());
1723 i
->setPredicate(cc
, pred
);
1725 removeFlow(bb
->getExit());
1729 FlatteningPass::mayPredicate(const Instruction
*insn
, const Value
*pred
) const
1731 if (insn
->isPseudo())
1733 // TODO: calls where we don't know which registers are modified
1735 if (!prog
->getTarget()->mayPredicate(insn
, pred
))
1737 for (int d
= 0; insn
->defExists(d
); ++d
)
1738 if (insn
->getDef(d
)->equals(pred
))
1743 // If we conditionally skip over or to a branch instruction, replace it.
1744 // NOTE: We do not update the CFG anymore here !
1746 FlatteningPass::tryPropagateBranch(BasicBlock
*bb
)
1748 BasicBlock
*bf
= NULL
;
1751 if (bb
->cfg
.outgoingCount() != 2)
1753 if (!bb
->getExit() || bb
->getExit()->op
!= OP_BRA
)
1755 Graph::EdgeIterator ei
= bb
->cfg
.outgoing();
1757 for (i
= 0; !ei
.end(); ++i
, ei
.next()) {
1758 bf
= BasicBlock::get(ei
.getNode());
1759 if (bf
->getInsnCount() == 1)
1762 if (ei
.end() || !bf
->getExit())
1764 FlowInstruction
*bra
= bb
->getExit()->asFlow();
1765 FlowInstruction
*rep
= bf
->getExit()->asFlow();
1767 if (rep
->getPredicate())
1769 if (rep
->op
!= OP_BRA
&&
1770 rep
->op
!= OP_JOIN
&&
1775 bra
->target
.bb
= rep
->target
.bb
;
1776 if (i
) // 2nd out block means branch not taken
1777 bra
->cc
= inverseCondCode(bra
->cc
);
1782 FlatteningPass::visit(BasicBlock
*bb
)
1784 if (tryPredicateConditional(bb
))
1787 // try to attach join to previous instruction
1788 Instruction
*insn
= bb
->getExit();
1789 if (insn
&& insn
->op
== OP_JOIN
&& !insn
->getPredicate()) {
1791 if (insn
&& !insn
->getPredicate() && !insn
->asFlow() && !insn
->isNop()) {
1793 bb
->remove(bb
->getExit());
1798 tryPropagateBranch(bb
);
1804 FlatteningPass::tryPredicateConditional(BasicBlock
*bb
)
1806 BasicBlock
*bL
= NULL
, *bR
= NULL
;
1807 unsigned int nL
= 0, nR
= 0, limit
= 12;
1811 mask
= bb
->initiatesSimpleConditional();
1815 assert(bb
->getExit());
1816 Value
*pred
= bb
->getExit()->getPredicate();
1819 if (isConstantCondition(pred
))
1822 Graph::EdgeIterator ei
= bb
->cfg
.outgoing();
1825 bL
= BasicBlock::get(ei
.getNode());
1826 for (insn
= bL
->getEntry(); insn
; insn
= insn
->next
, ++nL
)
1827 if (!mayPredicate(insn
, pred
))
1830 return false; // too long, do a real branch
1835 bR
= BasicBlock::get(ei
.getNode());
1836 for (insn
= bR
->getEntry(); insn
; insn
= insn
->next
, ++nR
)
1837 if (!mayPredicate(insn
, pred
))
1840 return false; // too long, do a real branch
1844 predicateInstructions(bL
, pred
, bb
->getExit()->cc
);
1846 predicateInstructions(bR
, pred
, inverseCondCode(bb
->getExit()->cc
));
1849 bb
->remove(bb
->joinAt
);
1852 removeFlow(bb
->getExit()); // delete the branch/join at the fork point
1854 // remove potential join operations at the end of the conditional
1855 if (prog
->getTarget()->joinAnterior
) {
1856 bb
= BasicBlock::get((bL
? bL
: bR
)->cfg
.outgoing().getNode());
1857 if (bb
->getEntry() && bb
->getEntry()->op
== OP_JOIN
)
1858 removeFlow(bb
->getEntry());
1864 // =============================================================================
1866 // Common subexpression elimination. Stupid O^2 implementation.
1867 class LocalCSE
: public Pass
1870 virtual bool visit(BasicBlock
*);
1872 inline bool tryReplace(Instruction
**, Instruction
*);
1874 DLList ops
[OP_LAST
+ 1];
1877 class GlobalCSE
: public Pass
1880 virtual bool visit(BasicBlock
*);
1884 Instruction::isActionEqual(const Instruction
*that
) const
1886 if (this->op
!= that
->op
||
1887 this->dType
!= that
->dType
||
1888 this->sType
!= that
->sType
)
1890 if (this->cc
!= that
->cc
)
1893 if (this->asTex()) {
1894 if (memcmp(&this->asTex()->tex
,
1895 &that
->asTex()->tex
,
1896 sizeof(this->asTex()->tex
)))
1899 if (this->asCmp()) {
1900 if (this->asCmp()->setCond
!= that
->asCmp()->setCond
)
1903 if (this->asFlow()) {
1906 if (this->atomic
!= that
->atomic
||
1907 this->ipa
!= that
->ipa
||
1908 this->lanes
!= that
->lanes
||
1909 this->perPatch
!= that
->perPatch
)
1911 if (this->postFactor
!= that
->postFactor
)
1915 if (this->subOp
!= that
->subOp
||
1916 this->saturate
!= that
->saturate
||
1917 this->rnd
!= that
->rnd
||
1918 this->ftz
!= that
->ftz
||
1919 this->dnz
!= that
->dnz
||
1920 this->cache
!= that
->cache
)
1927 Instruction::isResultEqual(const Instruction
*that
) const
1931 // NOTE: location of discard only affects tex with liveOnly and quadops
1932 if (!this->defExists(0) && this->op
!= OP_DISCARD
)
1935 if (!isActionEqual(that
))
1938 if (this->predSrc
!= that
->predSrc
)
1941 for (d
= 0; this->defExists(d
); ++d
) {
1942 if (!that
->defExists(d
) ||
1943 !this->getDef(d
)->equals(that
->getDef(d
), false))
1946 if (that
->defExists(d
))
1949 for (s
= 0; this->srcExists(s
); ++s
) {
1950 if (!that
->srcExists(s
))
1952 if (this->src
[s
].mod
!= that
->src
[s
].mod
)
1954 if (!this->getSrc(s
)->equals(that
->getSrc(s
), true))
1957 if (that
->srcExists(s
))
1960 if (op
== OP_LOAD
|| op
== OP_VFETCH
) {
1961 switch (src
[0].getFile()) {
1962 case FILE_MEMORY_CONST
:
1963 case FILE_SHADER_INPUT
:
1973 // pull through common expressions from different in-blocks
1975 GlobalCSE::visit(BasicBlock
*bb
)
1977 Instruction
*phi
, *next
, *ik
;
1980 for (phi
= bb
->getPhi(); phi
&& phi
->op
== OP_PHI
; phi
= next
) {
1982 if (phi
->getSrc(0)->refCount() > 1)
1984 ik
= phi
->getSrc(0)->getInsn();
1985 for (s
= 1; phi
->srcExists(s
); ++s
) {
1986 if (phi
->getSrc(s
)->refCount() > 1)
1988 if (!phi
->getSrc(s
)->getInsn()->isResultEqual(ik
))
1991 if (!phi
->srcExists(s
)) {
1992 Instruction
*entry
= bb
->getEntry();
1994 if (!entry
|| entry
->op
!= OP_JOIN
)
1997 bb
->insertAfter(entry
, ik
);
1998 ik
->setDef(0, phi
->getDef(0));
1999 delete_Instruction(prog
, phi
);
2007 LocalCSE::tryReplace(Instruction
**ptr
, Instruction
*i
)
2009 Instruction
*old
= *ptr
;
2010 if (!old
->isResultEqual(i
))
2012 for (int d
= 0; old
->defExists(d
); ++d
)
2013 old
->def
[d
].replace(i
->getDef(d
), false);
2014 delete_Instruction(prog
, old
);
2020 LocalCSE::visit(BasicBlock
*bb
)
2022 unsigned int replaced
;
2025 Instruction
*ir
, *next
;
2029 // will need to know the order of instructions
2031 for (ir
= bb
->getEntry(); ir
; ir
= ir
->next
)
2032 ir
->serial
= serial
++;
2034 for (ir
= bb
->getEntry(); ir
; ir
= next
) {
2041 ops
[ir
->op
].insert(ir
);
2045 for (s
= 0; ir
->srcExists(s
); ++s
)
2046 if (ir
->getSrc(s
)->asLValue())
2047 if (!src
|| ir
->getSrc(s
)->refCount() < src
->refCount())
2048 src
= ir
->getSrc(s
);
2051 for (ValueRef::Iterator refs
= src
->uses
->iterator(); !refs
.end();
2053 Instruction
*ik
= refs
.get()->getInsn();
2054 if (ik
->serial
< ir
->serial
&& ik
->bb
== ir
->bb
)
2055 if (tryReplace(&ir
, ik
))
2059 DLLIST_FOR_EACH(&ops
[ir
->op
], iter
)
2061 Instruction
*ik
= reinterpret_cast<Instruction
*>(iter
.get());
2062 if (tryReplace(&ir
, ik
))
2068 ops
[ir
->op
].insert(ir
);
2072 for (unsigned int i
= 0; i
<= OP_LAST
; ++i
)
2080 // =============================================================================
2082 // Remove computations of unused values.
2083 class DeadCodeElim
: public Pass
2086 bool buryAll(Program
*);
2089 virtual bool visit(BasicBlock
*);
2091 void checkSplitLoad(Instruction
*ld
); // for partially dead loads
2093 unsigned int deadCount
;
2097 DeadCodeElim::buryAll(Program
*prog
)
2101 if (!this->run(prog
, false, false))
2103 } while (deadCount
);
2109 DeadCodeElim::visit(BasicBlock
*bb
)
2113 for (Instruction
*i
= bb
->getFirst(); i
; i
= next
) {
2117 delete_Instruction(prog
, i
);
2119 if (i
->defExists(1) && (i
->op
== OP_VFETCH
|| i
->op
== OP_LOAD
)) {
2127 DeadCodeElim::checkSplitLoad(Instruction
*ld1
)
2129 Instruction
*ld2
= NULL
; // can get at most 2 loads
2132 int32_t addr1
, addr2
;
2133 int32_t size1
, size2
;
2135 uint32_t mask
= 0xffffffff;
2137 for (d
= 0; ld1
->defExists(d
); ++d
)
2138 if (!ld1
->getDef(d
)->refCount() && ld1
->getDef(d
)->reg
.data
.id
< 0)
2140 if (mask
== 0xffffffff)
2143 addr1
= ld1
->getSrc(0)->reg
.data
.offset
;
2146 for (d
= 0; ld1
->defExists(d
); ++d
) {
2147 if (mask
& (1 << d
)) {
2148 if (size1
&& (addr1
& 0x7))
2150 def1
[n1
] = ld1
->getDef(d
);
2151 size1
+= def1
[n1
++]->reg
.size
;
2154 addr1
+= ld1
->getDef(d
)->reg
.size
;
2159 for (addr2
= addr1
+ size1
; ld1
->defExists(d
); ++d
) {
2160 if (mask
& (1 << d
)) {
2161 def2
[n2
] = ld1
->getDef(d
);
2162 size2
+= def2
[n2
++]->reg
.size
;
2165 addr2
+= ld1
->getDef(d
)->reg
.size
;
2169 updateLdStOffset(ld1
, addr1
, func
);
2170 ld1
->setType(typeOfSize(size1
));
2171 for (d
= 0; d
< 4; ++d
)
2172 ld1
->setDef(d
, (d
< n1
) ? def1
[d
] : NULL
);
2177 ld2
= ld1
->clone(false);
2178 updateLdStOffset(ld2
, addr2
, func
);
2179 ld2
->setType(typeOfSize(size2
));
2180 for (d
= 0; d
< 4; ++d
)
2181 ld2
->setDef(d
, (d
< n2
) ? def2
[d
] : NULL
);
2183 ld1
->bb
->insertAfter(ld1
, ld2
);
2186 // =============================================================================
// Instantiate pass @n and invoke its member @f on this Program if the
// requested optimization level is at least @l; abort on pass failure.
#define RUN_PASS(l, n, f)                    \
   if (level >= (l)) {                       \
      if (dbgFlags & NV50_IR_DEBUG_VERBOSE)  \
         INFO("PEEPHOLE: %s\n", #n);         \
      n pass;                                \
      if (!pass.f(this))                     \
         return false;                       \
   }
2198 Program::optimizeSSA(int level
)
2200 RUN_PASS(1, DeadCodeElim
, buryAll
);
2201 RUN_PASS(1, CopyPropagation
, run
);
2202 RUN_PASS(2, GlobalCSE
, run
);
2203 RUN_PASS(1, LocalCSE
, run
);
2204 RUN_PASS(2, AlgebraicOpt
, run
);
2205 RUN_PASS(2, ModifierFolding
, run
); // before load propagation -> less checks
2206 RUN_PASS(1, ConstantFolding
, foldAll
);
2207 RUN_PASS(1, LoadPropagation
, run
);
2208 RUN_PASS(2, MemoryOpt
, run
);
2209 RUN_PASS(2, LocalCSE
, run
);
2210 RUN_PASS(0, DeadCodeElim
, buryAll
);
2215 Program::optimizePostRA(int level
)
2217 RUN_PASS(2, FlatteningPass
, run
);