2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/* Recurse into the CFG successors of block @b that have not yet been visited
 * in the current pass; pc->pass_seq is the "visited" marker for this walk.
 * NOTE(review): the do/while wrapper and recursion call were missing from the
 * garbled source and were reconstructed — verify against upstream.
 */
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
34 extern unsigned nv50_inst_min_size(struct nv_instruction
*);
41 values_equal(struct nv_value
*a
, struct nv_value
*b
)
44 return (a
->reg
.file
== b
->reg
.file
&& a
->join
->reg
.id
== b
->join
->reg
.id
);
48 inst_commutation_check(struct nv_instruction
*a
,
49 struct nv_instruction
*b
)
53 for (di
= 0; di
< 4; ++di
) {
56 for (si
= 0; si
< 5; ++si
) {
59 if (values_equal(a
->def
[di
], b
->src
[si
]->value
))
64 if (b
->flags_src
&& b
->flags_src
->value
== a
->flags_def
)
70 /* Check whether we can swap the order of the instructions,
71 * where a & b may be either the earlier or the later one.
74 inst_commutation_legal(struct nv_instruction
*a
,
75 struct nv_instruction
*b
)
77 return inst_commutation_check(a
, b
) && inst_commutation_check(b
, a
);
81 inst_cullable(struct nv_instruction
*nvi
)
83 if (nvi
->opcode
== NV_OP_STA
)
85 return (!(nvi
->is_terminator
|| nvi
->is_join
||
88 nv_nvi_refcount(nvi
)));
92 nvi_isnop(struct nv_instruction
*nvi
)
94 if (nvi
->opcode
== NV_OP_EXPORT
|| nvi
->opcode
== NV_OP_UNDEF
)
97 /* NOTE: 'fixed' now only means that it shouldn't be optimized away,
98 * but we can still remove it if it is a no-op move.
100 if (/* nvi->fixed || */
101 /* nvi->flags_src || */ /* cond. MOV to same register is still NOP */
103 nvi
->is_terminator
||
107 if (nvi
->def
[0] && nvi
->def
[0]->join
->reg
.id
< 0)
110 if (nvi
->opcode
!= NV_OP_MOV
&& nvi
->opcode
!= NV_OP_SELECT
)
113 if (nvi
->def
[0]->reg
.file
!= nvi
->src
[0]->value
->reg
.file
)
116 if (nvi
->src
[0]->value
->join
->reg
.id
< 0) {
117 NV50_DBGMSG(PROG_IR
, "nvi_isnop: orphaned value detected\n");
121 if (nvi
->opcode
== NV_OP_SELECT
)
122 if (!values_equal(nvi
->def
[0], nvi
->src
[1]->value
))
125 return values_equal(nvi
->def
[0], nvi
->src
[0]->value
);
135 nv_pass_flatten(struct nv_pass
*ctx
, struct nv_basic_block
*b
);
138 nv_pc_pass_pre_emission(void *priv
, struct nv_basic_block
*b
)
140 struct nv_pc
*pc
= (struct nv_pc
*)priv
;
141 struct nv_basic_block
*in
;
142 struct nv_instruction
*nvi
, *next
;
146 /* find first non-empty block emitted before b */
147 for (j
= pc
->num_blocks
- 1; j
>= 0 && !pc
->bb_list
[j
]->bin_size
; --j
);
148 for (; j
>= 0; --j
) {
151 /* check for no-op branches (BRA $PC+8) */
152 if (in
->exit
&& in
->exit
->opcode
== NV_OP_BRA
&& in
->exit
->target
== b
) {
156 for (++j
; j
< pc
->num_blocks
; ++j
)
157 pc
->bb_list
[j
]->bin_pos
-= 8;
159 nv_nvi_delete(in
->exit
);
161 b
->bin_pos
= in
->bin_pos
+ in
->bin_size
;
163 if (in
->bin_size
) /* no more no-op branches to b */
167 pc
->bb_list
[pc
->num_blocks
++] = b
;
171 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
177 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
180 size
= nv50_inst_min_size(nvi
);
181 if (nvi
->next
&& size
< 8)
184 if ((n32
& 1) && nvi
->next
&&
185 nv50_inst_min_size(nvi
->next
) == 4 &&
186 inst_commutation_legal(nvi
, nvi
->next
)) {
188 nv_nvi_permute(nvi
, nvi
->next
);
193 b
->bin_size
+= n32
& 1;
195 nvi
->prev
->is_long
= 1;
198 b
->bin_size
+= 1 + nvi
->is_long
;
202 NV50_DBGMSG(PROG_IR
, "block %p is now empty\n", b
);
204 if (!b
->exit
->is_long
) {
206 b
->exit
->is_long
= 1;
209 /* might have del'd a hole tail of instructions */
210 if (!b
->exit
->prev
->is_long
&& !(n32
& 1)) {
212 b
->exit
->prev
->is_long
= 1;
215 assert(!b
->entry
|| (b
->exit
&& b
->exit
->is_long
));
217 pc
->bin_size
+= b
->bin_size
*= 4;
221 nv_pc_pass2(struct nv_pc
*pc
, struct nv_basic_block
*root
)
229 nv_pass_flatten(&pass
, root
);
231 nv_pc_pass_in_order(root
, nv_pc_pass_pre_emission
, pc
);
237 nv_pc_exec_pass2(struct nv_pc
*pc
)
241 NV50_DBGMSG(PROG_IR
, "preparing %u blocks for emission\n", pc
->num_blocks
);
243 pc
->num_blocks
= 0; /* will reorder bb_list */
245 for (i
= 0; i
< pc
->num_subroutines
+ 1; ++i
)
246 if (pc
->root
[i
] && (ret
= nv_pc_pass2(pc
, pc
->root
[i
])))
251 static INLINE boolean
252 is_cmem_load(struct nv_instruction
*nvi
)
254 return (nvi
->opcode
== NV_OP_LDA
&&
255 nvi
->src
[0]->value
->reg
.file
>= NV_FILE_MEM_C(0) &&
256 nvi
->src
[0]->value
->reg
.file
<= NV_FILE_MEM_C(15));
259 static INLINE boolean
260 is_smem_load(struct nv_instruction
*nvi
)
262 return (nvi
->opcode
== NV_OP_LDA
&&
263 (nvi
->src
[0]->value
->reg
.file
== NV_FILE_MEM_S
||
264 nvi
->src
[0]->value
->reg
.file
<= NV_FILE_MEM_P
));
267 static INLINE boolean
268 is_immd_move(struct nv_instruction
*nvi
)
270 return (nvi
->opcode
== NV_OP_MOV
&&
271 nvi
->src
[0]->value
->reg
.file
== NV_FILE_IMM
);
275 check_swap_src_0_1(struct nv_instruction
*nvi
)
277 static const ubyte cc_swapped
[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
279 struct nv_ref
*src0
= nvi
->src
[0], *src1
= nvi
->src
[1];
281 if (!nv_op_commutative(nvi
->opcode
))
283 assert(src0
&& src1
);
285 if (src1
->value
->reg
.file
== NV_FILE_IMM
)
288 if (is_cmem_load(src0
->value
->insn
)) {
289 if (!is_cmem_load(src1
->value
->insn
)) {
292 /* debug_printf("swapping cmem load to 1\n"); */
295 if (is_smem_load(src1
->value
->insn
)) {
296 if (!is_smem_load(src0
->value
->insn
)) {
299 /* debug_printf("swapping smem load to 0\n"); */
303 if (nvi
->opcode
== NV_OP_SET
&& nvi
->src
[0] != src0
)
304 nvi
->set_cond
= (nvi
->set_cond
& ~7) | cc_swapped
[nvi
->set_cond
& 7];
308 nv_pass_fold_stores(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
310 struct nv_instruction
*nvi
, *sti
, *next
;
313 for (sti
= b
->entry
; sti
; sti
= next
) {
316 /* only handling MOV to $oX here */
317 if (!sti
->def
[0] || sti
->def
[0]->reg
.file
!= NV_FILE_OUT
)
319 if (sti
->opcode
!= NV_OP_MOV
&& sti
->opcode
!= NV_OP_STA
)
322 nvi
= sti
->src
[0]->value
->insn
;
323 if (!nvi
|| nvi
->opcode
== NV_OP_PHI
|| nv_is_vector_op(nvi
->opcode
))
325 assert(nvi
->def
[0] == sti
->src
[0]->value
);
327 if (nvi
->opcode
== NV_OP_SELECT
)
329 if (nvi
->def
[0]->refc
> 1)
332 /* cannot write to $oX when using immediate */
333 for (j
= 0; j
< 4 && nvi
->src
[j
]; ++j
)
334 if (nvi
->src
[j
]->value
->reg
.file
== NV_FILE_IMM
||
335 nvi
->src
[j
]->value
->reg
.file
== NV_FILE_MEM_L
)
337 if (j
< 4 && nvi
->src
[j
])
340 nvi
->def
[0] = sti
->def
[0];
341 nvi
->def
[0]->insn
= nvi
;
342 nvi
->fixed
= sti
->fixed
;
346 DESCEND_ARBITRARY(j
, nv_pass_fold_stores
);
352 nv_pass_fold_loads(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
354 struct nv_instruction
*nvi
, *ld
;
357 for (nvi
= b
->entry
; nvi
; nvi
= nvi
->next
) {
358 check_swap_src_0_1(nvi
);
360 for (j
= 0; j
< 3; ++j
) {
363 ld
= nvi
->src
[j
]->value
->insn
;
367 if (is_immd_move(ld
) && nv50_nvi_can_use_imm(nvi
, j
)) {
368 nv_reference(ctx
->pc
, &nvi
->src
[j
], ld
->src
[0]->value
);
372 if (ld
->opcode
!= NV_OP_LDA
)
374 if (!nv50_nvi_can_load(nvi
, j
, ld
->src
[0]->value
))
377 if (j
== 0 && ld
->src
[4]) /* can't load shared mem */
381 nv_reference(ctx
->pc
, &nvi
->src
[j
], ld
->src
[0]->value
);
383 nv_reference(ctx
->pc
, &nvi
->src
[4], ld
->src
[4]->value
);
385 if (!nv_nvi_refcount(ld
))
389 DESCEND_ARBITRARY(j
, nv_pass_fold_loads
);
394 /* NOTE: Assumes loads have not yet been folded. */
396 nv_pass_lower_mods(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
399 struct nv_instruction
*nvi
, *mi
, *next
;
402 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
404 if (nvi
->opcode
== NV_OP_SUB
) {
405 nvi
->opcode
= NV_OP_ADD
;
406 nvi
->src
[1]->mod
^= NV_MOD_NEG
;
409 for (j
= 0; j
< 4 && nvi
->src
[j
]; ++j
) {
410 mi
= nvi
->src
[j
]->value
->insn
;
413 if (mi
->def
[0]->refc
> 1)
416 if (mi
->opcode
== NV_OP_NEG
) mod
= NV_MOD_NEG
;
418 if (mi
->opcode
== NV_OP_ABS
) mod
= NV_MOD_ABS
;
421 assert(!(mod
& mi
->src
[0]->mod
& NV_MOD_NEG
));
423 mod
|= mi
->src
[0]->mod
;
425 if (mi
->flags_def
|| mi
->flags_src
)
428 if ((nvi
->opcode
== NV_OP_ABS
) || (nvi
->src
[j
]->mod
& NV_MOD_ABS
)) {
429 /* abs neg [abs] = abs */
430 mod
&= ~(NV_MOD_NEG
| NV_MOD_ABS
);
432 if ((nvi
->opcode
== NV_OP_NEG
) && (mod
& NV_MOD_NEG
)) {
433 /* neg as opcode and modifier on same insn cannot occur */
434 /* neg neg abs = abs, neg neg = identity */
436 if (mod
& NV_MOD_ABS
)
437 nvi
->opcode
= NV_OP_ABS
;
440 nvi
->opcode
= NV_OP_CVT
;
442 nvi
->opcode
= NV_OP_MOV
;
446 if ((nv50_supported_src_mods(nvi
->opcode
, j
) & mod
) != mod
)
449 nv_reference(ctx
->pc
, &nvi
->src
[j
], mi
->src
[0]->value
);
451 nvi
->src
[j
]->mod
^= mod
;
454 if (nvi
->opcode
== NV_OP_SAT
) {
455 mi
= nvi
->src
[0]->value
->insn
;
457 if (mi
->opcode
!= NV_OP_ADD
&& mi
->opcode
!= NV_OP_MAD
)
459 if (mi
->flags_def
|| mi
->def
[0]->refc
> 1)
463 mi
->def
[0] = nvi
->def
[0];
464 mi
->def
[0]->insn
= mi
;
465 if (nvi
->flags_def
) {
466 mi
->flags_def
= nvi
->flags_def
;
467 mi
->flags_def
->insn
= mi
;
472 DESCEND_ARBITRARY(j
, nv_pass_lower_mods
);
/* TRUE if value @s is produced by a (still live) NV_OP_MUL instruction. */
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
480 modifiers_apply(uint32_t *val
, ubyte type
, ubyte mod
)
482 if (mod
& NV_MOD_ABS
) {
483 if (type
== NV_TYPE_F32
)
486 if ((*val
) & (1 << 31))
489 if (mod
& NV_MOD_NEG
) {
490 if (type
== NV_TYPE_F32
)
498 modifiers_opcode(ubyte mod
)
501 case NV_MOD_NEG
: return NV_OP_NEG
;
502 case NV_MOD_ABS
: return NV_OP_ABS
;
511 constant_expression(struct nv_pc
*pc
, struct nv_instruction
*nvi
,
512 struct nv_value
*src0
, struct nv_value
*src1
)
514 struct nv_value
*val
;
524 type
= nvi
->def
[0]->reg
.type
;
527 u0
.u32
= src0
->reg
.imm
.u32
;
528 u1
.u32
= src1
->reg
.imm
.u32
;
530 modifiers_apply(&u0
.u32
, type
, nvi
->src
[0]->mod
);
531 modifiers_apply(&u1
.u32
, type
, nvi
->src
[1]->mod
);
533 switch (nvi
->opcode
) {
535 if (nvi
->src
[2]->value
->reg
.file
!= NV_FILE_GPR
)
540 case NV_TYPE_F32
: u
.f32
= u0
.f32
* u1
.f32
; break;
541 case NV_TYPE_U32
: u
.u32
= u0
.u32
* u1
.u32
; break;
542 case NV_TYPE_S32
: u
.s32
= u0
.s32
* u1
.s32
; break;
550 case NV_TYPE_F32
: u
.f32
= u0
.f32
+ u1
.f32
; break;
551 case NV_TYPE_U32
: u
.u32
= u0
.u32
+ u1
.u32
; break;
552 case NV_TYPE_S32
: u
.s32
= u0
.s32
+ u1
.s32
; break;
560 case NV_TYPE_F32
: u
.f32
= u0
.f32
- u1
.f32
; break;
561 case NV_TYPE_U32
: u
.u32
= u0
.u32
- u1
.u32
; break;
562 case NV_TYPE_S32
: u
.s32
= u0
.s32
- u1
.s32
; break;
572 nvi
->opcode
= NV_OP_MOV
;
574 val
= new_value(pc
, NV_FILE_IMM
, type
);
576 val
->reg
.imm
.u32
= u
.u32
;
578 nv_reference(pc
, &nvi
->src
[1], NULL
);
579 nv_reference(pc
, &nvi
->src
[0], val
);
581 if (nvi
->src
[2]) { /* from MAD */
582 nvi
->src
[1] = nvi
->src
[0];
583 nvi
->src
[0] = nvi
->src
[2];
585 nvi
->opcode
= NV_OP_ADD
;
587 if (val
->reg
.imm
.u32
== 0) {
589 nvi
->opcode
= NV_OP_MOV
;
595 constant_operand(struct nv_pc
*pc
,
596 struct nv_instruction
*nvi
, struct nv_value
*val
, int s
)
609 type
= nvi
->def
[0]->reg
.type
;
611 u
.u32
= val
->reg
.imm
.u32
;
612 modifiers_apply(&u
.u32
, type
, nvi
->src
[s
]->mod
);
614 switch (nvi
->opcode
) {
616 if ((type
== NV_TYPE_F32
&& u
.f32
== 1.0f
) ||
617 (NV_TYPE_ISINT(type
) && u
.u32
== 1)) {
618 if ((op
= modifiers_opcode(nvi
->src
[t
]->mod
)) == NV_OP_NOP
)
621 nv_reference(pc
, &nvi
->src
[s
], NULL
);
622 nvi
->src
[0] = nvi
->src
[t
];
625 if ((type
== NV_TYPE_F32
&& u
.f32
== 2.0f
) ||
626 (NV_TYPE_ISINT(type
) && u
.u32
== 2)) {
627 nvi
->opcode
= NV_OP_ADD
;
628 nv_reference(pc
, &nvi
->src
[s
], nvi
->src
[t
]->value
);
629 nvi
->src
[s
]->mod
= nvi
->src
[t
]->mod
;
631 if (type
== NV_TYPE_F32
&& u
.f32
== -1.0f
) {
632 if (nvi
->src
[t
]->mod
& NV_MOD_NEG
)
633 nvi
->opcode
= NV_OP_MOV
;
635 nvi
->opcode
= NV_OP_NEG
;
636 nv_reference(pc
, &nvi
->src
[s
], NULL
);
637 nvi
->src
[0] = nvi
->src
[t
];
640 if (type
== NV_TYPE_F32
&& u
.f32
== -2.0f
) {
641 nvi
->opcode
= NV_OP_ADD
;
642 nv_reference(pc
, &nvi
->src
[s
], nvi
->src
[t
]->value
);
643 nvi
->src
[s
]->mod
= (nvi
->src
[t
]->mod
^= NV_MOD_NEG
);
646 nvi
->opcode
= NV_OP_MOV
;
647 nv_reference(pc
, &nvi
->src
[t
], NULL
);
649 nvi
->src
[0] = nvi
->src
[1];
656 if ((op
= modifiers_opcode(nvi
->src
[t
]->mod
)) == NV_OP_NOP
)
659 nv_reference(pc
, &nvi
->src
[s
], NULL
);
660 nvi
->src
[0] = nvi
->src
[t
];
665 u
.f32
= 1.0f
/ u
.f32
;
666 (val
= new_value(pc
, NV_FILE_IMM
, NV_TYPE_F32
))->reg
.imm
.f32
= u
.f32
;
667 nvi
->opcode
= NV_OP_MOV
;
669 nv_reference(pc
, &nvi
->src
[0], val
);
672 u
.f32
= 1.0f
/ sqrtf(u
.f32
);
673 (val
= new_value(pc
, NV_FILE_IMM
, NV_TYPE_F32
))->reg
.imm
.f32
= u
.f32
;
674 nvi
->opcode
= NV_OP_MOV
;
676 nv_reference(pc
, &nvi
->src
[0], val
);
682 if (nvi
->opcode
== NV_OP_MOV
&& nvi
->flags_def
) {
683 struct nv_instruction
*cvt
= new_instruction_at(pc
, nvi
, NV_OP_CVT
);
685 nv_reference(pc
, &cvt
->src
[0], nvi
->def
[0]);
687 cvt
->flags_def
= nvi
->flags_def
;
688 nvi
->flags_def
= NULL
;
693 nv_pass_lower_arith(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
695 struct nv_instruction
*nvi
, *next
;
698 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
699 struct nv_value
*src0
, *src1
, *src
;
704 src0
= nvcg_find_immediate(nvi
->src
[0]);
705 src1
= nvcg_find_immediate(nvi
->src
[1]);
708 constant_expression(ctx
->pc
, nvi
, src0
, src1
);
711 constant_operand(ctx
->pc
, nvi
, src0
, 0);
714 constant_operand(ctx
->pc
, nvi
, src1
, 1);
717 /* try to combine MUL, ADD into MAD */
718 if (nvi
->opcode
!= NV_OP_ADD
)
721 src0
= nvi
->src
[0]->value
;
722 src1
= nvi
->src
[1]->value
;
724 if (SRC_IS_MUL(src0
) && src0
->refc
== 1)
727 if (SRC_IS_MUL(src1
) && src1
->refc
== 1)
732 /* could have an immediate from above constant_* */
733 if (src0
->reg
.file
!= NV_FILE_GPR
|| src1
->reg
.file
!= NV_FILE_GPR
)
736 nvi
->opcode
= NV_OP_MAD
;
737 mod
= nvi
->src
[(src
== src0
) ? 0 : 1]->mod
;
738 nv_reference(ctx
->pc
, &nvi
->src
[(src
== src0
) ? 0 : 1], NULL
);
739 nvi
->src
[2] = nvi
->src
[(src
== src0
) ? 1 : 0];
741 assert(!(mod
& ~NV_MOD_NEG
));
742 nvi
->src
[0] = new_ref(ctx
->pc
, src
->insn
->src
[0]->value
);
743 nvi
->src
[1] = new_ref(ctx
->pc
, src
->insn
->src
[1]->value
);
744 nvi
->src
[0]->mod
= src
->insn
->src
[0]->mod
^ mod
;
745 nvi
->src
[1]->mod
= src
->insn
->src
[1]->mod
;
747 DESCEND_ARBITRARY(j
, nv_pass_lower_arith
);
/* TODO: redundant store elimination */

/* One remembered load: a key (@data, e.g. register id + address-reg id, or
 * immediate bits) and the SSA value holding the loaded result. */
struct load_record {
   struct load_record *next;
   uint64_t data[2];
   struct nv_value *value;
};

#define LOAD_RECORD_POOL_SIZE 1024

/* Per-pass state for reload elimination: one record list per memory space.
 * NOTE(review): the pc/alloc members were not visible in the garbled source
 * and were reconstructed from their uses in nv_pass_reload_elim.
 */
struct nv_pass_reld_elim {
   struct nv_pc *pc;

   struct load_record *imm;
   struct load_record *mem_s;
   struct load_record *mem_v;
   struct load_record *mem_c[16];
   struct load_record *mem_l;

   struct load_record pool[LOAD_RECORD_POOL_SIZE];
   int alloc;
};
775 /* TODO: properly handle loads from l[] memory in the presence of stores */
777 nv_pass_reload_elim(struct nv_pass_reld_elim
*ctx
, struct nv_basic_block
*b
)
779 struct load_record
**rec
, *it
;
780 struct nv_instruction
*ld
, *next
;
782 struct nv_value
*val
;
785 for (ld
= b
->entry
; ld
; ld
= next
) {
789 val
= ld
->src
[0]->value
;
792 if (ld
->opcode
== NV_OP_LINTERP
|| ld
->opcode
== NV_OP_PINTERP
) {
793 data
[0] = val
->reg
.id
;
797 if (ld
->opcode
== NV_OP_LDA
) {
798 data
[0] = val
->reg
.id
;
799 data
[1] = ld
->src
[4] ? ld
->src
[4]->value
->n
: ~0ULL;
800 if (val
->reg
.file
>= NV_FILE_MEM_C(0) &&
801 val
->reg
.file
<= NV_FILE_MEM_C(15))
802 rec
= &ctx
->mem_c
[val
->reg
.file
- NV_FILE_MEM_C(0)];
804 if (val
->reg
.file
== NV_FILE_MEM_S
)
807 if (val
->reg
.file
== NV_FILE_MEM_L
)
810 if ((ld
->opcode
== NV_OP_MOV
) && (val
->reg
.file
== NV_FILE_IMM
)) {
811 data
[0] = val
->reg
.imm
.u32
;
816 if (!rec
|| !ld
->def
[0]->refc
)
819 for (it
= *rec
; it
; it
= it
->next
)
820 if (it
->data
[0] == data
[0] && it
->data
[1] == data
[1])
824 if (ld
->def
[0]->reg
.id
>= 0)
825 it
->value
= ld
->def
[0];
828 nvcg_replace_value(ctx
->pc
, ld
->def
[0], it
->value
);
830 if (ctx
->alloc
== LOAD_RECORD_POOL_SIZE
)
832 it
= &ctx
->pool
[ctx
->alloc
++];
834 it
->data
[0] = data
[0];
835 it
->data
[1] = data
[1];
836 it
->value
= ld
->def
[0];
844 for (j
= 0; j
< 16; ++j
)
845 ctx
->mem_c
[j
] = NULL
;
849 DESCEND_ARBITRARY(j
, nv_pass_reload_elim
);
855 nv_pass_tex_mask(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
859 for (i
= 0; i
< ctx
->pc
->num_instructions
; ++i
) {
860 struct nv_instruction
*nvi
= &ctx
->pc
->instructions
[i
];
861 struct nv_value
*def
[4];
863 if (!nv_is_vector_op(nvi
->opcode
))
867 for (c
= 0; c
< 4; ++c
) {
868 if (nvi
->def
[c
]->refc
)
869 nvi
->tex_mask
|= 1 << c
;
870 def
[c
] = nvi
->def
[c
];
874 for (c
= 0; c
< 4; ++c
)
875 if (nvi
->tex_mask
& (1 << c
))
876 nvi
->def
[j
++] = def
[c
];
877 for (c
= 0; c
< 4; ++c
)
878 if (!(nvi
->tex_mask
& (1 << c
)))
879 nvi
->def
[j
++] = def
[c
];
891 nv_pass_dce(struct nv_pass_dce
*ctx
, struct nv_basic_block
*b
)
894 struct nv_instruction
*nvi
, *next
;
896 for (nvi
= b
->phi
? b
->phi
: b
->entry
; nvi
; nvi
= next
) {
899 if (inst_cullable(nvi
)) {
905 DESCEND_ARBITRARY(j
, nv_pass_dce
);
910 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
911 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
912 * BREAK and dummy ELSE block.
914 static INLINE boolean
915 bb_is_if_else_endif(struct nv_basic_block
*bb
)
917 if (!bb
->out
[0] || !bb
->out
[1])
920 if (bb
->out
[0]->out_kind
[0] == CFG_EDGE_LOOP_LEAVE
) {
921 return (bb
->out
[0]->out
[1] == bb
->out
[1]->out
[0] &&
922 !bb
->out
[1]->out
[1]);
924 return (bb
->out
[0]->out
[0] == bb
->out
[1]->out
[0] &&
925 !bb
->out
[0]->out
[1] &&
926 !bb
->out
[1]->out
[1]);
930 /* predicate instructions and remove branch at the end */
932 predicate_instructions(struct nv_pc
*pc
, struct nv_basic_block
*b
,
933 struct nv_value
*p
, ubyte cc
)
935 struct nv_instruction
*nvi
;
939 for (nvi
= b
->entry
; nvi
->next
; nvi
= nvi
->next
) {
940 if (!nvi_isnop(nvi
)) {
942 nv_reference(pc
, &nvi
->flags_src
, p
);
946 if (nvi
->opcode
== NV_OP_BRA
)
949 if (!nvi_isnop(nvi
)) {
951 nv_reference(pc
, &nvi
->flags_src
, p
);
955 /* NOTE: Run this after register allocation, we can just cut out the cflow
956 * instructions and hook the predicates to the conditional OPs if they are
957 * not using immediates; better than inserting SELECT to join definitions.
959 * NOTE: Should adapt prior optimization to make this possible more often.
962 nv_pass_flatten(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
964 struct nv_instruction
*nvi
;
965 struct nv_value
*pred
;
969 if (bb_is_if_else_endif(b
)) {
972 "pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b
->id
);
974 for (n0
= 0, nvi
= b
->out
[0]->entry
; nvi
; nvi
= nvi
->next
, ++n0
)
975 if (!nv50_nvi_can_predicate(nvi
))
978 for (n1
= 0, nvi
= b
->out
[1]->entry
; nvi
; nvi
= nvi
->next
, ++n1
)
979 if (!nv50_nvi_can_predicate(nvi
))
981 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
983 debug_printf("cannot predicate: "); nv_print_instruction(nvi
);
986 debug_printf("cannot predicate: "); nv_print_instruction(nvi
);
990 if (!nvi
&& n0
< 12 && n1
< 12) { /* 12 as arbitrary limit */
991 assert(b
->exit
&& b
->exit
->flags_src
);
992 pred
= b
->exit
->flags_src
->value
;
994 predicate_instructions(ctx
->pc
, b
->out
[0], pred
, NV_CC_NE
| NV_CC_U
);
995 predicate_instructions(ctx
->pc
, b
->out
[1], pred
, NV_CC_EQ
);
997 assert(b
->exit
&& b
->exit
->opcode
== NV_OP_BRA
);
998 nv_nvi_delete(b
->exit
);
1000 if (b
->exit
&& b
->exit
->opcode
== NV_OP_JOINAT
)
1001 nv_nvi_delete(b
->exit
);
1003 i
= (b
->out
[0]->out_kind
[0] == CFG_EDGE_LOOP_LEAVE
) ? 1 : 0;
1005 if ((nvi
= b
->out
[0]->out
[i
]->entry
)) {
1007 if (nvi
->opcode
== NV_OP_JOIN
)
1012 DESCEND_ARBITRARY(i
, nv_pass_flatten
);
1017 /* local common subexpression elimination, stupid O(n^2) implementation */
1019 nv_pass_cse(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
1021 struct nv_instruction
*ir
, *ik
, *next
;
1022 struct nv_instruction
*entry
= b
->phi
? b
->phi
: b
->entry
;
1028 for (ir
= entry
; ir
; ir
= next
) {
1030 for (ik
= entry
; ik
!= ir
; ik
= ik
->next
) {
1031 if (ir
->opcode
!= ik
->opcode
|| ir
->fixed
)
1034 if (!ir
->def
[0] || !ik
->def
[0] ||
1035 ik
->opcode
== NV_OP_LDA
||
1036 ik
->opcode
== NV_OP_STA
||
1037 ik
->opcode
== NV_OP_MOV
||
1038 nv_is_vector_op(ik
->opcode
))
1039 continue; /* ignore loads, stores & moves */
1041 if (ik
->src
[4] || ir
->src
[4])
1042 continue; /* don't mess with address registers */
1044 if (ik
->flags_src
|| ir
->flags_src
||
1045 ik
->flags_def
|| ir
->flags_def
)
1046 continue; /* and also not with flags, for now */
1048 if (ik
->def
[0]->reg
.file
== NV_FILE_OUT
||
1049 ir
->def
[0]->reg
.file
== NV_FILE_OUT
||
1050 !values_equal(ik
->def
[0], ir
->def
[0]))
1053 for (s
= 0; s
< 3; ++s
) {
1054 struct nv_value
*a
, *b
;
1061 if (ik
->src
[s
]->mod
!= ir
->src
[s
]->mod
)
1063 a
= ik
->src
[s
]->value
;
1064 b
= ir
->src
[s
]->value
;
1067 if (a
->reg
.file
!= b
->reg
.file
||
1069 a
->reg
.id
!= b
->reg
.id
)
1075 nvcg_replace_value(ctx
->pc
, ir
->def
[0], ik
->def
[0]);
1082 DESCEND_ARBITRARY(s
, nv_pass_cse
);
1088 nv_pc_pass0(struct nv_pc
*pc
, struct nv_basic_block
*root
)
1090 struct nv_pass_reld_elim
*reldelim
;
1091 struct nv_pass pass
;
1092 struct nv_pass_dce dce
;
1098 /* Do this first, so we don't have to pay attention
1099 * to whether sources are supported memory loads.
1102 ret
= nv_pass_lower_arith(&pass
, root
);
1107 ret
= nv_pass_lower_mods(&pass
, root
);
1112 ret
= nv_pass_fold_loads(&pass
, root
);
1117 ret
= nv_pass_fold_stores(&pass
, root
);
1121 if (pc
->opt_reload_elim
) {
1122 reldelim
= CALLOC_STRUCT(nv_pass_reld_elim
);
1125 ret
= nv_pass_reload_elim(reldelim
, root
);
1132 ret
= nv_pass_cse(&pass
, root
);
1140 ret
= nv_pass_dce(&dce
, root
);
1143 } while (dce
.removed
);
1145 ret
= nv_pass_tex_mask(&pass
, root
);
1153 nv_pc_exec_pass0(struct nv_pc
*pc
)
1157 for (i
= 0; i
< pc
->num_subroutines
+ 1; ++i
)
1158 if (pc
->root
[i
] && (ret
= nv_pc_pass0(pc
, pc
->root
[i
])))