2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/* Recursive CFG walker used by the optimization passes below: stamp block
 * b as visited (pass_seq) and descend into each of its two possible
 * successors out[0..1] that has not been visited in the current pass,
 * presumably invoking pass function f(ctx, b->out[j]) on it.
 * NOTE(review): original lines 26, 28 and 31-33 (the do/while wrapper,
 * the seq bump and the recursive f() call) are missing from this
 * extraction -- restore from upstream before compiling.
 */
25 #define DESCEND_ARBITRARY(j, f) \
27 b->pass_seq = ctx->pc->pass_seq; \
29 for (j = 0; j < 2; ++j) \
30 if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
/* Minimum encoding size in bytes of an instruction; defined in the nv50
 * emitter. NOTE(review): the `size < 8' and `== 4' comparisons below
 * suggest it returns 4 or 8 -- confirm against the emitter.
 */
34 extern unsigned nv50_inst_min_size(struct nv_instruction
*);
41 values_equal(struct nv_value
*a
, struct nv_value
*b
)
44 return (a
->reg
.file
== b
->reg
.file
&& a
->join
->reg
.id
== b
->join
->reg
.id
);
/* One-directional dependency check for commutation: scans every def of a
 * (up to 4) against every source of b (up to 5) with values_equal(), and
 * also checks whether b reads the flags value a defines -- presumably
 * returning FALSE on any overlap so the pair cannot be reordered.
 * NOTE(review): the original's interior lines (loop-body guards, the
 * early `return FALSE' statements and the final `return TRUE') are
 * missing from this extraction -- restore from upstream before compiling.
 */
48 inst_commutation_check(struct nv_instruction
*a
,
49 struct nv_instruction
*b
)
53 for (di
= 0; di
< 4; ++di
) {
56 for (si
= 0; si
< 5; ++si
) {
59 if (values_equal(a
->def
[di
], b
->src
[si
]->value
))
64 if (b
->flags_src
&& b
->flags_src
->value
== a
->flags_def
)
70 /* Check whether we can swap the order of the instructions,
71 * where a & b may be either the earlier or the later one.
74 inst_commutation_legal(struct nv_instruction
*a
,
75 struct nv_instruction
*b
)
77 return inst_commutation_check(a
, b
) && inst_commutation_check(b
, a
);
/* Whether dead-code elimination may delete nvi: it must not be a block
 * terminator and must have no remaining uses (nv_nvi_refcount).
 * NOTE(review): original lines 84-85 -- further conditions in this
 * ||-chain between is_terminator and the refcount test -- are missing
 * from this extraction; restore from upstream before compiling.
 */
81 inst_cullable(struct nv_instruction
*nvi
)
83 return (!(nvi
->is_terminator
||
86 nv_nvi_refcount(nvi
)));
/* Decide whether nvi has become an effective no-op after register
 * allocation: a MOV (or a SELECT with identical operands) whose source
 * and destination were coalesced into the same file/register, or an
 * EXPORT-related case handled first. A join reg.id < 0 on the source is
 * reported as an orphaned value.
 * NOTE(review): the original's `return TRUE/FALSE' lines and several
 * guard bodies (orig. 93-100, 102-103, 105-106, 108-109, 112-114,
 * 117-118) are missing from this extraction, so the polarity of each
 * early exit is inferred -- restore from upstream before compiling.
 */
90 nvi_isnop(struct nv_instruction
*nvi
)
92 if (nvi
->opcode
== NV_OP_EXPORT
)
101 if (nvi
->def
[0]->join
->reg
.id
< 0)
104 if (nvi
->opcode
!= NV_OP_MOV
&& nvi
->opcode
!= NV_OP_SELECT
)
107 if (nvi
->def
[0]->reg
.file
!= nvi
->src
[0]->value
->reg
.file
)
110 if (nvi
->src
[0]->value
->join
->reg
.id
< 0) {
111 debug_printf("nvi_isnop: orphaned value detected\n");
115 if (nvi
->opcode
== NV_OP_SELECT
)
116 if (!values_equal(nvi
->def
[0], nvi
->src
[1]->value
))
119 return values_equal(nvi
->def
[0], nvi
->src
[0]->value
);
/* Pre-emission pass over one basic block (recurses over the CFG at the
 * end): records the block's binary position after the previously laid-out
 * block, appends it to pc->bb_list, deletes no-op instructions, computes
 * per-instruction encoding sizes (pairing two 4-byte "short" ops, and
 * permuting adjacent instructions via nv_nvi_permute when that is legal
 * per inst_commutation_legal to fix up odd short-op counts), accumulates
 * b->bin_size, forces the exit (and, on odd alignment, its predecessor)
 * to the long encoding, then scales bin_size to bytes (*= 4) and adds it
 * to pc->bin_size. Also deletes a redundant ELSE branch (a BRA targeting
 * out[0]) once all of the join block's predecessors were emitted.
 * NOTE(review): many original lines (braces, `continue'/delete calls,
 * size/n32 bookkeeping, declarations of `size'/`n32'/`j') are missing
 * from this extraction -- restore from upstream before compiling.
 */
123 nv_pc_pass_pre_emission(struct nv_pc
*pc
, struct nv_basic_block
*b
)
125 struct nv_instruction
*nvi
, *next
;
132 b
->bin_pos
= pc
->bb_list
[pc
->num_blocks
- 1]->bin_pos
+
133 pc
->bb_list
[pc
->num_blocks
- 1]->bin_size
;
135 pc
->bb_list
[pc
->num_blocks
++] = b
;
139 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
145 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
148 size
= nv50_inst_min_size(nvi
);
149 if (nvi
->next
&& size
< 8)
152 if ((n32
& 1) && nvi
->next
&&
153 nv50_inst_min_size(nvi
->next
) == 4 &&
154 inst_commutation_legal(nvi
, nvi
->next
)) {
156 debug_printf("permuting: ");
157 nv_print_instruction(nvi
);
158 nv_print_instruction(nvi
->next
);
159 nv_nvi_permute(nvi
, nvi
->next
);
164 b
->bin_size
+= n32
& 1;
166 nvi
->prev
->is_long
= 1;
169 b
->bin_size
+= 1 + nvi
->is_long
;
173 debug_printf("block %p is now empty\n", b
);
175 if (!b
->exit
->is_long
) {
177 b
->exit
->is_long
= 1;
180 /* might have del'd a whole tail of instructions */
181 if (!b
->exit
->prev
->is_long
&& !(n32
& 1)) {
183 b
->exit
->prev
->is_long
= 1;
186 assert(!b
->exit
|| b
->exit
->is_long
);
188 pc
->bin_size
+= b
->bin_size
*= 4;
194 if (!b
->out
[1] && ++(b
->out
[0]->priv
) != b
->out
[0]->num_in
)
198 /* delete ELSE branch */
200 b
->entry
->opcode
== NV_OP_BRA
&& b
->entry
->target
== b
->out
[0]) {
201 nv_nvi_delete(b
->entry
);
206 for (j
= 0; j
< 2; ++j
)
207 if (b
->out
[j
] && b
->out
[j
] != b
)
208 nv_pc_pass_pre_emission(pc
, b
->out
[j
]);
/* Entry point for pass 2: allocates the block layout list (one pointer
 * per block), resets num_blocks (original line missing) and kicks off the
 * recursive pre-emission walk from the CFG root.
 * NOTE(review): original lines 213, 215, 217-218 and 220+ (declarations,
 * the num_blocks reset, FREE of bb_list and the return) are missing from
 * this extraction -- restore from upstream before compiling.
 */
212 nv_pc_exec_pass2(struct nv_pc
*pc
)
214 debug_printf("preparing %u blocks for emission\n", pc
->num_blocks
);
216 pc
->bb_list
= CALLOC(pc
->num_blocks
, sizeof(struct nv_basic_block
*));
219 nv_pc_pass_pre_emission(pc
, pc
->root
);
224 static INLINE boolean
225 is_cmem_load(struct nv_instruction
*nvi
)
227 return (nvi
->opcode
== NV_OP_LDA
&&
228 nvi
->src
[0]->value
->reg
.file
>= NV_FILE_MEM_C(0) &&
229 nvi
->src
[0]->value
->reg
.file
<= NV_FILE_MEM_C(15));
232 static INLINE boolean
233 is_smem_load(struct nv_instruction
*nvi
)
235 return (nvi
->opcode
== NV_OP_LDA
&&
236 (nvi
->src
[0]->value
->reg
.file
== NV_FILE_MEM_S
||
237 nvi
->src
[0]->value
->reg
.file
<= NV_FILE_MEM_P
));
240 static INLINE boolean
241 is_immd_move(struct nv_instruction
*nvi
)
243 return (nvi
->opcode
== NV_OP_MOV
&&
244 nvi
->src
[0]->value
->reg
.file
== NV_FILE_IMM
);
/* For commutative ops, normalize operand order: move a constant-memory
 * load to source slot 1 and a shared/attribute-memory load to slot 0
 * (each encoding can only accept such operands in one position). When a
 * SET's operands were swapped, remap its condition code through the
 * cc_swapped table so the comparison result is preserved.
 * NOTE(review): the original lines performing the actual swap
 * (nvi->src[0]/[1] assignments, orig. 260-261 and 267-268) plus several
 * guards/braces are missing from this extraction -- restore from
 * upstream before compiling.
 */
248 check_swap_src_0_1(struct nv_instruction
*nvi
)
250 static const ubyte cc_swapped
[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
252 struct nv_ref
*src0
= nvi
->src
[0], *src1
= nvi
->src
[1];
254 if (!nv_op_commutative(nvi
->opcode
))
256 assert(src0
&& src1
);
258 if (is_cmem_load(src0
->value
->insn
)) {
259 if (!is_cmem_load(src1
->value
->insn
)) {
262 /* debug_printf("swapping cmem load to 1\n"); */
265 if (is_smem_load(src1
->value
->insn
)) {
266 if (!is_smem_load(src0
->value
->insn
)) {
269 /* debug_printf("swapping smem load to 0\n"); */
273 if (nvi
->opcode
== NV_OP_SET
&& nvi
->src
[0] != src0
)
274 nvi
->set_cond
= cc_swapped
[nvi
->set_cond
];
/* Fold producer instructions directly into output-register stores: for
 * each MOV/STA writing an NV_FILE_OUT value whose source has exactly one
 * use and a known non-PHI, non-immediate producer, retarget the
 * producer's def[0] to the store's destination and copy the store's
 * `fixed' flag (the store itself is presumably deleted on a missing
 * line). Recurses over the CFG via DESCEND_ARBITRARY.
 * NOTE(review): the original's `continue' guards, the store deletion and
 * several braces/declarations (e.g. of `j') are missing from this
 * extraction -- restore from upstream before compiling.
 */
284 nv_pass_fold_stores(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
286 struct nv_instruction
*nvi
, *sti
;
289 for (sti
= b
->entry
; sti
; sti
= sti
->next
) {
290 if (!sti
->def
[0] || sti
->def
[0]->reg
.file
!= NV_FILE_OUT
)
293 /* only handling MOV to $oX here */
294 if (sti
->opcode
!= NV_OP_MOV
&& sti
->opcode
!= NV_OP_STA
)
297 nvi
= sti
->src
[0]->value
->insn
;
298 if (!nvi
|| nvi
->opcode
== NV_OP_PHI
)
300 assert(nvi
->def
[0] == sti
->src
[0]->value
);
302 if (nvi
->def
[0]->refc
> 1)
305 /* cannot MOV immediate to $oX */
306 if (nvi
->src
[0]->value
->reg
.file
== NV_FILE_IMM
)
309 nvi
->def
[0] = sti
->def
[0];
311 nvi
->fixed
= sti
->fixed
;
314 DESCEND_ARBITRARY(j
, nv_pass_fold_stores
)
;
/* Fold loads and immediates into their single consumer: after normalizing
 * operand order (check_swap_src_0_1), each of the first three sources
 * whose producer is an immediate MOV usable as an inline immediate
 * (nv50_nvi_can_use_imm) or an LDA loadable in place
 * (nv50_nvi_can_load) is re-referenced directly to the producer's
 * source, including the address-register source in slot 4 when present.
 * Recurses over the CFG via DESCEND_ARBITRARY.
 * NOTE(review): the original's `continue' guards, braces and the
 * declaration of `j' are missing from this extraction -- restore from
 * upstream before compiling.
 */
320 nv_pass_fold_loads(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
322 struct nv_instruction
*nvi
, *ld
;
325 for (nvi
= b
->entry
; nvi
; nvi
= nvi
->next
) {
326 check_swap_src_0_1(nvi
);
328 for (j
= 0; j
< 3; ++j
) {
331 ld
= nvi
->src
[j
]->value
->insn
;
335 if (is_immd_move(ld
) && nv50_nvi_can_use_imm(nvi
, j
)) {
336 nv_reference(ctx
->pc
, &nvi
->src
[j
], ld
->src
[0]->value
);
337 debug_printf("folded immediate %i\n", ld
->def
[0]->n
);
341 if (ld
->opcode
!= NV_OP_LDA
)
343 if (!nv50_nvi_can_load(nvi
, j
, ld
->src
[0]->value
))
346 if (j
== 0 && ld
->src
[4]) /* can't load shared mem */
349 /* fold it ! */ /* XXX: ref->insn */
350 nv_reference(ctx
->pc
, &nvi
->src
[j
], ld
->src
[0]->value
);
352 nv_reference(ctx
->pc
, &nvi
->src
[4], ld
->src
[4]->value
);
355 DESCEND_ARBITRARY(j
, nv_pass_fold_loads
)
;
/* Lower NEG/ABS/SUB/SAT into source modifiers: SUB becomes ADD with
 * src[1] negated; single-use NEG/ABS producers are absorbed into the
 * consumer's src modifier where nv50_supported_src_mods allows it (NEG of
 * NEG degenerates to MOV, ABS cancels incoming NEG/ABS); SAT of a MAD
 * without a flags def is merged into the MAD's destination. Recurses over
 * the CFG via DESCEND_ARBITRARY.
 * NOTE(review): many original lines (guards, braces, the `mod'/`j'
 * declarations, instruction deletion) are missing from this extraction --
 * restore from upstream before compiling.
 */
361 nv_pass_lower_mods(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
364 struct nv_instruction
*nvi
, *mi
, *next
;
367 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
369 if (nvi
->opcode
== NV_OP_SUB
) {
370 nvi
->opcode
= NV_OP_ADD
;
371 nvi
->src
[1]->mod
^= NV_MOD_NEG
;
374 /* should not put any modifiers on NEG and ABS */
/* NOTE(review): these two asserts compare an opcode against the modifier
 * constants NV_MOD_NEG/NV_MOD_ABS; lines 388/390 below show the opcode
 * constants are NV_OP_NEG/NV_OP_ABS, so these asserts almost certainly
 * test the wrong constants (and may be vacuously true) -- fix upstream.
 */
375 assert(nvi
->opcode
!= NV_MOD_NEG
|| !nvi
->src
[0]->mod
);
376 assert(nvi
->opcode
!= NV_MOD_ABS
|| !nvi
->src
[0]->mod
);
378 for (j
= 0; j
< 4; ++j
) {
382 mi
= nvi
->src
[j
]->value
->insn
;
385 if (mi
->def
[0]->refc
> 1)
388 if (mi
->opcode
== NV_OP_NEG
) mod
= NV_MOD_NEG
;
390 if (mi
->opcode
== NV_OP_ABS
) mod
= NV_MOD_ABS
;
394 if (nvi
->opcode
== NV_OP_ABS
)
395 mod
&= ~(NV_MOD_NEG
| NV_MOD_ABS
);
397 if (nvi
->opcode
== NV_OP_NEG
&& mod
== NV_MOD_NEG
) {
398 nvi
->opcode
= NV_OP_MOV
;
402 if (!(nv50_supported_src_mods(nvi
->opcode
, j
) & mod
))
405 nv_reference(ctx
->pc
, &nvi
->src
[j
], mi
->src
[0]->value
);
407 nvi
->src
[j
]->mod
^= mod
;
410 if (nvi
->opcode
== NV_OP_SAT
) {
411 mi
= nvi
->src
[0]->value
->insn
;
413 if ((mi
->opcode
== NV_OP_MAD
) && !mi
->flags_def
) {
415 mi
->def
[0] = nvi
->def
[0];
420 DESCEND_ARBITRARY(j
, nv_pass_lower_mods
)
;
/* True if value s was produced by a MUL instruction (used below to fuse
 * MUL + ADD into MAD).
 */
425 #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
/* Chase a chain of MOVs from ref's value back to its origin; returns the
 * origin if it is an immediate (NV_FILE_IMM), NULL otherwise. Asserts
 * the skipped MOVs carry no source modifiers, which would change the
 * value.
 * NOTE(review): original lines 429-435 (opening brace, a NULL-ref guard
 * and the `src = ref->value' initialization) are missing from this
 * extraction -- restore from upstream before compiling.
 */
427 static struct nv_value
*
428 find_immediate(struct nv_ref
*ref
)
430 struct nv_value
*src
;
436 while (src
->insn
&& src
->insn
->opcode
== NV_OP_MOV
) {
437 assert(!src
->insn
->src
[0]->mod
);
438 src
= src
->insn
->src
[0]->value
;
440 return (src
->reg
.file
== NV_FILE_IMM
) ? src
: NULL
;
/* Algebraic simplification when source s of nvi is the immediate val,
 * switching on the opcode (case labels are among the missing lines):
 * visible rewrites are x*1 -> MOV, x*2 -> ADD x,x, x*-1 -> NEG,
 * x*-2 -> ADD with both sources negated, x*0 / x+0 -> MOV of the other
 * operand. `t' is presumably the index of the other source (1 - s) and
 * `type' the result type used to match 1.0f/2.0f vs integer 1/2.
 * NOTE(review): the original's case/break lines, the declarations of
 * `t' and `type', and the dropped-reference cleanup are missing from
 * this extraction -- restore from upstream before compiling.
 */
444 constant_operand(struct nv_pc
*pc
,
445 struct nv_instruction
*nvi
, struct nv_value
*val
, int s
)
452 type
= nvi
->def
[0]->reg
.type
;
454 switch (nvi
->opcode
) {
456 if ((type
== NV_TYPE_F32
&& val
->reg
.imm
.f32
== 1.0f
) ||
457 (NV_TYPE_ISINT(type
) && val
->reg
.imm
.u32
== 1)) {
458 nvi
->opcode
= NV_OP_MOV
;
459 nv_reference(pc
, &nvi
->src
[s
], NULL
);
461 nvi
->src
[0] = nvi
->src
[1];
465 if ((type
== NV_TYPE_F32
&& val
->reg
.imm
.f32
== 2.0f
) ||
466 (NV_TYPE_ISINT(type
) && val
->reg
.imm
.u32
== 2)) {
467 nvi
->opcode
= NV_OP_ADD
;
468 nv_reference(pc
, &nvi
->src
[s
], NULL
);
470 nvi
->src
[0] = nvi
->src
[1];
474 if (type
== NV_TYPE_F32
&& val
->reg
.imm
.f32
== -1.0f
) {
475 nvi
->opcode
= NV_OP_NEG
;
476 nv_reference(pc
, &nvi
->src
[s
], NULL
);
477 nvi
->src
[0] = nvi
->src
[t
];
480 if (type
== NV_TYPE_F32
&& val
->reg
.imm
.f32
== -2.0f
) {
481 nvi
->opcode
= NV_OP_ADD
;
482 assert(!nvi
->src
[s
]->mod
);
483 nv_reference(pc
, &nvi
->src
[s
], nvi
->src
[t
]->value
);
484 nvi
->src
[t
]->mod
^= NV_MOD_NEG
;
485 nvi
->src
[s
]->mod
|= NV_MOD_NEG
;
487 if (val
->reg
.imm
.u32
== 0) {
488 nvi
->opcode
= NV_OP_MOV
;
489 nv_reference(pc
, &nvi
->src
[t
], NULL
);
491 nvi
->src
[0] = nvi
->src
[1];
497 if (val
->reg
.imm
.u32
== 0) {
498 nvi
->opcode
= NV_OP_MOV
;
499 nv_reference(pc
, &nvi
->src
[s
], NULL
);
500 nvi
->src
[0] = nvi
->src
[t
];
/* Arithmetic lowering over one block: feed immediate operands (found by
 * chasing MOV chains with find_immediate) into constant_operand for
 * algebraic simplification, then fuse ADD with a single-use MUL operand
 * into MAD -- the MUL's two sources become src[0]/src[1] (the ADD-side
 * modifier is folded into src[0]'s, asserted to be at most NEG) and the
 * remaining ADD operand becomes src[2]. Recurses via DESCEND_ARBITRARY.
 * NOTE(review): the original's guards/braces, the `mod'/`j' declarations
 * and the selection of which MUL to fuse (orig. 535-541) are missing
 * from this extraction -- restore from upstream before compiling.
 */
510 nv_pass_lower_arith(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
512 struct nv_instruction
*nvi
, *next
;
515 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
516 struct nv_value
*src0
, *src1
, *src
;
521 if ((src
= find_immediate(nvi
->src
[0])) != NULL
)
522 constant_operand(ctx
->pc
, nvi
, src
, 0);
524 if ((src
= find_immediate(nvi
->src
[1])) != NULL
)
525 constant_operand(ctx
->pc
, nvi
, src
, 1);
527 /* try to combine MUL, ADD into MAD */
528 if (nvi
->opcode
!= NV_OP_ADD
)
531 src0
= nvi
->src
[0]->value
;
532 src1
= nvi
->src
[1]->value
;
534 if (SRC_IS_MUL(src0
) && src0
->refc
== 1)
537 if (SRC_IS_MUL(src1
) && src1
->refc
== 1)
542 nvi
->opcode
= NV_OP_MAD
;
543 mod
= nvi
->src
[(src
== src0
) ? 0 : 1]->mod
;
544 nv_reference(ctx
->pc
, &nvi
->src
[(src
== src0
) ? 0 : 1], NULL
);
545 nvi
->src
[2] = nvi
->src
[(src
== src0
) ? 1 : 0];
547 assert(!(mod
& ~NV_MOD_NEG
));
548 nvi
->src
[0] = new_ref(ctx
->pc
, src
->insn
->src
[0]->value
);
549 nvi
->src
[1] = new_ref(ctx
->pc
, src
->insn
->src
[1]->value
);
550 nvi
->src
[0]->mod
= src
->insn
->src
[0]->mod
^ mod
;
551 nvi
->src
[1]->mod
= src
->insn
->src
[1]->mod
;
553 DESCEND_ARBITRARY(j
, nv_pass_lower_arith
)
;
559 set $r2 g f32 $r2 $r3
560 cvt abs rn f32 $r2 s32 $r2
561 cvt f32 $c0 # f32 $r2
/* Placeholder pass for lowering conditionals (see the shader-assembly
 * sketch in the comment above); deliberately not implemented here.
 * NOTE(review): the original body (presumably just a return) is missing
 * from this extraction -- restore from upstream before compiling.
 */
566 nv_pass_lower_cond(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
568 /* XXX: easier in IR builder for now */
573 /* TODO: redundant store elimination */
/* Linked-list record of an already-seen load/immediate: `value' is the
 * SSA value holding the loaded data, `next' chains records in the same
 * bucket. Records are pool-allocated, LOAD_RECORD_POOL_SIZE per pass.
 * NOTE(review): the original's `struct load_record {' header, its `data'
 * key field (compared at orig. line 637) and the closing brace are
 * missing from this extraction -- restore from upstream.
 */
576 struct load_record
*next
;
578 struct nv_value
*value
;
581 #define LOAD_RECORD_POOL_SIZE 1024
/* Context for the redundant-load elimination pass: one record list per
 * memory space -- immediates (imm), shared/attribute mem (mem_s), the
 * 16 constant buffers (mem_c[16]), plus mem_v and local mem (mem_l) --
 * and a fixed pool of load_records with a bump allocator.
 * NOTE(review): the original's `pc' back-pointer, the `alloc' counter
 * (used at orig. 648/650) and the closing `};' are missing from this
 * extraction -- restore from upstream.
 */
583 struct nv_pass_reld_elim
{
586 struct load_record
*imm
;
587 struct load_record
*mem_s
;
588 struct load_record
*mem_v
;
589 struct load_record
*mem_c
[16];
590 struct load_record
*mem_l
;
592 struct load_record pool
[LOAD_RECORD_POOL_SIZE
];
/* Redundant load/immediate elimination over one block: classify each LDA
 * (by reg.file: constant buffers, shared mem, local mem) or immediate
 * MOV into a record bucket keyed by `data' (the address/immediate).
 * On a bucket hit, replace the load's result with the recorded value and
 * degrade the load to a MOV of it; otherwise record the new value in the
 * pool. Buckets (including all 16 mem_c lists) are reset before
 * descending into successors via DESCEND_ARBITRARY.
 * NOTE(review): the original's `data' computation for LDA (address key),
 * `continue' guards, pool-exhaustion handling, remaining record-field
 * stores and several resets are missing from this extraction -- restore
 * from upstream before compiling.
 */
597 nv_pass_reload_elim(struct nv_pass_reld_elim
*ctx
, struct nv_basic_block
*b
)
599 struct load_record
**rec
, *it
;
600 struct nv_instruction
*ld
, *next
;
602 struct nv_value
*val
;
605 for (ld
= b
->entry
; ld
; ld
= next
) {
609 val
= ld
->src
[0]->value
;
612 if (ld
->opcode
== NV_OP_LINTERP
|| ld
->opcode
== NV_OP_PINTERP
) {
616 if (ld
->opcode
== NV_OP_LDA
) {
618 if (val
->reg
.file
>= NV_FILE_MEM_C(0) &&
619 val
->reg
.file
<= NV_FILE_MEM_C(15))
620 rec
= &ctx
->mem_c
[val
->reg
.file
- NV_FILE_MEM_C(0)];
622 if (val
->reg
.file
== NV_FILE_MEM_S
)
625 if (val
->reg
.file
== NV_FILE_MEM_L
)
628 if ((ld
->opcode
== NV_OP_MOV
) && (val
->reg
.file
== NV_FILE_IMM
)) {
629 data
= val
->reg
.imm
.u32
;
633 if (!rec
|| !ld
->def
[0]->refc
)
636 for (it
= *rec
; it
; it
= it
->next
)
637 if (it
->data
== data
)
642 nvcg_replace_value(ctx
->pc
, ld
->def
[0], it
->value
);
644 ld
->opcode
= NV_OP_MOV
;
645 nv_reference(ctx
->pc
, &ld
->src
[0], it
->value
);
648 if (ctx
->alloc
== LOAD_RECORD_POOL_SIZE
)
650 it
= &ctx
->pool
[ctx
->alloc
++];
653 it
->value
= ld
->def
[0];
661 for (j
= 0; j
< 16; ++j
)
662 ctx
->mem_c
[j
] = NULL
;
666 DESCEND_ARBITRARY(j
, nv_pass_reload_elim
)
;
/* Build the component write-mask for vector (texture) ops: iterates all
 * instructions in the program (not per-block), sets a tex_mask bit for
 * each of the 4 defs that is actually referenced, then compacts the def
 * array so live components come first, dead ones after.
 * NOTE(review): the original's declarations of `i'/`c'/`j', the reset of
 * `j' between the two compaction loops, and the trailing return are
 * missing from this extraction -- restore from upstream before
 * compiling.
 */
672 nv_pass_tex_mask(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
676 for (i
= 0; i
< ctx
->pc
->num_instructions
; ++i
) {
677 struct nv_instruction
*nvi
= &ctx
->pc
->instructions
[i
];
678 struct nv_value
*def
[4];
680 if (!nv_is_vector_op(nvi
->opcode
))
684 for (c
= 0; c
< 4; ++c
) {
685 if (nvi
->def
[c
]->refc
)
686 nvi
->tex_mask
|= 1 << c
;
687 def
[c
] = nvi
->def
[c
];
691 for (c
= 0; c
< 4; ++c
)
692 if (nvi
->tex_mask
& (1 << c
))
693 nvi
->def
[j
++] = def
[c
];
694 for (c
= 0; c
< 4; ++c
)
695 if (!(nvi
->tex_mask
& (1 << c
)))
696 nvi
->def
[j
++] = def
[c
];
/* Dead-code elimination over one block: delete every instruction that
 * inst_cullable() approves (no uses, not a terminator), presumably
 * bumping ctx->removed so the caller can iterate to a fixed point (see
 * the do/while on dce.removed in pass0). Recurses via DESCEND_ARBITRARY.
 * NOTE(review): the original's `next = nvi->next' capture and the
 * deletion/counting statements are missing from this extraction --
 * restore from upstream before compiling.
 */
708 nv_pass_dce(struct nv_pass_dce
*ctx
, struct nv_basic_block
*b
)
711 struct nv_instruction
*nvi
, *next
;
713 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
716 if (inst_cullable(nvi
)) {
722 DESCEND_ARBITRARY(j
, nv_pass_dce
)
;
727 static INLINE boolean
728 bb_simple_if_endif(struct nv_basic_block
*bb
)
730 return (bb
->out
[0] && bb
->out
[1] &&
731 bb
->out
[0]->out
[0] == bb
->out
[1] &&
732 !bb
->out
[0]->out
[1]);
/* Count (and presumably flatten/predicate) simple IF/ENDIF constructs
 * detected by bb_simple_if_endif, reporting the running total in ctx->n.
 * Recurses via DESCEND_ARBITRARY.
 * NOTE(review): the original lines between the detection and the
 * debug_printf (the counter increment and any flattening work) are
 * missing from this extraction -- restore from upstream before
 * compiling.
 */
736 nv_pass_flatten(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
740 if (bb_simple_if_endif(b
)) {
742 debug_printf("nv_pass_flatten: total IF/ENDIF constructs: %i\n", ctx
->n
);
744 DESCEND_ARBITRARY(j
, nv_pass_flatten
)
;
749 /* local common subexpression elimination, stupid O(n^2) implementation */
/* For each instruction ir (starting at the PHIs if present), scan every
 * earlier instruction ik of the same opcode; loads, stores, moves and
 * vector ops are skipped, as are pairs using the address-register slot
 * src[4]. If all three compared sources match in modifier, register
 * file and register id, ir's result is replaced by ik's via
 * nvcg_replace_value (ir itself presumably deleted on a missing line).
 * Recurses via DESCEND_ARBITRARY.
 * NOTE(review): the original's `continue'/match-flag lines, several
 * source-presence guards (orig. 777-782, 787-788, 790, 792-796) and the
 * deletion of the duplicate are missing from this extraction -- restore
 * from upstream before compiling.
 */
751 nv_pass_cse(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
753 struct nv_instruction
*ir
, *ik
, *next
;
754 struct nv_instruction
*entry
= b
->phi
? b
->phi
: b
->entry
;
760 for (ir
= entry
; ir
; ir
= next
) {
762 for (ik
= entry
; ik
!= ir
; ik
= ik
->next
) {
763 if (ir
->opcode
!= ik
->opcode
)
766 if (ik
->opcode
== NV_OP_LDA
||
767 ik
->opcode
== NV_OP_STA
||
768 ik
->opcode
== NV_OP_MOV
||
769 nv_is_vector_op(ik
->opcode
))
770 continue; /* ignore loads, stores & moves */
772 if (ik
->src
[4] || ir
->src
[4])
773 continue; /* don't mess with address registers */
775 for (s
= 0; s
< 3; ++s
) {
776 struct nv_value
*a
, *b
;
783 if (ik
->src
[s
]->mod
!= ir
->src
[s
]->mod
)
785 a
= ik
->src
[s
]->value
;
786 b
= ir
->src
[s
]->value
;
789 if (a
->reg
.file
!= b
->reg
.file
||
791 a
->reg
.id
!= b
->reg
.id
)
797 nvcg_replace_value(ctx
->pc
, ir
->def
[0], ik
->def
[0]);
804 DESCEND_ARBITRARY(s
, nv_pass_cse
)
;
810 nv_pc_exec_pass0(struct nv_pc
*pc
)
812 struct nv_pass_reld_elim
*reldelim
;
814 struct nv_pass_dce dce
;
820 ret
= nv_pass_flatten(&pass
, pc
->root
);
824 /* Do this first, so we don't have to pay attention
825 * to whether sources are supported memory loads.
828 ret
= nv_pass_lower_arith(&pass
, pc
->root
);
833 ret
= nv_pass_fold_loads(&pass
, pc
->root
);
838 ret
= nv_pass_fold_stores(&pass
, pc
->root
);
842 reldelim
= CALLOC_STRUCT(nv_pass_reld_elim
);
845 ret
= nv_pass_reload_elim(reldelim
, pc
->root
);
851 ret
= nv_pass_cse(&pass
, pc
->root
);
856 ret
= nv_pass_lower_mods(&pass
, pc
->root
);
864 ret
= nv_pass_dce(&dce
, pc
->root
);
867 } while (dce
.removed
);
869 ret
= nv_pass_tex_mask(&pass
, pc
->root
);