/* Macro: recurse an optimization pass f into the CFG successors of block b
 * that have not been visited in the current pass (pass_seq check avoids
 * re-walking already-processed blocks).
 * NOTE(review): fragmented excerpt — the macro tail (the recursive f(...)
 * call and any do/while wrapper) is not visible here. */
4 #define DESCEND_ARBITRARY(j, f) \
6 b->pass_seq = ctx->pc->pass_seq; \
8 for (j = 0; j < 2; ++j) \
9 if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
13 extern unsigned nv50_inst_min_size(struct nv_instruction
*);
20 values_equal(struct nv_value
*a
, struct nv_value
*b
)
23 return (a
->reg
.file
== b
->reg
.file
&& a
->join
->reg
.id
== b
->join
->reg
.id
);
/* One-directional commutation check: verify that no value defined by a is
 * consumed by b (neither through a regular source nor the flags source),
 * so emitting b before a cannot break a read-after-write dependence.
 * NOTE(review): fragmented excerpt — return type, braces, loop-body early
 * exits and the final return statements are missing between the visible
 * lines. */
27 inst_commutation_check(struct nv_instruction
*a
,
28 struct nv_instruction
*b
)
/* scan all (up to 4) defs of a against all (up to 5) sources of b */
32 for (di
= 0; di
< 4; ++di
) {
35 for (si
= 0; si
< 5; ++si
) {
38 if (values_equal(a
->def
[di
], b
->src
[si
]->value
))
/* b reading the flags a defines is also a dependence */
43 if (b
->flags_src
&& b
->flags_src
->value
== a
->flags_def
)
49 /* Check whether we can swap the order of the instructions,
50 * where a & b may be either the earlier or the later one.
53 inst_commutation_legal(struct nv_instruction
*a
,
54 struct nv_instruction
*b
)
56 return inst_commutation_check(a
, b
) && inst_commutation_check(b
, a
);
/* True if nvi can be removed by dead-code elimination: it is not a
 * terminator and nothing references its results (nv_nvi_refcount == 0).
 * NOTE(review): fragmented excerpt — original lines 63-64 (additional
 * terms of this condition) are missing here. */
60 inst_cullable(struct nv_instruction
*nvi
)
62 return (!(nvi
->is_terminator
||
65 nv_nvi_refcount(nvi
)));
/* Decide whether nvi is an effective no-op (e.g. a MOV/SELECT whose
 * destination coalesces with its source), so it can be deleted before
 * emission.
 * NOTE(review): fragmented excerpt — return type, braces and the return
 * statements between the visible conditions are missing. */
69 nvi_isnop(struct nv_instruction
*nvi
)
/* EXPORTs are handled specially (visible gap: lines 72-79 missing) */
71 if (nvi
->opcode
== NV_OP_EXPORT
)
/* defs whose join has no register assigned */
80 if (nvi
->def
[0]->join
->reg
.id
< 0)
/* only MOV and SELECT can be no-ops */
83 if (nvi
->opcode
!= NV_OP_MOV
&& nvi
->opcode
!= NV_OP_SELECT
)
/* a cross-file move is a real copy, never a no-op */
86 if (nvi
->def
[0]->reg
.file
!= nvi
->src
[0]->value
->reg
.file
)
89 if (nvi
->src
[0]->value
->join
->reg
.id
< 0) {
90 debug_printf("nvi_isnop: orphaned value detected\n");
/* SELECT is a no-op only if all inputs coalesce with the output */
94 if (nvi
->opcode
== NV_OP_SELECT
)
95 if (!values_equal(nvi
->def
[0], nvi
->src
[1]->value
))
98 return values_equal(nvi
->def
[0], nvi
->src
[0]->value
);
/* Pre-emission pass over one basic block: append b to pc->bb_list and
 * compute its binary position/size, delete no-op instructions, pair short
 * (4-byte) instructions or force them long so the block size stays
 * properly aligned, then recurse into successor blocks in emission order.
 * NOTE(review): heavily fragmented excerpt — braces and many statements
 * between the visible lines (e.g. the nvi_isnop deletions, n32 counting,
 * and the ENDIF/join handling) are missing. */
102 nv_pc_pass_pre_emission(struct nv_pc
*pc
, struct nv_basic_block
*b
)
104 struct nv_instruction
*nvi
, *next
;
/* bin_pos of this block = end of the previously listed block */
111 b
->bin_pos
= pc
->bb_list
[pc
->num_blocks
- 1]->bin_pos
+
112 pc
->bb_list
[pc
->num_blocks
- 1]->bin_size
;
114 pc
->bb_list
[pc
->num_blocks
++] = b
;
/* first walk: (gap — presumably the nvi_isnop deletion loop) */
118 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
/* second walk: size instructions and pair/permute short ones */
124 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
127 size
= nv50_inst_min_size(nvi
);
128 if (nvi
->next
&& size
< 8)
/* odd number of 32-bit ops so far: try to swap with a following
 * short instruction if the reordering is legal */
131 if ((n32
& 1) && nvi
->next
&&
132 nv50_inst_min_size(nvi
->next
) == 4 &&
133 inst_commutation_legal(nvi
, nvi
->next
)) {
135 debug_printf("permuting: ");
136 nv_print_instruction(nvi
);
137 nv_print_instruction(nvi
->next
);
138 nv_nvi_permute(nvi
, nvi
->next
);
143 b
->bin_size
+= n32
& 1;
145 nvi
->prev
->is_long
= 1;
148 b
->bin_size
+= 1 + nvi
->is_long
;
152 debug_printf("block %p is now empty\n", b
);
/* the exit instruction must be emitted long */
154 if (!b
->exit
->is_long
) {
156 b
->exit
->is_long
= 1;
159 /* might have del'd a whole tail of instructions */
160 if (!b
->exit
->prev
->is_long
&& !(n32
& 1)) {
162 b
->exit
->prev
->is_long
= 1;
165 assert(!b
->exit
|| b
->exit
->is_long
);
/* sizes were counted in 32-bit words; convert to bytes */
167 pc
->bin_size
+= b
->bin_size
*= 4;
/* only emit a join block after all its predecessors were listed */
173 if (!b
->out
[1] && ++(b
->out
[0]->priv
) != b
->out
[0]->num_in
)
177 /* delete ELSE branch */
179 b
->entry
->opcode
== NV_OP_BRA
&& b
->entry
->target
== b
->out
[0]) {
180 nv_nvi_delete(b
->entry
);
/* recurse into successors (skip self-loops) */
185 for (j
= 0; j
< 2; ++j
)
186 if (b
->out
[j
] && b
->out
[j
] != b
)
187 nv_pc_pass_pre_emission(pc
, b
->out
[j
]);
/* Pass 2 driver: allocate the emission-ordered block list and run the
 * pre-emission walk from the root block.
 * NOTE(review): fragmented excerpt — return type, braces and the return
 * value handling are missing. The CALLOC result is not visibly checked
 * before use — verify against the missing lines. */
191 nv_pc_exec_pass2(struct nv_pc
*pc
)
193 debug_printf("preparing %u blocks for emission\n", pc
->num_blocks
);
195 pc
->bb_list
= CALLOC(pc
->num_blocks
, sizeof(struct nv_basic_block
*));
198 nv_pc_pass_pre_emission(pc
, pc
->root
);
203 static INLINE boolean
204 is_cmem_load(struct nv_instruction
*nvi
)
206 return (nvi
->opcode
== NV_OP_LDA
&&
207 nvi
->src
[0]->value
->reg
.file
>= NV_FILE_MEM_C(0) &&
208 nvi
->src
[0]->value
->reg
.file
<= NV_FILE_MEM_C(15));
211 static INLINE boolean
212 is_smem_load(struct nv_instruction
*nvi
)
214 return (nvi
->opcode
== NV_OP_LDA
&&
215 (nvi
->src
[0]->value
->reg
.file
== NV_FILE_MEM_S
||
216 nvi
->src
[0]->value
->reg
.file
<= NV_FILE_MEM_P
));
219 static INLINE boolean
220 is_immd_move(struct nv_instruction
*nvi
)
222 return (nvi
->opcode
== NV_OP_MOV
&&
223 nvi
->src
[0]->value
->reg
.file
== NV_FILE_IMM
);
/* For commutative ops, swap src[0]/src[1] so that constant-memory loads
 * end up in src[1] and shared-memory loads in src[0] (hardware operand
 * constraints); for SET, also remap the condition code accordingly.
 * NOTE(review): fragmented excerpt — the actual swap statements inside
 * the two if-branches and several early returns are missing. */
227 check_swap_src_0_1(struct nv_instruction
*nvi
)
/* cc_swapped[cc] = condition code after exchanging the operands */
229 static const ubyte cc_swapped
[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
231 struct nv_ref
*src0
= nvi
->src
[0], *src1
= nvi
->src
[1];
233 if (!nv_op_commutative(nvi
->opcode
))
235 assert(src0
&& src1
);
237 if (is_cmem_load(src0
->value
->insn
)) {
238 if (!is_cmem_load(src1
->value
->insn
)) {
241 /* debug_printf("swapping cmem load to 1\n"); */
244 if (is_smem_load(src1
->value
->insn
)) {
245 if (!is_smem_load(src0
->value
->insn
)) {
248 /* debug_printf("swapping smem load to 0\n"); */
/* operands were exchanged: adjust the SET condition code */
252 if (nvi
->opcode
== NV_OP_SET
&& nvi
->src
[0] != src0
)
253 nvi
->set_cond
= cc_swapped
[nvi
->set_cond
];
/* Fold computations directly into output-register stores: if a MOV/STA to
 * an $oX output reads a single-use value, retarget the producing
 * instruction to write $oX directly and drop the store.
 * NOTE(review): fragmented excerpt — braces, the `continue` statements
 * after each guard, and the store-deletion code are missing. */
263 nv_pass_fold_stores(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
265 struct nv_instruction
*nvi
, *sti
;
268 for (sti
= b
->entry
; sti
; sti
= sti
->next
) {
/* skip anything that is not a store to an output register */
269 if (!sti
->def
[0] || sti
->def
[0]->reg
.file
!= NV_FILE_OUT
)
272 /* only handling MOV to $oX here */
273 if (sti
->opcode
!= NV_OP_MOV
&& sti
->opcode
!= NV_OP_STA
)
276 nvi
= sti
->src
[0]->value
->insn
;
277 if (!nvi
|| nvi
->opcode
== NV_OP_PHI
)
279 assert(nvi
->def
[0] == sti
->src
[0]->value
);
/* the value must have no other consumers */
281 if (nvi
->def
[0]->refc
> 1)
284 /* cannot MOV immediate to $oX */
285 if (nvi
->src
[0]->value
->reg
.file
== NV_FILE_IMM
)
/* retarget the producer to the output register */
288 nvi
->def
[0] = sti
->def
[0];
290 nvi
->fixed
= sti
->fixed
;
293 DESCEND_ARBITRARY(j
, nv_pass_fold_stores
);
/* Fold LDA loads and immediate MOVs into the instructions that consume
 * them, when the hardware can encode the memory operand or immediate
 * directly; also normalizes operand order via check_swap_src_0_1.
 * NOTE(review): fragmented excerpt — braces, `continue` guards, and the
 * conditional around the src[4] re-reference are missing. */
299 nv_pass_fold_loads(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
301 struct nv_instruction
*nvi
, *ld
;
304 for (nvi
= b
->entry
; nvi
; nvi
= nvi
->next
) {
305 check_swap_src_0_1(nvi
);
307 for (j
= 0; j
< 3; ++j
) {
310 ld
= nvi
->src
[j
]->value
->insn
;
/* immediate MOV: substitute the immediate if encodable at slot j */
314 if (is_immd_move(ld
) && nv50_nvi_can_use_imm(nvi
, j
)) {
315 nv_reference(ctx
->pc
, &nvi
->src
[j
], ld
->src
[0]->value
);
316 debug_printf("folded immediate %i\n", ld
->def
[0]->n
);
320 if (ld
->opcode
!= NV_OP_LDA
)
322 if (!nv50_nvi_can_load(nvi
, j
, ld
->src
[0]->value
))
325 if (j
== 0 && ld
->src
[4]) /* can't load shared mem */
328 /* fold it ! */ /* XXX: ref->insn */
329 nv_reference(ctx
->pc
, &nvi
->src
[j
], ld
->src
[0]->value
);
/* carry over the address source of the load, if present */
331 nv_reference(ctx
->pc
, &nvi
->src
[4], ld
->src
[4]->value
);
334 DESCEND_ARBITRARY(j
, nv_pass_fold_loads
);
/* Lower NEG/ABS/SUB into source modifiers: SUB becomes ADD with a negated
 * second source, and single-use NEG/ABS producers are folded into the
 * consumer's src modifier bits where the opcode supports them; also folds
 * SAT into a preceding MAD.
 * NOTE(review): fragmented excerpt — braces, `continue` guards, and the
 * deletion of the folded mi instructions are missing. */
340 nv_pass_lower_mods(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
343 struct nv_instruction
*nvi
, *mi
, *next
;
346 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
/* SUB a, b  ->  ADD a, -b */
348 if (nvi
->opcode
== NV_OP_SUB
) {
349 nvi
->opcode
= NV_OP_ADD
;
350 nvi
->src
[1]->mod
^= NV_MOD_NEG
;
353 /* should not put any modifiers on NEG and ABS */
/* NOTE(review): these asserts compare opcode against NV_MOD_NEG /
 * NV_MOD_ABS — the modifier constants, not NV_OP_NEG / NV_OP_ABS.
 * Looks like a copy/paste bug that makes the asserts vacuous; verify. */
354 assert(nvi
->opcode
!= NV_MOD_NEG
|| !nvi
->src
[0]->mod
);
355 assert(nvi
->opcode
!= NV_MOD_ABS
|| !nvi
->src
[0]->mod
);
357 for (j
= 0; j
< 4; ++j
) {
361 mi
= nvi
->src
[j
]->value
->insn
;
/* only fold producers whose result has a single consumer */
364 if (mi
->def
[0]->refc
> 1)
367 if (mi
->opcode
== NV_OP_NEG
) mod
= NV_MOD_NEG
;
369 if (mi
->opcode
== NV_OP_ABS
) mod
= NV_MOD_ABS
;
/* ABS consumes any sign information of its source */
373 if (nvi
->opcode
== NV_OP_ABS
)
374 mod
&= ~(NV_MOD_NEG
| NV_MOD_ABS
);
/* NEG of NEG cancels out into a plain MOV */
376 if (nvi
->opcode
== NV_OP_NEG
&& mod
== NV_MOD_NEG
) {
377 nvi
->opcode
= NV_OP_MOV
;
381 if (!(nv50_supported_src_mods(nvi
->opcode
, j
) & mod
))
384 nv_reference(ctx
->pc
, &nvi
->src
[j
], mi
->src
[0]->value
);
386 nvi
->src
[j
]->mod
^= mod
;
/* fold SAT into a preceding MAD (MAD supports saturation) */
389 if (nvi
->opcode
== NV_OP_SAT
) {
390 mi
= nvi
->src
[0]->value
->insn
;
392 if ((mi
->opcode
== NV_OP_MAD
) && !mi
->flags_def
) {
394 mi
->def
[0] = nvi
->def
[0];
399 DESCEND_ARBITRARY(j
, nv_pass_lower_mods
);
/* True if value s is produced by a MUL instruction (MAD-fusion candidate). */
404 #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
/* Follow a chain of plain MOVs backwards from ref and return the ultimate
 * source value if it is an immediate, NULL otherwise.
 * NOTE(review): fragmented excerpt — the initialization of `src` (and any
 * NULL guard on ref) between the declaration and the loop is missing. */
406 static struct nv_value
*
407 find_immediate(struct nv_ref
*ref
)
409 struct nv_value
*src
;
/* chase unmodified MOVs back to the original producer */
415 while (src
->insn
&& src
->insn
->opcode
== NV_OP_MOV
) {
416 assert(!src
->insn
->src
[0]->mod
);
417 src
= src
->insn
->src
[0]->value
;
419 return (src
->reg
.file
== NV_FILE_IMM
) ? src
: NULL
;
/* Algebraic simplification when operand s of nvi is the constant val:
 * e.g. x*1 -> MOV, x*2 -> x+x (ADD), x*-1 -> NEG, x*-2 -> ADD of negated
 * copies, x*0 / x+0 -> MOV. `t` is presumably the other source index
 * (1 - s) — TODO confirm against the missing lines.
 * NOTE(review): heavily fragmented excerpt — the switch case labels,
 * braces, breaks and the conditionals guarding the src[0]=src[1]
 * canonicalizations are missing. */
423 constant_operand(struct nv_pc
*pc
,
424 struct nv_instruction
*nvi
, struct nv_value
*val
, int s
)
431 type
= nvi
->def
[0]->reg
.type
;
433 switch (nvi
->opcode
) {
/* multiply by 1 -> plain MOV of the other operand */
435 if ((type
== NV_TYPE_F32
&& val
->reg
.imm
.f32
== 1.0f
) ||
436 (NV_TYPE_ISINT(type
) && val
->reg
.imm
.u32
== 1)) {
437 nvi
->opcode
= NV_OP_MOV
;
438 nv_reference(pc
, &nvi
->src
[s
], NULL
);
440 nvi
->src
[0] = nvi
->src
[1];
/* multiply by 2 -> x + x */
444 if ((type
== NV_TYPE_F32
&& val
->reg
.imm
.f32
== 2.0f
) ||
445 (NV_TYPE_ISINT(type
) && val
->reg
.imm
.u32
== 2)) {
446 nvi
->opcode
= NV_OP_ADD
;
447 nv_reference(pc
, &nvi
->src
[s
], NULL
);
449 nvi
->src
[0] = nvi
->src
[1];
/* multiply by -1 -> NEG */
453 if (type
== NV_TYPE_F32
&& val
->reg
.imm
.f32
== -1.0f
) {
454 nvi
->opcode
= NV_OP_NEG
;
455 nv_reference(pc
, &nvi
->src
[s
], NULL
);
456 nvi
->src
[0] = nvi
->src
[t
];
/* multiply by -2 -> (-x) + (-x) */
459 if (type
== NV_TYPE_F32
&& val
->reg
.imm
.f32
== -2.0f
) {
460 nvi
->opcode
= NV_OP_ADD
;
461 assert(!nvi
->src
[s
]->mod
);
462 nv_reference(pc
, &nvi
->src
[s
], nvi
->src
[t
]->value
);
463 nvi
->src
[t
]->mod
^= NV_MOD_NEG
;
464 nvi
->src
[s
]->mod
|= NV_MOD_NEG
;
/* multiply by 0 -> MOV of the zero operand */
466 if (val
->reg
.imm
.u32
== 0) {
467 nvi
->opcode
= NV_OP_MOV
;
468 nv_reference(pc
, &nvi
->src
[t
], NULL
);
470 nvi
->src
[0] = nvi
->src
[1];
/* add of 0 -> MOV of the other operand */
476 if (val
->reg
.imm
.u32
== 0) {
477 nvi
->opcode
= NV_OP_MOV
;
478 nv_reference(pc
, &nvi
->src
[s
], NULL
);
479 nvi
->src
[0] = nvi
->src
[t
];
/* Arithmetic lowering pass: fold immediate operands through
 * constant_operand, then fuse a single-use MUL feeding an ADD into a MAD
 * (moving the MUL's sources into src[0]/src[1] and the other ADD operand
 * into src[2], propagating a NEG modifier from the replaced operand).
 * NOTE(review): fragmented excerpt — braces, `continue` guards, the
 * selection of which MUL (src0 vs src1) to fuse, and the deletion of the
 * consumed MUL are missing. */
489 nv_pass_lower_arith(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
491 struct nv_instruction
*nvi
, *next
;
494 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
495 struct nv_value
*src0
, *src1
, *src
;
/* constant folding on either operand */
500 if ((src
= find_immediate(nvi
->src
[0])) != NULL
)
501 constant_operand(ctx
->pc
, nvi
, src
, 0);
503 if ((src
= find_immediate(nvi
->src
[1])) != NULL
)
504 constant_operand(ctx
->pc
, nvi
, src
, 1);
506 /* try to combine MUL, ADD into MAD */
507 if (nvi
->opcode
!= NV_OP_ADD
)
510 src0
= nvi
->src
[0]->value
;
511 src1
= nvi
->src
[1]->value
;
/* the MUL result must not be used anywhere else */
513 if (SRC_IS_MUL(src0
) && src0
->refc
== 1)
516 if (SRC_IS_MUL(src1
) && src1
->refc
== 1)
521 nvi
->opcode
= NV_OP_MAD
;
/* remember the modifier on the operand being replaced by the MUL */
522 mod
= nvi
->src
[(src
== src0
) ? 0 : 1]->mod
;
523 nv_reference(ctx
->pc
, &nvi
->src
[(src
== src0
) ? 0 : 1], NULL
);
524 nvi
->src
[2] = nvi
->src
[(src
== src0
) ? 1 : 0];
526 assert(!(mod
& ~NV_MOD_NEG
));
527 nvi
->src
[0] = new_ref(ctx
->pc
, src
->insn
->src
[0]->value
);
528 nvi
->src
[1] = new_ref(ctx
->pc
, src
->insn
->src
[1]->value
);
/* a NEG on the MUL result distributes onto one MUL factor */
529 nvi
->src
[0]->mod
= src
->insn
->src
[0]->mod
^ mod
;
530 nvi
->src
[1]->mod
= src
->insn
->src
[1]->mod
;
532 DESCEND_ARBITRARY(j
, nv_pass_lower_arith
);
538 set $r2 g f32 $r2 $r3
539 cvt abs rn f32 $r2 s32 $r2
540 cvt f32 $c0 # f32 $r2
/* Placeholder pass for condition lowering (see the flag/SET example in the
 * comment above); currently unimplemented here.
 * NOTE(review): fragmented excerpt — return type, braces and the return
 * statement are missing. */
545 nv_pass_lower_cond(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
547 /* XXX: easier in IR builder for now */
552 /* TODO: reload elimination, redundant store elimination */
/* Pass context for reload elimination.
 * NOTE(review): fragmented excerpt — the struct members are missing. */
554 struct nv_pass_reldelim
{
/* Reload-elimination pass skeleton: walks the block's instructions and
 * dispatches on LINTERP/PINTERP, LDA and MOV.
 * NOTE(review): fragmented excerpt — the bodies of the three opcode
 * branches are missing, so the actual elimination logic is not visible. */
559 nv_pass_reload_elim(struct nv_pass_reldelim
*ctx
, struct nv_basic_block
*b
)
562 struct nv_instruction
*ld
, *next
;
564 for (ld
= b
->entry
; ld
; ld
= next
) {
567 if (ld
->opcode
== NV_OP_LINTERP
|| ld
->opcode
== NV_OP_PINTERP
) {
570 if (ld
->opcode
== NV_OP_LDA
) {
573 if (ld
->opcode
== NV_OP_MOV
) {
577 DESCEND_ARBITRARY(j
, nv_pass_reload_elim
);
/* Compute the write mask of vector (TEX-like) instructions from which of
 * the 4 result components are actually referenced, then compact the defs
 * so used components come first and unused ones trail.
 * NOTE(review): fragmented excerpt — braces, the `continue` on
 * non-vector ops, and the reset of j between the loops are missing. */
583 nv_pass_tex_mask(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
587 for (i
= 0; i
< ctx
->pc
->num_instructions
; ++i
) {
588 struct nv_instruction
*nvi
= &ctx
->pc
->instructions
[i
];
589 struct nv_value
*def
[4];
591 if (!nv_is_vector_op(nvi
->opcode
))
/* set a mask bit for every referenced component */
595 for (c
= 0; c
< 4; ++c
) {
596 if (nvi
->def
[c
]->refc
)
597 nvi
->tex_mask
|= 1 << c
;
598 def
[c
] = nvi
->def
[c
];
/* used components first ... */
602 for (c
= 0; c
< 4; ++c
)
603 if (nvi
->tex_mask
& (1 << c
))
604 nvi
->def
[j
++] = def
[c
];
/* ... unused components after them */
605 for (c
= 0; c
< 4; ++c
)
606 if (!(nvi
->tex_mask
& (1 << c
)))
607 nvi
->def
[j
++] = def
[c
];
/* Dead-code elimination: delete every instruction inst_cullable reports as
 * removable; presumably counts removals in ctx (the driver loops while
 * dce.removed is set) — TODO confirm against the missing lines.
 * NOTE(review): fragmented excerpt — the deletion call and the removal
 * accounting inside the if-branch are missing. */
619 nv_pass_dce(struct nv_pass_dce
*ctx
, struct nv_basic_block
*b
)
622 struct nv_instruction
*nvi
, *next
;
624 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
627 if (inst_cullable(nvi
)) {
633 DESCEND_ARBITRARY(j
, nv_pass_dce
);
638 static INLINE boolean
639 bb_simple_if_endif(struct nv_basic_block
*bb
)
641 return (bb
->out
[0] && bb
->out
[1] &&
642 bb
->out
[0]->out
[0] == bb
->out
[1] &&
643 !bb
->out
[0]->out
[1]);
/* Flatten simple IF/ENDIF constructs (counted in ctx->n); the actual
 * predication/flattening work for the matched shape is not visible here.
 * NOTE(review): fragmented excerpt — the body of the if-branch and the
 * return are missing. */
647 nv_pass_flatten(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
651 if (bb_simple_if_endif(b
)) {
653 debug_printf("nv_pass_flatten: total IF/ENDIF constructs: %i\n", ctx
->n
);
655 DESCEND_ARBITRARY(j
, nv_pass_flatten
);
/* Pass 0 driver: run the optimization pipeline over the program —
 * reload elimination, flattening, arithmetic lowering, load/store
 * folding, modifier lowering, then DCE repeated until it removes nothing,
 * and finally TEX write-mask computation.
 * NOTE(review): fragmented excerpt — the error checks between the pass
 * invocations, the pass_seq increments, the do { of the DCE loop, the
 * FREE of reldelim, and the final return are missing. The CALLOC_STRUCT
 * result is not visibly NULL-checked — verify against missing lines. */
661 nv_pc_exec_pass0(struct nv_pc
*pc
)
663 struct nv_pass_reldelim
*reldelim
;
665 struct nv_pass_dce dce
;
668 reldelim
= CALLOC_STRUCT(nv_pass_reldelim
);
671 ret
= nv_pass_reload_elim(reldelim
, pc
->root
);
680 ret
= nv_pass_flatten(&pass
, pc
->root
);
684 /* Do this first, so we don't have to pay attention
685 * to whether sources are supported memory loads.
688 ret
= nv_pass_lower_arith(&pass
, pc
->root
);
693 ret
= nv_pass_fold_loads(&pass
, pc
->root
);
698 ret
= nv_pass_fold_stores(&pass
, pc
->root
);
703 ret
= nv_pass_lower_mods(&pass
, pc
->root
);
/* repeat DCE until a fixed point is reached */
711 ret
= nv_pass_dce(&dce
, pc
->root
);
714 } while (dce
.removed
);
716 ret
= nv_pass_tex_mask(&pass
, pc
->root
);