/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24 #include "nvc0_program.h"
/* Depth-first descent helper shared by the optimization passes: marks @b as
 * visited in the current pass (via pass_seq) and recurses into each CFG
 * successor not yet visited, calling pass function @f on it.  @j is the
 * caller's scratch loop variable.
 * NOTE(review): reconstructed — the source chunk was garbled; verify the
 * do/while wrapper and recursion call against the original file.
 */
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
36 registers_interfere(struct nv_value
*a
, struct nv_value
*b
)
38 if (a
->reg
.file
!= b
->reg
.file
)
40 if (NV_IS_MEMORY_FILE(a
->reg
.file
) || NV_IS_MEMORY_FILE(b
->reg
.file
))
43 assert(a
->join
->reg
.id
>= 0 && b
->join
->reg
.id
>= 0);
45 if (a
->join
->reg
.id
< b
->join
->reg
.id
) {
46 return (a
->join
->reg
.id
+ a
->reg
.size
>= b
->join
->reg
.id
);
48 if (a
->join
->reg
.id
> b
->join
->reg
.id
) {
49 return (b
->join
->reg
.id
+ b
->reg
.size
>= a
->join
->reg
.id
);
56 values_equal(struct nv_value
*a
, struct nv_value
*b
)
58 if (a
->reg
.file
!= b
->reg
.file
|| a
->reg
.size
!= b
->reg
.size
)
60 if (NV_IS_MEMORY_FILE(a
->reg
.file
))
61 return a
->reg
.address
== b
->reg
.address
;
63 return a
->join
->reg
.id
== b
->join
->reg
.id
;
68 inst_commutation_check(struct nv_instruction
*a
, struct nv_instruction
*b
)
72 for (di
= 0; di
< 4 && a
->def
[di
]; ++di
)
73 for (si
= 0; si
< 5 && b
->src
[si
]; ++si
)
74 if (registers_interfere(a
->def
[di
], b
->src
[si
]->value
))
80 /* Check whether we can swap the order of the instructions,
81 * where a & b may be either the earlier or the later one.
84 inst_commutation_legal(struct nv_instruction
*a
, struct nv_instruction
*b
)
86 return inst_commutation_check(a
, b
) && inst_commutation_check(b
, a
);
91 inst_removable(struct nv_instruction
*nvi
)
93 if (nvi
->opcode
== NV_OP_ST
)
95 return (!(nvi
->terminator
||
99 nvc0_insn_refcount(nvi
)));
102 static INLINE boolean
103 inst_is_noop(struct nv_instruction
*nvi
)
105 if (nvi
->opcode
== NV_OP_UNDEF
|| nvi
->opcode
== NV_OP_BIND
)
107 if (nvi
->terminator
|| nvi
->join
)
109 if (nvi
->def
[0] && nvi
->def
[0]->join
->reg
.id
< 0)
111 if (nvi
->opcode
!= NV_OP_MOV
&& nvi
->opcode
!= NV_OP_SELECT
)
113 if (nvi
->def
[0]->reg
.file
!= nvi
->src
[0]->value
->reg
.file
)
116 if (nvi
->src
[0]->value
->join
->reg
.id
< 0) {
117 NOUVEAU_DBG("inst_is_noop: orphaned value detected\n");
121 if (nvi
->opcode
== NV_OP_SELECT
)
122 if (!values_equal(nvi
->def
[0], nvi
->src
[1]->value
))
124 return values_equal(nvi
->def
[0], nvi
->src
[0]->value
);
/* Forward declaration: flattening pass invoked from nv_pc_pass2 below. */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
137 nv_pc_pass_pre_emission(void *priv
, struct nv_basic_block
*b
)
139 struct nv_pc
*pc
= (struct nv_pc
*)priv
;
140 struct nv_basic_block
*in
;
141 struct nv_instruction
*nvi
, *next
;
144 for (j
= pc
->num_blocks
- 1; j
>= 0 && !pc
->bb_list
[j
]->emit_size
; --j
);
149 /* check for no-op branches (BRA $PC+8) */
150 if (in
->exit
&& in
->exit
->opcode
== NV_OP_BRA
&& in
->exit
->target
== b
) {
154 for (++j
; j
< pc
->num_blocks
; ++j
)
155 pc
->bb_list
[j
]->emit_pos
-= 8;
157 nvc0_insn_delete(in
->exit
);
159 b
->emit_pos
= in
->emit_pos
+ in
->emit_size
;
162 pc
->bb_list
[pc
->num_blocks
++] = b
;
166 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
168 if (inst_is_noop(nvi
) ||
169 (pc
->is_fragprog
&& nvi
->opcode
== NV_OP_EXPORT
)) {
170 nvc0_insn_delete(nvi
);
174 pc
->emit_size
+= b
->emit_size
;
178 debug_printf("BB:%i is now empty\n", b
->id
);
180 debug_printf("BB:%i size = %u\n", b
->id
, b
->emit_size
);
185 nv_pc_pass2(struct nv_pc
*pc
, struct nv_basic_block
*root
)
192 nv_pass_flatten(&pass
, root
);
194 nvc0_pc_pass_in_order(root
, nv_pc_pass_pre_emission
, pc
);
200 nvc0_pc_exec_pass2(struct nv_pc
*pc
)
204 NOUVEAU_DBG("preparing %u blocks for emission\n", pc
->num_blocks
);
206 pc
->num_blocks
= 0; /* will reorder bb_list */
208 for (i
= 0; i
< pc
->num_subroutines
+ 1; ++i
)
209 if (pc
->root
[i
] && (ret
= nv_pc_pass2(pc
, pc
->root
[i
])))
214 static INLINE boolean
215 is_cspace_load(struct nv_instruction
*nvi
)
219 assert(nvi
->indirect
!= 0);
220 return (nvi
->opcode
== NV_OP_LD
&&
221 nvi
->src
[0]->value
->reg
.file
>= NV_FILE_MEM_C(0) &&
222 nvi
->src
[0]->value
->reg
.file
<= NV_FILE_MEM_C(15));
225 static INLINE boolean
226 is_immd32_load(struct nv_instruction
*nvi
)
230 return (nvi
->opcode
== NV_OP_MOV
&&
231 nvi
->src
[0]->value
->reg
.file
== NV_FILE_IMM
&&
232 nvi
->src
[0]->value
->reg
.size
== 4);
236 check_swap_src_0_1(struct nv_instruction
*nvi
)
238 static const uint8_t cc_swapped
[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
240 struct nv_ref
*src0
= nvi
->src
[0];
241 struct nv_ref
*src1
= nvi
->src
[1];
243 if (!nv_op_commutative(nvi
->opcode
))
245 assert(src0
&& src1
&& src0
->value
&& src1
->value
);
247 if (is_cspace_load(src0
->value
->insn
)) {
248 if (!is_cspace_load(src1
->value
->insn
)) {
254 if (nvi
->src
[0] != src0
&& nvi
->opcode
== NV_OP_SET
)
255 nvi
->set_cond
= cc_swapped
[nvi
->set_cond
];
259 nvi_set_indirect_load(struct nv_pc
*pc
,
260 struct nv_instruction
*nvi
, struct nv_value
*val
)
262 for (nvi
->indirect
= 0; nvi
->indirect
< 6 && nvi
->src
[nvi
->indirect
];
264 assert(nvi
->indirect
< 6);
265 nv_reference(pc
, nvi
, nvi
->indirect
, val
);
269 nvc0_pass_fold_loads(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
271 struct nv_instruction
*nvi
, *ld
;
274 for (nvi
= b
->entry
; nvi
; nvi
= nvi
->next
) {
275 check_swap_src_0_1(nvi
);
277 for (s
= 0; s
< 3 && nvi
->src
[s
]; ++s
) {
278 ld
= nvi
->src
[s
]->value
->insn
;
279 if (!ld
|| ld
->opcode
!= NV_OP_LD
)
281 if (!nvc0_insn_can_load(nvi
, s
, ld
))
285 nv_reference(ctx
->pc
, nvi
, s
, ld
->src
[0]->value
);
286 if (ld
->indirect
>= 0)
287 nvi_set_indirect_load(ctx
->pc
, nvi
, ld
->src
[ld
->indirect
]->value
);
289 if (!nvc0_insn_refcount(ld
))
290 nvc0_insn_delete(ld
);
293 DESCEND_ARBITRARY(s
, nvc0_pass_fold_loads
);
299 modifiers_opcode(uint8_t mod
)
302 case NV_MOD_NEG
: return NV_OP_NEG
;
303 case NV_MOD_ABS
: return NV_OP_ABS
;
311 /* NOTE: Assumes loads have not yet been folded. */
313 nv_pass_lower_mods(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
315 struct nv_instruction
*nvi
, *mi
, *next
;
319 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
321 if (nvi
->opcode
== NV_OP_SUB
) {
322 nvi
->src
[1]->mod
^= NV_MOD_NEG
;
323 nvi
->opcode
= NV_OP_ADD
;
326 for (j
= 0; j
< 3 && nvi
->src
[j
]; ++j
) {
327 mi
= nvi
->src
[j
]->value
->insn
;
330 if (mi
->def
[0]->refc
> 1 || mi
->predicate
>= 0)
333 if (NV_BASEOP(mi
->opcode
) == NV_OP_NEG
) mod
= NV_MOD_NEG
;
335 if (NV_BASEOP(mi
->opcode
) == NV_OP_ABS
) mod
= NV_MOD_ABS
;
338 assert(!(mod
& mi
->src
[0]->mod
& NV_MOD_NEG
));
340 mod
|= mi
->src
[0]->mod
;
342 if ((nvi
->opcode
== NV_OP_ABS
) || (nvi
->src
[j
]->mod
& NV_MOD_ABS
)) {
343 /* abs neg [abs] = abs */
344 mod
&= ~(NV_MOD_NEG
| NV_MOD_ABS
);
346 if ((nvi
->opcode
== NV_OP_NEG
) && (mod
& NV_MOD_NEG
)) {
347 /* neg as opcode and modifier on same insn cannot occur */
348 /* neg neg abs = abs, neg neg = identity */
350 if (mod
& NV_MOD_ABS
)
351 nvi
->opcode
= NV_OP_ABS
;
353 nvi
->opcode
= NV_OP_MOV
;
357 if ((nv_op_supported_src_mods(nvi
->opcode
) & mod
) != mod
)
360 nv_reference(ctx
->pc
, nvi
, j
, mi
->src
[0]->value
);
362 nvi
->src
[j
]->mod
^= mod
;
365 if (nvi
->opcode
== NV_OP_SAT
) {
366 mi
= nvi
->src
[0]->value
->insn
;
368 if (mi
->def
[0]->refc
> 1 ||
369 (mi
->opcode
!= NV_OP_ADD
&&
370 mi
->opcode
!= NV_OP_MUL
&&
371 mi
->opcode
!= NV_OP_MAD
))
374 mi
->def
[0] = nvi
->def
[0];
375 mi
->def
[0]->insn
= mi
;
376 nvc0_insn_delete(nvi
);
379 DESCEND_ARBITRARY(j
, nv_pass_lower_mods
);
384 #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
388 modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
390 if (mod & NV_MOD_ABS) {
391 if (type == NV_TYPE_F32)
394 if ((*val) & (1 << 31))
397 if (mod & NV_MOD_NEG) {
398 if (type == NV_TYPE_F32)
408 constant_expression(struct nv_pc
*pc
, struct nv_instruction
*nvi
,
409 struct nv_value
*src0
, struct nv_value
*src1
)
411 struct nv_value
*val
;
421 type
= NV_OPTYPE(nvi
->opcode
);
424 u0
.u32
= src0
->reg
.imm
.u32
;
425 u1
.u32
= src1
->reg
.imm
.u32
;
427 modifiers_apply(&u0
.u32
, type
, nvi
->src
[0]->mod
);
428 modifiers_apply(&u1
.u32
, type
, nvi
->src
[1]->mod
);
430 switch (nvi
->opcode
) {
432 if (nvi
->src
[2]->value
->reg
.file
!= NV_FILE_GPR
)
437 case NV_TYPE_F32
: u
.f32
= u0
.f32
* u1
.f32
; break;
438 case NV_TYPE_U32
: u
.u32
= u0
.u32
* u1
.u32
; break;
439 case NV_TYPE_S32
: u
.s32
= u0
.s32
* u1
.s32
; break;
447 case NV_TYPE_F32
: u
.f32
= u0
.f32
+ u1
.f32
; break;
448 case NV_TYPE_U32
: u
.u32
= u0
.u32
+ u1
.u32
; break;
449 case NV_TYPE_S32
: u
.s32
= u0
.s32
+ u1
.s32
; break;
457 case NV_TYPE_F32
: u
.f32
= u0
.f32
- u1
.f32
; break;
458 case NV_TYPE_U32
: u
.u32
= u0
.u32
- u1
.u32
; break;
459 case NV_TYPE_S32
: u
.s32
= u0
.s32
- u1
.s32
; break;
469 nvi
->opcode
= NV_OP_MOV
;
471 val
= new_value(pc
, NV_FILE_IMM
, type
);
473 val
->reg
.imm
.u32
= u
.u32
;
475 nv_reference(pc
, nvi
, 1, NULL
);
476 nv_reference(pc
, nvi
, 0, val
);
478 if (nvi
->src
[2]) { /* from MAD */
479 nvi
->src
[1] = nvi
->src
[0];
480 nvi
->src
[0] = nvi
->src
[2];
482 nvi
->opcode
= NV_OP_ADD
;
484 if (val
->reg
.imm
.u32
== 0) {
486 nvi
->opcode
= NV_OP_MOV
;
492 constant_operand(struct nv_pc
*pc
,
493 struct nv_instruction
*nvi
, struct nv_value
*val
, int s
)
506 type
= NV_OPTYPE(nvi
->opcode
);
508 u
.u32
= val
->reg
.imm
.u32
;
509 modifiers_apply(&u
.u32
, type
, nvi
->src
[s
]->mod
);
511 switch (NV_BASEOP(nvi
->opcode
)) {
513 if ((type
== NV_TYPE_F32
&& u
.f32
== 1.0f
) ||
514 (NV_TYPE_ISINT(type
) && u
.u32
== 1)) {
515 if ((op
= modifiers_opcode(nvi
->src
[t
]->mod
)) == NV_OP_NOP
)
518 nv_reference(pc
, nvi
, s
, NULL
);
519 nvi
->src
[0] = nvi
->src
[t
];
522 if ((type
== NV_TYPE_F32
&& u
.f32
== 2.0f
) ||
523 (NV_TYPE_ISINT(type
) && u
.u32
== 2)) {
524 nvi
->opcode
= NV_OP_ADD
;
525 nv_reference(pc
, nvi
, s
, nvi
->src
[t
]->value
);
526 nvi
->src
[s
]->mod
= nvi
->src
[t
]->mod
;
528 if (type
== NV_TYPE_F32
&& u
.f32
== -1.0f
) {
529 if (nvi
->src
[t
]->mod
& NV_MOD_NEG
)
530 nvi
->opcode
= NV_OP_MOV
;
532 nvi
->opcode
= NV_OP_NEG
;
533 nv_reference(pc
, nvi
, s
, NULL
);
534 nvi
->src
[0] = nvi
->src
[t
];
537 if (type
== NV_TYPE_F32
&& u
.f32
== -2.0f
) {
538 nvi
->opcode
= NV_OP_ADD
;
539 nv_reference(pc
, nvi
, s
, nvi
->src
[t
]->value
);
540 nvi
->src
[s
]->mod
= (nvi
->src
[t
]->mod
^= NV_MOD_NEG
);
543 nvi
->opcode
= NV_OP_MOV
;
544 nv_reference(pc
, nvi
, t
, NULL
);
546 nvi
->src
[0] = nvi
->src
[1];
553 if ((op
= modifiers_opcode(nvi
->src
[t
]->mod
)) == NV_OP_NOP
)
556 nv_reference(pc
, nvi
, s
, NULL
);
557 nvi
->src
[0] = nvi
->src
[t
];
562 u
.f32
= 1.0f
/ u
.f32
;
563 (val
= new_value(pc
, NV_FILE_IMM
, NV_TYPE_F32
))->reg
.imm
.f32
= u
.f32
;
564 nvi
->opcode
= NV_OP_MOV
;
566 nv_reference(pc
, nvi
, 0, val
);
569 u
.f32
= 1.0f
/ sqrtf(u
.f32
);
570 (val
= new_value(pc
, NV_FILE_IMM
, NV_TYPE_F32
))->reg
.imm
.f32
= u
.f32
;
571 nvi
->opcode
= NV_OP_MOV
;
573 nv_reference(pc
, nvi
, 0, val
);
582 nv_pass_lower_arith(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
585 struct nv_instruction
*nvi
, *next
;
588 for (nvi
= b
->entry
; nvi
; nvi
= next
) {
589 struct nv_value
*src0
, *src1
, *src
;
594 src0
= nvcg_find_immediate(nvi
->src
[0]);
595 src1
= nvcg_find_immediate(nvi
->src
[1]);
598 constant_expression(ctx
->pc
, nvi
, src0
, src1
);
601 constant_operand(ctx
->pc
, nvi
, src0
, 0);
604 constant_operand(ctx
->pc
, nvi
, src1
, 1);
607 /* try to combine MUL, ADD into MAD */
608 if (nvi
->opcode
!= NV_OP_ADD
)
611 src0
= nvi
->src
[0]->value
;
612 src1
= nvi
->src
[1]->value
;
614 if (SRC_IS_MUL(src0
) && src0
->refc
== 1)
617 if (SRC_IS_MUL(src1
) && src1
->refc
== 1)
622 /* could have an immediate from above constant_* */
623 if (src0
->reg
.file
!= NV_FILE_GPR
|| src1
->reg
.file
!= NV_FILE_GPR
)
626 nvi
->opcode
= NV_OP_MAD
;
627 mod
= nvi
->src
[(src
== src0
) ? 0 : 1]->mod
;
628 nv_reference(ctx
->pc
, &nvi
->src
[(src
== src0
) ? 0 : 1], NULL
);
629 nvi
->src
[2] = nvi
->src
[(src
== src0
) ? 1 : 0];
631 assert(!(mod
& ~NV_MOD_NEG
));
632 nvi
->src
[0] = new_ref(ctx
->pc
, src
->insn
->src
[0]->value
);
633 nvi
->src
[1] = new_ref(ctx
->pc
, src
->insn
->src
[1]->value
);
634 nvi
->src
[0]->mod
= src
->insn
->src
[0]->mod
^ mod
;
635 nvi
->src
[1]->mod
= src
->insn
->src
[1]->mod
;
637 DESCEND_ARBITRARY(j
, nv_pass_lower_arith
);
/* TODO: redundant store elimination */

/* One tracked memory access: the producing/consuming instruction plus its
 * (indirect) base, byte offset, and accumulated size.
 * NOTE(review): the ofst/base/size fields and pool/alloc tail were missing
 * from this chunk and are reconstructed from their uses below — verify.
 */
struct mem_record {
   struct mem_record *next;
   struct nv_instruction *insn;
   uint32_t ofst;
   uint32_t base;
   uint32_t size;
};

#define MEM_RECORD_POOL_SIZE 1024

/* Context for the load-vectorization / reload-elimination passes: one
 * record chain per memory space, backed by a fixed pool.
 */
struct pass_reld_elim {
   struct nv_pc *pc;

   struct mem_record *imm;
   struct mem_record *mem_v;
   struct mem_record *mem_a;
   struct mem_record *mem_c[16];
   struct mem_record *mem_l;

   struct mem_record pool[MEM_RECORD_POOL_SIZE];
   int alloc;
};
668 combine_load(struct mem_record
*rec
, struct nv_instruction
*ld
)
670 struct nv_instruction
*fv
= rec
->insn
;
671 struct nv_value
*mem
= ld
->src
[0]->value
;
672 uint32_t size
= rec
->size
+ mem
->reg
.size
;
674 int d
= rec
->size
/ 4;
676 assert(rec
->size
< 16);
677 if (rec
->ofst
> mem
->reg
.address
) {
678 if ((size
== 8 && mem
->reg
.address
& 3) ||
679 (size
> 8 && mem
->reg
.address
& 7))
681 rec
->ofst
= mem
->reg
.address
;
682 for (j
= 0; j
< d
; ++j
)
683 fv
->def
[d
+ j
] = fv
->def
[j
];
686 if ((size
== 8 && rec
->ofst
& 3) ||
687 (size
> 8 && rec
->ofst
& 7)) {
691 for (j
= 0; j
< mem
->reg
.size
/ 4; ++j
) {
692 fv
->def
[d
] = ld
->def
[j
];
693 fv
->def
[d
++]->insn
= fv
;
696 fv
->src
[0]->value
->reg
.size
= rec
->size
= size
;
698 nvc0_insn_delete(ld
);
/* Merge export @ex into an adjacent recorded export.
 * NOTE(review): the body was missing from this chunk; preserved as a no-op
 * stub — verify against the original.
 */
static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{
}
708 add_mem_record(struct pass_reld_elim
*ctx
, struct mem_record
**rec
,
709 uint32_t base
, uint32_t ofst
, struct nv_instruction
*nvi
)
711 struct mem_record
*it
= &ctx
->pool
[ctx
->alloc
++];
718 it
->size
= nvi
->src
[0]->value
->reg
.size
;
721 /* vectorize and reuse loads from memory or of immediates */
723 nv_pass_mem_opt(struct pass_reld_elim
*ctx
, struct nv_basic_block
*b
)
725 struct mem_record
**rec
, *it
;
726 struct nv_instruction
*ld
, *next
;
727 struct nv_value
*mem
;
731 for (ld
= b
->entry
; ld
; ld
= next
) {
734 if (is_cspace_load(ld
)) {
735 mem
= ld
->src
[0]->value
;
736 rec
= &ctx
->mem_c
[ld
->src
[0]->value
->reg
.file
- NV_FILE_MEM_C(0)];
738 if (ld
->opcode
== NV_OP_VFETCH
) {
739 mem
= ld
->src
[0]->value
;
742 if (ld
->opcode
== NV_OP_EXPORT
) {
743 mem
= ld
->src
[0]->value
;
744 if (mem
->reg
.file
!= NV_FILE_MEM_V
)
750 if (ld
->def
[0] && ld
->def
[0]->refc
== 0)
752 ofst
= mem
->reg
.address
;
753 base
= (ld
->indirect
>= 0) ? ld
->src
[ld
->indirect
]->value
->n
: 0;
755 for (it
= *rec
; it
; it
= it
->next
) {
756 if (it
->base
== base
&&
757 ((it
->ofst
>> 4) == (ofst
>> 4)) &&
758 ((it
->ofst
+ it
->size
== ofst
) ||
759 (it
->ofst
- mem
->reg
.size
== ofst
))) {
760 if (ld
->opcode
== NV_OP_LD
&& it
->size
+ mem
->reg
.size
== 12)
762 if (it
->ofst
< ofst
) {
763 if ((it
->ofst
& 0xf) == 4)
766 if ((ofst
& 0xf) == 4)
772 switch (ld
->opcode
) {
773 case NV_OP_EXPORT
: combine_export(it
, ld
); break;
775 combine_load(it
, ld
);
779 if (ctx
->alloc
< MEM_RECORD_POOL_SIZE
) {
780 add_mem_record(ctx
, rec
, base
, ofst
, ld
);
784 DESCEND_ARBITRARY(s
, nv_pass_mem_opt
);
/* Remove a store made redundant by the recorded overlapping access.
 * NOTE(review): the body was missing from this chunk; preserved as a no-op
 * stub — verify against the original.
 */
static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
}
793 /* elimination of redundant stores */
795 pass_store_elim(struct pass_reld_elim
*ctx
, struct nv_basic_block
*b
)
797 struct mem_record
**rec
, *it
;
798 struct nv_instruction
*st
, *next
;
799 struct nv_value
*mem
;
800 uint32_t base
, ofst
, size
;
803 for (st
= b
->entry
; st
; st
= next
) {
806 if (st
->opcode
== NV_OP_ST
) {
807 mem
= st
->src
[0]->value
;
810 if (st
->opcode
== NV_OP_EXPORT
) {
811 mem
= st
->src
[0]->value
;
812 if (mem
->reg
.file
!= NV_FILE_MEM_V
)
816 if (st
->opcode
== NV_OP_ST
) {
819 ofst
= mem
->reg
.address
;
820 base
= (st
->indirect
>= 0) ? st
->src
[st
->indirect
]->value
->n
: 0;
821 size
= mem
->reg
.size
;
823 for (it
= *rec
; it
; it
= it
->next
) {
824 if (it
->base
== base
&&
825 (it
->ofst
<= ofst
&& (it
->ofst
+ size
) > ofst
))
829 eliminate_store(it
, st
);
831 add_mem_record(ctx
, rec
, base
, ofst
, st
);
834 DESCEND_ARBITRARY(s
, nv_pass_mem_opt
);
838 /* TODO: properly handle loads from l[] memory in the presence of stores */
840 nv_pass_reload_elim(struct pass_reld_elim
*ctx
, struct nv_basic_block
*b
)
843 struct load_record
**rec
, *it
;
844 struct nv_instruction
*ld
, *next
;
846 struct nv_value
*val
;
849 for (ld
= b
->entry
; ld
; ld
= next
) {
853 val
= ld
->src
[0]->value
;
856 if (ld
->opcode
== NV_OP_LINTERP
|| ld
->opcode
== NV_OP_PINTERP
) {
857 data
[0] = val
->reg
.id
;
861 if (ld
->opcode
== NV_OP_LDA
) {
862 data
[0] = val
->reg
.id
;
863 data
[1] = ld
->src
[4] ? ld
->src
[4]->value
->n
: ~0ULL;
864 if (val
->reg
.file
>= NV_FILE_MEM_C(0) &&
865 val
->reg
.file
<= NV_FILE_MEM_C(15))
866 rec
= &ctx
->mem_c
[val
->reg
.file
- NV_FILE_MEM_C(0)];
868 if (val
->reg
.file
== NV_FILE_MEM_S
)
871 if (val
->reg
.file
== NV_FILE_MEM_L
)
874 if ((ld
->opcode
== NV_OP_MOV
) && (val
->reg
.file
== NV_FILE_IMM
)) {
875 data
[0] = val
->reg
.imm
.u32
;
880 if (!rec
|| !ld
->def
[0]->refc
)
883 for (it
= *rec
; it
; it
= it
->next
)
884 if (it
->data
[0] == data
[0] && it
->data
[1] == data
[1])
888 if (ld
->def
[0]->reg
.id
>= 0)
889 it
->value
= ld
->def
[0];
892 nvc0_pc_replace_value(ctx
->pc
, ld
->def
[0], it
->value
);
894 if (ctx
->alloc
== LOAD_RECORD_POOL_SIZE
)
896 it
= &ctx
->pool
[ctx
->alloc
++];
898 it
->data
[0] = data
[0];
899 it
->data
[1] = data
[1];
900 it
->value
= ld
->def
[0];
908 for (j
= 0; j
< 16; ++j
)
909 ctx
->mem_c
[j
] = NULL
;
913 DESCEND_ARBITRARY(j
, nv_pass_reload_elim
);
919 nv_pass_tex_mask(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
923 for (i
= 0; i
< ctx
->pc
->num_instructions
; ++i
) {
924 struct nv_instruction
*nvi
= &ctx
->pc
->instructions
[i
];
925 struct nv_value
*def
[4];
927 if (!nv_is_texture_op(nvi
->opcode
))
931 for (c
= 0; c
< 4; ++c
) {
932 if (nvi
->def
[c
]->refc
)
933 nvi
->tex_mask
|= 1 << c
;
934 def
[c
] = nvi
->def
[c
];
938 for (c
= 0; c
< 4; ++c
)
939 if (nvi
->tex_mask
& (1 << c
))
940 nvi
->def
[j
++] = def
[c
];
941 for (c
= 0; c
< 4; ++c
)
942 if (!(nvi
->tex_mask
& (1 << c
)))
943 nvi
->def
[j
++] = def
[c
];
955 nv_pass_dce(struct nv_pass_dce
*ctx
, struct nv_basic_block
*b
)
958 struct nv_instruction
*nvi
, *next
;
960 for (nvi
= b
->phi
? b
->phi
: b
->entry
; nvi
; nvi
= next
) {
963 if (inst_removable(nvi
)) {
964 nvc0_insn_delete(nvi
);
968 DESCEND_ARBITRARY(j
, nv_pass_dce
);
974 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
975 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
976 * BREAK and dummy ELSE block.
978 static INLINE boolean
979 bb_is_if_else_endif(struct nv_basic_block
*bb
)
981 if (!bb
->out
[0] || !bb
->out
[1])
984 if (bb
->out
[0]->out_kind
[0] == CFG_EDGE_LOOP_LEAVE
) {
985 return (bb
->out
[0]->out
[1] == bb
->out
[1]->out
[0] &&
986 !bb
->out
[1]->out
[1]);
988 return (bb
->out
[0]->out
[0] == bb
->out
[1]->out
[0] &&
989 !bb
->out
[0]->out
[1] &&
990 !bb
->out
[1]->out
[1]);
994 /* predicate instructions and remove branch at the end */
996 predicate_instructions(struct nv_pc
*pc
, struct nv_basic_block
*b
,
997 struct nv_value
*p
, ubyte cc
)
/* NOTE: Run this after register allocation, we can just cut out the cflow
 * instructions and hook the predicates to the conditional OPs if they are
 * not using immediates; better than inserting SELECT to join definitions.
 *
 * NOTE: Should adapt prior optimization to make this possible more often.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   /* NOTE(review): the body was missing from this chunk; kept as a no-op —
    * verify against the original. */
   return 0;
}
1015 /* local common subexpression elimination, stupid O(n^2) implementation */
1017 nv_pass_cse(struct nv_pass
*ctx
, struct nv_basic_block
*b
)
1020 struct nv_instruction
*ir
, *ik
, *next
;
1021 struct nv_instruction
*entry
= b
->phi
? b
->phi
: b
->entry
;
1027 for (ir
= entry
; ir
; ir
= next
) {
1029 for (ik
= entry
; ik
!= ir
; ik
= ik
->next
) {
1030 if (ir
->opcode
!= ik
->opcode
|| ir
->fixed
)
1033 if (!ir
->def
[0] || !ik
->def
[0] ||
1034 ik
->opcode
== NV_OP_LDA
||
1035 ik
->opcode
== NV_OP_STA
||
1036 ik
->opcode
== NV_OP_MOV
||
1037 nv_is_vector_op(ik
->opcode
))
1038 continue; /* ignore loads, stores & moves */
1040 if (ik
->src
[4] || ir
->src
[4])
1041 continue; /* don't mess with address registers */
1043 if (ik
->flags_src
|| ir
->flags_src
||
1044 ik
->flags_def
|| ir
->flags_def
)
1045 continue; /* and also not with flags, for now */
1047 if (ik
->def
[0]->reg
.file
== NV_FILE_OUT
||
1048 ir
->def
[0]->reg
.file
== NV_FILE_OUT
||
1049 !values_equal(ik
->def
[0], ir
->def
[0]))
1052 for (s
= 0; s
< 3; ++s
) {
1053 struct nv_value
*a
, *b
;
1060 if (ik
->src
[s
]->mod
!= ir
->src
[s
]->mod
)
1062 a
= ik
->src
[s
]->value
;
1063 b
= ir
->src
[s
]->value
;
1066 if (a
->reg
.file
!= b
->reg
.file
||
1068 a
->reg
.id
!= b
->reg
.id
)
1072 nvc0_insn_delete(ir
);
1074 nvcg_replace_value(ctx
->pc
, ir
->def
[0], ik
->def
[0]);
1081 DESCEND_ARBITRARY(s
, nv_pass_cse
);
1087 nv_pc_pass0(struct nv_pc
*pc
, struct nv_basic_block
*root
)
1089 struct pass_reld_elim
*reldelim
;
1090 struct nv_pass pass
;
1091 struct nv_pass_dce dce
;
1097 /* Do this first, so we don't have to pay attention
1098 * to whether sources are supported memory loads.
1101 ret
= nv_pass_lower_arith(&pass
, root
);
1106 ret
= nv_pass_lower_mods(&pass
, root
);
1111 ret
= nvc0_pass_fold_loads(&pass
, root
);
1115 if (pc
->opt_reload_elim
) {
1116 reldelim
= CALLOC_STRUCT(pass_reld_elim
);
1120 ret
= nv_pass_reload_elim(reldelim
, root
);
1125 memset(reldelim
, 0, sizeof(struct pass_reld_elim
));
1130 ret
= nv_pass_cse(&pass
, root
);
1138 ret
= nv_pass_dce(&dce
, root
);
1141 } while (dce
.removed
);
1143 if (pc
->opt_reload_elim
) {
1145 ret
= nv_pass_mem_opt(reldelim
, root
);
1147 memset(reldelim
, 0, sizeof(struct pass_reld_elim
));
1151 ret
= nv_pass_mem_opt(reldelim
, root
);
1158 ret
= nv_pass_tex_mask(&pass
, root
);
1166 nvc0_pc_exec_pass0(struct nv_pc
*pc
)
1170 for (i
= 0; i
< pc
->num_subroutines
+ 1; ++i
)
1171 if (pc
->root
[i
] && (ret
= nv_pc_pass0(pc
, pc
->root
[i
])))