2 * Copyright (C) 2009 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "radeon_dataflow.h"
30 #include "radeon_compiler.h"
31 #include "radeon_swizzle.h"
33 struct peephole_state
{
34 struct rc_instruction
* Inst
;
35 /** Stores a bitmask of the components that are still "alive" (i.e.
36 * they have not been written to since Inst was executed.)
38 unsigned int WriteMask
;
41 typedef void (*rc_presub_replace_fn
)(struct peephole_state
*,
42 struct rc_instruction
*,
45 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
47 struct rc_src_register combine
;
48 combine
.File
= inner
.File
;
49 combine
.Index
= inner
.Index
;
50 combine
.RelAddr
= inner
.RelAddr
;
53 combine
.Negate
= outer
.Negate
;
55 combine
.Abs
= inner
.Abs
;
57 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
58 unsigned int swz
= GET_SWZ(outer
.Swizzle
, chan
);
60 combine
.Negate
|= GET_BIT(inner
.Negate
, swz
) << chan
;
62 combine
.Negate
^= outer
.Negate
;
64 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
68 struct copy_propagate_state
{
69 struct radeon_compiler
* C
;
70 struct rc_instruction
* Mov
;
71 unsigned int Conflict
:1;
73 /** Whether Mov's source has been clobbered */
74 unsigned int SourceClobbered
:1;
76 /** Which components of Mov's destination register are still from that Mov? */
77 unsigned int MovMask
:4;
79 /** Which components of Mov's destination register are clearly *not* from that Mov */
80 unsigned int DefinedMask
:4;
82 /** Which components of Mov's source register are sourced */
83 unsigned int SourcedMask
:4;
85 /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
90 * This is a callback function that is meant to be passed to
91 * rc_for_all_reads_mask. This function will be called once for each source
93 * @param inst The instruction that the source register belongs to.
94 * @param file The register file of the source register.
95 * @param index The index of the source register.
96 * @param mask The components of the source register that are being read from.
98 static void copy_propagate_scan_read(void * data
, struct rc_instruction
* inst
,
99 rc_register_file file
, unsigned int index
, unsigned int mask
)
101 struct copy_propagate_state
* s
= data
;
103 /* XXX This could probably be handled better. */
104 if (file
== RC_FILE_ADDRESS
) {
109 if (file
!= RC_FILE_TEMPORARY
|| index
!= s
->Mov
->U
.I
.DstReg
.Index
)
112 /* These instructions cannot read from the constants file.
113 * see radeonTransformTEX()
115 if(s
->Mov
->U
.I
.SrcReg
[0].File
!= RC_FILE_TEMPORARY
&&
116 s
->Mov
->U
.I
.SrcReg
[0].File
!= RC_FILE_INPUT
&&
117 (inst
->U
.I
.Opcode
== RC_OPCODE_TEX
||
118 inst
->U
.I
.Opcode
== RC_OPCODE_TXB
||
119 inst
->U
.I
.Opcode
== RC_OPCODE_TXP
||
120 inst
->U
.I
.Opcode
== RC_OPCODE_KIL
)){
124 if ((mask
& s
->MovMask
) == mask
) {
125 if (s
->SourceClobbered
) {
128 } else if ((mask
& s
->DefinedMask
) == mask
) {
129 /* read from something entirely written by other instruction: this is okay */
131 /* read from component combination that is not well-defined without
132 * the MOV: cannot remove it */
137 static void copy_propagate_scan_write(void * data
, struct rc_instruction
* inst
,
138 rc_register_file file
, unsigned int index
, unsigned int mask
)
140 struct copy_propagate_state
* s
= data
;
142 if (s
->BranchDepth
< 0)
145 if (file
== s
->Mov
->U
.I
.DstReg
.File
&& index
== s
->Mov
->U
.I
.DstReg
.Index
) {
147 if (s
->BranchDepth
== 0)
148 s
->DefinedMask
|= mask
;
150 s
->DefinedMask
&= ~mask
;
152 if (file
== s
->Mov
->U
.I
.SrcReg
[0].File
&& index
== s
->Mov
->U
.I
.SrcReg
[0].Index
) {
153 if (mask
& s
->SourcedMask
)
154 s
->SourceClobbered
= 1;
155 } else if (s
->Mov
->U
.I
.SrcReg
[0].RelAddr
&& file
== RC_FILE_ADDRESS
) {
156 s
->SourceClobbered
= 1;
160 static void copy_propagate(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
162 struct copy_propagate_state s
;
164 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
||
165 inst_mov
->U
.I
.DstReg
.RelAddr
||
166 inst_mov
->U
.I
.WriteALUResult
||
167 inst_mov
->U
.I
.SaturateMode
)
170 memset(&s
, 0, sizeof(s
));
173 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
174 s
.DefinedMask
= RC_MASK_XYZW
& ~s
.MovMask
;
176 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
177 unsigned int swz
= GET_SWZ(inst_mov
->U
.I
.SrcReg
[0].Swizzle
, chan
);
178 s
.SourcedMask
|= (1 << swz
) & RC_MASK_XYZW
;
181 /* 1st pass: Check whether all subsequent readers can be changed */
182 for(struct rc_instruction
* inst
= inst_mov
->Next
;
183 inst
!= &c
->Program
.Instructions
;
185 const struct rc_opcode_info
* info
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
186 /* XXX In the future we might be able to make the optimizer
187 * smart enough to handle loops. */
188 if(inst
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
189 || inst
->U
.I
.Opcode
== RC_OPCODE_ENDLOOP
){
193 /* It is possible to do copy propagation in this situation,
194 * just not right now, see peephole_add_presub_inv() */
195 if (inst_mov
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
&&
196 (info
->NumSrcRegs
> 2 || info
->HasTexture
)) {
200 rc_for_all_reads_mask(inst
, copy_propagate_scan_read
, &s
);
201 rc_for_all_writes_mask(inst
, copy_propagate_scan_write
, &s
);
205 if (s
.BranchDepth
>= 0) {
206 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
208 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
209 || inst
->U
.I
.Opcode
== RC_OPCODE_ELSE
) {
211 if (s
.BranchDepth
< 0) {
212 s
.DefinedMask
&= ~s
.MovMask
;
222 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
223 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
226 for(struct rc_instruction
* inst
= inst_mov
->Next
;
227 inst
!= &c
->Program
.Instructions
;
229 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
230 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
231 if (inst
->U
.I
.SrcReg
[src
].File
== RC_FILE_TEMPORARY
&&
232 inst
->U
.I
.SrcReg
[src
].Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
233 unsigned int refmask
= 0;
235 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
236 unsigned int swz
= GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
);
237 refmask
|= (1 << swz
) & RC_MASK_XYZW
;
240 if ((refmask
& s
.MovMask
) == refmask
) {
241 inst
->U
.I
.SrcReg
[src
] = chain_srcregs(inst
->U
.I
.SrcReg
[src
], s
.Mov
->U
.I
.SrcReg
[0]);
242 if (s
.Mov
->U
.I
.SrcReg
[0].File
== RC_FILE_PRESUB
)
243 inst
->U
.I
.PreSub
= s
.Mov
->U
.I
.PreSub
;
248 if (opcode
->HasDstReg
) {
249 if (inst
->U
.I
.DstReg
.File
== RC_FILE_TEMPORARY
&&
250 inst
->U
.I
.DstReg
.Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
251 s
.MovMask
&= ~inst
->U
.I
.DstReg
.WriteMask
;
255 if (s
.BranchDepth
>= 0) {
256 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
258 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
259 || inst
->U
.I
.Opcode
== RC_OPCODE_ELSE
) {
261 if (s
.BranchDepth
< 0)
262 break; /* no more readers after this point */
267 /* Finally, remove the original MOV instruction */
268 rc_remove_instruction(inst_mov
);
272 * Check if a source register is actually always the same
275 static int is_src_uniform_constant(struct rc_src_register src
,
276 rc_swizzle
* pswz
, unsigned int * pnegate
)
280 if (src
.File
!= RC_FILE_NONE
) {
285 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
286 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
291 if (swz
== RC_SWIZZLE_UNUSED
)
296 *pnegate
= GET_BIT(src
.Negate
, chan
);
299 if (swz
!= *pswz
|| *pnegate
!= GET_BIT(src
.Negate
, chan
)) {
309 static void constant_folding_mad(struct rc_instruction
* inst
)
314 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[2], &swz
, &negate
)) {
315 if (swz
== RC_SWIZZLE_ZERO
) {
316 inst
->U
.I
.Opcode
= RC_OPCODE_MUL
;
321 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
322 if (swz
== RC_SWIZZLE_ONE
) {
323 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
325 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
326 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[2];
328 } else if (swz
== RC_SWIZZLE_ZERO
) {
329 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
330 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
335 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
336 if (swz
== RC_SWIZZLE_ONE
) {
337 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
339 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
340 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
342 } else if (swz
== RC_SWIZZLE_ZERO
) {
343 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
344 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
350 static void constant_folding_mul(struct rc_instruction
* inst
)
355 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
356 if (swz
== RC_SWIZZLE_ONE
) {
357 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
358 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
360 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
362 } else if (swz
== RC_SWIZZLE_ZERO
) {
363 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
364 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
369 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
370 if (swz
== RC_SWIZZLE_ONE
) {
371 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
373 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
375 } else if (swz
== RC_SWIZZLE_ZERO
) {
376 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
377 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
383 static void constant_folding_add(struct rc_instruction
* inst
)
388 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
389 if (swz
== RC_SWIZZLE_ZERO
) {
390 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
391 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
396 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
397 if (swz
== RC_SWIZZLE_ZERO
) {
398 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
405 * Replace 0.0, 1.0 and 0.5 immediate constants by their
406 * respective swizzles. Simplify instructions like ADD dst, src, 0;
408 static void constant_folding(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
410 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
413 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
414 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
415 struct rc_constant
* constant
;
416 struct rc_src_register newsrc
;
417 int have_real_reference
;
419 if (inst
->U
.I
.SrcReg
[src
].File
!= RC_FILE_CONSTANT
||
420 inst
->U
.I
.SrcReg
[src
].RelAddr
||
421 inst
->U
.I
.SrcReg
[src
].Index
>= c
->Program
.Constants
.Count
)
425 &c
->Program
.Constants
.Constants
[inst
->U
.I
.SrcReg
[src
].Index
];
427 if (constant
->Type
!= RC_CONSTANT_IMMEDIATE
)
430 newsrc
= inst
->U
.I
.SrcReg
[src
];
431 have_real_reference
= 0;
432 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
433 unsigned int swz
= GET_SWZ(newsrc
.Swizzle
, chan
);
441 imm
= constant
->u
.Immediate
[swz
];
446 if (baseimm
== 0.0) {
447 newswz
= RC_SWIZZLE_ZERO
;
448 } else if (baseimm
== 1.0) {
449 newswz
= RC_SWIZZLE_ONE
;
450 } else if (baseimm
== 0.5 && c
->has_half_swizzles
) {
451 newswz
= RC_SWIZZLE_HALF
;
453 have_real_reference
= 1;
457 SET_SWZ(newsrc
.Swizzle
, chan
, newswz
);
458 if (imm
< 0.0 && !newsrc
.Abs
)
459 newsrc
.Negate
^= 1 << chan
;
462 if (!have_real_reference
) {
463 newsrc
.File
= RC_FILE_NONE
;
467 /* don't make the swizzle worse */
468 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, newsrc
) &&
469 c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, inst
->U
.I
.SrcReg
[src
]))
472 inst
->U
.I
.SrcReg
[src
] = newsrc
;
475 /* Simplify instructions based on constants */
476 if (inst
->U
.I
.Opcode
== RC_OPCODE_MAD
)
477 constant_folding_mad(inst
);
479 /* note: MAD can simplify to MUL or ADD */
480 if (inst
->U
.I
.Opcode
== RC_OPCODE_MUL
)
481 constant_folding_mul(inst
);
482 else if (inst
->U
.I
.Opcode
== RC_OPCODE_ADD
)
483 constant_folding_add(inst
);
485 /* In case this instruction has been converted, make sure all of the
486 * registers that are no longer used are empty. */
487 opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
488 for(i
= opcode
->NumSrcRegs
; i
< 3; i
++) {
489 memset(&inst
->U
.I
.SrcReg
[i
], 0, sizeof(struct rc_src_register
));
494 * If src and dst use the same register, this function returns a writemask that
495 * indicates which components are read by src. Otherwise zero is returned.
497 static unsigned int src_reads_dst_mask(struct rc_src_register src
,
498 struct rc_dst_register dst
)
500 unsigned int mask
= 0;
502 if (dst
.File
!= src
.File
|| dst
.Index
!= src
.Index
) {
506 for(i
= 0; i
< 4; i
++) {
507 mask
|= 1 << GET_SWZ(src
.Swizzle
, i
);
509 mask
&= RC_MASK_XYZW
;
514 /* Return 1 if the source register has a constant swizzle (e.g. 0, 0.5, 1.0)
515 * in any of its channels. Return 0 otherwise. */
516 static int src_has_const_swz(struct rc_src_register src
) {
518 for(chan
= 0; chan
< 4; chan
++) {
519 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
520 if (swz
== RC_SWIZZLE_ZERO
|| swz
== RC_SWIZZLE_HALF
521 || swz
== RC_SWIZZLE_ONE
) {
528 static void peephole_scan_write(void * data
, struct rc_instruction
* inst
,
529 rc_register_file file
, unsigned int index
, unsigned int mask
)
531 struct peephole_state
* s
= data
;
532 if(s
->Inst
->U
.I
.DstReg
.File
== file
533 && s
->Inst
->U
.I
.DstReg
.Index
== index
) {
534 unsigned int common_mask
= s
->WriteMask
& mask
;
535 s
->WriteMask
&= ~common_mask
;
539 static int presub_helper(
540 struct radeon_compiler
* c
,
541 struct peephole_state
* s
,
542 rc_presubtract_op presub_opcode
,
543 rc_presub_replace_fn presub_replace
)
545 struct rc_instruction
* inst
;
546 unsigned int can_remove
= 0;
547 unsigned int cant_sub
= 0;
549 for(inst
= s
->Inst
->Next
; inst
!= &c
->Program
.Instructions
;
552 unsigned char can_use_presub
= 1;
553 const struct rc_opcode_info
* info
=
554 rc_get_opcode_info(inst
->U
.I
.Opcode
);
555 /* XXX: There are some situations where instructions
556 * with more than 2 src registers can use the
557 * presubtract select, but to keep things simple we
558 * will disable presubtract on these instructions for
560 if (info
->NumSrcRegs
> 2 || info
->HasTexture
) {
564 /* We can't use more than one presubtract value in an
565 * instruction, unless the two prsubtract operations
566 * are the same and read from the same registers. */
567 if (inst
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
) {
568 if (inst
->U
.I
.PreSub
.Opcode
!= presub_opcode
569 || inst
->U
.I
.PreSub
.SrcReg
[0].File
!=
570 s
->Inst
->U
.I
.SrcReg
[1].File
571 || inst
->U
.I
.PreSub
.SrcReg
[0].Index
!=
572 s
->Inst
->U
.I
.SrcReg
[1].Index
) {
577 /* Even if the instruction can't use a presubtract operation
578 * we still need to check if the instruction reads from
579 * s->Inst->U.I.DstReg, because if it does we must not
581 for(i
= 0; i
< info
->NumSrcRegs
; i
++) {
582 unsigned int mask
= src_reads_dst_mask(
583 inst
->U
.I
.SrcReg
[i
], s
->Inst
->U
.I
.DstReg
);
584 /* XXX We could be more aggressive here using
585 * presubtract. It is okay if SrcReg[i] only reads
586 * from some of the mask components. */
587 if(s
->Inst
->U
.I
.DstReg
.WriteMask
!= mask
) {
588 if (s
->Inst
->U
.I
.DstReg
.WriteMask
& mask
) {
595 if (cant_sub
|| !can_use_presub
) {
599 presub_replace(s
, inst
, i
);
604 rc_for_all_writes_mask(inst
, peephole_scan_write
, s
);
605 /* If all components of inst_add's destination register have
606 * been written to by subsequent instructions, the original
607 * value of the destination register is no longer valid and
608 * we can't keep doing substitutions. */
612 /* Make sure this instruction doesn't write to the presubtract source. */
613 if (inst
->U
.I
.DstReg
.WriteMask
&
614 src_reads_dst_mask(s
->Inst
->U
.I
.SrcReg
[1],
616 || src_reads_dst_mask(s
->Inst
->U
.I
.SrcReg
[0],
618 || info
->IsFlowControl
) {
625 /* This function assumes that s->Inst->U.I.SrcReg[0] and
626 * s->Inst->U.I.SrcReg[1] aren't both negative. */
627 static void presub_replace_add(struct peephole_state
*s
,
628 struct rc_instruction
* inst
,
629 unsigned int src_index
)
631 rc_presubtract_op presub_opcode
;
632 if (s
->Inst
->U
.I
.SrcReg
[1].Negate
|| s
->Inst
->U
.I
.SrcReg
[0].Negate
)
633 presub_opcode
= RC_PRESUB_SUB
;
635 presub_opcode
= RC_PRESUB_ADD
;
637 if (s
->Inst
->U
.I
.SrcReg
[1].Negate
) {
638 inst
->U
.I
.PreSub
.SrcReg
[0] = s
->Inst
->U
.I
.SrcReg
[1];
639 inst
->U
.I
.PreSub
.SrcReg
[1] = s
->Inst
->U
.I
.SrcReg
[0];
641 inst
->U
.I
.PreSub
.SrcReg
[0] = s
->Inst
->U
.I
.SrcReg
[0];
642 inst
->U
.I
.PreSub
.SrcReg
[1] = s
->Inst
->U
.I
.SrcReg
[1];
644 inst
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
645 inst
->U
.I
.PreSub
.SrcReg
[1].Negate
= 0;
646 inst
->U
.I
.PreSub
.Opcode
= presub_opcode
;
647 inst
->U
.I
.SrcReg
[src_index
] = chain_srcregs(inst
->U
.I
.SrcReg
[src_index
],
648 inst
->U
.I
.PreSub
.SrcReg
[0]);
649 inst
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
650 inst
->U
.I
.SrcReg
[src_index
].Index
= presub_opcode
;
653 static int is_presub_candidate(struct rc_instruction
* inst
)
655 const struct rc_opcode_info
* info
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
658 if (inst
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
|| inst
->U
.I
.SaturateMode
)
661 for(i
= 0; i
< info
->NumSrcRegs
; i
++) {
662 if (src_reads_dst_mask(inst
->U
.I
.SrcReg
[i
], inst
->U
.I
.DstReg
))
668 static int peephole_add_presub_add(
669 struct radeon_compiler
* c
,
670 struct rc_instruction
* inst_add
)
672 struct rc_src_register
* src0
= NULL
;
673 struct rc_src_register
* src1
= NULL
;
675 struct peephole_state s
;
677 if (!is_presub_candidate(inst_add
))
680 if (inst_add
->U
.I
.SrcReg
[0].Swizzle
!= inst_add
->U
.I
.SrcReg
[1].Swizzle
)
683 /* src0 and src1 can't have absolute values, only one can be negative, and they must be all negative or all positive. */
684 for (i
= 0; i
< 2; i
++) {
685 if (inst_add
->U
.I
.SrcReg
[i
].Abs
)
687 if ((inst_add
->U
.I
.SrcReg
[i
].Negate
688 & inst_add
->U
.I
.DstReg
.WriteMask
) ==
689 inst_add
->U
.I
.DstReg
.WriteMask
) {
690 src0
= &inst_add
->U
.I
.SrcReg
[i
];
692 src1
= &inst_add
->U
.I
.SrcReg
[i
];
694 src0
= &inst_add
->U
.I
.SrcReg
[i
];
702 s
.WriteMask
= inst_add
->U
.I
.DstReg
.WriteMask
;
703 if (presub_helper(c
, &s
, RC_PRESUB_ADD
, presub_replace_add
)) {
704 rc_remove_instruction(inst_add
);
710 static void presub_replace_inv(struct peephole_state
* s
,
711 struct rc_instruction
* inst
,
712 unsigned int src_index
)
714 /* We must be careful not to modify s->Inst, since it
715 * is possible it will remain part of the program.
716 * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
717 inst
->U
.I
.PreSub
.SrcReg
[0] = s
->Inst
->U
.I
.SrcReg
[1];
718 inst
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
719 inst
->U
.I
.PreSub
.Opcode
= RC_PRESUB_INV
;
720 inst
->U
.I
.SrcReg
[src_index
] = chain_srcregs(inst
->U
.I
.SrcReg
[src_index
],
721 inst
->U
.I
.PreSub
.SrcReg
[0]);
723 inst
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
724 inst
->U
.I
.SrcReg
[src_index
].Index
= RC_PRESUB_INV
;
728 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
729 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
730 * of the add instruction must have the constant 1 swizzle. This function
731 * does not check const registers to see if their value is 1.0, so it should
732 * be called after the constant_folding optimization.
734 * 0 if the ADD instruction is still part of the program.
735 * 1 if the ADD instruction is no longer part of the program.
737 static int peephole_add_presub_inv(
738 struct radeon_compiler
* c
,
739 struct rc_instruction
* inst_add
)
741 unsigned int i
, swz
, mask
;
742 struct peephole_state s
;
744 if (!is_presub_candidate(inst_add
))
747 mask
= inst_add
->U
.I
.DstReg
.WriteMask
;
749 /* Check if src0 is 1. */
750 /* XXX It would be nice to use is_src_uniform_constant here, but that
751 * function only works if the register's file is RC_FILE_NONE */
752 for(i
= 0; i
< 4; i
++ ) {
753 swz
= GET_SWZ(inst_add
->U
.I
.SrcReg
[0].Swizzle
, i
);
754 if(((1 << i
) & inst_add
->U
.I
.DstReg
.WriteMask
)
755 && swz
!= RC_SWIZZLE_ONE
) {
761 if ((inst_add
->U
.I
.SrcReg
[1].Negate
& inst_add
->U
.I
.DstReg
.WriteMask
) !=
762 inst_add
->U
.I
.DstReg
.WriteMask
763 || inst_add
->U
.I
.SrcReg
[1].Abs
764 || (inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_TEMPORARY
765 && inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_CONSTANT
)
766 || src_has_const_swz(inst_add
->U
.I
.SrcReg
[1])) {
771 /* Setup the peephole_state information. */
773 s
.WriteMask
= inst_add
->U
.I
.DstReg
.WriteMask
;
775 if (presub_helper(c
, &s
, RC_PRESUB_INV
, presub_replace_inv
)) {
776 rc_remove_instruction(inst_add
);
784 * 0 if inst is still part of the program.
785 * 1 if inst is no longer part of the program.
787 static int peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
789 switch(inst
->U
.I
.Opcode
){
792 if(peephole_add_presub_inv(c
, inst
))
794 if(peephole_add_presub_add(c
, inst
))
804 void rc_optimize(struct radeon_compiler
* c
, void *user
)
806 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
807 while(inst
!= &c
->Program
.Instructions
) {
808 struct rc_instruction
* cur
= inst
;
811 constant_folding(c
, cur
);
816 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
) {
817 copy_propagate(c
, cur
);
818 /* cur may no longer be part of the program */