2 * Copyright (C) 2009 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "radeon_dataflow.h"
30 #include "radeon_compiler.h"
/*
 * chain_srcregs: build one source-register descriptor equivalent to applying
 * `outer` to a value that was itself produced by reading `inner`.
 *
 * From the statements visible here:
 *  - File, Index and RelAddr are copied from `inner`;
 *  - Negate is assembled from `inner.Negate` bits selected through
 *    `outer.Swizzle`, and is also combined with `outer.Negate`;
 *  - Swizzle is combine_swizzles(inner.Swizzle, outer.Swizzle).
 *
 * NOTE(review): the leading integers look like line numbers from the original
 * listing, and they skip 34, 39-40, 42, 44, 47, 49, 51 and 53-54. The opening
 * brace, whatever condition selects between the two Negate/Abs assignments,
 * any guard on `swz`, the closing braces and the `return` are therefore not
 * visible in this text. Confirm against the upstream file before relying on
 * the exact control flow.
 */
33 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
35 struct rc_src_register combine
;
/* The register being addressed is always the inner one. */
36 combine
.File
= inner
.File
;
37 combine
.Index
= inner
.Index
;
38 combine
.RelAddr
= inner
.RelAddr
;
/* NOTE(review): listing lines 39-40 are missing before this assignment. */
41 combine
.Negate
= outer
.Negate
;
/* NOTE(review): listing lines 42 and 44 are missing around this assignment. */
43 combine
.Abs
= inner
.Abs
;
/* For each output channel, look up which inner channel `outer` selects. */
45 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
46 unsigned int swz
= GET_SWZ(outer
.Swizzle
, chan
);
/* Carry the inner negate bit of the selected channel over to `chan`. */
48 combine
.Negate
|= GET_BIT(inner
.Negate
, swz
) << chan
;
/* Toggle by the outer negate bits. */
50 combine
.Negate
^= outer
.Negate
;
52 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
/*
 * Scratch state for analysing one MOV instruction. It is passed as the
 * `data` pointer to peephole_scan_read / peephole_scan_write, which cast it
 * back to `struct peephole_state *`.
 *
 * NOTE(review): the listing stops after the comment for the branch-depth
 * member. The member itself (used elsewhere as `BranchDepth` and compared
 * against 0, so presumably signed) and the closing brace are not visible in
 * this text.
 */
56 struct peephole_state
{
/* Compiler context. NOTE(review): no visible statement reads or writes C. */
57 struct radeon_compiler
* C
;
/* The MOV under analysis; its DstReg and SrcReg[0] are what the scans compare against. */
58 struct rc_instruction
* Mov
;
/* NOTE(review): no visible statement sets or tests Conflict; presumably it
 * marks that the MOV cannot be removed -- confirm against upstream. */
59 unsigned int Conflict
:1;
61 /** Whether Mov's source has been clobbered */
62 unsigned int SourceClobbered
:1;
64 /** Which components of Mov's destination register are still from that Mov? */
65 unsigned int MovMask
:4;
67 /** Which components of Mov's destination register are clearly *not* from that Mov */
68 unsigned int DefinedMask
:4;
70 /** Which components of Mov's source register are sourced */
71 unsigned int SourcedMask
:4;
73 /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
/*
 * peephole_scan_read: per-read callback handed to rc_for_all_reads_mask()
 * by peephole().
 *
 *   data  - the struct peephole_state for the MOV being analysed
 *   inst  - the instruction performing the read (not used in the visible text)
 *   file  - register file being read
 *   index - register index being read
 *   mask  - component mask being read
 *
 * The first test singles out reads that are NOT of the temporary register
 * written by the MOV. The remaining tests classify a read by comparing
 * `mask` against MovMask and DefinedMask.
 *
 * NOTE(review): the statements controlled by these conditions (listing
 * lines 83-84, 87-88, 91, 94-96) are not visible in this text. Presumably
 * unrelated reads return early and the problematic cases set s->Conflict --
 * confirm against upstream.
 */
77 static void peephole_scan_read(void * data
, struct rc_instruction
* inst
,
78 rc_register_file file
, unsigned int index
, unsigned int mask
)
80 struct peephole_state
* s
= data
;
/* Is this a read of something other than the MOV's destination temporary? */
82 if (file
!= RC_FILE_TEMPORARY
|| index
!= s
->Mov
->U
.I
.DstReg
.Index
)
/* Every component read is still provided by the MOV. */
85 if ((mask
& s
->MovMask
) == mask
) {
/* ...but the MOV's source has been overwritten in the meantime. */
86 if (s
->SourceClobbered
) {
89 } else if ((mask
& s
->DefinedMask
) == mask
) {
90 /* read from something entirely written by other instruction: this is okay */
92 /* read from component combination that is not well-defined without
93 * the MOV: cannot remove it */
/*
 * peephole_scan_write: per-write callback handed to rc_for_all_writes_mask()
 * by peephole().
 *
 *   data  - the struct peephole_state for the MOV being analysed
 *   inst  - the instruction performing the write (not used in the visible text)
 *   file  - register file being written
 *   index - register index being written
 *   mask  - component mask being written
 *
 * Visible behaviour:
 *  - a write to the MOV's destination register updates DefinedMask;
 *  - a write to the MOV's source register that overlaps SourcedMask sets
 *    SourceClobbered;
 *  - a write to RC_FILE_ADDRESS while the MOV's source uses relative
 *    addressing also sets SourceClobbered.
 *
 * NOTE(review): listing lines 104-105, 107, 110 and 117-118 are missing from
 * this text. What the `BranchDepth < 0` test guards, and how the two
 * DefinedMask updates are selected (presumably an `else` between them), are
 * not visible -- confirm against upstream.
 */
98 static void peephole_scan_write(void * data
, struct rc_instruction
* inst
,
99 rc_register_file file
, unsigned int index
, unsigned int mask
)
101 struct peephole_state
* s
= data
;
/* Negative depth means the scan has left the MOV's block (see the struct's comment). */
103 if (s
->BranchDepth
< 0)
/* Write to the same register the MOV wrote. */
106 if (file
== s
->Mov
->U
.I
.DstReg
.File
&& index
== s
->Mov
->U
.I
.DstReg
.Index
) {
/* At the MOV's own nesting level the written components count as defined by another instruction. */
108 if (s
->BranchDepth
== 0)
109 s
->DefinedMask
|= mask
;
/* NOTE(review): listing line 110 is missing here. */
111 s
->DefinedMask
&= ~mask
;
/* Write to the register the MOV read from. */
112 } else if (file
== s
->Mov
->U
.I
.SrcReg
[0].File
&& index
== s
->Mov
->U
.I
.SrcReg
[0].Index
) {
/* Only matters if a component the MOV actually sourced is overwritten. */
113 if (mask
& s
->SourcedMask
)
114 s
->SourceClobbered
= 1;
/* A relatively addressed source also depends on the address register file. */
115 } else if (s
->Mov
->U
.I
.SrcReg
[0].RelAddr
&& file
== RC_FILE_ADDRESS
) {
116 s
->SourceClobbered
= 1;
/*
 * peephole: try to eliminate one MOV by making later readers of its
 * destination read the MOV's source directly.
 *
 *   c        - compiler whose Program.Instructions list is walked
 *   inst_mov - the MOV candidate
 *
 * Outline of the visible statements:
 *  1. Test that the destination is a temporary and WriteALUResult is unset.
 *  2. Zero the scan state, then seed MovMask, DefinedMask and SourcedMask
 *     from the MOV's write mask and source swizzle.
 *  3. First pass: walk the instructions after the MOV, feeding every read
 *     and write to peephole_scan_read / peephole_scan_write, with
 *     IF/ENDIF-dependent handling of BranchDepth.
 *  4. Second pass: for each later source operand that reads the MOV's
 *     destination temporary and whose referenced components are all still
 *     covered by MovMask, replace the operand with chain_srcregs(operand,
 *     MOV source). MovMask shrinks as later instructions overwrite the
 *     destination.
 *  5. Remove the MOV with rc_remove_instruction().
 *
 * NOTE(review): many listing lines are missing from this text (121, 125-126,
 * 128-129, 136, 141, 144-146, 149, 151, 154-162, 165-166, 169, 171, 176,
 * 180-181, 184-186, 191-193, 196, 198, 201-204, 207). The early return, the
 * initialisation of s.C / s.Mov, the loop increments, the BranchDepth
 * updates and the bail-out between the two passes are therefore not visible
 * -- confirm against upstream.
 */
120 static void peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
122 struct peephole_state s
;
/* Eligibility test: destination must be a temporary and WriteALUResult unset. */
124 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
|| inst_mov
->U
.I
.WriteALUResult
)
127 memset(&s
, 0, sizeof(s
));
/* Initially every component the MOV writes still comes from the MOV... */
130 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
/* ...and every other component of the destination counts as defined elsewhere. */
131 s
.DefinedMask
= RC_MASK_XYZW
& ~s
.MovMask
;
/* Collect which source components the MOV's swizzle references; the
 * RC_MASK_XYZW mask drops selector values whose bit falls outside it. */
133 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
134 unsigned int swz
= GET_SWZ(inst_mov
->U
.I
.SrcReg
[0].Swizzle
, chan
);
135 s
.SourcedMask
|= (1 << swz
) & RC_MASK_XYZW
;
138 /* 1st pass: Check whether all subsequent readers can be changed */
139 for(struct rc_instruction
* inst
= inst_mov
->Next
;
140 inst
!= &c
->Program
.Instructions
;
142 rc_for_all_reads_mask(inst
, peephole_scan_read
, &s
);
143 rc_for_all_writes_mask(inst
, peephole_scan_write
, &s
);
/* Branch tracking applies only while the scan is still inside the MOV's block. */
147 if (s
.BranchDepth
>= 0) {
148 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
150 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
) {
/* The scan has left the MOV's block: components still attributed to the
 * MOV no longer count as defined. */
152 if (s
.BranchDepth
< 0) {
153 s
.DefinedMask
&= ~s
.MovMask
;
163 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
/* Reset MovMask, since the first pass may have modified it through the write callback. */
164 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
167 for(struct rc_instruction
* inst
= inst_mov
->Next
;
168 inst
!= &c
->Program
.Instructions
;
170 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
/* Examine each source operand that reads the MOV's destination temporary. */
172 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
173 if (inst
->U
.I
.SrcReg
[src
].File
== RC_FILE_TEMPORARY
&&
174 inst
->U
.I
.SrcReg
[src
].Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
/* Components of the destination that this operand's swizzle references. */
175 unsigned int refmask
= 0;
177 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
178 unsigned int swz
= GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
);
179 refmask
|= (1 << swz
) & RC_MASK_XYZW
;
/* Rewrite only when everything referenced is still the MOV's value; the
 * reader's operand is passed as `outer`, the MOV's source as `inner`. */
182 if ((refmask
& s
.MovMask
) == refmask
)
183 inst
->U
.I
.SrcReg
[src
] = chain_srcregs(inst
->U
.I
.SrcReg
[src
], s
.Mov
->U
.I
.SrcReg
[0]);
/* A later write to the destination temporary removes those components from MovMask. */
187 if (opcode
->HasDstReg
) {
188 if (inst
->U
.I
.DstReg
.File
== RC_FILE_TEMPORARY
&&
189 inst
->U
.I
.DstReg
.Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
190 s
.MovMask
&= ~inst
->U
.I
.DstReg
.WriteMask
;
/* Same IF/ENDIF tracking as in the first pass. */
194 if (s
.BranchDepth
>= 0) {
195 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
197 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
) {
199 if (s
.BranchDepth
< 0)
200 break; /* no more readers after this point */
205 /* Finally, remove the original MOV instruction */
206 rc_remove_instruction(inst_mov
);
209 void rc_optimize(struct radeon_compiler
* c
)
211 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
212 while(inst
!= &c
->Program
.Instructions
) {
213 struct rc_instruction
* cur
= inst
;
216 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
)