2 * Copyright (C) 2009 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "radeon_dataflow.h"
30 #include "radeon_compiler.h"
31 #include "radeon_swizzle.h"
/**
 * Combine two source register descriptions into one, as if `inner` were
 * read through `outer`.
 *
 * From the visible statements: the register location (File, Index, RelAddr)
 * is taken from `inner`, the swizzle is combine_swizzles(inner.Swizzle,
 * outer.Swizzle), and the negate bits are built from both operands.
 *
 * @param outer The source operand as written in the reading instruction.
 * @param inner The source operand that `outer` is being redirected to.
 *
 * NOTE(review): the embedded line numbers in this listing skip (39 -> 42,
 * 42 -> 44, 47 -> 49, ...), and neither the braces nor a return statement
 * are visible. Some statements (possibly conditionals around the Negate/Abs
 * assignments) appear to be missing; confirm against the complete file.
 */
34 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
36 struct rc_src_register combine
;
/* The combined operand addresses the same register as `inner`. */
37 combine
.File
= inner
.File
;
38 combine
.Index
= inner
.Index
;
39 combine
.RelAddr
= inner
.RelAddr
;
42 combine
.Negate
= outer
.Negate
;
44 combine
.Abs
= inner
.Abs
;
/* For each output channel, find the channel the outer swizzle selects and
 * move the inner negate bit of that channel into the output position. */
46 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
47 unsigned int swz
= GET_SWZ(outer
.Swizzle
, chan
);
49 combine
.Negate
|= GET_BIT(inner
.Negate
, swz
) << chan
;
/* Toggle the accumulated bits with the outer operand's own negate bits. */
51 combine
.Negate
^= outer
.Negate
;
53 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
/**
 * Bookkeeping for examining a single MOV instruction. A pointer to this
 * struct is handed as the `data` argument to peephole_scan_read and
 * peephole_scan_write.
 *
 * NOTE(review): the BranchDepth member described by the last comment below
 * is used by peephole_scan_write and peephole, but its declaration and the
 * closing brace of the struct are not visible in this listing.
 */
57 struct peephole_state
{
58 struct radeon_compiler
* C
;
/** The MOV instruction whose destination and source registers are tracked */
59 struct rc_instruction
* Mov
;
/** NOTE(review): no assignment to Conflict is visible in this listing;
 * presumably it flags that the Mov cannot be removed -- confirm. */
60 unsigned int Conflict
:1;
62 /** Whether Mov's source has been clobbered */
63 unsigned int SourceClobbered
:1;
65 /** Which components of Mov's destination register are still from that Mov? */
66 unsigned int MovMask
:4;
68 /** Which components of Mov's destination register are clearly *not* from that Mov */
69 unsigned int DefinedMask
:4;
71 /** Which components of Mov's source register are sourced */
72 unsigned int SourcedMask
:4;
74 /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
79 * This is a callback function that is meant to be passed to
80 * rc_for_all_reads_mask. This function will be called once for each source register in inst.
82 * @param inst The instruction that the source register belongs to.
83 * @param file The register file of the source register.
84 * @param index The index of the source register.
85 * @param mask The components of the source register that are being read from.
/**
 * Read callback used with rc_for_all_reads_mask (see peephole); `data` is
 * the struct peephole_state of the MOV being examined.
 *
 * The visible tests, in order:
 *  - the read is from RC_FILE_ADDRESS;
 *  - the read is not from the temporary register that the Mov writes;
 *  - the Mov's source is neither a temporary nor an input while `inst` is
 *    TEX, TXB, TXP or KIL;
 *  - `mask` lies entirely within MovMask (then SourceClobbered is checked);
 *  - `mask` lies entirely within DefinedMask;
 *  - otherwise the read mixes components in a way that needs the Mov.
 *
 * NOTE(review): the statements guarded by these tests are not visible in
 * this listing (the embedded line numbers skip, e.g. 93 -> 98, 109 -> 113),
 * and the comment opened at embedded line 101 has no visible terminator.
 * Presumably the failing cases mark a conflict in the state -- confirm
 * against the complete file.
 */
87 static void peephole_scan_read(void * data
, struct rc_instruction
* inst
,
88 rc_register_file file
, unsigned int index
, unsigned int mask
)
90 struct peephole_state
* s
= data
;
92 /* XXX This could probably be handled better. */
93 if (file
== RC_FILE_ADDRESS
) {
/* Test for reads that are not of the Mov's destination temporary. */
98 if (file
!= RC_FILE_TEMPORARY
|| index
!= s
->Mov
->U
.I
.DstReg
.Index
)
101 /* These instructions cannot read from the constants file.
102 * see radeonTransformTEX()
104 if(s
->Mov
->U
.I
.SrcReg
[0].File
!= RC_FILE_TEMPORARY
&&
105 s
->Mov
->U
.I
.SrcReg
[0].File
!= RC_FILE_INPUT
&&
106 (inst
->U
.I
.Opcode
== RC_OPCODE_TEX
||
107 inst
->U
.I
.Opcode
== RC_OPCODE_TXB
||
108 inst
->U
.I
.Opcode
== RC_OPCODE_TXP
||
109 inst
->U
.I
.Opcode
== RC_OPCODE_KIL
)){
113 if ((mask
& s
->MovMask
) == mask
) {
114 if (s
->SourceClobbered
) {
117 } else if ((mask
& s
->DefinedMask
) == mask
) {
118 /* read from something entirely written by other instruction: this is okay */
120 /* read from component combination that is not well-defined without
121 * the MOV: cannot remove it */
/**
 * Write callback used with rc_for_all_writes_mask (see peephole); `data` is
 * the struct peephole_state of the MOV being examined.
 *
 * Updates the state for a write of components `mask` to register
 * (`file`, `index`):
 *  - a write to the Mov's destination register adjusts DefinedMask;
 *  - a write to the Mov's source register that touches a sourced component
 *    sets SourceClobbered;
 *  - a write to the address register sets SourceClobbered when the Mov's
 *    source is relatively addressed.
 *
 * NOTE(review): the embedded line numbers skip (131 -> 134, 134 -> 136,
 * 137 -> 139), so the statement guarded by the BranchDepth < 0 test, at
 * least one statement in the destination branch, and presumably an `else`
 * between the two DefinedMask updates are not visible here.
 */
126 static void peephole_scan_write(void * data
, struct rc_instruction
* inst
,
127 rc_register_file file
, unsigned int index
, unsigned int mask
)
129 struct peephole_state
* s
= data
;
/* Negative depth means the scan has left the Mov's block (see the
 * BranchDepth description in struct peephole_state). */
131 if (s
->BranchDepth
< 0)
/* Write to the same register the Mov writes. */
134 if (file
== s
->Mov
->U
.I
.DstReg
.File
&& index
== s
->Mov
->U
.I
.DstReg
.Index
) {
/* At depth 0 the written components are recorded as defined elsewhere. */
136 if (s
->BranchDepth
== 0)
137 s
->DefinedMask
|= mask
;
139 s
->DefinedMask
&= ~mask
;
/* Write to the register the Mov reads from. */
141 if (file
== s
->Mov
->U
.I
.SrcReg
[0].File
&& index
== s
->Mov
->U
.I
.SrcReg
[0].Index
) {
/* Only writes that overlap a component the Mov actually sources matter. */
142 if (mask
& s
->SourcedMask
)
143 s
->SourceClobbered
= 1;
/* A relatively addressed source is also treated as clobbered by any write
 * to the address register file. */
144 } else if (s
->Mov
->U
.I
.SrcReg
[0].RelAddr
&& file
== RC_FILE_ADDRESS
) {
145 s
->SourceClobbered
= 1;
/**
 * Try to eliminate the MOV instruction `inst_mov`.
 *
 * Visible outline:
 *  1. Test that the MOV writes a non-relatively-addressed temporary and does
 *     not write the ALU result.
 *  2. Initialise a peephole_state: MovMask is the MOV's write mask,
 *     DefinedMask is the complement within XYZW, SourcedMask collects the
 *     source components selected by the MOV's swizzle.
 *  3. First pass: walk the following instructions, feeding every read and
 *     write to peephole_scan_read / peephole_scan_write and tracking
 *     IF / ELSE / ENDIF through BranchDepth. BGNLOOP and ENDLOOP are tested
 *     for explicitly.
 *  4. Second pass: walk the instructions again and replace each source that
 *     reads only components still provided by the MOV with
 *     chain_srcregs(source, MOV source); writes to the MOV's destination
 *     shrink MovMask.
 *  5. Remove the MOV via rc_remove_instruction.
 *
 * @param c        Compiler whose Program.Instructions list is walked.
 * @param inst_mov The MOV instruction to try to remove.
 *
 * NOTE(review): many lines are missing from this listing (the embedded line
 * numbers skip, e.g. 155 -> 158, 158 -> 161, 171 -> 173, 191 -> 201), among
 * them the early exits, the assignments of s.C / s.Mov, the loop increments
 * and the BranchDepth updates. Presumably the first pass aborts on a
 * detected conflict before the second pass runs -- confirm against the
 * complete file.
 */
149 static void peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
151 struct peephole_state s
;
/* Precondition test on the MOV's destination. */
153 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
||
154 inst_mov
->U
.I
.DstReg
.RelAddr
||
155 inst_mov
->U
.I
.WriteALUResult
)
158 memset(&s
, 0, sizeof(s
));
/* Initially every written component comes from the MOV and every other
 * component counts as defined elsewhere. */
161 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
162 s
.DefinedMask
= RC_MASK_XYZW
& ~s
.MovMask
;
/* Collect the source components the MOV's swizzle selects; the mask with
 * RC_MASK_XYZW filters the shifted bit before it is accumulated. */
164 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
165 unsigned int swz
= GET_SWZ(inst_mov
->U
.I
.SrcReg
[0].Swizzle
, chan
);
166 s
.SourcedMask
|= (1 << swz
) & RC_MASK_XYZW
;
169 /* 1st pass: Check whether all subsequent readers can be changed */
170 for(struct rc_instruction
* inst
= inst_mov
->Next
;
171 inst
!= &c
->Program
.Instructions
;
173 /* XXX In the future we might be able to make the optimizer
174 * smart enough to handle loops. */
175 if(inst
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
176 || inst
->U
.I
.Opcode
== RC_OPCODE_ENDLOOP
){
/* Reads are scanned before writes for the same instruction. */
179 rc_for_all_reads_mask(inst
, peephole_scan_read
, &s
);
180 rc_for_all_writes_mask(inst
, peephole_scan_write
, &s
);
/* Branch tracking while still inside the MOV's block. */
184 if (s
.BranchDepth
>= 0) {
185 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
187 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
188 || inst
->U
.I
.Opcode
== RC_OPCODE_ELSE
) {
/* Once the depth is negative, the MOV's components no longer count as
 * defined. */
190 if (s
.BranchDepth
< 0) {
191 s
.DefinedMask
&= ~s
.MovMask
;
201 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
/* MovMask is reset because the second pass tracks overwrites itself. */
202 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
205 for(struct rc_instruction
* inst
= inst_mov
->Next
;
206 inst
!= &c
->Program
.Instructions
;
208 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
210 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
/* Source operand reads the MOV's destination temporary. */
211 if (inst
->U
.I
.SrcReg
[src
].File
== RC_FILE_TEMPORARY
&&
212 inst
->U
.I
.SrcReg
[src
].Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
213 unsigned int refmask
= 0;
/* Components of the temporary that this operand's swizzle references. */
215 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
216 unsigned int swz
= GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
);
217 refmask
|= (1 << swz
) & RC_MASK_XYZW
;
/* Redirect only when every referenced component still comes from the MOV. */
220 if ((refmask
& s
.MovMask
) == refmask
)
221 inst
->U
.I
.SrcReg
[src
] = chain_srcregs(inst
->U
.I
.SrcReg
[src
], s
.Mov
->U
.I
.SrcReg
[0]);
/* A later write to the MOV's destination removes those components from
 * MovMask. */
225 if (opcode
->HasDstReg
) {
226 if (inst
->U
.I
.DstReg
.File
== RC_FILE_TEMPORARY
&&
227 inst
->U
.I
.DstReg
.Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
228 s
.MovMask
&= ~inst
->U
.I
.DstReg
.WriteMask
;
232 if (s
.BranchDepth
>= 0) {
233 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
235 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
236 || inst
->U
.I
.Opcode
== RC_OPCODE_ELSE
) {
238 if (s
.BranchDepth
< 0)
239 break; /* no more readers after this point */
244 /* Finally, remove the original MOV instruction */
245 rc_remove_instruction(inst_mov
);
249 * Check if a source register is actually always the same swizzle constant.
/**
 * Examine the four channels of `src` and compare each channel's swizzle
 * selector and negate bit against the values stored through `pswz` and
 * `pnegate`.
 *
 * @param src     Source operand to inspect.
 * @param pswz    Swizzle value the channels are compared against.
 * @param pnegate Negate bit the channels are compared against; written from
 *                GET_BIT(src.Negate, chan) in the visible code.
 *
 * NOTE(review): the return statements, the assignment to *pswz, and the
 * statements guarded by the File and RC_SWIZZLE_UNUSED tests are not visible
 * in this listing (embedded line numbers skip, e.g. 257 -> 262, 268 -> 273).
 * Callers use the result as a boolean and then inspect the swizzle, so it
 * presumably returns non-zero only when all used channels agree -- confirm.
 */
252 static int is_src_uniform_constant(struct rc_src_register src
,
253 rc_swizzle
* pswz
, unsigned int * pnegate
)
257 if (src
.File
!= RC_FILE_NONE
) {
262 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
263 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
/* Channels marked unused get their own test. */
268 if (swz
== RC_SWIZZLE_UNUSED
)
273 *pnegate
= GET_BIT(src
.Negate
, chan
);
/* Mismatch in either the selector or the negate bit of this channel. */
276 if (swz
!= *pswz
|| *pnegate
!= GET_BIT(src
.Negate
, chan
)) {
/**
 * Simplify a MAD instruction in place when one of its operands is a uniform
 * constant swizzle (as reported by is_src_uniform_constant):
 *  - operand 2 uniformly zero: opcode becomes MUL;
 *  - operand 1 uniformly one:  opcode becomes ADD, operand 1 is replaced by
 *    operand 2;
 *  - operand 1 uniformly zero: opcode becomes MOV of operand 2;
 *  - operand 0 uniformly one:  opcode becomes ADD, operand 0 is replaced by
 *    operand 2;
 *  - operand 0 uniformly zero: opcode becomes MOV of operand 2.
 *
 * NOTE(review): the declarations of `swz` / `negate`, any early returns and
 * the condition guarding the two Negate toggles are not visible in this
 * listing (embedded line numbers skip, e.g. 287 -> 292, 301 -> 303).
 * Presumably the toggles apply only when the constant is negated -- confirm.
 */
287 static void constant_folding_mad(struct rc_instruction
* inst
)
/* Addend is zero: only the multiplication remains. */
292 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[2], &swz
, &negate
)) {
293 if (swz
== RC_SWIZZLE_ZERO
) {
294 inst
->U
.I
.Opcode
= RC_OPCODE_MUL
;
/* Second factor is a uniform constant. */
299 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
300 if (swz
== RC_SWIZZLE_ONE
) {
301 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
303 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
304 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[2];
306 } else if (swz
== RC_SWIZZLE_ZERO
) {
307 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
308 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
/* First factor is a uniform constant. */
313 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
314 if (swz
== RC_SWIZZLE_ONE
) {
315 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
317 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
318 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
320 } else if (swz
== RC_SWIZZLE_ZERO
) {
321 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
322 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
/**
 * Simplify a MUL instruction in place when one operand is a uniform constant
 * swizzle (as reported by is_src_uniform_constant):
 *  - operand 0 uniformly one:  opcode becomes MOV of operand 1;
 *  - operand 0 uniformly zero: opcode becomes MOV with operand 0's swizzle
 *    set to RC_SWIZZLE_0000;
 *  - operand 1 uniformly one:  opcode becomes MOV (operand 0 is kept);
 *  - operand 1 uniformly zero: opcode becomes MOV with operand 0's swizzle
 *    set to RC_SWIZZLE_0000.
 *
 * NOTE(review): the declarations of `swz` / `negate`, any early returns and
 * the condition guarding the Negate toggles are not visible in this listing
 * (embedded line numbers skip, e.g. 328 -> 333, 336 -> 338). Presumably the
 * toggles apply only when the constant is negated -- confirm.
 */
328 static void constant_folding_mul(struct rc_instruction
* inst
)
333 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
334 if (swz
== RC_SWIZZLE_ONE
) {
335 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
/* The surviving factor moves into operand slot 0. */
336 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
338 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
340 } else if (swz
== RC_SWIZZLE_ZERO
) {
341 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
342 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
347 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
348 if (swz
== RC_SWIZZLE_ONE
) {
349 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
351 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
353 } else if (swz
== RC_SWIZZLE_ZERO
) {
354 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
355 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
/**
 * Simplify an ADD instruction in place when one operand is a uniform zero
 * swizzle (as reported by is_src_uniform_constant):
 *  - operand 0 uniformly zero: opcode becomes MOV of operand 1;
 *  - operand 1 uniformly zero: opcode becomes MOV (operand 0 is kept).
 *
 * NOTE(review): the declarations of `swz` / `negate` and any early returns
 * are not visible in this listing (embedded line numbers skip, e.g.
 * 361 -> 366, 369 -> 374).
 */
361 static void constant_folding_add(struct rc_instruction
* inst
)
366 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
367 if (swz
== RC_SWIZZLE_ZERO
) {
368 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
/* The non-zero addend moves into operand slot 0. */
369 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
374 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
375 if (swz
== RC_SWIZZLE_ZERO
) {
376 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
384 * Replace 0.0, 1.0 and 0.5 immediate constants by their
385 * respective swizzles. Simplify instructions like ADD dst, src, 0;
/**
 * Constant folding for a single instruction.
 *
 * Step 1: for every source operand that reads an immediate constant (constant
 * file, no relative addressing, index within c->Program.Constants.Count),
 * build a candidate operand `newsrc` in which channels holding 0.0, 1.0 or
 * (when c->has_half_swizzles is set) 0.5 are replaced by the matching
 * RC_SWIZZLE_* selector. If no channel still needs the constant, the
 * candidate's file becomes RC_FILE_NONE. The candidate is compared with the
 * old operand through c->SwizzleCaps->IsNative before being stored.
 *
 * Step 2: MAD, MUL and ADD are passed to their constant_folding_* helpers;
 * MUL/ADD are tested after MAD because MAD can simplify into either.
 *
 * @param c    Compiler providing the constant table and swizzle capabilities.
 * @param inst Instruction to fold; modified in place.
 *
 * NOTE(review): several lines are missing from this listing (embedded line
 * numbers skip, e.g. 395 -> 398, 407 -> 412, 412 -> 417, 440 -> 443), among
 * them the `continue` statements, the declarations of `baseimm` and
 * `newswz`, and the handling of non-XYZW swizzles. `baseimm` is presumably
 * the absolute value of `imm` -- confirm against the complete file.
 */
387 static void constant_folding(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
389 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
391 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
392 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
/* Filter: only directly addressed, in-range constant-file operands. */
393 if (inst
->U
.I
.SrcReg
[src
].File
!= RC_FILE_CONSTANT
||
394 inst
->U
.I
.SrcReg
[src
].RelAddr
||
395 inst
->U
.I
.SrcReg
[src
].Index
>= c
->Program
.Constants
.Count
)
398 struct rc_constant
* constant
=
399 &c
->Program
.Constants
.Constants
[inst
->U
.I
.SrcReg
[src
].Index
];
/* Filter: only immediate constants have values known here. */
401 if (constant
->Type
!= RC_CONSTANT_IMMEDIATE
)
/* Work on a copy so the original operand can be kept if the result is
 * not acceptable. */
404 struct rc_src_register newsrc
= inst
->U
.I
.SrcReg
[src
];
/* Becomes 1 when at least one channel still needs the constant itself. */
405 int have_real_reference
= 0;
406 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
407 unsigned int swz
= GET_SWZ(newsrc
.Swizzle
, chan
);
412 float imm
= constant
->u
.Immediate
[swz
];
417 if (baseimm
== 0.0) {
418 newswz
= RC_SWIZZLE_ZERO
;
419 } else if (baseimm
== 1.0) {
420 newswz
= RC_SWIZZLE_ONE
;
421 } else if (baseimm
== 0.5 && c
->has_half_swizzles
) {
422 newswz
= RC_SWIZZLE_HALF
;
424 have_real_reference
= 1;
428 SET_SWZ(newsrc
.Swizzle
, chan
, newswz
);
/* A negative immediate is expressed by toggling this channel's negate
 * bit, unless the operand takes the absolute value. */
429 if (imm
< 0.0 && !newsrc
.Abs
)
430 newsrc
.Negate
^= 1 << chan
;
/* No channel reads the constant any more: drop the register file. */
433 if (!have_real_reference
) {
434 newsrc
.File
= RC_FILE_NONE
;
438 /* don't make the swizzle worse */
439 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, newsrc
) &&
440 c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, inst
->U
.I
.SrcReg
[src
]))
443 inst
->U
.I
.SrcReg
[src
] = newsrc
;
446 /* Simplify instructions based on constants */
447 if (inst
->U
.I
.Opcode
== RC_OPCODE_MAD
)
448 constant_folding_mad(inst
);
450 /* note: MAD can simplify to MUL or ADD */
451 if (inst
->U
.I
.Opcode
== RC_OPCODE_MUL
)
452 constant_folding_mul(inst
);
453 else if (inst
->U
.I
.Opcode
== RC_OPCODE_ADD
)
454 constant_folding_add(inst
);
/**
 * Optimization entry point: walk the instruction list of c->Program and run
 * constant_folding on every instruction; instructions that are MOV after
 * folding get additional handling.
 *
 * @param c    Compiler whose program is optimized in place.
 * @param user Not referenced in the visible part of the function.
 *
 * NOTE(review): the listing is incomplete (embedded line numbers skip
 * 461 -> 464 and 466 -> 468, and the end of the function is not visible).
 * The loop advance and the statement inside the MOV branch are missing;
 * given the trailing comment, the MOV branch presumably calls peephole,
 * which may remove `cur` -- confirm against the complete file.
 */
457 void rc_optimize(struct radeon_compiler
* c
, void *user
)
459 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
/* The list head itself serves as the end marker. */
460 while(inst
!= &c
->Program
.Instructions
) {
/* Keep a separate handle on the current instruction. */
461 struct rc_instruction
* cur
= inst
;
464 constant_folding(c
, cur
);
466 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
) {
468 /* cur may no longer be part of the program */