2 * Copyright (C) 2009 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "radeon_dataflow.h"
30 #include "radeon_compiler.h"
31 #include "radeon_swizzle.h"
34 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
36 struct rc_src_register combine
;
37 combine
.File
= inner
.File
;
38 combine
.Index
= inner
.Index
;
39 combine
.RelAddr
= inner
.RelAddr
;
42 combine
.Negate
= outer
.Negate
;
44 combine
.Abs
= inner
.Abs
;
46 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
47 unsigned int swz
= GET_SWZ(outer
.Swizzle
, chan
);
49 combine
.Negate
|= GET_BIT(inner
.Negate
, swz
) << chan
;
51 combine
.Negate
^= outer
.Negate
;
53 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
/**
 * Bookkeeping shared between peephole() and its read/write scan callbacks
 * while deciding whether a single MOV can be removed.
 */
struct peephole_state {
	struct radeon_compiler * C;

	/** The MOV instruction that is a candidate for removal */
	struct rc_instruction * Mov;

	/** Set when a later read cannot be redirected, i.e. the MOV must stay */
	unsigned int Conflict:1;

	/** Whether Mov's source has been clobbered */
	unsigned int SourceClobbered:1;

	/** Which components of Mov's destination register are still from that Mov? */
	unsigned int MovMask:4;

	/** Which components of Mov's destination register are clearly *not* from that Mov */
	unsigned int DefinedMask:4;

	/** Which components of Mov's source register are sourced */
	unsigned int SourcedMask:4;

	/** Branch depth beyond Mov; negative value indicates we left the Mov's block */
	int BranchDepth;
};
78 static void peephole_scan_read(void * data
, struct rc_instruction
* inst
,
79 rc_register_file file
, unsigned int index
, unsigned int mask
)
81 struct peephole_state
* s
= data
;
83 if (file
!= RC_FILE_TEMPORARY
|| index
!= s
->Mov
->U
.I
.DstReg
.Index
)
86 /* These instructions cannot read from the constants file.
87 * see radeonTransformTEX()
89 if(s
->Mov
->U
.I
.SrcReg
[0].File
!= RC_FILE_TEMPORARY
&&
90 s
->Mov
->U
.I
.SrcReg
[0].File
!= RC_FILE_INPUT
&&
91 (inst
->U
.I
.Opcode
== RC_OPCODE_TEX
||
92 inst
->U
.I
.Opcode
== RC_OPCODE_TXB
||
93 inst
->U
.I
.Opcode
== RC_OPCODE_TXP
||
94 inst
->U
.I
.Opcode
== RC_OPCODE_KIL
)){
98 if ((mask
& s
->MovMask
) == mask
) {
99 if (s
->SourceClobbered
) {
102 } else if ((mask
& s
->DefinedMask
) == mask
) {
103 /* read from something entirely written by other instruction: this is okay */
105 /* read from component combination that is not well-defined without
106 * the MOV: cannot remove it */
111 static void peephole_scan_write(void * data
, struct rc_instruction
* inst
,
112 rc_register_file file
, unsigned int index
, unsigned int mask
)
114 struct peephole_state
* s
= data
;
116 if (s
->BranchDepth
< 0)
119 if (file
== s
->Mov
->U
.I
.DstReg
.File
&& index
== s
->Mov
->U
.I
.DstReg
.Index
) {
121 if (s
->BranchDepth
== 0)
122 s
->DefinedMask
|= mask
;
124 s
->DefinedMask
&= ~mask
;
126 if (file
== s
->Mov
->U
.I
.SrcReg
[0].File
&& index
== s
->Mov
->U
.I
.SrcReg
[0].Index
) {
127 if (mask
& s
->SourcedMask
)
128 s
->SourceClobbered
= 1;
129 } else if (s
->Mov
->U
.I
.SrcReg
[0].RelAddr
&& file
== RC_FILE_ADDRESS
) {
130 s
->SourceClobbered
= 1;
134 static void peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
136 struct peephole_state s
;
138 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
|| inst_mov
->U
.I
.WriteALUResult
)
141 memset(&s
, 0, sizeof(s
));
144 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
145 s
.DefinedMask
= RC_MASK_XYZW
& ~s
.MovMask
;
147 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
148 unsigned int swz
= GET_SWZ(inst_mov
->U
.I
.SrcReg
[0].Swizzle
, chan
);
149 s
.SourcedMask
|= (1 << swz
) & RC_MASK_XYZW
;
152 /* 1st pass: Check whether all subsequent readers can be changed */
153 for(struct rc_instruction
* inst
= inst_mov
->Next
;
154 inst
!= &c
->Program
.Instructions
;
156 rc_for_all_reads_mask(inst
, peephole_scan_read
, &s
);
157 rc_for_all_writes_mask(inst
, peephole_scan_write
, &s
);
161 if (s
.BranchDepth
>= 0) {
162 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
164 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
) {
166 if (s
.BranchDepth
< 0) {
167 s
.DefinedMask
&= ~s
.MovMask
;
177 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
178 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
181 for(struct rc_instruction
* inst
= inst_mov
->Next
;
182 inst
!= &c
->Program
.Instructions
;
184 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
186 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
187 if (inst
->U
.I
.SrcReg
[src
].File
== RC_FILE_TEMPORARY
&&
188 inst
->U
.I
.SrcReg
[src
].Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
189 unsigned int refmask
= 0;
191 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
192 unsigned int swz
= GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
);
193 refmask
|= (1 << swz
) & RC_MASK_XYZW
;
196 if ((refmask
& s
.MovMask
) == refmask
)
197 inst
->U
.I
.SrcReg
[src
] = chain_srcregs(inst
->U
.I
.SrcReg
[src
], s
.Mov
->U
.I
.SrcReg
[0]);
201 if (opcode
->HasDstReg
) {
202 if (inst
->U
.I
.DstReg
.File
== RC_FILE_TEMPORARY
&&
203 inst
->U
.I
.DstReg
.Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
204 s
.MovMask
&= ~inst
->U
.I
.DstReg
.WriteMask
;
208 if (s
.BranchDepth
>= 0) {
209 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
211 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
) {
213 if (s
.BranchDepth
< 0)
214 break; /* no more readers after this point */
219 /* Finally, remove the original MOV instruction */
220 rc_remove_instruction(inst_mov
);
224 * Check if a source register is actually always the same
227 static int is_src_uniform_constant(struct rc_src_register src
,
228 rc_swizzle
* pswz
, unsigned int * pnegate
)
232 if (src
.File
!= RC_FILE_NONE
) {
237 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
238 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
243 if (swz
== RC_SWIZZLE_UNUSED
)
248 *pnegate
= GET_BIT(src
.Negate
, chan
);
251 if (swz
!= *pswz
|| *pnegate
!= GET_BIT(src
.Negate
, chan
)) {
262 static void constant_folding_mad(struct rc_instruction
* inst
)
267 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[2], &swz
, &negate
)) {
268 if (swz
== RC_SWIZZLE_ZERO
) {
269 inst
->U
.I
.Opcode
= RC_OPCODE_MUL
;
274 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
275 if (swz
== RC_SWIZZLE_ONE
) {
276 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
278 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
279 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[2];
281 } else if (swz
== RC_SWIZZLE_ZERO
) {
282 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
283 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
288 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
289 if (swz
== RC_SWIZZLE_ONE
) {
290 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
292 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
293 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
295 } else if (swz
== RC_SWIZZLE_ZERO
) {
296 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
297 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
303 static void constant_folding_mul(struct rc_instruction
* inst
)
308 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
309 if (swz
== RC_SWIZZLE_ONE
) {
310 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
311 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
313 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
315 } else if (swz
== RC_SWIZZLE_ZERO
) {
316 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
317 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
322 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
323 if (swz
== RC_SWIZZLE_ONE
) {
324 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
326 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
328 } else if (swz
== RC_SWIZZLE_ZERO
) {
329 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
330 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
336 static void constant_folding_add(struct rc_instruction
* inst
)
341 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
342 if (swz
== RC_SWIZZLE_ZERO
) {
343 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
344 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
349 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
350 if (swz
== RC_SWIZZLE_ZERO
) {
351 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
359 * Replace 0.0, 1.0 and 0.5 immediate constants by their
360 * respective swizzles. Simplify instructions like ADD dst, src, 0;
362 static void constant_folding(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
364 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
366 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
367 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
368 if (inst
->U
.I
.SrcReg
[src
].File
!= RC_FILE_CONSTANT
||
369 inst
->U
.I
.SrcReg
[src
].RelAddr
||
370 inst
->U
.I
.SrcReg
[src
].Index
>= c
->Program
.Constants
.Count
)
373 struct rc_constant
* constant
=
374 &c
->Program
.Constants
.Constants
[inst
->U
.I
.SrcReg
[src
].Index
];
376 if (constant
->Type
!= RC_CONSTANT_IMMEDIATE
)
379 struct rc_src_register newsrc
= inst
->U
.I
.SrcReg
[src
];
380 int have_real_reference
= 0;
381 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
382 unsigned int swz
= GET_SWZ(newsrc
.Swizzle
, chan
);
387 float imm
= constant
->u
.Immediate
[swz
];
392 if (baseimm
== 0.0) {
393 newswz
= RC_SWIZZLE_ZERO
;
394 } else if (baseimm
== 1.0) {
395 newswz
= RC_SWIZZLE_ONE
;
396 } else if (baseimm
== 0.5) {
397 newswz
= RC_SWIZZLE_HALF
;
399 have_real_reference
= 1;
403 SET_SWZ(newsrc
.Swizzle
, chan
, newswz
);
404 if (imm
< 0.0 && !newsrc
.Abs
)
405 newsrc
.Negate
^= 1 << chan
;
408 if (!have_real_reference
) {
409 newsrc
.File
= RC_FILE_NONE
;
413 /* don't make the swizzle worse */
414 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, newsrc
) &&
415 c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, inst
->U
.I
.SrcReg
[src
]))
418 inst
->U
.I
.SrcReg
[src
] = newsrc
;
421 /* Simplify instructions based on constants */
422 if (inst
->U
.I
.Opcode
== RC_OPCODE_MAD
)
423 constant_folding_mad(inst
);
425 /* note: MAD can simplify to MUL or ADD */
426 if (inst
->U
.I
.Opcode
== RC_OPCODE_MUL
)
427 constant_folding_mul(inst
);
428 else if (inst
->U
.I
.Opcode
== RC_OPCODE_ADD
)
429 constant_folding_add(inst
);
432 void rc_optimize(struct radeon_compiler
* c
)
434 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
435 while(inst
!= &c
->Program
.Instructions
) {
436 struct rc_instruction
* cur
= inst
;
439 constant_folding(c
, cur
);
441 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
) {
443 /* cur may no longer be part of the program */