/*
 * Copyright (C) 2009 Nicolai Haehnle.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "radeon_dataflow.h"

#include <string.h>

#include "radeon_compiler.h"
#include "radeon_swizzle.h"

34 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
36 struct rc_src_register combine
;
37 combine
.File
= inner
.File
;
38 combine
.Index
= inner
.Index
;
39 combine
.RelAddr
= inner
.RelAddr
;
42 combine
.Negate
= outer
.Negate
;
44 combine
.Abs
= inner
.Abs
;
46 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
47 unsigned int swz
= GET_SWZ(outer
.Swizzle
, chan
);
49 combine
.Negate
|= GET_BIT(inner
.Negate
, swz
) << chan
;
51 combine
.Negate
^= outer
.Negate
;
53 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
/**
 * Bookkeeping shared by the scan callbacks and the driver of the MOV
 * peephole optimization.
 *
 * NOTE(review): the `BranchDepth` member declaration was missing from the
 * damaged listing; it is restored as a signed int because the code compares
 * it against zero with `< 0`.
 */
struct peephole_state {
	struct radeon_compiler * C;
	struct rc_instruction * Mov;

	/** Set once a reader is found that cannot be switched over */
	unsigned int Conflict:1;

	/** Whether Mov's source has been clobbered */
	unsigned int SourceClobbered:1;

	/** Which components of Mov's destination register are still from that Mov? */
	unsigned int MovMask:4;

	/** Which components of Mov's destination register are clearly *not* from that Mov */
	unsigned int DefinedMask:4;

	/** Which components of Mov's source register are sourced */
	unsigned int SourcedMask:4;

	/** Branch depth beyond Mov; negative value indicates we left the Mov's block */
	int BranchDepth;
};

79 * This is a callback function that is meant to be passed to
80 * rc_for_all_reads_mask. This function will be called once for each source
82 * @param inst The instruction that the source register belongs to.
83 * @param file The register file of the source register.
84 * @param index The index of the source register.
85 * @param mask The components of the source register that are being read from.
87 static void peephole_scan_read(void * data
, struct rc_instruction
* inst
,
88 rc_register_file file
, unsigned int index
, unsigned int mask
)
90 struct peephole_state
* s
= data
;
92 if (file
!= RC_FILE_TEMPORARY
|| index
!= s
->Mov
->U
.I
.DstReg
.Index
)
95 /* These instructions cannot read from the constants file.
96 * see radeonTransformTEX()
98 if(s
->Mov
->U
.I
.SrcReg
[0].File
!= RC_FILE_TEMPORARY
&&
99 s
->Mov
->U
.I
.SrcReg
[0].File
!= RC_FILE_INPUT
&&
100 (inst
->U
.I
.Opcode
== RC_OPCODE_TEX
||
101 inst
->U
.I
.Opcode
== RC_OPCODE_TXB
||
102 inst
->U
.I
.Opcode
== RC_OPCODE_TXP
||
103 inst
->U
.I
.Opcode
== RC_OPCODE_KIL
)){
107 if ((mask
& s
->MovMask
) == mask
) {
108 if (s
->SourceClobbered
) {
111 } else if ((mask
& s
->DefinedMask
) == mask
) {
112 /* read from something entirely written by other instruction: this is okay */
114 /* read from component combination that is not well-defined without
115 * the MOV: cannot remove it */
120 static void peephole_scan_write(void * data
, struct rc_instruction
* inst
,
121 rc_register_file file
, unsigned int index
, unsigned int mask
)
123 struct peephole_state
* s
= data
;
125 if (s
->BranchDepth
< 0)
128 if (file
== s
->Mov
->U
.I
.DstReg
.File
&& index
== s
->Mov
->U
.I
.DstReg
.Index
) {
130 if (s
->BranchDepth
== 0)
131 s
->DefinedMask
|= mask
;
133 s
->DefinedMask
&= ~mask
;
135 if (file
== s
->Mov
->U
.I
.SrcReg
[0].File
&& index
== s
->Mov
->U
.I
.SrcReg
[0].Index
) {
136 if (mask
& s
->SourcedMask
)
137 s
->SourceClobbered
= 1;
138 } else if (s
->Mov
->U
.I
.SrcReg
[0].RelAddr
&& file
== RC_FILE_ADDRESS
) {
139 s
->SourceClobbered
= 1;
143 static void peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
145 struct peephole_state s
;
147 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
|| inst_mov
->U
.I
.WriteALUResult
)
150 memset(&s
, 0, sizeof(s
));
153 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
154 s
.DefinedMask
= RC_MASK_XYZW
& ~s
.MovMask
;
156 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
157 unsigned int swz
= GET_SWZ(inst_mov
->U
.I
.SrcReg
[0].Swizzle
, chan
);
158 s
.SourcedMask
|= (1 << swz
) & RC_MASK_XYZW
;
161 /* 1st pass: Check whether all subsequent readers can be changed */
162 for(struct rc_instruction
* inst
= inst_mov
->Next
;
163 inst
!= &c
->Program
.Instructions
;
165 /* XXX In the future we might be able to make the optimizer
166 * smart enough to handle loops. */
167 if(inst
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
){
170 rc_for_all_reads_mask(inst
, peephole_scan_read
, &s
);
171 rc_for_all_writes_mask(inst
, peephole_scan_write
, &s
);
175 if (s
.BranchDepth
>= 0) {
176 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
178 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
179 || inst
->U
.I
.Opcode
== RC_OPCODE_ELSE
) {
181 if (s
.BranchDepth
< 0) {
182 s
.DefinedMask
&= ~s
.MovMask
;
192 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
193 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
196 for(struct rc_instruction
* inst
= inst_mov
->Next
;
197 inst
!= &c
->Program
.Instructions
;
199 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
201 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
202 if (inst
->U
.I
.SrcReg
[src
].File
== RC_FILE_TEMPORARY
&&
203 inst
->U
.I
.SrcReg
[src
].Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
204 unsigned int refmask
= 0;
206 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
207 unsigned int swz
= GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
);
208 refmask
|= (1 << swz
) & RC_MASK_XYZW
;
211 if ((refmask
& s
.MovMask
) == refmask
)
212 inst
->U
.I
.SrcReg
[src
] = chain_srcregs(inst
->U
.I
.SrcReg
[src
], s
.Mov
->U
.I
.SrcReg
[0]);
216 if (opcode
->HasDstReg
) {
217 if (inst
->U
.I
.DstReg
.File
== RC_FILE_TEMPORARY
&&
218 inst
->U
.I
.DstReg
.Index
== s
.Mov
->U
.I
.DstReg
.Index
) {
219 s
.MovMask
&= ~inst
->U
.I
.DstReg
.WriteMask
;
223 if (s
.BranchDepth
>= 0) {
224 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
226 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
227 || inst
->U
.I
.Opcode
== RC_OPCODE_ELSE
) {
229 if (s
.BranchDepth
< 0)
230 break; /* no more readers after this point */
235 /* Finally, remove the original MOV instruction */
236 rc_remove_instruction(inst_mov
);
240 * Check if a source register is actually always the same
243 static int is_src_uniform_constant(struct rc_src_register src
,
244 rc_swizzle
* pswz
, unsigned int * pnegate
)
248 if (src
.File
!= RC_FILE_NONE
) {
253 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
254 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
259 if (swz
== RC_SWIZZLE_UNUSED
)
264 *pnegate
= GET_BIT(src
.Negate
, chan
);
267 if (swz
!= *pswz
|| *pnegate
!= GET_BIT(src
.Negate
, chan
)) {
278 static void constant_folding_mad(struct rc_instruction
* inst
)
283 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[2], &swz
, &negate
)) {
284 if (swz
== RC_SWIZZLE_ZERO
) {
285 inst
->U
.I
.Opcode
= RC_OPCODE_MUL
;
290 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
291 if (swz
== RC_SWIZZLE_ONE
) {
292 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
294 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
295 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[2];
297 } else if (swz
== RC_SWIZZLE_ZERO
) {
298 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
299 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
304 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
305 if (swz
== RC_SWIZZLE_ONE
) {
306 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
308 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
309 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
311 } else if (swz
== RC_SWIZZLE_ZERO
) {
312 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
313 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
319 static void constant_folding_mul(struct rc_instruction
* inst
)
324 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
325 if (swz
== RC_SWIZZLE_ONE
) {
326 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
327 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
329 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
331 } else if (swz
== RC_SWIZZLE_ZERO
) {
332 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
333 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
338 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
339 if (swz
== RC_SWIZZLE_ONE
) {
340 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
342 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
344 } else if (swz
== RC_SWIZZLE_ZERO
) {
345 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
346 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
352 static void constant_folding_add(struct rc_instruction
* inst
)
357 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
358 if (swz
== RC_SWIZZLE_ZERO
) {
359 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
360 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
365 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
366 if (swz
== RC_SWIZZLE_ZERO
) {
367 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
375 * Replace 0.0, 1.0 and 0.5 immediate constants by their
376 * respective swizzles. Simplify instructions like ADD dst, src, 0;
378 static void constant_folding(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
380 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
382 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
383 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
384 if (inst
->U
.I
.SrcReg
[src
].File
!= RC_FILE_CONSTANT
||
385 inst
->U
.I
.SrcReg
[src
].RelAddr
||
386 inst
->U
.I
.SrcReg
[src
].Index
>= c
->Program
.Constants
.Count
)
389 struct rc_constant
* constant
=
390 &c
->Program
.Constants
.Constants
[inst
->U
.I
.SrcReg
[src
].Index
];
392 if (constant
->Type
!= RC_CONSTANT_IMMEDIATE
)
395 struct rc_src_register newsrc
= inst
->U
.I
.SrcReg
[src
];
396 int have_real_reference
= 0;
397 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
398 unsigned int swz
= GET_SWZ(newsrc
.Swizzle
, chan
);
403 float imm
= constant
->u
.Immediate
[swz
];
408 if (baseimm
== 0.0) {
409 newswz
= RC_SWIZZLE_ZERO
;
410 } else if (baseimm
== 1.0) {
411 newswz
= RC_SWIZZLE_ONE
;
412 } else if (baseimm
== 0.5) {
413 newswz
= RC_SWIZZLE_HALF
;
415 have_real_reference
= 1;
419 SET_SWZ(newsrc
.Swizzle
, chan
, newswz
);
420 if (imm
< 0.0 && !newsrc
.Abs
)
421 newsrc
.Negate
^= 1 << chan
;
424 if (!have_real_reference
) {
425 newsrc
.File
= RC_FILE_NONE
;
429 /* don't make the swizzle worse */
430 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, newsrc
) &&
431 c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, inst
->U
.I
.SrcReg
[src
]))
434 inst
->U
.I
.SrcReg
[src
] = newsrc
;
437 /* Simplify instructions based on constants */
438 if (inst
->U
.I
.Opcode
== RC_OPCODE_MAD
)
439 constant_folding_mad(inst
);
441 /* note: MAD can simplify to MUL or ADD */
442 if (inst
->U
.I
.Opcode
== RC_OPCODE_MUL
)
443 constant_folding_mul(inst
);
444 else if (inst
->U
.I
.Opcode
== RC_OPCODE_ADD
)
445 constant_folding_add(inst
);
448 void rc_optimize(struct radeon_compiler
* c
)
450 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
451 while(inst
!= &c
->Program
.Instructions
) {
452 struct rc_instruction
* cur
= inst
;
455 constant_folding(c
, cur
);
457 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
) {
459 /* cur may no longer be part of the program */