/*
 * Copyright (C) 2009 Nicolai Haehnle.
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Tom Stellard <thomas.stellard@amd.com>
 */

#include "radeon_dataflow.h"

#include <assert.h>

#include "radeon_code.h"
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"

40 static void rewrite_source(struct radeon_compiler
* c
,
41 struct rc_instruction
* inst
, unsigned src
)
43 struct rc_swizzle_split split
;
44 unsigned int tempreg
= rc_find_free_temporary(c
);
48 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
49 if (GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
) != RC_SWIZZLE_UNUSED
)
53 c
->SwizzleCaps
->Split(inst
->U
.I
.SrcReg
[src
], usemask
, &split
);
55 for(unsigned int phase
= 0; phase
< split
.NumPhases
; ++phase
) {
56 struct rc_instruction
* mov
= rc_insert_new_instruction(c
, inst
->Prev
);
57 unsigned int phase_refmask
;
58 unsigned int masked_negate
;
60 mov
->U
.I
.Opcode
= RC_OPCODE_MOV
;
61 mov
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
62 mov
->U
.I
.DstReg
.Index
= tempreg
;
63 mov
->U
.I
.DstReg
.WriteMask
= split
.Phase
[phase
];
64 mov
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[src
];
65 mov
->U
.I
.PreSub
= inst
->U
.I
.PreSub
;
68 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
69 if (!GET_BIT(split
.Phase
[phase
], chan
))
70 SET_SWZ(mov
->U
.I
.SrcReg
[0].Swizzle
, chan
, RC_SWIZZLE_UNUSED
);
72 phase_refmask
|= 1 << GET_SWZ(mov
->U
.I
.SrcReg
[0].Swizzle
, chan
);
75 phase_refmask
&= RC_MASK_XYZW
;
77 masked_negate
= split
.Phase
[phase
] & mov
->U
.I
.SrcReg
[0].Negate
;
78 if (masked_negate
== 0)
79 mov
->U
.I
.SrcReg
[0].Negate
= 0;
80 else if (masked_negate
== split
.Phase
[phase
])
81 mov
->U
.I
.SrcReg
[0].Negate
= RC_MASK_XYZW
;
85 inst
->U
.I
.SrcReg
[src
].File
= RC_FILE_TEMPORARY
;
86 inst
->U
.I
.SrcReg
[src
].Index
= tempreg
;
87 inst
->U
.I
.SrcReg
[src
].Swizzle
= 0;
88 inst
->U
.I
.SrcReg
[src
].Negate
= RC_MASK_NONE
;
89 inst
->U
.I
.SrcReg
[src
].Abs
= 0;
90 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
91 SET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
,
92 GET_BIT(usemask
, chan
) ? chan
: RC_SWIZZLE_UNUSED
);
97 * This function will attempt to rewrite non-native swizzles that read from
98 * immediate registers by rearranging the immediates to allow the
99 * instruction to use native swizzles.
101 static unsigned try_rewrite_constant(struct radeon_compiler
*c
,
102 struct rc_src_register
*reg
)
104 unsigned new_swizzle
, chan
, swz0
, swz1
, swz2
, swz3
, found_swizzle
, swz
;
105 unsigned all_inline
= 0;
106 float imms
[4] = {0.0f
, 0.0f
, 0.0f
, 0.0f
};
108 if (!rc_src_reg_is_immediate(c
, reg
->File
, reg
->Index
)) {
109 /* The register does not contain immediates, but if all
110 * the swizzles are inline constants, we can still rewrite
113 new_swizzle
= RC_SWIZZLE_XYZW
;
114 for (chan
= 0 ; chan
< 4; chan
++) {
115 unsigned swz
= GET_SWZ(reg
->Swizzle
, chan
);
116 if (swz
<= RC_SWIZZLE_W
) {
119 if (swz
== RC_SWIZZLE_UNUSED
) {
120 SET_SWZ(new_swizzle
, chan
, RC_SWIZZLE_UNUSED
);
125 new_swizzle
= reg
->Swizzle
;
128 swz
= RC_SWIZZLE_UNUSED
;
130 /* Check if all channels have the same swizzle. If they do we can skip
131 * the search for a native swizzle. We only need to check the first
132 * three channels, because any swizzle is legal in the fourth channel.
134 for (chan
= 0; chan
< 3; chan
++) {
135 unsigned chan_swz
= GET_SWZ(reg
->Swizzle
, chan
);
136 if (chan_swz
== RC_SWIZZLE_UNUSED
) {
139 if (swz
== RC_SWIZZLE_UNUSED
) {
141 } else if (swz
!= chan_swz
) {
147 /* Find a legal swizzle */
149 /* This loop attempts to find a native swizzle where all the
150 * channels are different. */
151 while (!found_swizzle
&& !all_inline
) {
152 swz0
= GET_SWZ(new_swizzle
, 0);
153 swz1
= GET_SWZ(new_swizzle
, 1);
154 swz2
= GET_SWZ(new_swizzle
, 2);
156 /* Swizzle .W. is never legal. */
157 if (swz1
== RC_SWIZZLE_W
||
158 swz1
== RC_SWIZZLE_UNUSED
||
159 swz1
== RC_SWIZZLE_ZERO
||
160 swz1
== RC_SWIZZLE_HALF
||
161 swz1
== RC_SWIZZLE_ONE
) {
162 /* We chose Z, because there are two non-repeating
163 * swizzle combinations of the form .Z. There are
164 * only one combination each for .X. and .Y. */
165 SET_SWZ(new_swizzle
, 1, RC_SWIZZLE_Z
);
169 if (swz2
== RC_SWIZZLE_UNUSED
) {
170 /* We choose Y, because there are two non-repeating
171 * swizzle combinations of the form ..Y */
172 SET_SWZ(new_swizzle
, 2, RC_SWIZZLE_Y
);
179 /* Legal swizzles that start with X: XYZ, XXX */
183 /* The new swizzle will be:
184 * ZXY (XX. => ZX. => ZXY) */
185 SET_SWZ(new_swizzle
, 0, RC_SWIZZLE_Z
);
189 /* The new swizzle is XYZ */
190 SET_SWZ(new_swizzle
, 2, RC_SWIZZLE_Z
);
196 if (swz2
== RC_SWIZZLE_Z
) {
197 /* The new swizzle is XYZ */
198 SET_SWZ(new_swizzle
, 1, RC_SWIZZLE_Y
);
200 } else { /* XZ[^Z] */
201 /* The new swizzle will be:
202 * YZX (XZ. => YZ. => YZX) */
203 SET_SWZ(new_swizzle
, 0, RC_SWIZZLE_Y
);
206 /* XW. Should have already been handled. */
214 /* Legal swizzles that start with Y: YYY, YZX */
218 /* The new swizzle will be:
219 * XYZ (YY. => XY. => XYZ) */
220 SET_SWZ(new_swizzle
, 0, RC_SWIZZLE_X
);
224 /* The new swizzle is YZX */
225 SET_SWZ(new_swizzle
, 2, RC_SWIZZLE_X
);
231 if (swz2
== RC_SWIZZLE_X
) {
232 /*The new swizzle is YZX */
233 SET_SWZ(new_swizzle
, 1, RC_SWIZZLE_Z
);
235 } else { /* YX[^X] */
236 /* The new swizzle will be:
237 * ZXY (YX. => ZX. -> ZXY) */
238 SET_SWZ(new_swizzle
, 0, RC_SWIZZLE_Z
);
241 /* YW. Should have already been handled. */
249 /* Legal swizzles that start with Z: ZZZ, ZXY */
253 /* The new swizzle will be:
254 * WZY (ZZ. => WZ. => WZY) */
255 SET_SWZ(new_swizzle
, 0, RC_SWIZZLE_W
);
259 /* The new swizzle is ZXY */
260 SET_SWZ(new_swizzle
, 2, RC_SWIZZLE_Y
);
266 if (swz2
== RC_SWIZZLE_Y
) {
267 /* The new swizzle is ZXY */
268 SET_SWZ(new_swizzle
, 1, RC_SWIZZLE_X
);
270 } else { /* ZY[^Y] */
271 /* The new swizzle will be:
272 * XYZ (ZY. => XY. => XYZ) */
273 SET_SWZ(new_swizzle
, 0, RC_SWIZZLE_X
);
276 /* ZW. Should have already been handled. */
285 /* Legal swizzles that start with X: WWW, WZY */
287 /* WW. Should have already been handled. */
293 /* The new swizzle will be WZY */
294 SET_SWZ(new_swizzle
, 2, RC_SWIZZLE_Y
);
302 if (swz2
== RC_SWIZZLE_Y
) {
303 /* The new swizzle will be WZY */
304 SET_SWZ(new_swizzle
, 1, RC_SWIZZLE_Z
);
306 } else { /* W[XY][^Y] */
307 /* The new swizzle will be:
308 * ZXY (WX. => XX. => ZX. => ZXY) or
309 * XYZ (WY. => XY. => XYZ)
311 SET_SWZ(new_swizzle
, 0, RC_SWIZZLE_X
);
317 case RC_SWIZZLE_UNUSED
:
318 case RC_SWIZZLE_ZERO
:
320 case RC_SWIZZLE_HALF
:
321 SET_SWZ(new_swizzle
, 0, RC_SWIZZLE_X
);
326 /* Handle the swizzle in the w channel. */
327 swz3
= GET_SWZ(reg
->Swizzle
, 3);
329 /* We can skip this if the swizzle in channel w is an inline constant. */
330 if (swz3
<= RC_SWIZZLE_W
) {
331 for (chan
= 0; chan
< 3; chan
++) {
332 unsigned old_swz
= GET_SWZ(reg
->Swizzle
, chan
);
333 unsigned new_swz
= GET_SWZ(new_swizzle
, chan
);
334 /* If the swizzle in the w channel is the same as the
335 * swizzle in any other channels, we need to rewrite it.
337 * reg->Swizzle == XWZW
338 * new_swizzle == XYZX
339 * Since the swizzle in the y channel is being
340 * rewritten from W -> Y we need to change the swizzle
341 * in the w channel from W -> Y as well.
343 if (old_swz
== swz3
) {
344 SET_SWZ(new_swizzle
, 3,
345 GET_SWZ(new_swizzle
, chan
));
349 /* The swizzle in channel w will be overwritten by one
350 * of the new swizzles. */
351 if (new_swz
== swz3
) {
352 /* Find an unused swizzle */
355 for (i
= 0; i
< 3; i
++) {
356 used
|= 1 << GET_SWZ(new_swizzle
, i
);
358 for (i
= 0; i
< 4; i
++) {
359 if (used
& (1 << i
)) {
362 SET_SWZ(new_swizzle
, 3, i
);
368 for (chan
= 0; chan
< 4; chan
++) {
369 unsigned old_swz
= GET_SWZ(reg
->Swizzle
, chan
);
370 unsigned new_swz
= GET_SWZ(new_swizzle
, chan
);
372 if (old_swz
== RC_SWIZZLE_UNUSED
) {
376 /* We don't need to change the swizzle in channel w if it is
377 * an inline constant. These are always legal in the w channel.
379 * Swizzles with a value > RC_SWIZZLE_W are inline constants.
381 if (chan
== 3 && old_swz
> RC_SWIZZLE_W
) {
385 assert(new_swz
<= RC_SWIZZLE_W
);
388 case RC_SWIZZLE_ZERO
:
389 imms
[new_swz
] = 0.0f
;
391 case RC_SWIZZLE_HALF
:
392 if (reg
->Negate
& (1 << chan
)) {
393 imms
[new_swz
] = -0.5f
;
395 imms
[new_swz
] = 0.5f
;
399 if (reg
->Negate
& (1 << chan
)) {
400 imms
[new_swz
] = -1.0f
;
402 imms
[new_swz
] = 1.0f
;
406 imms
[new_swz
] = rc_get_constant_value(c
, reg
->Index
,
407 reg
->Swizzle
, reg
->Negate
, chan
);
409 SET_SWZ(reg
->Swizzle
, chan
, new_swz
);
411 reg
->Index
= rc_constants_add_immediate_vec4(&c
->Program
.Constants
,
413 /* We need to set the register file to CONSTANT in case we are
414 * converting a non-constant register with constant swizzles (e.g.
417 reg
->File
= RC_FILE_CONSTANT
;
422 void rc_dataflow_swizzles(struct radeon_compiler
* c
, void *user
)
424 struct rc_instruction
* inst
;
426 for(inst
= c
->Program
.Instructions
.Next
;
427 inst
!= &c
->Program
.Instructions
;
429 const struct rc_opcode_info
* opcode
=
430 rc_get_opcode_info(inst
->U
.I
.Opcode
);
433 for(src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
434 struct rc_src_register
*reg
= &inst
->U
.I
.SrcReg
[src
];
435 if (c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, *reg
)) {
439 c
->Program
.Constants
.Count
< R300_PFS_NUM_CONST_REGS
&&
440 try_rewrite_constant(c
, reg
)) {
443 rewrite_source(c
, inst
, src
);
446 if (c
->Debug
& RC_DBG_LOG
)
447 rc_constants_print(&c
->Program
.Constants
);