/*
 * Copyright (C) 2009 Nicolai Haehnle.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
28 #include "radeon_program_pair.h"
30 #include "radeon_compiler.h"
31 #include "radeon_compiler_util.h"
35 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
36 * and reverse the order of arguments for CMP.
38 static void final_rewrite(struct rc_sub_instruction
*inst
)
40 struct rc_src_register tmp
;
42 switch(inst
->Opcode
) {
44 inst
->SrcReg
[2] = inst
->SrcReg
[1];
45 inst
->SrcReg
[1].File
= RC_FILE_NONE
;
46 inst
->SrcReg
[1].Swizzle
= RC_SWIZZLE_1111
;
47 inst
->SrcReg
[1].Negate
= RC_MASK_NONE
;
48 inst
->Opcode
= RC_OPCODE_MAD
;
51 tmp
= inst
->SrcReg
[2];
52 inst
->SrcReg
[2] = inst
->SrcReg
[0];
53 inst
->SrcReg
[0] = tmp
;
56 /* AMD say we should use CMP.
57 * However, when we transform
60 * CMP tmp, -r0, -r0, 0;
62 * we get incorrect behaviour on R500 when r0 == 0.0.
63 * It appears that the R500 KIL hardware treats -0.0 as less
66 inst
->SrcReg
[1].File
= RC_FILE_NONE
;
67 inst
->SrcReg
[1].Swizzle
= RC_SWIZZLE_1111
;
68 inst
->SrcReg
[2].File
= RC_FILE_NONE
;
69 inst
->SrcReg
[2].Swizzle
= RC_SWIZZLE_0000
;
70 inst
->Opcode
= RC_OPCODE_MAD
;
73 inst
->SrcReg
[2].File
= RC_FILE_NONE
;
74 inst
->SrcReg
[2].Swizzle
= RC_SWIZZLE_0000
;
75 inst
->Opcode
= RC_OPCODE_MAD
;
/*
 * classify_instruction: derive the three flags returned through needrgb,
 * needalpha and istranscendent for one ALU sub-instruction.
 *
 * Visible in this listing:
 *  - *needrgb is 1 when the destination write mask has any bit of
 *    RC_MASK_XYZ set, otherwise 0;
 *  - *needalpha is 1 when the destination write mask has RC_MASK_W set,
 *    otherwise 0;
 *  - WriteALUResult is then compared against RC_ALURESULT_X and
 *    RC_ALURESULT_W, and finally the opcode is switched on.
 *
 * NOTE(review): the listing has fused line numbers and dropped lines. The
 * bodies of both WriteALUResult branches, every switch case and the
 * assignment to *istranscendent are not shown here; confirm their behaviour
 * against the complete file before relying on this description.
 */
85 * Classify an instruction according to which ALUs etc. it needs
87 static void classify_instruction(struct rc_sub_instruction
* inst
,
88 int * needrgb
, int * needalpha
, int * istranscendent
)
/* RGB ALU is needed when any of X, Y or Z is written. */
90 *needrgb
= (inst
->DstReg
.WriteMask
& RC_MASK_XYZ
) ? 1 : 0;
/* Alpha ALU is needed when W is written. */
91 *needalpha
= (inst
->DstReg
.WriteMask
& RC_MASK_W
) ? 1 : 0;
/* The statements guarded by the next two tests are missing from the listing. */
94 if (inst
->WriteALUResult
== RC_ALURESULT_X
)
96 else if (inst
->WriteALUResult
== RC_ALURESULT_W
)
/* Per-opcode handling; the case list is missing from the listing. */
99 switch(inst
->Opcode
) {
/*
 * src_uses: examine the four swizzle components of src (GET_SWZ for
 * j = 0..3) and report the outcome through *rgb and *alpha.
 *
 * NOTE(review): the loop body and the declaration of j are missing from this
 * listing, so how each swizzle value updates *rgb and *alpha is not visible.
 * Presumably the outputs flag whether colour and alpha channels of the source
 * are read; confirm against the complete file.
 */
132 static void src_uses(struct rc_src_register src
, unsigned int * rgb
,
133 unsigned int * alpha
)
/* One iteration per swizzle component. */
136 for(j
= 0; j
< 4; ++j
) {
137 unsigned int swz
= GET_SWZ(src
.Swizzle
, j
);
/*
 * set_pair_instruction: build the pair instruction `pair` from the ALU
 * sub-instruction `inst`; `c` is used for error reporting and for the
 * OutputDepth / OutputColor indices.
 *
 * Steps visible in this listing:
 *  1. *pair is cleared with memset and classify_instruction() supplies
 *     needrgb / needalpha / istranscendent.
 *  2. RGB.Opcode is set to RC_OPCODE_REPL_ALPHA or to inst->Opcode, and
 *     Alpha.Opcode to inst->Opcode; the Saturate flag is set when
 *     SaturateMode is RC_SATURATE_ZERO_ONE.
 *  3. When a presubtract operation is present, the presubtract source
 *     registers are copied into RGB.Src[i] / Alpha.Src[i] and marked Used.
 *  4. For each of the opcode's source operands a source slot is obtained from
 *     rc_pair_alloc_source() and the RGB and/or Alpha Arg[i] fields (Source,
 *     Swizzle, Abs, Negate) are filled in; rc_error() reports a failed
 *     translation.
 *  5. Destination: for RC_FILE_OUTPUT either the depth write mask or the
 *     colour target and output write masks are set; DestIndex / WriteMask are
 *     set for the RGB and Alpha halves.
 *  6. Omod and, when requested, WriteALUResult / ALUResultCompare are copied.
 *
 * NOTE(review): this listing has fused line numbers and dropped lines. Most
 * guarding conditions (for example which of the assignments above depend on
 * needrgb, needalpha or istranscendent), several local declarations, the
 * test that decides when rc_error() fires, and all closing braces are not
 * shown. Confirm the exact control flow against the complete file.
 */
146 * Fill the given ALU instruction's opcodes and source operands into the given pair,
149 static void set_pair_instruction(struct r300_fragment_program_compiler
*c
,
150 struct rc_pair_instruction
* pair
,
151 struct rc_sub_instruction
* inst
)
153 int needrgb
, needalpha
, istranscendent
;
154 const struct rc_opcode_info
* opcode
;
/* Start from an all-zero pair instruction. */
157 memset(pair
, 0, sizeof(struct rc_pair_instruction
));
/* Find out which ALUs the instruction needs. */
159 classify_instruction(inst
, &needrgb
, &needalpha
, &istranscendent
);
/* Opcode and saturate setup for the RGB half, then the Alpha half. */
163 pair
->RGB
.Opcode
= RC_OPCODE_REPL_ALPHA
;
165 pair
->RGB
.Opcode
= inst
->Opcode
;
166 if (inst
->SaturateMode
== RC_SATURATE_ZERO_ONE
)
167 pair
->RGB
.Saturate
= 1;
170 pair
->Alpha
.Opcode
= inst
->Opcode
;
171 if (inst
->SaturateMode
== RC_SATURATE_ZERO_ONE
)
172 pair
->Alpha
.Saturate
= 1;
/* Opcode metadata; NumSrcRegs bounds the operand loop further down. */
175 opcode
= rc_get_opcode_info(inst
->Opcode
);
177 /* Presubtract handling:
178 * We need to make sure that the values used by the presubtract
179 * operation end up in src0 or src1. */
180 if(inst
->PreSub
.Opcode
!= RC_PRESUB_NONE
) {
181 /* rc_pair_alloc_source() will fill in data for
182 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
184 for(j
= 0; j
< 3; j
++) {
186 if(inst
->SrcReg
[j
].File
!= RC_FILE_PRESUB
)
189 src_regs
= rc_presubtract_src_reg_count(
190 inst
->PreSub
.Opcode
);
191 for(i
= 0; i
< src_regs
; i
++) {
192 unsigned int rgb
= 0;
193 unsigned int alpha
= 0;
194 src_uses(inst
->SrcReg
[j
], &rgb
, &alpha
);
196 pair
->RGB
.Src
[i
].File
=
197 inst
->PreSub
.SrcReg
[i
].File
;
198 pair
->RGB
.Src
[i
].Index
=
199 inst
->PreSub
.SrcReg
[i
].Index
;
200 pair
->RGB
.Src
[i
].Used
= 1;
203 pair
->Alpha
.Src
[i
].File
=
204 inst
->PreSub
.SrcReg
[i
].File
;
205 pair
->Alpha
.Src
[i
].Index
=
206 inst
->PreSub
.SrcReg
[i
].Index
;
207 pair
->Alpha
.Src
[i
].Used
= 1;
/* Translate each source operand of the opcode. */
213 for(i
= 0; i
< opcode
->NumSrcRegs
; ++i
) {
215 if (needrgb
&& !istranscendent
) {
216 unsigned int srcrgb
= 0;
217 unsigned int srcalpha
= 0;
218 unsigned int srcmask
= 0;
220 /* We don't care about the alpha channel here. We only
221 * want the part of the swizzle that writes to rgb,
222 * since we are creating an rgb instruction. */
223 for(j
= 0; j
< 3; ++j
) {
224 unsigned int swz
= GET_SWZ(inst
->SrcReg
[i
].Swizzle
, j
);
226 if (swz
< RC_SWIZZLE_W
)
228 else if (swz
== RC_SWIZZLE_W
)
231 if (swz
< RC_SWIZZLE_UNUSED
)
234 source
= rc_pair_alloc_source(pair
, srcrgb
, srcalpha
,
235 inst
->SrcReg
[i
].File
, inst
->SrcReg
[i
].Index
);
237 rc_error(&c
->Base
, "Failed to translate "
238 "rgb instruction.\n");
241 pair
->RGB
.Arg
[i
].Source
= source
;
242 pair
->RGB
.Arg
[i
].Swizzle
=
243 rc_init_swizzle(inst
->SrcReg
[i
].Swizzle
, 3);
244 pair
->RGB
.Arg
[i
].Abs
= inst
->SrcReg
[i
].Abs
;
245 pair
->RGB
.Arg
[i
].Negate
= !!(srcmask
& inst
->SrcReg
[i
].Negate
& (RC_MASK_X
| RC_MASK_Y
| RC_MASK_Z
));
248 unsigned int srcrgb
= 0;
249 unsigned int srcalpha
= 0;
251 if (istranscendent
) {
252 swz
= rc_get_scalar_src_swz(inst
->SrcReg
[i
].Swizzle
);
254 swz
= GET_SWZ(inst
->SrcReg
[i
].Swizzle
, 3);
261 source
= rc_pair_alloc_source(pair
, srcrgb
, srcalpha
,
262 inst
->SrcReg
[i
].File
, inst
->SrcReg
[i
].Index
);
264 rc_error(&c
->Base
, "Failed to translate "
265 "alpha instruction.\n");
268 pair
->Alpha
.Arg
[i
].Source
= source
;
269 pair
->Alpha
.Arg
[i
].Swizzle
= rc_init_swizzle(swz
, 1);
270 pair
->Alpha
.Arg
[i
].Abs
= inst
->SrcReg
[i
].Abs
;
272 if (istranscendent
) {
273 pair
->Alpha
.Arg
[i
].Negate
=
274 !!(inst
->SrcReg
[i
].Negate
&
275 inst
->DstReg
.WriteMask
);
277 pair
->Alpha
.Arg
[i
].Negate
=
278 !!(inst
->SrcReg
[i
].Negate
& RC_MASK_W
);
283 /* Destination handling */
284 if (inst
->DstReg
.File
== RC_FILE_OUTPUT
) {
285 if (inst
->DstReg
.Index
== c
->OutputDepth
) {
286 pair
->Alpha
.DepthWriteMask
|= GET_BIT(inst
->DstReg
.WriteMask
, 3);
288 for (i
= 0; i
< 4; i
++) {
289 if (inst
->DstReg
.Index
== c
->OutputColor
[i
]) {
290 pair
->RGB
.Target
= i
;
291 pair
->Alpha
.Target
= i
;
292 pair
->RGB
.OutputWriteMask
|=
293 inst
->DstReg
.WriteMask
& RC_MASK_XYZ
;
294 pair
->Alpha
.OutputWriteMask
|=
295 GET_BIT(inst
->DstReg
.WriteMask
, 3);
302 pair
->RGB
.DestIndex
= inst
->DstReg
.Index
;
303 pair
->RGB
.WriteMask
|= inst
->DstReg
.WriteMask
& RC_MASK_XYZ
;
307 pair
->Alpha
.WriteMask
|= (GET_BIT(inst
->DstReg
.WriteMask
, 3) << 3);
308 if (pair
->Alpha
.WriteMask
) {
309 pair
->Alpha
.DestIndex
= inst
->DstReg
.Index
;
/* Output modifier is copied to the RGB and Alpha halves. */
315 pair
->RGB
.Omod
= inst
->Omod
;
318 pair
->Alpha
.Omod
= inst
->Omod
;
/* Carry the ALU-result write settings across when the instruction has them. */
321 if (inst
->WriteALUResult
) {
322 pair
->WriteALUResult
= inst
->WriteALUResult
;
323 pair
->ALUResultCompare
= inst
->ALUResultCompare
;
328 static void check_opcode_support(struct r300_fragment_program_compiler
*c
,
329 struct rc_sub_instruction
*inst
)
331 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->Opcode
);
333 if (opcode
->HasDstReg
) {
334 if (inst
->SaturateMode
== RC_SATURATE_MINUS_PLUS_ONE
) {
335 rc_error(&c
->Base
, "Fragment program does not support signed Saturate.\n");
340 for (unsigned i
= 0; i
< opcode
->NumSrcRegs
; i
++) {
341 if (inst
->SrcReg
[i
].RelAddr
) {
342 rc_error(&c
->Base
, "Fragment program does not support relative addressing "
343 " of source operands.\n");
351 * Translate all ALU instructions into corresponding pair instructions,
352 * performing no other changes.
354 void rc_pair_translate(struct radeon_compiler
*cc
, void *user
)
356 struct r300_fragment_program_compiler
*c
= (struct r300_fragment_program_compiler
*)cc
;
358 for(struct rc_instruction
* inst
= c
->Base
.Program
.Instructions
.Next
;
359 inst
!= &c
->Base
.Program
.Instructions
;
361 const struct rc_opcode_info
* opcode
;
362 struct rc_sub_instruction copy
;
364 if (inst
->Type
!= RC_INSTRUCTION_NORMAL
)
367 opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
369 if (opcode
->HasTexture
|| opcode
->IsFlowControl
|| opcode
->Opcode
== RC_OPCODE_KIL
)
374 check_opcode_support(c
, ©
);
376 final_rewrite(©
);
377 inst
->Type
= RC_INSTRUCTION_PAIR
;
378 set_pair_instruction(c
, &inst
->U
.P
, ©
);