/*
 * Copyright (C) 2008 Nicolai Haehnle.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/**
 * @file
 *
 * Shareable transformations that transform "special" ALU instructions
 * into ALU instructions that are supported by hardware.
 */
36 #include "radeon_program_alu.h"
39 static struct prog_instruction
*emit1(struct gl_program
* p
,
40 gl_inst_opcode Opcode
, struct prog_dst_register DstReg
,
41 struct prog_src_register SrcReg
)
43 struct prog_instruction
*fpi
= radeonAppendInstructions(p
, 1);
47 fpi
->SrcReg
[0] = SrcReg
;
51 static struct prog_instruction
*emit2(struct gl_program
* p
,
52 gl_inst_opcode Opcode
, struct prog_dst_register DstReg
,
53 struct prog_src_register SrcReg0
, struct prog_src_register SrcReg1
)
55 struct prog_instruction
*fpi
= radeonAppendInstructions(p
, 1);
59 fpi
->SrcReg
[0] = SrcReg0
;
60 fpi
->SrcReg
[1] = SrcReg1
;
64 static struct prog_instruction
*emit3(struct gl_program
* p
,
65 gl_inst_opcode Opcode
, struct prog_dst_register DstReg
,
66 struct prog_src_register SrcReg0
, struct prog_src_register SrcReg1
,
67 struct prog_src_register SrcReg2
)
69 struct prog_instruction
*fpi
= radeonAppendInstructions(p
, 1);
73 fpi
->SrcReg
[0] = SrcReg0
;
74 fpi
->SrcReg
[1] = SrcReg1
;
75 fpi
->SrcReg
[2] = SrcReg2
;
79 static void set_swizzle(struct prog_src_register
*SrcReg
, int coordinate
, int swz
)
81 SrcReg
->Swizzle
&= ~(7 << (3*coordinate
));
82 SrcReg
->Swizzle
|= swz
<< (3*coordinate
);
85 static void set_negate_base(struct prog_src_register
*SrcReg
, int coordinate
, int negate
)
87 SrcReg
->NegateBase
&= ~(1 << coordinate
);
88 SrcReg
->NegateBase
|= (negate
<< coordinate
);
91 static struct prog_dst_register
dstreg(int file
, int index
)
93 struct prog_dst_register dst
;
96 dst
.WriteMask
= WRITEMASK_XYZW
;
97 dst
.CondMask
= COND_TR
;
98 dst
.CondSwizzle
= SWIZZLE_NOOP
;
104 static const struct prog_src_register builtin_zero
= {
105 .File
= PROGRAM_BUILTIN
,
107 .Swizzle
= SWIZZLE_0000
109 static const struct prog_src_register builtin_one
= {
110 .File
= PROGRAM_BUILTIN
,
112 .Swizzle
= SWIZZLE_1111
114 static const struct prog_src_register srcreg_undefined
= {
115 .File
= PROGRAM_UNDEFINED
,
117 .Swizzle
= SWIZZLE_NOOP
120 static struct prog_src_register
srcreg(int file
, int index
)
122 struct prog_src_register src
= srcreg_undefined
;
128 static struct prog_src_register
negate(struct prog_src_register reg
)
130 struct prog_src_register newreg
= reg
;
131 newreg
.NegateAbs
= !newreg
.NegateAbs
;
135 static struct prog_src_register
swizzle(struct prog_src_register reg
, GLuint x
, GLuint y
, GLuint z
, GLuint w
)
137 struct prog_src_register swizzled
= reg
;
138 swizzled
.Swizzle
= MAKE_SWIZZLE4(
139 GET_SWZ(reg
.Swizzle
, x
),
140 GET_SWZ(reg
.Swizzle
, y
),
141 GET_SWZ(reg
.Swizzle
, z
),
142 GET_SWZ(reg
.Swizzle
, w
));
146 static struct prog_src_register
scalar(struct prog_src_register reg
)
148 return swizzle(reg
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
);
151 static void transform_ABS(struct gl_program
* p
,
152 struct prog_instruction
* inst
)
154 struct prog_src_register src
= inst
->SrcReg
[0];
158 emit1(p
, OPCODE_MOV
, inst
->DstReg
, src
);
161 static void transform_DPH(struct gl_program
* p
,
162 struct prog_instruction
* inst
)
164 struct prog_src_register src0
= inst
->SrcReg
[0];
165 if (src0
.NegateAbs
) {
167 int tempreg
= _mesa_find_free_register(p
, PROGRAM_TEMPORARY
);
168 emit1(p
, OPCODE_MOV
, dstreg(PROGRAM_TEMPORARY
, tempreg
), src0
);
169 src0
= srcreg(src0
.File
, src0
.Index
);
172 src0
.NegateBase
^= NEGATE_XYZW
;
175 set_swizzle(&src0
, 3, SWIZZLE_ONE
);
176 set_negate_base(&src0
, 3, 0);
177 emit2(p
, OPCODE_DP4
, inst
->DstReg
, src0
, inst
->SrcReg
[1]);
180 static void transform_FLR(struct gl_program
* p
,
181 struct prog_instruction
* inst
)
183 int tempreg
= _mesa_find_free_register(p
, PROGRAM_TEMPORARY
);
184 emit1(p
, OPCODE_FRC
, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[0]);
185 emit2(p
, OPCODE_ADD
, inst
->DstReg
, inst
->SrcReg
[0], negate(srcreg(PROGRAM_TEMPORARY
, tempreg
)));
188 static void transform_POW(struct gl_program
* p
,
189 struct prog_instruction
* inst
)
191 int tempreg
= _mesa_find_free_register(p
, PROGRAM_TEMPORARY
);
192 struct prog_dst_register tempdst
= dstreg(PROGRAM_TEMPORARY
, tempreg
);
193 struct prog_src_register tempsrc
= srcreg(PROGRAM_TEMPORARY
, tempreg
);
194 tempdst
.WriteMask
= WRITEMASK_W
;
195 tempsrc
.Swizzle
= SWIZZLE_WWWW
;
197 emit1(p
, OPCODE_LG2
, tempdst
, scalar(inst
->SrcReg
[0]));
198 emit2(p
, OPCODE_MUL
, tempdst
, tempsrc
, scalar(inst
->SrcReg
[1]));
199 emit1(p
, OPCODE_EX2
, inst
->DstReg
, tempsrc
);
202 static void transform_SGE(struct gl_program
* p
,
203 struct prog_instruction
* inst
)
205 int tempreg
= _mesa_find_free_register(p
, PROGRAM_TEMPORARY
);
207 emit2(p
, OPCODE_ADD
, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[0], negate(inst
->SrcReg
[1]));
208 emit3(p
, OPCODE_CMP
, inst
->DstReg
, srcreg(PROGRAM_TEMPORARY
, tempreg
), builtin_zero
, builtin_one
);
211 static void transform_SLT(struct gl_program
* p
,
212 struct prog_instruction
* inst
)
214 int tempreg
= _mesa_find_free_register(p
, PROGRAM_TEMPORARY
);
216 emit2(p
, OPCODE_ADD
, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[0], negate(inst
->SrcReg
[1]));
217 emit3(p
, OPCODE_CMP
, inst
->DstReg
, srcreg(PROGRAM_TEMPORARY
, tempreg
), builtin_one
, builtin_zero
);
220 static void transform_SUB(struct gl_program
* p
,
221 struct prog_instruction
* inst
)
223 emit2(p
, OPCODE_ADD
, inst
->DstReg
, inst
->SrcReg
[0], negate(inst
->SrcReg
[1]));
226 static void transform_SWZ(struct gl_program
* p
,
227 struct prog_instruction
* inst
)
229 emit1(p
, OPCODE_MOV
, inst
->DstReg
, inst
->SrcReg
[0]);
232 static void transform_XPD(struct gl_program
* p
,
233 struct prog_instruction
* inst
)
235 int tempreg
= _mesa_find_free_register(p
, PROGRAM_TEMPORARY
);
237 emit2(p
, OPCODE_MUL
, dstreg(PROGRAM_TEMPORARY
, tempreg
),
238 swizzle(inst
->SrcReg
[0], SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
),
239 swizzle(inst
->SrcReg
[1], SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
));
240 emit3(p
, OPCODE_MAD
, inst
->DstReg
,
241 swizzle(inst
->SrcReg
[0], SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
),
242 swizzle(inst
->SrcReg
[1], SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
),
243 negate(srcreg(PROGRAM_TEMPORARY
, tempreg
)));
/**
 * Can be used as a transformation for @ref radeonClauseLocalTransform,
 * no userData necessary.
 *
 * Eliminates the following ALU instructions:
 *  ABS, DPH, FLR, POW, SGE, SLT, SUB, SWZ, XPD
 * replacing them with sequences built from:
 *  MOV, ADD, MUL, MAD, FRC, DP4, LG2, EX2, CMP
 *
 * @note should be applicable to R300 and R500 fragment programs.
 *
 * @todo add LIT here as well?
 */
260 GLboolean
radeonTransformALU(
262 struct gl_program
* prog
,
263 struct prog_instruction
* inst
,
266 switch(inst
->Opcode
) {
267 case OPCODE_ABS
: transform_ABS(prog
, inst
); return GL_TRUE
;
268 case OPCODE_DPH
: transform_DPH(prog
, inst
); return GL_TRUE
;
269 case OPCODE_FLR
: transform_FLR(prog
, inst
); return GL_TRUE
;
270 case OPCODE_POW
: transform_POW(prog
, inst
); return GL_TRUE
;
271 case OPCODE_SGE
: transform_SGE(prog
, inst
); return GL_TRUE
;
272 case OPCODE_SLT
: transform_SLT(prog
, inst
); return GL_TRUE
;
273 case OPCODE_SUB
: transform_SUB(prog
, inst
); return GL_TRUE
;
274 case OPCODE_SWZ
: transform_SWZ(prog
, inst
); return GL_TRUE
;
275 case OPCODE_XPD
: transform_XPD(prog
, inst
); return GL_TRUE
;