2 * Copyright (C) 2008 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * Shareable transformations that transform "special" ALU instructions
32 * into ALU instructions that are supported by hardware.
36 #include "radeon_program_alu.h"
38 #include "radeon_compiler.h"
41 static struct rc_instruction
*emit1(
42 struct radeon_compiler
* c
, struct rc_instruction
* after
,
43 gl_inst_opcode Opcode
, GLuint Saturate
, struct prog_dst_register DstReg
,
44 struct prog_src_register SrcReg
)
46 struct rc_instruction
*fpi
= rc_insert_new_instruction(c
, after
);
48 fpi
->I
.Opcode
= Opcode
;
49 fpi
->I
.SaturateMode
= Saturate
;
50 fpi
->I
.DstReg
= DstReg
;
51 fpi
->I
.SrcReg
[0] = SrcReg
;
55 static struct rc_instruction
*emit2(
56 struct radeon_compiler
* c
, struct rc_instruction
* after
,
57 gl_inst_opcode Opcode
, GLuint Saturate
, struct prog_dst_register DstReg
,
58 struct prog_src_register SrcReg0
, struct prog_src_register SrcReg1
)
60 struct rc_instruction
*fpi
= rc_insert_new_instruction(c
, after
);
62 fpi
->I
.Opcode
= Opcode
;
63 fpi
->I
.SaturateMode
= Saturate
;
64 fpi
->I
.DstReg
= DstReg
;
65 fpi
->I
.SrcReg
[0] = SrcReg0
;
66 fpi
->I
.SrcReg
[1] = SrcReg1
;
70 static struct rc_instruction
*emit3(
71 struct radeon_compiler
* c
, struct rc_instruction
* after
,
72 gl_inst_opcode Opcode
, GLuint Saturate
, struct prog_dst_register DstReg
,
73 struct prog_src_register SrcReg0
, struct prog_src_register SrcReg1
,
74 struct prog_src_register SrcReg2
)
76 struct rc_instruction
*fpi
= rc_insert_new_instruction(c
, after
);
78 fpi
->I
.Opcode
= Opcode
;
79 fpi
->I
.SaturateMode
= Saturate
;
80 fpi
->I
.DstReg
= DstReg
;
81 fpi
->I
.SrcReg
[0] = SrcReg0
;
82 fpi
->I
.SrcReg
[1] = SrcReg1
;
83 fpi
->I
.SrcReg
[2] = SrcReg2
;
/*
 * dstreg: build a prog_dst_register value for a register file and index.
 *
 * The statements visible in this extract set WriteMask to WRITEMASK_XYZW,
 * CondMask to COND_TR and CondSwizzle to SWIZZLE_NOOP on a local that is
 * declared without an initializer.
 *
 * NOTE(review): the embedded numbering skips 90-91, 94 and 96-98, so the
 * statements that consume `file` and `index`, any remaining field
 * initialisation and the return are presumably missing from this extract --
 * confirm against the complete file.
 */
87 static struct prog_dst_register
dstreg(int file
, int index
)
89 struct prog_dst_register dst
;
92 dst
.WriteMask
= WRITEMASK_XYZW
;
93 dst
.CondMask
= COND_TR
;
95 dst
.CondSwizzle
= SWIZZLE_NOOP
;
/*
 * dstregtmpmask: build a prog_dst_register in the PROGRAM_TEMPORARY file
 * with a caller-supplied write mask.
 *
 * The local starts zero-initialised ({0}); the visible statements then set
 * File to PROGRAM_TEMPORARY, WriteMask to `mask`, CondMask to COND_TR and
 * CondSwizzle to SWIZZLE_NOOP.
 *
 * NOTE(review): the embedded numbering skips 105, 107 and 110 onwards, so
 * the use of `index` and the return are presumably missing from this
 * extract -- confirm against the complete file.
 */
101 static struct prog_dst_register
dstregtmpmask(int index
, int mask
)
103 struct prog_dst_register dst
= {0};
104 dst
.File
= PROGRAM_TEMPORARY
;
106 dst
.WriteMask
= mask
;
108 dst
.CondMask
= COND_TR
;
109 dst
.CondSwizzle
= SWIZZLE_NOOP
;
/*
 * Constant source-register templates.
 *
 * builtin_zero and builtin_one both name the PROGRAM_BUILTIN file and differ
 * only in swizzle (SWIZZLE_0000 vs. SWIZZLE_1111); transform_SGE and
 * transform_SLT pass them as CMP operands.  srcreg_undefined names
 * PROGRAM_UNDEFINED with SWIZZLE_NOOP and is the starting value copied by
 * srcreg() and srcregswz().
 *
 * NOTE(review): the embedded numbering skips one line inside each
 * initializer (117, 122, 127) as well as the closing lines, so the
 * initializers shown here are presumably incomplete -- confirm against the
 * complete file.
 */
115 static const struct prog_src_register builtin_zero
= {
116 .File
= PROGRAM_BUILTIN
,
118 .Swizzle
= SWIZZLE_0000
120 static const struct prog_src_register builtin_one
= {
121 .File
= PROGRAM_BUILTIN
,
123 .Swizzle
= SWIZZLE_1111
125 static const struct prog_src_register srcreg_undefined
= {
126 .File
= PROGRAM_UNDEFINED
,
128 .Swizzle
= SWIZZLE_NOOP
/*
 * srcreg: build a prog_src_register for a register file and index.
 *
 * The only statement visible in this extract copies srcreg_undefined into
 * the local `src`.
 *
 * NOTE(review): the embedded numbering skips 134 onwards, so the statements
 * that consume `file` and `index` and the return are presumably missing
 * from this extract -- confirm against the complete file.
 */
131 static struct prog_src_register
srcreg(int file
, int index
)
133 struct prog_src_register src
= srcreg_undefined
;
/*
 * srcregswz: build a prog_src_register for a register file, index and
 * swizzle.
 *
 * The only statement visible in this extract copies srcreg_undefined into
 * the local `src`.
 *
 * NOTE(review): the embedded numbering skips 142 onwards, so the statements
 * that consume `file`, `index` and `swz` and the return are presumably
 * missing from this extract -- confirm against the complete file.
 */
139 static struct prog_src_register
srcregswz(int file
, int index
, int swz
)
141 struct prog_src_register src
= srcreg_undefined
;
/*
 * absolute: derive a modified copy of a source register.
 *
 * The visible statements copy `reg` into `newreg` and reset the copy's
 * Negate field to NEGATE_NONE.
 *
 * NOTE(review): the embedded numbering skips 151 and 153 onwards; judging by
 * the function name, the missing line presumably marks the operand as an
 * absolute value, and the copy is presumably returned -- confirm against
 * the complete file.
 */
148 static struct prog_src_register
absolute(struct prog_src_register reg
)
150 struct prog_src_register newreg
= reg
;
152 newreg
.Negate
= NEGATE_NONE
;
156 static struct prog_src_register
negate(struct prog_src_register reg
)
158 struct prog_src_register newreg
= reg
;
159 newreg
.Negate
= newreg
.Negate
^ NEGATE_XYZW
;
163 static struct prog_src_register
swizzle(struct prog_src_register reg
, GLuint x
, GLuint y
, GLuint z
, GLuint w
)
165 struct prog_src_register swizzled
= reg
;
166 swizzled
.Swizzle
= MAKE_SWIZZLE4(
167 x
>= 4 ? x
: GET_SWZ(reg
.Swizzle
, x
),
168 y
>= 4 ? y
: GET_SWZ(reg
.Swizzle
, y
),
169 z
>= 4 ? z
: GET_SWZ(reg
.Swizzle
, z
),
170 w
>= 4 ? w
: GET_SWZ(reg
.Swizzle
, w
));
174 static struct prog_src_register
scalar(struct prog_src_register reg
)
176 return swizzle(reg
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
);
/*
 * transform_ABS: replace an ABS instruction by a MOV.
 *
 * A copy of the first source operand has its Negate field cleared, a MOV
 * with the original saturate mode and destination is emitted at inst->Prev
 * (i.e. ahead of the original instruction), and the original instruction is
 * removed.
 *
 * NOTE(review): the embedded numbering skips 183; the missing statement
 * presumably sets the absolute-value modifier on `src` -- confirm against
 * the complete file.
 */
179 static void transform_ABS(struct radeon_compiler
* c
,
180 struct rc_instruction
* inst
)
182 struct prog_src_register src
= inst
->I
.SrcReg
[0];
184 src
.Negate
= NEGATE_NONE
;
185 emit1(c
, inst
->Prev
, OPCODE_MOV
, inst
->I
.SaturateMode
, inst
->I
.DstReg
, src
);
186 rc_remove_instruction(inst
);
189 static void transform_DP3(struct radeon_compiler
* c
,
190 struct rc_instruction
* inst
)
192 struct prog_src_register src0
= inst
->I
.SrcReg
[0];
193 struct prog_src_register src1
= inst
->I
.SrcReg
[1];
194 src0
.Negate
&= ~NEGATE_W
;
195 src0
.Swizzle
&= ~(7 << (3 * 3));
196 src0
.Swizzle
|= SWIZZLE_ZERO
<< (3 * 3);
197 src1
.Negate
&= ~NEGATE_W
;
198 src1
.Swizzle
&= ~(7 << (3 * 3));
199 src1
.Swizzle
|= SWIZZLE_ZERO
<< (3 * 3);
200 emit2(c
, inst
->Prev
, OPCODE_DP4
, inst
->I
.SaturateMode
, inst
->I
.DstReg
, src0
, src1
);
201 rc_remove_instruction(inst
);
204 static void transform_DPH(struct radeon_compiler
* c
,
205 struct rc_instruction
* inst
)
207 struct prog_src_register src0
= inst
->I
.SrcReg
[0];
208 src0
.Negate
&= ~NEGATE_W
;
209 src0
.Swizzle
&= ~(7 << (3 * 3));
210 src0
.Swizzle
|= SWIZZLE_ONE
<< (3 * 3);
211 emit2(c
, inst
->Prev
, OPCODE_DP4
, inst
->I
.SaturateMode
, inst
->I
.DstReg
, src0
, inst
->I
.SrcReg
[1]);
212 rc_remove_instruction(inst
);
216 * [1, src0.y*src1.y, src0.z, src1.w]
217 * So basically MUL with lotsa swizzling.
219 static void transform_DST(struct radeon_compiler
* c
,
220 struct rc_instruction
* inst
)
222 emit2(c
, inst
->Prev
, OPCODE_MUL
, inst
->I
.SaturateMode
, inst
->I
.DstReg
,
223 swizzle(inst
->I
.SrcReg
[0], SWIZZLE_ONE
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ONE
),
224 swizzle(inst
->I
.SrcReg
[1], SWIZZLE_ONE
, SWIZZLE_Y
, SWIZZLE_ONE
, SWIZZLE_W
));
225 rc_remove_instruction(inst
);
228 static void transform_FLR(struct radeon_compiler
* c
,
229 struct rc_instruction
* inst
)
231 int tempreg
= rc_find_free_temporary(c
);
232 emit1(c
, inst
->Prev
, OPCODE_FRC
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->I
.SrcReg
[0]);
233 emit2(c
, inst
->Prev
, OPCODE_ADD
, inst
->I
.SaturateMode
, inst
->I
.DstReg
,
234 inst
->I
.SrcReg
[0], negate(srcreg(PROGRAM_TEMPORARY
, tempreg
)));
235 rc_remove_instruction(inst
);
239 * Definition of LIT (from ARB_fragment_program):
241 * tmp = VectorLoad(op0);
242 * if (tmp.x < 0) tmp.x = 0;
243 * if (tmp.y < 0) tmp.y = 0;
244 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
245 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
248 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
251 * The longest path of computation is the one leading to result.z,
252 * consisting of 5 operations. This implementation of LIT takes
253 * 5 slots, if the subsequent optimization passes are clever enough
254 * to pair instructions correctly.
/*
 * transform_LIT: expand a LIT instruction into MAX, MIN, LG2, MUL, EX2,
 * CMP and MOV instructions operating on one temporary register.
 *
 * Steps visible in this extract:
 *  - the scalar immediate -127.999999 is added to the program constants;
 *  - if the destination is not a PROGRAM_TEMPORARY written with
 *    WRITEMASK_XYZW, a MOV from a freshly allocated temporary to the
 *    original destination is inserted after `inst`, and `inst`'s destination
 *    is redirected to that temporary with a full write mask;
 *  - the replacement instructions are emitted at inst->Prev and the original
 *    instruction is removed at the end.
 *
 * NOTE(review): the embedded numbering has gaps (259, 261, 286, 311 among
 * others), so the declarations of `constant` and `temp` and at least one
 * argument of the MAX and CMP calls are presumably missing from this
 * extract -- confirm against the complete file.
 */
256 static void transform_LIT(struct radeon_compiler
* c
,
257 struct rc_instruction
* inst
)
260 GLuint constant_swizzle
;
262 struct prog_src_register srctemp
;
264 constant
= rc_constants_add_immediate_scalar(&c
->Program
.Constants
, -127.999999, &constant_swizzle
);
/* Work in a full-mask temporary; copy back to the real destination afterwards. */
266 if (inst
->I
.DstReg
.WriteMask
!= WRITEMASK_XYZW
|| inst
->I
.DstReg
.File
!= PROGRAM_TEMPORARY
) {
267 struct rc_instruction
* inst_mov
;
/* This MOV is inserted after inst (not at inst->Prev), so it runs last. */
269 inst_mov
= emit1(c
, inst
,
270 OPCODE_MOV
, 0, inst
->I
.DstReg
,
271 srcreg(PROGRAM_TEMPORARY
, rc_find_free_temporary(c
)));
273 inst
->I
.DstReg
.File
= PROGRAM_TEMPORARY
;
274 inst
->I
.DstReg
.Index
= inst_mov
->I
.SrcReg
[0].Index
;
275 inst
->I
.DstReg
.WriteMask
= WRITEMASK_XYZW
;
278 temp
= inst
->I
.DstReg
.Index
;
279 srctemp
= srcreg(PROGRAM_TEMPORARY
, temp
);
281 // tmp.x = max(0.0, Src.x);
282 // tmp.y = max(0.0, Src.y);
283 // tmp.w = clamp(Src.z, -128+eps, 128-eps);
284 emit2(c
, inst
->Prev
, OPCODE_MAX
, 0,
285 dstregtmpmask(temp
, WRITEMASK_XYW
),
287 swizzle(srcreg(PROGRAM_CONSTANT
, constant
),
288 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
, constant_swizzle
&3));
/* Upper clamp: the MIN result goes to tmp.z, using the negated constant. */
289 emit2(c
, inst
->Prev
, OPCODE_MIN
, 0,
290 dstregtmpmask(temp
, WRITEMASK_Z
),
291 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
292 negate(srcregswz(PROGRAM_CONSTANT
, constant
, constant_swizzle
)));
294 // tmp.w = Pow(tmp.y, tmp.w)
/* Implemented as EX2(LG2(tmp.y) * tmp.z), with every step written to tmp.w. */
295 emit1(c
, inst
->Prev
, OPCODE_LG2
, 0,
296 dstregtmpmask(temp
, WRITEMASK_W
),
297 swizzle(srctemp
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
));
298 emit2(c
, inst
->Prev
, OPCODE_MUL
, 0,
299 dstregtmpmask(temp
, WRITEMASK_W
),
300 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
301 swizzle(srctemp
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
));
302 emit1(c
, inst
->Prev
, OPCODE_EX2
, 0,
303 dstregtmpmask(temp
, WRITEMASK_W
),
304 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
));
306 // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
307 emit3(c
, inst
->Prev
, OPCODE_CMP
, inst
->I
.SaturateMode
,
308 dstregtmpmask(temp
, WRITEMASK_Z
),
309 negate(swizzle(srctemp
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)),
310 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
313 // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
314 emit1(c
, inst
->Prev
, OPCODE_MOV
, inst
->I
.SaturateMode
,
315 dstregtmpmask(temp
, WRITEMASK_XYW
),
316 swizzle(srctemp
, SWIZZLE_ONE
, SWIZZLE_X
, SWIZZLE_ONE
, SWIZZLE_ONE
));
318 rc_remove_instruction(inst
);
321 static void transform_LRP(struct radeon_compiler
* c
,
322 struct rc_instruction
* inst
)
324 int tempreg
= rc_find_free_temporary(c
);
326 emit2(c
, inst
->Prev
, OPCODE_ADD
, 0,
327 dstreg(PROGRAM_TEMPORARY
, tempreg
),
328 inst
->I
.SrcReg
[1], negate(inst
->I
.SrcReg
[2]));
329 emit3(c
, inst
->Prev
, OPCODE_MAD
, inst
->I
.SaturateMode
,
331 inst
->I
.SrcReg
[0], srcreg(PROGRAM_TEMPORARY
, tempreg
), inst
->I
.SrcReg
[2]);
333 rc_remove_instruction(inst
);
336 static void transform_POW(struct radeon_compiler
* c
,
337 struct rc_instruction
* inst
)
339 int tempreg
= rc_find_free_temporary(c
);
340 struct prog_dst_register tempdst
= dstreg(PROGRAM_TEMPORARY
, tempreg
);
341 struct prog_src_register tempsrc
= srcreg(PROGRAM_TEMPORARY
, tempreg
);
342 tempdst
.WriteMask
= WRITEMASK_W
;
343 tempsrc
.Swizzle
= SWIZZLE_WWWW
;
345 emit1(c
, inst
->Prev
, OPCODE_LG2
, 0, tempdst
, scalar(inst
->I
.SrcReg
[0]));
346 emit2(c
, inst
->Prev
, OPCODE_MUL
, 0, tempdst
, tempsrc
, scalar(inst
->I
.SrcReg
[1]));
347 emit1(c
, inst
->Prev
, OPCODE_EX2
, inst
->I
.SaturateMode
, inst
->I
.DstReg
, tempsrc
);
349 rc_remove_instruction(inst
);
352 static void transform_RSQ(struct radeon_compiler
* c
,
353 struct rc_instruction
* inst
)
355 inst
->I
.SrcReg
[0] = absolute(inst
->I
.SrcReg
[0]);
358 static void transform_SGE(struct radeon_compiler
* c
,
359 struct rc_instruction
* inst
)
361 int tempreg
= rc_find_free_temporary(c
);
363 emit2(c
, inst
->Prev
, OPCODE_ADD
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->I
.SrcReg
[0], negate(inst
->I
.SrcReg
[1]));
364 emit3(c
, inst
->Prev
, OPCODE_CMP
, inst
->I
.SaturateMode
, inst
->I
.DstReg
,
365 srcreg(PROGRAM_TEMPORARY
, tempreg
), builtin_zero
, builtin_one
);
367 rc_remove_instruction(inst
);
370 static void transform_SLT(struct radeon_compiler
* c
,
371 struct rc_instruction
* inst
)
373 int tempreg
= rc_find_free_temporary(c
);
375 emit2(c
, inst
->Prev
, OPCODE_ADD
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->I
.SrcReg
[0], negate(inst
->I
.SrcReg
[1]));
376 emit3(c
, inst
->Prev
, OPCODE_CMP
, inst
->I
.SaturateMode
, inst
->I
.DstReg
,
377 srcreg(PROGRAM_TEMPORARY
, tempreg
), builtin_one
, builtin_zero
);
379 rc_remove_instruction(inst
);
382 static void transform_SUB(struct radeon_compiler
* c
,
383 struct rc_instruction
* inst
)
385 inst
->I
.Opcode
= OPCODE_ADD
;
386 inst
->I
.SrcReg
[1] = negate(inst
->I
.SrcReg
[1]);
389 static void transform_SWZ(struct radeon_compiler
* c
,
390 struct rc_instruction
* inst
)
392 inst
->I
.Opcode
= OPCODE_MOV
;
395 static void transform_XPD(struct radeon_compiler
* c
,
396 struct rc_instruction
* inst
)
398 int tempreg
= rc_find_free_temporary(c
);
400 emit2(c
, inst
->Prev
, OPCODE_MUL
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
),
401 swizzle(inst
->I
.SrcReg
[0], SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
),
402 swizzle(inst
->I
.SrcReg
[1], SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
));
403 emit3(c
, inst
->Prev
, OPCODE_MAD
, inst
->I
.SaturateMode
, inst
->I
.DstReg
,
404 swizzle(inst
->I
.SrcReg
[0], SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
),
405 swizzle(inst
->I
.SrcReg
[1], SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
),
406 negate(srcreg(PROGRAM_TEMPORARY
, tempreg
)));
408 rc_remove_instruction(inst
);
413 * Can be used as a transformation for @ref radeonClauseLocalTransform,
414 * no userData necessary.
416 * Eliminates the following ALU instructions:
417 * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
419 * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
421 * Transforms RSQ to Radeon's native RSQ by explicitly setting
424 * @note should be applicable to R300 and R500 fragment programs.
/*
 * radeonTransformALU: dispatch on the instruction's opcode and apply the
 * matching transform_* helper from this file.
 *
 * Every case visible here (ABS, DPH, DST, FLR, LIT, LRP, POW, RSQ, SGE,
 * SLT, SUB, SWZ, XPD) calls its helper and returns GL_TRUE.
 *
 * NOTE(review): the embedded numbering skips 429-430 and 445 onwards, so
 * the end of the parameter list and the handling of all other opcodes
 * (presumably a default case returning GL_FALSE) are missing from this
 * extract -- confirm against the complete file.
 */
426 GLboolean
radeonTransformALU(
427 struct radeon_compiler
* c
,
428 struct rc_instruction
* inst
,
431 switch(inst
->I
.Opcode
) {
432 case OPCODE_ABS
: transform_ABS(c
, inst
); return GL_TRUE
;
433 case OPCODE_DPH
: transform_DPH(c
, inst
); return GL_TRUE
;
434 case OPCODE_DST
: transform_DST(c
, inst
); return GL_TRUE
;
435 case OPCODE_FLR
: transform_FLR(c
, inst
); return GL_TRUE
;
436 case OPCODE_LIT
: transform_LIT(c
, inst
); return GL_TRUE
;
437 case OPCODE_LRP
: transform_LRP(c
, inst
); return GL_TRUE
;
438 case OPCODE_POW
: transform_POW(c
, inst
); return GL_TRUE
;
439 case OPCODE_RSQ
: transform_RSQ(c
, inst
); return GL_TRUE
;
440 case OPCODE_SGE
: transform_SGE(c
, inst
); return GL_TRUE
;
441 case OPCODE_SLT
: transform_SLT(c
, inst
); return GL_TRUE
;
442 case OPCODE_SUB
: transform_SUB(c
, inst
); return GL_TRUE
;
443 case OPCODE_SWZ
: transform_SWZ(c
, inst
); return GL_TRUE
;
444 case OPCODE_XPD
: transform_XPD(c
, inst
); return GL_TRUE
;
451 static void transform_r300_vertex_ABS(struct radeon_compiler
* c
,
452 struct rc_instruction
* inst
)
454 /* Note: r500 can take absolute values, but r300 cannot. */
455 inst
->I
.Opcode
= OPCODE_MAX
;
456 inst
->I
.SrcReg
[1] = inst
->I
.SrcReg
[0];
457 inst
->I
.SrcReg
[1].Negate
^= NEGATE_XYZW
;
461 * For use with radeonLocalTransform, this transforms non-native ALU
462 * instructions of the r300 up to r500 vertex engine.
/*
 * r300_transform_vertex_alu: dispatch on the instruction's opcode and apply
 * the matching vertex-program transform.
 *
 * Every case visible here (ABS, DP3, DPH, FLR, LRP, SUB, SWZ, XPD) calls
 * its helper and returns GL_TRUE.  ABS uses the r300-specific
 * transform_r300_vertex_ABS rather than transform_ABS.
 *
 * NOTE(review): the embedded numbering skips 467-468 and 478 onwards, so
 * the end of the parameter list and the handling of all other opcodes
 * (presumably a default case returning GL_FALSE) are missing from this
 * extract -- confirm against the complete file.
 */
464 GLboolean
r300_transform_vertex_alu(
465 struct radeon_compiler
* c
,
466 struct rc_instruction
* inst
,
469 switch(inst
->I
.Opcode
) {
470 case OPCODE_ABS
: transform_r300_vertex_ABS(c
, inst
); return GL_TRUE
;
471 case OPCODE_DP3
: transform_DP3(c
, inst
); return GL_TRUE
;
472 case OPCODE_DPH
: transform_DPH(c
, inst
); return GL_TRUE
;
473 case OPCODE_FLR
: transform_FLR(c
, inst
); return GL_TRUE
;
474 case OPCODE_LRP
: transform_LRP(c
, inst
); return GL_TRUE
;
475 case OPCODE_SUB
: transform_SUB(c
, inst
); return GL_TRUE
;
476 case OPCODE_SWZ
: transform_SWZ(c
, inst
); return GL_TRUE
;
477 case OPCODE_XPD
: transform_XPD(c
, inst
); return GL_TRUE
;
/*
 * sincos_constants: register the two vec4 immediates used by the sine
 * approximation and store the values returned by
 * rc_constants_add_immediate_vec4() in constants[0] and constants[1].
 *
 * `constants` must therefore have room for at least two entries.
 *
 * NOTE(review): only two of the eight table entries survive in this
 * extract (-4/(PI*PI) and 1/(2*PI)); the embedded numbering skips the
 * other entries, the declaration of `i` and the closing lines -- confirm
 * the full table against the complete file.
 */
483 static void sincos_constants(struct radeon_compiler
* c
, GLuint
*constants
)
485 static const GLfloat SinCosConsts
[2][4] = {
488 -0.405284735, // -4/(PI*PI)
495 0.159154943, // 1/(2*PI)
/* One immediate vec4 per table row. */
501 for(i
= 0; i
< 2; ++i
)
502 constants
[i
] = rc_constants_add_immediate_vec4(&c
->Program
.Constants
, SinCosConsts
[i
]);
506 * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
508 * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
509 * MAD tmp.x, tmp.y, |src|, tmp.x
510 * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
511 * MAD dest, tmp.y, weight, tmp.x
513 static void sin_approx(
514 struct radeon_compiler
* c
, struct rc_instruction
* before
,
515 struct prog_dst_register dst
, struct prog_src_register src
, const GLuint
* constants
)
517 GLuint tempreg
= rc_find_free_temporary(c
);
519 emit2(c
, before
->Prev
, OPCODE_MUL
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
520 swizzle(src
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
521 srcreg(PROGRAM_CONSTANT
, constants
[0]));
522 emit3(c
, before
->Prev
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_X
),
523 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
),
524 absolute(swizzle(src
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)),
525 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
));
526 emit3(c
, before
->Prev
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_Y
),
527 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
528 absolute(swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)),
529 negate(swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)));
530 emit3(c
, before
->Prev
, OPCODE_MAD
, 0, dst
,
531 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
),
532 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
533 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
));
537 * Translate the trigonometric functions COS, SIN, and SCS
538 * using only the basic instructions
539 * MOV, ADD, MUL, MAD, FRC
/*
 * radeonTransformTrigSimple: expand COS, SIN and SCS into MAD/FRC/MAD range
 * reduction followed by sin_approx().
 *
 * Visible behaviour:
 *  - the opcode is tested against COS, SIN and SCS at the top;
 *  - a free temporary is allocated and sincos_constants() fills `constants`;
 *  - COS and SIN each emit MAD, FRC, MAD into tempreg.w and then call
 *    sin_approx() on tempreg.w with the original destination; they differ
 *    only in the additive constant (constants[1].x for COS, constants[1].y
 *    for SIN);
 *  - the remaining branch computes both angles at once in tempreg.xy and
 *    calls sin_approx() twice, with the destination write mask restricted
 *    to X (from tempreg.x) and to Y (from tempreg.y);
 *  - the original instruction is removed at the end.
 *
 * NOTE(review): the embedded numbering has many gaps (543-544, 548-550,
 * 572, 587-588, 605-606, 610 onwards, ...), so the last parameter, the
 * early-out for other opcodes, the declaration of `constants`, the final
 * argument of each sin_approx() call, the else/closing braces and the
 * return value are missing from this extract -- confirm against the
 * complete file.
 */
541 GLboolean
radeonTransformTrigSimple(struct radeon_compiler
* c
,
542 struct rc_instruction
* inst
,
545 if (inst
->I
.Opcode
!= OPCODE_COS
&&
546 inst
->I
.Opcode
!= OPCODE_SIN
&&
547 inst
->I
.Opcode
!= OPCODE_SCS
)
551 GLuint tempreg
= rc_find_free_temporary(c
);
553 sincos_constants(c
, constants
);
555 if (inst
->I
.Opcode
== OPCODE_COS
) {
/* The comments below say tmp.x / tmp.z, but the code writes and reads tempreg.w. */
556 // MAD tmp.x, src, 1/(2*PI), 0.75
558 // MAD tmp.z, tmp.x, 2*PI, -PI
559 emit3(c
, inst
->Prev
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
560 swizzle(inst
->I
.SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
561 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
),
562 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
));
563 emit1(c
, inst
->Prev
, OPCODE_FRC
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
564 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
));
565 emit3(c
, inst
->Prev
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
566 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
567 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
568 negate(swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
)));
570 sin_approx(c
, inst
, inst
->I
.DstReg
,
571 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
573 } else if (inst
->I
.Opcode
== OPCODE_SIN
) {
/* Same sequence as the COS branch, but the first MAD adds constants[1].y instead of constants[1].x. */
574 emit3(c
, inst
->Prev
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
575 swizzle(inst
->I
.SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
576 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
),
577 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
));
578 emit1(c
, inst
->Prev
, OPCODE_FRC
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
579 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
));
580 emit3(c
, inst
->Prev
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
581 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
582 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
583 negate(swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
)));
585 sin_approx(c
, inst
, inst
->I
.DstReg
,
586 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
/* Remaining opcode (SCS): both angles are reduced together in tempreg.xy. */
589 emit3(c
, inst
->Prev
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
590 swizzle(inst
->I
.SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
591 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
),
592 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
));
593 emit1(c
, inst
->Prev
, OPCODE_FRC
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
594 srcreg(PROGRAM_TEMPORARY
, tempreg
));
595 emit3(c
, inst
->Prev
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
596 srcreg(PROGRAM_TEMPORARY
, tempreg
),
597 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
598 negate(swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
)));
600 struct prog_dst_register dst
= inst
->I
.DstReg
;
/* First result: only the X bit of the original write mask is kept. */
602 dst
.WriteMask
= inst
->I
.DstReg
.WriteMask
& WRITEMASK_X
;
603 sin_approx(c
, inst
, dst
,
604 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
/* Second result: only the Y bit of the original write mask is kept. */
607 dst
.WriteMask
= inst
->I
.DstReg
.WriteMask
& WRITEMASK_Y
;
608 sin_approx(c
, inst
, dst
,
609 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
),
613 rc_remove_instruction(inst
);
620 * Transform the trigonometric functions COS, SIN, and SCS
621 * to include pre-scaling by 1/(2*PI) and taking the fractional
622 * part, so that the input to COS and SIN is always in the range [0,1).
623 * SCS is replaced by one COS and one SIN instruction.
625 * @warning This transformation implicitly changes the semantics of SIN and COS!
/*
 * radeonTransformTrigScale: pre-scale the argument of COS, SIN and SCS by
 * 1/(2*PI) and take its fractional part before re-emitting the
 * trigonometric instruction(s).
 *
 * Visible behaviour:
 *  - the opcode is tested against COS, SIN and SCS at the top;
 *  - RCP_2PI is added as a scalar immediate and a free temporary allocated;
 *  - MUL temp.w = src.x * RCP_2PI, then FRC temp.w = temp(.w);
 *  - COS / SIN are re-emitted reading temp.wwww with the original saturate
 *    mode and destination;
 *  - SCS becomes a COS writing only X and a SIN writing only Y, each emitted
 *    only when the original write mask contains that component;
 *  - the original instruction is removed at the end.
 *
 * NOTE(review): the embedded numbering has gaps (629-630, 634-635,
 * 637-638, 663, 668 onwards, ...), so the last parameter, the early-out
 * for other opcodes, the declarations of `temp` and `constant`, several
 * closing braces and the return value are missing from this extract --
 * confirm against the complete file.
 */
627 GLboolean
radeonTransformTrigScale(struct radeon_compiler
* c
,
628 struct rc_instruction
* inst
,
631 if (inst
->I
.Opcode
!= OPCODE_COS
&&
632 inst
->I
.Opcode
!= OPCODE_SIN
&&
633 inst
->I
.Opcode
!= OPCODE_SCS
)
636 static const GLfloat RCP_2PI
= 0.15915494309189535;
639 GLuint constant_swizzle
;
641 temp
= rc_find_free_temporary(c
);
642 constant
= rc_constants_add_immediate_scalar(&c
->Program
.Constants
, RCP_2PI
, &constant_swizzle
);
/* temp.w = frac(src.x / (2*PI)) */
644 emit2(c
, inst
->Prev
, OPCODE_MUL
, 0, dstregtmpmask(temp
, WRITEMASK_W
),
645 swizzle(inst
->I
.SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
646 srcregswz(PROGRAM_CONSTANT
, constant
, constant_swizzle
));
647 emit1(c
, inst
->Prev
, OPCODE_FRC
, 0, dstregtmpmask(temp
, WRITEMASK_W
),
648 srcreg(PROGRAM_TEMPORARY
, temp
));
650 if (inst
->I
.Opcode
== OPCODE_COS
) {
651 emit1(c
, inst
->Prev
, OPCODE_COS
, inst
->I
.SaturateMode
, inst
->I
.DstReg
,
652 srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
653 } else if (inst
->I
.Opcode
== OPCODE_SIN
) {
654 emit1(c
, inst
->Prev
, OPCODE_SIN
, inst
->I
.SaturateMode
,
655 inst
->I
.DstReg
, srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
656 } else if (inst
->I
.Opcode
== OPCODE_SCS
) {
/* SCS: cosine goes to X, sine goes to Y, each only if requested by the write mask. */
657 struct prog_dst_register moddst
= inst
->I
.DstReg
;
659 if (inst
->I
.DstReg
.WriteMask
& WRITEMASK_X
) {
660 moddst
.WriteMask
= WRITEMASK_X
;
661 emit1(c
, inst
->Prev
, OPCODE_COS
, inst
->I
.SaturateMode
, moddst
,
662 srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
664 if (inst
->I
.DstReg
.WriteMask
& WRITEMASK_Y
) {
665 moddst
.WriteMask
= WRITEMASK_Y
;
666 emit1(c
, inst
->Prev
, OPCODE_SIN
, inst
->I
.SaturateMode
, moddst
,
667 srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
671 rc_remove_instruction(inst
);
677 * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
678 * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
679 * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
681 * @warning This explicitly changes the form of DDX and DDY!
684 GLboolean
radeonTransformDeriv(struct radeon_compiler
* c
,
685 struct rc_instruction
* inst
,
688 if (inst
->I
.Opcode
!= OPCODE_DDX
&& inst
->I
.Opcode
!= OPCODE_DDY
)
691 inst
->I
.SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_ONE
, SWIZZLE_ONE
, SWIZZLE_ONE
, SWIZZLE_ONE
);
692 inst
->I
.SrcReg
[1].Negate
= NEGATE_XYZW
;