2 * Copyright (C) 2008 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * Shareable transformations that transform "special" ALU instructions
32 * into ALU instructions that are supported by hardware.
36 #include "radeon_program_alu.h"
38 #include "shader/prog_parameter.h"
/*
 * Append one instruction to program p via radeonAppendInstructions() and
 * fill in its saturate mode and its first source operand.
 *
 * NOTE(review): the embedded listing numbers jump inside this body, so some
 * original lines are not shown here. The stores of Opcode and DstReg and the
 * return of the new instruction are presumably among them - confirm against
 * the complete file.
 */
41 static struct prog_instruction
*emit1(struct gl_program
* p
,
42 gl_inst_opcode Opcode
, GLuint Saturate
, struct prog_dst_register DstReg
,
43 struct prog_src_register SrcReg
)
45 struct prog_instruction
*fpi
= radeonAppendInstructions(p
, 1);
48 fpi
->SaturateMode
= Saturate
;
50 fpi
->SrcReg
[0] = SrcReg
;
/*
 * Two-operand variant of the emit helpers: append one instruction to
 * program p via radeonAppendInstructions() and fill in its saturate mode
 * and source operands 0 and 1.
 *
 * NOTE(review): the embedded listing numbers jump inside this body; the
 * stores of Opcode and DstReg and the return statement are presumably on the
 * lines that are not shown - confirm against the complete file.
 */
54 static struct prog_instruction
*emit2(struct gl_program
* p
,
55 gl_inst_opcode Opcode
, GLuint Saturate
, struct prog_dst_register DstReg
,
56 struct prog_src_register SrcReg0
, struct prog_src_register SrcReg1
)
58 struct prog_instruction
*fpi
= radeonAppendInstructions(p
, 1);
61 fpi
->SaturateMode
= Saturate
;
63 fpi
->SrcReg
[0] = SrcReg0
;
64 fpi
->SrcReg
[1] = SrcReg1
;
/*
 * Three-operand variant of the emit helpers: append one instruction to
 * program p via radeonAppendInstructions() and fill in its saturate mode
 * and source operands 0, 1 and 2.
 *
 * NOTE(review): the embedded listing numbers jump inside this body; the
 * stores of Opcode and DstReg and the return statement are presumably on the
 * lines that are not shown - confirm against the complete file.
 */
68 static struct prog_instruction
*emit3(struct gl_program
* p
,
69 gl_inst_opcode Opcode
, GLuint Saturate
, struct prog_dst_register DstReg
,
70 struct prog_src_register SrcReg0
, struct prog_src_register SrcReg1
,
71 struct prog_src_register SrcReg2
)
73 struct prog_instruction
*fpi
= radeonAppendInstructions(p
, 1);
76 fpi
->SaturateMode
= Saturate
;
78 fpi
->SrcReg
[0] = SrcReg0
;
79 fpi
->SrcReg
[1] = SrcReg1
;
80 fpi
->SrcReg
[2] = SrcReg2
;
/*
 * Build a destination-register descriptor that writes all four components
 * (WRITEMASK_XYZW) with CondMask COND_TR and CondSwizzle SWIZZLE_NOOP.
 *
 * NOTE(review): the statements that consume the file and index parameters
 * and the return are not part of this listing (the embedded numbering skips
 * them); presumably they fill dst.File and dst.Index - confirm.
 */
84 static struct prog_dst_register
dstreg(int file
, int index
)
86 struct prog_dst_register dst
;
89 dst
.WriteMask
= WRITEMASK_XYZW
;
90 dst
.CondMask
= COND_TR
;
91 dst
.CondSwizzle
= SWIZZLE_NOOP
;
/*
 * Build a destination-register descriptor in the PROGRAM_TEMPORARY file
 * whose write mask is the caller-supplied mask, with CondMask COND_TR and
 * CondSwizzle SWIZZLE_NOOP.
 *
 * NOTE(review): the use of the index parameter and the return statement are
 * on lines this listing skips; presumably dst.Index is set from index -
 * confirm.
 */
97 static struct prog_dst_register
dstregtmpmask(int index
, int mask
)
99 struct prog_dst_register dst
;
100 dst
.File
= PROGRAM_TEMPORARY
;
102 dst
.WriteMask
= mask
;
103 dst
.CondMask
= COND_TR
;
104 dst
.CondSwizzle
= SWIZZLE_NOOP
;
/*
 * Source operand in the PROGRAM_BUILTIN file with swizzle SWIZZLE_0000.
 * Passed to CMP by transform_SGE and transform_SLT as one of the two
 * selectable results.
 *
 * NOTE(review): one initializer line and the closing brace are skipped by
 * this listing.
 */
110 static const struct prog_src_register builtin_zero
= {
111 .File
= PROGRAM_BUILTIN
,
113 .Swizzle
= SWIZZLE_0000
/*
 * Source operand in the PROGRAM_BUILTIN file with swizzle SWIZZLE_1111.
 * Passed to CMP by transform_SGE and transform_SLT as one of the two
 * selectable results.
 *
 * NOTE(review): one initializer line and the closing brace are skipped by
 * this listing.
 */
115 static const struct prog_src_register builtin_one
= {
116 .File
= PROGRAM_BUILTIN
,
118 .Swizzle
= SWIZZLE_1111
/*
 * Template source operand: file PROGRAM_UNDEFINED, swizzle SWIZZLE_NOOP.
 * srcreg() and srcregswz() copy it as the starting value of the operand
 * they build.
 *
 * NOTE(review): one initializer line and the closing brace are skipped by
 * this listing.
 */
120 static const struct prog_src_register srcreg_undefined
= {
121 .File
= PROGRAM_UNDEFINED
,
123 .Swizzle
= SWIZZLE_NOOP
/*
 * Build a source operand for register (file, index), starting from a copy
 * of srcreg_undefined.
 *
 * NOTE(review): only the initialization is visible here; the stores that
 * apply file and index and the return statement are on lines this listing
 * skips - confirm against the complete file.
 */
126 static struct prog_src_register
srcreg(int file
, int index
)
128 struct prog_src_register src
= srcreg_undefined
;
/*
 * Build a source operand for register (file, index) with an explicit
 * swizzle swz, starting from a copy of srcreg_undefined.
 *
 * NOTE(review): only the initialization is visible here; the stores that
 * apply file, index and swz and the return statement are on lines this
 * listing skips - confirm against the complete file.
 */
134 static struct prog_src_register
srcregswz(int file
, int index
, int swz
)
136 struct prog_src_register src
= srcreg_undefined
;
/*
 * Return a modified copy of reg for use as an absolute-value operand.
 * The visible statement clears every negate bit (NEGATE_NONE) on the copy.
 *
 * NOTE(review): the listing skips the original line between the copy and
 * the Negate store, and the return; presumably the skipped line is what
 * turns on the absolute-value modifier itself - confirm.
 */
143 static struct prog_src_register
absolute(struct prog_src_register reg
)
145 struct prog_src_register newreg
= reg
;
147 newreg
.Negate
= NEGATE_NONE
;
/*
 * Return a copy of reg with all four per-component negate bits toggled
 * (XOR with NEGATE_XYZW), so applying it twice restores the original
 * Negate field.
 *
 * NOTE(review): the return statement is on a line this listing skips.
 */
151 static struct prog_src_register
negate(struct prog_src_register reg
)
153 struct prog_src_register newreg
= reg
;
154 newreg
.Negate
= newreg
.Negate
^ NEGATE_XYZW
;
/*
 * Apply a further swizzle (x, y, z, w) on top of whatever swizzle reg
 * already carries.
 *
 * For each output slot, a selector below 4 is treated as a component index
 * and is resolved through reg's existing swizzle with GET_SWZ, so swizzles
 * compose. A selector of 4 or more is stored unchanged; callers in this file
 * pass SWIZZLE_ZERO and SWIZZLE_ONE this way (presumably both are >= 4 -
 * confirm against their definitions).
 *
 * NOTE(review): the return statement is on a line this listing skips.
 */
158 static struct prog_src_register
swizzle(struct prog_src_register reg
, GLuint x
, GLuint y
, GLuint z
, GLuint w
)
160 struct prog_src_register swizzled
= reg
;
161 swizzled
.Swizzle
= MAKE_SWIZZLE4(
162 x
>= 4 ? x
: GET_SWZ(reg
.Swizzle
, x
),
163 y
>= 4 ? y
: GET_SWZ(reg
.Swizzle
, y
),
164 z
>= 4 ? z
: GET_SWZ(reg
.Swizzle
, z
),
165 w
>= 4 ? w
: GET_SWZ(reg
.Swizzle
, w
));
/*
 * Return reg with its (already swizzled) X component replicated into all
 * four slots, by calling swizzle() with SWIZZLE_X in every position.
 */
169 static struct prog_src_register
scalar(struct prog_src_register reg
)
171 return swizzle(reg
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
);
/*
 * Lower ABS to a MOV: copy source operand 0, clear its negate bits and emit
 * a MOV to the original destination with the original saturate mode.
 *
 * NOTE(review): the listing skips the original line between the copy of the
 * operand and the Negate store; presumably that line sets the
 * absolute-value modifier on src, without which this would be a plain
 * un-negated move - confirm against the complete file.
 */
174 static void transform_ABS(struct radeon_transform_context
* t
,
175 struct prog_instruction
* inst
)
177 struct prog_src_register src
= inst
->SrcReg
[0];
179 src
.Negate
= NEGATE_NONE
;
180 emit1(t
->Program
, OPCODE_MOV
, inst
->SaturateMode
, inst
->DstReg
, src
);
/*
 * Lower DPH to DP4.
 *
 * A copy of source operand 0 is edited so that its last component reads the
 * constant one: the NEGATE_W bit is cleared, and the 3-bit swizzle field at
 * bit offset 3*3 is cleared and replaced with SWIZZLE_ONE (presumably that
 * field is the W slot, matching the NEGATE_W bit - confirm against the
 * swizzle macros). DP4 is then emitted with the edited operand and the
 * unmodified source operand 1, keeping the original destination and
 * saturate mode.
 */
183 static void transform_DPH(struct radeon_transform_context
* t
,
184 struct prog_instruction
* inst
)
186 struct prog_src_register src0
= inst
->SrcReg
[0];
187 src0
.Negate
&= ~NEGATE_W
;
188 src0
.Swizzle
&= ~(7 << (3 * 3));
189 src0
.Swizzle
|= SWIZZLE_ONE
<< (3 * 3);
190 emit2(t
->Program
, OPCODE_DP4
, inst
->SaturateMode
, inst
->DstReg
, src0
, inst
->SrcReg
[1]);
194 * [1, src0.y*src1.y, src0.z, src1.w]
195 * So basically MUL with lotsa swizzling.
/*
 * Lower DST to a single MUL.
 *
 * Source 0 is read as (1, y, z, 1) and source 1 as (1, y, 1, w), so the
 * component-wise product is (1, src0.y*src1.y, src0.z, src1.w). The original
 * destination and saturate mode are kept.
 */
197 static void transform_DST(struct radeon_transform_context
* t
,
198 struct prog_instruction
* inst
)
200 emit2(t
->Program
, OPCODE_MUL
, inst
->SaturateMode
, inst
->DstReg
,
201 swizzle(inst
->SrcReg
[0], SWIZZLE_ONE
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ONE
),
202 swizzle(inst
->SrcReg
[1], SWIZZLE_ONE
, SWIZZLE_Y
, SWIZZLE_ONE
, SWIZZLE_W
));
/*
 * Lower FLR to FRC + ADD, i.e. floor(x) = x - frac(x).
 *
 * FRC of source 0 goes into a fresh temporary (unsaturated, all
 * components); ADD then writes source 0 plus the negated temporary to the
 * original destination with the original saturate mode.
 */
205 static void transform_FLR(struct radeon_transform_context
* t
,
206 struct prog_instruction
* inst
)
208 int tempreg
= radeonFindFreeTemporary(t
);
209 emit1(t
->Program
, OPCODE_FRC
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[0]);
210 emit2(t
->Program
, OPCODE_ADD
, inst
->SaturateMode
, inst
->DstReg
,
211 inst
->SrcReg
[0], negate(srcreg(PROGRAM_TEMPORARY
, tempreg
)));
215 * Definition of LIT (from ARB_fragment_program):
217 * tmp = VectorLoad(op0);
218 * if (tmp.x < 0) tmp.x = 0;
219 * if (tmp.y < 0) tmp.y = 0;
220 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
221 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
224 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
227 * The longest path of computation is the one leading to result.z,
228 * consisting of 5 operations. This implementation of LIT takes
229 * 5 slots, if the subsequent optimization passes are clever enough
230 * to pair instructions correctly.
/*
 * Lower LIT into a sequence of MAX, MIN, LG2, MUL, EX2, CMP and MOV
 * instructions operating on one working temporary.
 *
 * Steps visible in this listing:
 *  - the scalar -127.999999 is registered as an unnamed program constant;
 *  - the working register is either a fresh temporary from
 *    radeonFindFreeTemporary() or the destination's own index; the tests on
 *    DstReg.WriteMask and DstReg.File feed that decision;
 *  - MAX writes temp.xyw, with (0, 0, 0, constant) as its last operand;
 *  - MIN writes temp.z from temp.w and the negated constant, so the two
 *    instructions together bound the exponent from below and above;
 *  - LG2, MUL and EX2 on temp.w form the power term from temp.y and temp.z;
 *  - CMP writes temp.z, selecting on the negated temp.x;
 *  - MOV rewrites temp.xyw as (1, temp.x, 1);
 *  - a final MOV copies the working register to the real destination.
 *
 * NOTE(review): the embedded numbering skips many original lines in this
 * body: the bodies of the two branches, the if/else around the two temp
 * assignments, the first source operand of MAX, the last operand of CMP and
 * the guard around the final MOV. Presumably needTemporary drives both the
 * register choice and the final MOV - confirm against the complete file
 * before relying on this summary.
 */
232 static void transform_LIT(struct radeon_transform_context
* t
,
233 struct prog_instruction
* inst
)
235 static const GLfloat LitConst
[4] = { -127.999999 };
238 GLuint constant_swizzle
;
240 int needTemporary
= 0;
241 struct prog_src_register srctemp
;
243 constant
= _mesa_add_unnamed_constant(t
->Program
->Parameters
, LitConst
, 1, &constant_swizzle
);
245 if (inst
->DstReg
.WriteMask
!= WRITEMASK_XYZW
) {
247 } else if (inst
->DstReg
.File
!= PROGRAM_TEMPORARY
) {
248 // LIT is typically followed by DP3/DP4, so there's no point
249 // in creating special code for this case
254 temp
= radeonFindFreeTemporary(t
);
256 temp
= inst
->DstReg
.Index
;
258 srctemp
= srcreg(PROGRAM_TEMPORARY
, temp
);
260 // tmp.x = max(0.0, Src.x);
261 // tmp.y = max(0.0, Src.y);
262 // tmp.w = clamp(Src.z, -128+eps, 128-eps);
263 emit2(t
->Program
, OPCODE_MAX
, 0,
264 dstregtmpmask(temp
, WRITEMASK_XYW
),
266 swizzle(srcreg(PROGRAM_CONSTANT
, constant
),
267 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
, constant_swizzle
&3));
268 emit2(t
->Program
, OPCODE_MIN
, 0,
269 dstregtmpmask(temp
, WRITEMASK_Z
),
270 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
271 negate(srcregswz(PROGRAM_CONSTANT
, constant
, constant_swizzle
)));
273 // tmp.w = Pow(tmp.y, tmp.w)
274 emit1(t
->Program
, OPCODE_LG2
, 0,
275 dstregtmpmask(temp
, WRITEMASK_W
),
276 swizzle(srctemp
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
));
277 emit2(t
->Program
, OPCODE_MUL
, 0,
278 dstregtmpmask(temp
, WRITEMASK_W
),
279 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
280 swizzle(srctemp
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
));
281 emit1(t
->Program
, OPCODE_EX2
, 0,
282 dstregtmpmask(temp
, WRITEMASK_W
),
283 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
));
285 // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
286 emit3(t
->Program
, OPCODE_CMP
, inst
->SaturateMode
,
287 dstregtmpmask(temp
, WRITEMASK_Z
),
288 negate(swizzle(srctemp
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)),
289 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
292 // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
293 emit1(t
->Program
, OPCODE_MOV
, inst
->SaturateMode
,
294 dstregtmpmask(temp
, WRITEMASK_XYW
),
295 swizzle(srctemp
, SWIZZLE_ONE
, SWIZZLE_X
, SWIZZLE_ONE
, SWIZZLE_ONE
));
298 emit1(t
->Program
, OPCODE_MOV
, 0, inst
->DstReg
, srctemp
);
/*
 * Lower LRP to ADD + MAD: src0 * (src1 - src2) + src2.
 *
 * ADD writes src1 plus the negated src2 into a fresh temporary
 * (unsaturated); MAD then multiplies src0 by that temporary and adds src2,
 * using the original saturate mode.
 *
 * NOTE(review): the destination argument of the MAD call is on a line this
 * listing skips; presumably it is the original destination register -
 * confirm.
 */
301 static void transform_LRP(struct radeon_transform_context
* t
,
302 struct prog_instruction
* inst
)
304 int tempreg
= radeonFindFreeTemporary(t
);
306 emit2(t
->Program
, OPCODE_ADD
, 0,
307 dstreg(PROGRAM_TEMPORARY
, tempreg
),
308 inst
->SrcReg
[1], negate(inst
->SrcReg
[2]));
309 emit3(t
->Program
, OPCODE_MAD
, inst
->SaturateMode
,
311 inst
->SrcReg
[0], srcreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[2]);
/*
 * Lower POW to LG2 + MUL + EX2, i.e. 2^(src1.x * log2(src0.x)).
 *
 * Only the W component of a fresh temporary is used as scratch: tempdst
 * writes W and tempsrc reads WWWW. Both inputs are reduced to their X
 * component with scalar(). The final EX2 writes the original destination
 * with the original saturate mode; the two intermediate instructions are
 * unsaturated.
 */
314 static void transform_POW(struct radeon_transform_context
* t
,
315 struct prog_instruction
* inst
)
317 int tempreg
= radeonFindFreeTemporary(t
);
318 struct prog_dst_register tempdst
= dstreg(PROGRAM_TEMPORARY
, tempreg
);
319 struct prog_src_register tempsrc
= srcreg(PROGRAM_TEMPORARY
, tempreg
);
320 tempdst
.WriteMask
= WRITEMASK_W
;
321 tempsrc
.Swizzle
= SWIZZLE_WWWW
;
323 emit1(t
->Program
, OPCODE_LG2
, 0, tempdst
, scalar(inst
->SrcReg
[0]));
324 emit2(t
->Program
, OPCODE_MUL
, 0, tempdst
, tempsrc
, scalar(inst
->SrcReg
[1]));
325 emit1(t
->Program
, OPCODE_EX2
, inst
->SaturateMode
, inst
->DstReg
, tempsrc
);
/*
 * Re-emit RSQ with its source operand passed through absolute(), keeping
 * the original destination and saturate mode.
 */
328 static void transform_RSQ(struct radeon_transform_context
* t
,
329 struct prog_instruction
* inst
)
331 emit1(t
->Program
, OPCODE_RSQ
, inst
->SaturateMode
, inst
->DstReg
, absolute(inst
->SrcReg
[0]));
/*
 * Lower SGE (set on greater-or-equal) to ADD + CMP.
 *
 * ADD writes src0 - src1 into a fresh temporary; CMP then selects between
 * builtin_zero and builtin_one based on that difference. Assuming CMP picks
 * its second operand when the first is negative (the convention the LIT
 * lowering in this file relies on), the result is 0 when src0 < src1 and 1
 * otherwise.
 */
334 static void transform_SGE(struct radeon_transform_context
* t
,
335 struct prog_instruction
* inst
)
337 int tempreg
= radeonFindFreeTemporary(t
);
339 emit2(t
->Program
, OPCODE_ADD
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[0], negate(inst
->SrcReg
[1]));
340 emit3(t
->Program
, OPCODE_CMP
, inst
->SaturateMode
, inst
->DstReg
,
341 srcreg(PROGRAM_TEMPORARY
, tempreg
), builtin_zero
, builtin_one
);
/*
 * Lower SLT (set on less-than) to ADD + CMP.
 *
 * ADD writes src0 - src1 into a fresh temporary; CMP then selects between
 * builtin_one and builtin_zero based on that difference. Assuming CMP picks
 * its second operand when the first is negative (the convention the LIT
 * lowering in this file relies on), the result is 1 when src0 < src1 and 0
 * otherwise - the mirror image of transform_SGE.
 */
344 static void transform_SLT(struct radeon_transform_context
* t
,
345 struct prog_instruction
* inst
)
347 int tempreg
= radeonFindFreeTemporary(t
);
349 emit2(t
->Program
, OPCODE_ADD
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[0], negate(inst
->SrcReg
[1]));
350 emit3(t
->Program
, OPCODE_CMP
, inst
->SaturateMode
, inst
->DstReg
,
351 srcreg(PROGRAM_TEMPORARY
, tempreg
), builtin_one
, builtin_zero
);
/*
 * Lower SUB to ADD with the second operand negated: dst = src0 + (-src1).
 * Destination and saturate mode are taken unchanged from the original
 * instruction.
 */
354 static void transform_SUB(struct radeon_transform_context
* t
,
355 struct prog_instruction
* inst
)
357 emit2(t
->Program
, OPCODE_ADD
, inst
->SaturateMode
, inst
->DstReg
, inst
->SrcReg
[0], negate(inst
->SrcReg
[1]));
/*
 * Lower SWZ to MOV: the instruction is re-emitted with the same
 * destination, saturate mode and source operand 0, only the opcode changes.
 */
360 static void transform_SWZ(struct radeon_transform_context
* t
,
361 struct prog_instruction
* inst
)
363 emit1(t
->Program
, OPCODE_MOV
, inst
->SaturateMode
, inst
->DstReg
, inst
->SrcReg
[0]);
/*
 * Lower XPD (cross product) to MUL + MAD.
 *
 * MUL writes src0.zxyw * src1.yzxw into a fresh temporary; MAD then computes
 * src0.yzxw * src1.zxyw minus that temporary into the original destination
 * with the original saturate mode. W is passed straight through in every
 * swizzle.
 */
366 static void transform_XPD(struct radeon_transform_context
* t
,
367 struct prog_instruction
* inst
)
369 int tempreg
= radeonFindFreeTemporary(t
);
371 emit2(t
->Program
, OPCODE_MUL
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
),
372 swizzle(inst
->SrcReg
[0], SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
),
373 swizzle(inst
->SrcReg
[1], SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
));
374 emit3(t
->Program
, OPCODE_MAD
, inst
->SaturateMode
, inst
->DstReg
,
375 swizzle(inst
->SrcReg
[0], SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
),
376 swizzle(inst
->SrcReg
[1], SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
),
377 negate(srcreg(PROGRAM_TEMPORARY
, tempreg
)));
382 * Can be used as a transformation for @ref radeonClauseLocalTransform,
383 * no userData necessary.
385 * Eliminates the following ALU instructions:
386 * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
388 * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
390 * Transforms RSQ to Radeon's native RSQ by explicitly setting
393 * @note should be applicable to R300 and R500 fragment programs.
/*
 * Dispatch on inst->Opcode to the matching transform_* lowering routine.
 * Each handled opcode (ABS, DPH, DST, FLR, LIT, LRP, POW, RSQ, SGE, SLT,
 * SUB, SWZ, XPD) runs its routine and returns GL_TRUE.
 *
 * NOTE(review): the remainder of the parameter list and whatever follows
 * the last case (default branch, closing braces) are on lines this listing
 * skips; presumably unhandled opcodes return GL_FALSE - confirm.
 */
395 GLboolean
radeonTransformALU(struct radeon_transform_context
* t
,
396 struct prog_instruction
* inst
,
399 switch(inst
->Opcode
) {
400 case OPCODE_ABS
: transform_ABS(t
, inst
); return GL_TRUE
;
401 case OPCODE_DPH
: transform_DPH(t
, inst
); return GL_TRUE
;
402 case OPCODE_DST
: transform_DST(t
, inst
); return GL_TRUE
;
403 case OPCODE_FLR
: transform_FLR(t
, inst
); return GL_TRUE
;
404 case OPCODE_LIT
: transform_LIT(t
, inst
); return GL_TRUE
;
405 case OPCODE_LRP
: transform_LRP(t
, inst
); return GL_TRUE
;
406 case OPCODE_POW
: transform_POW(t
, inst
); return GL_TRUE
;
407 case OPCODE_RSQ
: transform_RSQ(t
, inst
); return GL_TRUE
;
408 case OPCODE_SGE
: transform_SGE(t
, inst
); return GL_TRUE
;
409 case OPCODE_SLT
: transform_SLT(t
, inst
); return GL_TRUE
;
410 case OPCODE_SUB
: transform_SUB(t
, inst
); return GL_TRUE
;
411 case OPCODE_SWZ
: transform_SWZ(t
, inst
); return GL_TRUE
;
412 case OPCODE_XPD
: transform_XPD(t
, inst
); return GL_TRUE
;
/*
 * Register the two 4-component rows of SinCosConsts as unnamed program
 * constants and store the values returned by _mesa_add_unnamed_constant()
 * in constants[0] and constants[1].
 *
 * The ASSERT requires each row to come back with swizzle SWIZZLE_NOOP;
 * callers in this file select components of these constants by name
 * (for example constants[0] W or constants[1] Z), which presumably depends
 * on that layout.
 *
 * NOTE(review): most of the table entries and the local declarations are on
 * lines this listing skips; only two of the eight values are visible.
 */
419 static void sincos_constants(struct radeon_transform_context
* t
, GLuint
*constants
)
421 static const GLfloat SinCosConsts
[2][4] = {
424 -0.405284735, // -4/(PI*PI)
431 0.159154943, // 1/(2*PI)
437 for(i
= 0; i
< 2; ++i
) {
439 constants
[i
] = _mesa_add_unnamed_constant(t
->Program
->Parameters
, SinCosConsts
[i
], 4, &swz
);
440 ASSERT(swz
== SWIZZLE_NOOP
);
445 * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
447 * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
448 * MAD tmp.x, tmp.y, |src|, tmp.x
449 * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
450 * MAD dest, tmp.y, weight, tmp.x
/*
 * Emit a four-instruction sine approximation of src.x into dst, using one
 * fresh temporary and the constants registered by sincos_constants().
 *
 * Emitted sequence (tmp is the temporary, c0 is constants[0]):
 *   MUL tmp.xy, src.x, c0
 *   MAD tmp.x,  tmp.y, absolute(src.x), tmp.x
 *   MAD tmp.y,  tmp.x, absolute(tmp.x), -tmp.x
 *   MAD dst,    tmp.y, c0.w, tmp.x
 *
 * All four instructions are emitted unsaturated; dst is used exactly as
 * passed in, including its write mask.
 */
452 static void sin_approx(struct radeon_transform_context
* t
,
453 struct prog_dst_register dst
, struct prog_src_register src
, const GLuint
* constants
)
455 GLuint tempreg
= radeonFindFreeTemporary(t
);
457 emit2(t
->Program
, OPCODE_MUL
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
458 swizzle(src
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
459 srcreg(PROGRAM_CONSTANT
, constants
[0]));
460 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_X
),
461 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
),
462 absolute(swizzle(src
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)),
463 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
));
464 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_Y
),
465 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
466 absolute(swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)),
467 negate(swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)));
468 emit3(t
->Program
, OPCODE_MAD
, 0, dst
,
469 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
),
470 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
471 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
));
475 * Translate the trigonometric functions COS, SIN, and SCS
476 * using only the basic instructions
477 * MOV, ADD, MUL, MAD, FRC
/*
 * Lower COS, SIN and SCS to MAD / FRC range reduction followed by
 * sin_approx(). Any other opcode fails the initial opcode test.
 *
 * With c0 = constants[0] and c1 = constants[1] from sincos_constants():
 *  - COS: MAD tmp.w = src.x * c1.z + c1.x; FRC tmp.w; MAD tmp.w =
 *    tmp.w * c1.w - c0.z; then sin_approx() on tmp.w into the destination.
 *  - SIN: the same sequence, except the first MAD adds c1.y instead of c1.x.
 *  - SCS: both reductions run at once in tmp.xy (the first MAD adds c1 with
 *    its components in order, so X gets the COS offset and Y the SIN
 *    offset). The destination write mask is then narrowed to its X bit for
 *    the tmp.x operand and to its Y bit for the tmp.y operand.
 *
 * The inline pseudo-assembly in the COS branch names tmp.x and tmp.z, but
 * the instructions emitted there write the temporary's W component.
 *
 * NOTE(review): the embedded numbering skips many original lines here: the
 * early return, the declaration of constants, the tail of every
 * sin_approx() call and, in the SCS branch, the call heads themselves.
 * Presumably both SCS operands are passed to sin_approx() with the narrowed
 * destination - confirm against the complete file.
 */
479 GLboolean
radeonTransformTrigSimple(struct radeon_transform_context
* t
,
480 struct prog_instruction
* inst
,
483 if (inst
->Opcode
!= OPCODE_COS
&&
484 inst
->Opcode
!= OPCODE_SIN
&&
485 inst
->Opcode
!= OPCODE_SCS
)
489 GLuint tempreg
= radeonFindFreeTemporary(t
);
491 sincos_constants(t
, constants
);
493 if (inst
->Opcode
== OPCODE_COS
) {
494 // MAD tmp.x, src, 1/(2*PI), 0.75
496 // MAD tmp.z, tmp.x, 2*PI, -PI
497 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
498 swizzle(inst
->SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
499 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
),
500 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
));
501 emit1(t
->Program
, OPCODE_FRC
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
502 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
));
503 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
504 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
505 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
506 negate(swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
)));
508 sin_approx(t
, inst
->DstReg
,
509 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
511 } else if (inst
->Opcode
== OPCODE_SIN
) {
512 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
513 swizzle(inst
->SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
514 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
),
515 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
));
516 emit1(t
->Program
, OPCODE_FRC
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
517 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
));
518 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
519 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
520 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
521 negate(swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
)));
523 sin_approx(t
, inst
->DstReg
,
524 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
527 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
528 swizzle(inst
->SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
529 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
),
530 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
));
531 emit1(t
->Program
, OPCODE_FRC
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
532 srcreg(PROGRAM_TEMPORARY
, tempreg
));
533 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
534 srcreg(PROGRAM_TEMPORARY
, tempreg
),
535 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
536 negate(swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
)));
538 struct prog_dst_register dst
= inst
->DstReg
;
540 dst
.WriteMask
= inst
->DstReg
.WriteMask
& WRITEMASK_X
;
542 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
545 dst
.WriteMask
= inst
->DstReg
.WriteMask
& WRITEMASK_Y
;
547 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
),
556 * Transform the trigonometric functions COS, SIN, and SCS
557 * to include pre-scaling by 1/(2*PI) and taking the fractional
558 * part, so that the input to COS and SIN is always in the range [0,1).
559 * SCS is replaced by one COS and one SIN instruction.
561 * @warning This transformation implicitly changes the semantics of SIN and COS!
/*
 * Rewrite COS, SIN and SCS so the trig instruction receives a pre-scaled
 * argument. Any other opcode fails the initial opcode test.
 *
 * Emitted sequence:
 *  - MUL tmp.w = src.x * RCP_2PI (0.15915494309189535, registered as an
 *    unnamed program constant);
 *  - FRC into tmp.w, reading the temporary;
 *  - COS or SIN of tmp.w into the original destination with the original
 *    saturate mode.
 * For SCS the destination is split: COS is emitted with the write mask
 * forced to X only when the original mask includes X, and SIN with the mask
 * forced to Y only when it includes Y.
 *
 * NOTE(review): the early return, the local declarations and the final
 * return are on lines this listing skips - confirm against the complete
 * file.
 */
563 GLboolean
radeonTransformTrigScale(struct radeon_transform_context
* t
,
564 struct prog_instruction
* inst
,
567 if (inst
->Opcode
!= OPCODE_COS
&&
568 inst
->Opcode
!= OPCODE_SIN
&&
569 inst
->Opcode
!= OPCODE_SCS
)
572 static const GLfloat RCP_2PI
[] = { 0.15915494309189535 };
575 GLuint constant_swizzle
;
577 temp
= radeonFindFreeTemporary(t
);
578 constant
= _mesa_add_unnamed_constant(t
->Program
->Parameters
, RCP_2PI
, 1, &constant_swizzle
);
580 emit2(t
->Program
, OPCODE_MUL
, 0, dstregtmpmask(temp
, WRITEMASK_W
),
581 swizzle(inst
->SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
582 srcregswz(PROGRAM_CONSTANT
, constant
, constant_swizzle
));
583 emit1(t
->Program
, OPCODE_FRC
, 0, dstregtmpmask(temp
, WRITEMASK_W
),
584 srcreg(PROGRAM_TEMPORARY
, temp
));
586 if (inst
->Opcode
== OPCODE_COS
) {
587 emit1(t
->Program
, OPCODE_COS
, inst
->SaturateMode
, inst
->DstReg
,
588 srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
589 } else if (inst
->Opcode
== OPCODE_SIN
) {
590 emit1(t
->Program
, OPCODE_SIN
, inst
->SaturateMode
,
591 inst
->DstReg
, srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
592 } else if (inst
->Opcode
== OPCODE_SCS
) {
593 struct prog_dst_register moddst
= inst
->DstReg
;
595 if (inst
->DstReg
.WriteMask
& WRITEMASK_X
) {
596 moddst
.WriteMask
= WRITEMASK_X
;
597 emit1(t
->Program
, OPCODE_COS
, inst
->SaturateMode
, moddst
,
598 srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
600 if (inst
->DstReg
.WriteMask
& WRITEMASK_Y
) {
601 moddst
.WriteMask
= WRITEMASK_Y
;
602 emit1(t
->Program
, OPCODE_SIN
, inst
->SaturateMode
, moddst
,
603 srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
611 * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
612 * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
613 * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
615 * @warning This explicitly changes the form of DDX and DDY!
618 GLboolean
radeonTransformDeriv(struct radeon_transform_context
* t
,
619 struct prog_instruction
* inst
,
622 if (inst
->Opcode
!= OPCODE_DDX
&& inst
->Opcode
!= OPCODE_DDY
)
625 struct prog_src_register B
= inst
->SrcReg
[1];
627 B
.Swizzle
= MAKE_SWIZZLE4(SWIZZLE_ONE
, SWIZZLE_ONE
,
628 SWIZZLE_ONE
, SWIZZLE_ONE
);
629 B
.Negate
= NEGATE_XYZW
;
631 emit2(t
->Program
, inst
->Opcode
, inst
->SaturateMode
, inst
->DstReg
,