2 * Copyright (C) 2008 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * Shareable transformations that transform "special" ALU instructions
32 * into ALU instructions that are supported by hardware.
36 #include "radeon_program_alu.h"
38 #include "shader/prog_parameter.h"
41 static struct prog_instruction
*emit1(struct gl_program
* p
,
42 gl_inst_opcode Opcode
, GLuint Saturate
, struct prog_dst_register DstReg
,
43 struct prog_src_register SrcReg
)
45 struct prog_instruction
*fpi
= radeonAppendInstructions(p
, 1);
48 fpi
->SaturateMode
= Saturate
;
50 fpi
->SrcReg
[0] = SrcReg
;
54 static struct prog_instruction
*emit2(struct gl_program
* p
,
55 gl_inst_opcode Opcode
, GLuint Saturate
, struct prog_dst_register DstReg
,
56 struct prog_src_register SrcReg0
, struct prog_src_register SrcReg1
)
58 struct prog_instruction
*fpi
= radeonAppendInstructions(p
, 1);
61 fpi
->SaturateMode
= Saturate
;
63 fpi
->SrcReg
[0] = SrcReg0
;
64 fpi
->SrcReg
[1] = SrcReg1
;
68 static struct prog_instruction
*emit3(struct gl_program
* p
,
69 gl_inst_opcode Opcode
, GLuint Saturate
, struct prog_dst_register DstReg
,
70 struct prog_src_register SrcReg0
, struct prog_src_register SrcReg1
,
71 struct prog_src_register SrcReg2
)
73 struct prog_instruction
*fpi
= radeonAppendInstructions(p
, 1);
76 fpi
->SaturateMode
= Saturate
;
78 fpi
->SrcReg
[0] = SrcReg0
;
79 fpi
->SrcReg
[1] = SrcReg1
;
80 fpi
->SrcReg
[2] = SrcReg2
;
84 static void set_swizzle(struct prog_src_register
*SrcReg
, int coordinate
, int swz
)
86 SrcReg
->Swizzle
&= ~(7 << (3*coordinate
));
87 SrcReg
->Swizzle
|= swz
<< (3*coordinate
);
90 static void set_negate_base(struct prog_src_register
*SrcReg
, int coordinate
, int negate
)
92 SrcReg
->NegateBase
&= ~(1 << coordinate
);
93 SrcReg
->NegateBase
|= (negate
<< coordinate
);
96 static struct prog_dst_register
dstreg(int file
, int index
)
98 struct prog_dst_register dst
;
101 dst
.WriteMask
= WRITEMASK_XYZW
;
102 dst
.CondMask
= COND_TR
;
103 dst
.CondSwizzle
= SWIZZLE_NOOP
;
109 static struct prog_dst_register
dstregtmpmask(int index
, int mask
)
111 struct prog_dst_register dst
;
112 dst
.File
= PROGRAM_TEMPORARY
;
114 dst
.WriteMask
= mask
;
115 dst
.CondMask
= COND_TR
;
116 dst
.CondSwizzle
= SWIZZLE_NOOP
;
122 static const struct prog_src_register builtin_zero
= {
123 .File
= PROGRAM_BUILTIN
,
125 .Swizzle
= SWIZZLE_0000
127 static const struct prog_src_register builtin_one
= {
128 .File
= PROGRAM_BUILTIN
,
130 .Swizzle
= SWIZZLE_1111
132 static const struct prog_src_register srcreg_undefined
= {
133 .File
= PROGRAM_UNDEFINED
,
135 .Swizzle
= SWIZZLE_NOOP
138 static struct prog_src_register
srcreg(int file
, int index
)
140 struct prog_src_register src
= srcreg_undefined
;
146 static struct prog_src_register
srcregswz(int file
, int index
, int swz
)
148 struct prog_src_register src
= srcreg_undefined
;
155 static struct prog_src_register
absolute(struct prog_src_register reg
)
157 struct prog_src_register newreg
= reg
;
159 newreg
.NegateBase
= 0;
160 newreg
.NegateAbs
= 0;
164 static struct prog_src_register
negate(struct prog_src_register reg
)
166 struct prog_src_register newreg
= reg
;
167 newreg
.NegateAbs
= !newreg
.NegateAbs
;
171 static struct prog_src_register
swizzle(struct prog_src_register reg
, GLuint x
, GLuint y
, GLuint z
, GLuint w
)
173 struct prog_src_register swizzled
= reg
;
174 swizzled
.Swizzle
= MAKE_SWIZZLE4(
175 x
>= 4 ? x
: GET_SWZ(reg
.Swizzle
, x
),
176 y
>= 4 ? y
: GET_SWZ(reg
.Swizzle
, y
),
177 z
>= 4 ? z
: GET_SWZ(reg
.Swizzle
, z
),
178 w
>= 4 ? w
: GET_SWZ(reg
.Swizzle
, w
));
182 static struct prog_src_register
scalar(struct prog_src_register reg
)
184 return swizzle(reg
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
);
187 static void transform_ABS(struct radeon_transform_context
* t
,
188 struct prog_instruction
* inst
)
190 struct prog_src_register src
= inst
->SrcReg
[0];
194 emit1(t
->Program
, OPCODE_MOV
, inst
->SaturateMode
, inst
->DstReg
, src
);
197 static void transform_DPH(struct radeon_transform_context
* t
,
198 struct prog_instruction
* inst
)
200 struct prog_src_register src0
= inst
->SrcReg
[0];
201 if (src0
.NegateAbs
) {
203 int tempreg
= radeonFindFreeTemporary(t
);
204 emit1(t
->Program
, OPCODE_MOV
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), src0
);
205 src0
= srcreg(src0
.File
, src0
.Index
);
208 src0
.NegateBase
^= NEGATE_XYZW
;
211 set_swizzle(&src0
, 3, SWIZZLE_ONE
);
212 set_negate_base(&src0
, 3, 0);
213 emit2(t
->Program
, OPCODE_DP4
, inst
->SaturateMode
, inst
->DstReg
, src0
, inst
->SrcReg
[1]);
217 * [1, src0.y*src1.y, src0.z, src1.w]
218 * So basically MUL with lotsa swizzling.
220 static void transform_DST(struct radeon_transform_context
* t
,
221 struct prog_instruction
* inst
)
223 emit2(t
->Program
, OPCODE_MUL
, inst
->SaturateMode
, inst
->DstReg
,
224 swizzle(inst
->SrcReg
[0], SWIZZLE_ONE
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ONE
),
225 swizzle(inst
->SrcReg
[1], SWIZZLE_ONE
, SWIZZLE_Y
, SWIZZLE_ONE
, SWIZZLE_W
));
228 static void transform_FLR(struct radeon_transform_context
* t
,
229 struct prog_instruction
* inst
)
231 int tempreg
= radeonFindFreeTemporary(t
);
232 emit1(t
->Program
, OPCODE_FRC
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[0]);
233 emit2(t
->Program
, OPCODE_ADD
, inst
->SaturateMode
, inst
->DstReg
,
234 inst
->SrcReg
[0], negate(srcreg(PROGRAM_TEMPORARY
, tempreg
)));
238 * Definition of LIT (from ARB_fragment_program):
240 * tmp = VectorLoad(op0);
241 * if (tmp.x < 0) tmp.x = 0;
242 * if (tmp.y < 0) tmp.y = 0;
243 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
244 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
247 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
250 * The longest path of computation is the one leading to result.z,
251 * consisting of 5 operations. This implementation of LIT takes
252 * 5 slots, if the subsequent optimization passes are clever enough
253 * to pair instructions correctly.
255 static void transform_LIT(struct radeon_transform_context
* t
,
256 struct prog_instruction
* inst
)
258 static const GLfloat LitConst
[4] = { -127.999999 };
261 GLuint constant_swizzle
;
263 int needTemporary
= 0;
264 struct prog_src_register srctemp
;
266 constant
= _mesa_add_unnamed_constant(t
->Program
->Parameters
, LitConst
, 1, &constant_swizzle
);
268 if (inst
->DstReg
.WriteMask
!= WRITEMASK_XYZW
) {
270 } else if (inst
->DstReg
.File
!= PROGRAM_TEMPORARY
) {
271 // LIT is typically followed by DP3/DP4, so there's no point
272 // in creating special code for this case
277 temp
= radeonFindFreeTemporary(t
);
279 temp
= inst
->DstReg
.Index
;
281 srctemp
= srcreg(PROGRAM_TEMPORARY
, temp
);
283 // tmp.x = max(0.0, Src.x);
284 // tmp.y = max(0.0, Src.y);
285 // tmp.w = clamp(Src.z, -128+eps, 128-eps);
286 emit2(t
->Program
, OPCODE_MAX
, 0,
287 dstregtmpmask(temp
, WRITEMASK_XYW
),
289 swizzle(srcreg(PROGRAM_CONSTANT
, constant
),
290 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
, constant_swizzle
&3));
291 emit2(t
->Program
, OPCODE_MIN
, 0,
292 dstregtmpmask(temp
, WRITEMASK_Z
),
293 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
294 negate(srcregswz(PROGRAM_CONSTANT
, constant
, constant_swizzle
)));
296 // tmp.w = Pow(tmp.y, tmp.w)
297 emit1(t
->Program
, OPCODE_LG2
, 0,
298 dstregtmpmask(temp
, WRITEMASK_W
),
299 swizzle(srctemp
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
));
300 emit2(t
->Program
, OPCODE_MUL
, 0,
301 dstregtmpmask(temp
, WRITEMASK_W
),
302 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
303 swizzle(srctemp
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
));
304 emit1(t
->Program
, OPCODE_EX2
, 0,
305 dstregtmpmask(temp
, WRITEMASK_W
),
306 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
));
308 // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
309 emit3(t
->Program
, OPCODE_CMP
, inst
->SaturateMode
,
310 dstregtmpmask(temp
, WRITEMASK_Z
),
311 negate(swizzle(srctemp
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)),
312 swizzle(srctemp
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
315 // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
316 emit1(t
->Program
, OPCODE_MOV
, inst
->SaturateMode
,
317 dstregtmpmask(temp
, WRITEMASK_XYW
),
318 swizzle(srctemp
, SWIZZLE_ONE
, SWIZZLE_X
, SWIZZLE_ONE
, SWIZZLE_ONE
));
321 emit1(t
->Program
, OPCODE_MOV
, 0, inst
->DstReg
, srctemp
);
324 static void transform_LRP(struct radeon_transform_context
* t
,
325 struct prog_instruction
* inst
)
327 int tempreg
= radeonFindFreeTemporary(t
);
329 emit2(t
->Program
, OPCODE_ADD
, 0,
330 dstreg(PROGRAM_TEMPORARY
, tempreg
),
331 inst
->SrcReg
[1], negate(inst
->SrcReg
[2]));
332 emit3(t
->Program
, OPCODE_MAD
, inst
->SaturateMode
,
334 inst
->SrcReg
[0], srcreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[2]);
337 static void transform_POW(struct radeon_transform_context
* t
,
338 struct prog_instruction
* inst
)
340 int tempreg
= radeonFindFreeTemporary(t
);
341 struct prog_dst_register tempdst
= dstreg(PROGRAM_TEMPORARY
, tempreg
);
342 struct prog_src_register tempsrc
= srcreg(PROGRAM_TEMPORARY
, tempreg
);
343 tempdst
.WriteMask
= WRITEMASK_W
;
344 tempsrc
.Swizzle
= SWIZZLE_WWWW
;
346 emit1(t
->Program
, OPCODE_LG2
, 0, tempdst
, scalar(inst
->SrcReg
[0]));
347 emit2(t
->Program
, OPCODE_MUL
, 0, tempdst
, tempsrc
, scalar(inst
->SrcReg
[1]));
348 emit1(t
->Program
, OPCODE_EX2
, inst
->SaturateMode
, inst
->DstReg
, tempsrc
);
351 static void transform_RSQ(struct radeon_transform_context
* t
,
352 struct prog_instruction
* inst
)
354 emit1(t
->Program
, OPCODE_RSQ
, inst
->SaturateMode
, inst
->DstReg
, absolute(inst
->SrcReg
[0]));
357 static void transform_SGE(struct radeon_transform_context
* t
,
358 struct prog_instruction
* inst
)
360 int tempreg
= radeonFindFreeTemporary(t
);
362 emit2(t
->Program
, OPCODE_ADD
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[0], negate(inst
->SrcReg
[1]));
363 emit3(t
->Program
, OPCODE_CMP
, inst
->SaturateMode
, inst
->DstReg
,
364 srcreg(PROGRAM_TEMPORARY
, tempreg
), builtin_zero
, builtin_one
);
367 static void transform_SLT(struct radeon_transform_context
* t
,
368 struct prog_instruction
* inst
)
370 int tempreg
= radeonFindFreeTemporary(t
);
372 emit2(t
->Program
, OPCODE_ADD
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
), inst
->SrcReg
[0], negate(inst
->SrcReg
[1]));
373 emit3(t
->Program
, OPCODE_CMP
, inst
->SaturateMode
, inst
->DstReg
,
374 srcreg(PROGRAM_TEMPORARY
, tempreg
), builtin_one
, builtin_zero
);
377 static void transform_SUB(struct radeon_transform_context
* t
,
378 struct prog_instruction
* inst
)
380 emit2(t
->Program
, OPCODE_ADD
, inst
->SaturateMode
, inst
->DstReg
, inst
->SrcReg
[0], negate(inst
->SrcReg
[1]));
383 static void transform_SWZ(struct radeon_transform_context
* t
,
384 struct prog_instruction
* inst
)
386 emit1(t
->Program
, OPCODE_MOV
, inst
->SaturateMode
, inst
->DstReg
, inst
->SrcReg
[0]);
389 static void transform_XPD(struct radeon_transform_context
* t
,
390 struct prog_instruction
* inst
)
392 int tempreg
= radeonFindFreeTemporary(t
);
394 emit2(t
->Program
, OPCODE_MUL
, 0, dstreg(PROGRAM_TEMPORARY
, tempreg
),
395 swizzle(inst
->SrcReg
[0], SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
),
396 swizzle(inst
->SrcReg
[1], SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
));
397 emit3(t
->Program
, OPCODE_MAD
, inst
->SaturateMode
, inst
->DstReg
,
398 swizzle(inst
->SrcReg
[0], SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
),
399 swizzle(inst
->SrcReg
[1], SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
),
400 negate(srcreg(PROGRAM_TEMPORARY
, tempreg
)));
405 * Can be used as a transformation for @ref radeonClauseLocalTransform,
406 * no userData necessary.
408 * Eliminates the following ALU instructions:
409 * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
411 * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
413 * Transforms RSQ to Radeon's native RSQ by explicitly setting
416 * @note should be applicable to R300 and R500 fragment programs.
418 GLboolean
radeonTransformALU(struct radeon_transform_context
* t
,
419 struct prog_instruction
* inst
,
422 switch(inst
->Opcode
) {
423 case OPCODE_ABS
: transform_ABS(t
, inst
); return GL_TRUE
;
424 case OPCODE_DPH
: transform_DPH(t
, inst
); return GL_TRUE
;
425 case OPCODE_DST
: transform_DST(t
, inst
); return GL_TRUE
;
426 case OPCODE_FLR
: transform_FLR(t
, inst
); return GL_TRUE
;
427 case OPCODE_LIT
: transform_LIT(t
, inst
); return GL_TRUE
;
428 case OPCODE_LRP
: transform_LRP(t
, inst
); return GL_TRUE
;
429 case OPCODE_POW
: transform_POW(t
, inst
); return GL_TRUE
;
430 case OPCODE_RSQ
: transform_RSQ(t
, inst
); return GL_TRUE
;
431 case OPCODE_SGE
: transform_SGE(t
, inst
); return GL_TRUE
;
432 case OPCODE_SLT
: transform_SLT(t
, inst
); return GL_TRUE
;
433 case OPCODE_SUB
: transform_SUB(t
, inst
); return GL_TRUE
;
434 case OPCODE_SWZ
: transform_SWZ(t
, inst
); return GL_TRUE
;
435 case OPCODE_XPD
: transform_XPD(t
, inst
); return GL_TRUE
;
442 static void sincos_constants(struct radeon_transform_context
* t
, GLuint
*constants
)
444 static const GLfloat SinCosConsts
[2][4] = {
447 -0.405284735, // -4/(PI*PI)
454 0.159154943, // 1/(2*PI)
460 for(i
= 0; i
< 2; ++i
) {
462 constants
[i
] = _mesa_add_unnamed_constant(t
->Program
->Parameters
, SinCosConsts
[i
], 4, &swz
);
463 ASSERT(swz
== SWIZZLE_NOOP
);
468 * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
470 * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
471 * MAD tmp.x, tmp.y, |src|, tmp.x
472 * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
473 * MAD dest, tmp.y, weight, tmp.x
475 static void sin_approx(struct radeon_transform_context
* t
,
476 struct prog_dst_register dst
, struct prog_src_register src
, const GLuint
* constants
)
478 GLuint tempreg
= radeonFindFreeTemporary(t
);
480 emit2(t
->Program
, OPCODE_MUL
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
481 swizzle(src
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
482 srcreg(PROGRAM_CONSTANT
, constants
[0]));
483 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_X
),
484 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
),
485 absolute(swizzle(src
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)),
486 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
));
487 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_Y
),
488 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
489 absolute(swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)),
490 negate(swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
)));
491 emit3(t
->Program
, OPCODE_MAD
, 0, dst
,
492 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
),
493 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
494 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
));
498 * Translate the trigonometric functions COS, SIN, and SCS
499 * using only the basic instructions
500 * MOV, ADD, MUL, MAD, FRC
502 GLboolean
radeonTransformTrigSimple(struct radeon_transform_context
* t
,
503 struct prog_instruction
* inst
,
506 if (inst
->Opcode
!= OPCODE_COS
&&
507 inst
->Opcode
!= OPCODE_SIN
&&
508 inst
->Opcode
!= OPCODE_SCS
)
512 GLuint tempreg
= radeonFindFreeTemporary(t
);
514 sincos_constants(t
, constants
);
516 if (inst
->Opcode
== OPCODE_COS
) {
517 // MAD tmp.x, src, 1/(2*PI), 0.75
519 // MAD tmp.z, tmp.x, 2*PI, -PI
520 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
521 swizzle(inst
->SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
522 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
),
523 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
));
524 emit1(t
->Program
, OPCODE_FRC
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
525 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
));
526 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
527 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
528 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
529 negate(swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
)));
531 sin_approx(t
, inst
->DstReg
,
532 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
534 } else if (inst
->Opcode
== OPCODE_SIN
) {
535 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
536 swizzle(inst
->SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
537 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
),
538 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
));
539 emit1(t
->Program
, OPCODE_FRC
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
540 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
));
541 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_W
),
542 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
543 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
544 negate(swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
)));
546 sin_approx(t
, inst
->DstReg
,
547 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
550 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
551 swizzle(inst
->SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
552 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
),
553 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
));
554 emit1(t
->Program
, OPCODE_FRC
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
555 srcreg(PROGRAM_TEMPORARY
, tempreg
));
556 emit3(t
->Program
, OPCODE_MAD
, 0, dstregtmpmask(tempreg
, WRITEMASK_XY
),
557 srcreg(PROGRAM_TEMPORARY
, tempreg
),
558 swizzle(srcreg(PROGRAM_CONSTANT
, constants
[1]), SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
),
559 negate(swizzle(srcreg(PROGRAM_CONSTANT
, constants
[0]), SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
, SWIZZLE_Z
)));
561 struct prog_dst_register dst
= inst
->DstReg
;
563 dst
.WriteMask
= inst
->DstReg
.WriteMask
& WRITEMASK_X
;
565 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
568 dst
.WriteMask
= inst
->DstReg
.WriteMask
& WRITEMASK_Y
;
570 swizzle(srcreg(PROGRAM_TEMPORARY
, tempreg
), SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
),
579 * Transform the trigonometric functions COS, SIN, and SCS
580 * to include pre-scaling by 1/(2*PI) and taking the fractional
581 * part, so that the input to COS and SIN is always in the range [0,1).
582 * SCS is replaced by one COS and one SIN instruction.
584 * @warning This transformation implicitly changes the semantics of SIN and COS!
586 GLboolean
radeonTransformTrigScale(struct radeon_transform_context
* t
,
587 struct prog_instruction
* inst
,
590 if (inst
->Opcode
!= OPCODE_COS
&&
591 inst
->Opcode
!= OPCODE_SIN
&&
592 inst
->Opcode
!= OPCODE_SCS
)
595 static const GLfloat RCP_2PI
[] = { 0.15915494309189535 };
598 GLuint constant_swizzle
;
600 temp
= radeonFindFreeTemporary(t
);
601 constant
= _mesa_add_unnamed_constant(t
->Program
->Parameters
, RCP_2PI
, 1, &constant_swizzle
);
603 emit2(t
->Program
, OPCODE_MUL
, 0, dstregtmpmask(temp
, WRITEMASK_W
),
604 swizzle(inst
->SrcReg
[0], SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
),
605 srcregswz(PROGRAM_CONSTANT
, constant
, constant_swizzle
));
606 emit1(t
->Program
, OPCODE_FRC
, 0, dstregtmpmask(temp
, WRITEMASK_W
),
607 srcreg(PROGRAM_TEMPORARY
, temp
));
609 if (inst
->Opcode
== OPCODE_COS
) {
610 emit1(t
->Program
, OPCODE_COS
, inst
->SaturateMode
, inst
->DstReg
,
611 srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
612 } else if (inst
->Opcode
== OPCODE_SIN
) {
613 emit1(t
->Program
, OPCODE_SIN
, inst
->SaturateMode
,
614 inst
->DstReg
, srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
615 } else if (inst
->Opcode
== OPCODE_SCS
) {
616 struct prog_dst_register moddst
= inst
->DstReg
;
618 if (inst
->DstReg
.WriteMask
& WRITEMASK_X
) {
619 moddst
.WriteMask
= WRITEMASK_X
;
620 emit1(t
->Program
, OPCODE_COS
, inst
->SaturateMode
, moddst
,
621 srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
623 if (inst
->DstReg
.WriteMask
& WRITEMASK_Y
) {
624 moddst
.WriteMask
= WRITEMASK_Y
;
625 emit1(t
->Program
, OPCODE_SIN
, inst
->SaturateMode
, moddst
,
626 srcregswz(PROGRAM_TEMPORARY
, temp
, SWIZZLE_WWWW
));
634 * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
635 * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
636 * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
638 * @warning This explicitly changes the form of DDX and DDY!
641 GLboolean
radeonTransformDeriv(struct radeon_transform_context
* t
,
642 struct prog_instruction
* inst
,
645 if (inst
->Opcode
!= OPCODE_DDX
&& inst
->Opcode
!= OPCODE_DDY
)
648 struct prog_src_register B
= inst
->SrcReg
[1];
650 B
.Swizzle
= MAKE_SWIZZLE4(SWIZZLE_ONE
, SWIZZLE_ONE
,
651 SWIZZLE_ONE
, SWIZZLE_ONE
);
652 B
.NegateBase
= NEGATE_XYZW
;
654 emit2(t
->Program
, inst
->Opcode
, inst
->SaturateMode
, inst
->DstReg
,