2 * Copyright (C) 2008 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * Shareable transformations that transform "special" ALU instructions
32 * into ALU instructions that are supported by hardware.
36 #include "radeon_program_alu.h"
38 #include "radeon_compiler.h"
41 static struct rc_instruction
*emit1(
42 struct radeon_compiler
* c
, struct rc_instruction
* after
,
43 rc_opcode Opcode
, rc_saturate_mode Saturate
, struct rc_dst_register DstReg
,
44 struct rc_src_register SrcReg
)
46 struct rc_instruction
*fpi
= rc_insert_new_instruction(c
, after
);
48 fpi
->U
.I
.Opcode
= Opcode
;
49 fpi
->U
.I
.SaturateMode
= Saturate
;
50 fpi
->U
.I
.DstReg
= DstReg
;
51 fpi
->U
.I
.SrcReg
[0] = SrcReg
;
55 static struct rc_instruction
*emit2(
56 struct radeon_compiler
* c
, struct rc_instruction
* after
,
57 rc_opcode Opcode
, rc_saturate_mode Saturate
, struct rc_dst_register DstReg
,
58 struct rc_src_register SrcReg0
, struct rc_src_register SrcReg1
)
60 struct rc_instruction
*fpi
= rc_insert_new_instruction(c
, after
);
62 fpi
->U
.I
.Opcode
= Opcode
;
63 fpi
->U
.I
.SaturateMode
= Saturate
;
64 fpi
->U
.I
.DstReg
= DstReg
;
65 fpi
->U
.I
.SrcReg
[0] = SrcReg0
;
66 fpi
->U
.I
.SrcReg
[1] = SrcReg1
;
70 static struct rc_instruction
*emit3(
71 struct radeon_compiler
* c
, struct rc_instruction
* after
,
72 rc_opcode Opcode
, rc_saturate_mode Saturate
, struct rc_dst_register DstReg
,
73 struct rc_src_register SrcReg0
, struct rc_src_register SrcReg1
,
74 struct rc_src_register SrcReg2
)
76 struct rc_instruction
*fpi
= rc_insert_new_instruction(c
, after
);
78 fpi
->U
.I
.Opcode
= Opcode
;
79 fpi
->U
.I
.SaturateMode
= Saturate
;
80 fpi
->U
.I
.DstReg
= DstReg
;
81 fpi
->U
.I
.SrcReg
[0] = SrcReg0
;
82 fpi
->U
.I
.SrcReg
[1] = SrcReg1
;
83 fpi
->U
.I
.SrcReg
[2] = SrcReg2
;
87 static struct rc_dst_register
dstreg(int file
, int index
)
89 struct rc_dst_register dst
;
92 dst
.WriteMask
= RC_MASK_XYZW
;
97 static struct rc_dst_register
dstregtmpmask(int index
, int mask
)
99 struct rc_dst_register dst
= {0};
100 dst
.File
= RC_FILE_TEMPORARY
;
102 dst
.WriteMask
= mask
;
107 static const struct rc_src_register builtin_zero
= {
108 .File
= RC_FILE_NONE
,
110 .Swizzle
= RC_SWIZZLE_0000
112 static const struct rc_src_register builtin_one
= {
113 .File
= RC_FILE_NONE
,
115 .Swizzle
= RC_SWIZZLE_1111
117 static const struct rc_src_register srcreg_undefined
= {
118 .File
= RC_FILE_NONE
,
120 .Swizzle
= RC_SWIZZLE_XYZW
123 static struct rc_src_register
srcreg(int file
, int index
)
125 struct rc_src_register src
= srcreg_undefined
;
131 static struct rc_src_register
srcregswz(int file
, int index
, int swz
)
133 struct rc_src_register src
= srcreg_undefined
;
140 static struct rc_src_register
absolute(struct rc_src_register reg
)
142 struct rc_src_register newreg
= reg
;
144 newreg
.Negate
= RC_MASK_NONE
;
148 static struct rc_src_register
negate(struct rc_src_register reg
)
150 struct rc_src_register newreg
= reg
;
151 newreg
.Negate
= newreg
.Negate
^ RC_MASK_XYZW
;
155 static struct rc_src_register
swizzle(struct rc_src_register reg
,
156 rc_swizzle x
, rc_swizzle y
, rc_swizzle z
, rc_swizzle w
)
158 struct rc_src_register swizzled
= reg
;
159 swizzled
.Swizzle
= combine_swizzles4(reg
.Swizzle
, x
, y
, z
, w
);
163 static struct rc_src_register
swizzle_smear(struct rc_src_register reg
,
166 return swizzle(reg
, x
, x
, x
, x
);
169 static struct rc_src_register
swizzle_xxxx(struct rc_src_register reg
)
171 return swizzle_smear(reg
, RC_SWIZZLE_X
);
174 static struct rc_src_register
swizzle_yyyy(struct rc_src_register reg
)
176 return swizzle_smear(reg
, RC_SWIZZLE_Y
);
179 static struct rc_src_register
swizzle_zzzz(struct rc_src_register reg
)
181 return swizzle_smear(reg
, RC_SWIZZLE_Z
);
184 static struct rc_src_register
swizzle_wwww(struct rc_src_register reg
)
186 return swizzle_smear(reg
, RC_SWIZZLE_W
);
189 static void transform_ABS(struct radeon_compiler
* c
,
190 struct rc_instruction
* inst
)
192 struct rc_src_register src
= inst
->U
.I
.SrcReg
[0];
194 src
.Negate
= RC_MASK_NONE
;
195 emit1(c
, inst
->Prev
, RC_OPCODE_MOV
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
, src
);
196 rc_remove_instruction(inst
);
199 static void transform_CEIL(struct radeon_compiler
* c
,
200 struct rc_instruction
* inst
)
203 * ceil(x) = -floor(-x)
205 * After inlining floor:
206 * ceil(x) = -(-x-frac(-x))
208 * After simplification:
209 * ceil(x) = x+frac(-x)
212 int tempreg
= rc_find_free_temporary(c
);
213 emit1(c
, inst
->Prev
, RC_OPCODE_FRC
, 0, dstreg(RC_FILE_TEMPORARY
, tempreg
), negate(inst
->U
.I
.SrcReg
[0]));
214 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
215 inst
->U
.I
.SrcReg
[0], srcreg(RC_FILE_TEMPORARY
, tempreg
));
216 rc_remove_instruction(inst
);
219 static void transform_DP2(struct radeon_compiler
* c
,
220 struct rc_instruction
* inst
)
222 struct rc_src_register src0
= inst
->U
.I
.SrcReg
[0];
223 struct rc_src_register src1
= inst
->U
.I
.SrcReg
[1];
224 src0
.Negate
&= ~(RC_MASK_Z
| RC_MASK_W
);
225 src0
.Swizzle
&= ~(63 << (3 * 2));
226 src0
.Swizzle
|= (RC_SWIZZLE_ZERO
<< (3 * 2)) | (RC_SWIZZLE_ZERO
<< (3 * 3));
227 src1
.Negate
&= ~(RC_MASK_Z
| RC_MASK_W
);
228 src1
.Swizzle
&= ~(63 << (3 * 2));
229 src1
.Swizzle
|= (RC_SWIZZLE_ZERO
<< (3 * 2)) | (RC_SWIZZLE_ZERO
<< (3 * 3));
230 emit2(c
, inst
->Prev
, RC_OPCODE_DP3
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
, src0
, src1
);
231 rc_remove_instruction(inst
);
234 static void transform_DPH(struct radeon_compiler
* c
,
235 struct rc_instruction
* inst
)
237 struct rc_src_register src0
= inst
->U
.I
.SrcReg
[0];
238 src0
.Negate
&= ~RC_MASK_W
;
239 src0
.Swizzle
&= ~(7 << (3 * 3));
240 src0
.Swizzle
|= RC_SWIZZLE_ONE
<< (3 * 3);
241 emit2(c
, inst
->Prev
, RC_OPCODE_DP4
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
, src0
, inst
->U
.I
.SrcReg
[1]);
242 rc_remove_instruction(inst
);
246 * [1, src0.y*src1.y, src0.z, src1.w]
247 * So basically MUL with lotsa swizzling.
249 static void transform_DST(struct radeon_compiler
* c
,
250 struct rc_instruction
* inst
)
252 emit2(c
, inst
->Prev
, RC_OPCODE_MUL
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
253 swizzle(inst
->U
.I
.SrcReg
[0], RC_SWIZZLE_ONE
, RC_SWIZZLE_Y
, RC_SWIZZLE_Z
, RC_SWIZZLE_ONE
),
254 swizzle(inst
->U
.I
.SrcReg
[1], RC_SWIZZLE_ONE
, RC_SWIZZLE_Y
, RC_SWIZZLE_ONE
, RC_SWIZZLE_W
));
255 rc_remove_instruction(inst
);
258 static void transform_FLR(struct radeon_compiler
* c
,
259 struct rc_instruction
* inst
)
261 int tempreg
= rc_find_free_temporary(c
);
262 emit1(c
, inst
->Prev
, RC_OPCODE_FRC
, 0, dstreg(RC_FILE_TEMPORARY
, tempreg
), inst
->U
.I
.SrcReg
[0]);
263 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
264 inst
->U
.I
.SrcReg
[0], negate(srcreg(RC_FILE_TEMPORARY
, tempreg
)));
265 rc_remove_instruction(inst
);
269 * Definition of LIT (from ARB_fragment_program):
271 * tmp = VectorLoad(op0);
272 * if (tmp.x < 0) tmp.x = 0;
273 * if (tmp.y < 0) tmp.y = 0;
274 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
275 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
278 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
281 * The longest path of computation is the one leading to result.z,
282 * consisting of 5 operations. This implementation of LIT takes
283 * 5 slots, if the subsequent optimization passes are clever enough
284 * to pair instructions correctly.
286 static void transform_LIT(struct radeon_compiler
* c
,
287 struct rc_instruction
* inst
)
289 unsigned int constant
;
290 unsigned int constant_swizzle
;
292 struct rc_src_register srctemp
;
294 constant
= rc_constants_add_immediate_scalar(&c
->Program
.Constants
, -127.999999, &constant_swizzle
);
296 if (inst
->U
.I
.DstReg
.WriteMask
!= RC_MASK_XYZW
|| inst
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
) {
297 struct rc_instruction
* inst_mov
;
299 inst_mov
= emit1(c
, inst
,
300 RC_OPCODE_MOV
, 0, inst
->U
.I
.DstReg
,
301 srcreg(RC_FILE_TEMPORARY
, rc_find_free_temporary(c
)));
303 inst
->U
.I
.DstReg
.File
= RC_FILE_TEMPORARY
;
304 inst
->U
.I
.DstReg
.Index
= inst_mov
->U
.I
.SrcReg
[0].Index
;
305 inst
->U
.I
.DstReg
.WriteMask
= RC_MASK_XYZW
;
308 temp
= inst
->U
.I
.DstReg
.Index
;
309 srctemp
= srcreg(RC_FILE_TEMPORARY
, temp
);
311 /* tmp.x = max(0.0, Src.x); */
312 /* tmp.y = max(0.0, Src.y); */
313 /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */
314 emit2(c
, inst
->Prev
, RC_OPCODE_MAX
, 0,
315 dstregtmpmask(temp
, RC_MASK_XYW
),
317 swizzle(srcreg(RC_FILE_CONSTANT
, constant
),
318 RC_SWIZZLE_ZERO
, RC_SWIZZLE_ZERO
, RC_SWIZZLE_ZERO
, constant_swizzle
&3));
319 emit2(c
, inst
->Prev
, RC_OPCODE_MIN
, 0,
320 dstregtmpmask(temp
, RC_MASK_Z
),
321 swizzle_wwww(srctemp
),
322 negate(srcregswz(RC_FILE_CONSTANT
, constant
, constant_swizzle
)));
324 /* tmp.w = Pow(tmp.y, tmp.w) */
325 emit1(c
, inst
->Prev
, RC_OPCODE_LG2
, 0,
326 dstregtmpmask(temp
, RC_MASK_W
),
327 swizzle_yyyy(srctemp
));
328 emit2(c
, inst
->Prev
, RC_OPCODE_MUL
, 0,
329 dstregtmpmask(temp
, RC_MASK_W
),
330 swizzle_wwww(srctemp
),
331 swizzle_zzzz(srctemp
));
332 emit1(c
, inst
->Prev
, RC_OPCODE_EX2
, 0,
333 dstregtmpmask(temp
, RC_MASK_W
),
334 swizzle_wwww(srctemp
));
336 /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
337 emit3(c
, inst
->Prev
, RC_OPCODE_CMP
, inst
->U
.I
.SaturateMode
,
338 dstregtmpmask(temp
, RC_MASK_Z
),
339 negate(swizzle_xxxx(srctemp
)),
340 swizzle_wwww(srctemp
),
343 /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
344 emit1(c
, inst
->Prev
, RC_OPCODE_MOV
, inst
->U
.I
.SaturateMode
,
345 dstregtmpmask(temp
, RC_MASK_XYW
),
346 swizzle(srctemp
, RC_SWIZZLE_ONE
, RC_SWIZZLE_X
, RC_SWIZZLE_ONE
, RC_SWIZZLE_ONE
));
348 rc_remove_instruction(inst
);
351 static void transform_LRP(struct radeon_compiler
* c
,
352 struct rc_instruction
* inst
)
354 int tempreg
= rc_find_free_temporary(c
);
356 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, 0,
357 dstreg(RC_FILE_TEMPORARY
, tempreg
),
358 inst
->U
.I
.SrcReg
[1], negate(inst
->U
.I
.SrcReg
[2]));
359 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, inst
->U
.I
.SaturateMode
,
361 inst
->U
.I
.SrcReg
[0], srcreg(RC_FILE_TEMPORARY
, tempreg
), inst
->U
.I
.SrcReg
[2]);
363 rc_remove_instruction(inst
);
366 static void transform_POW(struct radeon_compiler
* c
,
367 struct rc_instruction
* inst
)
369 int tempreg
= rc_find_free_temporary(c
);
370 struct rc_dst_register tempdst
= dstreg(RC_FILE_TEMPORARY
, tempreg
);
371 struct rc_src_register tempsrc
= srcreg(RC_FILE_TEMPORARY
, tempreg
);
372 tempdst
.WriteMask
= RC_MASK_W
;
373 tempsrc
.Swizzle
= RC_SWIZZLE_WWWW
;
375 emit1(c
, inst
->Prev
, RC_OPCODE_LG2
, 0, tempdst
, swizzle_xxxx(inst
->U
.I
.SrcReg
[0]));
376 emit2(c
, inst
->Prev
, RC_OPCODE_MUL
, 0, tempdst
, tempsrc
, swizzle_xxxx(inst
->U
.I
.SrcReg
[1]));
377 emit1(c
, inst
->Prev
, RC_OPCODE_EX2
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
, tempsrc
);
379 rc_remove_instruction(inst
);
382 static void transform_RSQ(struct radeon_compiler
* c
,
383 struct rc_instruction
* inst
)
385 inst
->U
.I
.SrcReg
[0] = absolute(inst
->U
.I
.SrcReg
[0]);
388 static void transform_SEQ(struct radeon_compiler
* c
,
389 struct rc_instruction
* inst
)
391 int tempreg
= rc_find_free_temporary(c
);
393 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, 0, dstreg(RC_FILE_TEMPORARY
, tempreg
), inst
->U
.I
.SrcReg
[0], negate(inst
->U
.I
.SrcReg
[1]));
394 emit3(c
, inst
->Prev
, RC_OPCODE_CMP
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
395 negate(absolute(srcreg(RC_FILE_TEMPORARY
, tempreg
))), builtin_zero
, builtin_one
);
397 rc_remove_instruction(inst
);
400 static void transform_SFL(struct radeon_compiler
* c
,
401 struct rc_instruction
* inst
)
403 emit1(c
, inst
->Prev
, RC_OPCODE_MOV
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
, builtin_zero
);
404 rc_remove_instruction(inst
);
407 static void transform_SGE(struct radeon_compiler
* c
,
408 struct rc_instruction
* inst
)
410 int tempreg
= rc_find_free_temporary(c
);
412 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, 0, dstreg(RC_FILE_TEMPORARY
, tempreg
), inst
->U
.I
.SrcReg
[0], negate(inst
->U
.I
.SrcReg
[1]));
413 emit3(c
, inst
->Prev
, RC_OPCODE_CMP
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
414 srcreg(RC_FILE_TEMPORARY
, tempreg
), builtin_zero
, builtin_one
);
416 rc_remove_instruction(inst
);
419 static void transform_SGT(struct radeon_compiler
* c
,
420 struct rc_instruction
* inst
)
422 int tempreg
= rc_find_free_temporary(c
);
424 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, 0, dstreg(RC_FILE_TEMPORARY
, tempreg
), negate(inst
->U
.I
.SrcReg
[0]), inst
->U
.I
.SrcReg
[1]);
425 emit3(c
, inst
->Prev
, RC_OPCODE_CMP
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
426 srcreg(RC_FILE_TEMPORARY
, tempreg
), builtin_one
, builtin_zero
);
428 rc_remove_instruction(inst
);
431 static void transform_SLE(struct radeon_compiler
* c
,
432 struct rc_instruction
* inst
)
434 int tempreg
= rc_find_free_temporary(c
);
436 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, 0, dstreg(RC_FILE_TEMPORARY
, tempreg
), negate(inst
->U
.I
.SrcReg
[0]), inst
->U
.I
.SrcReg
[1]);
437 emit3(c
, inst
->Prev
, RC_OPCODE_CMP
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
438 srcreg(RC_FILE_TEMPORARY
, tempreg
), builtin_zero
, builtin_one
);
440 rc_remove_instruction(inst
);
443 static void transform_SLT(struct radeon_compiler
* c
,
444 struct rc_instruction
* inst
)
446 int tempreg
= rc_find_free_temporary(c
);
448 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, 0, dstreg(RC_FILE_TEMPORARY
, tempreg
), inst
->U
.I
.SrcReg
[0], negate(inst
->U
.I
.SrcReg
[1]));
449 emit3(c
, inst
->Prev
, RC_OPCODE_CMP
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
450 srcreg(RC_FILE_TEMPORARY
, tempreg
), builtin_one
, builtin_zero
);
452 rc_remove_instruction(inst
);
455 static void transform_SNE(struct radeon_compiler
* c
,
456 struct rc_instruction
* inst
)
458 int tempreg
= rc_find_free_temporary(c
);
460 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, 0, dstreg(RC_FILE_TEMPORARY
, tempreg
), inst
->U
.I
.SrcReg
[0], negate(inst
->U
.I
.SrcReg
[1]));
461 emit3(c
, inst
->Prev
, RC_OPCODE_CMP
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
462 negate(absolute(srcreg(RC_FILE_TEMPORARY
, tempreg
))), builtin_one
, builtin_zero
);
464 rc_remove_instruction(inst
);
467 static void transform_SSG(struct radeon_compiler
* c
,
468 struct rc_instruction
* inst
)
474 * ADD result, tmp0, -tmp1;
479 tmp0
= rc_find_free_temporary(c
);
480 emit3(c
, inst
->Prev
, RC_OPCODE_CMP
, 0,
481 dstregtmpmask(tmp0
, inst
->U
.I
.DstReg
.WriteMask
),
482 negate(inst
->U
.I
.SrcReg
[0]),
487 tmp1
= rc_find_free_temporary(c
);
488 emit3(c
, inst
->Prev
, RC_OPCODE_CMP
, 0,
489 dstregtmpmask(tmp1
, inst
->U
.I
.DstReg
.WriteMask
),
494 /* Either both are zero, or one of them is one and the other is zero. */
495 /* result = tmp0 - tmp1 */
496 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, 0,
498 srcreg(RC_FILE_TEMPORARY
, tmp0
),
499 negate(srcreg(RC_FILE_TEMPORARY
, tmp1
)));
501 rc_remove_instruction(inst
);
504 static void transform_SUB(struct radeon_compiler
* c
,
505 struct rc_instruction
* inst
)
507 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
508 inst
->U
.I
.SrcReg
[1] = negate(inst
->U
.I
.SrcReg
[1]);
511 static void transform_SWZ(struct radeon_compiler
* c
,
512 struct rc_instruction
* inst
)
514 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
517 static void transform_XPD(struct radeon_compiler
* c
,
518 struct rc_instruction
* inst
)
520 int tempreg
= rc_find_free_temporary(c
);
522 emit2(c
, inst
->Prev
, RC_OPCODE_MUL
, 0, dstreg(RC_FILE_TEMPORARY
, tempreg
),
523 swizzle(inst
->U
.I
.SrcReg
[0], RC_SWIZZLE_Z
, RC_SWIZZLE_X
, RC_SWIZZLE_Y
, RC_SWIZZLE_W
),
524 swizzle(inst
->U
.I
.SrcReg
[1], RC_SWIZZLE_Y
, RC_SWIZZLE_Z
, RC_SWIZZLE_X
, RC_SWIZZLE_W
));
525 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
526 swizzle(inst
->U
.I
.SrcReg
[0], RC_SWIZZLE_Y
, RC_SWIZZLE_Z
, RC_SWIZZLE_X
, RC_SWIZZLE_W
),
527 swizzle(inst
->U
.I
.SrcReg
[1], RC_SWIZZLE_Z
, RC_SWIZZLE_X
, RC_SWIZZLE_Y
, RC_SWIZZLE_W
),
528 negate(srcreg(RC_FILE_TEMPORARY
, tempreg
)));
530 rc_remove_instruction(inst
);
535 * Can be used as a transformation for @ref radeonClauseLocalTransform,
536 * no userData necessary.
538 * Eliminates the following ALU instructions:
539 * ABS, CEIL, DP2, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SSG, SUB, SWZ, XPD
541 * MOV, ADD, MUL, MAD, FRC, DP3, DP4, MAX, MIN, LG2, EX2, CMP
543 * Transforms RSQ to Radeon's native RSQ by explicitly setting
546 * @note should be applicable to R300 and R500 fragment programs.
548 int radeonTransformALU(
549 struct radeon_compiler
* c
,
550 struct rc_instruction
* inst
,
553 switch(inst
->U
.I
.Opcode
) {
554 case RC_OPCODE_ABS
: transform_ABS(c
, inst
); return 1;
555 case RC_OPCODE_CEIL
: transform_CEIL(c
, inst
); return 1;
556 case RC_OPCODE_DP2
: transform_DP2(c
, inst
); return 1;
557 case RC_OPCODE_DPH
: transform_DPH(c
, inst
); return 1;
558 case RC_OPCODE_DST
: transform_DST(c
, inst
); return 1;
559 case RC_OPCODE_FLR
: transform_FLR(c
, inst
); return 1;
560 case RC_OPCODE_LIT
: transform_LIT(c
, inst
); return 1;
561 case RC_OPCODE_LRP
: transform_LRP(c
, inst
); return 1;
562 case RC_OPCODE_POW
: transform_POW(c
, inst
); return 1;
563 case RC_OPCODE_RSQ
: transform_RSQ(c
, inst
); return 1;
564 case RC_OPCODE_SEQ
: transform_SEQ(c
, inst
); return 1;
565 case RC_OPCODE_SFL
: transform_SFL(c
, inst
); return 1;
566 case RC_OPCODE_SGE
: transform_SGE(c
, inst
); return 1;
567 case RC_OPCODE_SGT
: transform_SGT(c
, inst
); return 1;
568 case RC_OPCODE_SLE
: transform_SLE(c
, inst
); return 1;
569 case RC_OPCODE_SLT
: transform_SLT(c
, inst
); return 1;
570 case RC_OPCODE_SNE
: transform_SNE(c
, inst
); return 1;
571 case RC_OPCODE_SSG
: transform_SSG(c
, inst
); return 1;
572 case RC_OPCODE_SUB
: transform_SUB(c
, inst
); return 1;
573 case RC_OPCODE_SWZ
: transform_SWZ(c
, inst
); return 1;
574 case RC_OPCODE_XPD
: transform_XPD(c
, inst
); return 1;
581 static void transform_r300_vertex_ABS(struct radeon_compiler
* c
,
582 struct rc_instruction
* inst
)
584 /* Note: r500 can take absolute values, but r300 cannot. */
585 inst
->U
.I
.Opcode
= RC_OPCODE_MAX
;
586 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[0];
587 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
590 static void transform_r300_vertex_CMP(struct radeon_compiler
* c
,
591 struct rc_instruction
* inst
)
593 /* There is no decent CMP available, so let's rig one up.
594 * CMP is defined as dst = src0 < 0.0 ? src1 : src2
595 * The following sequence consumes two temps and two extra slots
596 * (the second temp and the second slot is consumed by transform_LRP),
597 * but should be equivalent:
599 * SLT tmp0, src0, 0.0
600 * LRP dst, tmp0, src1, src2
602 * Yes, I know, I'm a mad scientist. ~ C. & M. */
603 int tempreg0
= rc_find_free_temporary(c
);
605 /* SLT tmp0, src0, 0.0 */
606 emit2(c
, inst
->Prev
, RC_OPCODE_SLT
, 0,
607 dstreg(RC_FILE_TEMPORARY
, tempreg0
),
608 inst
->U
.I
.SrcReg
[0], builtin_zero
);
610 /* LRP dst, tmp0, src1, src2 */
612 emit3(c
, inst
->Prev
, RC_OPCODE_LRP
, 0,
614 srcreg(RC_FILE_TEMPORARY
, tempreg0
), inst
->U
.I
.SrcReg
[1], inst
->U
.I
.SrcReg
[2]));
616 rc_remove_instruction(inst
);
619 static void transform_r300_vertex_DP2(struct radeon_compiler
* c
,
620 struct rc_instruction
* inst
)
622 struct rc_instruction
*next_inst
= inst
->Next
;
623 transform_DP2(c
, inst
);
624 next_inst
->Prev
->U
.I
.Opcode
= RC_OPCODE_DP4
;
627 static void transform_r300_vertex_DP3(struct radeon_compiler
* c
,
628 struct rc_instruction
* inst
)
630 struct rc_src_register src0
= inst
->U
.I
.SrcReg
[0];
631 struct rc_src_register src1
= inst
->U
.I
.SrcReg
[1];
632 src0
.Negate
&= ~RC_MASK_W
;
633 src0
.Swizzle
&= ~(7 << (3 * 3));
634 src0
.Swizzle
|= RC_SWIZZLE_ZERO
<< (3 * 3);
635 src1
.Negate
&= ~RC_MASK_W
;
636 src1
.Swizzle
&= ~(7 << (3 * 3));
637 src1
.Swizzle
|= RC_SWIZZLE_ZERO
<< (3 * 3);
638 emit2(c
, inst
->Prev
, RC_OPCODE_DP4
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
, src0
, src1
);
639 rc_remove_instruction(inst
);
642 static void transform_r300_vertex_fix_LIT(struct radeon_compiler
* c
,
643 struct rc_instruction
* inst
)
645 int tempreg
= rc_find_free_temporary(c
);
646 unsigned constant_swizzle
;
647 int constant
= rc_constants_add_immediate_scalar(&c
->Program
.Constants
,
648 0.0000000000000000001,
652 emit1(c
, inst
->Prev
, RC_OPCODE_MOV
, 0,
653 dstreg(RC_FILE_TEMPORARY
, tempreg
),
654 inst
->U
.I
.SrcReg
[0]);
656 /* MAX tmp.y, tmp, 0.00...001 */
657 emit2(c
, inst
->Prev
, RC_OPCODE_MAX
, 0,
658 dstregtmpmask(tempreg
, RC_MASK_Y
),
659 srcreg(RC_FILE_TEMPORARY
, tempreg
),
660 srcregswz(RC_FILE_CONSTANT
, constant
, constant_swizzle
));
662 inst
->U
.I
.SrcReg
[0] = srcreg(RC_FILE_TEMPORARY
, tempreg
);
665 static void transform_r300_vertex_SEQ(struct radeon_compiler
*c
,
666 struct rc_instruction
*inst
)
668 /* x = y <==> x >= y && y >= x */
669 int tmp
= rc_find_free_temporary(c
);
672 emit2(c
, inst
->Prev
, RC_OPCODE_SGE
, 0,
673 dstregtmpmask(tmp
, inst
->U
.I
.DstReg
.WriteMask
),
675 inst
->U
.I
.SrcReg
[1]);
678 emit2(c
, inst
->Prev
, RC_OPCODE_SGE
, 0,
681 inst
->U
.I
.SrcReg
[0]);
684 emit2(c
, inst
->Prev
, RC_OPCODE_MUL
, 0,
686 srcreg(RC_FILE_TEMPORARY
, tmp
),
687 srcreg(inst
->U
.I
.DstReg
.File
, inst
->U
.I
.DstReg
.Index
));
689 rc_remove_instruction(inst
);
692 static void transform_r300_vertex_SNE(struct radeon_compiler
*c
,
693 struct rc_instruction
*inst
)
695 /* x != y <==> x < y || y < x */
696 int tmp
= rc_find_free_temporary(c
);
699 emit2(c
, inst
->Prev
, RC_OPCODE_SLT
, 0,
700 dstregtmpmask(tmp
, inst
->U
.I
.DstReg
.WriteMask
),
702 inst
->U
.I
.SrcReg
[1]);
705 emit2(c
, inst
->Prev
, RC_OPCODE_SLT
, 0,
708 inst
->U
.I
.SrcReg
[0]);
710 /* x || y = max(x, y) */
711 emit2(c
, inst
->Prev
, RC_OPCODE_MAX
, 0,
713 srcreg(RC_FILE_TEMPORARY
, tmp
),
714 srcreg(inst
->U
.I
.DstReg
.File
, inst
->U
.I
.DstReg
.Index
));
716 rc_remove_instruction(inst
);
719 static void transform_r300_vertex_SGT(struct radeon_compiler
* c
,
720 struct rc_instruction
* inst
)
722 /* x > y <==> -x < -y */
723 inst
->U
.I
.Opcode
= RC_OPCODE_SLT
;
724 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
725 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
728 static void transform_r300_vertex_SLE(struct radeon_compiler
* c
,
729 struct rc_instruction
* inst
)
731 /* x <= y <==> -x >= -y */
732 inst
->U
.I
.Opcode
= RC_OPCODE_SGE
;
733 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
734 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
737 static void transform_r300_vertex_SSG(struct radeon_compiler
* c
,
738 struct rc_instruction
* inst
)
744 * ADD result, tmp0, -tmp1;
749 tmp0
= rc_find_free_temporary(c
);
750 emit2(c
, inst
->Prev
, RC_OPCODE_SLT
, 0,
751 dstregtmpmask(tmp0
, inst
->U
.I
.DstReg
.WriteMask
),
753 inst
->U
.I
.SrcReg
[0]);
756 tmp1
= rc_find_free_temporary(c
);
757 emit2(c
, inst
->Prev
, RC_OPCODE_SLT
, 0,
758 dstregtmpmask(tmp1
, inst
->U
.I
.DstReg
.WriteMask
),
762 /* Either both are zero, or one of them is one and the other is zero. */
763 /* result = tmp0 - tmp1 */
764 emit2(c
, inst
->Prev
, RC_OPCODE_ADD
, 0,
766 srcreg(RC_FILE_TEMPORARY
, tmp0
),
767 negate(srcreg(RC_FILE_TEMPORARY
, tmp1
)));
769 rc_remove_instruction(inst
);
773 * For use with radeonLocalTransform, this transforms non-native ALU
774 * instructions of the r300 up to r500 vertex engine.
776 int r300_transform_vertex_alu(
777 struct radeon_compiler
* c
,
778 struct rc_instruction
* inst
,
781 switch(inst
->U
.I
.Opcode
) {
782 case RC_OPCODE_ABS
: transform_r300_vertex_ABS(c
, inst
); return 1;
783 case RC_OPCODE_CEIL
: transform_CEIL(c
, inst
); return 1;
784 case RC_OPCODE_CMP
: transform_r300_vertex_CMP(c
, inst
); return 1;
785 case RC_OPCODE_DP2
: transform_r300_vertex_DP2(c
, inst
); return 1;
786 case RC_OPCODE_DP3
: transform_r300_vertex_DP3(c
, inst
); return 1;
787 case RC_OPCODE_DPH
: transform_DPH(c
, inst
); return 1;
788 case RC_OPCODE_FLR
: transform_FLR(c
, inst
); return 1;
789 case RC_OPCODE_LIT
: transform_r300_vertex_fix_LIT(c
, inst
); return 1;
790 case RC_OPCODE_LRP
: transform_LRP(c
, inst
); return 1;
793 transform_r300_vertex_SEQ(c
, inst
);
797 case RC_OPCODE_SFL
: transform_SFL(c
, inst
); return 1;
798 case RC_OPCODE_SGT
: transform_r300_vertex_SGT(c
, inst
); return 1;
799 case RC_OPCODE_SLE
: transform_r300_vertex_SLE(c
, inst
); return 1;
802 transform_r300_vertex_SNE(c
, inst
);
806 case RC_OPCODE_SSG
: transform_r300_vertex_SSG(c
, inst
); return 1;
807 case RC_OPCODE_SUB
: transform_SUB(c
, inst
); return 1;
808 case RC_OPCODE_SWZ
: transform_SWZ(c
, inst
); return 1;
809 case RC_OPCODE_XPD
: transform_XPD(c
, inst
); return 1;
815 static void sincos_constants(struct radeon_compiler
* c
, unsigned int *constants
)
817 static const float SinCosConsts
[2][4] = {
819 1.273239545, /* 4/PI */
820 -0.405284735, /* -4/(PI*PI) */
821 3.141592654, /* PI */
827 0.159154943, /* 1/(2*PI) */
828 6.283185307 /* 2*PI */
833 for(i
= 0; i
< 2; ++i
)
834 constants
[i
] = rc_constants_add_immediate_vec4(&c
->Program
.Constants
, SinCosConsts
[i
]);
838 * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
840 * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
841 * MAD tmp.x, tmp.y, |src|, tmp.x
842 * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
843 * MAD dest, tmp.y, weight, tmp.x
845 static void sin_approx(
846 struct radeon_compiler
* c
, struct rc_instruction
* inst
,
847 struct rc_dst_register dst
, struct rc_src_register src
, const unsigned int* constants
)
849 unsigned int tempreg
= rc_find_free_temporary(c
);
851 emit2(c
, inst
->Prev
, RC_OPCODE_MUL
, 0, dstregtmpmask(tempreg
, RC_MASK_XY
),
853 srcreg(RC_FILE_CONSTANT
, constants
[0]));
854 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(tempreg
, RC_MASK_X
),
855 swizzle_yyyy(srcreg(RC_FILE_TEMPORARY
, tempreg
)),
856 absolute(swizzle_xxxx(src
)),
857 swizzle_xxxx(srcreg(RC_FILE_TEMPORARY
, tempreg
)));
858 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(tempreg
, RC_MASK_Y
),
859 swizzle_xxxx(srcreg(RC_FILE_TEMPORARY
, tempreg
)),
860 absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY
, tempreg
))),
861 negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY
, tempreg
))));
862 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dst
,
863 swizzle_yyyy(srcreg(RC_FILE_TEMPORARY
, tempreg
)),
864 swizzle_wwww(srcreg(RC_FILE_CONSTANT
, constants
[0])),
865 swizzle_xxxx(srcreg(RC_FILE_TEMPORARY
, tempreg
)));
869 * Translate the trigonometric functions COS, SIN, and SCS
870 * using only the basic instructions
871 * MOV, ADD, MUL, MAD, FRC
873 int radeonTransformTrigSimple(struct radeon_compiler
* c
,
874 struct rc_instruction
* inst
,
877 if (inst
->U
.I
.Opcode
!= RC_OPCODE_COS
&&
878 inst
->U
.I
.Opcode
!= RC_OPCODE_SIN
&&
879 inst
->U
.I
.Opcode
!= RC_OPCODE_SCS
)
882 unsigned int constants
[2];
883 unsigned int tempreg
= rc_find_free_temporary(c
);
885 sincos_constants(c
, constants
);
887 if (inst
->U
.I
.Opcode
== RC_OPCODE_COS
) {
888 /* MAD tmp.w, src, 1/(2*PI), 0.75 */
889 /* FRC tmp.w, tmp.w */
890 /* MAD tmp.w, tmp.w, 2*PI, -PI */
891 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(tempreg
, RC_MASK_W
),
892 swizzle_xxxx(inst
->U
.I
.SrcReg
[0]),
893 swizzle_zzzz(srcreg(RC_FILE_CONSTANT
, constants
[1])),
894 swizzle_xxxx(srcreg(RC_FILE_CONSTANT
, constants
[1])));
895 emit1(c
, inst
->Prev
, RC_OPCODE_FRC
, 0, dstregtmpmask(tempreg
, RC_MASK_W
),
896 swizzle_wwww(srcreg(RC_FILE_TEMPORARY
, tempreg
)));
897 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(tempreg
, RC_MASK_W
),
898 swizzle_wwww(srcreg(RC_FILE_TEMPORARY
, tempreg
)),
899 swizzle_wwww(srcreg(RC_FILE_CONSTANT
, constants
[1])),
900 negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT
, constants
[0]))));
902 sin_approx(c
, inst
, inst
->U
.I
.DstReg
,
903 swizzle_wwww(srcreg(RC_FILE_TEMPORARY
, tempreg
)),
905 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_SIN
) {
906 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(tempreg
, RC_MASK_W
),
907 swizzle_xxxx(inst
->U
.I
.SrcReg
[0]),
908 swizzle_zzzz(srcreg(RC_FILE_CONSTANT
, constants
[1])),
909 swizzle_yyyy(srcreg(RC_FILE_CONSTANT
, constants
[1])));
910 emit1(c
, inst
->Prev
, RC_OPCODE_FRC
, 0, dstregtmpmask(tempreg
, RC_MASK_W
),
911 swizzle_wwww(srcreg(RC_FILE_TEMPORARY
, tempreg
)));
912 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(tempreg
, RC_MASK_W
),
913 swizzle_wwww(srcreg(RC_FILE_TEMPORARY
, tempreg
)),
914 swizzle_wwww(srcreg(RC_FILE_CONSTANT
, constants
[1])),
915 negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT
, constants
[0]))));
917 sin_approx(c
, inst
, inst
->U
.I
.DstReg
,
918 swizzle_wwww(srcreg(RC_FILE_TEMPORARY
, tempreg
)),
921 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(tempreg
, RC_MASK_XY
),
922 swizzle_xxxx(inst
->U
.I
.SrcReg
[0]),
923 swizzle_zzzz(srcreg(RC_FILE_CONSTANT
, constants
[1])),
924 swizzle(srcreg(RC_FILE_CONSTANT
, constants
[1]), RC_SWIZZLE_X
, RC_SWIZZLE_Y
, RC_SWIZZLE_Z
, RC_SWIZZLE_W
));
925 emit1(c
, inst
->Prev
, RC_OPCODE_FRC
, 0, dstregtmpmask(tempreg
, RC_MASK_XY
),
926 srcreg(RC_FILE_TEMPORARY
, tempreg
));
927 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(tempreg
, RC_MASK_XY
),
928 srcreg(RC_FILE_TEMPORARY
, tempreg
),
929 swizzle_wwww(srcreg(RC_FILE_CONSTANT
, constants
[1])),
930 negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT
, constants
[0]))));
932 struct rc_dst_register dst
= inst
->U
.I
.DstReg
;
934 dst
.WriteMask
= inst
->U
.I
.DstReg
.WriteMask
& RC_MASK_X
;
935 sin_approx(c
, inst
, dst
,
936 swizzle_xxxx(srcreg(RC_FILE_TEMPORARY
, tempreg
)),
939 dst
.WriteMask
= inst
->U
.I
.DstReg
.WriteMask
& RC_MASK_Y
;
940 sin_approx(c
, inst
, dst
,
941 swizzle_yyyy(srcreg(RC_FILE_TEMPORARY
, tempreg
)),
945 rc_remove_instruction(inst
);
950 static void r300_transform_SIN_COS_SCS(struct radeon_compiler
*c
,
951 struct rc_instruction
*inst
,
954 if (inst
->U
.I
.Opcode
== RC_OPCODE_COS
) {
955 emit1(c
, inst
->Prev
, RC_OPCODE_COS
, inst
->U
.I
.SaturateMode
, inst
->U
.I
.DstReg
,
956 srcregswz(RC_FILE_TEMPORARY
, srctmp
, RC_SWIZZLE_WWWW
));
957 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_SIN
) {
958 emit1(c
, inst
->Prev
, RC_OPCODE_SIN
, inst
->U
.I
.SaturateMode
,
959 inst
->U
.I
.DstReg
, srcregswz(RC_FILE_TEMPORARY
, srctmp
, RC_SWIZZLE_WWWW
));
960 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_SCS
) {
961 struct rc_dst_register moddst
= inst
->U
.I
.DstReg
;
963 if (inst
->U
.I
.DstReg
.WriteMask
& RC_MASK_X
) {
964 moddst
.WriteMask
= RC_MASK_X
;
965 emit1(c
, inst
->Prev
, RC_OPCODE_COS
, inst
->U
.I
.SaturateMode
, moddst
,
966 srcregswz(RC_FILE_TEMPORARY
, srctmp
, RC_SWIZZLE_WWWW
));
968 if (inst
->U
.I
.DstReg
.WriteMask
& RC_MASK_Y
) {
969 moddst
.WriteMask
= RC_MASK_Y
;
970 emit1(c
, inst
->Prev
, RC_OPCODE_SIN
, inst
->U
.I
.SaturateMode
, moddst
,
971 srcregswz(RC_FILE_TEMPORARY
, srctmp
, RC_SWIZZLE_WWWW
));
975 rc_remove_instruction(inst
);
980 * Transform the trigonometric functions COS, SIN, and SCS
981 * to include pre-scaling by 1/(2*PI) and taking the fractional
982 * part, so that the input to COS and SIN is always in the range [0,1).
983 * SCS is replaced by one COS and one SIN instruction.
985 * @warning This transformation implicitly changes the semantics of SIN and COS!
987 int radeonTransformTrigScale(struct radeon_compiler
* c
,
988 struct rc_instruction
* inst
,
991 if (inst
->U
.I
.Opcode
!= RC_OPCODE_COS
&&
992 inst
->U
.I
.Opcode
!= RC_OPCODE_SIN
&&
993 inst
->U
.I
.Opcode
!= RC_OPCODE_SCS
)
996 static const float RCP_2PI
= 0.15915494309189535;
998 unsigned int constant
;
999 unsigned int constant_swizzle
;
1001 temp
= rc_find_free_temporary(c
);
1002 constant
= rc_constants_add_immediate_scalar(&c
->Program
.Constants
, RCP_2PI
, &constant_swizzle
);
1004 emit2(c
, inst
->Prev
, RC_OPCODE_MUL
, 0, dstregtmpmask(temp
, RC_MASK_W
),
1005 swizzle_xxxx(inst
->U
.I
.SrcReg
[0]),
1006 srcregswz(RC_FILE_CONSTANT
, constant
, constant_swizzle
));
1007 emit1(c
, inst
->Prev
, RC_OPCODE_FRC
, 0, dstregtmpmask(temp
, RC_MASK_W
),
1008 srcreg(RC_FILE_TEMPORARY
, temp
));
1010 r300_transform_SIN_COS_SCS(c
, inst
, temp
);
1015 * Transform the trigonometric functions COS, SIN, and SCS
1016 * so that the input to COS and SIN is always in the range [-PI, PI].
1017 * SCS is replaced by one COS and one SIN instruction.
1019 int r300_transform_trig_scale_vertex(struct radeon_compiler
*c
,
1020 struct rc_instruction
*inst
,
1023 if (inst
->U
.I
.Opcode
!= RC_OPCODE_COS
&&
1024 inst
->U
.I
.Opcode
!= RC_OPCODE_SIN
&&
1025 inst
->U
.I
.Opcode
!= RC_OPCODE_SCS
)
1028 /* Repeat x in the range [-PI, PI]:
1030 * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
1033 static const float cons
[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
1035 unsigned int constant
;
1037 temp
= rc_find_free_temporary(c
);
1038 constant
= rc_constants_add_immediate_vec4(&c
->Program
.Constants
, cons
);
1040 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(temp
, RC_MASK_W
),
1041 swizzle_xxxx(inst
->U
.I
.SrcReg
[0]),
1042 srcregswz(RC_FILE_CONSTANT
, constant
, RC_SWIZZLE_XXXX
),
1043 srcregswz(RC_FILE_CONSTANT
, constant
, RC_SWIZZLE_YYYY
));
1044 emit1(c
, inst
->Prev
, RC_OPCODE_FRC
, 0, dstregtmpmask(temp
, RC_MASK_W
),
1045 srcreg(RC_FILE_TEMPORARY
, temp
));
1046 emit3(c
, inst
->Prev
, RC_OPCODE_MAD
, 0, dstregtmpmask(temp
, RC_MASK_W
),
1047 srcreg(RC_FILE_TEMPORARY
, temp
),
1048 srcregswz(RC_FILE_CONSTANT
, constant
, RC_SWIZZLE_ZZZZ
),
1049 srcregswz(RC_FILE_CONSTANT
, constant
, RC_SWIZZLE_WWWW
));
1051 r300_transform_SIN_COS_SCS(c
, inst
, temp
);
1056 * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
1057 * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
1058 * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
1060 * @warning This explicitly changes the form of DDX and DDY!
1063 int radeonTransformDeriv(struct radeon_compiler
* c
,
1064 struct rc_instruction
* inst
,
1067 if (inst
->U
.I
.Opcode
!= RC_OPCODE_DDX
&& inst
->U
.I
.Opcode
!= RC_OPCODE_DDY
)
1070 inst
->U
.I
.SrcReg
[1].Swizzle
= RC_SWIZZLE_1111
;
1071 inst
->U
.I
.SrcReg
[1].Negate
= RC_MASK_XYZW
;
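/**
 * IF Temp[0].x -\
 * KILP         - > KIL -abs(Temp[0].x)
 * ENDIF        -/
 *
 * A KILP that is not directly enclosed by IF/ENDIF becomes KIL -1 instead.
 *
 * This needs to be done in its own pass, because it modifies the instructions
 * before and after KILP.
 */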
1084 void radeonTransformKILP(struct radeon_compiler
* c
)
1086 struct rc_instruction
* inst
;
1087 for (inst
= c
->Program
.Instructions
.Next
;
1088 inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
1090 if (inst
->U
.I
.Opcode
!= RC_OPCODE_KILP
)
1093 inst
->U
.I
.Opcode
= RC_OPCODE_KIL
;
1095 if (inst
->Prev
->U
.I
.Opcode
!= RC_OPCODE_IF
1096 || inst
->Next
->U
.I
.Opcode
!= RC_OPCODE_ENDIF
) {
1097 inst
->U
.I
.SrcReg
[0] = negate(builtin_one
);
1100 inst
->U
.I
.SrcReg
[0] =
1101 negate(absolute(inst
->Prev
->U
.I
.SrcReg
[0]));
1103 rc_remove_instruction(inst
->Prev
);
1105 rc_remove_instruction(inst
->Next
);