r300: Detangle fragment program compiler from driver-specific structure
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_program_alu.c
1 /*
2 * Copyright (C) 2008 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * @file
30 *
31 * Shareable transformations that transform "special" ALU instructions
32 * into ALU instructions that are supported by hardware.
33 *
34 */
35
36 #include "radeon_program_alu.h"
37
38 #include "shader/prog_parameter.h"
39
40
41 static struct prog_instruction *emit1(struct gl_program* p,
42 gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
43 struct prog_src_register SrcReg)
44 {
45 struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
46
47 fpi->Opcode = Opcode;
48 fpi->SaturateMode = Saturate;
49 fpi->DstReg = DstReg;
50 fpi->SrcReg[0] = SrcReg;
51 return fpi;
52 }
53
54 static struct prog_instruction *emit2(struct gl_program* p,
55 gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
56 struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
57 {
58 struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
59
60 fpi->Opcode = Opcode;
61 fpi->SaturateMode = Saturate;
62 fpi->DstReg = DstReg;
63 fpi->SrcReg[0] = SrcReg0;
64 fpi->SrcReg[1] = SrcReg1;
65 return fpi;
66 }
67
68 static struct prog_instruction *emit3(struct gl_program* p,
69 gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
70 struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
71 struct prog_src_register SrcReg2)
72 {
73 struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
74
75 fpi->Opcode = Opcode;
76 fpi->SaturateMode = Saturate;
77 fpi->DstReg = DstReg;
78 fpi->SrcReg[0] = SrcReg0;
79 fpi->SrcReg[1] = SrcReg1;
80 fpi->SrcReg[2] = SrcReg2;
81 return fpi;
82 }
83
84 static struct prog_dst_register dstreg(int file, int index)
85 {
86 struct prog_dst_register dst;
87 dst.File = file;
88 dst.Index = index;
89 dst.WriteMask = WRITEMASK_XYZW;
90 dst.CondMask = COND_TR;
91 dst.CondSwizzle = SWIZZLE_NOOP;
92 dst.CondSrc = 0;
93 dst.pad = 0;
94 return dst;
95 }
96
97 static struct prog_dst_register dstregtmpmask(int index, int mask)
98 {
99 struct prog_dst_register dst;
100 dst.File = PROGRAM_TEMPORARY;
101 dst.Index = index;
102 dst.WriteMask = mask;
103 dst.CondMask = COND_TR;
104 dst.CondSwizzle = SWIZZLE_NOOP;
105 dst.CondSrc = 0;
106 dst.pad = 0;
107 return dst;
108 }
109
110 static const struct prog_src_register builtin_zero = {
111 .File = PROGRAM_BUILTIN,
112 .Index = 0,
113 .Swizzle = SWIZZLE_0000
114 };
115 static const struct prog_src_register builtin_one = {
116 .File = PROGRAM_BUILTIN,
117 .Index = 0,
118 .Swizzle = SWIZZLE_1111
119 };
120 static const struct prog_src_register srcreg_undefined = {
121 .File = PROGRAM_UNDEFINED,
122 .Index = 0,
123 .Swizzle = SWIZZLE_NOOP
124 };
125
126 static struct prog_src_register srcreg(int file, int index)
127 {
128 struct prog_src_register src = srcreg_undefined;
129 src.File = file;
130 src.Index = index;
131 return src;
132 }
133
134 static struct prog_src_register srcregswz(int file, int index, int swz)
135 {
136 struct prog_src_register src = srcreg_undefined;
137 src.File = file;
138 src.Index = index;
139 src.Swizzle = swz;
140 return src;
141 }
142
143 static struct prog_src_register absolute(struct prog_src_register reg)
144 {
145 struct prog_src_register newreg = reg;
146 newreg.Abs = 1;
147 newreg.Negate = NEGATE_NONE;
148 return newreg;
149 }
150
151 static struct prog_src_register negate(struct prog_src_register reg)
152 {
153 struct prog_src_register newreg = reg;
154 newreg.Negate = newreg.Negate ^ NEGATE_XYZW;
155 return newreg;
156 }
157
158 static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w)
159 {
160 struct prog_src_register swizzled = reg;
161 swizzled.Swizzle = MAKE_SWIZZLE4(
162 x >= 4 ? x : GET_SWZ(reg.Swizzle, x),
163 y >= 4 ? y : GET_SWZ(reg.Swizzle, y),
164 z >= 4 ? z : GET_SWZ(reg.Swizzle, z),
165 w >= 4 ? w : GET_SWZ(reg.Swizzle, w));
166 return swizzled;
167 }
168
169 static struct prog_src_register scalar(struct prog_src_register reg)
170 {
171 return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
172 }
173
174 static void transform_ABS(struct radeon_transform_context* t,
175 struct prog_instruction* inst)
176 {
177 struct prog_src_register src = inst->SrcReg[0];
178 src.Abs = 1;
179 src.Negate = NEGATE_NONE;
180 emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
181 }
182
183 static void transform_DPH(struct radeon_transform_context* t,
184 struct prog_instruction* inst)
185 {
186 struct prog_src_register src0 = inst->SrcReg[0];
187 src0.Negate &= ~NEGATE_W;
188 src0.Swizzle &= ~(7 << (3 * 3));
189 src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
190 emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
191 }
192
193 /**
194 * [1, src0.y*src1.y, src0.z, src1.w]
195 * So basically MUL with lotsa swizzling.
196 */
197 static void transform_DST(struct radeon_transform_context* t,
198 struct prog_instruction* inst)
199 {
200 emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
201 swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
202 swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
203 }
204
205 static void transform_FLR(struct radeon_transform_context* t,
206 struct prog_instruction* inst)
207 {
208 int tempreg = radeonFindFreeTemporary(t);
209 emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
210 emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
211 inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
212 }
213
214 /**
215 * Definition of LIT (from ARB_fragment_program):
216 *
217 * tmp = VectorLoad(op0);
218 * if (tmp.x < 0) tmp.x = 0;
219 * if (tmp.y < 0) tmp.y = 0;
220 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
221 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
222 * result.x = 1.0;
223 * result.y = tmp.x;
224 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
225 * result.w = 1.0;
226 *
227 * The longest path of computation is the one leading to result.z,
228 * consisting of 5 operations. This implementation of LIT takes
229 * 5 slots, if the subsequent optimization passes are clever enough
230 * to pair instructions correctly.
231 */
232 static void transform_LIT(struct radeon_transform_context* t,
233 struct prog_instruction* inst)
234 {
235 static const GLfloat LitConst[4] = { -127.999999 };
236
237 GLuint constant;
238 GLuint constant_swizzle;
239 GLuint temp;
240 int needTemporary = 0;
241 struct prog_src_register srctemp;
242
243 constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
244
245 if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
246 needTemporary = 1;
247 } else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
248 // LIT is typically followed by DP3/DP4, so there's no point
249 // in creating special code for this case
250 needTemporary = 1;
251 }
252
253 if (needTemporary) {
254 temp = radeonFindFreeTemporary(t);
255 } else {
256 temp = inst->DstReg.Index;
257 }
258 srctemp = srcreg(PROGRAM_TEMPORARY, temp);
259
260 // tmp.x = max(0.0, Src.x);
261 // tmp.y = max(0.0, Src.y);
262 // tmp.w = clamp(Src.z, -128+eps, 128-eps);
263 emit2(t->Program, OPCODE_MAX, 0,
264 dstregtmpmask(temp, WRITEMASK_XYW),
265 inst->SrcReg[0],
266 swizzle(srcreg(PROGRAM_CONSTANT, constant),
267 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
268 emit2(t->Program, OPCODE_MIN, 0,
269 dstregtmpmask(temp, WRITEMASK_Z),
270 swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
271 negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
272
273 // tmp.w = Pow(tmp.y, tmp.w)
274 emit1(t->Program, OPCODE_LG2, 0,
275 dstregtmpmask(temp, WRITEMASK_W),
276 swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
277 emit2(t->Program, OPCODE_MUL, 0,
278 dstregtmpmask(temp, WRITEMASK_W),
279 swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
280 swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
281 emit1(t->Program, OPCODE_EX2, 0,
282 dstregtmpmask(temp, WRITEMASK_W),
283 swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
284
285 // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
286 emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
287 dstregtmpmask(temp, WRITEMASK_Z),
288 negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
289 swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
290 builtin_zero);
291
292 // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
293 emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
294 dstregtmpmask(temp, WRITEMASK_XYW),
295 swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
296
297 if (needTemporary)
298 emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
299 }
300
301 static void transform_LRP(struct radeon_transform_context* t,
302 struct prog_instruction* inst)
303 {
304 int tempreg = radeonFindFreeTemporary(t);
305
306 emit2(t->Program, OPCODE_ADD, 0,
307 dstreg(PROGRAM_TEMPORARY, tempreg),
308 inst->SrcReg[1], negate(inst->SrcReg[2]));
309 emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
310 inst->DstReg,
311 inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
312 }
313
314 static void transform_POW(struct radeon_transform_context* t,
315 struct prog_instruction* inst)
316 {
317 int tempreg = radeonFindFreeTemporary(t);
318 struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
319 struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
320 tempdst.WriteMask = WRITEMASK_W;
321 tempsrc.Swizzle = SWIZZLE_WWWW;
322
323 emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
324 emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
325 emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
326 }
327
328 static void transform_RSQ(struct radeon_transform_context* t,
329 struct prog_instruction* inst)
330 {
331 emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
332 }
333
334 static void transform_SGE(struct radeon_transform_context* t,
335 struct prog_instruction* inst)
336 {
337 int tempreg = radeonFindFreeTemporary(t);
338
339 emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
340 emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
341 srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
342 }
343
344 static void transform_SLT(struct radeon_transform_context* t,
345 struct prog_instruction* inst)
346 {
347 int tempreg = radeonFindFreeTemporary(t);
348
349 emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
350 emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
351 srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
352 }
353
354 static void transform_SUB(struct radeon_transform_context* t,
355 struct prog_instruction* inst)
356 {
357 emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
358 }
359
360 static void transform_SWZ(struct radeon_transform_context* t,
361 struct prog_instruction* inst)
362 {
363 emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
364 }
365
366 static void transform_XPD(struct radeon_transform_context* t,
367 struct prog_instruction* inst)
368 {
369 int tempreg = radeonFindFreeTemporary(t);
370
371 emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
372 swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
373 swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
374 emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
375 swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
376 swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
377 negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
378 }
379
380
381 /**
382 * Can be used as a transformation for @ref radeonClauseLocalTransform,
383 * no userData necessary.
384 *
385 * Eliminates the following ALU instructions:
386 * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
387 * using:
388 * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
389 *
390 * Transforms RSQ to Radeon's native RSQ by explicitly setting
391 * absolute value.
392 *
393 * @note should be applicable to R300 and R500 fragment programs.
394 */
395 GLboolean radeonTransformALU(struct radeon_transform_context* t,
396 struct prog_instruction* inst,
397 void* unused)
398 {
399 switch(inst->Opcode) {
400 case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
401 case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
402 case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
403 case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
404 case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
405 case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
406 case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
407 case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
408 case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
409 case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
410 case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
411 case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
412 case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
413 default:
414 return GL_FALSE;
415 }
416 }
417
418
419 static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
420 {
421 static const GLfloat SinCosConsts[2][4] = {
422 {
423 1.273239545, // 4/PI
424 -0.405284735, // -4/(PI*PI)
425 3.141592654, // PI
426 0.2225 // weight
427 },
428 {
429 0.75,
430 0.5,
431 0.159154943, // 1/(2*PI)
432 6.283185307 // 2*PI
433 }
434 };
435 int i;
436
437 for(i = 0; i < 2; ++i) {
438 GLuint swz;
439 constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
440 ASSERT(swz == SWIZZLE_NOOP);
441 }
442 }
443
444 /**
445 * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
446 *
447 * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
448 * MAD tmp.x, tmp.y, |src|, tmp.x
449 * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
450 * MAD dest, tmp.y, weight, tmp.x
451 */
452 static void sin_approx(struct radeon_transform_context* t,
453 struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
454 {
455 GLuint tempreg = radeonFindFreeTemporary(t);
456
457 emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
458 swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
459 srcreg(PROGRAM_CONSTANT, constants[0]));
460 emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
461 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
462 absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
463 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
464 emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
465 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
466 absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
467 negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
468 emit3(t->Program, OPCODE_MAD, 0, dst,
469 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
470 swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
471 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
472 }
473
474 /**
475 * Translate the trigonometric functions COS, SIN, and SCS
476 * using only the basic instructions
477 * MOV, ADD, MUL, MAD, FRC
478 */
479 GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
480 struct prog_instruction* inst,
481 void* unused)
482 {
483 if (inst->Opcode != OPCODE_COS &&
484 inst->Opcode != OPCODE_SIN &&
485 inst->Opcode != OPCODE_SCS)
486 return GL_FALSE;
487
488 GLuint constants[2];
489 GLuint tempreg = radeonFindFreeTemporary(t);
490
491 sincos_constants(t, constants);
492
493 if (inst->Opcode == OPCODE_COS) {
494 // MAD tmp.x, src, 1/(2*PI), 0.75
495 // FRC tmp.x, tmp.x
496 // MAD tmp.z, tmp.x, 2*PI, -PI
497 emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
498 swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
499 swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
500 swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
501 emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
502 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
503 emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
504 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
505 swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
506 negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
507
508 sin_approx(t, inst->DstReg,
509 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
510 constants);
511 } else if (inst->Opcode == OPCODE_SIN) {
512 emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
513 swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
514 swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
515 swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
516 emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
517 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
518 emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
519 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
520 swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
521 negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
522
523 sin_approx(t, inst->DstReg,
524 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
525 constants);
526 } else {
527 emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
528 swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
529 swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
530 swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
531 emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
532 srcreg(PROGRAM_TEMPORARY, tempreg));
533 emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
534 srcreg(PROGRAM_TEMPORARY, tempreg),
535 swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
536 negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
537
538 struct prog_dst_register dst = inst->DstReg;
539
540 dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
541 sin_approx(t, dst,
542 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
543 constants);
544
545 dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
546 sin_approx(t, dst,
547 swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
548 constants);
549 }
550
551 return GL_TRUE;
552 }
553
554
555 /**
556 * Transform the trigonometric functions COS, SIN, and SCS
557 * to include pre-scaling by 1/(2*PI) and taking the fractional
558 * part, so that the input to COS and SIN is always in the range [0,1).
559 * SCS is replaced by one COS and one SIN instruction.
560 *
561 * @warning This transformation implicitly changes the semantics of SIN and COS!
562 */
563 GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
564 struct prog_instruction* inst,
565 void* unused)
566 {
567 if (inst->Opcode != OPCODE_COS &&
568 inst->Opcode != OPCODE_SIN &&
569 inst->Opcode != OPCODE_SCS)
570 return GL_FALSE;
571
572 static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
573 GLuint temp;
574 GLuint constant;
575 GLuint constant_swizzle;
576
577 temp = radeonFindFreeTemporary(t);
578 constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
579
580 emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
581 swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
582 srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
583 emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
584 srcreg(PROGRAM_TEMPORARY, temp));
585
586 if (inst->Opcode == OPCODE_COS) {
587 emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
588 srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
589 } else if (inst->Opcode == OPCODE_SIN) {
590 emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
591 inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
592 } else if (inst->Opcode == OPCODE_SCS) {
593 struct prog_dst_register moddst = inst->DstReg;
594
595 if (inst->DstReg.WriteMask & WRITEMASK_X) {
596 moddst.WriteMask = WRITEMASK_X;
597 emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
598 srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
599 }
600 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
601 moddst.WriteMask = WRITEMASK_Y;
602 emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
603 srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
604 }
605 }
606
607 return GL_TRUE;
608 }
609
610 /**
611 * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
612 * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
613 * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
614 *
615 * @warning This explicitly changes the form of DDX and DDY!
616 */
617
618 GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
619 struct prog_instruction* inst,
620 void* unused)
621 {
622 if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
623 return GL_FALSE;
624
625 struct prog_src_register B = inst->SrcReg[1];
626
627 B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
628 SWIZZLE_ONE, SWIZZLE_ONE);
629 B.Negate = NEGATE_XYZW;
630
631 emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
632 inst->SrcReg[0], B);
633
634 return GL_TRUE;
635 }