f76a3d9560b7fe0ce9e6c83ca34f691c3675bb0d
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 * \todo Depth write, WPOS/FOGC inputs
41 *
42 * \todo FogOption
43 *
44 * \todo Verify results of opcodes for accuracy, I've only checked them in
45 * specific cases.
46 */
47
48 #include "glheader.h"
49 #include "macros.h"
50 #include "enums.h"
51 #include "shader/prog_instruction.h"
52 #include "shader/prog_parameter.h"
53 #include "shader/prog_print.h"
54
55 #include "r300_context.h"
56 #include "r500_fragprog.h"
57 #include "r300_reg.h"
58 #include "r300_state.h"
59
60 /*
61 * Useful macros and values
62 */
/*
 * Report a compile problem and mark the program as failed.
 *
 * Prints "<file>::<function>(): <message>" followed by a newline to
 * stderr and sets fp->error, so a variable named `fp` with an `error`
 * member must be in scope wherever this macro is expanded.
 */
#define ERROR(fmt, ...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares `cs` as a local shortcut for fp->cs; needs `fp` in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* Register-file limits checked by get_temp() and emit_const4fv(). */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags, OR'ed into a register index (listed by bit position) */
#define REG_DEST_REL (1 << 7)
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
78
/* Swizzle tools */

/* Per-channel selectors beyond the four plain components (0..3). */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6

/* Ready-made RGB swizzles: one 3-bit selector per channel at bits 0, 3, 6. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/* Source modifiers, placed directly above the selector bits of a swizzle. */
#define R500_SWIZ_MOD_NEG 1
#define R500_SWIZ_MOD_ABS 2
#define R500_SWIZ_MOD_NEG_ABS 3

/*
 * Field-positioning helpers: shift a swizzle value into its slot of the
 * given instruction word.  The argument is parenthesized so that an
 * expression such as (a | b) is shifted as a whole rather than only its
 * last operand.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

/* Writemasks: 4-bit channel masks, one bit per channel named in the macro */
#define R500_WRITEMASK_G 0x2
#define R500_WRITEMASK_A 0x8
#define R500_WRITEMASK_AR 0x9
#define R500_WRITEMASK_AG 0xA
#define R500_WRITEMASK_ARG 0xB
#define R500_WRITEMASK_AB 0xC
#define R500_WRITEMASK_ARGB 0xF
110
111 /* 1/(2pi), needed for quick modulus in trig insts
112 * Thanks to glisse for pointing out how to do it! */
113 static const GLfloat RCP_2PI[] = {0.15915494309189535,
114 0.15915494309189535,
115 0.15915494309189535,
116 0.15915494309189535};
117
118 static const GLfloat LIT[] = {127.999999,
119 127.999999,
120 127.999999,
121 -127.999999};
122
123 static void dump_program(struct r500_fragment_program *fp);
124
125 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
126 GLuint swiz = 0x0;
127 GLuint temp;
128 /* This could be optimized, but it should be plenty fast already. */
129 int i;
130 for (i = 0; i < 3; i++) {
131 temp = GET_SWZ(src.Swizzle, i);
132 /* Fix SWIZZLE_ONE */
133 if (temp == 5) temp++;
134 swiz |= temp << i*3;
135 }
136 if (src.NegateBase)
137 swiz |= (R500_SWIZ_MOD_NEG << 9);
138 return swiz;
139 }
140
141 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
142 GLuint swiz = GET_SWZ(src.Swizzle, 3);
143
144 if (swiz == 5) swiz++;
145
146 if (src.NegateBase)
147 swiz |= (R500_SWIZ_MOD_NEG << 3);
148
149 return swiz;
150 }
151
152 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
153 GLuint swiz = GET_SWZ(src.Swizzle, 0);
154
155 if (swiz == 5) swiz++;
156 return swiz;
157 }
158
159 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
160 GLuint swiz = 0x0, temp = 0x0;
161 int i;
162 for (i = 0; i < 4; i++) {
163 temp = GET_SWZ(src.Swizzle, i) & 0x3;
164 swiz |= temp << i*2;
165 }
166 return swiz;
167 }
168
169 static int get_temp(struct r500_fragment_program *fp, int slot) {
170
171 COMPILE_STATE;
172
173 int r = fp->temp_reg_offset + cs->temp_in_use + slot;
174
175 if (r > R500_US_NUM_TEMP_REGS) {
176 ERROR("Too many temporary registers requested, can't compile!\n");
177 }
178
179 return r;
180 }
181
182 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
183 static GLuint emit_const4fv(struct r500_fragment_program *fp,
184 const GLfloat * cp)
185 {
186 GLuint reg = 0x0;
187 int index;
188
189 for (index = 0; index < fp->const_nr; ++index) {
190 if (fp->constant[index] == cp)
191 break;
192 }
193
194 if (index >= fp->const_nr) {
195 if (index >= R500_US_NUM_CONST_REGS) {
196 ERROR("Out of hw constants!\n");
197 return reg;
198 }
199
200 fp->const_nr++;
201 fp->constant[index] = cp;
202 }
203
204 reg = index | REG_CONSTANT;
205 return reg;
206 }
207
208 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
209 COMPILE_STATE;
210 GLuint reg;
211 switch (src.File) {
212 case PROGRAM_TEMPORARY:
213 reg = src.Index + fp->temp_reg_offset;
214 break;
215 case PROGRAM_INPUT:
216 reg = cs->inputs[src.Index].reg;
217 break;
218 case PROGRAM_LOCAL_PARAM:
219 reg = emit_const4fv(fp,
220 fp->mesa_program.Base.LocalParams[src.
221 Index]);
222 break;
223 case PROGRAM_ENV_PARAM:
224 reg = emit_const4fv(fp,
225 fp->ctx->FragmentProgram.Parameters[src.
226 Index]);
227 break;
228 case PROGRAM_STATE_VAR:
229 case PROGRAM_NAMED_PARAM:
230 case PROGRAM_CONSTANT:
231 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
232 ParameterValues[src.Index]);
233 break;
234 default:
235 ERROR("Can't handle src.File %x\n", src.File);
236 reg = 0x0;
237 break;
238 }
239 return reg;
240 }
241
242 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
243 GLuint reg;
244 switch (dest.File) {
245 case PROGRAM_TEMPORARY:
246 reg = dest.Index + fp->temp_reg_offset;
247 break;
248 case PROGRAM_OUTPUT:
249 /* Eventually we may need to handle multiple
250 * rendering targets... */
251 reg = dest.Index;
252 break;
253 default:
254 ERROR("Can't handle dest.File %x\n", dest.File);
255 reg = 0x0;
256 break;
257 }
258 return reg;
259 }
260
261 static void emit_tex(struct r500_fragment_program *fp,
262 struct prog_instruction *fpi, int dest, int counter)
263 {
264 int hwsrc, hwdest;
265 GLuint mask;
266
267 mask = fpi->DstReg.WriteMask << 11;
268 hwsrc = make_src(fp, fpi->SrcReg[0]);
269
270 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
271 hwdest = get_temp(fp, 0);
272 } else {
273 hwdest = dest;
274 }
275
276 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
277 | R500_INST_TEX_SEM_WAIT;
278
279 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
280 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
281
282 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
283 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
284
285 switch (fpi->Opcode) {
286 case OPCODE_KIL:
287 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
288 break;
289 case OPCODE_TEX:
290 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
291 break;
292 case OPCODE_TXB:
293 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
294 break;
295 case OPCODE_TXP:
296 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
297 break;
298 default:
299 ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode);
300 }
301
302 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
303 | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0]))
304 /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
305 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */
306 | R500_TEX_DST_ADDR(hwdest)
307 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
308 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
309
310 fp->inst[counter].inst3 = 0x0;
311 fp->inst[counter].inst4 = 0x0;
312 fp->inst[counter].inst5 = 0x0;
313
314 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
315 counter++;
316 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
317 | R500_INST_TEX_SEM_WAIT | (mask << 4);
318 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
319 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
320 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
321 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
322 | R500_ALU_RGB_SEL_B_SRC0
323 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
324 | R500_ALU_RGB_OMOD_DISABLE;
325 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
326 | R500_ALPHA_ADDRD(dest)
327 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
328 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
329 | R500_ALPHA_OMOD_DISABLE;
330 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
331 | R500_ALU_RGBA_ADDRD(dest)
332 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
333 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
334 }
335 }
336
337 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
338 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
339 fp->inst[counter].inst0 = R500_INST_TYPE_OUT;
340
341 if (fpi->DstReg.Index == FRAG_RESULT_COLR)
342 fp->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15);
343
344 if (fpi->DstReg.Index == FRAG_RESULT_DEPR)
345 fp->inst[counter].inst4 = R500_ALPHA_W_OMASK;
346 } else {
347 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
348 /* pixel_mask */
349 | (fpi->DstReg.WriteMask << 11);
350 }
351
352 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
353
354 /* Ideally, we shouldn't have to explicitly clear memory here! */
355 fp->inst[counter].inst1 = 0x0;
356 fp->inst[counter].inst2 = 0x0;
357 fp->inst[counter].inst3 = 0x0;
358 fp->inst[counter].inst5 = 0x0;
359 }
360
361 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) {
362 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
363 * it is technically more accurate and recommended by ATI/AMD. */
364 emit_alu(fp, counter, fpi);
365 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
366 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
367 /* 0x1FF is 9 bits, size of an RGB swizzle. */
368 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
369 | MAKE_SWIZ_RGB_A((swizzle & 0x1ff))
370 | R500_ALU_RGB_SEL_B_SRC0
371 | MAKE_SWIZ_RGB_B((swizzle & 0x1ff))
372 | R500_ALU_RGB_OMOD_DISABLE;
373 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
374 | R500_ALPHA_ADDRD(dest)
375 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3))
376 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3))
377 | R500_ALPHA_OMOD_DISABLE;
378 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
379 | R500_ALU_RGBA_ADDRD(dest)
380 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
381 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
382 }
383
384 static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int one, int two, int three) {
385 /* Note: This code was all Corbin's. Corbin is a rather hackish coder.
386 * If you can make it pretty or fast, please do so! */
387 emit_alu(fp, counter, fpi);
388 /* Common MAD stuff */
389 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
390 | R500_ALPHA_ADDRD(make_dest(fp, fpi->DstReg));
391 fp->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD
392 | R500_ALU_RGBA_ADDRD(make_dest(fp, fpi->DstReg));
393 switch (one) {
394 case 0:
395 case 1:
396 case 2:
397 fp->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(fp, fpi->SrcReg[one]));
398 fp->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(fp, fpi->SrcReg[one]));
399 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0
400 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one]));
401 fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0
402 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one]));
403 break;
404 case R500_SWIZZLE_ZERO:
405 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO);
406 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO);
407 break;
408 case R500_SWIZZLE_ONE:
409 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE);
410 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE);
411 break;
412 default:
413 ERROR("Bad src index in emit_mad: %d\n", one);
414 break;
415 }
416 switch (two) {
417 case 0:
418 case 1:
419 case 2:
420 fp->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(fp, fpi->SrcReg[two]));
421 fp->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(fp, fpi->SrcReg[two]));
422 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
423 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two]));
424 fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1
425 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two]));
426 break;
427 case R500_SWIZZLE_ZERO:
428 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
429 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
430 break;
431 case R500_SWIZZLE_ONE:
432 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
433 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
434 break;
435 default:
436 ERROR("Bad src index in emit_mad: %d\n", two);
437 break;
438 }
439 switch (three) {
440 case 0:
441 case 1:
442 case 2:
443 fp->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(fp, fpi->SrcReg[three]));
444 fp->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(fp, fpi->SrcReg[three]));
445 fp->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2
446 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three]))
447 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
448 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three]));
449 break;
450 case R500_SWIZZLE_ZERO:
451 fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
452 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
453 break;
454 case R500_SWIZZLE_ONE:
455 fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE)
456 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE);
457 break;
458 default:
459 ERROR("Bad src index in emit_mad: %d\n", three);
460 break;
461 }
462 }
463
464 static GLboolean parse_program(struct r500_fragment_program *fp)
465 {
466 struct gl_fragment_program *mp = &fp->mesa_program;
467 const struct prog_instruction *inst = mp->Base.Instructions;
468 struct prog_instruction *fpi;
469 GLuint src[3], dest = 0;
470 int temp_swiz, counter = 0;
471
472 if (!inst || inst[0].Opcode == OPCODE_END) {
473 ERROR("The program is empty!\n");
474 return GL_FALSE;
475 }
476
477 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
478
479 if (fpi->Opcode != OPCODE_KIL) {
480 dest = make_dest(fp, fpi->DstReg);
481 }
482
483 switch (fpi->Opcode) {
484 case OPCODE_ABS:
485 emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
486 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
487 | R500_ALU_RGB_MOD_B_ABS;
488 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
489 | R500_ALPHA_MOD_B_ABS;
490 break;
491 case OPCODE_ADD:
492 /* Variation on MAD: 1*src0+src1 */
493 emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
494 break;
495 case OPCODE_CMP:
496 /* This inst's selects need to be swapped as follows:
497 * 0 -> C ; 1 -> B ; 2 -> A */
498 src[0] = make_src(fp, fpi->SrcReg[0]);
499 src[1] = make_src(fp, fpi->SrcReg[1]);
500 src[2] = make_src(fp, fpi->SrcReg[2]);
501 emit_alu(fp, counter, fpi);
502 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
503 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
504 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
505 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
506 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
507 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
508 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
509 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
510 | R500_ALPHA_ADDRD(dest)
511 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
512 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
513 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
514 | R500_ALU_RGBA_ADDRD(dest)
515 | R500_ALU_RGBA_SEL_C_SRC2
516 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
517 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
518 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
519 break;
520 case OPCODE_COS:
521 src[0] = make_src(fp, fpi->SrcReg[0]);
522 src[1] = emit_const4fv(fp, RCP_2PI);
523 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
524 | (R500_WRITEMASK_ARGB << 11);
525 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
526 | R500_RGB_ADDR1(src[1]);
527 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
528 | R500_ALPHA_ADDR1(src[1]);
529 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
530 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
531 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
532 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
533 | R500_ALPHA_ADDRD(get_temp(fp, 0))
534 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
535 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
536 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
537 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
538 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
539 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
540 counter++;
541 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
542 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
543 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
544 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
545 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
546 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
547 | R500_ALPHA_ADDRD(get_temp(fp, 1))
548 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
549 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
550 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
551 counter++;
552 emit_alu(fp, counter, fpi);
553 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
554 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
555 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
556 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
557 | R500_ALPHA_ADDRD(dest)
558 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
559 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
560 | R500_ALU_RGBA_ADDRD(dest);
561 break;
562 case OPCODE_DP3:
563 src[0] = make_src(fp, fpi->SrcReg[0]);
564 src[1] = make_src(fp, fpi->SrcReg[1]);
565 emit_alu(fp, counter, fpi);
566 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
567 | R500_RGB_ADDR1(src[1]);
568 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
569 | R500_ALPHA_ADDR1(src[1]);
570 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
571 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
572 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
573 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
574 | R500_ALPHA_ADDRD(dest)
575 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
576 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
577 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
578 | R500_ALU_RGBA_ADDRD(dest);
579 break;
580 case OPCODE_DP4:
581 src[0] = make_src(fp, fpi->SrcReg[0]);
582 src[1] = make_src(fp, fpi->SrcReg[1]);
583 /* Based on DP3 */
584 emit_alu(fp, counter, fpi);
585 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
586 | R500_RGB_ADDR1(src[1]);
587 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
588 | R500_ALPHA_ADDR1(src[1]);
589 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
590 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
591 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
592 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
593 | R500_ALPHA_ADDRD(dest)
594 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
595 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
596 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
597 | R500_ALU_RGBA_ADDRD(dest);
598 break;
599 case OPCODE_DPH:
600 src[0] = make_src(fp, fpi->SrcReg[0]);
601 src[1] = make_src(fp, fpi->SrcReg[1]);
602 /* Based on DP3 */
603 emit_alu(fp, counter, fpi);
604 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
605 | R500_RGB_ADDR1(src[1]);
606 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
607 | R500_ALPHA_ADDR1(src[1]);
608 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
609 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
610 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
611 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
612 | R500_ALPHA_ADDRD(dest)
613 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
614 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
615 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
616 | R500_ALU_RGBA_ADDRD(dest);
617 break;
618 case OPCODE_DST:
619 src[0] = make_src(fp, fpi->SrcReg[0]);
620 src[1] = make_src(fp, fpi->SrcReg[1]);
621 /* [1, src0.y*src1.y, src0.z, src1.w]
622 * So basically MUL with lotsa swizzling. */
623 emit_alu(fp, counter, fpi);
624 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
625 | R500_RGB_ADDR1(src[1]);
626 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
627 | R500_ALPHA_ADDR1(src[1]);
628 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
629 | R500_ALU_RGB_SEL_B_SRC1;
630 /* Select [1, y, z, 1] */
631 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE;
632 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz);
633 /* Select [1, y, 1, w] */
634 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6);
635 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz);
636 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
637 | R500_ALPHA_ADDRD(dest)
638 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
639 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
640 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
641 | R500_ALU_RGBA_ADDRD(dest)
642 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
643 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
644 break;
645 case OPCODE_EX2:
646 src[0] = make_src(fp, fpi->SrcReg[0]);
647 emit_alu(fp, counter, fpi);
648 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
649 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
650 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
651 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
652 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2
653 | R500_ALPHA_ADDRD(dest)
654 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
655 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
656 | R500_ALU_RGBA_ADDRD(dest);
657 break;
658 case OPCODE_FRC:
659 src[0] = make_src(fp, fpi->SrcReg[0]);
660 emit_alu(fp, counter, fpi);
661 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
662 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
663 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
664 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
665 fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC
666 | R500_ALPHA_ADDRD(dest)
667 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
668 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
669 | R500_ALU_RGBA_ADDRD(dest);
670 break;
671 case OPCODE_LG2:
672 src[0] = make_src(fp, fpi->SrcReg[0]);
673 emit_alu(fp, counter, fpi);
674 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
675 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
676 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
677 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
678 fp->inst[counter].inst4 |= R500_ALPHA_OP_LN2
679 | R500_ALPHA_ADDRD(dest)
680 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
681 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
682 | R500_ALU_RGBA_ADDRD(dest);
683 break;
684 case OPCODE_LIT:
685 /* To be honest, I have no idea how I came up with the following.
686 * All I know is that it's based on the r3xx stuff, and was
687 * concieved with the help of NyQuil. Mmm, MyQuil. */
688
689 /* First instruction */
690 src[0] = make_src(fp, fpi->SrcReg[0]);
691 src[1] = emit_const4fv(fp, LIT);
692 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
693 | (R500_WRITEMASK_ARG << 11);
694 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
695 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
696 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
697 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
698 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
699 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
700 | R500_ALPHA_ADDRD(get_temp(fp, 0))
701 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
702 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
703 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
704 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
705 counter++;
706 /* Second instruction */
707 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11);
708 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
709 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) | R500_ALPHA_ADDR1(src[1]);
710 /* Select [z, z, z, y] */
711 temp_swiz = 2 | (2 << 3) | (2 << 6);
712 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
713 | MAKE_SWIZ_RGB_A(temp_swiz)
714 | R500_ALU_RGB_SEL_B_SRC0
715 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
716 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
717 | R500_ALPHA_ADDRD(get_temp(fp, 0))
718 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G;
719 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
720 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
721 counter++;
722 /* Third instruction */
723 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AG << 11);
724 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
725 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
726 /* Select [x, x, x, z] */
727 temp_swiz = 0;
728 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
729 | MAKE_SWIZ_RGB_A(temp_swiz)
730 | R500_ALU_RGB_SEL_B_SRC0
731 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
732 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
733 | R500_ALPHA_ADDRD(get_temp(fp, 1))
734 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_B
735 | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_A;
736 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
737 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
738 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
739 | R500_ALU_RGBA_A_SWIZ_0;
740 counter++;
741 /* Fourth instruction */
742 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AR << 11);
743 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
744 fp->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
745 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
746 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
747 | R500_ALPHA_ADDRD(get_temp(fp, 0))
748 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
749 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
750 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
751 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
752 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
753 counter++;
754 /* Fifth instruction */
755 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11);
756 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
757 /* Select [w, w, w] */
758 temp_swiz = 3 | (3 << 3) | (3 << 6);
759 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
760 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
761 | R500_ALU_RGB_SEL_B_SRC0
762 | MAKE_SWIZ_RGB_B(temp_swiz);
763 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
764 | R500_ALPHA_ADDRD(get_temp(fp, 0))
765 | R500_ALPHA_SWIZ_A_1
766 | R500_ALPHA_SWIZ_B_1;
767 /* Select [-y, -y, -y] */
768 temp_swiz = 1 | (1 << 3) | (1 << 6);
769 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
770 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
771 | MAKE_SWIZ_RGBA_C(temp_swiz)
772 | R500_ALU_RGBA_MOD_C_NEG
773 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
774 counter++;
775 /* Final instruction */
776 emit_mov(fp, counter, fpi, get_temp(fp, 0), 1672, dest);
777 break;
778 case OPCODE_LRP:
779 /* src0 * src1 + INV(src0) * src2
780 * 1) MUL src0, src1, temp
781 * 2) PRE 1-src0; MAD srcp, src2, temp */
782 src[0] = make_src(fp, fpi->SrcReg[0]);
783 src[1] = make_src(fp, fpi->SrcReg[1]);
784 src[2] = make_src(fp, fpi->SrcReg[2]);
785 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
786 | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11);
787 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
788 | R500_RGB_ADDR1(src[1]);
789 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
790 | R500_ALPHA_ADDR1(src[1]);
791 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
792 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
793 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
794 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
795 | R500_ALPHA_ADDRD(get_temp(fp, 0))
796 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
797 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
798 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
799 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
800 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
801 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
802 counter++;
803 emit_alu(fp, counter, fpi);
804 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
805 | R500_RGB_ADDR1(src[2])
806 | R500_RGB_ADDR2(get_temp(fp, 0))
807 | R500_RGB_SRCP_OP_1_MINUS_RGB0;
808 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
809 | R500_ALPHA_ADDR1(src[2])
810 | R500_ALPHA_ADDR2(get_temp(fp, 0))
811 | R500_ALPHA_SRCP_OP_1_MINUS_A0;
812 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
813 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
814 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
815 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
816 | R500_ALPHA_ADDRD(dest)
817 | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
818 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
819 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
820 | R500_ALU_RGBA_ADDRD(dest)
821 | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
822 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
823 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
824 break;
825 case OPCODE_MAD:
826 emit_mad(fp, counter, fpi, 0, 1, 2);
827 break;
828 case OPCODE_MAX:
829 src[0] = make_src(fp, fpi->SrcReg[0]);
830 src[1] = make_src(fp, fpi->SrcReg[1]);
831 emit_alu(fp, counter, fpi);
832 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
833 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
834 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
835 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
836 | R500_ALU_RGB_SEL_B_SRC1
837 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
838 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAX
839 | R500_ALPHA_ADDRD(dest)
840 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
841 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
842 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
843 | R500_ALU_RGBA_ADDRD(dest);
844 break;
845 case OPCODE_MIN:
846 src[0] = make_src(fp, fpi->SrcReg[0]);
847 src[1] = make_src(fp, fpi->SrcReg[1]);
848 emit_alu(fp, counter, fpi);
849 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
850 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
851 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
852 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
853 | R500_ALU_RGB_SEL_B_SRC1
854 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
855 fp->inst[counter].inst4 |= R500_ALPHA_OP_MIN
856 | R500_ALPHA_ADDRD(dest)
857 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
858 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
859 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
860 | R500_ALU_RGBA_ADDRD(dest);
861 break;
862 case OPCODE_MOV:
863 emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
864 break;
865 case OPCODE_MUL:
866 /* Variation on MAD: src0*src1+0 */
867 emit_mad(fp, counter, fpi, 0, 1, R500_SWIZZLE_ZERO);
868 break;
869 case OPCODE_POW:
870 /* POW(a,b) = EX2(LN2(a)*b) */
871 src[0] = make_src(fp, fpi->SrcReg[0]);
872 src[1] = make_src(fp, fpi->SrcReg[1]);
873 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
874 | (R500_WRITEMASK_ARGB << 11);
875 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
876 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
877 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
878 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
879 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
880 | R500_ALPHA_ADDRD(get_temp(fp, 0))
881 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
882 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
883 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
884 counter++;
885 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
886 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
887 | R500_RGB_ADDR1(src[1]);
888 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
889 | R500_ALPHA_ADDR1(src[1]);
890 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
891 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
892 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
893 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
894 | R500_ALPHA_ADDRD(get_temp(fp, 1))
895 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
896 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
897 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
898 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
899 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
900 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
901 counter++;
902 emit_alu(fp, counter, fpi);
903 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
904 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
905 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
906 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
907 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2
908 | R500_ALPHA_ADDRD(dest)
909 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
910 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
911 | R500_ALU_RGBA_ADDRD(dest);
912 break;
913 case OPCODE_RCP:
914 src[0] = make_src(fp, fpi->SrcReg[0]);
915 emit_alu(fp, counter, fpi);
916 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
917 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
918 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
919 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
920 fp->inst[counter].inst4 |= R500_ALPHA_OP_RCP
921 | R500_ALPHA_ADDRD(dest)
922 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
923 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
924 | R500_ALU_RGBA_ADDRD(dest);
925 break;
926 case OPCODE_RSQ:
927 src[0] = make_src(fp, fpi->SrcReg[0]);
928 emit_alu(fp, counter, fpi);
929 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
930 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
931 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
932 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
933 fp->inst[counter].inst4 |= R500_ALPHA_OP_RSQ
934 | R500_ALPHA_ADDRD(dest)
935 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
936 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
937 | R500_ALU_RGBA_ADDRD(dest);
938 break;
939 case OPCODE_SCS:
940 src[0] = make_src(fp, fpi->SrcReg[0]);
941 src[1] = emit_const4fv(fp, RCP_2PI);
942 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
943 | (R500_WRITEMASK_ARGB << 11);
944 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
945 | R500_RGB_ADDR1(src[1]);
946 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
947 | R500_ALPHA_ADDR1(src[1]);
948 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
949 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
950 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
951 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
952 | R500_ALPHA_ADDRD(get_temp(fp, 0))
953 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
954 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
955 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
956 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
957 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
958 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
959 counter++;
960 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
961 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
962 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
963 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
964 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
965 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
966 | R500_ALPHA_ADDRD(get_temp(fp, 1))
967 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
968 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
969 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
970 counter++;
971 /* Do a cosine, then a sine, masking out the channels we want to protect. */
972 /* Cosine only goes in R (x) channel. */
973 fpi->DstReg.WriteMask = 0x1;
974 emit_alu(fp, counter, fpi);
975 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
976 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
977 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
978 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
979 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
980 | R500_ALPHA_ADDRD(dest)
981 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
982 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
983 | R500_ALU_RGBA_ADDRD(dest);
984 counter++;
985 /* Sine only goes in G (y) channel. */
986 fpi->DstReg.WriteMask = 0x2;
987 emit_alu(fp, counter, fpi);
988 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
989 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
990 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
991 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
992 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
993 | R500_ALPHA_ADDRD(dest)
994 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
995 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
996 | R500_ALU_RGBA_ADDRD(dest);
997 break;
998 case OPCODE_SGE:
999 src[0] = make_src(fp, fpi->SrcReg[0]);
1000 src[1] = make_src(fp, fpi->SrcReg[1]);
1001 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1002 | (R500_WRITEMASK_ARGB << 11);
1003 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1004 | R500_RGB_ADDR2(src[1]);
1005 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1006 | R500_ALPHA_ADDR2(src[1]);
1007 fp->inst[counter].inst3 = /* 1 */
1008 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1009 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1010 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1011 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1012 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1013 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1014 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1015 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1016 | R500_ALU_RGBA_SEL_C_SRC2
1017 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1018 | R500_ALU_RGBA_MOD_C_NEG
1019 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1020 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1021 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1022 counter++;
1023 /* This inst's selects need to be swapped as follows:
1024 * 0 -> C ; 1 -> B ; 2 -> A */
1025 emit_alu(fp, counter, fpi);
1026 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1027 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1028 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1029 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1030 | R500_ALU_RGB_SEL_B_SRC0
1031 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
1032 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1033 | R500_ALPHA_ADDRD(dest)
1034 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1035 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
1036 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1037 | R500_ALU_RGBA_ADDRD(dest)
1038 | R500_ALU_RGBA_SEL_C_SRC0
1039 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1040 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1041 | R500_ALU_RGBA_A_SWIZ_A;
1042 break;
1043 case OPCODE_SIN:
1044 src[0] = make_src(fp, fpi->SrcReg[0]);
1045 src[1] = emit_const4fv(fp, RCP_2PI);
1046 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1047 | (R500_WRITEMASK_ARGB << 11);
1048 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
1049 | R500_RGB_ADDR1(src[1]);
1050 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
1051 | R500_ALPHA_ADDR1(src[1]);
1052 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1053 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
1054 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
1055 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1056 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1057 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
1058 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
1059 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1060 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1061 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
1062 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
1063 counter++;
1064 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
1065 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1066 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1067 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1068 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
1069 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
1070 | R500_ALPHA_ADDRD(get_temp(fp, 1))
1071 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
1072 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
1073 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
1074 counter++;
1075 emit_alu(fp, counter, fpi);
1076 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
1077 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
1078 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
1079 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
1080 | R500_ALPHA_ADDRD(dest)
1081 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
1082 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
1083 | R500_ALU_RGBA_ADDRD(dest);
1084 break;
1085 case OPCODE_SLT:
1086 src[0] = make_src(fp, fpi->SrcReg[0]);
1087 src[1] = make_src(fp, fpi->SrcReg[1]);
1088 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1089 | (R500_WRITEMASK_ARGB << 11);
1090 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1091 | R500_RGB_ADDR2(src[1]);
1092 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1093 | R500_ALPHA_ADDR2(src[1]);
1094 fp->inst[counter].inst3 = /* 1 */
1095 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1096 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1097 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1098 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1099 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1100 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1101 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1102 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1103 | R500_ALU_RGBA_SEL_C_SRC2
1104 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1105 | R500_ALU_RGBA_MOD_C_NEG
1106 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1107 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1108 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1109 counter++;
1110 /* This inst's selects need to be swapped as follows:
1111 * 0 -> C ; 1 -> B ; 2 -> A */
1112 emit_alu(fp, counter, fpi);
1113 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1114 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1115 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1116 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
1117 | R500_ALU_RGB_SEL_B_SRC0
1118 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
1119 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1120 | R500_ALPHA_ADDRD(dest)
1121 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
1122 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
1123 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1124 | R500_ALU_RGBA_ADDRD(dest)
1125 | R500_ALU_RGBA_SEL_C_SRC0
1126 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1127 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1128 | R500_ALU_RGBA_A_SWIZ_A;
1129 break;
1130 case OPCODE_SUB:
1131 /* Variation on MAD: 1*src0-src1 */
1132 fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */
1133 emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
1134 break;
1135 case OPCODE_SWZ:
1136 /* TODO: The rarer negation masks! */
1137 emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
1138 break;
1139 case OPCODE_KIL:
1140 case OPCODE_TEX:
1141 case OPCODE_TXB:
1142 case OPCODE_TXP:
1143 emit_tex(fp, fpi, dest, counter);
1144 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1145 counter++;
1146 break;
1147 default:
1148 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
1149 break;
1150 }
1151
1152 /* Finishing touches */
1153 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
1154 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
1155 }
1156
1157 counter++;
1158
1159 if (fp->error)
1160 return GL_FALSE;
1161
1162 }
1163
1164 /* Finish him! (If it's an ALU/OUT instruction...) */
1165 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
1166 fp->inst[counter-1].inst0 |= R500_INST_LAST;
1167 } else {
1168 /* We still need to put an output inst, right? */
1169 WARN_ONCE("Final FP instruction is not an OUT.\n");
1170 }
1171
1172 fp->cs->nrslots = counter;
1173
1174 fp->max_temp_idx++;
1175
1176 return GL_TRUE;
1177 }
1178
1179 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
1180 {
1181 struct r300_pfs_compile_state *cs = NULL;
1182 struct gl_fragment_program *mp = &fp->mesa_program;
1183 struct prog_instruction *fpi;
1184 GLuint InputsRead = mp->Base.InputsRead;
1185 GLuint temps_used = 0;
1186 int i, j;
1187
1188 /* New compile, reset tracking data */
1189 fp->optimization =
1190 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
1191 fp->translated = GL_FALSE;
1192 fp->error = GL_FALSE;
1193 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
1194 fp->cur_node = 0;
1195 fp->first_node_has_tex = 0;
1196 fp->const_nr = 0;
1197 /* Size of pixel stack, plus 1. */
1198 fp->max_temp_idx = 1;
1199 /* Temp register offset. */
1200 fp->temp_reg_offset = 0;
1201 fp->node[0].alu_end = -1;
1202 fp->node[0].tex_end = -1;
1203
1204 _mesa_memset(cs, 0, sizeof(*fp->cs));
1205 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
1206 for (j = 0; j < 3; j++) {
1207 cs->slot[i].vsrc[j] = SRC_CONST;
1208 cs->slot[i].ssrc[j] = SRC_CONST;
1209 }
1210 }
1211
1212 /* Work out what temps the Mesa inputs correspond to, this must match
1213 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1214 * configures itself based on the fragprog's InputsRead
1215 *
1216 * NOTE: this depends on get_hw_temp() allocating registers in order,
1217 * starting from register 0, so we're just going to do that instead.
1218 */
1219
1220 /* Texcoords come first */
1221 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
1222 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1223 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
1224 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
1225 fp->temp_reg_offset;
1226 fp->temp_reg_offset++;
1227 }
1228 }
1229 InputsRead &= ~FRAG_BITS_TEX_ANY;
1230
1231 /* fragment position treated as a texcoord */
1232 if (InputsRead & FRAG_BIT_WPOS) {
1233 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1234 cs->inputs[FRAG_ATTRIB_WPOS].reg =
1235 fp->temp_reg_offset;
1236 fp->temp_reg_offset++;
1237 }
1238 InputsRead &= ~FRAG_BIT_WPOS;
1239
1240 /* Then primary colour */
1241 if (InputsRead & FRAG_BIT_COL0) {
1242 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1243 cs->inputs[FRAG_ATTRIB_COL0].reg =
1244 fp->temp_reg_offset;
1245 fp->temp_reg_offset++;
1246 }
1247 InputsRead &= ~FRAG_BIT_COL0;
1248
1249 /* Secondary color */
1250 if (InputsRead & FRAG_BIT_COL1) {
1251 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1252 cs->inputs[FRAG_ATTRIB_COL1].reg =
1253 fp->temp_reg_offset;
1254 fp->temp_reg_offset++;
1255 }
1256 InputsRead &= ~FRAG_BIT_COL1;
1257
1258 /* Anything else */
1259 if (InputsRead) {
1260 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
1261 /* force read from hwreg 0 for now */
1262 for (i = 0; i < 32; i++)
1263 if (InputsRead & (1 << i))
1264 cs->inputs[i].reg = 0;
1265 }
1266
1267 if (!mp->Base.Instructions) {
1268 ERROR("No instructions found in program, going to go die now.\n");
1269 return;
1270 }
1271
1272 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1273 for (i = 0; i < 3; i++) {
1274 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1275 if (fpi->SrcReg[i].Index >= temps_used)
1276 temps_used = fpi->SrcReg[i].Index + 1;
1277 }
1278 }
1279 }
1280
1281 cs->temp_in_use = temps_used + 1;
1282
1283 fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use;
1284
1285 if (RADEON_DEBUG & DEBUG_PIXEL)
1286 fprintf(stderr, "FP temp indices: fp->max_temp_idx: %d cs->temp_in_use: %d\n", fp->max_temp_idx, cs->temp_in_use);
1287 }
1288
1289 static void update_params(struct r500_fragment_program *fp)
1290 {
1291 struct gl_fragment_program *mp = &fp->mesa_program;
1292
1293 /* Ask Mesa nicely to fill in ParameterValues for us */
1294 if (mp->Base.Parameters)
1295 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1296 }
1297
1298 static void dumb_shader(struct r500_fragment_program *fp)
1299 {
1300 fp->inst[0].inst0 = R500_INST_TYPE_TEX
1301 | R500_INST_TEX_SEM_WAIT
1302 | R500_INST_RGB_WMASK_R
1303 | R500_INST_RGB_WMASK_G
1304 | R500_INST_RGB_WMASK_B
1305 | R500_INST_ALPHA_WMASK
1306 | R500_INST_RGB_CLAMP
1307 | R500_INST_ALPHA_CLAMP;
1308 fp->inst[0].inst1 = R500_TEX_ID(0)
1309 | R500_TEX_INST_LD
1310 | R500_TEX_SEM_ACQUIRE
1311 | R500_TEX_IGNORE_UNCOVERED;
1312 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
1313 | R500_TEX_SRC_S_SWIZ_R
1314 | R500_TEX_SRC_T_SWIZ_G
1315 | R500_TEX_DST_ADDR(0)
1316 | R500_TEX_DST_R_SWIZ_R
1317 | R500_TEX_DST_G_SWIZ_G
1318 | R500_TEX_DST_B_SWIZ_B
1319 | R500_TEX_DST_A_SWIZ_A;
1320 fp->inst[0].inst3 = R500_DX_ADDR(0)
1321 | R500_DX_S_SWIZ_R
1322 | R500_DX_T_SWIZ_R
1323 | R500_DX_R_SWIZ_R
1324 | R500_DX_Q_SWIZ_R
1325 | R500_DY_ADDR(0)
1326 | R500_DY_S_SWIZ_R
1327 | R500_DY_T_SWIZ_R
1328 | R500_DY_R_SWIZ_R
1329 | R500_DY_Q_SWIZ_R;
1330 fp->inst[0].inst4 = 0x0;
1331 fp->inst[0].inst5 = 0x0;
1332
1333 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
1334 R500_INST_TEX_SEM_WAIT |
1335 R500_INST_LAST |
1336 R500_INST_RGB_OMASK_R |
1337 R500_INST_RGB_OMASK_G |
1338 R500_INST_RGB_OMASK_B |
1339 R500_INST_ALPHA_OMASK;
1340 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
1341 R500_RGB_ADDR1(0) |
1342 R500_RGB_ADDR1_CONST |
1343 R500_RGB_ADDR2(0) |
1344 R500_RGB_ADDR2_CONST |
1345 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
1346 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
1347 R500_ALPHA_ADDR1(0) |
1348 R500_ALPHA_ADDR1_CONST |
1349 R500_ALPHA_ADDR2(0) |
1350 R500_ALPHA_ADDR2_CONST |
1351 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
1352 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
1353 R500_ALU_RGB_R_SWIZ_A_R |
1354 R500_ALU_RGB_G_SWIZ_A_G |
1355 R500_ALU_RGB_B_SWIZ_A_B |
1356 R500_ALU_RGB_SEL_B_SRC0 |
1357 R500_ALU_RGB_R_SWIZ_B_1 |
1358 R500_ALU_RGB_B_SWIZ_B_1 |
1359 R500_ALU_RGB_G_SWIZ_B_1;
1360 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
1361 R500_ALPHA_SWIZ_A_A |
1362 R500_ALPHA_SWIZ_B_1;
1363 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
1364 R500_ALU_RGBA_R_SWIZ_0 |
1365 R500_ALU_RGBA_G_SWIZ_0 |
1366 R500_ALU_RGBA_B_SWIZ_0 |
1367 R500_ALU_RGBA_A_SWIZ_0;
1368
1369 fp->cs->nrslots = 2;
1370 fp->translated = GL_TRUE;
1371 }
1372
1373 void r500TranslateFragmentShader(r300ContextPtr r300,
1374 struct r500_fragment_program *fp)
1375 {
1376
1377 struct r300_pfs_compile_state *cs = NULL;
1378
1379 if (!fp->translated) {
1380
1381 init_program(r300, fp);
1382 cs = fp->cs;
1383
1384 if (parse_program(fp) == GL_FALSE) {
1385 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1386 dumb_shader(fp);
1387 fp->inst_offset = 0;
1388 fp->inst_end = cs->nrslots - 1;
1389 return;
1390 }
1391 fp->inst_offset = 0;
1392 fp->inst_end = cs->nrslots - 1;
1393
1394 fp->translated = GL_TRUE;
1395 if (RADEON_DEBUG & DEBUG_PIXEL) {
1396 fprintf(stderr, "Mesa program:\n");
1397 fprintf(stderr, "-------------\n");
1398 _mesa_print_program(&fp->mesa_program.Base);
1399 fflush(stdout);
1400 dump_program(fp);
1401 }
1402
1403
1404 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1405 }
1406
1407 update_params(fp);
1408
1409 }
1410
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \param swiz_val selector value, 0..7.
 * \return the name string, or NULL when \p swiz_val is outside 0..7.
 */
static char *toswiz(int swiz_val)
{
	static char *const swizzle_names[] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;

	return swizzle_names[swiz_val];
}
1424
/**
 * Map an RGBA ALU opcode to its printable mnemonic.
 *
 * dump_program() passes the low four bits of instruction word 5 and prints
 * the result with "%s", so every value 0..15 must yield a valid string.
 *
 * \param op_val opcode value.
 * \return the mnemonic for opcodes 0..12, "Unknown" for any other value.
 */
static char *toop(int op_val)
{
	/* Defined fallback: codes 13..15 have no mnemonic here, and the
	 * result must never be an indeterminate pointer. */
	char *str = "Unknown";

	switch (op_val) {
	case 0: str = "MAD"; break;
	case 1: str = "DP3"; break;
	case 2: str = "DP4"; break;
	case 3: str = "D2A"; break;
	case 4: str = "MIN"; break;
	case 5: str = "MAX"; break;
	case 6: str = "Reserved"; break;
	case 7: str = "CND"; break;
	case 8: str = "CMP"; break;
	case 9: str = "FRC"; break;
	case 10: str = "SOP"; break;
	case 11: str = "MDH"; break;
	case 12: str = "MDV"; break;
	default: break;
	}
	return str;
}
1445
/**
 * Map an alpha ALU opcode to its printable mnemonic.
 *
 * \param op_val opcode value, 0..15.
 * \return the mnemonic, or NULL when \p op_val is outside 0..15.
 */
static char *to_alpha_op(int op_val)
{
	switch (op_val) {
	case 0:  return "MAD";
	case 1:  return "DP";
	case 2:  return "MIN";
	case 3:  return "MAX";
	case 4:  return "Reserved";
	case 5:  return "CND";
	case 6:  return "CMP";
	case 7:  return "FRC";
	case 8:  return "EX2";
	case 9:  return "LN2";
	case 10: return "RCP";
	case 11: return "RSQ";
	case 12: return "SIN";
	case 13: return "COS";
	case 14: return "MDH";
	case 15: return "MDV";
	default: return NULL;
	}
}
1469
/**
 * Map a 4-bit channel mask to a printable channel list.
 *
 * Bit 0 names R, bit 1 G, bit 2 B and bit 3 A; alpha is listed first when
 * it is combined with colour channels.
 *
 * \param val mask value, 0..15.
 * \return the channel list ("NONE" for 0), or NULL when \p val is outside
 *         0..15.
 */
static char *to_mask(int val)
{
	static char *const mask_names[16] = {
		"NONE", "R",   "G",   "RG",
		"B",    "RB",  "GB",  "RGB",
		"A",    "AR",  "AG",  "ARG",
		"AB",   "ARB", "AGB", "ARGB"
	};

	if (val < 0 || val > 15)
		return NULL;

	return mask_names[val];
}
1493
/**
 * Map a texture instruction opcode to its printable mnemonic.
 *
 * dump_program() passes a 3-bit field and prints the result with "%s", so
 * code 7 (which has no mnemonic here) must still yield a valid string.
 *
 * \param val opcode value.
 * \return the mnemonic for opcodes 0..6, "Unknown" for any other value.
 */
static char *to_texop(int val)
{
	switch(val) {
	case 0: return "NOP";
	case 1: return "LD";
	case 2: return "TEXKILL";
	case 3: return "PROJ";
	case 4: return "LODBIAS";
	case 5: return "LOD";
	case 6: return "DXDY";
	default: break;
	}
	/* Printable fallback instead of NULL, which "%s" cannot take. */
	return "Unknown";
}
1507
1508 static void dump_program(struct r500_fragment_program *fp)
1509 {
1510 int pc = 0;
1511 int n;
1512 uint32_t inst;
1513 uint32_t inst0;
1514 char *str = NULL;
1515
1516 for (n = 0; n < fp->inst_end+1; n++) {
1517 inst0 = inst = fp->inst[n].inst0;
1518 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1519 switch(inst & 0x3) {
1520 case R500_INST_TYPE_ALU: str = "ALU"; break;
1521 case R500_INST_TYPE_OUT: str = "OUT"; break;
1522 case R500_INST_TYPE_FC: str = "FC"; break;
1523 case R500_INST_TYPE_TEX: str = "TEX"; break;
1524 };
1525 fprintf(stderr,"%s %s %s %s %s ", str,
1526 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1527 inst & R500_INST_LAST ? "LAST" : "",
1528 inst & R500_INST_NOP ? "NOP" : "",
1529 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1530 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1531 to_mask((inst >> 15) & 0xf));
1532
1533 switch(inst0 & 0x3) {
1534 case 0:
1535 case 1:
1536 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1537 inst = fp->inst[n].inst1;
1538
1539 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1540 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1541 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1542 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1543 (inst >> 30));
1544
1545 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1546 inst = fp->inst[n].inst2;
1547 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1548 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1549 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1550 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1551 (inst >> 30));
1552 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1553 inst = fp->inst[n].inst3;
1554 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1555 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1556 (inst >> 11) & 0x3,
1557 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1558 (inst >> 24) & 0x3);
1559
1560
1561 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1562 inst = fp->inst[n].inst4;
1563 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
1564 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1565 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1566 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
1567 (inst >> 31) & 0x1);
1568
1569 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1570 inst = fp->inst[n].inst5;
1571 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1572 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1573 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1574 (inst >> 23) & 0x3,
1575 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1576 break;
1577 case 2:
1578 break;
1579 case 3:
1580 inst = fp->inst[n].inst1;
1581 fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
1582 to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
1583 (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
1584 inst = fp->inst[n].inst2;
1585 fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
1586 inst & 127, inst & (1<<7) ? "(rel)" : "",
1587 toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
1588 toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
1589 (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
1590 toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
1591 toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
1592
1593 fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3);
1594 break;
1595 }
1596 fprintf(stderr,"\n");
1597 }
1598
1599 }