d22697ae90bbb95f3e2a47e9d91e8790ab5ae09a
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 * \todo Depth write, WPOS/FOGC inputs
41 *
42 * \todo FogOption
43 *
44 * \todo Verify results of opcodes for accuracy, I've only checked them in
45 * specific cases.
46 */
47
48 #include "glheader.h"
49 #include "macros.h"
50 #include "enums.h"
51 #include "shader/prog_instruction.h"
52 #include "shader/prog_parameter.h"
53 #include "shader/prog_print.h"
54
55 #include "r300_context.h"
56 #include "r500_fragprog.h"
57 #include "r300_reg.h"
58 #include "r300_state.h"
59
60 /*
61 * Useful macros and values
62 */
/**
 * Report a compile error.
 *
 * Prints "<file>::<function>(): <message>" followed by a newline to stderr
 * and then sets fp->error to GL_TRUE.  A pointer named `fp` whose target has
 * an `error` member must be in scope wherever this macro is used.
 */
#define ERROR(fmt, ...) \
	do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __func__, ##__VA_ARGS__); \
		fp->error = GL_TRUE; \
	} while (0)
68
/* Declares a local `cs` pointing at the compile state of the `fp` in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* Sizes of the US temporary and constant register files. */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags, OR'ed on top of a register index */
#define REG_CONSTANT 0x100
#define REG_SRC_REL 0x200
#define REG_DEST_REL 0x080

/* Swizzle tools: single-channel selectors for the constant values */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three-channel RGB swizzles: one 3-bit selector per channel, X lowest */
#define R500_SWIZ_RGB_ZERO \
	(R500_SWIZZLE_ZERO | (R500_SWIZZLE_ZERO << 3) | (R500_SWIZZLE_ZERO << 6))
#define R500_SWIZ_RGB_ONE \
	(R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 3) | (R500_SWIZZLE_ONE << 6))
#define R500_SWIZ_RGB_RGB (0 | (1 << 3) | (2 << 6))
/* Source modifiers that sit directly above a swizzle field */
#define R500_SWIZ_MOD_NEG 1
#define R500_SWIZ_MOD_ABS 2
#define R500_SWIZ_MOD_NEG_ABS (R500_SWIZ_MOD_NEG | R500_SWIZ_MOD_ABS)
/*
 * Field-placement helpers: each shifts an already-encoded swizzle value to
 * the bit position of the named field in the given instruction word.
 *
 * The argument is parenthesized so that compound expressions such as
 * `a | b` or `s & 0x1ff` are shifted as a whole; without the parentheses
 * `<<` binds tighter than `|` / `&` and only part of the value is shifted.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
101
/* Writemasks: one bit per channel, lowest bit first (R, G, B, A) */
#define R500_WRITEMASK_G (1 << 1)
#define R500_WRITEMASK_B (1 << 2)
#define R500_WRITEMASK_RGB ((1 << 0) | R500_WRITEMASK_G | R500_WRITEMASK_B)
#define R500_WRITEMASK_A (1 << 3)
#define R500_WRITEMASK_AR (R500_WRITEMASK_A | (1 << 0))
#define R500_WRITEMASK_AG (R500_WRITEMASK_A | R500_WRITEMASK_G)
#define R500_WRITEMASK_ARG (R500_WRITEMASK_AR | R500_WRITEMASK_G)
#define R500_WRITEMASK_AB (R500_WRITEMASK_A | R500_WRITEMASK_B)
#define R500_WRITEMASK_ARGB (R500_WRITEMASK_A | R500_WRITEMASK_RGB)
112
113 /* 1/(2pi), needed for quick modulus in trig insts
114 * Thanks to glisse for pointing out how to do it! */
115 static const GLfloat RCP_2PI[] = {0.15915494309189535,
116 0.15915494309189535,
117 0.15915494309189535,
118 0.15915494309189535};
119
120 static const GLfloat LIT[] = {127.999999,
121 127.999999,
122 127.999999,
123 -127.999999};
124
125 static void dump_program(struct r500_fragment_program *fp);
126
127 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
128 GLuint swiz = 0x0;
129 GLuint temp;
130 /* This could be optimized, but it should be plenty fast already. */
131 int i;
132 for (i = 0; i < 3; i++) {
133 temp = GET_SWZ(src.Swizzle, i);
134 /* Fix SWIZZLE_ONE */
135 if (temp == 5) temp++;
136 swiz |= temp << i*3;
137 }
138 if (src.NegateBase)
139 swiz |= (R500_SWIZ_MOD_NEG << 9);
140 return swiz;
141 }
142
143 static inline GLuint make_rgba_swizzle(GLuint src) {
144 GLuint swiz = 0x0;
145 GLuint temp;
146 int i;
147 for (i = 0; i < 4; i++) {
148 temp = GET_SWZ(src, i);
149 /* Fix SWIZZLE_ONE */
150 if (temp == 5) temp++;
151 swiz |= temp << i*3;
152 }
153 return swiz;
154 }
155
156 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
157 GLuint swiz = GET_SWZ(src.Swizzle, 3);
158
159 if (swiz == 5) swiz++;
160
161 if (src.NegateBase)
162 swiz |= (R500_SWIZ_MOD_NEG << 3);
163
164 return swiz;
165 }
166
167 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
168 GLuint swiz = GET_SWZ(src.Swizzle, 0);
169
170 if (swiz == 5) swiz++;
171 return swiz;
172 }
173
174 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
175 GLuint swiz = 0x0, temp = 0x0;
176 int i;
177 for (i = 0; i < 4; i++) {
178 temp = GET_SWZ(src.Swizzle, i) & 0x3;
179 swiz |= temp << i*2;
180 }
181 return swiz;
182 }
183
184 static int get_temp(struct r500_fragment_program *fp, int slot) {
185
186 COMPILE_STATE;
187
188 int r = fp->temp_reg_offset + cs->temp_in_use + slot;
189
190 if (r > R500_US_NUM_TEMP_REGS) {
191 ERROR("Too many temporary registers requested, can't compile!\n");
192 }
193
194 return r;
195 }
196
197 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
198 static GLuint emit_const4fv(struct r500_fragment_program *fp,
199 const GLfloat * cp)
200 {
201 GLuint reg = 0x0;
202 int index;
203
204 for (index = 0; index < fp->const_nr; ++index) {
205 if (fp->constant[index] == cp)
206 break;
207 }
208
209 if (index >= fp->const_nr) {
210 if (index >= R500_US_NUM_CONST_REGS) {
211 ERROR("Out of hw constants!\n");
212 return reg;
213 }
214
215 fp->const_nr++;
216 fp->constant[index] = cp;
217 }
218
219 reg = index | REG_CONSTANT;
220 return reg;
221 }
222
223 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
224 COMPILE_STATE;
225 GLuint reg;
226 switch (src.File) {
227 case PROGRAM_TEMPORARY:
228 reg = src.Index + fp->temp_reg_offset;
229 break;
230 case PROGRAM_INPUT:
231 reg = cs->inputs[src.Index].reg;
232 break;
233 case PROGRAM_LOCAL_PARAM:
234 reg = emit_const4fv(fp,
235 fp->mesa_program.Base.LocalParams[src.
236 Index]);
237 break;
238 case PROGRAM_ENV_PARAM:
239 reg = emit_const4fv(fp,
240 fp->ctx->FragmentProgram.Parameters[src.
241 Index]);
242 break;
243 case PROGRAM_STATE_VAR:
244 case PROGRAM_NAMED_PARAM:
245 case PROGRAM_CONSTANT:
246 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
247 ParameterValues[src.Index]);
248 break;
249 default:
250 ERROR("Can't handle src.File %x\n", src.File);
251 reg = 0x0;
252 break;
253 }
254 return reg;
255 }
256
257 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
258 GLuint reg;
259 switch (dest.File) {
260 case PROGRAM_TEMPORARY:
261 reg = dest.Index + fp->temp_reg_offset;
262 break;
263 case PROGRAM_OUTPUT:
264 /* Eventually we may need to handle multiple
265 * rendering targets... */
266 reg = dest.Index;
267 break;
268 default:
269 ERROR("Can't handle dest.File %x\n", dest.File);
270 reg = 0x0;
271 break;
272 }
273 return reg;
274 }
275
276 static void emit_tex(struct r500_fragment_program *fp,
277 struct prog_instruction *fpi, int dest, int counter)
278 {
279 int hwsrc, hwdest;
280 GLuint mask;
281
282 mask = fpi->DstReg.WriteMask << 11;
283 hwsrc = make_src(fp, fpi->SrcReg[0]);
284
285 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
286 hwdest = get_temp(fp, 0);
287 } else {
288 hwdest = dest;
289 }
290
291 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
292 | R500_INST_TEX_SEM_WAIT;
293
294 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
295 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
296
297 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
298 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
299
300 switch (fpi->Opcode) {
301 case OPCODE_KIL:
302 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
303 break;
304 case OPCODE_TEX:
305 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
306 break;
307 case OPCODE_TXB:
308 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
309 break;
310 case OPCODE_TXP:
311 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
312 break;
313 default:
314 ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode);
315 }
316
317 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
318 | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0]))
319 /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
320 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */
321 | R500_TEX_DST_ADDR(hwdest)
322 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
323 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
324
325 fp->inst[counter].inst3 = 0x0;
326 fp->inst[counter].inst4 = 0x0;
327 fp->inst[counter].inst5 = 0x0;
328
329 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
330 counter++;
331 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
332 | R500_INST_TEX_SEM_WAIT | (mask << 4);
333 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
334 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
335 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
336 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
337 | R500_ALU_RGB_SEL_B_SRC0
338 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
339 | R500_ALU_RGB_OMOD_DISABLE;
340 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
341 | R500_ALPHA_ADDRD(dest)
342 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
343 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
344 | R500_ALPHA_OMOD_DISABLE;
345 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
346 | R500_ALU_RGBA_ADDRD(dest)
347 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
348 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
349 }
350 }
351
352 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
353 /* Ideally, we shouldn't have to explicitly clear memory here! */
354 fp->inst[counter].inst0 = 0x0;
355 fp->inst[counter].inst1 = 0x0;
356 fp->inst[counter].inst2 = 0x0;
357 fp->inst[counter].inst3 = 0x0;
358 fp->inst[counter].inst4 = 0x0;
359 fp->inst[counter].inst5 = 0x0;
360
361 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
362 fp->inst[counter].inst0 = R500_INST_TYPE_OUT;
363
364 if (fpi->DstReg.Index == FRAG_RESULT_COLR)
365 fp->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15);
366
367 if (fpi->DstReg.Index == FRAG_RESULT_DEPR)
368 fp->inst[counter].inst4 = R500_ALPHA_W_OMASK;
369 } else {
370 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
371 /* pixel_mask */
372 | (fpi->DstReg.WriteMask << 11);
373 }
374
375 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
376 }
377
378 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) {
379 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
380 * it is technically more accurate and recommended by ATI/AMD. */
381 emit_alu(fp, counter, fpi);
382 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
383 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
384 /* (De)mangle the swizzle from Mesa to R500. */
385 swizzle = make_rgba_swizzle(swizzle);
386 /* 0x1FF is 9 bits, size of an RGB swizzle. */
387 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
388 | MAKE_SWIZ_RGB_A((swizzle & 0x1ff))
389 | R500_ALU_RGB_SEL_B_SRC0
390 | MAKE_SWIZ_RGB_B((swizzle & 0x1ff))
391 | R500_ALU_RGB_OMOD_DISABLE;
392 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
393 | R500_ALPHA_ADDRD(dest)
394 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3))
395 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3))
396 | R500_ALPHA_OMOD_DISABLE;
397 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
398 | R500_ALU_RGBA_ADDRD(dest)
399 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
400 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
401 }
402
403 static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int one, int two, int three) {
404 /* Note: This code was all Corbin's. Corbin is a rather hackish coder.
405 * If you can make it pretty or fast, please do so! */
406 emit_alu(fp, counter, fpi);
407 /* Common MAD stuff */
408 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
409 | R500_ALPHA_ADDRD(make_dest(fp, fpi->DstReg));
410 fp->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD
411 | R500_ALU_RGBA_ADDRD(make_dest(fp, fpi->DstReg));
412 switch (one) {
413 case 0:
414 case 1:
415 case 2:
416 fp->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(fp, fpi->SrcReg[one]));
417 fp->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(fp, fpi->SrcReg[one]));
418 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0
419 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one]));
420 fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0
421 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one]));
422 break;
423 case R500_SWIZZLE_ZERO:
424 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO);
425 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO);
426 break;
427 case R500_SWIZZLE_ONE:
428 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE);
429 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE);
430 break;
431 default:
432 ERROR("Bad src index in emit_mad: %d\n", one);
433 break;
434 }
435 switch (two) {
436 case 0:
437 case 1:
438 case 2:
439 fp->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(fp, fpi->SrcReg[two]));
440 fp->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(fp, fpi->SrcReg[two]));
441 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
442 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two]));
443 fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1
444 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two]));
445 break;
446 case R500_SWIZZLE_ZERO:
447 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
448 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
449 break;
450 case R500_SWIZZLE_ONE:
451 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
452 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
453 break;
454 default:
455 ERROR("Bad src index in emit_mad: %d\n", two);
456 break;
457 }
458 switch (three) {
459 case 0:
460 case 1:
461 case 2:
462 fp->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(fp, fpi->SrcReg[three]));
463 fp->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(fp, fpi->SrcReg[three]));
464 fp->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2
465 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three]))
466 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
467 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three]));
468 break;
469 case R500_SWIZZLE_ZERO:
470 fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
471 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
472 break;
473 case R500_SWIZZLE_ONE:
474 fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE)
475 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE);
476 break;
477 default:
478 ERROR("Bad src index in emit_mad: %d\n", three);
479 break;
480 }
481 }
482
483 static void emit_sop(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int opcode, GLuint src, GLuint dest) {
484 emit_alu(fp, counter, fpi);
485 fp->inst[counter].inst1 = R500_RGB_ADDR0(src);
486 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src);
487 fp->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest)
488 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
489 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
490 | R500_ALU_RGBA_ADDRD(dest);
491 switch (opcode) {
492 case OPCODE_EX2:
493 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2;
494 break;
495 case OPCODE_LG2:
496 fp->inst[counter].inst4 |= R500_ALPHA_OP_LN2;
497 break;
498 case OPCODE_RCP:
499 fp->inst[counter].inst4 |= R500_ALPHA_OP_RCP;
500 break;
501 case OPCODE_RSQ:
502 fp->inst[counter].inst4 |= R500_ALPHA_OP_RSQ;
503 break;
504 default:
505 ERROR("Bad opcode in emit_sop: %d\n", opcode);
506 break;
507 }
508 }
509
510 static GLboolean parse_program(struct r500_fragment_program *fp)
511 {
512 struct gl_fragment_program *mp = &fp->mesa_program;
513 const struct prog_instruction *inst = mp->Base.Instructions;
514 struct prog_instruction *fpi;
515 GLuint src[3], dest = 0;
516 int temp_swiz, counter = 0;
517
518 if (!inst || inst[0].Opcode == OPCODE_END) {
519 ERROR("The program is empty!\n");
520 return GL_FALSE;
521 }
522
523 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
524
525 if (fpi->Opcode != OPCODE_KIL) {
526 dest = make_dest(fp, fpi->DstReg);
527 }
528
529 switch (fpi->Opcode) {
530 case OPCODE_ABS:
531 emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
532 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
533 | R500_ALU_RGB_MOD_B_ABS;
534 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
535 | R500_ALPHA_MOD_B_ABS;
536 break;
537 case OPCODE_ADD:
538 /* Variation on MAD: 1*src0+src1 */
539 emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
540 break;
541 case OPCODE_CMP:
542 /* This inst's selects need to be swapped as follows:
543 * 0 -> C ; 1 -> B ; 2 -> A */
544 src[0] = make_src(fp, fpi->SrcReg[0]);
545 src[1] = make_src(fp, fpi->SrcReg[1]);
546 src[2] = make_src(fp, fpi->SrcReg[2]);
547 emit_alu(fp, counter, fpi);
548 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
549 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
550 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
551 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
552 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
553 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
554 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
555 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
556 | R500_ALPHA_ADDRD(dest)
557 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
558 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
559 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
560 | R500_ALU_RGBA_ADDRD(dest)
561 | R500_ALU_RGBA_SEL_C_SRC2
562 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
563 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
564 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
565 break;
566 case OPCODE_COS:
567 src[0] = make_src(fp, fpi->SrcReg[0]);
568 src[1] = emit_const4fv(fp, RCP_2PI);
569 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
570 | (R500_WRITEMASK_ARGB << 11);
571 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
572 | R500_RGB_ADDR1(src[1]);
573 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
574 | R500_ALPHA_ADDR1(src[1]);
575 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
576 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
577 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
578 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
579 | R500_ALPHA_ADDRD(get_temp(fp, 0))
580 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
581 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
582 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
583 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
584 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
585 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
586 counter++;
587 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
588 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
589 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
590 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
591 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
592 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
593 | R500_ALPHA_ADDRD(get_temp(fp, 1))
594 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
595 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
596 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
597 counter++;
598 emit_alu(fp, counter, fpi);
599 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
600 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
601 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
602 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
603 | R500_ALPHA_ADDRD(dest)
604 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
605 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
606 | R500_ALU_RGBA_ADDRD(dest);
607 break;
608 case OPCODE_DP3:
609 src[0] = make_src(fp, fpi->SrcReg[0]);
610 src[1] = make_src(fp, fpi->SrcReg[1]);
611 emit_alu(fp, counter, fpi);
612 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
613 | R500_RGB_ADDR1(src[1]);
614 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
615 | R500_ALPHA_ADDR1(src[1]);
616 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
617 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
618 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
619 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
620 | R500_ALPHA_ADDRD(dest)
621 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
622 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
623 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
624 | R500_ALU_RGBA_ADDRD(dest);
625 break;
626 case OPCODE_DP4:
627 src[0] = make_src(fp, fpi->SrcReg[0]);
628 src[1] = make_src(fp, fpi->SrcReg[1]);
629 /* Based on DP3 */
630 emit_alu(fp, counter, fpi);
631 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
632 | R500_RGB_ADDR1(src[1]);
633 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
634 | R500_ALPHA_ADDR1(src[1]);
635 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
636 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
637 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
638 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
639 | R500_ALPHA_ADDRD(dest)
640 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
641 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
642 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
643 | R500_ALU_RGBA_ADDRD(dest);
644 break;
645 case OPCODE_DPH:
646 src[0] = make_src(fp, fpi->SrcReg[0]);
647 src[1] = make_src(fp, fpi->SrcReg[1]);
648 /* Based on DP3 */
649 emit_alu(fp, counter, fpi);
650 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
651 | R500_RGB_ADDR1(src[1]);
652 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
653 | R500_ALPHA_ADDR1(src[1]);
654 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
655 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
656 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
657 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
658 | R500_ALPHA_ADDRD(dest)
659 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
660 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
661 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
662 | R500_ALU_RGBA_ADDRD(dest);
663 break;
664 case OPCODE_DST:
665 src[0] = make_src(fp, fpi->SrcReg[0]);
666 src[1] = make_src(fp, fpi->SrcReg[1]);
667 /* [1, src0.y*src1.y, src0.z, src1.w]
668 * So basically MUL with lotsa swizzling. */
669 emit_alu(fp, counter, fpi);
670 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
671 | R500_RGB_ADDR1(src[1]);
672 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
673 | R500_ALPHA_ADDR1(src[1]);
674 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
675 | R500_ALU_RGB_SEL_B_SRC1;
676 /* Select [1, y, z, 1] */
677 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE;
678 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz);
679 /* Select [1, y, 1, w] */
680 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6);
681 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz);
682 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
683 | R500_ALPHA_ADDRD(dest)
684 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
685 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
686 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
687 | R500_ALU_RGBA_ADDRD(dest)
688 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
689 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
690 break;
691 case OPCODE_EX2:
692 src[0] = make_src(fp, fpi->SrcReg[0]);
693 emit_sop(fp, counter, fpi, OPCODE_EX2, src[0], dest);
694 break;
695 case OPCODE_FRC:
696 src[0] = make_src(fp, fpi->SrcReg[0]);
697 emit_alu(fp, counter, fpi);
698 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
699 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
700 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
701 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
702 fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC
703 | R500_ALPHA_ADDRD(dest)
704 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
705 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
706 | R500_ALU_RGBA_ADDRD(dest);
707 break;
708 case OPCODE_LG2:
709 src[0] = make_src(fp, fpi->SrcReg[0]);
710 emit_sop(fp, counter, fpi, OPCODE_LG2, src[0], dest);
711 break;
712 case OPCODE_LIT:
713 /* To be honest, I have no idea how I came up with the following.
714 * All I know is that it's based on the r3xx stuff, and was
715 * concieved with the help of NyQuil. Mmm, MyQuil. */
716
717 /* First instruction */
718 src[0] = make_src(fp, fpi->SrcReg[0]);
719 src[1] = emit_const4fv(fp, LIT);
720 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
721 | (R500_WRITEMASK_ARG << 11);
722 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
723 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
724 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
725 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
726 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
727 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
728 | R500_ALPHA_ADDRD(get_temp(fp, 0))
729 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
730 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
731 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
732 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
733 counter++;
734 /* Second instruction */
735 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11);
736 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) | R500_RGB_ADDR1(src[1]);
737 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
738 /* Select [w, w, w, y] */
739 temp_swiz = 3 | (3 << 3) | (3 << 6);
740 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
741 | MAKE_SWIZ_RGB_A(temp_swiz)
742 | R500_ALU_RGB_SEL_B_SRC1
743 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
744 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
745 | R500_ALPHA_ADDRD(get_temp(fp, 0))
746 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G;
747 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
748 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
749 counter++;
750 /* Third instruction */
751 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AG << 11);
752 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
753 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
754 /* Select [x, x, x, z] */
755 temp_swiz = 0;
756 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
757 | MAKE_SWIZ_RGB_A(temp_swiz)
758 | R500_ALU_RGB_SEL_B_SRC0
759 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
760 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
761 | R500_ALPHA_ADDRD(get_temp(fp, 1))
762 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
763 | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_B;
764 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
765 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
766 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
767 | R500_ALU_RGBA_A_SWIZ_0;
768 counter++;
769 /* Fourth instruction */
770 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AR << 11);
771 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
772 fp->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
773 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
774 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
775 | R500_ALPHA_ADDRD(get_temp(fp, 0))
776 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
777 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
778 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
779 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
780 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
781 counter++;
782 /* Fifth instruction */
783 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11);
784 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
785 /* Select [w, w, w] */
786 temp_swiz = 3 | (3 << 3) | (3 << 6);
787 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
788 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
789 | R500_ALU_RGB_SEL_B_SRC0
790 | MAKE_SWIZ_RGB_B(temp_swiz);
791 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
792 | R500_ALPHA_ADDRD(get_temp(fp, 0))
793 | R500_ALPHA_SWIZ_A_1
794 | R500_ALPHA_SWIZ_B_1;
795 /* Select [-y, -y, -y] */
796 temp_swiz = 1 | (1 << 3) | (1 << 6);
797 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
798 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
799 | MAKE_SWIZ_RGBA_C(temp_swiz)
800 | R500_ALU_RGBA_MOD_C_NEG
801 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
802 counter++;
803 /* Final instruction */
804 emit_mov(fp, counter, fpi, get_temp(fp, 0), SWIZZLE_NOOP, dest);
805 break;
806 case OPCODE_LRP:
807 /* src0 * src1 + INV(src0) * src2
808 * 1) MUL src0, src1, temp
809 * 2) PRE 1-src0; MAD srcp, src2, temp */
810 src[0] = make_src(fp, fpi->SrcReg[0]);
811 src[1] = make_src(fp, fpi->SrcReg[1]);
812 src[2] = make_src(fp, fpi->SrcReg[2]);
813 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
814 | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11);
815 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
816 | R500_RGB_ADDR1(src[1]);
817 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
818 | R500_ALPHA_ADDR1(src[1]);
819 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
820 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
821 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
822 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
823 | R500_ALPHA_ADDRD(get_temp(fp, 0))
824 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
825 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
826 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
827 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
828 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
829 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
830 counter++;
831 emit_alu(fp, counter, fpi);
832 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
833 | R500_RGB_ADDR1(src[2])
834 | R500_RGB_ADDR2(get_temp(fp, 0))
835 | R500_RGB_SRCP_OP_1_MINUS_RGB0;
836 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
837 | R500_ALPHA_ADDR1(src[2])
838 | R500_ALPHA_ADDR2(get_temp(fp, 0))
839 | R500_ALPHA_SRCP_OP_1_MINUS_A0;
840 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
841 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
842 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
843 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
844 | R500_ALPHA_ADDRD(dest)
845 | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
846 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
847 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
848 | R500_ALU_RGBA_ADDRD(dest)
849 | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
850 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
851 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
852 break;
853 case OPCODE_MAD:
854 emit_mad(fp, counter, fpi, 0, 1, 2);
855 break;
856 case OPCODE_MAX:
857 src[0] = make_src(fp, fpi->SrcReg[0]);
858 src[1] = make_src(fp, fpi->SrcReg[1]);
859 emit_alu(fp, counter, fpi);
860 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
861 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
862 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
863 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
864 | R500_ALU_RGB_SEL_B_SRC1
865 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
866 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAX
867 | R500_ALPHA_ADDRD(dest)
868 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
869 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
870 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
871 | R500_ALU_RGBA_ADDRD(dest);
872 break;
873 case OPCODE_MIN:
874 src[0] = make_src(fp, fpi->SrcReg[0]);
875 src[1] = make_src(fp, fpi->SrcReg[1]);
876 emit_alu(fp, counter, fpi);
877 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
878 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
879 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
880 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
881 | R500_ALU_RGB_SEL_B_SRC1
882 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
883 fp->inst[counter].inst4 |= R500_ALPHA_OP_MIN
884 | R500_ALPHA_ADDRD(dest)
885 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
886 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
887 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
888 | R500_ALU_RGBA_ADDRD(dest);
889 break;
890 case OPCODE_MOV:
891 emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
892 break;
893 case OPCODE_MUL:
894 /* Variation on MAD: src0*src1+0 */
895 emit_mad(fp, counter, fpi, 0, 1, R500_SWIZZLE_ZERO);
896 break;
897 case OPCODE_POW:
898 /* POW(a,b) = EX2(LN2(a)*b) */
899 src[0] = make_src(fp, fpi->SrcReg[0]);
900 src[1] = make_src(fp, fpi->SrcReg[1]);
901 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
902 | (R500_WRITEMASK_ARGB << 11);
903 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
904 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
905 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
906 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
907 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
908 | R500_ALPHA_ADDRD(get_temp(fp, 0))
909 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
910 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
911 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
912 counter++;
913 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
914 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
915 | R500_RGB_ADDR1(src[1]);
916 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
917 | R500_ALPHA_ADDR1(src[1]);
918 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
919 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
920 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
921 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
922 | R500_ALPHA_ADDRD(get_temp(fp, 1))
923 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
924 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
925 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
926 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
927 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
928 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
929 counter++;
930 emit_alu(fp, counter, fpi);
931 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
932 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
933 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
934 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
935 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2
936 | R500_ALPHA_ADDRD(dest)
937 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
938 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
939 | R500_ALU_RGBA_ADDRD(dest);
940 break;
941 case OPCODE_RCP:
942 src[0] = make_src(fp, fpi->SrcReg[0]);
943 emit_sop(fp, counter, fpi, OPCODE_RCP, src[0], dest);
944 break;
945 case OPCODE_RSQ:
946 src[0] = make_src(fp, fpi->SrcReg[0]);
947 emit_sop(fp, counter, fpi, OPCODE_RSQ, src[0], dest);
948 break;
949 case OPCODE_SCS:
950 src[0] = make_src(fp, fpi->SrcReg[0]);
951 src[1] = emit_const4fv(fp, RCP_2PI);
952 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
953 | (R500_WRITEMASK_ARGB << 11);
954 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
955 | R500_RGB_ADDR1(src[1]);
956 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
957 | R500_ALPHA_ADDR1(src[1]);
958 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
959 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
960 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
961 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
962 | R500_ALPHA_ADDRD(get_temp(fp, 0))
963 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
964 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
965 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
966 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
967 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
968 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
969 counter++;
970 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
971 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
972 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
973 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
974 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
975 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
976 | R500_ALPHA_ADDRD(get_temp(fp, 1))
977 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
978 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
979 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
980 counter++;
981 /* Do a cosine, then a sine, masking out the channels we want to protect. */
982 /* Cosine only goes in R (x) channel. */
983 fpi->DstReg.WriteMask = 0x1;
984 emit_alu(fp, counter, fpi);
985 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
986 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
987 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
988 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
989 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
990 | R500_ALPHA_ADDRD(dest)
991 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
992 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
993 | R500_ALU_RGBA_ADDRD(dest);
994 counter++;
995 /* Sine only goes in G (y) channel. */
996 fpi->DstReg.WriteMask = 0x2;
997 emit_alu(fp, counter, fpi);
998 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
999 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
1000 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1001 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
1002 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
1003 | R500_ALPHA_ADDRD(dest)
1004 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
1005 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
1006 | R500_ALU_RGBA_ADDRD(dest);
1007 break;
1008 case OPCODE_SGE:
1009 src[0] = make_src(fp, fpi->SrcReg[0]);
1010 src[1] = make_src(fp, fpi->SrcReg[1]);
1011 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1012 | (R500_WRITEMASK_ARGB << 11);
1013 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1014 | R500_RGB_ADDR2(src[1]);
1015 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1016 | R500_ALPHA_ADDR2(src[1]);
1017 fp->inst[counter].inst3 = /* 1 */
1018 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1019 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1020 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1021 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1022 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1023 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1024 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1025 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1026 | R500_ALU_RGBA_SEL_C_SRC2
1027 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1028 | R500_ALU_RGBA_MOD_C_NEG
1029 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1030 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1031 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1032 counter++;
1033 /* This inst's selects need to be swapped as follows:
1034 * 0 -> C ; 1 -> B ; 2 -> A */
1035 emit_alu(fp, counter, fpi);
1036 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1037 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1038 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1039 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1040 | R500_ALU_RGB_SEL_B_SRC0
1041 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
1042 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1043 | R500_ALPHA_ADDRD(dest)
1044 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1045 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
1046 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1047 | R500_ALU_RGBA_ADDRD(dest)
1048 | R500_ALU_RGBA_SEL_C_SRC0
1049 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1050 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1051 | R500_ALU_RGBA_A_SWIZ_A;
1052 break;
1053 case OPCODE_SIN:
1054 src[0] = make_src(fp, fpi->SrcReg[0]);
1055 src[1] = emit_const4fv(fp, RCP_2PI);
1056 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1057 | (R500_WRITEMASK_ARGB << 11);
1058 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
1059 | R500_RGB_ADDR1(src[1]);
1060 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
1061 | R500_ALPHA_ADDR1(src[1]);
1062 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1063 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
1064 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
1065 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1066 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1067 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
1068 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
1069 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1070 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1071 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
1072 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
1073 counter++;
1074 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
1075 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1076 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1077 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1078 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
1079 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
1080 | R500_ALPHA_ADDRD(get_temp(fp, 1))
1081 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
1082 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
1083 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
1084 counter++;
1085 emit_alu(fp, counter, fpi);
1086 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
1087 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
1088 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
1089 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
1090 | R500_ALPHA_ADDRD(dest)
1091 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
1092 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
1093 | R500_ALU_RGBA_ADDRD(dest);
1094 break;
1095 case OPCODE_SLT:
1096 src[0] = make_src(fp, fpi->SrcReg[0]);
1097 src[1] = make_src(fp, fpi->SrcReg[1]);
1098 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1099 | (R500_WRITEMASK_ARGB << 11);
1100 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1101 | R500_RGB_ADDR2(src[1]);
1102 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1103 | R500_ALPHA_ADDR2(src[1]);
1104 fp->inst[counter].inst3 = /* 1 */
1105 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1106 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1107 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1108 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1109 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1110 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1111 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1112 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1113 | R500_ALU_RGBA_SEL_C_SRC2
1114 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1115 | R500_ALU_RGBA_MOD_C_NEG
1116 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1117 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1118 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1119 counter++;
1120 /* This inst's selects need to be swapped as follows:
1121 * 0 -> C ; 1 -> B ; 2 -> A */
1122 emit_alu(fp, counter, fpi);
1123 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1124 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1125 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1126 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
1127 | R500_ALU_RGB_SEL_B_SRC0
1128 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
1129 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1130 | R500_ALPHA_ADDRD(dest)
1131 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
1132 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
1133 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1134 | R500_ALU_RGBA_ADDRD(dest)
1135 | R500_ALU_RGBA_SEL_C_SRC0
1136 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1137 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1138 | R500_ALU_RGBA_A_SWIZ_A;
1139 break;
1140 case OPCODE_SUB:
1141 /* Variation on MAD: 1*src0-src1 */
1142 fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */
1143 emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
1144 break;
1145 case OPCODE_SWZ:
1146 /* TODO: The rarer negation masks! */
1147 emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
1148 break;
1149 case OPCODE_XPD:
1150 /* src0 * src1 - src1 * src0
1151 * 1) MUL temp.xyz, src0.yzx, src1.zxy
1152 * 2) MAD src0.zxy, src1.yzx, -temp.xyz */
1153 src[0] = make_src(fp, fpi->SrcReg[0]);
1154 src[1] = make_src(fp, fpi->SrcReg[1]);
1155 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1156 | (R500_WRITEMASK_RGB << 11);
1157 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
1158 | R500_RGB_ADDR1(src[1]);
1159 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
1160 | R500_ALPHA_ADDR1(src[1]);
1161 /* Select [y, z, x] */
1162 temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]);
1163 temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6);
1164 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1165 | MAKE_SWIZ_RGB_A(temp_swiz);
1166 /* Select [z, x, y] */
1167 temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]);
1168 temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6);
1169 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
1170 | MAKE_SWIZ_RGB_B(temp_swiz);
1171 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1172 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1173 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
1174 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
1175 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1176 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1177 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
1178 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
1179 counter++;
1180 emit_alu(fp, counter, fpi);
1181 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
1182 | R500_RGB_ADDR1(src[1])
1183 | R500_RGB_ADDR2(get_temp(fp, 0));
1184 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
1185 | R500_ALPHA_ADDR1(src[1])
1186 | R500_ALPHA_ADDR2(get_temp(fp, 0));
1187 /* Select [z, x, y] */
1188 temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]);
1189 temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6);
1190 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1191 | MAKE_SWIZ_RGB_A(temp_swiz);
1192 /* Select [y, z, x] */
1193 temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]);
1194 temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6);
1195 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
1196 | MAKE_SWIZ_RGB_B(temp_swiz);
1197 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
1198 | R500_ALPHA_ADDRD(dest)
1199 | R500_ALPHA_SWIZ_A_1
1200 | R500_ALPHA_SWIZ_B_1;
1201 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1202 | R500_ALU_RGBA_ADDRD(dest)
1203 | R500_ALU_RGBA_SEL_C_SRC2
1204 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1205 | R500_ALU_RGBA_MOD_C_NEG
1206 | R500_ALU_RGBA_A_SWIZ_0;
1207 break;
1208 case OPCODE_KIL:
1209 case OPCODE_TEX:
1210 case OPCODE_TXB:
1211 case OPCODE_TXP:
1212 emit_tex(fp, fpi, dest, counter);
1213 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1214 counter++;
1215 break;
1216 default:
1217 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
1218 break;
1219 }
1220
1221 /* Finishing touches */
1222 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
1223 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
1224 }
1225
1226 counter++;
1227
1228 if (fp->error)
1229 return GL_FALSE;
1230
1231 }
1232
1233 /* Finish him! (If it's an ALU/OUT instruction...) */
1234 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
1235 fp->inst[counter-1].inst0 |= R500_INST_LAST;
1236 } else {
1237 /* We still need to put an output inst, right? */
1238 WARN_ONCE("Final FP instruction is not an OUT.\n");
1239 }
1240
1241 fp->cs->nrslots = counter;
1242
1243 fp->max_temp_idx++;
1244
1245 return GL_TRUE;
1246 }
1247
1248 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
1249 {
1250 struct r300_pfs_compile_state *cs = NULL;
1251 struct gl_fragment_program *mp = &fp->mesa_program;
1252 struct prog_instruction *fpi;
1253 GLuint InputsRead = mp->Base.InputsRead;
1254 GLuint temps_used = 0;
1255 int i, j;
1256
1257 /* New compile, reset tracking data */
1258 fp->optimization =
1259 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
1260 fp->translated = GL_FALSE;
1261 fp->error = GL_FALSE;
1262 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
1263 fp->const_nr = 0;
1264 /* Size of pixel stack, plus 1. */
1265 fp->max_temp_idx = 1;
1266 /* Temp register offset. */
1267 fp->temp_reg_offset = 0;
1268
1269 _mesa_memset(cs, 0, sizeof(*fp->cs));
1270 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
1271 for (j = 0; j < 3; j++) {
1272 cs->slot[i].vsrc[j] = SRC_CONST;
1273 cs->slot[i].ssrc[j] = SRC_CONST;
1274 }
1275 }
1276
1277 /* Work out what temps the Mesa inputs correspond to, this must match
1278 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1279 * configures itself based on the fragprog's InputsRead
1280 *
1281 * NOTE: this depends on get_hw_temp() allocating registers in order,
1282 * starting from register 0, so we're just going to do that instead.
1283 */
1284
1285 /* Texcoords come first */
1286 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
1287 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1288 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
1289 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
1290 fp->temp_reg_offset;
1291 fp->temp_reg_offset++;
1292 }
1293 }
1294 InputsRead &= ~FRAG_BITS_TEX_ANY;
1295
1296 /* fragment position treated as a texcoord */
1297 if (InputsRead & FRAG_BIT_WPOS) {
1298 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1299 cs->inputs[FRAG_ATTRIB_WPOS].reg =
1300 fp->temp_reg_offset;
1301 fp->temp_reg_offset++;
1302 }
1303 InputsRead &= ~FRAG_BIT_WPOS;
1304
1305 /* Then primary colour */
1306 if (InputsRead & FRAG_BIT_COL0) {
1307 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1308 cs->inputs[FRAG_ATTRIB_COL0].reg =
1309 fp->temp_reg_offset;
1310 fp->temp_reg_offset++;
1311 }
1312 InputsRead &= ~FRAG_BIT_COL0;
1313
1314 /* Secondary color */
1315 if (InputsRead & FRAG_BIT_COL1) {
1316 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1317 cs->inputs[FRAG_ATTRIB_COL1].reg =
1318 fp->temp_reg_offset;
1319 fp->temp_reg_offset++;
1320 }
1321 InputsRead &= ~FRAG_BIT_COL1;
1322
1323 /* Anything else */
1324 if (InputsRead) {
1325 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
1326 /* force read from hwreg 0 for now */
1327 for (i = 0; i < 32; i++)
1328 if (InputsRead & (1 << i))
1329 cs->inputs[i].reg = 0;
1330 }
1331
1332 if (!mp->Base.Instructions) {
1333 ERROR("No instructions found in program, going to go die now.\n");
1334 return;
1335 }
1336
1337 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1338 for (i = 0; i < 3; i++) {
1339 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1340 if (fpi->SrcReg[i].Index >= temps_used)
1341 temps_used = fpi->SrcReg[i].Index + 1;
1342 }
1343 }
1344 }
1345
1346 cs->temp_in_use = temps_used + 1;
1347
1348 fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use;
1349
1350 if (RADEON_DEBUG & DEBUG_PIXEL)
1351 fprintf(stderr, "FP temp indices: fp->max_temp_idx: %d cs->temp_in_use: %d\n", fp->max_temp_idx, cs->temp_in_use);
1352 }
1353
1354 static void update_params(struct r500_fragment_program *fp)
1355 {
1356 struct gl_fragment_program *mp = &fp->mesa_program;
1357
1358 /* Ask Mesa nicely to fill in ParameterValues for us */
1359 if (mp->Base.Parameters)
1360 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1361 }
1362
1363 static void dumb_shader(struct r500_fragment_program *fp)
1364 {
1365 fp->inst[0].inst0 = R500_INST_TYPE_TEX
1366 | R500_INST_TEX_SEM_WAIT
1367 | R500_INST_RGB_WMASK_R
1368 | R500_INST_RGB_WMASK_G
1369 | R500_INST_RGB_WMASK_B
1370 | R500_INST_ALPHA_WMASK
1371 | R500_INST_RGB_CLAMP
1372 | R500_INST_ALPHA_CLAMP;
1373 fp->inst[0].inst1 = R500_TEX_ID(0)
1374 | R500_TEX_INST_LD
1375 | R500_TEX_SEM_ACQUIRE
1376 | R500_TEX_IGNORE_UNCOVERED;
1377 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
1378 | R500_TEX_SRC_S_SWIZ_R
1379 | R500_TEX_SRC_T_SWIZ_G
1380 | R500_TEX_DST_ADDR(0)
1381 | R500_TEX_DST_R_SWIZ_R
1382 | R500_TEX_DST_G_SWIZ_G
1383 | R500_TEX_DST_B_SWIZ_B
1384 | R500_TEX_DST_A_SWIZ_A;
1385 fp->inst[0].inst3 = R500_DX_ADDR(0)
1386 | R500_DX_S_SWIZ_R
1387 | R500_DX_T_SWIZ_R
1388 | R500_DX_R_SWIZ_R
1389 | R500_DX_Q_SWIZ_R
1390 | R500_DY_ADDR(0)
1391 | R500_DY_S_SWIZ_R
1392 | R500_DY_T_SWIZ_R
1393 | R500_DY_R_SWIZ_R
1394 | R500_DY_Q_SWIZ_R;
1395 fp->inst[0].inst4 = 0x0;
1396 fp->inst[0].inst5 = 0x0;
1397
1398 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
1399 R500_INST_TEX_SEM_WAIT |
1400 R500_INST_LAST |
1401 R500_INST_RGB_OMASK_R |
1402 R500_INST_RGB_OMASK_G |
1403 R500_INST_RGB_OMASK_B |
1404 R500_INST_ALPHA_OMASK;
1405 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
1406 R500_RGB_ADDR1(0) |
1407 R500_RGB_ADDR1_CONST |
1408 R500_RGB_ADDR2(0) |
1409 R500_RGB_ADDR2_CONST |
1410 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
1411 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
1412 R500_ALPHA_ADDR1(0) |
1413 R500_ALPHA_ADDR1_CONST |
1414 R500_ALPHA_ADDR2(0) |
1415 R500_ALPHA_ADDR2_CONST |
1416 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
1417 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
1418 R500_ALU_RGB_R_SWIZ_A_R |
1419 R500_ALU_RGB_G_SWIZ_A_G |
1420 R500_ALU_RGB_B_SWIZ_A_B |
1421 R500_ALU_RGB_SEL_B_SRC0 |
1422 R500_ALU_RGB_R_SWIZ_B_1 |
1423 R500_ALU_RGB_B_SWIZ_B_1 |
1424 R500_ALU_RGB_G_SWIZ_B_1;
1425 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
1426 R500_ALPHA_SWIZ_A_A |
1427 R500_ALPHA_SWIZ_B_1;
1428 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
1429 R500_ALU_RGBA_R_SWIZ_0 |
1430 R500_ALU_RGBA_G_SWIZ_0 |
1431 R500_ALU_RGBA_B_SWIZ_0 |
1432 R500_ALU_RGBA_A_SWIZ_0;
1433
1434 fp->cs->nrslots = 2;
1435 fp->translated = GL_TRUE;
1436 }
1437
1438 void r500TranslateFragmentShader(r300ContextPtr r300,
1439 struct r500_fragment_program *fp)
1440 {
1441
1442 struct r300_pfs_compile_state *cs = NULL;
1443
1444 if (!fp->translated) {
1445
1446 init_program(r300, fp);
1447 cs = fp->cs;
1448
1449 if (parse_program(fp) == GL_FALSE) {
1450 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1451 dumb_shader(fp);
1452 fp->inst_offset = 0;
1453 fp->inst_end = cs->nrslots - 1;
1454 return;
1455 }
1456 fp->inst_offset = 0;
1457 fp->inst_end = cs->nrslots - 1;
1458
1459 fp->translated = GL_TRUE;
1460 if (RADEON_DEBUG & DEBUG_PIXEL) {
1461 fprintf(stderr, "Mesa program:\n");
1462 fprintf(stderr, "-------------\n");
1463 _mesa_print_program(&fp->mesa_program.Base);
1464 fflush(stdout);
1465 dump_program(fp);
1466 }
1467
1468
1469 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1470 }
1471
1472 update_params(fp);
1473
1474 }
1475
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \return the name for values 0..7, NULL for anything else.
 */
static char *toswiz(int swiz_val) {
	static char *const swizzle_names[] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;

	return swizzle_names[swiz_val];
}
1489
/**
 * Map the 4-bit opcode field of the RGBA instruction word to a printable
 * name.
 *
 * The result is handed directly to a "%s" conversion by the dumper, so
 * this never returns NULL: values without a known name (13..15 within the
 * 4-bit field, or anything out of range) yield "UNKNOWN".
 *
 * \param op_val  opcode value to decode.
 * \return a pointer to a static string; must not be modified or freed.
 */
static char *toop(int op_val)
{
	char *str = "UNKNOWN";
	switch (op_val) {
	case 0: str = "MAD"; break;
	case 1: str = "DP3"; break;
	case 2: str = "DP4"; break;
	case 3: str = "D2A"; break;
	case 4: str = "MIN"; break;
	case 5: str = "MAX"; break;
	case 6: str = "Reserved"; break;
	case 7: str = "CND"; break;
	case 8: str = "CMP"; break;
	case 9: str = "FRC"; break;
	case 10: str = "SOP"; break;
	case 11: str = "MDH"; break;
	case 12: str = "MDV"; break;
	default: break;
	}
	return str;
}
1510
/**
 * Map the 4-bit opcode field of the alpha instruction word to a printable
 * name.
 *
 * \return the name for values 0..15, NULL for anything else.
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[] = {
		"MAD", "DP", "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV"
	};

	if (op_val < 0 || op_val > 15)
		return NULL;

	return alpha_op_names[op_val];
}
1534
/**
 * Map a 4-bit channel mask to a printable channel list.
 *
 * Bit 0 selects R, bit 1 G, bit 2 B and bit 3 A; alpha is listed first
 * whenever it is set.
 *
 * \return the name for values 0..15, NULL for anything else.
 */
static char *to_mask(int val)
{
	static char *const mask_names[] = {
		"NONE", "R", "G", "RG",
		"B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG",
		"AB", "ARB", "AGB", "ARGB"
	};

	if (val < 0 || val > 15)
		return NULL;

	return mask_names[val];
}
1558
/**
 * Map the 3-bit texture opcode field to a printable name.
 *
 * The result is handed directly to a "%s" conversion by the dumper, so
 * this never returns NULL: value 7 (the one 3-bit value without a known
 * name) and anything out of range yield "UNKNOWN".
 *
 * \param val  texture opcode value to decode.
 * \return a pointer to a static string; must not be modified or freed.
 */
static char *to_texop(int val)
{
	switch(val) {
	case 0: return "NOP";
	case 1: return "LD";
	case 2: return "TEXKILL";
	case 3: return "PROJ";
	case 4: return "LODBIAS";
	case 5: return "LOD";
	case 6: return "DXDY";
	default: break;
	}
	return "UNKNOWN";
}
1572
1573 static void dump_program(struct r500_fragment_program *fp)
1574 {
1575
1576 fprintf(stderr, "R500 Fragment Program:\n--------\n");
1577
1578 int n;
1579 uint32_t inst;
1580 uint32_t inst0;
1581 char *str = NULL;
1582
1583 if (fp->const_nr) {
1584 fprintf(stderr, "--------\nConstants:\n");
1585 for (n = 0; n < fp->const_nr; n++) {
1586 fprintf(stderr, "Constant %d: %f %f\n\t %f %f\n", n,
1587 fp->constant[n][0], fp->constant[n][1], fp->constant[n][2],
1588 fp->constant[n][3]);
1589 }
1590 fprintf(stderr, "--------\n");
1591 }
1592
1593 for (n = 0; n < fp->inst_end+1; n++) {
1594 inst0 = inst = fp->inst[n].inst0;
1595 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1596 switch(inst & 0x3) {
1597 case R500_INST_TYPE_ALU: str = "ALU"; break;
1598 case R500_INST_TYPE_OUT: str = "OUT"; break;
1599 case R500_INST_TYPE_FC: str = "FC"; break;
1600 case R500_INST_TYPE_TEX: str = "TEX"; break;
1601 };
1602 fprintf(stderr,"%s %s %s %s %s ", str,
1603 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1604 inst & R500_INST_LAST ? "LAST" : "",
1605 inst & R500_INST_NOP ? "NOP" : "",
1606 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1607 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1608 to_mask((inst >> 15) & 0xf));
1609
1610 switch(inst0 & 0x3) {
1611 case 0:
1612 case 1:
1613 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1614 inst = fp->inst[n].inst1;
1615
1616 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1617 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1618 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1619 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1620 (inst >> 30));
1621
1622 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1623 inst = fp->inst[n].inst2;
1624 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1625 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1626 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1627 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1628 (inst >> 30));
1629 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1630 inst = fp->inst[n].inst3;
1631 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1632 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1633 (inst >> 11) & 0x3,
1634 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1635 (inst >> 24) & 0x3);
1636
1637
1638 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1639 inst = fp->inst[n].inst4;
1640 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
1641 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1642 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1643 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
1644 (inst >> 31) & 0x1);
1645
1646 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1647 inst = fp->inst[n].inst5;
1648 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1649 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1650 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1651 (inst >> 23) & 0x3,
1652 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1653 break;
1654 case 2:
1655 break;
1656 case 3:
1657 inst = fp->inst[n].inst1;
1658 fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
1659 to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
1660 (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
1661 inst = fp->inst[n].inst2;
1662 fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
1663 inst & 127, inst & (1<<7) ? "(rel)" : "",
1664 toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
1665 toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
1666 (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
1667 toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
1668 toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
1669
1670 fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3);
1671 break;
1672 }
1673 fprintf(stderr,"\n");
1674 }
1675
1676 }