r5xx: Initial (broken) OPCODE_LRP.
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Useful macros and values
 */

/*
 * Print a compile error to stderr, prefixed with the current file and
 * function, and flag the program as failed.  Expects a variable named `fp`
 * with an `error` member to be in scope at the point of use.  The macro
 * appends its own trailing newline to the message.
 */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` pointing at the compile state of the in-scope `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three 3-bit selectors packed as (first << 0) | (second << 3) | (third << 6). */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/*
 * The MAKE_SWIZ_* helpers shift an already-packed swizzle into its field.
 * The argument is parenthesised so that compound expressions such as
 * `a | b` are shifted as a whole rather than being split by precedence.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

/* Writemasks */
#define R500_WRITEMASK_ARGB 0xF
98
99 static void dump_program(struct r500_fragment_program *fp);
100
101 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
102 GLuint swiz = 0x0;
103 GLuint temp;
104 /* This could be optimized, but it should be plenty fast already. */
105 int i;
106 for (i = 0; i < 3; i++) {
107 temp = GET_SWZ(src.Swizzle, i);
108 /* Fix SWIZZLE_ONE */
109 if (temp == 5) temp++;
110 swiz += temp << i*3;
111 }
112 return swiz;
113 }
114
115 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
116 GLuint swiz = GET_SWZ(src.Swizzle, 3);
117
118 if (swiz == 5) swiz++;
119 return swiz;
120 }
121
122 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
123 GLuint swiz = GET_SWZ(src.Swizzle, 0);
124
125 if (swiz == 5) swiz++;
126 return swiz;
127 }
128
129 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
130 GLuint swiz = 0x0;
131 GLuint temp = src.Swizzle;
132 int i;
133 for (i = 0; i < 4; i++) {
134 swiz += (temp & 0x3) << i*2;
135 temp >>= 3;
136 }
137 return swiz;
138 }
139
140 static int get_temp(struct r500_fragment_program *fp, int slot) {
141
142 COMPILE_STATE;
143
144 int r = cs->temp_in_use + 1 + slot;
145
146 if (r > R500_US_NUM_TEMP_REGS) {
147 ERROR("Too many temporary registers requested, can't compile!\n");
148 }
149
150 return r;
151 }
152
153 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
154 static GLuint emit_const4fv(struct r500_fragment_program *fp,
155 const GLfloat * cp)
156 {
157 GLuint reg = 0x0;
158 int index;
159
160 for (index = 0; index < fp->const_nr; ++index) {
161 if (fp->constant[index] == cp)
162 break;
163 }
164
165 if (index >= fp->const_nr) {
166 if (index >= R500_US_NUM_CONST_REGS) {
167 ERROR("Out of hw constants!\n");
168 return reg;
169 }
170
171 fp->const_nr++;
172 fp->constant[index] = cp;
173 }
174
175 reg = index | REG_CONSTANT;
176 return reg;
177 }
178
179 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
180 COMPILE_STATE;
181 GLuint reg;
182 switch (src.File) {
183 case PROGRAM_TEMPORARY:
184 reg = src.Index + fp->temp_reg_offset;
185 break;
186 case PROGRAM_INPUT:
187 reg = cs->inputs[src.Index].reg;
188 break;
189 case PROGRAM_LOCAL_PARAM:
190 reg = emit_const4fv(fp,
191 fp->mesa_program.Base.LocalParams[src.
192 Index]);
193 break;
194 case PROGRAM_ENV_PARAM:
195 reg = emit_const4fv(fp,
196 fp->ctx->FragmentProgram.Parameters[src.
197 Index]);
198 break;
199 case PROGRAM_STATE_VAR:
200 case PROGRAM_NAMED_PARAM:
201 case PROGRAM_CONSTANT:
202 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
203 ParameterValues[src.Index]);
204 break;
205 default:
206 ERROR("Can't handle src.File %x\n", src.File);
207 reg = 0x0;
208 break;
209 }
210 return reg;
211 }
212
213 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
214 GLuint reg;
215 switch (dest.File) {
216 case PROGRAM_TEMPORARY:
217 reg = dest.Index + fp->temp_reg_offset;
218 break;
219 case PROGRAM_OUTPUT:
220 /* Eventually we may need to handle multiple
221 * rendering targets... */
222 reg = dest.Index;
223 break;
224 default:
225 ERROR("Can't handle dest.File %x\n", dest.File);
226 reg = 0x0;
227 break;
228 }
229 return reg;
230 }
231
232 static void emit_tex(struct r500_fragment_program *fp,
233 struct prog_instruction *fpi, int opcode, int dest, int counter)
234 {
235 int hwsrc, hwdest;
236 GLuint mask;
237
238 mask = fpi->DstReg.WriteMask << 11;
239 hwsrc = make_src(fp, fpi->SrcReg[0]);
240
241 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
242 hwdest = get_temp(fp, 0);
243 } else {
244 hwdest = dest;
245 }
246
247 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
248 | R500_INST_TEX_SEM_WAIT;
249
250 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
251 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
252
253 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
254 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
255
256 switch (opcode) {
257 case OPCODE_KIL:
258 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
259 break;
260 case OPCODE_TEX:
261 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
262 break;
263 case OPCODE_TXB:
264 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
265 break;
266 case OPCODE_TXP:
267 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
268 break;
269 default:
270 ERROR("emit_tex can't handle opcode %x\n", opcode);
271 }
272
273 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
274 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
275 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
276 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
277 | R500_TEX_DST_ADDR(hwdest)
278 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
279 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
280
281 fp->inst[counter].inst3 = 0x0;
282 fp->inst[counter].inst4 = 0x0;
283 fp->inst[counter].inst5 = 0x0;
284
285 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
286 counter++;
287 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
288 | R500_INST_TEX_SEM_WAIT | (mask << 4);
289 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
290 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
291 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
292 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
293 | R500_ALU_RGB_SEL_B_SRC0
294 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
295 | R500_ALU_RGB_OMOD_DISABLE;
296 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
297 | R500_ALPHA_ADDRD(dest)
298 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
299 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
300 | R500_ALPHA_OMOD_DISABLE;
301 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
302 | R500_ALU_RGBA_ADDRD(dest)
303 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
304 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
305 }
306 }
307
308 static void dumb_shader(struct r500_fragment_program *fp)
309 {
310 fp->inst[0].inst0 = R500_INST_TYPE_TEX
311 | R500_INST_TEX_SEM_WAIT
312 | R500_INST_RGB_WMASK_R
313 | R500_INST_RGB_WMASK_G
314 | R500_INST_RGB_WMASK_B
315 | R500_INST_ALPHA_WMASK
316 | R500_INST_RGB_CLAMP
317 | R500_INST_ALPHA_CLAMP;
318 fp->inst[0].inst1 = R500_TEX_ID(0)
319 | R500_TEX_INST_LD
320 | R500_TEX_SEM_ACQUIRE
321 | R500_TEX_IGNORE_UNCOVERED;
322 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
323 | R500_TEX_SRC_S_SWIZ_R
324 | R500_TEX_SRC_T_SWIZ_G
325 | R500_TEX_DST_ADDR(0)
326 | R500_TEX_DST_R_SWIZ_R
327 | R500_TEX_DST_G_SWIZ_G
328 | R500_TEX_DST_B_SWIZ_B
329 | R500_TEX_DST_A_SWIZ_A;
330 fp->inst[0].inst3 = R500_DX_ADDR(0)
331 | R500_DX_S_SWIZ_R
332 | R500_DX_T_SWIZ_R
333 | R500_DX_R_SWIZ_R
334 | R500_DX_Q_SWIZ_R
335 | R500_DY_ADDR(0)
336 | R500_DY_S_SWIZ_R
337 | R500_DY_T_SWIZ_R
338 | R500_DY_R_SWIZ_R
339 | R500_DY_Q_SWIZ_R;
340 fp->inst[0].inst4 = 0x0;
341 fp->inst[0].inst5 = 0x0;
342
343 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
344 R500_INST_TEX_SEM_WAIT |
345 R500_INST_LAST |
346 R500_INST_RGB_OMASK_R |
347 R500_INST_RGB_OMASK_G |
348 R500_INST_RGB_OMASK_B |
349 R500_INST_ALPHA_OMASK;
350 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
351 R500_RGB_ADDR1(0) |
352 R500_RGB_ADDR1_CONST |
353 R500_RGB_ADDR2(0) |
354 R500_RGB_ADDR2_CONST |
355 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
356 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
357 R500_ALPHA_ADDR1(0) |
358 R500_ALPHA_ADDR1_CONST |
359 R500_ALPHA_ADDR2(0) |
360 R500_ALPHA_ADDR2_CONST |
361 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
362 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
363 R500_ALU_RGB_R_SWIZ_A_R |
364 R500_ALU_RGB_G_SWIZ_A_G |
365 R500_ALU_RGB_B_SWIZ_A_B |
366 R500_ALU_RGB_SEL_B_SRC0 |
367 R500_ALU_RGB_R_SWIZ_B_1 |
368 R500_ALU_RGB_B_SWIZ_B_1 |
369 R500_ALU_RGB_G_SWIZ_B_1;
370 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
371 R500_ALPHA_SWIZ_A_A |
372 R500_ALPHA_SWIZ_B_1;
373 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
374 R500_ALU_RGBA_R_SWIZ_0 |
375 R500_ALU_RGBA_G_SWIZ_0 |
376 R500_ALU_RGBA_B_SWIZ_0 |
377 R500_ALU_RGBA_A_SWIZ_0;
378
379 fp->cs->nrslots = 2;
380 fp->translated = GL_TRUE;
381 }
382
383 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
384 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
385 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
386 /* output_mask */
387 | (fpi->DstReg.WriteMask << 15);
388 } else {
389 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
390 /* pixel_mask */
391 | (fpi->DstReg.WriteMask << 11);
392 }
393
394 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
395 }
396
397 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
398 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
399 * it is technically more accurate and recommended by ATI/AMD. */
400 GLuint src_reg = make_src(fp, src);
401 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
402 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
403 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
404 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
405 | R500_ALU_RGB_SEL_B_SRC0
406 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
407 | R500_ALU_RGB_OMOD_DISABLE;
408 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
409 | R500_ALPHA_ADDRD(dest)
410 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
411 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
412 | R500_ALPHA_OMOD_DISABLE;
413 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
414 | R500_ALU_RGBA_ADDRD(dest)
415 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
416 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
417 }
418
419 static GLboolean parse_program(struct r500_fragment_program *fp)
420 {
421 struct gl_fragment_program *mp = &fp->mesa_program;
422 const struct prog_instruction *inst = mp->Base.Instructions;
423 struct prog_instruction *fpi;
424 GLuint src[3], dest, temp[2];
425 int flags, pixel_mask = 0, output_mask = 0, counter = 0;
426
427 if (!inst || inst[0].Opcode == OPCODE_END) {
428 ERROR("The program is empty!\n");
429 return GL_FALSE;
430 }
431
432 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
433
434 if (fpi->Opcode != OPCODE_KIL) {
435 dest = make_dest(fp, fpi->DstReg);
436
437 pixel_mask = fpi->DstReg.WriteMask << 11;
438 output_mask = fpi->DstReg.WriteMask << 15;
439 }
440
441 switch (fpi->Opcode) {
442 case OPCODE_ABS:
443 emit_alu(fp, counter, fpi);
444 emit_mov(fp, counter, fpi->SrcReg[0], dest);
445 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
446 | R500_ALU_RGB_MOD_B_ABS;
447 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
448 | R500_ALPHA_MOD_B_ABS;
449 break;
450 case OPCODE_ADD:
451 src[0] = make_src(fp, fpi->SrcReg[0]);
452 src[1] = make_src(fp, fpi->SrcReg[1]);
453 /* Variation on MAD: 1*src0+src1 */
454 emit_alu(fp, counter, fpi);
455 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
456 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
457 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
458 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
459 fp->inst[counter].inst3 = /* 1 */
460 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
461 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
462 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
463 | R500_ALPHA_ADDRD(dest)
464 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
465 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
466 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
467 | R500_ALU_RGBA_ADDRD(dest)
468 | R500_ALU_RGBA_SEL_C_SRC1
469 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
470 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
471 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
472 break;
473 case OPCODE_CMP:
474 /* This inst's selects need to be swapped as follows:
475 * 0 -> C ; 1 -> B ; 2 -> A */
476 src[0] = make_src(fp, fpi->SrcReg[0]);
477 src[1] = make_src(fp, fpi->SrcReg[1]);
478 src[2] = make_src(fp, fpi->SrcReg[2]);
479 emit_alu(fp, counter, fpi);
480 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
481 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
482 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
483 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
484 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
485 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
486 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
487 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
488 | R500_ALPHA_ADDRD(dest)
489 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
490 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
491 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
492 | R500_ALU_RGBA_ADDRD(dest)
493 | R500_ALU_RGBA_SEL_C_SRC2
494 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
495 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
496 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
497 break;
498 case OPCODE_COS:
499 src[0] = make_src(fp, fpi->SrcReg[0]);
500 emit_alu(fp, counter, fpi);
501 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
502 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
503 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
504 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
505 | R500_ALPHA_ADDRD(dest)
506 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
507 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
508 | R500_ALU_RGBA_ADDRD(dest);
509 break;
510 case OPCODE_DP3:
511 src[0] = make_src(fp, fpi->SrcReg[0]);
512 src[1] = make_src(fp, fpi->SrcReg[1]);
513 emit_alu(fp, counter, fpi);
514 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
515 | R500_RGB_ADDR1(src[1]);
516 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
517 | R500_ALPHA_ADDR1(src[1]);
518 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
519 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
520 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
521 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
522 | R500_ALPHA_ADDRD(dest)
523 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
524 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
525 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
526 | R500_ALU_RGBA_ADDRD(dest);
527 break;
528 case OPCODE_DP4:
529 src[0] = make_src(fp, fpi->SrcReg[0]);
530 src[1] = make_src(fp, fpi->SrcReg[1]);
531 /* Based on DP3 */
532 emit_alu(fp, counter, fpi);
533 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
534 | R500_RGB_ADDR1(src[1]);
535 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
536 | R500_ALPHA_ADDR1(src[1]);
537 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
538 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
539 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
540 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
541 | R500_ALPHA_ADDRD(dest)
542 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
543 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
544 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
545 | R500_ALU_RGBA_ADDRD(dest);
546 break;
547 case OPCODE_DPH:
548 src[0] = make_src(fp, fpi->SrcReg[0]);
549 src[1] = make_src(fp, fpi->SrcReg[1]);
550 /* Based on DP3 */
551 emit_alu(fp, counter, fpi);
552 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
553 | R500_RGB_ADDR1(src[1]);
554 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
555 | R500_ALPHA_ADDR1(src[1]);
556 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
557 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
558 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
559 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
560 | R500_ALPHA_ADDRD(dest)
561 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
562 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
563 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
564 | R500_ALU_RGBA_ADDRD(dest);
565 break;
566 case OPCODE_EX2:
567 src[0] = make_src(fp, fpi->SrcReg[0]);
568 emit_alu(fp, counter, fpi);
569 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
570 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
571 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
572 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
573 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
574 | R500_ALPHA_ADDRD(dest)
575 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
576 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
577 | R500_ALU_RGBA_ADDRD(dest);
578 break;
579 case OPCODE_FRC:
580 src[0] = make_src(fp, fpi->SrcReg[0]);
581 emit_alu(fp, counter, fpi);
582 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
583 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
584 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
585 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
586 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
587 | R500_ALPHA_ADDRD(dest)
588 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
589 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
590 | R500_ALU_RGBA_ADDRD(dest);
591 break;
592 case OPCODE_KIL:
593 emit_tex(fp, fpi, OPCODE_KIL, dest, counter);
594 break;
595 case OPCODE_LG2:
596 src[0] = make_src(fp, fpi->SrcReg[0]);
597 emit_alu(fp, counter, fpi);
598 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
599 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
600 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
601 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
602 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
603 | R500_ALPHA_ADDRD(dest)
604 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
605 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
606 | R500_ALU_RGBA_ADDRD(dest);
607 break;
608 case OPCODE_LRP:
609 /* src0 * src1 + INV(src0) * src2 */
610 src[0] = make_src(fp, fpi->SrcReg[0]);
611 src[1] = make_src(fp, fpi->SrcReg[1]);
612 src[2] = make_src(fp, fpi->SrcReg[2]);
613 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
614 | (R500_WRITEMASK_ARGB << 11);
615 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
616 | R500_RGB_ADDR1(src[1]);
617 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
618 | R500_ALPHA_ADDR1(src[1]);
619 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
620 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
621 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
622 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
623 | R500_ALPHA_ADDRD(get_temp(fp, 0))
624 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
625 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
626 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
627 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
628 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
629 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
630 counter++;
631 emit_alu(fp, counter, fpi);
632 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
633 | R500_RGB_ADDR1(get_temp(fp, 0))
634 | R500_RGB_SRCP_OP_1_MINUS_RGB0;
635 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
636 | R500_ALPHA_ADDR1(get_temp(fp, 0))
637 | R500_ALPHA_SRCP_OP_1_MINUS_A0;
638 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
639 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
640 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
641 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
642 | R500_ALPHA_ADDRD(dest)
643 | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
644 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
645 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
646 | R500_ALU_RGBA_ADDRD(dest)
647 | R500_ALU_RGBA_SEL_C_SRC0 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
648 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
649 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
650 break;
651 case OPCODE_MAD:
652 src[0] = make_src(fp, fpi->SrcReg[0]);
653 src[1] = make_src(fp, fpi->SrcReg[1]);
654 src[2] = make_src(fp, fpi->SrcReg[2]);
655 emit_alu(fp, counter, fpi);
656 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
657 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
658 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
659 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
660 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
661 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
662 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
663 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
664 | R500_ALPHA_ADDRD(dest)
665 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
666 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
667 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
668 | R500_ALU_RGBA_ADDRD(dest)
669 | R500_ALU_RGBA_SEL_C_SRC2
670 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
671 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
672 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
673 break;
674 case OPCODE_MAX:
675 src[0] = make_src(fp, fpi->SrcReg[0]);
676 src[1] = make_src(fp, fpi->SrcReg[1]);
677 emit_alu(fp, counter, fpi);
678 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
679 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
680 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
681 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
682 | R500_ALU_RGB_SEL_B_SRC1
683 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
684 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
685 | R500_ALPHA_ADDRD(dest)
686 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
687 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
688 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
689 | R500_ALU_RGBA_ADDRD(dest);
690 break;
691 case OPCODE_MIN:
692 src[0] = make_src(fp, fpi->SrcReg[0]);
693 src[1] = make_src(fp, fpi->SrcReg[1]);
694 emit_alu(fp, counter, fpi);
695 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
696 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
697 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
698 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
699 | R500_ALU_RGB_SEL_B_SRC1
700 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
701 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
702 | R500_ALPHA_ADDRD(dest)
703 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
704 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
705 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
706 | R500_ALU_RGBA_ADDRD(dest);
707 break;
708 case OPCODE_MOV:
709 emit_alu(fp, counter, fpi);
710 emit_mov(fp, counter, fpi->SrcReg[0], dest);
711 break;
712 case OPCODE_MUL:
713 src[0] = make_src(fp, fpi->SrcReg[0]);
714 src[1] = make_src(fp, fpi->SrcReg[1]);
715 /* Variation on MAD: src0*src1+0 */
716 emit_alu(fp, counter, fpi);
717 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
718 | R500_RGB_ADDR1(src[1]);
719 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
720 | R500_ALPHA_ADDR1(src[1]);
721 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
722 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
723 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
724 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
725 | R500_ALPHA_ADDRD(dest)
726 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
727 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
728 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
729 | R500_ALU_RGBA_ADDRD(dest)
730 // | R500_ALU_RGBA_SEL_C_SRC2
731 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
732 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
733 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
734 break;
735 case OPCODE_POW:
736 /* POW(a,b) = EX2(LN2(a)*b) */
737 src[0] = make_src(fp, fpi->SrcReg[0]);
738 src[1] = make_src(fp, fpi->SrcReg[1]);
739 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
740 | (R500_WRITEMASK_ARGB << 11);
741 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
742 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
743 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
744 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
745 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
746 | R500_ALPHA_ADDRD(get_temp(fp, 0))
747 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
748 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
749 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
750 counter++;
751 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
752 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
753 | R500_RGB_ADDR1(src[1]);
754 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
755 | R500_ALPHA_ADDR1(src[1]);
756 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
757 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
758 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
759 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
760 | R500_ALPHA_ADDRD(get_temp(fp, 1))
761 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
762 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
763 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
764 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
765 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
766 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
767 counter++;
768 emit_alu(fp, counter, fpi);
769 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
770 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
771 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
772 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
773 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
774 | R500_ALPHA_ADDRD(dest)
775 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
776 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
777 | R500_ALU_RGBA_ADDRD(dest);
778 break;
779 case OPCODE_RCP:
780 src[0] = make_src(fp, fpi->SrcReg[0]);
781 emit_alu(fp, counter, fpi);
782 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
783 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
784 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
785 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
786 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
787 | R500_ALPHA_ADDRD(dest)
788 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
789 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
790 | R500_ALU_RGBA_ADDRD(dest);
791 break;
792 case OPCODE_RSQ:
793 src[0] = make_src(fp, fpi->SrcReg[0]);
794 emit_alu(fp, counter, fpi);
795 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
796 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
797 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
798 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
799 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
800 | R500_ALPHA_ADDRD(dest)
801 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
802 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
803 | R500_ALU_RGBA_ADDRD(dest);
804 break;
805 case OPCODE_SCS:
806 /* TODO: Make this elegant! */
807 /* Do a cosine, then a sine, masking out the channels we want to protect. */
808 src[0] = make_src(fp, fpi->SrcReg[0]);
809 /* Cosine only goes in R (x) channel. */
810 fpi->DstReg.WriteMask = 0x1;
811 emit_alu(fp, counter, fpi);
812 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
813 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
814 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
815 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
816 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
817 | R500_ALPHA_ADDRD(dest)
818 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
819 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
820 | R500_ALU_RGBA_ADDRD(dest);
821 counter++;
822 /* Sine only goes in G (y) channel. */
823 fpi->DstReg.WriteMask = 0x2;
824 emit_alu(fp, counter, fpi);
825 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
826 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
827 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
828 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
829 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
830 | R500_ALPHA_ADDRD(dest)
831 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
832 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
833 | R500_ALU_RGBA_ADDRD(dest);
834 /* Put 0 into B,A (z,w) channels.
835 counter++;
836 fpi->DstReg.WriteMask = 0xC;
837 emit_alu(fp, counter, fpi);
838 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
839 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
840 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
841 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
842 | R500_ALU_RGB_SEL_B_SRC0
843 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
844 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
845 | R500_ALPHA_ADDRD(dest)
846 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
847 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
848 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
849 | R500_ALU_RGBA_ADDRD(dest)
850 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
851 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); */
852 break;
853 case OPCODE_SGE:
854 /* We use SRCP, so as a precaution we're
855 * going to set NOP in previous inst, if possible. */
856 /* This inst's selects need to be swapped as follows:
857 * 0 -> C ; 1 -> B ; 2 -> A */
858 src[0] = make_src(fp, fpi->SrcReg[0]);
859 src[1] = make_src(fp, fpi->SrcReg[1]);
860 emit_alu(fp, counter, fpi);
861 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
862 | R500_RGB_ADDR1(src[1])
863 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
864 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
865 | R500_ALPHA_ADDR1(src[1])
866 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
867 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
868 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
869 | R500_ALU_RGB_SEL_B_SRC1
870 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
871 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
872 | R500_ALPHA_ADDRD(dest)
873 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
874 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
875 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
876 | R500_ALU_RGBA_ADDRD(dest)
877 | R500_ALU_RGBA_SEL_C_SRCP
878 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
879 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
880 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
881 break;
882 case OPCODE_SIN:
883 src[0] = make_src(fp, fpi->SrcReg[0]);
884 emit_alu(fp, counter, fpi);
885 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
886 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
887 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
888 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
889 | R500_ALPHA_ADDRD(dest)
890 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
891 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
892 | R500_ALU_RGBA_ADDRD(dest);
893 break;
894 case OPCODE_SLT:
895 /* We use SRCP, so as a precaution we're
896 * going to set NOP in previous inst, if possible. */
897 /* This inst's selects need to be swapped as follows:
898 * 0 -> C ; 1 -> B ; 2 -> A */
899 src[0] = make_src(fp, fpi->SrcReg[0]);
900 src[1] = make_src(fp, fpi->SrcReg[1]);
901 emit_alu(fp, counter, fpi);
902 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
903 | R500_RGB_ADDR1(src[1])
904 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
905 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
906 | R500_ALPHA_ADDR1(src[1])
907 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
908 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
909 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
910 | R500_ALU_RGB_SEL_B_SRC1
911 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
912 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
913 | R500_ALPHA_ADDRD(dest)
914 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
915 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
916 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
917 | R500_ALU_RGBA_ADDRD(dest)
918 | R500_ALU_RGBA_SEL_C_SRCP
919 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
920 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
921 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
922 break;
923 case OPCODE_SUB:
924 src[0] = make_src(fp, fpi->SrcReg[0]);
925 src[1] = make_src(fp, fpi->SrcReg[1]);
926 /* Variation on MAD: 1*src0-src1 */
927 emit_alu(fp, counter, fpi);
928 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
929 | R500_RGB_ADDR2(src[1]);
930 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
931 | R500_ALPHA_ADDR2(src[1]);
932 fp->inst[counter].inst3 = /* 1 */
933 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
934 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
935 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
936 | R500_ALPHA_ADDRD(dest)
937 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
938 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
939 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
940 | R500_ALU_RGBA_ADDRD(dest)
941 | R500_ALU_RGBA_SEL_C_SRC2
942 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
943 | R500_ALU_RGBA_MOD_C_NEG
944 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
945 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
946 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
947 break;
948 case OPCODE_SWZ:
949 /* TODO: Negation masks! */
950 emit_alu(fp, counter, fpi);
951 emit_mov(fp, counter, fpi->SrcReg[0], dest);
952 break;
953 case OPCODE_TEX:
954 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
955 if (fpi->DstReg.File == PROGRAM_OUTPUT)
956 counter++;
957 break;
958 case OPCODE_TXB:
959 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
960 if (fpi->DstReg.File == PROGRAM_OUTPUT)
961 counter++;
962 break;
963 case OPCODE_TXP:
964 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
965 if (fpi->DstReg.File == PROGRAM_OUTPUT)
966 counter++;
967 break;
968 default:
969 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
970 break;
971 }
972
973 /* Finishing touches */
974 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
975 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
976 }
977
978 counter++;
979
980 if (fp->error)
981 return GL_FALSE;
982
983 }
984
985 /* Finish him! (If it's an ALU/OUT instruction...) */
986 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
987 fp->inst[counter-1].inst0 |= R500_INST_LAST;
988 } else {
989 /* We still need to put an output inst, right? */
990 WARN_ONCE("Final FP instruction is not an OUT.\n");
991 #if 0
992
993 #endif
994 }
995
996 fp->cs->nrslots = counter;
997
998 fp->max_temp_idx++;
999
1000 return GL_TRUE;
1001 }
1002
1003 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
1004 {
1005 struct r300_pfs_compile_state *cs = NULL;
1006 struct gl_fragment_program *mp = &fp->mesa_program;
1007 struct prog_instruction *fpi;
1008 GLuint InputsRead = mp->Base.InputsRead;
1009 GLuint temps_used = 0;
1010 int i, j;
1011
1012 /* New compile, reset tracking data */
1013 fp->optimization =
1014 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
1015 fp->translated = GL_FALSE;
1016 fp->error = GL_FALSE;
1017 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
1018 fp->cur_node = 0;
1019 fp->first_node_has_tex = 0;
1020 fp->const_nr = 0;
1021 /* Size of pixel stack, plus 1. */
1022 fp->max_temp_idx = 1;
1023 /* Temp register offset. */
1024 fp->temp_reg_offset = 0;
1025 fp->node[0].alu_end = -1;
1026 fp->node[0].tex_end = -1;
1027
1028 _mesa_memset(cs, 0, sizeof(*fp->cs));
1029 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
1030 for (j = 0; j < 3; j++) {
1031 cs->slot[i].vsrc[j] = SRC_CONST;
1032 cs->slot[i].ssrc[j] = SRC_CONST;
1033 }
1034 }
1035
1036 /* Work out what temps the Mesa inputs correspond to, this must match
1037 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1038 * configures itself based on the fragprog's InputsRead
1039 *
1040 * NOTE: this depends on get_hw_temp() allocating registers in order,
1041 * starting from register 0, so we're just going to do that instead.
1042 */
1043
1044 /* Texcoords come first */
1045 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
1046 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1047 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
1048 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
1049 fp->temp_reg_offset;
1050 fp->temp_reg_offset++;
1051 }
1052 }
1053 InputsRead &= ~FRAG_BITS_TEX_ANY;
1054
1055 /* fragment position treated as a texcoord */
1056 if (InputsRead & FRAG_BIT_WPOS) {
1057 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1058 cs->inputs[FRAG_ATTRIB_WPOS].reg =
1059 fp->temp_reg_offset;
1060 fp->temp_reg_offset++;
1061 }
1062 InputsRead &= ~FRAG_BIT_WPOS;
1063
1064 /* Then primary colour */
1065 if (InputsRead & FRAG_BIT_COL0) {
1066 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1067 cs->inputs[FRAG_ATTRIB_COL0].reg =
1068 fp->temp_reg_offset;
1069 fp->temp_reg_offset++;
1070 }
1071 InputsRead &= ~FRAG_BIT_COL0;
1072
1073 /* Secondary color */
1074 if (InputsRead & FRAG_BIT_COL1) {
1075 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1076 cs->inputs[FRAG_ATTRIB_COL1].reg =
1077 fp->temp_reg_offset;
1078 fp->temp_reg_offset++;
1079 }
1080 InputsRead &= ~FRAG_BIT_COL1;
1081
1082 /* Anything else */
1083 if (InputsRead) {
1084 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
1085 /* force read from hwreg 0 for now */
1086 for (i = 0; i < 32; i++)
1087 if (InputsRead & (1 << i))
1088 cs->inputs[i].reg = 0;
1089 }
1090
1091 if (!mp->Base.Instructions) {
1092 ERROR("No instructions found in program, going to go die now.\n");
1093 return;
1094 }
1095
1096 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1097 for (i = 0; i < 3; i++) {
1098 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1099 if (fpi->SrcReg[i].Index > temps_used)
1100 temps_used = fpi->SrcReg[i].Index;
1101 }
1102 }
1103 }
1104
1105 cs->temp_in_use = temps_used;
1106
1107 fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use + 1;
1108 }
1109
1110 static void update_params(struct r500_fragment_program *fp)
1111 {
1112 struct gl_fragment_program *mp = &fp->mesa_program;
1113
1114 /* Ask Mesa nicely to fill in ParameterValues for us */
1115 if (mp->Base.Parameters)
1116 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1117 }
1118
1119 void r500TranslateFragmentShader(r300ContextPtr r300,
1120 struct r500_fragment_program *fp)
1121 {
1122
1123 struct r300_pfs_compile_state *cs = NULL;
1124
1125 if (!fp->translated) {
1126
1127
1128
1129 init_program(r300, fp);
1130 cs = fp->cs;
1131
1132 if (parse_program(fp) == GL_FALSE) {
1133 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1134 dumb_shader(fp);
1135 fp->inst_offset = 0;
1136 fp->inst_end = cs->nrslots - 1;
1137 return;
1138 }
1139 fp->inst_offset = 0;
1140 fp->inst_end = cs->nrslots - 1;
1141
1142 fp->translated = GL_TRUE;
1143 if (RADEON_DEBUG & DEBUG_PIXEL) {
1144 dump_program(fp);
1145 fprintf(stderr, "Mesa program:\n");
1146 fprintf(stderr, "-------------\n");
1147 _mesa_print_program(&fp->mesa_program.Base);
1148 fflush(stdout);
1149 }
1150
1151
1152 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1153 }
1154
1155 update_params(fp);
1156
1157 }
1158
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \param swiz_val selector value, 0..7.
 * \return static string for the selector, or NULL when out of range.
 */
static char *toswiz(int swiz_val) {
	static char *const swizzle_names[] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};
	const int count = (int)(sizeof(swizzle_names) / sizeof(swizzle_names[0]));

	if (swiz_val < 0 || swiz_val >= count)
		return NULL;

	return swizzle_names[swiz_val];
}
1172
/**
 * Map an RGB ALU opcode number to its printable mnemonic.
 *
 * \param op_val opcode value; 0..12 have names.
 * \return static string for the opcode, or NULL for any other value
 *         (same convention as to_alpha_op() and to_mask()).
 */
static char *toop(int op_val)
{
	/* Must start as NULL: values outside 0..12 fall through the switch,
	 * and returning an uninitialized pointer would be undefined. */
	char *str = NULL;
	switch (op_val) {
	case 0: str = "MAD"; break;
	case 1: str = "DP3"; break;
	case 2: str = "DP4"; break;
	case 3: str = "D2A"; break;
	case 4: str = "MIN"; break;
	case 5: str = "MAX"; break;
	case 6: str = "Reserved"; break;
	case 7: str = "CND"; break;
	case 8: str = "CMP"; break;
	case 9: str = "FRC"; break;
	case 10: str = "SOP"; break;
	case 11: str = "MDH"; break;
	case 12: str = "MDV"; break;
	default: break;
	}
	return str;
}
1193
/**
 * Map an alpha ALU opcode number to its printable mnemonic.
 *
 * \param op_val opcode value, 0..15.
 * \return static string for the opcode, or NULL when out of range.
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[] = {
		"MAD", "DP",  "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV"
	};
	const int count = (int)(sizeof(alpha_op_names) / sizeof(alpha_op_names[0]));

	if (op_val < 0 || op_val >= count)
		return NULL;

	return alpha_op_names[op_val];
}
1217
/**
 * Map a 4-bit channel write mask to its printable name.
 *
 * \param val mask value, 0..15.
 * \return static string naming the enabled channels, or NULL when out of
 *         range.
 */
static char *to_mask(int val)
{
	static char *const mask_names[] = {
		"NONE", "R",   "G",   "RG",
		"B",    "RB",  "GB",  "RGB",
		"A",    "AR",  "AG",  "ARG",
		"AB",   "ARB", "AGB", "ARGB"
	};
	const int count = (int)(sizeof(mask_names) / sizeof(mask_names[0]));

	if (val < 0 || val >= count)
		return NULL;

	return mask_names[val];
}
1241
/**
 * Map a texture instruction opcode number to its printable name.
 *
 * \param val opcode value; 0..6 have names.
 * \return static string for the opcode, or NULL for any other value.
 */
static char *to_texop(int val)
{
	static char *const texop_names[] = {
		"NOP", "LD", "TEXKILL", "PROJ", "LODBIAS", "LOD", "DXDY"
	};
	const int count = (int)(sizeof(texop_names) / sizeof(texop_names[0]));

	if (val < 0 || val >= count)
		return NULL;

	return texop_names[val];
}
1255
1256 static void dump_program(struct r500_fragment_program *fp)
1257 {
1258 int pc = 0;
1259 int n;
1260 uint32_t inst;
1261 uint32_t inst0;
1262 char *str = NULL;
1263
1264 for (n = 0; n < fp->inst_end+1; n++) {
1265 inst0 = inst = fp->inst[n].inst0;
1266 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1267 switch(inst & 0x3) {
1268 case R500_INST_TYPE_ALU: str = "ALU"; break;
1269 case R500_INST_TYPE_OUT: str = "OUT"; break;
1270 case R500_INST_TYPE_FC: str = "FC"; break;
1271 case R500_INST_TYPE_TEX: str = "TEX"; break;
1272 };
1273 fprintf(stderr,"%s %s %s %s %s ", str,
1274 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1275 inst & R500_INST_LAST ? "LAST" : "",
1276 inst & R500_INST_NOP ? "NOP" : "",
1277 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1278 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1279 to_mask((inst >> 15) & 0xf));
1280
1281 switch(inst0 & 0x3) {
1282 case 0:
1283 case 1:
1284 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1285 inst = fp->inst[n].inst1;
1286
1287 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1288 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1289 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1290 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1291 (inst >> 30));
1292
1293 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1294 inst = fp->inst[n].inst2;
1295 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1296 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1297 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1298 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1299 (inst >> 30));
1300 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1301 inst = fp->inst[n].inst3;
1302 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1303 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1304 (inst >> 11) & 0x3,
1305 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1306 (inst >> 24) & 0x3);
1307
1308
1309 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1310 inst = fp->inst[n].inst4;
1311 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst & 0xf),
1312 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1313 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1314 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3);
1315
1316 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1317 inst = fp->inst[n].inst5;
1318 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1319 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1320 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1321 (inst >> 23) & 0x3,
1322 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1323 break;
1324 case 2:
1325 break;
1326 case 3:
1327 inst = fp->inst[n].inst1;
1328 fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
1329 to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
1330 (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
1331 inst = fp->inst[n].inst2;
1332 fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
1333 inst & 127, inst & (1<<7) ? "(rel)" : "",
1334 toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
1335 toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
1336 (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
1337 toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
1338 toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
1339
1340 fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3);
1341 break;
1342 }
1343 fprintf(stderr,"\n");
1344 }
1345
1346 }