2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
58 * Useful macros and values
/* ERROR(fmt, ...) prints a message to stderr prefixed with the current file
 * and function name, then sets fp->error to GL_TRUE.  It relies on a variable
 * named `fp` being in scope at the point of use.
 * COMPILE_STATE, defined right after it, declares a local `cs` initialized
 * from fp->cs and has the same requirement.
 * NOTE(review): the closing lines of the ERROR macro are not visible in this
 * excerpt. */
60 #define ERROR(fmt, args...) do { \
61 fprintf(stderr, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
63 fp->error = GL_TRUE; \
66 #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
/* Limits checked by get_temp() (temporaries) and emit_const4fv()
 * (constants) before a register index is handed out. */
68 #define R500_US_NUM_TEMP_REGS 128
69 #define R500_US_NUM_CONST_REGS 256
71 /* "Register" flags */
/* REG_CONSTANT is OR-ed into the index produced by emit_const4fv(). */
72 #define REG_CONSTANT (1 << 8)
73 #define REG_SRC_REL (1 << 9)
74 #define REG_DEST_REL (1 << 7)
/* Swizzle selector values used when building instruction words. */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three-component presets: one selector per component, packed at bit
 * offsets 0, 3 and 6. */
#define R500_SWIZ_RGB_ZERO ((R500_SWIZZLE_ZERO << 0) | (R500_SWIZZLE_ZERO << 3) | (R500_SWIZZLE_ZERO << 6))
#define R500_SWIZ_RGB_ONE ((R500_SWIZZLE_ONE << 0) | (R500_SWIZZLE_ONE << 3) | (R500_SWIZZLE_ONE << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
/* The MAKE_SWIZ_* helpers shift a swizzle value into its field of the given
 * instruction word.  The argument is parenthesized so that an expression
 * argument such as (a | b) is shifted as a whole rather than only its last
 * operand. */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
/* Write mask covering all four components. */
#define R500_WRITEMASK_ARGB 0xF
99 /* 1/(2pi), needed for quick modulus in trig insts
100 * Thanks to glisse for pointing out how to do it! */
/* Constant vector whose visible elements all hold 1/(2*pi).  parse_program()
 * hands it to emit_const4fv() before the MAD/FRC sequence that precedes the
 * COS and SIN operations.
 * NOTE(review): part of the initializer is not visible in this excerpt. */
101 static const GLfloat RCP_2PI
[] = {0.15915494309189535,
104 0.15915494309189535};
/* Forward declaration; the definition of dump_program() is not visible in
 * this excerpt. */
106 static void dump_program(struct r500_fragment_program
*fp
);
/* Builds the RGB part of a hardware swizzle from a source register.
 * The visible code walks components 0..2 of src.Swizzle with GET_SWZ and
 * turns a selector value of 5 into 6.  In this file 6 is R500_SWIZZLE_ONE
 * and 5 is R500_SWIZZLE_HALF, so the bump presumably maps Mesa's "one"
 * selector onto the hardware encoding -- confirm against GET_SWZ's header.
 * NOTE(review): the local declarations, the packing of each selector into
 * the result and the return statement are not visible in this excerpt. */
108 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
111 /* This could be optimized, but it should be plenty fast already. */
113 for (i
= 0; i
< 3; i
++) {
114 temp
= GET_SWZ(src
.Swizzle
, i
);
115 /* Fix SWIZZLE_ONE */
116 if (temp
== 5) temp
++;
/* Builds the alpha swizzle for a source register: reads component 3 of
 * src.Swizzle with GET_SWZ and turns a selector value of 5 into 6, the same
 * adjustment make_rgb_swizzle() labels "Fix SWIZZLE_ONE".
 * NOTE(review): the return statement is not visible in this excerpt. */
122 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
123 GLuint swiz
= GET_SWZ(src
.Swizzle
, 3);
125 if (swiz
== 5) swiz
++;
/* Builds the swizzle used for the alpha operand of the scalar operations
 * that parse_program() pairs with R500_ALU_RGBA_OP_SOP (EX2, LN2, RCP, RSQ,
 * COS, SIN).  It reads component 0 of src.Swizzle with GET_SWZ and turns a
 * selector value of 5 into 6, as the other swizzle helpers do.
 * NOTE(review): the return statement is not visible in this excerpt. */
129 static inline GLuint
make_sop_swizzle(struct prog_src_register src
) {
130 GLuint swiz
= GET_SWZ(src
.Swizzle
, 0);
132 if (swiz
== 5) swiz
++;
/* Builds a texture-coordinate (STRQ) swizzle from src.Swizzle.  Each of the
 * four loop iterations adds the low two bits of temp to the result at bit
 * position 2*i.
 * NOTE(review): the declaration of swiz, any per-iteration update of temp
 * and the return statement are not visible in this excerpt.  The only use of
 * this helper visible here is the commented-out line in emit_tex(). */
136 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
138 GLuint temp
= src
.Swizzle
;
140 for (i
= 0; i
< 4; i
++) {
141 swiz
+= (temp
& 0x3) << i
*2;
/* Computes the hardware index of scratch temporary number `slot` as
 * cs->temp_in_use + 1 + slot, and flags an error through ERROR() when that
 * index exceeds R500_US_NUM_TEMP_REGS.
 * NOTE(review): `cs` presumably comes from COMPILE_STATE and `r` is
 * presumably what gets returned; neither line is visible in this excerpt.
 * NOTE(review): the check uses `>`, so r == R500_US_NUM_TEMP_REGS is
 * accepted -- confirm whether that index is a valid register. */
147 static int get_temp(struct r500_fragment_program
*fp
, int slot
) {
151 int r
= cs
->temp_in_use
+ 1 + slot
;
153 if (r
> R500_US_NUM_TEMP_REGS
) {
154 ERROR("Too many temporary registers requested, can't compile!\n");
160 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
/* Looks up, or allocates, a hardware constant slot for `cp`.
 * The loop scans fp->constant[0 .. const_nr) for an entry equal to cp using
 * ==; callers pass arrays such as RCP_2PI, so this is presumably an address
 * comparison rather than a comparison of values.  When nothing matches,
 * index ends up at const_nr: reaching R500_US_NUM_CONST_REGS is reported
 * with ERROR(), and the visible code stores cp in fp->constant[index].
 * The register value is index with REG_CONSTANT OR-ed in.
 * NOTE(review): the rest of the parameter list, the loop exit, any update of
 * const_nr, the error-path exit and the return are not visible in this
 * excerpt. */
161 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
167 for (index
= 0; index
< fp
->const_nr
; ++index
) {
168 if (fp
->constant
[index
] == cp
)
/* Not found among the constants already recorded. */
172 if (index
>= fp
->const_nr
) {
173 if (index
>= R500_US_NUM_CONST_REGS
) {
174 ERROR("Out of hw constants!\n");
179 fp
->constant
[index
] = cp
;
/* Tag the index as a constant-file register. */
182 reg
= index
| REG_CONSTANT
;
/* Translates a Mesa source register into a hardware register value.
 * The visible cases are:
 *   - PROGRAM_TEMPORARY: src.Index offset by fp->temp_reg_offset;
 *   - an unlabelled branch reading cs->inputs[src.Index].reg;
 *   - PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM and the
 *     STATE_VAR / NAMED_PARAM / CONSTANT group: a constant slot obtained
 *     from emit_const4fv() for the corresponding parameter storage;
 *   - anything else: reported through ERROR().
 * NOTE(review): the switch header (presumably on src.File), the `break`
 * statements, the declarations of reg/cs and the return are not visible in
 * this excerpt. */
186 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
190 case PROGRAM_TEMPORARY
:
191 reg
= src
.Index
+ fp
->temp_reg_offset
;
/* NOTE(review): the case label for this branch is not visible; presumably
 * it handles program inputs -- confirm. */
194 reg
= cs
->inputs
[src
.Index
].reg
;
196 case PROGRAM_LOCAL_PARAM
:
197 reg
= emit_const4fv(fp
,
198 fp
->mesa_program
.Base
.LocalParams
[src
.
201 case PROGRAM_ENV_PARAM
:
202 reg
= emit_const4fv(fp
,
203 fp
->ctx
->FragmentProgram
.Parameters
[src
.
/* These three register files all read from the program's parameter list. */
206 case PROGRAM_STATE_VAR
:
207 case PROGRAM_NAMED_PARAM
:
208 case PROGRAM_CONSTANT
:
209 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
210 ParameterValues
[src
.Index
]);
213 ERROR("Can't handle src.File %x\n", src
.File
);
/* Translates a Mesa destination register into a hardware register value.
 * For PROGRAM_TEMPORARY the result is dest.Index offset by
 * fp->temp_reg_offset; an unsupported file is reported through ERROR().
 * NOTE(review): the switch header, the branch that the "multiple rendering
 * targets" comment belongs to, and the return are not visible in this
 * excerpt. */
220 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
223 case PROGRAM_TEMPORARY
:
224 reg
= dest
.Index
+ fp
->temp_reg_offset
;
227 /* Eventually we may need to handle multiple
228 * rendering targets... */
232 ERROR("Can't handle dest.File %x\n", dest
.File
);
/* Emits a texture instruction for `fpi` into fp->inst[counter].
 *   fp      - fragment program being compiled
 *   fpi     - Mesa instruction supplying the source, write mask and texture
 *             unit/target
 *   opcode  - selects the texture operation bits OR-ed into inst1
 *   dest    - hardware destination register
 *   counter - index of the instruction slot to fill
 * When the Mesa destination is a program output, the fetch targets scratch
 * temporary 0 and a following OUT-type instruction copies it to `dest`.
 * NOTE(review): local declarations, the case labels of the opcode switch,
 * the non-output assignment of hwdest and any change to `counter` between
 * the two instructions are not visible in this excerpt. */
239 static void emit_tex(struct r500_fragment_program
*fp
,
240 struct prog_instruction
*fpi
, int opcode
, int dest
, int counter
)
/* Write mask shifted by 11, the same shift emit_alu() uses for ALU-type
 * instructions. */
245 mask
= fpi
->DstReg
.WriteMask
<< 11;
246 hwsrc
= make_src(fp
, fpi
->SrcReg
[0]);
/* Program outputs are fetched into scratch temporary 0 first. */
248 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
249 hwdest
= get_temp(fp
, 0);
254 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
255 | R500_INST_TEX_SEM_WAIT
;
257 fp
->inst
[counter
].inst1
= R500_TEX_ID(fpi
->TexSrcUnit
)
258 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
/* Rectangle textures additionally set R500_TEX_UNSCALED. */
260 if (fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
261 fp
->inst
[counter
].inst1
|= R500_TEX_UNSCALED
;
/* Opcode-specific texture operation bits.  NOTE(review): the case labels
 * choosing between the following four assignments are not visible. */
265 fp
->inst
[counter
].inst1
|= R500_TEX_INST_TEXKILL
;
268 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
271 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LODBIAS
;
274 fp
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
277 ERROR("emit_tex can't handle opcode %x\n", opcode
);
/* Source and destination addresses with pass-through swizzles on both. */
280 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
281 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
282 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
283 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
284 | R500_TEX_DST_ADDR(hwdest
)
285 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
286 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
288 fp
->inst
[counter
].inst3
= 0x0;
289 fp
->inst
[counter
].inst4
= 0x0;
290 fp
->inst
[counter
].inst5
= 0x0;
/* For program outputs, build an OUT-type instruction that copies scratch
 * temporary 0 to dest with the same CMP pattern as emit_mov().
 * (mask << 4) moves the write mask from bit 11 to bit 15, the shift
 * emit_alu() uses for OUT-type instructions. */
292 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
294 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
295 | R500_INST_TEX_SEM_WAIT
| (mask
<< 4);
296 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
297 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
298 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
299 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
300 | R500_ALU_RGB_SEL_B_SRC0
301 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
)
302 | R500_ALU_RGB_OMOD_DISABLE
;
303 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
304 | R500_ALPHA_ADDRD(dest
)
305 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A
)
306 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A
)
307 | R500_ALPHA_OMOD_DISABLE
;
308 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
309 | R500_ALU_RGBA_ADDRD(dest
)
310 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
311 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
/* Fills fp->inst[0] and fp->inst[1] with a fixed two-instruction program and
 * marks fp as translated.  Every operand is a literal; nothing is read from
 * the Mesa program.
 * NOTE(review): several operand lines of the original statements are not
 * visible in this excerpt, so the instruction words shown are partial. */
315 static void dumb_shader(struct r500_fragment_program
*fp
)
/* Instruction 0: TEX-type, texture ID 0, source and destination address 0,
 * all four components written. */
317 fp
->inst
[0].inst0
= R500_INST_TYPE_TEX
318 | R500_INST_TEX_SEM_WAIT
319 | R500_INST_RGB_WMASK_R
320 | R500_INST_RGB_WMASK_G
321 | R500_INST_RGB_WMASK_B
322 | R500_INST_ALPHA_WMASK
323 | R500_INST_RGB_CLAMP
324 | R500_INST_ALPHA_CLAMP
;
325 fp
->inst
[0].inst1
= R500_TEX_ID(0)
327 | R500_TEX_SEM_ACQUIRE
328 | R500_TEX_IGNORE_UNCOVERED
;
329 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
330 | R500_TEX_SRC_S_SWIZ_R
331 | R500_TEX_SRC_T_SWIZ_G
332 | R500_TEX_DST_ADDR(0)
333 | R500_TEX_DST_R_SWIZ_R
334 | R500_TEX_DST_G_SWIZ_G
335 | R500_TEX_DST_B_SWIZ_B
336 | R500_TEX_DST_A_SWIZ_A
;
337 fp
->inst
[0].inst3
= R500_DX_ADDR(0)
347 fp
->inst
[0].inst4
= 0x0;
348 fp
->inst
[0].inst5
= 0x0;
/* Instruction 1: OUT-type with all output mask bits set, using the MAD
 * opcode for both RGB and alpha.  Judging by the flag names, operand B is
 * the constant one and operand C is zero, which would pass register 0
 * through unchanged -- confirm against the register headers. */
350 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
351 R500_INST_TEX_SEM_WAIT
|
353 R500_INST_RGB_OMASK_R
|
354 R500_INST_RGB_OMASK_G
|
355 R500_INST_RGB_OMASK_B
|
356 R500_INST_ALPHA_OMASK
;
357 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
359 R500_RGB_ADDR1_CONST
|
361 R500_RGB_ADDR2_CONST
|
362 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
363 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
364 R500_ALPHA_ADDR1(0) |
365 R500_ALPHA_ADDR1_CONST
|
366 R500_ALPHA_ADDR2(0) |
367 R500_ALPHA_ADDR2_CONST
|
368 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
369 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
370 R500_ALU_RGB_R_SWIZ_A_R
|
371 R500_ALU_RGB_G_SWIZ_A_G
|
372 R500_ALU_RGB_B_SWIZ_A_B
|
373 R500_ALU_RGB_SEL_B_SRC0
|
374 R500_ALU_RGB_R_SWIZ_B_1
|
375 R500_ALU_RGB_B_SWIZ_B_1
|
376 R500_ALU_RGB_G_SWIZ_B_1
;
377 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
378 R500_ALPHA_SWIZ_A_A
|
380 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
381 R500_ALU_RGBA_R_SWIZ_0
|
382 R500_ALU_RGBA_G_SWIZ_0
|
383 R500_ALU_RGBA_B_SWIZ_0
|
384 R500_ALU_RGBA_A_SWIZ_0
;
/* Flag the program as translated. */
387 fp
->translated
= GL_TRUE
;
/* Initializes inst0 of instruction slot `counter` for an arithmetic
 * operation, based on where `fpi` writes its result:
 *   - destination file PROGRAM_OUTPUT: type OUT, write mask shifted by 15;
 *   - otherwise: type ALU, write mask shifted by 11.
 * R500_INST_TEX_SEM_WAIT is then OR-ed in.
 * inst0 is assigned, not OR-ed, so any bits set in it earlier are replaced;
 * the remaining instruction words are left for the caller to fill in.
 * NOTE(review): the `else` and closing braces are not visible in this
 * excerpt. */
390 static void emit_alu(struct r500_fragment_program
*fp
, int counter
, struct prog_instruction
*fpi
) {
391 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
392 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
394 | (fpi
->DstReg
.WriteMask
<< 15);
396 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
398 | (fpi
->DstReg
.WriteMask
<< 11);
401 fp
->inst
[counter
].inst0
|= R500_INST_TEX_SEM_WAIT
;
/* Fills inst1..inst5 of instruction slot `counter` with a move of `src` into
 * hardware register `dest`.
 *   fp      - fragment program being compiled
 *   counter - index of the instruction slot to fill
 *   src     - Mesa source register; its address comes from make_src() and
 *             its swizzles from make_rgb_swizzle()/make_alpha_swizzle()
 *   dest    - hardware destination register
 * inst0 is not written here; the callers visible in parse_program() call
 * emit_alu() on the same slot first. */
404 static void emit_mov(struct r500_fragment_program
*fp
, int counter
, struct prog_src_register src
, GLuint dest
) {
405 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
406 * it is technically more accurate and recommended by ATI/AMD. */
407 GLuint src_reg
= make_src(fp
, src
);
408 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src_reg
);
409 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src_reg
);
/* Operands A and B both select source 0 with the swizzle of src. */
410 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
411 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src
))
412 | R500_ALU_RGB_SEL_B_SRC0
413 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src
))
414 | R500_ALU_RGB_OMOD_DISABLE
;
415 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
416 | R500_ALPHA_ADDRD(dest
)
417 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src
))
418 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src
))
419 | R500_ALPHA_OMOD_DISABLE
;
/* Operand C uses the zero swizzle for both RGB and alpha. */
420 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
421 | R500_ALU_RGBA_ADDRD(dest
)
422 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
423 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
426 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
428 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
429 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
430 struct prog_instruction
*fpi
;
431 GLuint src
[3], dest
, temp
[2];
432 int flags
, pixel_mask
= 0, output_mask
= 0, counter
= 0;
434 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
435 ERROR("The program is empty!\n");
439 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
441 if (fpi
->Opcode
!= OPCODE_KIL
) {
442 dest
= make_dest(fp
, fpi
->DstReg
);
444 pixel_mask
= fpi
->DstReg
.WriteMask
<< 11;
445 output_mask
= fpi
->DstReg
.WriteMask
<< 15;
448 switch (fpi
->Opcode
) {
450 emit_alu(fp
, counter
, fpi
);
451 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
452 fp
->inst
[counter
].inst3
|= R500_ALU_RGB_MOD_A_ABS
453 | R500_ALU_RGB_MOD_B_ABS
;
454 fp
->inst
[counter
].inst4
|= R500_ALPHA_MOD_A_ABS
455 | R500_ALPHA_MOD_B_ABS
;
458 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
459 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
460 /* Variation on MAD: 1*src0+src1 */
461 emit_alu(fp
, counter
, fpi
);
462 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
463 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(0);
464 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
465 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(0);
466 fp
->inst
[counter
].inst3
= /* 1 */
467 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
468 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
469 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
470 | R500_ALPHA_ADDRD(dest
)
471 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
472 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
473 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
474 | R500_ALU_RGBA_ADDRD(dest
)
475 | R500_ALU_RGBA_SEL_C_SRC1
476 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
477 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
478 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
481 /* This inst's selects need to be swapped as follows:
482 * 0 -> C ; 1 -> B ; 2 -> A */
483 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
484 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
485 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
486 emit_alu(fp
, counter
, fpi
);
487 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[2])
488 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[0]);
489 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[2])
490 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[0]);
491 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
492 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[2]))
493 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
494 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
495 | R500_ALPHA_ADDRD(dest
)
496 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[2]))
497 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
498 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
499 | R500_ALU_RGBA_ADDRD(dest
)
500 | R500_ALU_RGBA_SEL_C_SRC2
501 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
502 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
503 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
506 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
507 src
[1] = emit_const4fv(fp
, RCP_2PI
);
508 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
509 | (R500_WRITEMASK_ARGB
<< 11);
510 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
511 | R500_RGB_ADDR1(src
[1]);
512 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
513 | R500_ALPHA_ADDR1(src
[1]);
514 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
515 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
516 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
517 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
518 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
519 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
520 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
521 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
522 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
523 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
524 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
526 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
527 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
528 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
529 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
530 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
531 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
532 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
533 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
534 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
535 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1));
537 emit_alu(fp
, counter
, fpi
);
538 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
539 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
540 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
;
541 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
542 | R500_ALPHA_ADDRD(dest
)
543 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
544 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
545 | R500_ALU_RGBA_ADDRD(dest
);
548 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
549 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
550 emit_alu(fp
, counter
, fpi
);
551 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
552 | R500_RGB_ADDR1(src
[1]);
553 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
554 | R500_ALPHA_ADDR1(src
[1]);
555 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
556 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
557 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
558 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
559 | R500_ALPHA_ADDRD(dest
)
560 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
561 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
562 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
563 | R500_ALU_RGBA_ADDRD(dest
);
566 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
567 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
569 emit_alu(fp
, counter
, fpi
);
570 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
571 | R500_RGB_ADDR1(src
[1]);
572 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
573 | R500_ALPHA_ADDR1(src
[1]);
574 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
575 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
576 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
577 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
578 | R500_ALPHA_ADDRD(dest
)
579 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
580 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
581 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
582 | R500_ALU_RGBA_ADDRD(dest
);
585 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
586 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
588 emit_alu(fp
, counter
, fpi
);
589 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
590 | R500_RGB_ADDR1(src
[1]);
591 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
592 | R500_ALPHA_ADDR1(src
[1]);
593 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
594 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
595 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
596 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
597 | R500_ALPHA_ADDRD(dest
)
598 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
599 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
600 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
601 | R500_ALU_RGBA_ADDRD(dest
);
604 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
605 emit_alu(fp
, counter
, fpi
);
606 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
607 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
608 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
609 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
610 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_EX2
611 | R500_ALPHA_ADDRD(dest
)
612 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
613 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
614 | R500_ALU_RGBA_ADDRD(dest
);
617 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
618 emit_alu(fp
, counter
, fpi
);
619 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
620 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
621 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
622 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
623 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
624 | R500_ALPHA_ADDRD(dest
)
625 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
626 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
627 | R500_ALU_RGBA_ADDRD(dest
);
630 emit_tex(fp
, fpi
, OPCODE_KIL
, dest
, counter
);
633 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
634 emit_alu(fp
, counter
, fpi
);
635 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
636 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
637 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
638 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
639 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_LN2
640 | R500_ALPHA_ADDRD(dest
)
641 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
642 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
643 | R500_ALU_RGBA_ADDRD(dest
);
646 /* src0 * src1 + INV(src0) * src2
647 * 1) MUL src0, src1, temp
648 * 2) PRE 1-src0; MAD srcp, src2, temp */
649 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
650 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
651 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
652 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
653 | R500_INST_NOP
| (R500_WRITEMASK_ARGB
<< 11);
654 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
655 | R500_RGB_ADDR1(src
[1]);
656 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
657 | R500_ALPHA_ADDR1(src
[1]);
658 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
659 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
660 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
661 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
662 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
663 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
664 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
665 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
666 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
667 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
668 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
670 emit_alu(fp
, counter
, fpi
);
671 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
672 | R500_RGB_ADDR1(src
[2])
673 | R500_RGB_ADDR2(get_temp(fp
, 0))
674 | R500_RGB_SRCP_OP_1_MINUS_RGB0
;
675 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
676 | R500_ALPHA_ADDR1(src
[2])
677 | R500_ALPHA_ADDR2(get_temp(fp
, 0))
678 | R500_ALPHA_SRCP_OP_1_MINUS_A0
;
679 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRCP
680 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
681 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
682 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
683 | R500_ALPHA_ADDRD(dest
)
684 | R500_ALPHA_SEL_A_SRCP
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
685 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
686 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
687 | R500_ALU_RGBA_ADDRD(dest
)
688 | R500_ALU_RGBA_SEL_C_SRC2
| MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
689 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
690 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
693 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
694 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
695 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
696 emit_alu(fp
, counter
, fpi
);
697 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
698 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
699 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
700 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
701 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
702 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
703 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
704 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
705 | R500_ALPHA_ADDRD(dest
)
706 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
707 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
708 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
709 | R500_ALU_RGBA_ADDRD(dest
)
710 | R500_ALU_RGBA_SEL_C_SRC2
711 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
712 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
713 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
716 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
717 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
718 emit_alu(fp
, counter
, fpi
);
719 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
720 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
721 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
722 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
723 | R500_ALU_RGB_SEL_B_SRC1
724 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
725 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
726 | R500_ALPHA_ADDRD(dest
)
727 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
728 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
729 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
730 | R500_ALU_RGBA_ADDRD(dest
);
733 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
734 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
735 emit_alu(fp
, counter
, fpi
);
736 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
737 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
738 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
739 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
740 | R500_ALU_RGB_SEL_B_SRC1
741 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
742 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MIN
743 | R500_ALPHA_ADDRD(dest
)
744 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
745 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
746 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
747 | R500_ALU_RGBA_ADDRD(dest
);
750 emit_alu(fp
, counter
, fpi
);
751 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
754 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
755 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
756 /* Variation on MAD: src0*src1+0 */
757 emit_alu(fp
, counter
, fpi
);
758 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
759 | R500_RGB_ADDR1(src
[1]);
760 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
761 | R500_ALPHA_ADDR1(src
[1]);
762 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
763 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
764 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
765 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
766 | R500_ALPHA_ADDRD(dest
)
767 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
768 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
769 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
770 | R500_ALU_RGBA_ADDRD(dest
)
771 // | R500_ALU_RGBA_SEL_C_SRC2
772 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
773 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
774 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
777 /* POW(a,b) = EX2(LN2(a)*b) */
778 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
779 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
780 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
781 | (R500_WRITEMASK_ARGB
<< 11);
782 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
783 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
784 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
785 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
786 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_LN2
787 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
788 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
789 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
790 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0));
792 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
793 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0))
794 | R500_RGB_ADDR1(src
[1]);
795 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0))
796 | R500_ALPHA_ADDR1(src
[1]);
797 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
798 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
799 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
800 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
801 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
802 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
803 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
804 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
805 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1))
806 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
807 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
809 emit_alu(fp
, counter
, fpi
);
810 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
811 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
812 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
813 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
814 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_EX2
815 | R500_ALPHA_ADDRD(dest
)
816 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
817 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
818 | R500_ALU_RGBA_ADDRD(dest
);
821 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
822 emit_alu(fp
, counter
, fpi
);
823 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
824 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
825 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
826 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
827 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RCP
828 | R500_ALPHA_ADDRD(dest
)
829 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
830 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
831 | R500_ALU_RGBA_ADDRD(dest
);
834 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
835 emit_alu(fp
, counter
, fpi
);
836 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
837 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
838 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
839 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
840 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RSQ
841 | R500_ALPHA_ADDRD(dest
)
842 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
843 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
844 | R500_ALU_RGBA_ADDRD(dest
);
847 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
848 src
[1] = emit_const4fv(fp
, RCP_2PI
);
849 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
850 | (R500_WRITEMASK_ARGB
<< 11);
851 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
852 | R500_RGB_ADDR1(src
[1]);
853 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
854 | R500_ALPHA_ADDR1(src
[1]);
855 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
856 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
857 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
858 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
859 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
860 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
861 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
862 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
863 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
864 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
865 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
867 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
868 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
869 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
870 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
871 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
872 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
873 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
874 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
875 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
876 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1));
878 /* Do a cosine, then a sine, masking out the channels we want to protect. */
879 /* Cosine only goes in R (x) channel. */
880 fpi
->DstReg
.WriteMask
= 0x1;
881 emit_alu(fp
, counter
, fpi
);
882 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
883 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
884 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
885 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
886 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
887 | R500_ALPHA_ADDRD(dest
)
888 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
889 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
890 | R500_ALU_RGBA_ADDRD(dest
);
892 /* Sine only goes in G (y) channel. */
893 fpi
->DstReg
.WriteMask
= 0x2;
894 emit_alu(fp
, counter
, fpi
);
895 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
896 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
897 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
898 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
899 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
900 | R500_ALPHA_ADDRD(dest
)
901 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
902 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
903 | R500_ALU_RGBA_ADDRD(dest
);
906 /* We use SRCP, so as a precaution we're
907 * going to set NOP in previous inst, if possible. */
908 /* This inst's selects need to be swapped as follows:
909 * 0 -> C ; 1 -> B ; 2 -> A */
910 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
911 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
912 emit_alu(fp
, counter
, fpi
);
913 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
914 | R500_RGB_ADDR1(src
[1])
915 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
916 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
917 | R500_ALPHA_ADDR1(src
[1])
918 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
919 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
920 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
921 | R500_ALU_RGB_SEL_B_SRC1
922 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
923 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
924 | R500_ALPHA_ADDRD(dest
)
925 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
926 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO
);
927 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
928 | R500_ALU_RGBA_ADDRD(dest
)
929 | R500_ALU_RGBA_SEL_C_SRCP
930 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
931 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
932 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
935 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
936 src
[1] = emit_const4fv(fp
, RCP_2PI
);
937 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
938 | (R500_WRITEMASK_ARGB
<< 11);
939 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
940 | R500_RGB_ADDR1(src
[1]);
941 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
942 | R500_ALPHA_ADDR1(src
[1]);
943 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
944 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
945 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
946 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
947 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
948 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
949 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
950 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
951 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
952 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
953 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
955 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
956 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
957 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
958 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
959 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
960 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
961 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
962 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
963 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
964 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1));
966 emit_alu(fp
, counter
, fpi
);
967 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
968 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
969 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
;
970 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
971 | R500_ALPHA_ADDRD(dest
)
972 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
973 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
974 | R500_ALU_RGBA_ADDRD(dest
);
977 /* We use SRCP, so as a precaution we're
978 * going to set NOP in previous inst, if possible. */
979 /* This inst's selects need to be swapped as follows:
980 * 0 -> C ; 1 -> B ; 2 -> A */
981 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
982 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
983 emit_alu(fp
, counter
, fpi
);
984 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
985 | R500_RGB_ADDR1(src
[1])
986 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
987 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
988 | R500_ALPHA_ADDR1(src
[1])
989 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
990 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
991 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO
)
992 | R500_ALU_RGB_SEL_B_SRC1
993 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
994 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
995 | R500_ALPHA_ADDRD(dest
)
996 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO
)
997 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE
);
998 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
999 | R500_ALU_RGBA_ADDRD(dest
)
1000 | R500_ALU_RGBA_SEL_C_SRCP
1001 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
1002 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
1003 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
1006 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
1007 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
1008 /* Variation on MAD: 1*src0-src1 */
1009 emit_alu(fp
, counter
, fpi
);
1010 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
1011 | R500_RGB_ADDR2(src
[1]);
1012 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
1013 | R500_ALPHA_ADDR2(src
[1]);
1014 fp
->inst
[counter
].inst3
= /* 1 */
1015 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
1016 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
1017 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
1018 | R500_ALPHA_ADDRD(dest
)
1019 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
1020 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
1021 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
1022 | R500_ALU_RGBA_ADDRD(dest
)
1023 | R500_ALU_RGBA_SEL_C_SRC2
1024 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
1025 | R500_ALU_RGBA_MOD_C_NEG
1026 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1027 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
1028 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
1031 /* TODO: Negation masks! */
1032 emit_alu(fp
, counter
, fpi
);
1033 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
1036 emit_tex(fp
, fpi
, OPCODE_TEX
, dest
, counter
);
1037 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
1041 emit_tex(fp
, fpi
, OPCODE_TXB
, dest
, counter
);
1042 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
1046 emit_tex(fp
, fpi
, OPCODE_TXP
, dest
, counter
);
1047 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
1051 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi
->Opcode
));
1055 /* Finishing touches */
1056 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
1057 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
1067 /* Finish him! (If it's an ALU/OUT instruction...) */
1068 if ((fp
->inst
[counter
-1].inst0
& 0x3) == 1) {
1069 fp
->inst
[counter
-1].inst0
|= R500_INST_LAST
;
1071 /* We still need to put an output inst, right? */
1072 WARN_ONCE("Final FP instruction is not an OUT.\n");
1078 fp
->cs
->nrslots
= counter
;
/*
 * Reset the per-compile tracking data in fp before a new translation.
 *
 * Visible effects: clears fp->translated and fp->error, points fp->cs at
 * the context's state.pfs_compile and zeroes it, marks every slot source
 * as SRC_CONST, hands out register numbers to the fragment inputs the
 * program reads, and derives fp->max_temp_idx from the highest
 * PROGRAM_TEMPORARY index used as a source operand.
 *
 * r300: context whose radeon.optionCache is queried for "fp_optimization".
 * fp:   fragment program being prepared.
 *
 * NOTE(review): several lines of this function (for example the
 * declarations of i and j) are not visible in this view.
 */
1085 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
1087 struct r300_pfs_compile_state
*cs
= NULL
;
1088 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
1089 struct prog_instruction
*fpi
;
/* Local copy: bits are cleared from it below as inputs are handled. */
1090 GLuint InputsRead
= mp
->Base
.InputsRead
;
1091 GLuint temps_used
= 0;
1094 /* New compile, reset tracking data */
/* NOTE(review): the destination of this query result is not visible here. */
1096 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
1097 fp
->translated
= GL_FALSE
;
1098 fp
->error
= GL_FALSE
;
/* Both fp->cs and the local cs refer to the context-owned compile state. */
1099 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
1101 fp
->first_node_has_tex
= 0;
1103 /* Size of pixel stack, plus 1. */
1104 fp
->max_temp_idx
= 1;
1105 /* Temp register offset. */
1106 fp
->temp_reg_offset
= 0;
1107 fp
->node
[0].alu_end
= -1;
1108 fp
->node
[0].tex_end
= -1;
/* Zero the compile state; fp->cs was set to cs just above. */
1110 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
/* Mark all three vector and scalar source entries of every slot as SRC_CONST. */
1111 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
1112 for (j
= 0; j
< 3; j
++) {
1113 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
1114 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
1118 /* Work out what temps the Mesa inputs correspond to, this must match
1119 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1120 * configures itself based on the fragprog's InputsRead
1122 * NOTE: this depends on get_hw_temp() allocating registers in order,
1123 * starting from register 0, so we're just going to do that instead.
1126 /* Texcoords come first */
/* Each texcoord that is read takes the current temp_reg_offset, which then advances. */
1127 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
1128 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1129 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
1130 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
1131 fp
->temp_reg_offset
;
1132 fp
->temp_reg_offset
++;
/* Drop the handled bits so only unrecognised inputs remain at the end. */
1135 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
1137 /* fragment position treated as a texcoord */
1138 if (InputsRead
& FRAG_BIT_WPOS
) {
1139 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
1140 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
=
1141 fp
->temp_reg_offset
;
1142 fp
->temp_reg_offset
++;
1144 InputsRead
&= ~FRAG_BIT_WPOS
;
1146 /* Then primary colour */
1147 if (InputsRead
& FRAG_BIT_COL0
) {
1148 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
1149 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
=
1150 fp
->temp_reg_offset
;
1151 fp
->temp_reg_offset
++;
1153 InputsRead
&= ~FRAG_BIT_COL0
;
1155 /* Secondary color */
1156 if (InputsRead
& FRAG_BIT_COL1
) {
1157 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
1158 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
=
1159 fp
->temp_reg_offset
;
1160 fp
->temp_reg_offset
++;
1162 InputsRead
&= ~FRAG_BIT_COL1
;
/* NOTE(review): the guard around this warning is not visible in this view;
 * presumably it runs only when InputsRead is still non-zero - confirm. */
1166 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
1167 /* force read from hwreg 0 for now */
/* NOTE(review): i reaches 31 and the literal 1 is a signed int, so the
 * shift below can move a bit into the sign position; consider 1u << i. */
1168 for (i
= 0; i
< 32; i
++)
1169 if (InputsRead
& (1 << i
))
1170 cs
->inputs
[i
].reg
= 0;
1173 if (!mp
->Base
.Instructions
) {
1174 ERROR("No instructions found in program, going to go die now.\n");
/* Walk the instructions up to OPCODE_END and remember the highest
 * PROGRAM_TEMPORARY index among the three source operands. Only SrcReg
 * is examined in the lines visible here. */
1178 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1179 for (i
= 0; i
< 3; i
++) {
1180 if (fpi
->SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
1181 if (fpi
->SrcReg
[i
].Index
> temps_used
)
1182 temps_used
= fpi
->SrcReg
[i
].Index
;
1187 cs
->temp_in_use
= temps_used
;
/* Registers handed to inputs, plus the highest temporary index seen, plus one. */
1189 fp
->max_temp_idx
= fp
->temp_reg_offset
+ cs
->temp_in_use
+ 1;
1192 static void update_params(struct r500_fragment_program
*fp
)
1194 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
1196 /* Ask Mesa nicely to fill in ParameterValues for us */
1197 if (mp
->Base
.Parameters
)
1198 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
/*
 * Translate fp's Mesa fragment program into r500 instructions when
 * fp->translated is not yet set.
 *
 * The visible steps are: init_program(), parse_program(), publishing the
 * instruction range as inst_offset = 0 .. inst_end = cs->nrslots - 1,
 * setting fp->translated, printing the Mesa program to stderr, and calling
 * r300UpdateStateParameters() with _NEW_PROGRAM.
 *
 * r300: context handed on to init_program().
 * fp:   fragment program to translate.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view, so the exact nesting of the statements below cannot be confirmed.
 */
1201 void r500TranslateFragmentShader(r300ContextPtr r300
,
1202 struct r500_fragment_program
*fp
)
/* NOTE(review): cs starts out NULL and is dereferenced below via
 * cs->nrslots; the assignment that makes it valid is not visible in this
 * view - confirm it is set after init_program(). */
1205 struct r300_pfs_compile_state
*cs
= NULL
;
1207 if (!fp
->translated
) {
1211 init_program(r300
, fp
);
/* Parse failure: report it, then still publish the instruction range. */
1214 if (parse_program(fp
) == GL_FALSE
) {
1215 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1217 fp
->inst_offset
= 0;
1218 fp
->inst_end
= cs
->nrslots
- 1;
/* Same range assignment as in the failure path above. */
1221 fp
->inst_offset
= 0;
1222 fp
->inst_end
= cs
->nrslots
- 1;
1224 fp
->translated
= GL_TRUE
;
/* NOTE(review): the leading "1 ||" makes this condition always true, so
 * the program is printed regardless of RADEON_DEBUG. */
1225 if (1 || RADEON_DEBUG
& DEBUG_PIXEL
) {
1226 fprintf(stderr
, "Mesa program:\n");
1227 fprintf(stderr
, "-------------\n");
1228 _mesa_print_program(&fp
->mesa_program
.Base
);
1234 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);
/*
 * Map a swizzle selector value to a short string for the program dump.
 * dump_program() passes 3-bit (& 0x7) and 2-bit (& 0x3) fields.
 * NOTE(review): only the mapping for value 5 is visible in this view.
 */
1241 static char *toswiz(int swiz_val
) {
1248 case 5: return "1/2";
/*
 * Map an op field value to its mnemonic string for the program dump.
 * dump_program() passes the low four bits of inst5.
 * NOTE(review): cases 0..12 are listed; the declaration of str and what
 * happens for values 13..15 are not visible in this view.
 */
1255 static char *toop(int op_val
)
1259 case 0: str
= "MAD"; break;
1260 case 1: str
= "DP3"; break;
1261 case 2: str
= "DP4"; break;
1262 case 3: str
= "D2A"; break;
1263 case 4: str
= "MIN"; break;
1264 case 5: str
= "MAX"; break;
1265 case 6: str
= "Reserved"; break;
1266 case 7: str
= "CND"; break;
1267 case 8: str
= "CMP"; break;
1268 case 9: str
= "FRC"; break;
1269 case 10: str
= "SOP"; break;
1270 case 11: str
= "MDH"; break;
1271 case 12: str
= "MDV"; break;
/*
 * Map an alpha op field value to its mnemonic string for the program dump.
 * dump_program() passes the low four bits of inst4, and all sixteen values
 * 0..15 have a case here.
 * NOTE(review): the declaration of str and the return statement are not
 * visible in this view.
 */
1276 static char *to_alpha_op(int op_val
)
1280 case 0: str
= "MAD"; break;
1281 case 1: str
= "DP"; break;
1282 case 2: str
= "MIN"; break;
1283 case 3: str
= "MAX"; break;
1284 case 4: str
= "Reserved"; break;
1285 case 5: str
= "CND"; break;
1286 case 6: str
= "CMP"; break;
1287 case 7: str
= "FRC"; break;
1288 case 8: str
= "EX2"; break;
1289 case 9: str
= "LN2"; break;
1290 case 10: str
= "RCP"; break;
1291 case 11: str
= "RSQ"; break;
1292 case 12: str
= "SIN"; break;
1293 case 13: str
= "COS"; break;
1294 case 14: str
= "MDH"; break;
1295 case 15: str
= "MDV"; break;
/*
 * Map a 4-bit mask value to a channel-letter string for the program dump.
 * In the table below bit 0 adds R, bit 1 adds G, bit 2 adds B and bit 3
 * adds A (A is printed first).  dump_program() passes bits 11..14 and
 * bits 15..18 of inst0.
 * NOTE(review): the declaration of str and the return statement are not
 * visible in this view.
 */
1300 static char *to_mask(int val
)
1304 case 0: str
= "NONE"; break;
1305 case 1: str
= "R"; break;
1306 case 2: str
= "G"; break;
1307 case 3: str
= "RG"; break;
1308 case 4: str
= "B"; break;
1309 case 5: str
= "RB"; break;
1310 case 6: str
= "GB"; break;
1311 case 7: str
= "RGB"; break;
1312 case 8: str
= "A"; break;
1313 case 9: str
= "AR"; break;
1314 case 10: str
= "AG"; break;
1315 case 11: str
= "ARG"; break;
1316 case 12: str
= "AB"; break;
1317 case 13: str
= "ARB"; break;
1318 case 14: str
= "AGB"; break;
1319 case 15: str
= "ARGB"; break;
/*
 * Map a texture op field value to its name for the program dump.
 * dump_program() passes a 3-bit field (bits 22..24 of inst1).
 * NOTE(review): cases 0..6 are listed; the result for value 7 is not
 * visible in this view.
 */
1324 static char *to_texop(int val
)
1327 case 0: return "NOP";
1328 case 1: return "LD";
1329 case 2: return "TEXKILL";
1330 case 3: return "PROJ";
1331 case 4: return "LODBIAS";
1332 case 5: return "LOD";
1333 case 6: return "DXDY";
1338 static void dump_program(struct r500_fragment_program
*fp
)
1346 for (n
= 0; n
< fp
->inst_end
+1; n
++) {
1347 inst0
= inst
= fp
->inst
[n
].inst0
;
1348 fprintf(stderr
,"%d\t0:CMN_INST 0x%08x:", n
, inst
);
1349 switch(inst
& 0x3) {
1350 case R500_INST_TYPE_ALU
: str
= "ALU"; break;
1351 case R500_INST_TYPE_OUT
: str
= "OUT"; break;
1352 case R500_INST_TYPE_FC
: str
= "FC"; break;
1353 case R500_INST_TYPE_TEX
: str
= "TEX"; break;
1355 fprintf(stderr
,"%s %s %s %s %s ", str
,
1356 inst
& R500_INST_TEX_SEM_WAIT
? "TEX_WAIT" : "",
1357 inst
& R500_INST_LAST
? "LAST" : "",
1358 inst
& R500_INST_NOP
? "NOP" : "",
1359 inst
& R500_INST_ALU_WAIT
? "ALU WAIT" : "");
1360 fprintf(stderr
,"wmask: %s omask: %s\n", to_mask((inst
>> 11) & 0xf),
1361 to_mask((inst
>> 15) & 0xf));
1363 switch(inst0
& 0x3) {
1366 fprintf(stderr
,"\t1:RGB_ADDR 0x%08x:", fp
->inst
[n
].inst1
);
1367 inst
= fp
->inst
[n
].inst1
;
1369 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1370 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1371 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1372 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1375 fprintf(stderr
,"\t2:ALPHA_ADDR 0x%08x:", fp
->inst
[n
].inst2
);
1376 inst
= fp
->inst
[n
].inst2
;
1377 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1378 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1379 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1380 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1382 fprintf(stderr
,"\t3 RGB_INST: 0x%08x:", fp
->inst
[n
].inst3
);
1383 inst
= fp
->inst
[n
].inst3
;
1384 fprintf(stderr
,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1385 (inst
) & 0x3, toswiz((inst
>> 2) & 0x7), toswiz((inst
>> 5) & 0x7), toswiz((inst
>> 8) & 0x7),
1387 (inst
>> 13) & 0x3, toswiz((inst
>> 15) & 0x7), toswiz((inst
>> 18) & 0x7), toswiz((inst
>> 21) & 0x7),
1388 (inst
>> 24) & 0x3);
1391 fprintf(stderr
,"\t4 ALPHA_INST:0x%08x:", fp
->inst
[n
].inst4
);
1392 inst
= fp
->inst
[n
].inst4
;
1393 fprintf(stderr
,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst
& 0xf),
1394 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1395 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), (inst
>> 17) & 0x3,
1396 (inst
>> 19) & 0x3, toswiz((inst
>> 21) & 0x7), (inst
>> 24) & 0x3);
1398 fprintf(stderr
,"\t5 RGBA_INST: 0x%08x:", fp
->inst
[n
].inst5
);
1399 inst
= fp
->inst
[n
].inst5
;
1400 fprintf(stderr
,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst
& 0xf),
1401 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1402 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), toswiz((inst
>> 17) & 0x7), toswiz((inst
>> 20) & 0x7),
1404 (inst
>> 25) & 0x3, toswiz((inst
>> 27) & 0x7), (inst
>> 30) & 0x3);
1409 inst
= fp
->inst
[n
].inst1
;
1410 fprintf(stderr
,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst
, (inst
>> 16) & 0xf,
1411 to_texop((inst
>> 22) & 0x7), (inst
& (1<<25)) ? "ACQ" : "",
1412 (inst
& (1<<26)) ? "IGNUNC" : "", (inst
& (1<<27)) ? "UNSCALED" : "SCALED");
1413 inst
= fp
->inst
[n
].inst2
;
1414 fprintf(stderr
,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst
,
1415 inst
& 127, inst
& (1<<7) ? "(rel)" : "",
1416 toswiz((inst
>> 8) & 0x3), toswiz((inst
>> 10) & 0x3),
1417 toswiz((inst
>> 12) & 0x3), toswiz((inst
>> 14) & 0x3),
1418 (inst
>> 16) & 127, inst
& (1<<23) ? "(rel)" : "",
1419 toswiz((inst
>> 24) & 0x3), toswiz((inst
>> 26) & 0x3),
1420 toswiz((inst
>> 28) & 0x3), toswiz((inst
>> 30) & 0x3));
1422 fprintf(stderr
,"\t3:TEX_DXDY: 0x%08x\n", fp
->inst
[n
].inst3
);
1425 fprintf(stderr
,"\n");