2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
58 * Useful macros and values
60 #define ERROR(fmt, args...) do { \
61 fprintf(stderr, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
63 fp->error = GL_TRUE; \
66 #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
/* Upper bounds used by the range checks in get_temp() (temporaries) and
 * emit_const4fv() (constants). */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags.
 * These are OR'd on top of a register index; emit_const4fv() tags the
 * constant slot it returns with REG_CONSTANT. */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Single-channel swizzle selectors for the constant values 0, 1/2 and 1. */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6

/* Pre-packed three-channel swizzles: one 3-bit selector per channel, placed
 * at bits 0, 3 and 6. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/* The MAKE_SWIZ_* helpers shift an already packed swizzle into its field of
 * the given instruction word.  The parameter is parenthesized so that an
 * argument containing a lower-precedence operator (e.g. `a | b`) is shifted
 * as a whole rather than only its last operand. */

/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
/* Forward declaration of the file-local routine dump_program(), which takes
 * the fragment program being compiled; its definition is elsewhere in this
 * file. */
96 static void dump_program(struct r500_fragment_program
*fp
);
98 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
101 /* This could be optimized, but it should be plenty fast already. */
103 for (i
= 0; i
< 3; i
++) {
104 temp
= GET_SWZ(src
.Swizzle
, i
);
105 /* Fix SWIZZLE_ONE */
106 if (temp
== 5) temp
++;
112 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
113 GLuint swiz
= GET_SWZ(src
.Swizzle
, 3);
115 if (swiz
== 5) swiz
++;
119 static inline GLuint
make_sop_swizzle(struct prog_src_register src
) {
120 GLuint swiz
= GET_SWZ(src
.Swizzle
, 0);
122 if (swiz
== 5) swiz
++;
126 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
128 GLuint temp
= src
.Swizzle
;
130 for (i
= 0; i
< 4; i
++) {
131 swiz
+= (temp
& 0x3) << i
*2;
137 static int get_temp(struct r500_fragment_program
*fp
, int slot
) {
141 int r
= cs
->temp_in_use
+ 1 + slot
;
143 if (r
> R500_US_NUM_TEMP_REGS
) {
144 ERROR("Too many temporary registers requested, can't compile!\n");
150 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
151 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
157 for (index
= 0; index
< fp
->const_nr
; ++index
) {
158 if (fp
->constant
[index
] == cp
)
162 if (index
>= fp
->const_nr
) {
163 if (index
>= R500_US_NUM_CONST_REGS
) {
164 ERROR("Out of hw constants!\n");
169 fp
->constant
[index
] = cp
;
172 reg
= index
| REG_CONSTANT
;
176 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
180 case PROGRAM_TEMPORARY
:
181 reg
= src
.Index
+ fp
->temp_reg_offset
;
184 reg
= cs
->inputs
[src
.Index
].reg
;
186 case PROGRAM_LOCAL_PARAM
:
187 reg
= emit_const4fv(fp
,
188 fp
->mesa_program
.Base
.LocalParams
[src
.
191 case PROGRAM_ENV_PARAM
:
192 reg
= emit_const4fv(fp
,
193 fp
->ctx
->FragmentProgram
.Parameters
[src
.
196 case PROGRAM_STATE_VAR
:
197 case PROGRAM_NAMED_PARAM
:
198 case PROGRAM_CONSTANT
:
199 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
200 ParameterValues
[src
.Index
]);
203 ERROR("Can't handle src.File %x\n", src
.File
);
210 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
213 case PROGRAM_TEMPORARY
:
214 reg
= dest
.Index
+ fp
->temp_reg_offset
;
217 /* Eventually we may need to handle multiple
218 * rendering targets... */
222 ERROR("Can't handle dest.File %x\n", dest
.File
);
229 static void emit_tex(struct r500_fragment_program
*fp
,
230 struct prog_instruction
*fpi
, int opcode
, int dest
, int counter
)
235 mask
= fpi
->DstReg
.WriteMask
<< 11;
236 hwsrc
= make_src(fp
, fpi
->SrcReg
[0]);
238 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
239 hwdest
= get_temp(fp
, 0);
244 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
245 | R500_INST_TEX_SEM_WAIT
;
247 fp
->inst
[counter
].inst1
= R500_TEX_ID(fpi
->TexSrcUnit
)
248 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
250 if (fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
251 fp
->inst
[counter
].inst1
|= R500_TEX_UNSCALED
;
255 fp
->inst
[counter
].inst1
|= R500_TEX_INST_TEXKILL
;
258 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
261 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LODBIAS
;
264 fp
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
267 ERROR("emit_tex can't handle opcode %x\n", opcode
);
270 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
271 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
272 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
273 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
274 | R500_TEX_DST_ADDR(hwdest
)
275 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
276 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
278 fp
->inst
[counter
].inst3
= 0x0;
279 fp
->inst
[counter
].inst4
= 0x0;
280 fp
->inst
[counter
].inst5
= 0x0;
282 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
284 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
285 | R500_INST_TEX_SEM_WAIT
| (mask
<< 4);
286 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
287 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
288 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
289 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
290 | R500_ALU_RGB_SEL_B_SRC0
291 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
)
292 | R500_ALU_RGB_OMOD_DISABLE
;
293 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
294 | R500_ALPHA_ADDRD(dest
)
295 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A
)
296 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A
)
297 | R500_ALPHA_OMOD_DISABLE
;
298 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
299 | R500_ALU_RGBA_ADDRD(dest
)
300 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
301 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
305 static void dumb_shader(struct r500_fragment_program
*fp
)
307 fp
->inst
[0].inst0
= R500_INST_TYPE_TEX
308 | R500_INST_TEX_SEM_WAIT
309 | R500_INST_RGB_WMASK_R
310 | R500_INST_RGB_WMASK_G
311 | R500_INST_RGB_WMASK_B
312 | R500_INST_ALPHA_WMASK
313 | R500_INST_RGB_CLAMP
314 | R500_INST_ALPHA_CLAMP
;
315 fp
->inst
[0].inst1
= R500_TEX_ID(0)
317 | R500_TEX_SEM_ACQUIRE
318 | R500_TEX_IGNORE_UNCOVERED
;
319 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
320 | R500_TEX_SRC_S_SWIZ_R
321 | R500_TEX_SRC_T_SWIZ_G
322 | R500_TEX_DST_ADDR(0)
323 | R500_TEX_DST_R_SWIZ_R
324 | R500_TEX_DST_G_SWIZ_G
325 | R500_TEX_DST_B_SWIZ_B
326 | R500_TEX_DST_A_SWIZ_A
;
327 fp
->inst
[0].inst3
= R500_DX_ADDR(0)
337 fp
->inst
[0].inst4
= 0x0;
338 fp
->inst
[0].inst5
= 0x0;
340 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
341 R500_INST_TEX_SEM_WAIT
|
343 R500_INST_RGB_OMASK_R
|
344 R500_INST_RGB_OMASK_G
|
345 R500_INST_RGB_OMASK_B
|
346 R500_INST_ALPHA_OMASK
;
347 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
349 R500_RGB_ADDR1_CONST
|
351 R500_RGB_ADDR2_CONST
|
352 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
353 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
354 R500_ALPHA_ADDR1(0) |
355 R500_ALPHA_ADDR1_CONST
|
356 R500_ALPHA_ADDR2(0) |
357 R500_ALPHA_ADDR2_CONST
|
358 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
359 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
360 R500_ALU_RGB_R_SWIZ_A_R
|
361 R500_ALU_RGB_G_SWIZ_A_G
|
362 R500_ALU_RGB_B_SWIZ_A_B
|
363 R500_ALU_RGB_SEL_B_SRC0
|
364 R500_ALU_RGB_R_SWIZ_B_1
|
365 R500_ALU_RGB_B_SWIZ_B_1
|
366 R500_ALU_RGB_G_SWIZ_B_1
;
367 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
368 R500_ALPHA_SWIZ_A_A
|
370 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
371 R500_ALU_RGBA_R_SWIZ_0
|
372 R500_ALU_RGBA_G_SWIZ_0
|
373 R500_ALU_RGBA_B_SWIZ_0
|
374 R500_ALU_RGBA_A_SWIZ_0
;
377 fp
->translated
= GL_TRUE
;
380 static void emit_alu(struct r500_fragment_program
*fp
, int counter
, struct prog_instruction
*fpi
) {
381 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
382 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
384 | (fpi
->DstReg
.WriteMask
<< 15);
386 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
388 | (fpi
->DstReg
.WriteMask
<< 11);
391 fp
->inst
[counter
].inst0
|= R500_INST_TEX_SEM_WAIT
;
394 static void emit_mov(struct r500_fragment_program
*fp
, int counter
, struct prog_src_register src
, GLuint dest
) {
395 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
396 * it is technically more accurate and recommended by ATI/AMD. */
397 GLuint src_reg
= make_src(fp
, src
);
398 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src_reg
);
399 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src_reg
);
400 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
401 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src
))
402 | R500_ALU_RGB_SEL_B_SRC0
403 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src
))
404 | R500_ALU_RGB_OMOD_DISABLE
;
405 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
406 | R500_ALPHA_ADDRD(dest
)
407 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src
))
408 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src
))
409 | R500_ALPHA_OMOD_DISABLE
;
410 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
411 | R500_ALU_RGBA_ADDRD(dest
)
412 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
413 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
416 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
418 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
419 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
420 struct prog_instruction
*fpi
;
421 GLuint src
[3], dest
, temp
[2];
422 int flags
, pixel_mask
= 0, output_mask
= 0, counter
= 0;
424 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
425 ERROR("The program is empty!\n");
429 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
431 if (fpi
->Opcode
!= OPCODE_KIL
) {
432 dest
= make_dest(fp
, fpi
->DstReg
);
434 pixel_mask
= fpi
->DstReg
.WriteMask
<< 11;
435 output_mask
= fpi
->DstReg
.WriteMask
<< 15;
438 switch (fpi
->Opcode
) {
440 emit_alu(fp
, counter
, fpi
);
441 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
442 fp
->inst
[counter
].inst3
|= R500_ALU_RGB_MOD_A_ABS
443 | R500_ALU_RGB_MOD_B_ABS
;
444 fp
->inst
[counter
].inst4
|= R500_ALPHA_MOD_A_ABS
445 | R500_ALPHA_MOD_B_ABS
;
448 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
449 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
450 /* Variation on MAD: 1*src0+src1 */
451 emit_alu(fp
, counter
, fpi
);
452 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
453 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(0);
454 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
455 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(0);
456 fp
->inst
[counter
].inst3
= /* 1 */
457 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
458 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
459 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
460 | R500_ALPHA_ADDRD(dest
)
461 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
462 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
463 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
464 | R500_ALU_RGBA_ADDRD(dest
)
465 | R500_ALU_RGBA_SEL_C_SRC1
466 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
467 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
468 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
471 /* This inst's selects need to be swapped as follows:
472 * 0 -> C ; 1 -> B ; 2 -> A */
473 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
474 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
475 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
476 emit_alu(fp
, counter
, fpi
);
477 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[2])
478 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[0]);
479 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[2])
480 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[0]);
481 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
482 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[2]))
483 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
484 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
485 | R500_ALPHA_ADDRD(dest
)
486 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[2]))
487 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
488 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
489 | R500_ALU_RGBA_ADDRD(dest
)
490 | R500_ALU_RGBA_SEL_C_SRC2
491 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
492 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
493 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
496 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
497 emit_alu(fp
, counter
, fpi
);
498 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
499 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
500 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
;
501 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
502 | R500_ALPHA_ADDRD(dest
)
503 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
504 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
505 | R500_ALU_RGBA_ADDRD(dest
);
508 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
509 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
510 emit_alu(fp
, counter
, fpi
);
511 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
512 | R500_RGB_ADDR1(src
[1]);
513 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
514 | R500_ALPHA_ADDR1(src
[1]);
515 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
516 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
517 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
518 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
519 | R500_ALPHA_ADDRD(dest
)
520 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
521 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
522 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
523 | R500_ALU_RGBA_ADDRD(dest
);
526 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
527 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
529 emit_alu(fp
, counter
, fpi
);
530 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
531 | R500_RGB_ADDR1(src
[1]);
532 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
533 | R500_ALPHA_ADDR1(src
[1]);
534 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
535 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
536 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
537 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
538 | R500_ALPHA_ADDRD(dest
)
539 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
540 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
541 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
542 | R500_ALU_RGBA_ADDRD(dest
);
545 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
546 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
548 emit_alu(fp
, counter
, fpi
);
549 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
550 | R500_RGB_ADDR1(src
[1]);
551 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
552 | R500_ALPHA_ADDR1(src
[1]);
553 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
554 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
555 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
556 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
557 | R500_ALPHA_ADDRD(dest
)
558 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
559 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
560 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
561 | R500_ALU_RGBA_ADDRD(dest
);
564 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
565 emit_alu(fp
, counter
, fpi
);
566 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
567 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
568 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
569 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
570 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_EX2
571 | R500_ALPHA_ADDRD(dest
)
572 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
573 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
574 | R500_ALU_RGBA_ADDRD(dest
);
577 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
578 emit_alu(fp
, counter
, fpi
);
579 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
580 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
581 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
582 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
583 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
584 | R500_ALPHA_ADDRD(dest
)
585 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
586 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
587 | R500_ALU_RGBA_ADDRD(dest
);
590 emit_tex(fp
, fpi
, OPCODE_KIL
, dest
, counter
);
593 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
594 emit_alu(fp
, counter
, fpi
);
595 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
596 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
597 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
598 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
599 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_LN2
600 | R500_ALPHA_ADDRD(dest
)
601 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
602 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
603 | R500_ALU_RGBA_ADDRD(dest
);
606 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
607 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
608 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
609 emit_alu(fp
, counter
, fpi
);
610 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
611 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
612 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
613 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
614 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
615 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
616 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
617 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
618 | R500_ALPHA_ADDRD(dest
)
619 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
620 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
621 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
622 | R500_ALU_RGBA_ADDRD(dest
)
623 | R500_ALU_RGBA_SEL_C_SRC2
624 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
625 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
626 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
629 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
630 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
631 emit_alu(fp
, counter
, fpi
);
632 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
633 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
634 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
635 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
636 | R500_ALU_RGB_SEL_B_SRC1
637 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
638 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
639 | R500_ALPHA_ADDRD(dest
)
640 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
641 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
642 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
643 | R500_ALU_RGBA_ADDRD(dest
);
646 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
647 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
648 emit_alu(fp
, counter
, fpi
);
649 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
650 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
651 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
652 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
653 | R500_ALU_RGB_SEL_B_SRC1
654 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
655 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MIN
656 | R500_ALPHA_ADDRD(dest
)
657 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
658 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
659 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
660 | R500_ALU_RGBA_ADDRD(dest
);
663 emit_alu(fp
, counter
, fpi
);
664 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
667 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
668 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
669 /* Variation on MAD: src0*src1+0 */
670 emit_alu(fp
, counter
, fpi
);
671 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
672 | R500_RGB_ADDR1(src
[1]);
673 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
674 | R500_ALPHA_ADDR1(src
[1]);
675 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
676 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
677 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
678 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
679 | R500_ALPHA_ADDRD(dest
)
680 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
681 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
682 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
683 | R500_ALU_RGBA_ADDRD(dest
)
684 // | R500_ALU_RGBA_SEL_C_SRC2
685 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
686 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
687 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
690 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
691 emit_alu(fp
, counter
, fpi
);
692 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
693 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
694 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
695 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
696 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RCP
697 | R500_ALPHA_ADDRD(dest
)
698 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
699 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
700 | R500_ALU_RGBA_ADDRD(dest
);
703 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
704 emit_alu(fp
, counter
, fpi
);
705 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
706 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
707 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
708 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
709 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RSQ
710 | R500_ALPHA_ADDRD(dest
)
711 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
712 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
713 | R500_ALU_RGBA_ADDRD(dest
);
716 /* TODO: Make this elegant! */
717 /* Do a cosine, then a sine, masking out the channels we want to protect. */
718 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
719 /* Cosine only goes in R (x) channel. */
720 fpi
->DstReg
.WriteMask
= 0x1;
721 emit_alu(fp
, counter
, fpi
);
722 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
723 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
724 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
725 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
726 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
727 | R500_ALPHA_ADDRD(dest
)
728 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
729 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
730 | R500_ALU_RGBA_ADDRD(dest
);
732 /* Sine only goes in G (y) channel. */
733 fpi
->DstReg
.WriteMask
= 0x2;
734 emit_alu(fp
, counter
, fpi
);
735 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
736 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
737 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
738 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
739 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
740 | R500_ALPHA_ADDRD(dest
)
741 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
742 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
743 | R500_ALU_RGBA_ADDRD(dest
);
744 /* Put 0 into B,A (z,w) channels.
746 fpi->DstReg.WriteMask = 0xC;
747 emit_alu(fp, counter, fpi);
748 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
749 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
750 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
751 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
752 | R500_ALU_RGB_SEL_B_SRC0
753 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
754 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
755 | R500_ALPHA_ADDRD(dest)
756 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
757 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
758 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
759 | R500_ALU_RGBA_ADDRD(dest)
760 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
761 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); */
764 /* We use SRCP, so as a precaution we're
765 * going to set NOP in previous inst, if possible. */
766 /* This inst's selects need to be swapped as follows:
767 * 0 -> C ; 1 -> B ; 2 -> A */
768 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
769 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
770 emit_alu(fp
, counter
, fpi
);
771 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
772 | R500_RGB_ADDR1(src
[1])
773 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
774 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
775 | R500_ALPHA_ADDR1(src
[1])
776 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
777 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
778 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
779 | R500_ALU_RGB_SEL_B_SRC1
780 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
781 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
782 | R500_ALPHA_ADDRD(dest
)
783 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
784 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO
);
785 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
786 | R500_ALU_RGBA_ADDRD(dest
)
787 | R500_ALU_RGBA_SEL_C_SRCP
788 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
789 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
790 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
793 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
794 emit_alu(fp
, counter
, fpi
);
795 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
796 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
797 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
;
798 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
799 | R500_ALPHA_ADDRD(dest
)
800 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
801 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
802 | R500_ALU_RGBA_ADDRD(dest
);
805 /* We use SRCP, so as a precaution we're
806 * going to set NOP in previous inst, if possible. */
807 /* This inst's selects need to be swapped as follows:
808 * 0 -> C ; 1 -> B ; 2 -> A */
809 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
810 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
811 emit_alu(fp
, counter
, fpi
);
812 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
813 | R500_RGB_ADDR1(src
[1])
814 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
815 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
816 | R500_ALPHA_ADDR1(src
[1])
817 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
818 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
819 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO
)
820 | R500_ALU_RGB_SEL_B_SRC1
821 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
822 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
823 | R500_ALPHA_ADDRD(dest
)
824 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO
)
825 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE
);
826 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
827 | R500_ALU_RGBA_ADDRD(dest
)
828 | R500_ALU_RGBA_SEL_C_SRCP
829 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
830 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
831 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
834 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
835 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
836 /* Variation on MAD: 1*src0-src1 */
837 emit_alu(fp
, counter
, fpi
);
838 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
839 | R500_RGB_ADDR2(src
[1]);
840 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
841 | R500_ALPHA_ADDR2(src
[1]);
842 fp
->inst
[counter
].inst3
= /* 1 */
843 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
844 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
845 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
846 | R500_ALPHA_ADDRD(dest
)
847 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
848 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
849 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
850 | R500_ALU_RGBA_ADDRD(dest
)
851 | R500_ALU_RGBA_SEL_C_SRC2
852 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
853 | R500_ALU_RGBA_MOD_C_NEG
854 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
855 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
856 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
859 /* TODO: Negation masks! */
860 emit_alu(fp
, counter
, fpi
);
861 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
864 emit_tex(fp
, fpi
, OPCODE_TEX
, dest
, counter
);
865 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
869 emit_tex(fp
, fpi
, OPCODE_TXB
, dest
, counter
);
870 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
874 emit_tex(fp
, fpi
, OPCODE_TXP
, dest
, counter
);
875 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
879 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi
->Opcode
));
883 /* Finishing touches */
884 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
885 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
895 /* Finish him! (If it's an ALU/OUT instruction...) */
896 if ((fp
->inst
[counter
-1].inst0
& 0x3) == 1) {
897 fp
->inst
[counter
-1].inst0
|= R500_INST_LAST
;
899 /* We still need to put an output inst, right? */
900 WARN_ONCE("Final FP instruction is not an OUT.\n");
906 fp
->cs
->nrslots
= counter
;
/*
 * Reset the per-program compile state before a fragment program is parsed.
 *
 * Visible steps: clear fp->translated and fp->error, point fp->cs at the
 * context's pfs_compile state and zero it, default every slot source to
 * SRC_CONST, hand out registers to the fragment inputs named in
 * InputsRead (texcoords, then WPOS, then COL0, then COL1), bumping
 * fp->temp_reg_offset once per input, and finally scan the Mesa
 * instructions for the highest temporary index to size fp->max_temp_idx.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing (e.g. the declarations of i and j, the target of the
 * driQueryOptioni() result, and the right-hand sides of the ".reg ="
 * assignments). The comments here describe the visible statements only.
 */
913 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
915 struct r300_pfs_compile_state
*cs
= NULL
;
916 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
917 struct prog_instruction
*fpi
;
918 GLuint InputsRead
= mp
->Base
.InputsRead
;
919 GLuint temps_used
= 0;
922 /* New compile, reset tracking data */
/* NOTE(review): the variable receiving this option value is on a line not shown here. */
924 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
925 fp
->translated
= GL_FALSE
;
926 fp
->error
= GL_FALSE
;
927 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
929 fp
->first_node_has_tex
= 0;
931 /* Size of pixel stack, plus 1. */
932 fp
->max_temp_idx
= 1;
933 /* Temp register offset. */
934 fp
->temp_reg_offset
= 0;
935 fp
->node
[0].alu_end
= -1;
936 fp
->node
[0].tex_end
= -1;
/* cs and fp->cs are the same object here; zero it, then mark every slot source as SRC_CONST. */
938 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
939 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
940 for (j
= 0; j
< 3; j
++) {
941 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
942 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
946 /* Work out what temps the Mesa inputs correspond to, this must match
947 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
948 * configures itself based on the fragprog's InputsRead
950 * NOTE: this depends on get_hw_temp() allocating registers in order,
951 * starting from register 0, so we're just going to do that instead.
954 /* Texcoords come first */
955 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
956 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
957 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
958 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
960 fp
->temp_reg_offset
++;
/* Each handled input bit is cleared so that whatever is left can be reported as unhandled below. */
963 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
965 /* fragment position treated as a texcoord */
966 if (InputsRead
& FRAG_BIT_WPOS
) {
967 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
968 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
=
970 fp
->temp_reg_offset
++;
972 InputsRead
&= ~FRAG_BIT_WPOS
;
974 /* Then primary colour */
975 if (InputsRead
& FRAG_BIT_COL0
) {
976 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
977 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
=
979 fp
->temp_reg_offset
++;
981 InputsRead
&= ~FRAG_BIT_COL0
;
983 /* Secondary color */
984 if (InputsRead
& FRAG_BIT_COL1
) {
985 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
986 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
=
988 fp
->temp_reg_offset
++;
990 InputsRead
&= ~FRAG_BIT_COL1
;
994 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
995 /* force read from hwreg 0 for now */
996 for (i
= 0; i
< 32; i
++)
997 if (InputsRead
& (1 << i
))
998 cs
->inputs
[i
].reg
= 0;
1001 if (!mp
->Base
.Instructions
) {
1002 ERROR("No instructions found in program, going to go die now.\n");
/*
 * Find the highest PROGRAM_TEMPORARY index used as a source operand; the
 * walk stops at OPCODE_END. Only the three SrcReg slots are examined in
 * the visible code, not the destination register.
 */
1006 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1007 for (i
= 0; i
< 3; i
++) {
1008 if (fpi
->SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
1009 if (fpi
->SrcReg
[i
].Index
> temps_used
)
1010 temps_used
= fpi
->SrcReg
[i
].Index
;
1015 cs
->temp_in_use
= temps_used
;
/* Registers given to inputs, plus the highest temp index seen, plus one. */
1017 fp
->max_temp_idx
= fp
->temp_reg_offset
+ cs
->temp_in_use
+ 1;
1020 static void update_params(struct r500_fragment_program
*fp
)
1022 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
1024 /* Ask Mesa nicely to fill in ParameterValues for us */
1025 if (mp
->Base
.Parameters
)
1026 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
/*
 * Translate fp's Mesa fragment program for the R500 if that has not been
 * done yet (fp->translated is false).
 *
 * Visible steps: run init_program() and parse_program(); on both the
 * failure and the success path set fp->inst_offset to 0 and fp->inst_end to
 * cs->nrslots - 1; mark the program translated; when DEBUG_PIXEL is set in
 * RADEON_DEBUG print the Mesa program to stderr; call
 * r300UpdateStateParameters() with _NEW_PROGRAM.
 *
 * NOTE(review): in the lines visible here cs is initialised to NULL and is
 * not reassigned before cs->nrslots is read. Presumably an assignment such
 * as "cs = fp->cs" sits on a line missing from this listing -- confirm
 * against the complete file. Other statements (for example whatever
 * follows the parse failure message) are likewise not shown.
 */
1029 void r500TranslateFragmentShader(r300ContextPtr r300
,
1030 struct r500_fragment_program
*fp
)
1033 struct r300_pfs_compile_state
*cs
= NULL
;
1035 if (!fp
->translated
) {
1039 init_program(r300
, fp
);
1042 if (parse_program(fp
) == GL_FALSE
) {
1043 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1045 fp
->inst_offset
= 0;
1046 fp
->inst_end
= cs
->nrslots
- 1;
1049 fp
->inst_offset
= 0;
1050 fp
->inst_end
= cs
->nrslots
- 1;
1052 fp
->translated
= GL_TRUE
;
1053 if (RADEON_DEBUG
& DEBUG_PIXEL
) {
1055 fprintf(stderr
, "Mesa program:\n");
1056 fprintf(stderr
, "-------------\n");
1057 _mesa_print_program(&fp
->mesa_program
.Base
);
1062 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);
/*
 * Return a printable name for a swizzle selector; the instruction dump
 * passes masked bit fields of the instruction words to it.
 *
 * Only the mapping of selector 5 to "1/2" is visible in this listing.
 * NOTE(review): the other case lines and the fall-through return value
 * are not shown here -- confirm the full table against the complete file.
 */
1069 static char *toswiz(int swiz_val
) {
1076 case 5: return "1/2";
/**
 * Name an RGB ALU opcode for the instruction dump.
 *
 * \param op_val  opcode field value; the dump passes a 4-bit field, so
 *                values 13..15 can occur even though only 0..12 are named.
 * \return a static string: the mnemonic for 0..12, "UNKNOWN" otherwise.
 *         Never NULL, so the result is always safe for a "%s" conversion.
 */
static char *toop(int op_val)
{
	static char *const names[] = {
		"MAD", "DP3", "DP4", "D2A", "MIN", "MAX", "Reserved",
		"CND", "CMP", "FRC", "SOP", "MDH", "MDV",
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	/* Unmapped encodings used to leave the result unset. */
	if (op_val < 0 || op_val >= count)
		return "UNKNOWN";

	return names[op_val];
}
/**
 * Name an alpha ALU opcode for the instruction dump.
 *
 * \param op_val  opcode field value (0..15 are named).
 * \return a static string: the mnemonic for 0..15, "UNKNOWN" for any
 *         other value. Never NULL, so it is always safe for "%s".
 */
static char *to_alpha_op(int op_val)
{
	static char *const names[] = {
		"MAD", "DP", "MIN", "MAX", "Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ", "SIN", "COS", "MDH", "MDV",
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	/* Guard against callers that forget to mask the field. */
	if (op_val < 0 || op_val >= count)
		return "UNKNOWN";

	return names[op_val];
}
/**
 * Name a 4-bit write/output channel mask for the instruction dump.
 *
 * \param val  mask value; bit 0 = R, bit 1 = G, bit 2 = B, bit 3 = A, as
 *             implied by the names below.
 * \return a static string: "NONE" for 0, the channel letters for 1..15
 *         (alpha listed first), "UNKNOWN" for any other value. Never NULL.
 */
static char *to_mask(int val)
{
	static char *const names[] = {
		"NONE", "R", "G", "RG", "B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG", "AB", "ARB", "AGB", "ARGB",
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	/* Guard against callers that forget to mask the field. */
	if (val < 0 || val >= count)
		return "UNKNOWN";

	return names[val];
}
/**
 * Name a texture instruction opcode for the instruction dump.
 *
 * \param val  opcode field value; the dump passes a 3-bit field, so the
 *             value 7 can occur even though only 0..6 are named.
 * \return a static string: the mnemonic for 0..6, "UNKNOWN" otherwise.
 *         Never NULL, so the result is always safe for a "%s" conversion.
 */
static char *to_texop(int val)
{
	switch (val) {
	case 0:
		return "NOP";
	case 1:
		return "LD";
	case 2:
		return "TEXKILL";
	case 3:
		return "PROJ";
	case 4:
		return "LODBIAS";
	case 5:
		return "LOD";
	case 6:
		return "DXDY";
	default:
		/* Encoding 7 and anything out of range had no mapping. */
		return "UNKNOWN";
	}
}
1166 static void dump_program(struct r500_fragment_program
*fp
)
1174 for (n
= 0; n
< fp
->inst_end
+1; n
++) {
1175 inst0
= inst
= fp
->inst
[n
].inst0
;
1176 fprintf(stderr
,"%d\t0:CMN_INST 0x%08x:", n
, inst
);
1177 switch(inst
& 0x3) {
1178 case R500_INST_TYPE_ALU
: str
= "ALU"; break;
1179 case R500_INST_TYPE_OUT
: str
= "OUT"; break;
1180 case R500_INST_TYPE_FC
: str
= "FC"; break;
1181 case R500_INST_TYPE_TEX
: str
= "TEX"; break;
1183 fprintf(stderr
,"%s %s %s %s %s ", str
,
1184 inst
& R500_INST_TEX_SEM_WAIT
? "TEX_WAIT" : "",
1185 inst
& R500_INST_LAST
? "LAST" : "",
1186 inst
& R500_INST_NOP
? "NOP" : "",
1187 inst
& R500_INST_ALU_WAIT
? "ALU WAIT" : "");
1188 fprintf(stderr
,"wmask: %s omask: %s\n", to_mask((inst
>> 11) & 0xf),
1189 to_mask((inst
>> 15) & 0xf));
1191 switch(inst0
& 0x3) {
1194 fprintf(stderr
,"\t1:RGB_ADDR 0x%08x:", fp
->inst
[n
].inst1
);
1195 inst
= fp
->inst
[n
].inst1
;
1197 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1198 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1199 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1200 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1203 fprintf(stderr
,"\t2:ALPHA_ADDR 0x%08x:", fp
->inst
[n
].inst2
);
1204 inst
= fp
->inst
[n
].inst2
;
1205 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1206 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1207 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1208 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1210 fprintf(stderr
,"\t3 RGB_INST: 0x%08x:", fp
->inst
[n
].inst3
);
1211 inst
= fp
->inst
[n
].inst3
;
1212 fprintf(stderr
,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1213 (inst
) & 0x3, toswiz((inst
>> 2) & 0x7), toswiz((inst
>> 5) & 0x7), toswiz((inst
>> 8) & 0x7),
1215 (inst
>> 13) & 0x3, toswiz((inst
>> 15) & 0x7), toswiz((inst
>> 18) & 0x7), toswiz((inst
>> 21) & 0x7),
1216 (inst
>> 24) & 0x3);
1219 fprintf(stderr
,"\t4 ALPHA_INST:0x%08x:", fp
->inst
[n
].inst4
);
1220 inst
= fp
->inst
[n
].inst4
;
1221 fprintf(stderr
,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst
& 0xf),
1222 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1223 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), (inst
>> 17) & 0x3,
1224 (inst
>> 19) & 0x3, toswiz((inst
>> 21) & 0x7), (inst
>> 24) & 0x3);
1226 fprintf(stderr
,"\t5 RGBA_INST: 0x%08x:", fp
->inst
[n
].inst5
);
1227 inst
= fp
->inst
[n
].inst5
;
1228 fprintf(stderr
,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst
& 0xf),
1229 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1230 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), toswiz((inst
>> 17) & 0x7), toswiz((inst
>> 20) & 0x7),
1232 (inst
>> 25) & 0x3, toswiz((inst
>> 27) & 0x7), (inst
>> 30) & 0x3);
1237 inst
= fp
->inst
[n
].inst1
;
1238 fprintf(stderr
,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst
, (inst
>> 16) & 0xf,
1239 to_texop((inst
>> 22) & 0x7), (inst
& (1<<25)) ? "ACQ" : "",
1240 (inst
& (1<<26)) ? "IGNUNC" : "", (inst
& (1<<27)) ? "UNSCALED" : "SCALED");
1241 inst
= fp
->inst
[n
].inst2
;
1242 fprintf(stderr
,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst
,
1243 inst
& 127, inst
& (1<<7) ? "(rel)" : "",
1244 toswiz((inst
>> 8) & 0x3), toswiz((inst
>> 10) & 0x3),
1245 toswiz((inst
>> 12) & 0x3), toswiz((inst
>> 14) & 0x3),
1246 (inst
>> 16) & 127, inst
& (1<<23) ? "(rel)" : "",
1247 toswiz((inst
>> 24) & 0x3), toswiz((inst
>> 26) & 0x3),
1248 toswiz((inst
>> 28) & 0x3), toswiz((inst
>> 30) & 0x3));
1250 fprintf(stderr
,"\t3:TEX_DXDY: 0x%08x\n", fp
->inst
[n
].inst3
);
1253 fprintf(stderr
,"\n");