2 * Copyright (C) 2005 Ben Skeggs.
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 * \author Ben Skeggs <darktama@iinet.net.au>
36 * \author Jerome Glisse <j.glisse@gmail.com>
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
40 * \todo Depth write, WPOS/FOGC inputs
49 #include "shader/prog_instruction.h"
50 #include "shader/prog_parameter.h"
51 #include "shader/prog_print.h"
53 #include "r300_context.h"
54 #include "r500_fragprog.h"
56 #include "r300_state.h"
58 /* Mapping Mesa registers to R500 temporaries */
60 int reg
; /* Assigned hw temp */
61 unsigned int refcount
; /* Number of uses by mesa program */
65 * Describe the current lifetime information for an R300 temporary
68 /* Index of the first slot where this register is free in the sense
69 that it can be used as a new destination register.
70 This is -1 if the register has been assigned to a Mesa register
71 and the last access to the register has not yet been emitted */
74 /* Index of the first slot where this register is currently reserved.
75 This is used to stop e.g. a scalar operation from being moved
76 before the allocation time of a register that was first allocated
77 for a vector operation. */
80 /* Index of the first slot in which the register can be used as a
81 source without losing the value that is written by the last
82 emitted instruction that writes to the register */
86 /* Index to the slot where the register was last read.
87 This is also the first slot in which the register may be written again */
93 * Store usage information about an ALU instruction slot during the
94 * compilation of a fragment program.
96 #define SLOT_SRC_VECTOR (1<<0)
97 #define SLOT_SRC_SCALAR (1<<3)
98 #define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR)
99 #define SLOT_OP_VECTOR (1<<16)
100 #define SLOT_OP_SCALAR (1<<17)
101 #define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR)
103 struct r500_pfs_compile_slot
{
104 /* Bitmask indicating which parts of the slot are used, using SLOT_ constants
108 /* Selected sources */
114 * Store information during compilation of fragment programs.
116 struct r500_pfs_compile_state
{
117 struct r500_fragment_program_compiler
*compiler
;
119 /* number of ALU slots used so far */
122 /* Track which (parts of) slots are already filled with instructions */
123 struct r500_pfs_compile_slot slot
[PFS_MAX_ALU_INST
];
125 /* Track the validity of R300 temporaries */
126 struct reg_lifetime hwtemps
[PFS_NUM_TEMP_REGS
];
128 /* Used to map Mesa's inputs/temps onto hardware temps */
130 struct reg_acc temps
[PFS_NUM_TEMP_REGS
];
131 struct reg_acc inputs
[32]; /* don't actually need 32... */
133 /* Track usage of hardware temps, for register allocation,
134 * indirection detection, etc. */
140 * Useful macros and values
/*
 * ERROR(fmt, ...): print "<file>::<function>(): <message>" followed by a
 * newline to stderr, then mark the fragment program as failed by setting
 * cs->compiler->fp->error to GL_TRUE.  The macro expands to code that reads
 * a variable literally named `cs`, so it can only be used where such a
 * variable is in scope.  Note that the macro appends its own "\n".
 *
 * PROG_CODE: declares a local `code` pointing at cs->compiler->code; it also
 * needs `cs` in scope.
 */
142 #define ERROR(fmt, args...) do { \
143 fprintf(stderr, "%s::%s(): " fmt "\n", \
144 __FILE__, __FUNCTION__, ##args); \
145 cs->compiler->fp->error = GL_TRUE; \
148 #define PROG_CODE struct r500_fragment_program_code *code = cs->compiler->code
/* Limits checked by get_temp() and emit_const4fv() respectively. */
150 #define R500_US_NUM_TEMP_REGS 128
151 #define R500_US_NUM_CONST_REGS 256
153 /* "Register" flags */
/* REG_CONSTANT is OR-ed into the index returned by emit_const4fv(). */
154 #define REG_CONSTANT (1 << 8)
155 #define REG_SRC_REL (1 << 9)
156 #define REG_DEST_REL (1 << 7)
/* Per-component selector values used by the swizzle helpers in this file. */
159 #define R500_SWIZZLE_ZERO 4
160 #define R500_SWIZZLE_HALF 5
161 #define R500_SWIZZLE_ONE 6
/* Three selectors packed at bit offsets 0, 3 and 6 (9 bits in total). */
162 #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
163 #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
164 #define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
/* Source modifier values; make_rgb_swizzle() shifts NEG to bit 9 and
 * make_alpha_swizzle() shifts it to bit 3. */
165 #define R500_SWIZ_MOD_NEG 1
166 #define R500_SWIZ_MOD_ABS 2
167 #define R500_SWIZ_MOD_NEG_ABS 3
/*
 * Helpers that shift an already-encoded swizzle value into its bit field of
 * an instruction word.  The argument is parenthesised so that an expression
 * such as (a | b) or (s & 0x1ff) is shifted as a whole instead of being
 * split by operator precedence.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
/*
 * Destination write masks, one bit per channel (G = 0x2, B = 0x4, A = 0x8).
 * The combined names are the bitwise OR of the channels they list; the
 * names containing R include bit 0x1 (the single-channel R define is not
 * visible in this excerpt).  do_inst() shifts these left by 11 into inst0.
 */
182 #define R500_WRITEMASK_G 0x2
183 #define R500_WRITEMASK_B 0x4
184 #define R500_WRITEMASK_RGB 0x7
185 #define R500_WRITEMASK_A 0x8
186 #define R500_WRITEMASK_AR 0x9
187 #define R500_WRITEMASK_AG 0xA
188 #define R500_WRITEMASK_ARG 0xB
189 #define R500_WRITEMASK_AB 0xC
190 #define R500_WRITEMASK_ARGB 0xF
192 /* 1/(2pi), needed for quick modulus in trig insts
193 * Thanks to glisse for pointing out how to do it! */
194 static const GLfloat RCP_2PI
[] = {0.15915494309189535,
197 0.15915494309189535};
199 static const GLfloat LIT
[] = {127.999999,
/*
 * make_rgb_swizzle: translate the first three component selectors of a Mesa
 * source register's swizzle into this file's R500 encoding.
 *
 * Each selector is read with GET_SWZ(); a selector value of 5 is bumped to
 * 6, which is R500_SWIZZLE_ONE here (5 is R500_SWIZZLE_HALF).  The negate
 * modifier is OR-ed in at bit 9, directly above the 9 swizzle bits.
 * NOTE(review): the statement that accumulates `temp` into `swiz` and the
 * condition guarding the negate bit are not visible in this excerpt.
 */
204 static INLINE GLuint
make_rgb_swizzle(struct prog_src_register src
) {
207 /* This could be optimized, but it should be plenty fast already. */
209 for (i
= 0; i
< 3; i
++) {
210 temp
= GET_SWZ(src
.Swizzle
, i
);
211 /* Fix SWIZZLE_ONE: selector 5 becomes 6 (R500_SWIZZLE_ONE) */
212 if (temp
== 5) temp
++;
216 swiz
|= (R500_SWIZ_MOD_NEG
<< 9);
/*
 * make_rgba_swizzle: same selector translation as make_rgb_swizzle(), but
 * it takes an already-packed swizzle value (not a source register) and
 * walks all four components.  A selector value of 5 is bumped to 6
 * (R500_SWIZZLE_ONE).  emit_mov() feeds the result back into GET_SWZ() and
 * masks its low 9 bits.
 * NOTE(review): the accumulation and return statements are not visible in
 * this excerpt.
 */
220 static INLINE GLuint
make_rgba_swizzle(GLuint src
) {
224 for (i
= 0; i
< 4; i
++) {
225 temp
= GET_SWZ(src
, i
);
226 /* Fix SWIZZLE_ONE: selector 5 becomes 6 (R500_SWIZZLE_ONE) */
227 if (temp
== 5) temp
++;
/*
 * make_alpha_swizzle: translate the fourth component selector (index 3) of
 * a source register's swizzle for use in the alpha instruction word.
 * A selector value of 5 is bumped to 6 (R500_SWIZZLE_ONE), and the negate
 * modifier is OR-ed in at bit 3, directly above the 3-bit selector.
 * NOTE(review): the condition guarding the negate bit is not visible in
 * this excerpt.
 */
233 static INLINE GLuint
make_alpha_swizzle(struct prog_src_register src
) {
234 GLuint swiz
= GET_SWZ(src
.Swizzle
, 3);
236 if (swiz
== 5) swiz
++;
239 swiz
|= (R500_SWIZ_MOD_NEG
<< 3);
/*
 * make_sop_swizzle: selector for a scalar operation's single operand.
 * Only the first component selector (index 0) of the source swizzle is
 * used; a value of 5 is bumped to 6 (R500_SWIZZLE_ONE).  do_inst() passes
 * the result to emit_sop() as its `swiz` argument.
 */
244 static INLINE GLuint
make_sop_swizzle(struct prog_src_register src
) {
245 GLuint swiz
= GET_SWZ(src
.Swizzle
, 0);
247 if (swiz
== 5) swiz
++;
/*
 * make_strq_swizzle: build the texture-coordinate swizzle from all four
 * component selectors of a source register.  Each selector is masked with
 * 0x3, so only its low two bits are kept and the constant selectors used
 * elsewhere in this file (values 4..6) cannot be represented.  emit_tex()
 * places the result with MAKE_SWIZ_TEX_STRQ().
 * NOTE(review): the statement that packs `temp` into `swiz` is not visible
 * in this excerpt.
 */
251 static INLINE GLuint
make_strq_swizzle(struct prog_src_register src
) {
252 GLuint swiz
= 0x0, temp
= 0x0;
254 for (i
= 0; i
< 4; i
++) {
255 temp
= GET_SWZ(src
.Swizzle
, i
) & 0x3;
/*
 * get_temp: hardware register index of scratch temporary number `slot`.
 *
 * The index is code->temp_reg_offset + cs->temp_in_use + slot.  An index
 * outside the register file is reported through ERROR(), which also flags
 * the program as failed.
 * NOTE(review): the declaration of `code` and the return statement are not
 * visible in this excerpt.
 */
261 static int get_temp(struct r500_pfs_compile_state
*cs
, int slot
) {
265 int r
= code
->temp_reg_offset
+ cs
->temp_in_use
+ slot
;
/* R500_US_NUM_TEMP_REGS is a count, so valid indices are
 * 0 .. R500_US_NUM_TEMP_REGS - 1 and an index equal to the count is
 * already out of range (the previous test used '>'). */
267 if (r
>= R500_US_NUM_TEMP_REGS
) {
268 ERROR("Too many temporary registers requested, can't compile!\n");
274 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
/*
 * emit_const4fv: return the register token for the constant `cp`.
 *
 * The existing entries code->constant[0 .. const_nr-1] are searched for
 * `cp` by pointer comparison, so two different arrays holding equal values
 * get separate slots.  When no entry matches, `cp` is stored at `index`
 * unless that would exceed R500_US_NUM_CONST_REGS, in which case ERROR()
 * is raised.  The resulting token is the slot index with REG_CONSTANT set.
 * NOTE(review): the remaining parameters, the update of const_nr and the
 * return statement are not visible in this excerpt.
 */
275 static GLuint
emit_const4fv(struct r500_pfs_compile_state
*cs
,
/* Look for an existing slot that already refers to cp. */
283 for (index
= 0; index
< code
->const_nr
; ++index
) {
284 if (code
->constant
[index
] == cp
)
/* Not found: index has run up to const_nr, so take a new slot. */
288 if (index
>= code
->const_nr
) {
289 if (index
>= R500_US_NUM_CONST_REGS
) {
290 ERROR("Out of hw constants!\n");
295 code
->constant
[index
] = cp
;
/* Tag the index so it is recognisable as a constant register. */
298 reg
= index
| REG_CONSTANT
;
/*
 * make_src: map a Mesa source register to a hardware register token,
 * selected by the register file named in the case labels below.
 *
 *  - PROGRAM_TEMPORARY: src.Index offset by code->temp_reg_offset.
 *  - cs->inputs[src.Index].reg for the branch whose case label is not
 *    visible in this excerpt (presumably the input file - confirm).
 *  - PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM, PROGRAM_STATE_VAR,
 *    PROGRAM_NAMED_PARAM, PROGRAM_CONSTANT: the value array is registered
 *    through emit_const4fv() and its token is used.
 *  - Any file reaching the ERROR() call is reported as unsupported.
 * NOTE(review): the switch header, break statements and the return are not
 * visible in this excerpt.
 */
302 static GLuint
make_src(struct r500_pfs_compile_state
*cs
, struct prog_src_register src
) {
306 case PROGRAM_TEMPORARY
:
307 reg
= src
.Index
+ code
->temp_reg_offset
;
310 reg
= cs
->inputs
[src
.Index
].reg
;
/* Program-local parameters live in the Mesa program itself. */
312 case PROGRAM_LOCAL_PARAM
:
313 reg
= emit_const4fv(cs
,
314 cs
->compiler
->fp
->mesa_program
.Base
.LocalParams
[src
.Index
]);
/* Environment parameters are read from the GL context. */
316 case PROGRAM_ENV_PARAM
:
317 reg
= emit_const4fv(cs
,
318 cs
->compiler
->compiler
.Ctx
->FragmentProgram
.Parameters
[src
.Index
]);
/* These three files all index the program's parameter list. */
320 case PROGRAM_STATE_VAR
:
321 case PROGRAM_NAMED_PARAM
:
322 case PROGRAM_CONSTANT
:
323 reg
= emit_const4fv(cs
,
324 cs
->compiler
->fp
->mesa_program
.Base
.Parameters
->ParameterValues
[src
.Index
]);
326 case PROGRAM_BUILTIN
:
330 ERROR("Can't handle src.File %x\n", src
.File
);
/*
 * make_dest: map a Mesa destination register to a hardware register index.
 *
 * PROGRAM_TEMPORARY destinations are dest.Index offset by
 * code->temp_reg_offset, the same mapping make_src() uses for temporaries.
 * Files that reach the ERROR() call are reported as unsupported.
 * NOTE(review): the switch header, the branch the "multiple rendering
 * targets" comment belongs to, and the return are not visible in this
 * excerpt.
 */
337 static GLuint
make_dest(struct r500_pfs_compile_state
*cs
, struct prog_dst_register dest
) {
341 case PROGRAM_TEMPORARY
:
342 reg
= dest
.Index
+ code
->temp_reg_offset
;
345 /* Eventually we may need to handle multiple
346 * rendering targets... */
349 case PROGRAM_BUILTIN
:
353 ERROR("Can't handle dest.File %x\n", dest
.File
);
/*
 * emit_tex: encode the texture instruction `fpi` into code->inst[counter].
 *
 * cs      - compile state (also needed by ERROR() and get_temp()).
 * fpi     - Mesa instruction; its opcode selects one of the TEXKILL / LD /
 *           LODBIAS / PROJ texture operations, anything else is an error.
 * dest    - hardware destination index, used by the output copy below.
 * counter - instruction slot to fill.
 *
 * When the destination is in PROGRAM_OUTPUT the fetch goes to scratch
 * temporary 0 and a second, output-type instruction copies it to `dest`.
 * NOTE(review): the local declarations, the non-output assignment of
 * hwdest, the case labels, and whatever advances `counter` between the two
 * instructions are not visible in this excerpt.
 */
360 static void emit_tex(struct r500_pfs_compile_state
*cs
,
361 struct prog_instruction
*fpi
, int dest
, int counter
)
/* Write mask positioned for inst0 of an ALU/TEX instruction. */
367 mask
= fpi
->DstReg
.WriteMask
<< 11;
368 hwsrc
= make_src(cs
, fpi
->SrcReg
[0]);
/* Outputs cannot be fetched into directly: go through scratch temp 0. */
370 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
371 hwdest
= get_temp(cs
, 0);
376 code
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
377 | R500_INST_TEX_SEM_WAIT
;
379 code
->inst
[counter
].inst1
= R500_TEX_ID(fpi
->TexSrcUnit
)
380 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
/* Rectangle textures are flagged R500_TEX_UNSCALED. */
382 if (fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
383 code
->inst
[counter
].inst1
|= R500_TEX_UNSCALED
;
/* Pick the texture operation from the Mesa opcode. */
385 switch (fpi
->Opcode
) {
387 code
->inst
[counter
].inst1
|= R500_TEX_INST_TEXKILL
;
390 code
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
393 code
->inst
[counter
].inst1
|= R500_TEX_INST_LODBIAS
;
396 code
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
399 ERROR("emit_tex can't handle opcode %x\n", fpi
->Opcode
);
/* Source address with the coordinate swizzle from the Mesa source;
 * the destination swizzle is the identity. */
402 code
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
403 | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi
->SrcReg
[0]))
404 /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
405 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */
406 | R500_TEX_DST_ADDR(hwdest
)
407 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
408 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
410 code
->inst
[counter
].inst3
= 0x0;
411 code
->inst
[counter
].inst4
= 0x0;
412 code
->inst
[counter
].inst5
= 0x0;
/* Output destination: emit an output-type CMP that moves scratch temp 0
 * to `dest`.  mask << 4 moves the write mask from bit 11 to bit 15. */
414 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
416 code
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
417 | R500_INST_TEX_SEM_WAIT
| (mask
<< 4);
418 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 0));
419 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 0));
420 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
421 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
422 | R500_ALU_RGB_SEL_B_SRC0
423 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
)
424 | R500_ALU_RGB_OMOD_DISABLE
;
425 code
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
426 | R500_ALPHA_ADDRD(dest
)
427 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A
)
428 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A
)
429 | R500_ALPHA_OMOD_DISABLE
;
430 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
431 | R500_ALU_RGBA_ADDRD(dest
)
432 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
433 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
/*
 * emit_alu: reset instruction slot `counter` and fill in the parts of an
 * ALU instruction that depend only on the destination of `fpi`.
 *
 * All six instruction words are zeroed first.  For a PROGRAM_OUTPUT
 * destination inst0 becomes an output-type instruction; a colour result
 * gets the write mask at bit 15, and a depth result sets
 * R500_ALPHA_W_OMASK in inst4 and records writes_depth on the program.
 * The other visible assignment makes inst0 a plain ALU instruction with
 * the write mask at bit 11.  R500_INST_TEX_SEM_WAIT is OR-ed in last.
 *
 * Because inst4 may already carry R500_ALPHA_W_OMASK on return, callers in
 * this file generally OR into inst4 rather than assign it.
 * NOTE(review): the else/closing-brace structure between the two inst0
 * assignments is not visible in this excerpt.
 */
437 static void emit_alu(struct r500_pfs_compile_state
*cs
, int counter
, struct prog_instruction
*fpi
) {
439 /* Ideally, we shouldn't have to explicitly clear memory here! */
440 code
->inst
[counter
].inst0
= 0x0;
441 code
->inst
[counter
].inst1
= 0x0;
442 code
->inst
[counter
].inst2
= 0x0;
443 code
->inst
[counter
].inst3
= 0x0;
444 code
->inst
[counter
].inst4
= 0x0;
445 code
->inst
[counter
].inst5
= 0x0;
447 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
448 code
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
;
/* Colour output: write mask goes to bit 15 of inst0. */
450 if (fpi
->DstReg
.Index
== FRAG_RESULT_COLR
)
451 code
->inst
[counter
].inst0
|= (fpi
->DstReg
.WriteMask
<< 15);
/* Depth output: flagged in inst4 and recorded on the program. */
453 if (fpi
->DstReg
.Index
== FRAG_RESULT_DEPR
) {
454 code
->inst
[counter
].inst4
|= R500_ALPHA_W_OMASK
;
455 /* Notify the state emission! */
456 cs
->compiler
->fp
->writes_depth
= GL_TRUE
;
/* Plain ALU instruction: write mask goes to bit 11 of inst0. */
459 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
461 | (fpi
->DstReg
.WriteMask
<< 11);
464 code
->inst
[counter
].inst0
|= R500_INST_TEX_SEM_WAIT
;
/*
 * emit_mov: encode a move of hardware register `src_reg` to `dest`.
 *
 * cs, counter, fpi - passed to emit_alu() to reset slot `counter` and set
 *                    the destination-dependent bits.
 * src_reg          - hardware source register token.
 * swizzle          - Mesa-encoded swizzle; converted here with
 *                    make_rgba_swizzle().
 * dest             - hardware destination index.
 *
 * The move is built as a CMP with the same source selected for A and B and
 * a zero C operand.  The low 9 bits of the converted swizzle feed the RGB
 * operands and its fourth selector feeds the alpha operands.
 */
467 static void emit_mov(struct r500_pfs_compile_state
*cs
, int counter
, struct prog_instruction
*fpi
, GLuint src_reg
, GLuint swizzle
, GLuint dest
) {
469 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
470 * it is technically more accurate and recommended by ATI/AMD. */
471 emit_alu(cs
, counter
, fpi
);
472 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src_reg
);
473 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src_reg
);
474 /* (De)mangle the swizzle from Mesa to R500. */
475 swizzle
= make_rgba_swizzle(swizzle
);
476 /* 0x1FF is 9 bits, size of an RGB swizzle. */
477 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
478 | MAKE_SWIZ_RGB_A((swizzle
& 0x1ff))
479 | R500_ALU_RGB_SEL_B_SRC0
480 | MAKE_SWIZ_RGB_B((swizzle
& 0x1ff))
481 | R500_ALU_RGB_OMOD_DISABLE
;
/* OR, not assign: emit_alu() may already have set bits in inst4. */
482 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_CMP
483 | R500_ALPHA_ADDRD(dest
)
484 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle
, 3))
485 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle
, 3))
486 | R500_ALPHA_OMOD_DISABLE
;
487 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
488 | R500_ALU_RGBA_ADDRD(dest
)
489 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
490 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
/*
 * emit_mad: encode a multiply-add for `fpi` into slot `counter`.
 *
 * one, two, three - choose what feeds operands A, B and C.  Each is either
 *                   an index into fpi->SrcReg, or R500_SWIZZLE_ZERO /
 *                   R500_SWIZZLE_ONE to use the constant 0 or 1 instead of
 *                   a register.  Any other value reaches the ERROR() call
 *                   for that operand.
 *
 * The destination comes from make_dest(fpi->DstReg).  do_inst() uses the
 * constant operands to build simpler operations on top of MAD, e.g.
 * emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1) for 1*src0+src1.
 * NOTE(review): the switch headers, the case labels for register indices
 * and the break statements are not visible in this excerpt.
 */
493 static void emit_mad(struct r500_pfs_compile_state
*cs
, int counter
, struct prog_instruction
*fpi
, int one
, int two
, int three
) {
495 /* Note: This code was all Corbin's. Corbin is a rather hackish coder.
496 * If you can make it pretty or fast, please do so! */
497 emit_alu(cs
, counter
, fpi
);
498 /* Common MAD stuff */
499 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
500 | R500_ALPHA_ADDRD(make_dest(cs
, fpi
->DstReg
));
501 code
->inst
[counter
].inst5
|= R500_ALU_RGBA_OP_MAD
502 | R500_ALU_RGBA_ADDRD(make_dest(cs
, fpi
->DstReg
));
/* Operand A: register SrcReg[one] through source slot 0 ... */
507 code
->inst
[counter
].inst1
|= R500_RGB_ADDR0(make_src(cs
, fpi
->SrcReg
[one
]));
508 code
->inst
[counter
].inst2
|= R500_ALPHA_ADDR0(make_src(cs
, fpi
->SrcReg
[one
]));
509 code
->inst
[counter
].inst3
|= R500_ALU_RGB_SEL_A_SRC0
510 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[one
]));
511 code
->inst
[counter
].inst4
|= R500_ALPHA_SEL_A_SRC0
512 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[one
]));
/* ... or a constant 0 / 1 expressed purely as a swizzle. */
514 case R500_SWIZZLE_ZERO
:
515 code
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO
);
516 code
->inst
[counter
].inst4
|= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO
);
518 case R500_SWIZZLE_ONE
:
519 code
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
);
520 code
->inst
[counter
].inst4
|= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
);
523 ERROR("Bad src index in emit_mad: %d\n", one
);
/* Operand B: register SrcReg[two] through source slot 1 ... */
530 code
->inst
[counter
].inst1
|= R500_RGB_ADDR1(make_src(cs
, fpi
->SrcReg
[two
]));
531 code
->inst
[counter
].inst2
|= R500_ALPHA_ADDR1(make_src(cs
, fpi
->SrcReg
[two
]));
532 code
->inst
[counter
].inst3
|= R500_ALU_RGB_SEL_B_SRC1
533 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[two
]));
534 code
->inst
[counter
].inst4
|= R500_ALPHA_SEL_B_SRC1
535 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[two
]));
/* ... or a constant 0 / 1. */
537 case R500_SWIZZLE_ZERO
:
538 code
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
539 code
->inst
[counter
].inst4
|= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO
);
541 case R500_SWIZZLE_ONE
:
542 code
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
543 code
->inst
[counter
].inst4
|= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE
);
546 ERROR("Bad src index in emit_mad: %d\n", two
);
/* Operand C: register SrcReg[three] through source slot 2; both its RGB
 * and alpha selections live in inst5 ... */
553 code
->inst
[counter
].inst1
|= R500_RGB_ADDR2(make_src(cs
, fpi
->SrcReg
[three
]));
554 code
->inst
[counter
].inst2
|= R500_ALPHA_ADDR2(make_src(cs
, fpi
->SrcReg
[three
]));
555 code
->inst
[counter
].inst5
|= R500_ALU_RGBA_SEL_C_SRC2
556 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[three
]))
557 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
558 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[three
]));
/* ... or a constant 0 / 1. */
560 case R500_SWIZZLE_ZERO
:
561 code
->inst
[counter
].inst5
|= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
562 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
564 case R500_SWIZZLE_ONE
:
565 code
->inst
[counter
].inst5
|= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE
)
566 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE
);
569 ERROR("Bad src index in emit_mad: %d\n", three
);
/*
 * emit_sop: encode a scalar operation into slot `counter`.
 *
 * cs, counter, fpi - passed to emit_alu() to reset the slot and set the
 *                    destination-dependent bits.
 * opcode           - selects the alpha operation; the visible choices are
 *                    COS, EX2, LN2, RCP, RSQ and SIN, anything else reaches
 *                    the ERROR() call.  do_inst() passes Mesa opcodes such
 *                    as OPCODE_COS, OPCODE_EX2 and OPCODE_LG2.
 * src              - hardware source register token (source slot 0).
 * swiz             - selector of the source component to operate on.
 * dest             - hardware destination index.
 *
 * The RGB half is set to R500_ALU_RGBA_OP_SOP and the operation itself is
 * chosen in inst4.
 * NOTE(review): the switch header, case labels and break statements are
 * not visible in this excerpt.
 */
574 static void emit_sop(struct r500_pfs_compile_state
*cs
, int counter
, struct prog_instruction
*fpi
, int opcode
, GLuint src
, GLuint swiz
, GLuint dest
) {
576 emit_alu(cs
, counter
, fpi
);
577 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
);
578 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
);
/* OR, not assign: emit_alu() may already have set bits in inst4. */
579 code
->inst
[counter
].inst4
|= R500_ALPHA_ADDRD(dest
)
580 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(swiz
);
581 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
582 | R500_ALU_RGBA_ADDRD(dest
);
/* One alpha opcode per supported scalar operation. */
585 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_COS
;
588 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_EX2
;
591 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_LN2
;
594 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_RCP
;
597 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_RSQ
;
600 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_SIN
;
603 ERROR("Bad opcode in emit_sop: %d\n", opcode
);
608 static int do_inst(struct r500_pfs_compile_state
*cs
, struct prog_instruction
*fpi
, int counter
) {
610 GLuint src
[3], dest
= 0;
613 if (fpi
->Opcode
!= OPCODE_KIL
) {
614 dest
= make_dest(cs
, fpi
->DstReg
);
617 switch (fpi
->Opcode
) {
619 emit_mov(cs
, counter
, fpi
, make_src(cs
, fpi
->SrcReg
[0]), fpi
->SrcReg
[0].Swizzle
, dest
);
620 code
->inst
[counter
].inst3
|= R500_ALU_RGB_MOD_A_ABS
621 | R500_ALU_RGB_MOD_B_ABS
;
622 code
->inst
[counter
].inst4
|= R500_ALPHA_MOD_A_ABS
623 | R500_ALPHA_MOD_B_ABS
;
626 /* Variation on MAD: 1*src0+src1 */
627 emit_mad(cs
, counter
, fpi
, R500_SWIZZLE_ONE
, 0, 1);
630 /* This inst's selects need to be swapped as follows:
631 * 0 -> C ; 1 -> B ; 2 -> A */
632 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
633 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
634 src
[2] = make_src(cs
, fpi
->SrcReg
[2]);
635 emit_alu(cs
, counter
, fpi
);
636 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[2])
637 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[0]);
638 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[2])
639 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[0]);
640 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
641 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[2]))
642 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
643 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_CMP
644 | R500_ALPHA_ADDRD(dest
)
645 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[2]))
646 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
647 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
648 | R500_ALU_RGBA_ADDRD(dest
)
649 | R500_ALU_RGBA_SEL_C_SRC2
650 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
651 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
652 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
655 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
656 src
[1] = emit_const4fv(cs
, RCP_2PI
);
657 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
658 | (R500_WRITEMASK_ARGB
<< 11);
659 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
660 | R500_RGB_ADDR1(src
[1]);
661 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
662 | R500_ALPHA_ADDR1(src
[1]);
663 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
664 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
665 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
666 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
667 | R500_ALPHA_ADDRD(get_temp(cs
, 0))
668 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
669 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
670 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
671 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 0))
672 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
673 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
675 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
676 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 0));
677 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 0));
678 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
679 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
680 code
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
681 | R500_ALPHA_ADDRD(get_temp(cs
, 1))
682 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
683 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
684 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 1));
686 emit_sop(cs
, counter
, fpi
, OPCODE_COS
, get_temp(cs
, 1), make_sop_swizzle(fpi
->SrcReg
[0]), dest
);
689 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
690 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
691 emit_alu(cs
, counter
, fpi
);
692 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
693 | R500_RGB_ADDR1(src
[1]);
694 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
695 | R500_ALPHA_ADDR1(src
[1]);
696 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
697 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
698 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
699 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_DP
700 | R500_ALPHA_ADDRD(dest
)
701 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
702 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
703 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
704 | R500_ALU_RGBA_ADDRD(dest
);
707 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
708 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
710 emit_alu(cs
, counter
, fpi
);
711 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
712 | R500_RGB_ADDR1(src
[1]);
713 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
714 | R500_ALPHA_ADDR1(src
[1]);
715 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
716 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
717 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
718 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_DP
719 | R500_ALPHA_ADDRD(dest
)
720 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
721 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
722 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
723 | R500_ALU_RGBA_ADDRD(dest
);
726 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
727 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
729 emit_alu(cs
, counter
, fpi
);
730 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
731 | R500_RGB_ADDR1(src
[1]);
732 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
733 | R500_ALPHA_ADDR1(src
[1]);
734 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
735 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
736 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
737 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_DP
738 | R500_ALPHA_ADDRD(dest
)
739 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
740 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
741 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
742 | R500_ALU_RGBA_ADDRD(dest
);
745 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
746 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
747 /* [1, src0.y*src1.y, src0.z, src1.w]
748 * So basically MUL with lotsa swizzling. */
749 emit_alu(cs
, counter
, fpi
);
750 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
751 | R500_RGB_ADDR1(src
[1]);
752 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
753 | R500_ALPHA_ADDR1(src
[1]);
754 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
755 | R500_ALU_RGB_SEL_B_SRC1
;
756 /* Select [1, y, z, 1] */
757 temp_swiz
= (make_rgb_swizzle(fpi
->SrcReg
[0]) & ~0x7) | R500_SWIZZLE_ONE
;
758 code
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_A(temp_swiz
);
759 /* Select [1, y, 1, w] */
760 temp_swiz
= (make_rgb_swizzle(fpi
->SrcReg
[0]) & ~0x1c7) | R500_SWIZZLE_ONE
| (R500_SWIZZLE_ONE
<< 6);
761 code
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_B(temp_swiz
);
762 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
763 | R500_ALPHA_ADDRD(dest
)
764 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
765 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
766 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
767 | R500_ALU_RGBA_ADDRD(dest
)
768 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
769 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
772 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
773 emit_sop(cs
, counter
, fpi
, OPCODE_EX2
, src
[0], make_sop_swizzle(fpi
->SrcReg
[0]), dest
);
776 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
777 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
778 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
779 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
780 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
781 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
782 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_FRC
783 | R500_ALPHA_ADDRD(get_temp(cs
, 0))
784 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
785 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
786 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 0));
788 emit_alu(cs
, counter
, fpi
);
789 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
790 | R500_RGB_ADDR1(get_temp(cs
, 0));
791 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
792 | R500_ALPHA_ADDR1(get_temp(cs
, 0));
793 code
->inst
[counter
].inst3
= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
794 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
795 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
796 | R500_ALPHA_ADDRD(dest
)
797 | R500_ALPHA_SWIZ_A_A
798 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
799 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
800 | R500_ALU_RGBA_ADDRD(dest
)
801 | R500_ALU_RGBA_SEL_C_SRC1
802 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
803 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
804 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]))
805 | R500_ALU_RGBA_MOD_C_NEG
;
808 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
809 emit_alu(cs
, counter
, fpi
);
810 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
811 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
812 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
813 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
814 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_FRC
815 | R500_ALPHA_ADDRD(dest
)
816 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
817 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
818 | R500_ALU_RGBA_ADDRD(dest
);
821 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
822 emit_sop(cs
, counter
, fpi
, OPCODE_LG2
, src
[0], make_sop_swizzle(fpi
->SrcReg
[0]), dest
);
825 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
826 src
[1] = emit_const4fv(cs
, LIT
);
827 /* First inst: MAX temp, input, [0, 0, 0, -128]
829 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
830 | (R500_WRITEMASK_ARG
<< 11);
831 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
832 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
833 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
834 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
835 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
836 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
837 | R500_ALPHA_ADDRD(get_temp(cs
, 0))
838 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
839 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
840 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
841 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 0));
843 /* Second inst: MIN temp, temp, [x, x, x, 128]
845 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_A
<< 11);
846 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 0)) | R500_RGB_ADDR1(src
[1]);
847 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 0)) | R500_ALPHA_ADDR1(src
[1]);
848 /* code->inst[counter].inst3; */
849 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
850 | R500_ALPHA_ADDRD(dest
)
851 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
852 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
853 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
854 | R500_ALU_RGBA_ADDRD(dest
);
856 /* Third-fifth insts: POW temp, temp.y, temp.w
858 emit_sop(cs
, counter
, fpi
, OPCODE_LG2
, get_temp(cs
, 0), SWIZZLE_Y
, get_temp(cs
, 1));
859 code
->inst
[counter
].inst0
|= (R500_WRITEMASK_ARGB
<< 11);
861 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
862 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 1))
863 | R500_RGB_ADDR1(get_temp(cs
, 0));
864 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 1))
865 | R500_ALPHA_ADDR1(get_temp(cs
, 0));
866 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
867 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
868 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
869 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
870 | R500_ALPHA_ADDRD(get_temp(cs
, 1))
871 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
872 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
873 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
874 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 1))
875 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
876 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
878 emit_sop(cs
, counter
, fpi
, OPCODE_EX2
, get_temp(cs
, 1), SWIZZLE_W
, get_temp(cs
, 0));
879 code
->inst
[counter
].inst0
|= (R500_WRITEMASK_B
<< 11);
881 /* Sixth inst: CMP dest, temp.xxxx, temp.[1, x, z, 1], temp.[1, x, 0, 1];
883 * This inst's selects need to be swapped as follows:
884 * 0 -> C ; 1 -> B ; 2 -> A */
885 emit_alu(cs
, counter
, fpi
);
886 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 0));
887 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 0));
888 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
889 | R500_ALU_RGB_R_SWIZ_A_1
890 | R500_ALU_RGB_G_SWIZ_A_R
891 | R500_ALU_RGB_B_SWIZ_A_B
892 | R500_ALU_RGB_SEL_B_SRC0
893 | R500_ALU_RGB_R_SWIZ_B_1
894 | R500_ALU_RGB_G_SWIZ_B_R
895 | R500_ALU_RGB_B_SWIZ_B_0
;
896 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_CMP
897 | R500_ALPHA_ADDRD(dest
)
898 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_1
899 | R500_ALPHA_SEL_B_SRC0
| R500_ALPHA_SWIZ_B_1
;
900 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
901 | R500_ALU_RGBA_ADDRD(dest
)
902 | R500_ALU_RGBA_SEL_C_SRC0
903 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
904 | R500_ALU_RGBA_R_SWIZ_R
905 | R500_ALU_RGBA_G_SWIZ_R
906 | R500_ALU_RGBA_B_SWIZ_R
907 | R500_ALU_RGBA_A_SWIZ_R
;
910 /* src0 * src1 + INV(src0) * src2
911 * 1) MUL src0, src1, temp
912 * 2) PRE 1-src0; MAD srcp, src2, temp */
913 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
914 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
915 src
[2] = make_src(cs
, fpi
->SrcReg
[2]);
916 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
917 | R500_INST_NOP
| (R500_WRITEMASK_ARGB
<< 11);
918 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
919 | R500_RGB_ADDR1(src
[1]);
920 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
921 | R500_ALPHA_ADDR1(src
[1]);
922 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
923 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
924 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
925 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
926 | R500_ALPHA_ADDRD(get_temp(cs
, 0))
927 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
928 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
929 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
930 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 0))
931 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
932 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
934 emit_alu(cs
, counter
, fpi
);
935 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
936 | R500_RGB_ADDR1(src
[2])
937 | R500_RGB_ADDR2(get_temp(cs
, 0))
938 | R500_RGB_SRCP_OP_1_MINUS_RGB0
;
939 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
940 | R500_ALPHA_ADDR1(src
[2])
941 | R500_ALPHA_ADDR2(get_temp(cs
, 0))
942 | R500_ALPHA_SRCP_OP_1_MINUS_A0
;
943 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRCP
944 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
945 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
946 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
947 | R500_ALPHA_ADDRD(dest
)
948 | R500_ALPHA_SEL_A_SRCP
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
949 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
950 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
951 | R500_ALU_RGBA_ADDRD(dest
)
952 | R500_ALU_RGBA_SEL_C_SRC2
| MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
953 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
954 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
957 emit_mad(cs
, counter
, fpi
, 0, 1, 2);
960 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
961 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
962 emit_alu(cs
, counter
, fpi
);
963 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
964 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
965 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
966 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
967 | R500_ALU_RGB_SEL_B_SRC1
968 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
969 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAX
970 | R500_ALPHA_ADDRD(dest
)
971 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
972 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
973 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
974 | R500_ALU_RGBA_ADDRD(dest
);
977 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
978 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
979 emit_alu(cs
, counter
, fpi
);
980 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
981 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
982 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
983 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
984 | R500_ALU_RGB_SEL_B_SRC1
985 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
986 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_MIN
987 | R500_ALPHA_ADDRD(dest
)
988 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
989 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
990 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
991 | R500_ALU_RGBA_ADDRD(dest
);
994 emit_mov(cs
, counter
, fpi
, make_src(cs
, fpi
->SrcReg
[0]), fpi
->SrcReg
[0].Swizzle
, dest
);
997 /* Variation on MAD: src0*src1+0 */
998 emit_mad(cs
, counter
, fpi
, 0, 1, R500_SWIZZLE_ZERO
);
1001 /* POW(a,b) = EX2(LN2(a)*b) */
1002 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
1003 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
1004 emit_sop(cs
, counter
, fpi
, OPCODE_LG2
, src
[0], make_sop_swizzle(fpi
->SrcReg
[0]), get_temp(cs
, 0));
1005 code
->inst
[counter
].inst0
|= (R500_WRITEMASK_ARGB
<< 11);
1007 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
1008 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 0))
1009 | R500_RGB_ADDR1(src
[1]);
1010 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 0))
1011 | R500_ALPHA_ADDR1(src
[1]);
1012 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
1013 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
1014 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
1015 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
1016 | R500_ALPHA_ADDRD(get_temp(cs
, 1))
1017 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
1018 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
1019 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
1020 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 1))
1021 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
1022 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
1024 emit_sop(cs
, counter
, fpi
, OPCODE_EX2
, get_temp(cs
, 1), make_sop_swizzle(fpi
->SrcReg
[0]), dest
);
1027 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
1028 emit_sop(cs
, counter
, fpi
, OPCODE_RCP
, src
[0], make_sop_swizzle(fpi
->SrcReg
[0]), dest
);
1031 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
1032 emit_sop(cs
, counter
, fpi
, OPCODE_RSQ
, src
[0], make_sop_swizzle(fpi
->SrcReg
[0]), dest
);
1035 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
1036 src
[1] = emit_const4fv(cs
, RCP_2PI
);
1037 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
1038 | (R500_WRITEMASK_ARGB
<< 11);
1039 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
1040 | R500_RGB_ADDR1(src
[1]);
1041 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
1042 | R500_ALPHA_ADDR1(src
[1]);
1043 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
1044 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
1045 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
1046 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
1047 | R500_ALPHA_ADDRD(get_temp(cs
, 0))
1048 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
1049 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
1050 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
1051 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 0))
1052 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
1053 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
1055 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
1056 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 0));
1057 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 0));
1058 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
1059 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
1060 code
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
1061 | R500_ALPHA_ADDRD(get_temp(cs
, 1))
1062 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
1063 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
1064 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 1));
1066 /* Do a cosine, then a sine, masking out the channels we want to protect. */
1067 /* Cosine only goes in R (x) channel. */
1068 fpi
->DstReg
.WriteMask
= 0x1;
1069 emit_sop(cs
, counter
, fpi
, OPCODE_COS
, get_temp(cs
, 1), make_sop_swizzle(fpi
->SrcReg
[0]), dest
);
1071 /* Sine only goes in G (y) channel. */
1072 fpi
->DstReg
.WriteMask
= 0x2;
1073 emit_sop(cs
, counter
, fpi
, OPCODE_SIN
, get_temp(cs
, 1), make_sop_swizzle(fpi
->SrcReg
[0]), dest
);
1076 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
1077 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
1078 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
1079 | (R500_WRITEMASK_ARGB
<< 11);
1080 code
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
1081 | R500_RGB_ADDR2(src
[1]);
1082 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
1083 | R500_ALPHA_ADDR2(src
[1]);
1084 code
->inst
[counter
].inst3
= /* 1 */
1085 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
1086 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
1087 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
1088 | R500_ALPHA_ADDRD(get_temp(cs
, 0))
1089 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
1090 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
1091 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
1092 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 0))
1093 | R500_ALU_RGBA_SEL_C_SRC2
1094 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
1095 | R500_ALU_RGBA_MOD_C_NEG
1096 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1097 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
1098 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
1100 /* This inst's selects need to be swapped as follows:
1101 * 0 -> C ; 1 -> B ; 2 -> A */
1102 emit_alu(cs
, counter
, fpi
);
1103 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 0));
1104 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 0));
1105 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
1106 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
1107 | R500_ALU_RGB_SEL_B_SRC0
1108 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
1109 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_CMP
1110 | R500_ALPHA_ADDRD(dest
)
1111 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
1112 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO
);
1113 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
1114 | R500_ALU_RGBA_ADDRD(dest
)
1115 | R500_ALU_RGBA_SEL_C_SRC0
1116 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB
)
1117 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1118 | R500_ALU_RGBA_A_SWIZ_A
;
1121 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
1122 src
[1] = emit_const4fv(cs
, RCP_2PI
);
1123 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
1124 | (R500_WRITEMASK_ARGB
<< 11);
1125 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
1126 | R500_RGB_ADDR1(src
[1]);
1127 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
1128 | R500_ALPHA_ADDR1(src
[1]);
1129 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
1130 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
1131 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
1132 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
1133 | R500_ALPHA_ADDRD(get_temp(cs
, 0))
1134 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
1135 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
1136 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
1137 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 0))
1138 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
1139 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
1141 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
1142 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 0));
1143 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 0));
1144 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
1145 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
1146 code
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
1147 | R500_ALPHA_ADDRD(get_temp(cs
, 1))
1148 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
1149 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
1150 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 1));
1152 emit_sop(cs
, counter
, fpi
, OPCODE_SIN
, get_temp(cs
, 1), make_sop_swizzle(fpi
->SrcReg
[0]), dest
);
1155 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
1156 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
1157 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
1158 | (R500_WRITEMASK_ARGB
<< 11);
1159 code
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
1160 | R500_RGB_ADDR2(src
[1]);
1161 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
1162 | R500_ALPHA_ADDR2(src
[1]);
1163 code
->inst
[counter
].inst3
= /* 1 */
1164 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
1165 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
1166 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
1167 | R500_ALPHA_ADDRD(get_temp(cs
, 0))
1168 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
1169 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
1170 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
1171 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 0))
1172 | R500_ALU_RGBA_SEL_C_SRC2
1173 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
1174 | R500_ALU_RGBA_MOD_C_NEG
1175 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1176 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
1177 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
1179 /* This inst's selects need to be swapped as follows:
1180 * 0 -> C ; 1 -> B ; 2 -> A */
1181 emit_alu(cs
, counter
, fpi
);
1182 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(cs
, 0));
1183 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(cs
, 0));
1184 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
1185 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO
)
1186 | R500_ALU_RGB_SEL_B_SRC0
1187 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
1188 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_CMP
1189 | R500_ALPHA_ADDRD(dest
)
1190 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO
)
1191 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE
);
1192 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
1193 | R500_ALU_RGBA_ADDRD(dest
)
1194 | R500_ALU_RGBA_SEL_C_SRC0
1195 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB
)
1196 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1197 | R500_ALU_RGBA_A_SWIZ_A
;
1200 /* Variation on MAD: 1*src0-src1 */
1201 fpi
->SrcReg
[1].NegateBase
= 0xF; /* NEG_XYZW */
1202 emit_mad(cs
, counter
, fpi
, R500_SWIZZLE_ONE
, 0, 1);
1205 /* TODO: The rarer negation masks! */
1206 emit_mov(cs
, counter
, fpi
, make_src(cs
, fpi
->SrcReg
[0]), fpi
->SrcReg
[0].Swizzle
, dest
);
1209 /* src0 * src1 - src1 * src0
1210 * 1) MUL temp.xyz, src0.yzx, src1.zxy
1211 * 2) MAD src0.zxy, src1.yzx, -temp.xyz */
1212 src
[0] = make_src(cs
, fpi
->SrcReg
[0]);
1213 src
[1] = make_src(cs
, fpi
->SrcReg
[1]);
1214 code
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
1215 | (R500_WRITEMASK_RGB
<< 11);
1216 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
1217 | R500_RGB_ADDR1(src
[1]);
1218 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
1219 | R500_ALPHA_ADDR1(src
[1]);
1220 /* Select [y, z, x] */
1221 temp_swiz
= make_rgb_swizzle(fpi
->SrcReg
[0]);
1222 temp_swiz
= (GET_SWZ(temp_swiz
, 1) << 0) | (GET_SWZ(temp_swiz
, 2) << 3) | (GET_SWZ(temp_swiz
, 0) << 6);
1223 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
1224 | MAKE_SWIZ_RGB_A(temp_swiz
);
1225 /* Select [z, x, y] */
1226 temp_swiz
= make_rgb_swizzle(fpi
->SrcReg
[1]);
1227 temp_swiz
= (GET_SWZ(temp_swiz
, 2) << 0) | (GET_SWZ(temp_swiz
, 0) << 3) | (GET_SWZ(temp_swiz
, 1) << 6);
1228 code
->inst
[counter
].inst3
|= R500_ALU_RGB_SEL_B_SRC1
1229 | MAKE_SWIZ_RGB_B(temp_swiz
);
1230 code
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
1231 | R500_ALPHA_ADDRD(get_temp(cs
, 0))
1232 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
1233 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
1234 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
1235 | R500_ALU_RGBA_ADDRD(get_temp(cs
, 0))
1236 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
1237 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
1239 emit_alu(cs
, counter
, fpi
);
1240 code
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
1241 | R500_RGB_ADDR1(src
[1])
1242 | R500_RGB_ADDR2(get_temp(cs
, 0));
1243 code
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
1244 | R500_ALPHA_ADDR1(src
[1])
1245 | R500_ALPHA_ADDR2(get_temp(cs
, 0));
1246 /* Select [z, x, y] */
1247 temp_swiz
= make_rgb_swizzle(fpi
->SrcReg
[0]);
1248 temp_swiz
= (GET_SWZ(temp_swiz
, 2) << 0) | (GET_SWZ(temp_swiz
, 0) << 3) | (GET_SWZ(temp_swiz
, 1) << 6);
1249 code
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
1250 | MAKE_SWIZ_RGB_A(temp_swiz
);
1251 /* Select [y, z, x] */
1252 temp_swiz
= make_rgb_swizzle(fpi
->SrcReg
[1]);
1253 temp_swiz
= (GET_SWZ(temp_swiz
, 1) << 0) | (GET_SWZ(temp_swiz
, 2) << 3) | (GET_SWZ(temp_swiz
, 0) << 6);
1254 code
->inst
[counter
].inst3
|= R500_ALU_RGB_SEL_B_SRC1
1255 | MAKE_SWIZ_RGB_B(temp_swiz
);
1256 code
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
1257 | R500_ALPHA_ADDRD(dest
)
1258 | R500_ALPHA_SWIZ_A_1
1259 | R500_ALPHA_SWIZ_B_1
;
1260 code
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
1261 | R500_ALU_RGBA_ADDRD(dest
)
1262 | R500_ALU_RGBA_SEL_C_SRC2
1263 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB
)
1264 | R500_ALU_RGBA_MOD_C_NEG
1265 | R500_ALU_RGBA_A_SWIZ_0
;
1271 emit_tex(cs
, fpi
, dest
, counter
);
1272 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
1276 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi
->Opcode
));
1280 /* Finishing touches */
1281 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
1282 code
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
/*
 * parse_program: run every instruction of every clause through do_inst().
 *
 * For each clause in cs->compiler->compiler.Clauses, each prog_instruction
 * is handed to do_inst() together with the current slot counter, and the
 * value do_inst() returns becomes the new counter.  After the loops the
 * last slot (counter - 1) gets R500_INST_LAST OR-ed into inst0 when the low
 * two bits of that inst0 equal 1, cs->nrslots is set to the final counter
 * and code->max_temp_idx is incremented.
 *
 * Returns a GLboolean; r500FragmentProgramEmit() tests the result with
 * `!parse_program(&cs)` and reports a parse failure when it is false.
 *
 * NOTE(review): this listing omits several original lines of the function
 * (the declarations of `code` and `ip`, the statement guarded by the
 * fp->error check, the branch structure around WARN_ONCE and the return
 * statements).  Confirm those against the complete file.
 */
1290 static GLboolean
parse_program(struct r500_pfs_compile_state
*cs
)
1293 int clauseidx
, counter
= 0;
/* Outer loop: one iteration per clause of the compiler. */
1295 for (clauseidx
= 0; clauseidx
< cs
->compiler
->compiler
.NumClauses
; clauseidx
++) {
1296 struct radeon_clause
* clause
= &cs
->compiler
->compiler
.Clauses
[clauseidx
];
1297 struct prog_instruction
* fpi
;
/* Inner loop: one iteration per instruction of the clause. */
1301 for (ip
= 0; ip
< clause
->NumInstructions
; ip
++) {
1302 fpi
= clause
->Instructions
+ ip
;
1303 counter
= do_inst(cs
, fpi
, counter
);
/* The error flag is tested after each instruction; the guarded statement
 * is not visible here (presumably an early failure return -- TODO confirm). */
1305 if (cs
->compiler
->fp
->error
)
/* NOTE(review): if no instruction was emitted, counter is still 0 here and
 * counter-1 indexes inst[-1] -- confirm an empty program cannot reach this. */
1310 /* Finish him! (If it's an ALU/OUT instruction...) */
1311 if ((code
->inst
[counter
-1].inst0
& 0x3) == 1) {
1312 code
->inst
[counter
-1].inst0
|= R500_INST_LAST
;
1314 /* We still need to put an output inst, right? */
1315 WARN_ONCE("Final FP instruction is not an OUT.\n");
/* Publish the number of slots that were filled. */
1318 cs
->nrslots
= counter
;
1320 code
->max_temp_idx
++;
/*
 * init_program: reset per-compile state and assign register numbers to the
 * fragment program inputs.
 *
 * Steps, in the order they appear:
 *  - fp->optimization is reloaded from the "fp_optimization" driver option;
 *    fp->translated, fp->error and fp->writes_depth are set to GL_FALSE;
 *  - code->max_temp_idx is set to 1 and code->temp_reg_offset to 0;
 *  - vsrc[0..2] and ssrc[0..2] of each of the PFS_MAX_ALU_INST slots are
 *    set to SRC_CONST;
 *  - for each input bit set in InputsRead, in the order texcoords, WPOS,
 *    COL0, COL1: the input's refcount is zeroed and its reg is set to the
 *    current code->temp_reg_offset, which is then incremented;
 *  - a WARN_ONCE reports input bits that are still set (its guarding
 *    condition is not visible in this listing) and those inputs get reg 0;
 *  - the source operands of every instruction are scanned for the highest
 *    PROGRAM_TEMPORARY index; cs->temp_in_use and code->max_temp_idx are
 *    derived from the result.
 *
 * NOTE(review): the declarations of `code`, `i`, `j`, `clauseidx` and `ip`
 * and all closing braces are missing from this listing; confirm the block
 * structure against the complete file.
 */
1325 static void init_program(struct r500_pfs_compile_state
*cs
)
1328 struct gl_fragment_program
*mp
= &cs
->compiler
->fp
->mesa_program
;
1329 struct prog_instruction
*fpi
;
/* Local copy of the input mask; bits are cleared as inputs are handled. */
1330 GLuint InputsRead
= mp
->Base
.InputsRead
;
1331 GLuint temps_used
= 0;
1334 /* New compile, reset tracking data */
1335 cs
->compiler
->fp
->optimization
=
1336 driQueryOptioni(&cs
->compiler
->r300
->radeon
.optionCache
, "fp_optimization");
1337 cs
->compiler
->fp
->translated
= GL_FALSE
;
1338 cs
->compiler
->fp
->error
= GL_FALSE
;
1340 /* Size of pixel stack, plus 1. */
1341 code
->max_temp_idx
= 1;
1342 /* Temp register offset. */
1343 code
->temp_reg_offset
= 0;
1344 /* Whether or not we perform any depth writing. */
1345 cs
->compiler
->fp
->writes_depth
= GL_FALSE
;
/* Reset all three vsrc/ssrc entries of every slot to SRC_CONST. */
1347 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
1348 for (j
= 0; j
< 3; j
++) {
1349 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
1350 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
1354 /* Work out what temps the Mesa inputs correspond to, this must match
1355 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1356 * configures itself based on the fragprog's InputsRead
1358 * NOTE: this depends on get_hw_temp() allocating registers in order,
1359 * starting from register 0, so we're just going to do that instead.
1362 /* Texcoords come first */
/* One register per texcoord bit set in InputsRead, in unit order. */
1363 for (i
= 0; i
< cs
->compiler
->fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
1364 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1365 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
1366 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
1367 code
->temp_reg_offset
;
1368 code
->temp_reg_offset
++;
/* All texcoord bits are handled; drop them from the pending mask. */
1371 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
1373 /* fragment position treated as a texcoord */
1374 if (InputsRead
& FRAG_BIT_WPOS
) {
1375 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
1376 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
=
1377 code
->temp_reg_offset
;
1378 code
->temp_reg_offset
++;
1380 InputsRead
&= ~FRAG_BIT_WPOS
;
1382 /* Then primary colour */
1383 if (InputsRead
& FRAG_BIT_COL0
) {
1384 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
1385 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
=
1386 code
->temp_reg_offset
;
1387 code
->temp_reg_offset
++;
1389 InputsRead
&= ~FRAG_BIT_COL0
;
1391 /* Secondary color */
1392 if (InputsRead
& FRAG_BIT_COL1
) {
1393 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
1394 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
=
1395 code
->temp_reg_offset
;
1396 code
->temp_reg_offset
++;
1398 InputsRead
&= ~FRAG_BIT_COL1
;
/* Whatever is still set in InputsRead at this point has no mapping above. */
1402 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
1403 /* force read from hwreg 0 for now */
/* NOTE(review): `1 << i` with i == 31 shifts into the sign bit of a signed
 * int, which ISO C leaves undefined; an unsigned constant would avoid it. */
1404 for (i
= 0; i
< 32; i
++)
1405 if (InputsRead
& (1 << i
))
1406 cs
->inputs
[i
].reg
= 0;
/* Find one past the highest PROGRAM_TEMPORARY index read by any of the
 * three source operands of any instruction.  Only SrcReg is examined in
 * the lines visible here. */
1411 for (clauseidx
= 0; clauseidx
< cs
->compiler
->compiler
.NumClauses
; ++clauseidx
) {
1412 struct radeon_clause
* clause
= &cs
->compiler
->compiler
.Clauses
[clauseidx
];
1415 for (ip
= 0; ip
< clause
->NumInstructions
; ip
++) {
1416 fpi
= clause
->Instructions
+ ip
;
1417 for (i
= 0; i
< 3; i
++) {
1418 if (fpi
->SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
1419 if (fpi
->SrcReg
[i
].Index
>= temps_used
)
1420 temps_used
= fpi
->SrcReg
[i
].Index
+ 1;
/* temp_in_use is one more than the count found above. */
1427 cs
->temp_in_use
= temps_used
+ 1;
/* Replaces the initial value of 1: registers taken by inputs plus
 * temp_in_use. */
1429 code
->max_temp_idx
= code
->temp_reg_offset
+ cs
->temp_in_use
;
/* Debug output, only when DEBUG_PIXEL is set in RADEON_DEBUG. */
1431 if (RADEON_DEBUG
& DEBUG_PIXEL
)
1432 fprintf(stderr
, "FP temp indices: code->max_temp_idx: %d cs->temp_in_use: %d\n", code
->max_temp_idx
, cs
->temp_in_use
);
/*
 * dumb_shader: fill code->inst[0] and code->inst[1] with fixed, hard-coded
 * instruction words.
 *
 * inst[0] starts from R500_INST_TYPE_TEX and uses texture id 0 with source
 * and destination address 0; its inst4 and inst5 words are zero.
 * inst[1] starts from R500_INST_TYPE_OUT, uses address 0 for its operands
 * and selects the MAD opcode for both the RGB and the alpha word.
 * Judging by the macro names this samples texture 0 into register 0 and
 * then outputs register 0 * 1 + 0 -- TODO confirm against the R500
 * register documentation.
 *
 * NOTE(review): the declaration of `code` and several terms of the OR
 * chains below are missing from this listing, so the words shown here are
 * incomplete; consult the complete file for the exact values.
 */
1435 static void dumb_shader(struct r500_pfs_compile_state
*cs
)
/* Instruction 0: texture instruction. */
1438 code
->inst
[0].inst0
= R500_INST_TYPE_TEX
1439 | R500_INST_TEX_SEM_WAIT
1440 | R500_INST_RGB_WMASK_R
1441 | R500_INST_RGB_WMASK_G
1442 | R500_INST_RGB_WMASK_B
1443 | R500_INST_ALPHA_WMASK
1444 | R500_INST_RGB_CLAMP
1445 | R500_INST_ALPHA_CLAMP
;
1446 code
->inst
[0].inst1
= R500_TEX_ID(0)
1448 | R500_TEX_SEM_ACQUIRE
1449 | R500_TEX_IGNORE_UNCOVERED
;
1450 code
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
1451 | R500_TEX_SRC_S_SWIZ_R
1452 | R500_TEX_SRC_T_SWIZ_G
1453 | R500_TEX_DST_ADDR(0)
1454 | R500_TEX_DST_R_SWIZ_R
1455 | R500_TEX_DST_G_SWIZ_G
1456 | R500_TEX_DST_B_SWIZ_B
1457 | R500_TEX_DST_A_SWIZ_A
;
/* NOTE(review): only the first term of the inst3 expression is visible in
 * this listing. */
1458 code
->inst
[0].inst3
= R500_DX_ADDR(0)
1468 code
->inst
[0].inst4
= 0x0;
1469 code
->inst
[0].inst5
= 0x0;
/* Instruction 1: output instruction with all four output masks set. */
1471 code
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
1472 R500_INST_TEX_SEM_WAIT
|
1474 R500_INST_RGB_OMASK_R
|
1475 R500_INST_RGB_OMASK_G
|
1476 R500_INST_RGB_OMASK_B
|
1477 R500_INST_ALPHA_OMASK
;
1478 code
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
1480 R500_RGB_ADDR1_CONST
|
1482 R500_RGB_ADDR2_CONST
|
1483 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
1484 code
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
1485 R500_ALPHA_ADDR1(0) |
1486 R500_ALPHA_ADDR1_CONST
|
1487 R500_ALPHA_ADDR2(0) |
1488 R500_ALPHA_ADDR2_CONST
|
1489 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
/* RGB operands: A takes R,G,B from source 0, B uses the "_1" swizzles. */
1490 code
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
1491 R500_ALU_RGB_R_SWIZ_A_R
|
1492 R500_ALU_RGB_G_SWIZ_A_G
|
1493 R500_ALU_RGB_B_SWIZ_A_B
|
1494 R500_ALU_RGB_SEL_B_SRC0
|
1495 R500_ALU_RGB_R_SWIZ_B_1
|
1496 R500_ALU_RGB_B_SWIZ_B_1
|
1497 R500_ALU_RGB_G_SWIZ_B_1
;
1498 code
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
1499 R500_ALPHA_SWIZ_A_A
|
1500 R500_ALPHA_SWIZ_B_1
;
/* Third MAD operand: the "_0" swizzle on all four channels. */
1501 code
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
1502 R500_ALU_RGBA_R_SWIZ_0
|
1503 R500_ALU_RGBA_G_SWIZ_0
|
1504 R500_ALU_RGBA_B_SWIZ_0
|
1505 R500_ALU_RGBA_A_SWIZ_0
;
/*
 * r500FragmentProgramEmit: run parse_program() for `compiler` and record
 * the resulting instruction range in compiler->code.
 *
 * A zero-filled r500_pfs_compile_state is bound to `compiler` and passed to
 * parse_program().  In the visible statements code->inst_offset is set to 0
 * and code->inst_end to cs.nrslots - 1.
 *
 * Returns a GLboolean.
 *
 * NOTE(review): this listing omits several original lines of the function
 * (braces, any preprocessor guards, the return statements and possibly
 * other calls), and the end of the function is not visible.  In
 * particular, the two identical inst_offset/inst_end assignments below
 * presumably belong to different branches -- confirm against the complete
 * file.
 */
1510 GLboolean
r500FragmentProgramEmit(struct r500_fragment_program_compiler
*compiler
)
1512 struct r500_pfs_compile_state cs
;
1513 struct r500_fragment_program_code
*code
= compiler
->code
;
/* Start from an all-zero compile state bound to this compiler. */
1515 _mesa_memset(&cs
, 0, sizeof(cs
));
1516 cs
.compiler
= compiler
;
/* A false result from parse_program() is the failure case. */
1519 if (!parse_program(&cs
)) {
1521 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1523 code
->inst_offset
= 0;
1524 code
->inst_end
= cs
.nrslots
- 1;
/* inst_end is the index of the last filled slot (slot count minus one). */
1529 code
->inst_offset
= 0;
1530 code
->inst_end
= cs
.nrslots
- 1;