2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
58 * Useful macros and values
/* ERROR(fmt, args...): print "<file>::<function>(): <message>" followed by a
 * newline to stderr, then flag the program as failed by setting fp->error to
 * GL_TRUE.  The expansion refers to `fp` by name, so a variable called fp
 * must be in scope wherever the macro is used.
 * NOTE(review): callers in this file already end their messages with "\n",
 * so two newlines are printed; confirm whether that is intended.
 * NOTE(review): the end of the do-block is not visible in this view. */
60 #define ERROR(fmt, args...) do { \
61 fprintf(stderr, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
63 fp->error = GL_TRUE; \
/* Declares a local `cs` that aliases fp->cs; a variable named fp must be in
 * scope at the point of use. */
#define COMPILE_STATE struct r500_pfs_compile_state *cs = fp->cs

/* Limits checked by get_temp() and emit_const4fv() respectively. */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags: extra bits OR'ed into a register index
 * (emit_const4fv() tags its result with REG_CONSTANT). */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle selector values, plus ready-made RGB triples that pack three
 * 3-bit selectors at bit offsets 0, 3 and 6. */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/* The MAKE_SWIZ_* helpers shift a swizzle value to its field position.
 * The argument is parenthesised so that compound expressions such as
 * (a | b) are shifted as a whole instead of being split by precedence. */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

/* Write mask with all four channel bits set. */
#define R500_WRITEMASK_ARGB 0xF
99 /* 1/(2pi), needed for quick modulus in trig insts
100 * Thanks to glisse for pointing out how to do it! */
/* Constant table whose visible entries all hold 1/(2*pi).  parse_program
 * hands it to emit_const4fv() and uses the resulting constant as the second
 * MAD operand ahead of the FRC and COS/SIN ops.
 * NOTE(review): only two initialisers are visible in this view; the element
 * count of the real array cannot be confirmed from here. */
101 static const GLfloat RCP_2PI
[] = {0.15915494309189535,
104 0.15915494309189535};
/* Forward declaration of a file-local routine taking the fragment program;
 * its definition is not part of this view. */
106 static void dump_program(struct r500_fragment_program
*fp
);
/* make_rgb_swizzle: walk swizzle components 0..2 of `src`, reading each one
 * with GET_SWZ.  A component value of 5 is bumped to 6, which is the value
 * this file defines as R500_SWIZZLE_ONE.
 * NOTE(review): the local declarations, the step that packs each component
 * into the result and the return statement are not visible in this view. */
108 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
111 /* This could be optimized, but it should be plenty fast already. */
113 for (i
= 0; i
< 3; i
++) {
114 temp
= GET_SWZ(src
.Swizzle
, i
);
115 /* Fix SWIZZLE_ONE */
116 if (temp
== 5) temp
++;
/* make_alpha_swizzle: read swizzle component 3 of `src` with GET_SWZ and
 * bump a value of 5 to 6 (the same adjustment make_rgb_swizzle labels
 * "Fix SWIZZLE_ONE").
 * NOTE(review): the return statement is not visible in this view. */
122 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
123 GLuint swiz
= GET_SWZ(src
.Swizzle
, 3);
125 if (swiz
== 5) swiz
++;
/* make_sop_swizzle: same as make_alpha_swizzle but reads swizzle component 0
 * of `src`.  parse_program passes the result to MAKE_SWIZ_ALPHA_A for the
 * EX2/LN2/RCP/RSQ/COS/SIN alpha ops.
 * NOTE(review): the return statement is not visible in this view. */
129 static inline GLuint
make_sop_swizzle(struct prog_src_register src
) {
130 GLuint swiz
= GET_SWZ(src
.Swizzle
, 0);
132 if (swiz
== 5) swiz
++;
/* make_strq_swizzle: starting from the raw src.Swizzle word, add the low two
 * bits of `temp` into successive 2-bit fields of `swiz` for i = 0..3.
 * NOTE(review): no update of `temp` between iterations is visible in this
 * view, nor are the declarations of `i` and `swiz` or the return statement;
 * the only reference to this helper in emit_tex is commented out. */
136 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
138 GLuint temp
= src
.Swizzle
;
140 for (i
= 0; i
< 4; i
++) {
141 swiz
+= (temp
& 0x3) << i
*2;
/* get_temp: compute a scratch register number as
 * cs->temp_in_use + 1 + slot and report an error through ERROR() when the
 * result exceeds R500_US_NUM_TEMP_REGS.
 * NOTE(review): the comparison is `>`, so a result equal to
 * R500_US_NUM_TEMP_REGS passes; if valid register numbers stop one below
 * that constant, `>=` was probably intended -- confirm.
 * NOTE(review): the declaration of `cs` and the return statement are not
 * visible in this view. */
147 static int get_temp(struct r500_fragment_program
*fp
, int slot
) {
151 int r
= cs
->temp_in_use
+ 1 + slot
;
153 if (r
> R500_US_NUM_TEMP_REGS
) {
154 ERROR("Too many temporary registers requested, can't compile!\n");
160 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
/* emit_const4fv: look `cp` up in fp->constant[0 .. fp->const_nr).  When the
 * scan runs off the end, ERROR() is raised if the index has reached
 * R500_US_NUM_CONST_REGS; `cp` is stored at fp->constant[index].  The
 * register value is the index tagged with REG_CONSTANT.
 * NOTE(review): the declaration of `cp`, the loop exit on a match, any
 * update of fp->const_nr and the return statement are not visible in this
 * view. */
161 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
167 for (index
= 0; index
< fp
->const_nr
; ++index
) {
168 if (fp
->constant
[index
] == cp
)
172 if (index
>= fp
->const_nr
) {
173 if (index
>= R500_US_NUM_CONST_REGS
) {
174 ERROR("Out of hw constants!\n");
179 fp
->constant
[index
] = cp
;
182 reg
= index
| REG_CONSTANT
;
/* make_src: translate a Mesa source register into a register value.
 *   - PROGRAM_TEMPORARY: src.Index plus fp->temp_reg_offset.
 *   - a case whose label is not visible here: cs->inputs[src.Index].reg.
 *   - PROGRAM_LOCAL_PARAM / PROGRAM_ENV_PARAM: a constant obtained from
 *     emit_const4fv() out of the program's LocalParams or the context's
 *     FragmentProgram.Parameters.
 *   - PROGRAM_STATE_VAR / PROGRAM_NAMED_PARAM / PROGRAM_CONSTANT: a constant
 *     obtained from emit_const4fv() out of
 *     mesa_program.Base.Parameters->ParameterValues[src.Index].
 *   - anything else: ERROR() reporting src.File.
 * NOTE(review): the switch header, some case labels, the break statements
 * and the return are not visible in this view. */
186 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
190 case PROGRAM_TEMPORARY
:
191 reg
= src
.Index
+ fp
->temp_reg_offset
;
194 reg
= cs
->inputs
[src
.Index
].reg
;
196 case PROGRAM_LOCAL_PARAM
:
197 reg
= emit_const4fv(fp
,
198 fp
->mesa_program
.Base
.LocalParams
[src
.
201 case PROGRAM_ENV_PARAM
:
202 reg
= emit_const4fv(fp
,
203 fp
->ctx
->FragmentProgram
.Parameters
[src
.
206 case PROGRAM_STATE_VAR
:
207 case PROGRAM_NAMED_PARAM
:
208 case PROGRAM_CONSTANT
:
209 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
210 ParameterValues
[src
.Index
]);
213 ERROR("Can't handle src.File %x\n", src
.File
);
/* make_dest: translate a Mesa destination register into a register value.
 * PROGRAM_TEMPORARY maps to dest.Index plus fp->temp_reg_offset; any file
 * the function does not handle is reported through ERROR() with dest.File.
 * NOTE(review): the switch header, the case that the "multiple rendering
 * targets" remark belongs to, and the return are not visible in this view. */
220 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
223 case PROGRAM_TEMPORARY
:
224 reg
= dest
.Index
+ fp
->temp_reg_offset
;
227 /* Eventually we may need to handle multiple
228 * rendering targets... */
232 ERROR("Can't handle dest.File %x\n", dest
.File
);
/* emit_tex: fill fp->inst[counter] with a texture instruction for `fpi`.
 *
 * fp      - fragment program whose inst[] array is written.
 * fpi     - Mesa instruction supplying DstReg, SrcReg[0], TexSrcUnit and
 *           TexSrcTarget.
 * opcode  - selects which R500_TEX_INST_* bit is OR'ed into inst1.
 * dest    - destination register used by the trailing OUT instruction.
 * counter - index into fp->inst[].
 *
 * The write mask is fpi->DstReg.WriteMask shifted left by 11.  When the
 * destination file is PROGRAM_OUTPUT the texture result goes to
 * get_temp(fp, 0) and a second, OUT-type instruction moves it to `dest`.
 * NOTE(review): local declarations, the non-output assignment of hwdest,
 * the opcode switch labels and any advance of `counter` between the two
 * instructions are not visible in this view. */
239 static void emit_tex(struct r500_fragment_program
*fp
,
240 struct prog_instruction
*fpi
, int opcode
, int dest
, int counter
)
245 mask
= fpi
->DstReg
.WriteMask
<< 11;
246 hwsrc
= make_src(fp
, fpi
->SrcReg
[0]);
248 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
249 hwdest
= get_temp(fp
, 0);
254 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
255 | R500_INST_TEX_SEM_WAIT
;
257 fp
->inst
[counter
].inst1
= R500_TEX_ID(fpi
->TexSrcUnit
)
258 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
/* TEXTURE_RECT_INDEX targets additionally get R500_TEX_UNSCALED. */
260 if (fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
261 fp
->inst
[counter
].inst1
|= R500_TEX_UNSCALED
;
/* The next four statements each OR one instruction-type bit into inst1;
 * the opcode case labels that choose between them are not visible here. */
265 fp
->inst
[counter
].inst1
|= R500_TEX_INST_TEXKILL
;
268 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
271 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LODBIAS
;
274 fp
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
277 ERROR("emit_tex can't handle opcode %x\n", opcode
);
/* Source and destination addresses with straight-through swizzles
 * (S<-R, T<-G, R<-B, Q<-A on the source; R, G, B, A on the result). */
280 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
281 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
282 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
283 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
284 | R500_TEX_DST_ADDR(hwdest
)
285 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
286 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
288 fp
->inst
[counter
].inst3
= 0x0;
289 fp
->inst
[counter
].inst4
= 0x0;
290 fp
->inst
[counter
].inst5
= 0x0;
/* Output destinations: build an OUT-type CMP instruction that reads
 * get_temp(fp, 0) and writes `dest`.  mask << 4 moves the write mask from
 * bit 11 to bit 15, the shift emit_alu uses for PROGRAM_OUTPUT. */
292 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
294 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
295 | R500_INST_TEX_SEM_WAIT
| (mask
<< 4);
296 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
297 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
298 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
299 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
300 | R500_ALU_RGB_SEL_B_SRC0
301 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
)
302 | R500_ALU_RGB_OMOD_DISABLE
;
303 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
304 | R500_ALPHA_ADDRD(dest
)
305 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A
)
306 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A
)
307 | R500_ALPHA_OMOD_DISABLE
;
308 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
309 | R500_ALU_RGBA_ADDRD(dest
)
310 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
311 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
/* emit_alu: initialise inst0 of fp->inst[counter] for an ALU operation.
 * A PROGRAM_OUTPUT destination selects R500_INST_TYPE_OUT with the write
 * mask shifted left by 15; the other branch selects R500_INST_TYPE_ALU with
 * the mask shifted left by 11.  R500_INST_TEX_SEM_WAIT is then OR'ed in.
 * Only inst0 is touched here; parse_program fills inst1..inst5 afterwards.
 * NOTE(review): the `else` keyword and some continuation lines of the two
 * assignments are not visible in this view. */
315 static void emit_alu(struct r500_fragment_program
*fp
, int counter
, struct prog_instruction
*fpi
) {
316 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
317 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
319 | (fpi
->DstReg
.WriteMask
<< 15);
321 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
323 | (fpi
->DstReg
.WriteMask
<< 11);
326 fp
->inst
[counter
].inst0
|= R500_INST_TEX_SEM_WAIT
;
/* emit_mov: fill inst1..inst5 of fp->inst[counter] so that `src` is copied
 * to register `dest` using the CMP op.
 *
 * fp      - fragment program whose inst[] array is written.
 * counter - index into fp->inst[].
 * src     - Mesa source register; resolved with make_src() and swizzled via
 *           make_rgb_swizzle() / make_alpha_swizzle().
 * dest    - destination register number.
 *
 * Operands A and B both select SRC0 with the source's swizzle, and operand
 * C is the zero swizzle.  inst0 is not written here; callers in this file
 * invoke emit_alu() first. */
329 static void emit_mov(struct r500_fragment_program
*fp
, int counter
, struct prog_src_register src
, GLuint dest
) {
330 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
331 * it is technically more accurate and recommended by ATI/AMD. */
332 GLuint src_reg
= make_src(fp
, src
);
333 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src_reg
);
334 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src_reg
);
335 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
336 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src
))
337 | R500_ALU_RGB_SEL_B_SRC0
338 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src
))
339 | R500_ALU_RGB_OMOD_DISABLE
;
340 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
341 | R500_ALPHA_ADDRD(dest
)
342 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src
))
343 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src
))
344 | R500_ALPHA_OMOD_DISABLE
;
345 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
346 | R500_ALU_RGBA_ADDRD(dest
)
347 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
348 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
351 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
353 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
354 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
355 struct prog_instruction
*fpi
;
356 GLuint src
[3], dest
= 0;
357 int temp_swiz
, pixel_mask
= 0, output_mask
= 0, counter
= 0;
359 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
360 ERROR("The program is empty!\n");
364 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
366 if (fpi
->Opcode
!= OPCODE_KIL
) {
367 dest
= make_dest(fp
, fpi
->DstReg
);
369 pixel_mask
= fpi
->DstReg
.WriteMask
<< 11;
370 output_mask
= fpi
->DstReg
.WriteMask
<< 15;
373 switch (fpi
->Opcode
) {
375 emit_alu(fp
, counter
, fpi
);
376 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
377 fp
->inst
[counter
].inst3
|= R500_ALU_RGB_MOD_A_ABS
378 | R500_ALU_RGB_MOD_B_ABS
;
379 fp
->inst
[counter
].inst4
|= R500_ALPHA_MOD_A_ABS
380 | R500_ALPHA_MOD_B_ABS
;
383 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
384 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
385 /* Variation on MAD: 1*src0+src1 */
386 emit_alu(fp
, counter
, fpi
);
387 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
388 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(0);
389 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
390 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(0);
391 fp
->inst
[counter
].inst3
= /* 1 */
392 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
393 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
394 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
395 | R500_ALPHA_ADDRD(dest
)
396 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
397 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
398 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
399 | R500_ALU_RGBA_ADDRD(dest
)
400 | R500_ALU_RGBA_SEL_C_SRC1
401 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
402 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
403 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
406 /* This inst's selects need to be swapped as follows:
407 * 0 -> C ; 1 -> B ; 2 -> A */
408 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
409 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
410 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
411 emit_alu(fp
, counter
, fpi
);
412 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[2])
413 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[0]);
414 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[2])
415 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[0]);
416 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
417 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[2]))
418 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
419 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
420 | R500_ALPHA_ADDRD(dest
)
421 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[2]))
422 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
423 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
424 | R500_ALU_RGBA_ADDRD(dest
)
425 | R500_ALU_RGBA_SEL_C_SRC2
426 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
427 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
428 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
431 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
432 src
[1] = emit_const4fv(fp
, RCP_2PI
);
433 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
434 | (R500_WRITEMASK_ARGB
<< 11);
435 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
436 | R500_RGB_ADDR1(src
[1]);
437 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
438 | R500_ALPHA_ADDR1(src
[1]);
439 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
440 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
441 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
442 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
443 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
444 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
445 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
446 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
447 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
448 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
449 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
451 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
452 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
453 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
454 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
455 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
456 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
457 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
458 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
459 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
460 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1));
462 emit_alu(fp
, counter
, fpi
);
463 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
464 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
465 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
;
466 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
467 | R500_ALPHA_ADDRD(dest
)
468 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
469 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
470 | R500_ALU_RGBA_ADDRD(dest
);
473 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
474 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
475 emit_alu(fp
, counter
, fpi
);
476 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
477 | R500_RGB_ADDR1(src
[1]);
478 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
479 | R500_ALPHA_ADDR1(src
[1]);
480 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
481 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
482 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
483 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
484 | R500_ALPHA_ADDRD(dest
)
485 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
486 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
487 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
488 | R500_ALU_RGBA_ADDRD(dest
);
491 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
492 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
494 emit_alu(fp
, counter
, fpi
);
495 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
496 | R500_RGB_ADDR1(src
[1]);
497 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
498 | R500_ALPHA_ADDR1(src
[1]);
499 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
500 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
501 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
502 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
503 | R500_ALPHA_ADDRD(dest
)
504 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
505 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
506 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
507 | R500_ALU_RGBA_ADDRD(dest
);
510 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
511 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
513 emit_alu(fp
, counter
, fpi
);
514 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
515 | R500_RGB_ADDR1(src
[1]);
516 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
517 | R500_ALPHA_ADDR1(src
[1]);
518 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
519 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
520 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
521 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
522 | R500_ALPHA_ADDRD(dest
)
523 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
524 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
525 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
526 | R500_ALU_RGBA_ADDRD(dest
);
529 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
530 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
531 /* [1, src0.y*src1.y, src0.z, src1.w]
532 * So basically MUL with lotsa swizzling. */
533 emit_alu(fp
, counter
, fpi
);
534 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
535 | R500_RGB_ADDR1(src
[1]);
536 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
537 | R500_ALPHA_ADDR1(src
[1]);
538 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
539 | R500_ALU_RGB_SEL_B_SRC1
;
540 /* Select [1, y, z, 1] */
541 temp_swiz
= (make_rgb_swizzle(fpi
->SrcReg
[0]) & ~0x7) | R500_SWIZZLE_ONE
;
542 fp
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_A(temp_swiz
);
543 /* Select [1, y, 1, w] */
544 temp_swiz
= (make_rgb_swizzle(fpi
->SrcReg
[0]) & ~0x1c7) | R500_SWIZZLE_ONE
| (R500_SWIZZLE_ONE
<< 6);
545 fp
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_B(temp_swiz
);
546 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
547 | R500_ALPHA_ADDRD(dest
)
548 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
549 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
550 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
551 | R500_ALU_RGBA_ADDRD(dest
)
552 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
553 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
556 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
557 emit_alu(fp
, counter
, fpi
);
558 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
559 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
560 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
561 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
562 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_EX2
563 | R500_ALPHA_ADDRD(dest
)
564 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
565 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
566 | R500_ALU_RGBA_ADDRD(dest
);
569 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
570 emit_alu(fp
, counter
, fpi
);
571 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
572 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
573 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
574 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
575 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
576 | R500_ALPHA_ADDRD(dest
)
577 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
578 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
579 | R500_ALU_RGBA_ADDRD(dest
);
582 emit_tex(fp
, fpi
, OPCODE_KIL
, dest
, counter
);
585 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
586 emit_alu(fp
, counter
, fpi
);
587 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
588 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
589 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
590 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
591 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_LN2
592 | R500_ALPHA_ADDRD(dest
)
593 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
594 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
595 | R500_ALU_RGBA_ADDRD(dest
);
598 /* src0 * src1 + INV(src0) * src2
599 * 1) MUL src0, src1, temp
600 * 2) PRE 1-src0; MAD srcp, src2, temp */
601 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
602 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
603 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
604 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
605 | R500_INST_NOP
| (R500_WRITEMASK_ARGB
<< 11);
606 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
607 | R500_RGB_ADDR1(src
[1]);
608 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
609 | R500_ALPHA_ADDR1(src
[1]);
610 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
611 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
612 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
613 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
614 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
615 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
616 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
617 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
618 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
619 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
620 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
622 emit_alu(fp
, counter
, fpi
);
623 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
624 | R500_RGB_ADDR1(src
[2])
625 | R500_RGB_ADDR2(get_temp(fp
, 0))
626 | R500_RGB_SRCP_OP_1_MINUS_RGB0
;
627 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
628 | R500_ALPHA_ADDR1(src
[2])
629 | R500_ALPHA_ADDR2(get_temp(fp
, 0))
630 | R500_ALPHA_SRCP_OP_1_MINUS_A0
;
631 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRCP
632 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
633 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
634 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
635 | R500_ALPHA_ADDRD(dest
)
636 | R500_ALPHA_SEL_A_SRCP
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
637 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
638 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
639 | R500_ALU_RGBA_ADDRD(dest
)
640 | R500_ALU_RGBA_SEL_C_SRC2
| MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
641 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
642 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
645 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
646 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
647 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
648 emit_alu(fp
, counter
, fpi
);
649 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
650 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
651 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
652 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
653 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
654 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
655 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
656 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
657 | R500_ALPHA_ADDRD(dest
)
658 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
659 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
660 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
661 | R500_ALU_RGBA_ADDRD(dest
)
662 | R500_ALU_RGBA_SEL_C_SRC2
663 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
664 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
665 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
668 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
669 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
670 emit_alu(fp
, counter
, fpi
);
671 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
672 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
673 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
674 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
675 | R500_ALU_RGB_SEL_B_SRC1
676 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
677 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
678 | R500_ALPHA_ADDRD(dest
)
679 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
680 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
681 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
682 | R500_ALU_RGBA_ADDRD(dest
);
685 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
686 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
687 emit_alu(fp
, counter
, fpi
);
688 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
689 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
690 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
691 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
692 | R500_ALU_RGB_SEL_B_SRC1
693 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
694 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MIN
695 | R500_ALPHA_ADDRD(dest
)
696 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
697 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
698 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
699 | R500_ALU_RGBA_ADDRD(dest
);
702 emit_alu(fp
, counter
, fpi
);
703 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
706 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
707 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
708 /* Variation on MAD: src0*src1+0 */
709 emit_alu(fp
, counter
, fpi
);
710 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
711 | R500_RGB_ADDR1(src
[1]);
712 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
713 | R500_ALPHA_ADDR1(src
[1]);
714 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
715 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
716 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
717 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
718 | R500_ALPHA_ADDRD(dest
)
719 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
720 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
721 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
722 | R500_ALU_RGBA_ADDRD(dest
)
723 // | R500_ALU_RGBA_SEL_C_SRC2
724 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
725 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
726 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
729 /* POW(a,b) = EX2(LN2(a)*b) */
730 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
731 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
732 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
733 | (R500_WRITEMASK_ARGB
<< 11);
734 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
735 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
736 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
737 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
738 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_LN2
739 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
740 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
741 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
742 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0));
744 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
745 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0))
746 | R500_RGB_ADDR1(src
[1]);
747 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0))
748 | R500_ALPHA_ADDR1(src
[1]);
749 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
750 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
751 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
752 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
753 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
754 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
755 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
756 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
757 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1))
758 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
759 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
761 emit_alu(fp
, counter
, fpi
);
762 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
763 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
764 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
765 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
766 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_EX2
767 | R500_ALPHA_ADDRD(dest
)
768 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
769 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
770 | R500_ALU_RGBA_ADDRD(dest
);
773 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
774 emit_alu(fp
, counter
, fpi
);
775 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
776 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
777 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
778 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
779 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RCP
780 | R500_ALPHA_ADDRD(dest
)
781 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
782 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
783 | R500_ALU_RGBA_ADDRD(dest
);
786 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
787 emit_alu(fp
, counter
, fpi
);
788 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
789 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
790 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
791 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
792 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RSQ
793 | R500_ALPHA_ADDRD(dest
)
794 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
795 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
796 | R500_ALU_RGBA_ADDRD(dest
);
799 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
800 src
[1] = emit_const4fv(fp
, RCP_2PI
);
801 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
802 | (R500_WRITEMASK_ARGB
<< 11);
803 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
804 | R500_RGB_ADDR1(src
[1]);
805 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
806 | R500_ALPHA_ADDR1(src
[1]);
807 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
808 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
809 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
810 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
811 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
812 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
813 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
814 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
815 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
816 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
817 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
819 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
820 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
821 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
822 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
823 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
824 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
825 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
826 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
827 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
828 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1));
830 /* Do a cosine, then a sine, masking out the channels we want to protect. */
831 /* Cosine only goes in R (x) channel. */
832 fpi
->DstReg
.WriteMask
= 0x1;
833 emit_alu(fp
, counter
, fpi
);
834 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
835 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
836 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
837 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
838 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
839 | R500_ALPHA_ADDRD(dest
)
840 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
841 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
842 | R500_ALU_RGBA_ADDRD(dest
);
844 /* Sine only goes in G (y) channel. */
845 fpi
->DstReg
.WriteMask
= 0x2;
846 emit_alu(fp
, counter
, fpi
);
847 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
848 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
849 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
850 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
851 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
852 | R500_ALPHA_ADDRD(dest
)
853 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
854 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
855 | R500_ALU_RGBA_ADDRD(dest
);
858 /* We use SRCP, so as a precaution we're
859 * going to set NOP in previous inst, if possible. */
860 /* This inst's selects need to be swapped as follows:
861 * 0 -> C ; 1 -> B ; 2 -> A */
862 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
863 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
864 emit_alu(fp
, counter
, fpi
);
865 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
866 | R500_RGB_ADDR1(src
[1])
867 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
868 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
869 | R500_ALPHA_ADDR1(src
[1])
870 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
871 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
872 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
873 | R500_ALU_RGB_SEL_B_SRC1
874 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
875 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
876 | R500_ALPHA_ADDRD(dest
)
877 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
878 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO
);
879 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
880 | R500_ALU_RGBA_ADDRD(dest
)
881 | R500_ALU_RGBA_SEL_C_SRCP
882 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
883 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
884 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
887 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
888 src
[1] = emit_const4fv(fp
, RCP_2PI
);
889 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
890 | (R500_WRITEMASK_ARGB
<< 11);
891 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
892 | R500_RGB_ADDR1(src
[1]);
893 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
894 | R500_ALPHA_ADDR1(src
[1]);
895 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
896 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
897 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
898 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
899 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
900 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
901 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
902 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
903 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
904 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
905 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
907 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
908 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
909 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
910 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
911 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
912 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
913 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
914 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
915 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
916 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1));
918 emit_alu(fp
, counter
, fpi
);
919 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
920 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
921 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
;
922 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
923 | R500_ALPHA_ADDRD(dest
)
924 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
925 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
926 | R500_ALU_RGBA_ADDRD(dest
);
929 /* We use SRCP, so as a precaution we're
930 * going to set NOP in previous inst, if possible. */
931 /* This inst's selects need to be swapped as follows:
932 * 0 -> C ; 1 -> B ; 2 -> A */
933 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
934 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
935 emit_alu(fp
, counter
, fpi
);
936 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
937 | R500_RGB_ADDR1(src
[1])
938 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
939 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
940 | R500_ALPHA_ADDR1(src
[1])
941 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
942 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
943 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO
)
944 | R500_ALU_RGB_SEL_B_SRC1
945 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
946 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
947 | R500_ALPHA_ADDRD(dest
)
948 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO
)
949 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE
);
950 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
951 | R500_ALU_RGBA_ADDRD(dest
)
952 | R500_ALU_RGBA_SEL_C_SRCP
953 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
954 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
955 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
958 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
959 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
960 /* Variation on MAD: 1*src0-src1 */
961 emit_alu(fp
, counter
, fpi
);
962 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
963 | R500_RGB_ADDR2(src
[1]);
964 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
965 | R500_ALPHA_ADDR2(src
[1]);
966 fp
->inst
[counter
].inst3
= /* 1 */
967 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
968 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
969 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
970 | R500_ALPHA_ADDRD(dest
)
971 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
972 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
973 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
974 | R500_ALU_RGBA_ADDRD(dest
)
975 | R500_ALU_RGBA_SEL_C_SRC2
976 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
977 | R500_ALU_RGBA_MOD_C_NEG
978 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
979 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
980 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
983 /* TODO: Negation masks! */
984 emit_alu(fp
, counter
, fpi
);
985 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
988 emit_tex(fp
, fpi
, OPCODE_TEX
, dest
, counter
);
989 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
993 emit_tex(fp
, fpi
, OPCODE_TXB
, dest
, counter
);
994 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
998 emit_tex(fp
, fpi
, OPCODE_TXP
, dest
, counter
);
999 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
1003 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi
->Opcode
));
1007 /* Finishing touches */
1008 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
1009 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
1019 /* Finish him! (If it's an ALU/OUT instruction...) */
1020 if ((fp
->inst
[counter
-1].inst0
& 0x3) == 1) {
1021 fp
->inst
[counter
-1].inst0
|= R500_INST_LAST
;
1023 /* We still need to put an output inst, right? */
1024 WARN_ONCE("Final FP instruction is not an OUT.\n");
1030 fp
->cs
->nrslots
= counter
;
/**
 * Reset the per-compile tracking state of a fragment program and work out
 * which hardware temp register each fragment input it reads lives in.
 *
 * Inputs are given consecutive registers, starting at 0, in this order:
 * texcoords, WPOS, COL0, COL1.  Any other input bit triggers a warning and
 * is mapped to register 0.  Finally max_temp_idx is derived from the number
 * of input registers plus the highest PROGRAM_TEMPORARY index read as a
 * source operand.
 *
 * \param r300 driver context; its option cache is queried for
 *             "fp_optimization".
 * \param fp   fragment program whose state is reset.
 *
 * NOTE(review): several lines of this function are not present in this
 * copy of the file (e.g. no declaration of i and j is visible, nor the
 * target that receives the driQueryOptioni() result) -- confirm against
 * the upstream source.
 */
1037 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
1039 struct r300_pfs_compile_state
*cs
= NULL
;
1040 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
1041 struct prog_instruction
*fpi
;
1042 GLuint InputsRead
= mp
->Base
.InputsRead
;
1043 GLuint temps_used
= 0;
1046 /* New compile, reset tracking data */
1048 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
1049 fp
->translated
= GL_FALSE
;
1050 fp
->error
= GL_FALSE
;
1051 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
1053 fp
->first_node_has_tex
= 0;
1055 /* Size of pixel stack, plus 1. */
1056 fp
->max_temp_idx
= 1;
1057 /* Temp register offset. */
1058 fp
->temp_reg_offset
= 0;
1059 fp
->node
[0].alu_end
= -1;
1060 fp
->node
[0].tex_end
= -1;
/* Zero the whole compile-state struct, then mark the three vsrc/ssrc
 * entries of every slot as SRC_CONST. */
1062 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
1063 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
1064 for (j
= 0; j
< 3; j
++) {
1065 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
1066 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
1070 /* Work out what temps the Mesa inputs correspond to, this must match
1071 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1072 * configures itself based on the fragprog's InputsRead
1074 * NOTE: this depends on get_hw_temp() allocating registers in order,
1075 * starting from register 0, so we're just going to do that instead.
1078 /* Texcoords come first */
1079 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
1080 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1081 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
1082 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
1083 fp
->temp_reg_offset
;
1084 fp
->temp_reg_offset
++;
/* Clear each handled input bit so that only unhandled inputs remain. */
1087 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
1089 /* fragment position treated as a texcoord */
1090 if (InputsRead
& FRAG_BIT_WPOS
) {
1091 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
1092 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
=
1093 fp
->temp_reg_offset
;
1094 fp
->temp_reg_offset
++;
1096 InputsRead
&= ~FRAG_BIT_WPOS
;
1098 /* Then primary colour */
1099 if (InputsRead
& FRAG_BIT_COL0
) {
1100 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
1101 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
=
1102 fp
->temp_reg_offset
;
1103 fp
->temp_reg_offset
++;
1105 InputsRead
&= ~FRAG_BIT_COL0
;
1107 /* Secondary color */
1108 if (InputsRead
& FRAG_BIT_COL1
) {
1109 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
1110 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
=
1111 fp
->temp_reg_offset
;
1112 fp
->temp_reg_offset
++;
1114 InputsRead
&= ~FRAG_BIT_COL1
;
1118 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
1119 /* force read from hwreg 0 for now */
1120 for (i
= 0; i
< 32; i
++)
1121 if (InputsRead
& (1 << i
))
1122 cs
->inputs
[i
].reg
= 0;
1125 if (!mp
->Base
.Instructions
) {
1126 ERROR("No instructions found in program, going to go die now.\n");
/* Find the highest PROGRAM_TEMPORARY index read as a source operand;
 * only SrcReg entries are examined, up to the OPCODE_END instruction. */
1130 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1131 for (i
= 0; i
< 3; i
++) {
1132 if (fpi
->SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
1133 if (fpi
->SrcReg
[i
].Index
> temps_used
)
1134 temps_used
= fpi
->SrcReg
[i
].Index
;
1139 cs
->temp_in_use
= temps_used
;
/* Input registers, plus the highest temporary index, plus one. */
1141 fp
->max_temp_idx
= fp
->temp_reg_offset
+ cs
->temp_in_use
+ 1;
1144 static void update_params(struct r500_fragment_program
*fp
)
1146 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
1148 /* Ask Mesa nicely to fill in ParameterValues for us */
1149 if (mp
->Base
.Parameters
)
1150 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
/**
 * Load a fixed two-instruction fallback program into fp->inst.
 *
 * Instruction 0 is a TEX-type instruction using texture id 0 with source
 * and destination address 0; instruction 1 is an OUT-type instruction with
 * all four output-mask bits set, using the MAD opcode on both the alpha and
 * RGBA units.  Afterwards the slot count is set to 2 and the program is
 * marked as translated.
 *
 * \param fp fragment program to overwrite.
 *
 * NOTE(review): some operand lines of this function are not present in this
 * copy of the file (for example the rest of the inst[0].inst3 expression);
 * confirm against the upstream source.
 */
1153 static void dumb_shader(struct r500_fragment_program
*fp
)
/* Instruction 0: TEX type, all write-mask bits, RGB and alpha clamp. */
1155 fp
->inst
[0].inst0
= R500_INST_TYPE_TEX
1156 | R500_INST_TEX_SEM_WAIT
1157 | R500_INST_RGB_WMASK_R
1158 | R500_INST_RGB_WMASK_G
1159 | R500_INST_RGB_WMASK_B
1160 | R500_INST_ALPHA_WMASK
1161 | R500_INST_RGB_CLAMP
1162 | R500_INST_ALPHA_CLAMP
;
1163 fp
->inst
[0].inst1
= R500_TEX_ID(0)
1165 | R500_TEX_SEM_ACQUIRE
1166 | R500_TEX_IGNORE_UNCOVERED
;
1167 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
1168 | R500_TEX_SRC_S_SWIZ_R
1169 | R500_TEX_SRC_T_SWIZ_G
1170 | R500_TEX_DST_ADDR(0)
1171 | R500_TEX_DST_R_SWIZ_R
1172 | R500_TEX_DST_G_SWIZ_G
1173 | R500_TEX_DST_B_SWIZ_B
1174 | R500_TEX_DST_A_SWIZ_A
;
1175 fp
->inst
[0].inst3
= R500_DX_ADDR(0)
1185 fp
->inst
[0].inst4
= 0x0;
1186 fp
->inst
[0].inst5
= 0x0;
/* Instruction 1: OUT type with the R, G, B and alpha output masks set. */
1188 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
1189 R500_INST_TEX_SEM_WAIT
|
1191 R500_INST_RGB_OMASK_R
|
1192 R500_INST_RGB_OMASK_G
|
1193 R500_INST_RGB_OMASK_B
|
1194 R500_INST_ALPHA_OMASK
;
1195 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
1197 R500_RGB_ADDR1_CONST
|
1199 R500_RGB_ADDR2_CONST
|
1200 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
1201 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
1202 R500_ALPHA_ADDR1(0) |
1203 R500_ALPHA_ADDR1_CONST
|
1204 R500_ALPHA_ADDR2(0) |
1205 R500_ALPHA_ADDR2_CONST
|
1206 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
/* A and B both select SRC0: A keeps R/G/B, B uses the "1" swizzle for every
 * channel; the C swizzles further down are all "0". */
1207 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
1208 R500_ALU_RGB_R_SWIZ_A_R
|
1209 R500_ALU_RGB_G_SWIZ_A_G
|
1210 R500_ALU_RGB_B_SWIZ_A_B
|
1211 R500_ALU_RGB_SEL_B_SRC0
|
1212 R500_ALU_RGB_R_SWIZ_B_1
|
1213 R500_ALU_RGB_B_SWIZ_B_1
|
1214 R500_ALU_RGB_G_SWIZ_B_1
;
1215 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
1216 R500_ALPHA_SWIZ_A_A
|
1217 R500_ALPHA_SWIZ_B_1
;
1218 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
1219 R500_ALU_RGBA_R_SWIZ_0
|
1220 R500_ALU_RGBA_G_SWIZ_0
|
1221 R500_ALU_RGBA_B_SWIZ_0
|
1222 R500_ALU_RGBA_A_SWIZ_0
;
/* Two slots are in use; mark the program as translated. */
1224 fp
->cs
->nrslots
= 2;
1225 fp
->translated
= GL_TRUE
;
/**
 * Translate a Mesa fragment program into R500 instructions if that has not
 * been done yet.
 *
 * When fp->translated is clear, the program state is reset with
 * init_program() and the program is run through parse_program().  On a
 * parse failure an error is reported; in both outcomes inst_offset is set
 * to 0 and inst_end to nrslots - 1.  On success the program is marked as
 * translated and the Mesa program is printed to stderr.
 *
 * \param r300 driver context, forwarded to init_program().
 * \param fp   fragment program to translate.
 *
 * NOTE(review): several lines of this function are not present in this
 * copy of the file.  In particular no visible line assigns cs after its
 * NULL initialisation although cs->nrslots is read below, and the end of
 * the failure branch is not visible -- confirm against the upstream source.
 */
1228 void r500TranslateFragmentShader(r300ContextPtr r300
,
1229 struct r500_fragment_program
*fp
)
1232 struct r300_pfs_compile_state
*cs
= NULL
;
/* Compile only when the program is not yet marked as translated. */
1234 if (!fp
->translated
) {
1236 init_program(r300
, fp
);
1239 if (parse_program(fp
) == GL_FALSE
) {
1240 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1242 fp
->inst_offset
= 0;
1243 fp
->inst_end
= cs
->nrslots
- 1;
1246 fp
->inst_offset
= 0;
1247 fp
->inst_end
= cs
->nrslots
- 1;
1249 fp
->translated
= GL_TRUE
;
/* NOTE(review): the leading "1 ||" makes this condition always true, so
 * the program text is printed regardless of RADEON_DEBUG. */
1250 if (1 || RADEON_DEBUG
& DEBUG_PIXEL
) {
1251 fprintf(stderr
, "Mesa program:\n");
1252 fprintf(stderr
, "-------------\n");
1253 _mesa_print_program(&fp
->mesa_program
.Base
);
1259 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);
/**
 * Name a swizzle selector value for the debug dump.
 *
 * \param swiz_val selector value; dump_program() passes 3-bit fields taken
 *                 from ALU words and 2-bit fields taken from texture words.
 * \return a string literal naming the selector.
 *
 * NOTE(review): only the mapping 5 -> "1/2" is present in this copy of the
 * file; the switch header, the remaining cases and the fallback return are
 * missing and must be confirmed against the upstream source.
 */
1266 static char *toswiz(int swiz_val
) {
1273 case 5: return "1/2";
/**
 * Name an RGBA-unit opcode for the debug dump.
 *
 * \param op_val opcode number; dump_program() passes the low four bits of
 *               an instruction's inst5 word.
 * \return a string literal: the mnemonic for opcodes 0..12, or "UNKNOWN"
 *         for every other value.  Never NULL, so the result can be handed
 *         directly to a "%s" conversion.
 */
static char *toop(int op_val)
{
	/* Indexed by opcode number. */
	static char *const names[] = {
		"MAD",      /*  0 */
		"DP3",      /*  1 */
		"DP4",      /*  2 */
		"D2A",      /*  3 */
		"MIN",      /*  4 */
		"MAX",      /*  5 */
		"Reserved", /*  6 */
		"CND",      /*  7 */
		"CMP",      /*  8 */
		"FRC",      /*  9 */
		"SOP",      /* 10 */
		"MDH",      /* 11 */
		"MDV",      /* 12 */
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	/* A 4-bit field can also hold 13..15, which have no name here. */
	if (op_val < 0 || op_val >= count) {
		return "UNKNOWN";
	}
	return names[op_val];
}
/**
 * Name an alpha-unit opcode for the debug dump.
 *
 * \param op_val opcode number; dump_program() passes the low four bits of
 *               an instruction's inst4 word.
 * \return a string literal: the mnemonic for opcodes 0..15, or "UNKNOWN"
 *         for any value outside that range.  Never NULL.
 */
static char *to_alpha_op(int op_val)
{
	/* Indexed by opcode number. */
	static char *const names[] = {
		"MAD",      /*  0 */
		"DP",       /*  1 */
		"MIN",      /*  2 */
		"MAX",      /*  3 */
		"Reserved", /*  4 */
		"CND",      /*  5 */
		"CMP",      /*  6 */
		"FRC",      /*  7 */
		"EX2",      /*  8 */
		"LN2",      /*  9 */
		"RCP",      /* 10 */
		"RSQ",      /* 11 */
		"SIN",      /* 12 */
		"COS",      /* 13 */
		"MDH",      /* 14 */
		"MDV",      /* 15 */
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (op_val < 0 || op_val >= count) {
		return "UNKNOWN";
	}
	return names[op_val];
}
/**
 * Describe a four-bit channel mask for the debug dump.
 *
 * Bit 0 stands for R, bit 1 for G, bit 2 for B and bit 3 for A; when the
 * alpha bit is set the "A" is printed first (e.g. 9 -> "AR").
 *
 * \param val mask value; dump_program() passes 4-bit fields of inst0.
 * \return a string literal: "NONE" for 0, the channel letters for 1..15,
 *         or "UNKNOWN" for any value outside 0..15.  Never NULL.
 */
static char *to_mask(int val)
{
	/* Indexed by mask value. */
	static char *const names[] = {
		"NONE", "R",  "G",  "RG",
		"B",    "RB", "GB", "RGB",
		"A",    "AR", "AG", "ARG",
		"AB",   "ARB", "AGB", "ARGB",
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (val < 0 || val >= count) {
		return "UNKNOWN";
	}
	return names[val];
}
/**
 * Name a texture-instruction opcode for the debug dump.
 *
 * \param val opcode number; dump_program() passes a 3-bit field of a
 *            texture instruction's inst1 word.
 * \return a string literal: the mnemonic for opcodes 0..6, or "UNKNOWN"
 *         for every other value.  Never NULL, so the result can be handed
 *         directly to a "%s" conversion.
 */
static char *to_texop(int val)
{
	/* Indexed by opcode number. */
	static char *const names[] = {
		"NOP",     /* 0 */
		"LD",      /* 1 */
		"TEXKILL", /* 2 */
		"PROJ",    /* 3 */
		"LODBIAS", /* 4 */
		"LOD",     /* 5 */
		"DXDY",    /* 6 */
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	/* A 3-bit field can also hold 7, which has no name here. */
	if (val < 0 || val >= count) {
		return "UNKNOWN";
	}
	return names[val];
}
/**
 * Print a decoded listing of a translated program to stderr.
 *
 * Walks fp->inst[0] .. fp->inst[fp->inst_end] and, for each instruction,
 * prints the raw words in hex followed by the fields extracted from them
 * with the shifts and masks below.
 *
 * \param fp fragment program to dump.
 *
 * NOTE(review): several lines of this function are not present in this
 * copy of the file (local declarations, the case labels of the second
 * switch, some trailing printf arguments and the closing braces); confirm
 * against the upstream source.
 */
1363 static void dump_program(struct r500_fragment_program
*fp
)
1371 for (n
= 0; n
< fp
->inst_end
+1; n
++) {
1372 inst0
= inst
= fp
->inst
[n
].inst0
;
1373 fprintf(stderr
,"%d\t0:CMN_INST 0x%08x:", n
, inst
);
/* The low two bits of inst0 select the instruction type. */
1374 switch(inst
& 0x3) {
1375 case R500_INST_TYPE_ALU
: str
= "ALU"; break;
1376 case R500_INST_TYPE_OUT
: str
= "OUT"; break;
1377 case R500_INST_TYPE_FC
: str
= "FC"; break;
1378 case R500_INST_TYPE_TEX
: str
= "TEX"; break;
1380 fprintf(stderr
,"%s %s %s %s %s ", str
,
1381 inst
& R500_INST_TEX_SEM_WAIT
? "TEX_WAIT" : "",
1382 inst
& R500_INST_LAST
? "LAST" : "",
1383 inst
& R500_INST_NOP
? "NOP" : "",
1384 inst
& R500_INST_ALU_WAIT
? "ALU WAIT" : "");
/* Write mask is taken from bits 11..14, output mask from bits 15..18. */
1385 fprintf(stderr
,"wmask: %s omask: %s\n", to_mask((inst
>> 11) & 0xf),
1386 to_mask((inst
>> 15) & 0xf));
1388 switch(inst0
& 0x3) {
/* The prints below label the five remaining words RGB_ADDR, ALPHA_ADDR,
 * RGB_INST, ALPHA_INST and RGBA_INST. */
1391 fprintf(stderr
,"\t1:RGB_ADDR 0x%08x:", fp
->inst
[n
].inst1
);
1392 inst
= fp
->inst
[n
].inst1
;
1394 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1395 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1396 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1397 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1400 fprintf(stderr
,"\t2:ALPHA_ADDR 0x%08x:", fp
->inst
[n
].inst2
);
1401 inst
= fp
->inst
[n
].inst2
;
1402 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1403 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1404 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1405 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1407 fprintf(stderr
,"\t3 RGB_INST: 0x%08x:", fp
->inst
[n
].inst3
);
1408 inst
= fp
->inst
[n
].inst3
;
1409 fprintf(stderr
,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1410 (inst
) & 0x3, toswiz((inst
>> 2) & 0x7), toswiz((inst
>> 5) & 0x7), toswiz((inst
>> 8) & 0x7),
1412 (inst
>> 13) & 0x3, toswiz((inst
>> 15) & 0x7), toswiz((inst
>> 18) & 0x7), toswiz((inst
>> 21) & 0x7),
1413 (inst
>> 24) & 0x3);
1416 fprintf(stderr
,"\t4 ALPHA_INST:0x%08x:", fp
->inst
[n
].inst4
);
1417 inst
= fp
->inst
[n
].inst4
;
1418 fprintf(stderr
,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst
& 0xf),
1419 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1420 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), (inst
>> 17) & 0x3,
1421 (inst
>> 19) & 0x3, toswiz((inst
>> 21) & 0x7), (inst
>> 24) & 0x3);
1423 fprintf(stderr
,"\t5 RGBA_INST: 0x%08x:", fp
->inst
[n
].inst5
);
1424 inst
= fp
->inst
[n
].inst5
;
1425 fprintf(stderr
,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst
& 0xf),
1426 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1427 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), toswiz((inst
>> 17) & 0x7), toswiz((inst
>> 20) & 0x7),
1429 (inst
>> 25) & 0x3, toswiz((inst
>> 27) & 0x7), (inst
>> 30) & 0x3);
/* The prints below label the words TEX_INST, TEX_ADDR and TEX_DXDY; the
 * swizzle fields taken from the TEX_ADDR word are two bits wide. */
1434 inst
= fp
->inst
[n
].inst1
;
1435 fprintf(stderr
,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst
, (inst
>> 16) & 0xf,
1436 to_texop((inst
>> 22) & 0x7), (inst
& (1<<25)) ? "ACQ" : "",
1437 (inst
& (1<<26)) ? "IGNUNC" : "", (inst
& (1<<27)) ? "UNSCALED" : "SCALED");
1438 inst
= fp
->inst
[n
].inst2
;
1439 fprintf(stderr
,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst
,
1440 inst
& 127, inst
& (1<<7) ? "(rel)" : "",
1441 toswiz((inst
>> 8) & 0x3), toswiz((inst
>> 10) & 0x3),
1442 toswiz((inst
>> 12) & 0x3), toswiz((inst
>> 14) & 0x3),
1443 (inst
>> 16) & 127, inst
& (1<<23) ? "(rel)" : "",
1444 toswiz((inst
>> 24) & 0x3), toswiz((inst
>> 26) & 0x3),
1445 toswiz((inst
>> 28) & 0x3), toswiz((inst
>> 30) & 0x3));
1447 fprintf(stderr
,"\t3:TEX_DXDY: 0x%08x\n", fp
->inst
[n
].inst3
);
1450 fprintf(stderr
,"\n");