2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
58 * Useful macros and values
/*
 * ERROR(fmt, ...): print a diagnostic to stderr prefixed with the source
 * file and function name, then mark the program as failed by setting
 * fp->error to GL_TRUE.  A variable named fp must be in scope at the call
 * site.  Uses the GNU named-variadic form (args... with ##args).
 * NOTE(review): the macro appends a newline to fmt, and the call sites
 * visible in this file also end their messages with a newline -- confirm
 * the resulting blank line is intended.
 *
 * COMPILE_STATE: declares a local cs pointing at fp->cs; it likewise needs
 * fp in scope.
 */
60 #define ERROR(fmt, args...) do { \
61 fprintf(stderr, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
63 fp->error = GL_TRUE; \
66 #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
/* Upper bounds tested by get_temp() (temporaries) and emit_const4fv()
 * (constants) before a register index is handed out. */
68 #define R500_US_NUM_TEMP_REGS 128
69 #define R500_US_NUM_CONST_REGS 256
71 /* "Register" flags */
/* REG_CONSTANT is OR-ed into the index returned by emit_const4fv(); the two
 * REL flags are not referenced in the part of the file visible here. */
72 #define REG_CONSTANT (1 << 8)
73 #define REG_SRC_REL (1 << 9)
74 #define REG_DEST_REL (1 << 7)
/* Selector values for the constant swizzles (zero, half, one per the macro
 * names).  The make_*_swizzle() helpers bump an incoming selector of 5 to 6,
 * i.e. to R500_SWIZZLE_ONE. */
77 #define R500_SWIZZLE_ZERO 4
78 #define R500_SWIZZLE_HALF 5
79 #define R500_SWIZZLE_ONE 6
/* Ready-made RGB swizzles: three 3-bit selectors at bit offsets 0, 3 and 6. */
80 #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
81 #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
82 #define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
/* The MAKE_SWIZ_* macros below shift a swizzle value into its field of the
 * named instruction word.
 * NOTE(review): x is not parenthesized in the expansions, so an argument
 * containing an operator of lower precedence than << would be mis-grouped;
 * the visible call sites pass function calls, plain variables or fully
 * parenthesized macros. */
83 /* Swizzles for inst2 */
84 #define MAKE_SWIZ_TEX_STRQ(x) (x << 8)
85 #define MAKE_SWIZ_TEX_RGBA(x) (x << 24)
86 /* Swizzles for inst3 */
87 #define MAKE_SWIZ_RGB_A(x) (x << 2)
88 #define MAKE_SWIZ_RGB_B(x) (x << 15)
89 /* Swizzles for inst4 */
90 #define MAKE_SWIZ_ALPHA_A(x) (x << 14)
91 #define MAKE_SWIZ_ALPHA_B(x) (x << 21)
92 /* Swizzle for inst5 */
93 #define MAKE_SWIZ_RGBA_C(x) (x << 14)
94 #define MAKE_SWIZ_ALPHA_C(x) (x << 27)
/* All four write-mask bits set; parse_program() shifts it left by 11 into
 * inst0 for the intermediate instructions that write a temporary. */
97 #define R500_WRITEMASK_ARGB 0xF
99 /* 1/(2pi), needed for quick modulus in trig insts
100 * Thanks to glisse for pointing out how to do it! */
/* parse_program() registers this array through emit_const4fv() and
 * multiplies the trig operand by it (MAD with a zero addend) before taking
 * FRC and then COS or SIN.
 * NOTE(review): only two initializers are visible in this view; verify the
 * array really provides every component emit_const4fv() reads. */
101 static const GLfloat RCP_2PI
[] = {0.15915494309189535,
104 0.15915494309189535};
/* Forward declaration of a file-local routine taking the fragment program;
 * its definition and its callers are not in the part of the file visible
 * here. */
106 static void dump_program(struct r500_fragment_program
*fp
);
/*
 * Translate the x/y/z swizzle selectors of a Mesa source register into the
 * value used for the RGB swizzle fields of an ALU instruction.
 * Each of the first three selectors (i = 0..2) is read with
 * GET_SWZ(src.Swizzle, i); a selector equal to 5 is incremented to 6, which
 * is R500_SWIZZLE_ONE in this file.
 * NOTE(review): the local declarations, the statement that packs temp into
 * the result and the return are not visible in this view.  Callers pass the
 * result to MAKE_SWIZ_RGB_A, MAKE_SWIZ_RGB_B and MAKE_SWIZ_RGBA_C.
 */
108 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
111 /* This could be optimized, but it should be plenty fast already. */
113 for (i
= 0; i
< 3; i
++) {
114 temp
= GET_SWZ(src
.Swizzle
, i
);
115 /* Fix SWIZZLE_ONE */
116 if (temp
== 5) temp
++;
/*
 * Fetch the fourth swizzle selector (component index 3) of src for use in
 * the alpha swizzle fields; a selector of 5 is bumped to 6
 * (R500_SWIZZLE_ONE), mirroring make_rgb_swizzle().
 * NOTE(review): the return statement is not visible in this view; callers
 * pass the result to MAKE_SWIZ_ALPHA_A, MAKE_SWIZ_ALPHA_B and
 * MAKE_SWIZ_ALPHA_C.
 */
122 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
123 GLuint swiz
= GET_SWZ(src
.Swizzle
, 3);
125 if (swiz
== 5) swiz
++;
/*
 * Same as make_alpha_swizzle() but reads the first selector (component
 * index 0) of src.  parse_program() uses it for the alpha-unit operand of
 * the single-operand opcodes (COS, SIN, EX2, LN2, RCP, RSQ) that are paired
 * with R500_ALU_RGBA_OP_SOP.  A selector of 5 is bumped to 6.
 * NOTE(review): the return statement is not visible in this view.
 */
129 static inline GLuint
make_sop_swizzle(struct prog_src_register src
) {
130 GLuint swiz
= GET_SWZ(src
.Swizzle
, 0);
132 if (swiz
== 5) swiz
++;
/*
 * Build a swizzle with 2 bits per component from src.Swizzle: on each of
 * the four iterations the low two bits of temp are added into swiz at bit
 * offset i*2.
 * The only reference visible in this file is commented out inside
 * emit_tex(), which hard-codes the S/T/R/Q source swizzle instead.
 * NOTE(review): no statement advancing temp between iterations is visible
 * in this view, nor are the declarations of swiz and i or the return;
 * verify against the complete source before relying on this helper.
 */
136 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
138 GLuint temp
= src
.Swizzle
;
140 for (i
= 0; i
< 4; i
++) {
141 swiz
+= (temp
& 0x3) << i
*2;
/*
 * Compute the index of a scratch temporary register.
 *
 *   fp   - fragment program being compiled (provides cs via fp->cs).
 *   slot - which scratch register is wanted; 0 and 1 are used by callers.
 *
 * The index is cs->temp_in_use + 1 + slot.  An index above the limit is
 * reported through ERROR(), which sets fp->error.
 * NOTE(review): R500_US_NUM_TEMP_REGS is 128, so r == 128 passes the
 * r > limit test below; if valid register indices are 0..127 the comparison
 * should be >=.
 * NOTE(review): the COMPILE_STATE line and the return are not visible in
 * this view; callers use the result directly as a register address.
 */
147 static int get_temp(struct r500_fragment_program
*fp
, int slot
) {
151 int r
= cs
->temp_in_use
+ 1 + slot
;
153 if (r
> R500_US_NUM_TEMP_REGS
) {
154 ERROR("Too many temporary registers requested, can't compile!\n");
160 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
/*
 * Find or allocate the hardware constant slot that refers to cp.
 *
 * fp->constant[0 .. const_nr) is scanned for an entry equal to cp (pointer
 * comparison).  If none matches, index ends up at fp->const_nr; that index
 * is checked against R500_US_NUM_CONST_REGS (ERROR on overflow) and cp is
 * stored at fp->constant[index].  The register value is index with
 * REG_CONSTANT OR-ed in.
 * NOTE(review): the cp parameter declaration, the locals, the update of
 * fp->const_nr and the return are on lines not visible in this view.
 */
161 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
167 for (index
= 0; index
< fp
->const_nr
; ++index
) {
168 if (fp
->constant
[index
] == cp
)
172 if (index
>= fp
->const_nr
) {
173 if (index
>= R500_US_NUM_CONST_REGS
) {
174 ERROR("Out of hw constants!\n");
179 fp
->constant
[index
] = cp
;
182 reg
= index
| REG_CONSTANT
;
/*
 * Map a Mesa source register onto a hardware source register value.
 *
 *   PROGRAM_TEMPORARY                     -> src.Index + fp->temp_reg_offset
 *   (label not visible, presumably input) -> cs->inputs[src.Index].reg
 *   PROGRAM_LOCAL_PARAM                   -> emit_const4fv() on the program
 *                                            LocalParams entry
 *   PROGRAM_ENV_PARAM                     -> emit_const4fv() on the context
 *                                            FragmentProgram.Parameters entry
 *   PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM,
 *   PROGRAM_CONSTANT                      -> emit_const4fv() on
 *                                            Parameters->ParameterValues[src.Index]
 *   anything else                         -> ERROR() with the file number
 *
 * NOTE(review): the switch header, the break statements, the declaration of
 * reg and the return are not visible in this view.
 */
186 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
190 case PROGRAM_TEMPORARY
:
191 reg
= src
.Index
+ fp
->temp_reg_offset
;
194 reg
= cs
->inputs
[src
.Index
].reg
;
196 case PROGRAM_LOCAL_PARAM
:
197 reg
= emit_const4fv(fp
,
198 fp
->mesa_program
.Base
.LocalParams
[src
.
201 case PROGRAM_ENV_PARAM
:
202 reg
= emit_const4fv(fp
,
203 fp
->ctx
->FragmentProgram
.Parameters
[src
.
206 case PROGRAM_STATE_VAR
:
207 case PROGRAM_NAMED_PARAM
:
208 case PROGRAM_CONSTANT
:
209 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
210 ParameterValues
[src
.Index
]);
213 ERROR("Can't handle src.File %x\n", src
.File
);
/*
 * Map a Mesa destination register onto a hardware destination address.
 * PROGRAM_TEMPORARY destinations become dest.Index + fp->temp_reg_offset;
 * unsupported register files are reported through ERROR().
 * NOTE(review): the case that the multiple-render-target comment below
 * belongs to, the switch header and the return are not visible in this
 * view.
 */
220 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
223 case PROGRAM_TEMPORARY
:
224 reg
= dest
.Index
+ fp
->temp_reg_offset
;
227 /* Eventually we may need to handle multiple
228 * rendering targets... */
232 ERROR("Can't handle dest.File %x\n", dest
.File
);
/*
 * Encode a texture instruction for fpi into fp->inst[counter].
 *
 *   fp      - fragment program being compiled; its inst[] array is written.
 *   fpi     - Mesa instruction supplying DstReg, SrcReg[0], TexSrcUnit and
 *             TexSrcTarget.
 *   opcode  - selects which R500_TEX_INST_ variant (TEXKILL, LD, LODBIAS,
 *             PROJ) is OR-ed into inst1; other values go to ERROR().
 *             parse_program() passes OPCODE_KIL for the KIL opcode.
 *   dest    - hardware destination address, used by the trailing output
 *             instruction.
 *   counter - index of the instruction slot to fill.
 *
 * When the destination file is PROGRAM_OUTPUT the fetch is directed to
 * get_temp(fp, 0) and a CMP-based R500_INST_TYPE_OUT instruction that reads
 * that temporary and addresses dest is written afterwards.
 * NOTE(review): the lines between the two instructions (presumably a
 * counter increment), the non-output choice of hwdest and the switch on
 * opcode are not visible in this view.  counter is passed by value, so
 * confirm how the caller accounts for the extra slot.
 */
239 static void emit_tex(struct r500_fragment_program
*fp
,
240 struct prog_instruction
*fpi
, int opcode
, int dest
, int counter
)
245 mask
= fpi
->DstReg
.WriteMask
<< 11;
246 hwsrc
= make_src(fp
, fpi
->SrcReg
[0]);
248 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
249 hwdest
= get_temp(fp
, 0);
254 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
255 | R500_INST_TEX_SEM_WAIT
;
257 fp
->inst
[counter
].inst1
= R500_TEX_ID(fpi
->TexSrcUnit
)
258 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
/* Rectangle textures are flagged R500_TEX_UNSCALED. */
260 if (fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
261 fp
->inst
[counter
].inst1
|= R500_TEX_UNSCALED
;
/* One of the following variants is selected per opcode (case labels are not
 * visible in this view). */
265 fp
->inst
[counter
].inst1
|= R500_TEX_INST_TEXKILL
;
268 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
271 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LODBIAS
;
274 fp
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
277 ERROR("emit_tex can't handle opcode %x\n", opcode
);
/* Source and destination swizzles are fixed identity mappings; the source
 * register swizzle from fpi is ignored (see the commented-out term). */
280 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
281 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
282 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
283 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
284 | R500_TEX_DST_ADDR(hwdest
)
285 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
286 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
288 fp
->inst
[counter
].inst3
= 0x0;
289 fp
->inst
[counter
].inst4
= 0x0;
290 fp
->inst
[counter
].inst5
= 0x0;
/* Output destination: copy the fetched texel from the scratch temporary to
 * dest with an OUT-type CMP.  mask already holds WriteMask << 11, so the
 * extra << 4 places it at bit 15. */
292 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
294 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
295 | R500_INST_TEX_SEM_WAIT
| (mask
<< 4);
296 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
297 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
298 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
299 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
300 | R500_ALU_RGB_SEL_B_SRC0
301 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
)
302 | R500_ALU_RGB_OMOD_DISABLE
;
303 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
304 | R500_ALPHA_ADDRD(dest
)
305 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A
)
306 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A
)
307 | R500_ALPHA_OMOD_DISABLE
;
308 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
309 | R500_ALU_RGBA_ADDRD(dest
)
310 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
311 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
/*
 * Initialise the type and write-mask word (inst0) of fp->inst[counter] for
 * an ALU operation, based on the destination of fpi.
 *
 * PROGRAM_OUTPUT destination: inst0 starts as R500_INST_TYPE_OUT; for
 * FRAG_RESULT_COLR the write mask is OR-ed in at bit 15, and for
 * FRAG_RESULT_DEPR inst4 is set to R500_ALPHA_W_OMASK.
 * Other destinations (presumably an else branch; the keyword is not visible
 * in this view): inst0 is R500_INST_TYPE_ALU with the write mask at bit 11.
 * R500_INST_TEX_SEM_WAIT is OR-ed into inst0 in every case.
 *
 * NOTE(review): inst4 is assigned only in the FRAG_RESULT_DEPR case, yet
 * emit_mov() and parse_program() follow this call with inst4 |= ...; verify
 * that the instruction slot is zeroed before compilation, otherwise stale
 * bits leak into inst4.
 */
315 static void emit_alu(struct r500_fragment_program
*fp
, int counter
, struct prog_instruction
*fpi
) {
316 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
317 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
;
319 if (fpi
->DstReg
.Index
== FRAG_RESULT_COLR
)
320 fp
->inst
[counter
].inst0
|= (fpi
->DstReg
.WriteMask
<< 15);
322 if (fpi
->DstReg
.Index
== FRAG_RESULT_DEPR
)
323 fp
->inst
[counter
].inst4
= R500_ALPHA_W_OMASK
;
325 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
327 | (fpi
->DstReg
.WriteMask
<< 11);
330 fp
->inst
[counter
].inst0
|= R500_INST_TEX_SEM_WAIT
;
/*
 * Encode a move of src into hardware register dest in fp->inst[counter].
 *
 *   fp      - fragment program being compiled.
 *   counter - index of the instruction slot to fill.
 *   src     - Mesa source register; resolved with make_src() and swizzled
 *             with make_rgb_swizzle() and make_alpha_swizzle().
 *   dest    - hardware destination address.
 *
 * The move is written as a CMP whose A and B operands both select source 0
 * and whose C operand uses the zero swizzle.  inst1, inst2, inst3 and inst5
 * are assigned outright; inst4 is OR-ed into, so bits left there earlier
 * (for example R500_ALPHA_W_OMASK from emit_alu()) are kept.  inst0 is not
 * touched here; parse_program() calls emit_alu() first to set it.
 */
333 static void emit_mov(struct r500_fragment_program
*fp
, int counter
, struct prog_src_register src
, GLuint dest
) {
334 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
335 * it is technically more accurate and recommended by ATI/AMD. */
336 GLuint src_reg
= make_src(fp
, src
);
337 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src_reg
);
338 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src_reg
);
339 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
340 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src
))
341 | R500_ALU_RGB_SEL_B_SRC0
342 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src
))
343 | R500_ALU_RGB_OMOD_DISABLE
;
344 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_CMP
345 | R500_ALPHA_ADDRD(dest
)
346 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src
))
347 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src
))
348 | R500_ALPHA_OMOD_DISABLE
;
349 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
350 | R500_ALU_RGBA_ADDRD(dest
)
351 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
352 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
355 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
357 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
358 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
359 struct prog_instruction
*fpi
;
360 GLuint src
[3], dest
= 0;
361 int temp_swiz
, counter
= 0;
363 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
364 ERROR("The program is empty!\n");
368 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
370 if (fpi
->Opcode
!= OPCODE_KIL
) {
371 dest
= make_dest(fp
, fpi
->DstReg
);
374 switch (fpi
->Opcode
) {
376 emit_alu(fp
, counter
, fpi
);
377 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
378 fp
->inst
[counter
].inst3
|= R500_ALU_RGB_MOD_A_ABS
379 | R500_ALU_RGB_MOD_B_ABS
;
380 fp
->inst
[counter
].inst4
|= R500_ALPHA_MOD_A_ABS
381 | R500_ALPHA_MOD_B_ABS
;
384 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
385 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
386 /* Variation on MAD: 1*src0+src1 */
387 emit_alu(fp
, counter
, fpi
);
388 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
389 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(0);
390 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
391 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(0);
392 fp
->inst
[counter
].inst3
= /* 1 */
393 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
394 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
395 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
396 | R500_ALPHA_ADDRD(dest
)
397 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
398 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
399 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
400 | R500_ALU_RGBA_ADDRD(dest
)
401 | R500_ALU_RGBA_SEL_C_SRC1
402 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
403 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
404 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
407 /* This inst's selects need to be swapped as follows:
408 * 0 -> C ; 1 -> B ; 2 -> A */
409 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
410 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
411 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
412 emit_alu(fp
, counter
, fpi
);
413 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[2])
414 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[0]);
415 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[2])
416 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[0]);
417 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
418 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[2]))
419 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
420 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_CMP
421 | R500_ALPHA_ADDRD(dest
)
422 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[2]))
423 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
424 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
425 | R500_ALU_RGBA_ADDRD(dest
)
426 | R500_ALU_RGBA_SEL_C_SRC2
427 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
428 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
429 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
432 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
433 src
[1] = emit_const4fv(fp
, RCP_2PI
);
434 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
435 | (R500_WRITEMASK_ARGB
<< 11);
436 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
437 | R500_RGB_ADDR1(src
[1]);
438 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
439 | R500_ALPHA_ADDR1(src
[1]);
440 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
441 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
442 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
443 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
444 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
445 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
446 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
447 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
448 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
449 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
450 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
452 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
453 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
454 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
455 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
456 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
457 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
458 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
459 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
460 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
461 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1));
463 emit_alu(fp
, counter
, fpi
);
464 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
465 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
466 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
;
467 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_COS
468 | R500_ALPHA_ADDRD(dest
)
469 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
470 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
471 | R500_ALU_RGBA_ADDRD(dest
);
474 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
475 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
476 emit_alu(fp
, counter
, fpi
);
477 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
478 | R500_RGB_ADDR1(src
[1]);
479 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
480 | R500_ALPHA_ADDR1(src
[1]);
481 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
482 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
483 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
484 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_DP
485 | R500_ALPHA_ADDRD(dest
)
486 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
487 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
488 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
489 | R500_ALU_RGBA_ADDRD(dest
);
492 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
493 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
495 emit_alu(fp
, counter
, fpi
);
496 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
497 | R500_RGB_ADDR1(src
[1]);
498 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
499 | R500_ALPHA_ADDR1(src
[1]);
500 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
501 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
502 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
503 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_DP
504 | R500_ALPHA_ADDRD(dest
)
505 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
506 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
507 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
508 | R500_ALU_RGBA_ADDRD(dest
);
511 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
512 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
514 emit_alu(fp
, counter
, fpi
);
515 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
516 | R500_RGB_ADDR1(src
[1]);
517 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
518 | R500_ALPHA_ADDR1(src
[1]);
519 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
520 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
521 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
522 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_DP
523 | R500_ALPHA_ADDRD(dest
)
524 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
525 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
526 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
527 | R500_ALU_RGBA_ADDRD(dest
);
530 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
531 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
532 /* [1, src0.y*src1.y, src0.z, src1.w]
533 * So basically MUL with lotsa swizzling. */
534 emit_alu(fp
, counter
, fpi
);
535 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
536 | R500_RGB_ADDR1(src
[1]);
537 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
538 | R500_ALPHA_ADDR1(src
[1]);
539 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
540 | R500_ALU_RGB_SEL_B_SRC1
;
541 /* Select [1, y, z, 1] */
542 temp_swiz
= (make_rgb_swizzle(fpi
->SrcReg
[0]) & ~0x7) | R500_SWIZZLE_ONE
;
543 fp
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_A(temp_swiz
);
544 /* Select [1, y, 1, w] */
545 temp_swiz
= (make_rgb_swizzle(fpi
->SrcReg
[0]) & ~0x1c7) | R500_SWIZZLE_ONE
| (R500_SWIZZLE_ONE
<< 6);
546 fp
->inst
[counter
].inst3
|= MAKE_SWIZ_RGB_B(temp_swiz
);
547 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
548 | R500_ALPHA_ADDRD(dest
)
549 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
550 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
551 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
552 | R500_ALU_RGBA_ADDRD(dest
)
553 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
554 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
557 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
558 emit_alu(fp
, counter
, fpi
);
559 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
560 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
561 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
562 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
563 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_EX2
564 | R500_ALPHA_ADDRD(dest
)
565 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
566 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
567 | R500_ALU_RGBA_ADDRD(dest
);
570 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
571 emit_alu(fp
, counter
, fpi
);
572 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
573 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
574 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
575 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
576 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_FRC
577 | R500_ALPHA_ADDRD(dest
)
578 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
579 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
580 | R500_ALU_RGBA_ADDRD(dest
);
583 emit_tex(fp
, fpi
, OPCODE_KIL
, dest
, counter
);
586 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
587 emit_alu(fp
, counter
, fpi
);
588 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
589 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
590 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
591 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
592 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_LN2
593 | R500_ALPHA_ADDRD(dest
)
594 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
595 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
596 | R500_ALU_RGBA_ADDRD(dest
);
599 /* src0 * src1 + INV(src0) * src2
600 * 1) MUL src0, src1, temp
601 * 2) PRE 1-src0; MAD srcp, src2, temp */
602 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
603 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
604 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
605 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
606 | R500_INST_NOP
| (R500_WRITEMASK_ARGB
<< 11);
607 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
608 | R500_RGB_ADDR1(src
[1]);
609 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
610 | R500_ALPHA_ADDR1(src
[1]);
611 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
612 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
613 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
614 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
615 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
616 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
617 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
618 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
619 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
620 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
621 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
623 emit_alu(fp
, counter
, fpi
);
624 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
625 | R500_RGB_ADDR1(src
[2])
626 | R500_RGB_ADDR2(get_temp(fp
, 0))
627 | R500_RGB_SRCP_OP_1_MINUS_RGB0
;
628 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
629 | R500_ALPHA_ADDR1(src
[2])
630 | R500_ALPHA_ADDR2(get_temp(fp
, 0))
631 | R500_ALPHA_SRCP_OP_1_MINUS_A0
;
632 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRCP
633 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
634 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
635 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
636 | R500_ALPHA_ADDRD(dest
)
637 | R500_ALPHA_SEL_A_SRCP
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
638 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
639 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
640 | R500_ALU_RGBA_ADDRD(dest
)
641 | R500_ALU_RGBA_SEL_C_SRC2
| MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
642 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
643 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
646 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
647 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
648 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
649 emit_alu(fp
, counter
, fpi
);
650 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
651 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
652 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
653 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
654 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
655 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
656 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
657 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
658 | R500_ALPHA_ADDRD(dest
)
659 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
660 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
661 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
662 | R500_ALU_RGBA_ADDRD(dest
)
663 | R500_ALU_RGBA_SEL_C_SRC2
664 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
665 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
666 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
669 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
670 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
671 emit_alu(fp
, counter
, fpi
);
672 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
673 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
674 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
675 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
676 | R500_ALU_RGB_SEL_B_SRC1
677 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
678 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAX
679 | R500_ALPHA_ADDRD(dest
)
680 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
681 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
682 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
683 | R500_ALU_RGBA_ADDRD(dest
);
686 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
687 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
688 emit_alu(fp
, counter
, fpi
);
689 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
690 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
691 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
692 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
693 | R500_ALU_RGB_SEL_B_SRC1
694 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
695 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_MIN
696 | R500_ALPHA_ADDRD(dest
)
697 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
698 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
699 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
700 | R500_ALU_RGBA_ADDRD(dest
);
703 emit_alu(fp
, counter
, fpi
);
704 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
707 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
708 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
709 /* Variation on MAD: src0*src1+0 */
710 emit_alu(fp
, counter
, fpi
);
711 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
712 | R500_RGB_ADDR1(src
[1]);
713 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
714 | R500_ALPHA_ADDR1(src
[1]);
715 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
716 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
717 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
718 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
719 | R500_ALPHA_ADDRD(dest
)
720 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
721 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
722 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
723 | R500_ALU_RGBA_ADDRD(dest
)
724 // | R500_ALU_RGBA_SEL_C_SRC2
725 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
726 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
727 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
730 /* POW(a,b) = EX2(LN2(a)*b) */
731 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
732 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
733 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
734 | (R500_WRITEMASK_ARGB
<< 11);
735 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
736 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
737 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
738 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
739 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_LN2
740 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
741 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
742 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
743 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0));
745 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
746 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0))
747 | R500_RGB_ADDR1(src
[1]);
748 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0))
749 | R500_ALPHA_ADDR1(src
[1]);
750 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
751 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
752 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
753 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
754 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
755 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
756 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
757 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
758 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1))
759 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
760 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
762 emit_alu(fp
, counter
, fpi
);
763 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
764 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
765 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
766 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
767 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_EX2
768 | R500_ALPHA_ADDRD(dest
)
769 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
770 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
771 | R500_ALU_RGBA_ADDRD(dest
);
774 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
775 emit_alu(fp
, counter
, fpi
);
776 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
777 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
778 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
779 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
780 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_RCP
781 | R500_ALPHA_ADDRD(dest
)
782 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
783 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
784 | R500_ALU_RGBA_ADDRD(dest
);
787 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
788 emit_alu(fp
, counter
, fpi
);
789 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
790 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
791 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
792 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
793 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_RSQ
794 | R500_ALPHA_ADDRD(dest
)
795 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
796 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
797 | R500_ALU_RGBA_ADDRD(dest
);
800 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
801 src
[1] = emit_const4fv(fp
, RCP_2PI
);
802 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
803 | (R500_WRITEMASK_ARGB
<< 11);
804 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
805 | R500_RGB_ADDR1(src
[1]);
806 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
807 | R500_ALPHA_ADDR1(src
[1]);
808 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
809 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
810 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
811 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
812 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
813 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
814 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
815 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
816 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
817 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
818 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
820 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
821 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
822 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
823 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
824 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
825 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
826 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
827 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
828 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
829 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1));
831 /* Do a cosine, then a sine, masking out the channels we want to protect. */
832 /* Cosine only goes in R (x) channel. */
833 fpi
->DstReg
.WriteMask
= 0x1;
834 emit_alu(fp
, counter
, fpi
);
835 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
836 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
837 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
838 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
839 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_COS
840 | R500_ALPHA_ADDRD(dest
)
841 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
842 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
843 | R500_ALU_RGBA_ADDRD(dest
);
845 /* Sine only goes in G (y) channel. */
846 fpi
->DstReg
.WriteMask
= 0x2;
847 emit_alu(fp
, counter
, fpi
);
848 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
849 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
850 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
851 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
852 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_SIN
853 | R500_ALPHA_ADDRD(dest
)
854 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
855 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
856 | R500_ALU_RGBA_ADDRD(dest
);
859 /* We use SRCP, so as a precaution we're
860 * going to set NOP in previous inst, if possible. */
861 /* This inst's selects need to be swapped as follows:
862 * 0 -> C ; 1 -> B ; 2 -> A */
863 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
864 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
865 emit_alu(fp
, counter
, fpi
);
866 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
867 | R500_RGB_ADDR1(src
[1])
868 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
869 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
870 | R500_ALPHA_ADDR1(src
[1])
871 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
872 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
873 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
874 | R500_ALU_RGB_SEL_B_SRC1
875 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
876 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_CMP
877 | R500_ALPHA_ADDRD(dest
)
878 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
879 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO
);
880 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
881 | R500_ALU_RGBA_ADDRD(dest
)
882 | R500_ALU_RGBA_SEL_C_SRCP
883 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
884 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
885 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
888 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
889 src
[1] = emit_const4fv(fp
, RCP_2PI
);
890 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| R500_INST_TEX_SEM_WAIT
891 | (R500_WRITEMASK_ARGB
<< 11);
892 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
893 | R500_RGB_ADDR1(src
[1]);
894 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
895 | R500_ALPHA_ADDR1(src
[1]);
896 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
897 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
898 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
);
899 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
900 | R500_ALPHA_ADDRD(get_temp(fp
, 0))
901 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
902 | R500_ALPHA_SEL_B_SRC1
| R500_ALPHA_SWIZ_B_A
;
903 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
904 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 0))
905 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
906 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
908 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| (R500_WRITEMASK_ARGB
<< 11);
909 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 0));
910 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 0));
911 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
912 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
);
913 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
914 | R500_ALPHA_ADDRD(get_temp(fp
, 1))
915 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SWIZ_A_A
;
916 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
917 | R500_ALU_RGBA_ADDRD(get_temp(fp
, 1));
919 emit_alu(fp
, counter
, fpi
);
920 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
, 1));
921 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
, 1));
922 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
;
923 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_SIN
924 | R500_ALPHA_ADDRD(dest
)
925 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi
->SrcReg
[0]));
926 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
927 | R500_ALU_RGBA_ADDRD(dest
);
930 /* We use SRCP, so as a precaution we're
931 * going to set NOP in previous inst, if possible. */
932 /* This inst's selects need to be swapped as follows:
933 * 0 -> C ; 1 -> B ; 2 -> A */
934 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
935 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
936 emit_alu(fp
, counter
, fpi
);
937 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
938 | R500_RGB_ADDR1(src
[1])
939 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
940 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
941 | R500_ALPHA_ADDR1(src
[1])
942 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
943 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
944 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO
)
945 | R500_ALU_RGB_SEL_B_SRC1
946 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
947 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_CMP
948 | R500_ALPHA_ADDRD(dest
)
949 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO
)
950 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE
);
951 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
952 | R500_ALU_RGBA_ADDRD(dest
)
953 | R500_ALU_RGBA_SEL_C_SRCP
954 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
955 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
956 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
959 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
960 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
961 /* Variation on MAD: 1*src0-src1 */
962 emit_alu(fp
, counter
, fpi
);
963 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
964 | R500_RGB_ADDR2(src
[1]);
965 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
966 | R500_ALPHA_ADDR2(src
[1]);
967 fp
->inst
[counter
].inst3
= /* 1 */
968 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
969 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
970 fp
->inst
[counter
].inst4
|= R500_ALPHA_OP_MAD
971 | R500_ALPHA_ADDRD(dest
)
972 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
973 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
974 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
975 | R500_ALU_RGBA_ADDRD(dest
)
976 | R500_ALU_RGBA_SEL_C_SRC2
977 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
978 | R500_ALU_RGBA_MOD_C_NEG
979 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
980 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
981 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
984 /* TODO: Negation masks! */
985 emit_alu(fp
, counter
, fpi
);
986 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
989 emit_tex(fp
, fpi
, OPCODE_TEX
, dest
, counter
);
990 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
994 emit_tex(fp
, fpi
, OPCODE_TXB
, dest
, counter
);
995 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
999 emit_tex(fp
, fpi
, OPCODE_TXP
, dest
, counter
);
1000 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
1004 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi
->Opcode
));
1008 /* Finishing touches */
1009 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
1010 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
1020 /* Finish him! (If it's an ALU/OUT instruction...) */
1021 if ((fp
->inst
[counter
-1].inst0
& 0x3) == 1) {
1022 fp
->inst
[counter
-1].inst0
|= R500_INST_LAST
;
1024 /* We still need to put an output inst, right? */
1025 WARN_ONCE("Final FP instruction is not an OUT.\n");
1028 fp
->cs
->nrslots
= counter
;
/*
 * init_program - reset the per-compile tracking state of an R500 fragment
 * program before it is translated.
 *
 * r300: driver context; used here for the "fp_optimization" option lookup.
 * fp:   fragment program whose state is reset.
 *
 * Visible effects: clears fp->translated and fp->error, points fp->cs at the
 * context's pfs_compile state and zeroes it, marks every slot source as
 * SRC_CONST, hands out consecutive register numbers (fp->temp_reg_offset)
 * to the fragment inputs named in InputsRead, and derives fp->max_temp_idx
 * from the highest PROGRAM_TEMPORARY source index found in the program.
 */
1035 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
1037 struct r300_pfs_compile_state
*cs
= NULL
;
1038 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
1039 struct prog_instruction
*fpi
;
1040 GLuint InputsRead
= mp
->Base
.InputsRead
;
1041 GLuint temps_used
= 0;
1044 /* New compile, reset tracking data */
/* NOTE(review): the left-hand side that receives this option value is not
 * visible in this excerpt. */
1046 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
1047 fp
->translated
= GL_FALSE
;
1048 fp
->error
= GL_FALSE
;
/* Both fp->cs and the local cs alias the compile state stored in the context. */
1049 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
1051 fp
->first_node_has_tex
= 0;
1053 /* Size of pixel stack, plus 1. */
1054 fp
->max_temp_idx
= 1;
1055 /* Temp register offset. */
1056 fp
->temp_reg_offset
= 0;
1057 fp
->node
[0].alu_end
= -1;
1058 fp
->node
[0].tex_end
= -1;
/* Zero the whole compile state, then mark all three vector and scalar
 * sources of every ALU slot as SRC_CONST. */
1060 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
1061 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
1062 for (j
= 0; j
< 3; j
++) {
1063 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
1064 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
1068 /* Work out what temps the Mesa inputs correspond to, this must match
1069 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1070 * configures itself based on the fragprog's InputsRead
1072 * NOTE: this depends on get_hw_temp() allocating registers in order,
1073 * starting from register 0, so we're just going to do that instead.
1076 /* Texcoords come first */
/* Each texcoord the program reads takes the next register number. */
1077 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
1078 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1079 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
1080 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
1081 fp
->temp_reg_offset
;
1082 fp
->temp_reg_offset
++;
/* Handled bits are cleared so that only unhandled inputs remain at the end. */
1085 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
1087 /* fragment position treated as a texcoord */
1088 if (InputsRead
& FRAG_BIT_WPOS
) {
1089 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
1090 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
=
1091 fp
->temp_reg_offset
;
1092 fp
->temp_reg_offset
++;
1094 InputsRead
&= ~FRAG_BIT_WPOS
;
1096 /* Then primary colour */
1097 if (InputsRead
& FRAG_BIT_COL0
) {
1098 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
1099 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
=
1100 fp
->temp_reg_offset
;
1101 fp
->temp_reg_offset
++;
1103 InputsRead
&= ~FRAG_BIT_COL0
;
1105 /* Secondary color */
1106 if (InputsRead
& FRAG_BIT_COL1
) {
1107 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
1108 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
=
1109 fp
->temp_reg_offset
;
1110 fp
->temp_reg_offset
++;
1112 InputsRead
&= ~FRAG_BIT_COL1
;
/* Any bit still set in InputsRead was not handled above; warn and map each
 * such input to register 0. */
1116 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
1117 /* force read from hwreg 0 for now */
1118 for (i
= 0; i
< 32; i
++)
1119 if (InputsRead
& (1 << i
))
1120 cs
->inputs
[i
].reg
= 0;
1123 if (!mp
->Base
.Instructions
) {
1124 ERROR("No instructions found in program, going to go die now.\n");
/* Walk the instructions up to OPCODE_END and record the highest
 * PROGRAM_TEMPORARY index used as a source operand.
 * NOTE(review): only SrcReg is examined in the visible code; a temporary
 * that is written but never read would not raise temps_used - confirm. */
1128 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1129 for (i
= 0; i
< 3; i
++) {
1130 if (fpi
->SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
1131 if (fpi
->SrcReg
[i
].Index
> temps_used
)
1132 temps_used
= fpi
->SrcReg
[i
].Index
;
1137 cs
->temp_in_use
= temps_used
;
/* Registers given to inputs, plus the highest temp index seen, plus one. */
1139 fp
->max_temp_idx
= fp
->temp_reg_offset
+ cs
->temp_in_use
+ 1;
1142 static void update_params(struct r500_fragment_program
*fp
)
1144 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
1146 /* Ask Mesa nicely to fill in ParameterValues for us */
1147 if (mp
->Base
.Parameters
)
1148 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
/*
 * dumb_shader - hand-build a fixed two-instruction program in fp->inst[].
 *
 * Instruction 0 is a TEX-type instruction (texture id 0, source and
 * destination address 0, all four write-mask bits, clamped); instruction 1
 * is an OUT-type instruction using MAD for both the alpha and RGBA opcodes.
 * Afterwards fp->cs->nrslots is set to 2 and fp is marked translated.
 *
 * fp: fragment program that receives the hard-coded instructions.
 */
1151 static void dumb_shader(struct r500_fragment_program
*fp
)
/* Instruction 0: texture instruction writing R, G, B and alpha, clamped. */
1153 fp
->inst
[0].inst0
= R500_INST_TYPE_TEX
1154 | R500_INST_TEX_SEM_WAIT
1155 | R500_INST_RGB_WMASK_R
1156 | R500_INST_RGB_WMASK_G
1157 | R500_INST_RGB_WMASK_B
1158 | R500_INST_ALPHA_WMASK
1159 | R500_INST_RGB_CLAMP
1160 | R500_INST_ALPHA_CLAMP
;
1161 fp
->inst
[0].inst1
= R500_TEX_ID(0)
1163 | R500_TEX_SEM_ACQUIRE
1164 | R500_TEX_IGNORE_UNCOVERED
;
/* Source and destination are both address 0; destination swizzle is the
 * identity (R->R, G->G, B->B, A->A). */
1165 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
1166 | R500_TEX_SRC_S_SWIZ_R
1167 | R500_TEX_SRC_T_SWIZ_G
1168 | R500_TEX_DST_ADDR(0)
1169 | R500_TEX_DST_R_SWIZ_R
1170 | R500_TEX_DST_G_SWIZ_G
1171 | R500_TEX_DST_B_SWIZ_B
1172 | R500_TEX_DST_A_SWIZ_A
;
/* NOTE(review): the remainder of this inst3 expression is not visible in
 * this excerpt. */
1173 fp
->inst
[0].inst3
= R500_DX_ADDR(0)
1183 fp
->inst
[0].inst4
= 0x0;
1184 fp
->inst
[0].inst5
= 0x0;
/* Instruction 1: output instruction with all four output-mask bits set. */
1186 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
1187 R500_INST_TEX_SEM_WAIT
|
1189 R500_INST_RGB_OMASK_R
|
1190 R500_INST_RGB_OMASK_G
|
1191 R500_INST_RGB_OMASK_B
|
1192 R500_INST_ALPHA_OMASK
;
1193 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
1195 R500_RGB_ADDR1_CONST
|
1197 R500_RGB_ADDR2_CONST
|
1198 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
1199 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
1200 R500_ALPHA_ADDR1(0) |
1201 R500_ALPHA_ADDR1_CONST
|
1202 R500_ALPHA_ADDR2(0) |
1203 R500_ALPHA_ADDR2_CONST
|
1204 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
/* Operand A is source 0 with identity swizzle; operand B uses the "1"
 * swizzle on every channel; operand C (inst5) uses the "0" swizzle. */
1205 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
1206 R500_ALU_RGB_R_SWIZ_A_R
|
1207 R500_ALU_RGB_G_SWIZ_A_G
|
1208 R500_ALU_RGB_B_SWIZ_A_B
|
1209 R500_ALU_RGB_SEL_B_SRC0
|
1210 R500_ALU_RGB_R_SWIZ_B_1
|
1211 R500_ALU_RGB_B_SWIZ_B_1
|
1212 R500_ALU_RGB_G_SWIZ_B_1
;
1213 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
1214 R500_ALPHA_SWIZ_A_A
|
1215 R500_ALPHA_SWIZ_B_1
;
1216 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
1217 R500_ALU_RGBA_R_SWIZ_0
|
1218 R500_ALU_RGBA_G_SWIZ_0
|
1219 R500_ALU_RGBA_B_SWIZ_0
|
1220 R500_ALU_RGBA_A_SWIZ_0
;
/* Two slots were filled in; the program counts as translated from here on. */
1222 fp
->cs
->nrslots
= 2;
1223 fp
->translated
= GL_TRUE
;
/*
 * r500TranslateFragmentShader - translate a fragment program for R500
 * hardware if it has not been translated yet.
 *
 * r300: driver context, passed on to init_program.
 * fp:   fragment program to translate.
 *
 * Visible steps for an untranslated program: init_program() resets the
 * compile state, parse_program() does the translation, and a failure is
 * reported through ERROR with a message announcing the dumb-shader
 * fallback.  fp->inst_offset / fp->inst_end are then set from
 * cs->nrslots, fp->translated is set, and with DEBUG_PIXEL enabled the Mesa
 * program is printed to stderr.  r300UpdateStateParameters() is also called
 * with _NEW_PROGRAM.
 */
1226 void r500TranslateFragmentShader(r300ContextPtr r300
,
1227 struct r500_fragment_program
*fp
)
/* NOTE(review): cs starts out NULL and is dereferenced below; the statement
 * that points it at the compile state is not visible in this excerpt -
 * confirm it is assigned before cs->nrslots is read. */
1230 struct r300_pfs_compile_state
*cs
= NULL
;
1232 if (!fp
->translated
) {
1234 init_program(r300
, fp
);
1237 if (parse_program(fp
) == GL_FALSE
) {
1238 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
/* Failure path: the instruction range still spans slots 0..nrslots-1. */
1240 fp
->inst_offset
= 0;
1241 fp
->inst_end
= cs
->nrslots
- 1;
/* Success path: same range computation from the slot count. */
1244 fp
->inst_offset
= 0;
1245 fp
->inst_end
= cs
->nrslots
- 1;
1247 fp
->translated
= GL_TRUE
;
1248 if (RADEON_DEBUG
& DEBUG_PIXEL
) {
1249 fprintf(stderr
, "Mesa program:\n");
1250 fprintf(stderr
, "-------------\n");
1251 _mesa_print_program(&fp
->mesa_program
.Base
);
1257 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);
/*
 * toswiz - map a swizzle selector value to a printable name; dump_program
 * prints the result with %s.  Only the mapping 5 -> "1/2" is visible in this
 * excerpt.
 */
1264 static char *toswiz(int swiz_val
) {
1271 case 5: return "1/2";
/**
 * Translate an RGBA ALU opcode value into a printable mnemonic.
 *
 * dump_program passes the low four bits of an instruction word, so values
 * 13..15 can occur even though they have no mnemonic in the table.
 *
 * \param op_val  opcode value to name
 * \return  pointer to a static string, never NULL; values without a table
 *          entry (13 and above, or negative) yield "???".  The string must
 *          not be modified by the caller.
 */
static char *toop(int op_val)
{
	static const char *const names[] = {
		"MAD",		/*  0 */
		"DP3",		/*  1 */
		"DP4",		/*  2 */
		"D2A",		/*  3 */
		"MIN",		/*  4 */
		"MAX",		/*  5 */
		"Reserved",	/*  6 */
		"CND",		/*  7 */
		"CMP",		/*  8 */
		"FRC",		/*  9 */
		"SOP",		/* 10 */
		"MDH",		/* 11 */
		"MDV",		/* 12 */
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	/* Always hand back a valid string: the result is printed with %s. */
	if (op_val < 0 || op_val >= count)
		return "???";

	/* The return type stays char * for existing callers. */
	return (char *)names[op_val];
}
/**
 * Translate an alpha ALU opcode value into a printable mnemonic.
 *
 * \param op_val  opcode value to name (dump_program passes inst4 & 0xf)
 * \return  pointer to a static string, never NULL; values outside 0..15
 *          yield "???".  The string must not be modified by the caller.
 */
static char *to_alpha_op(int op_val)
{
	static const char *const names[] = {
		"MAD",		/*  0 */
		"DP",		/*  1 */
		"MIN",		/*  2 */
		"MAX",		/*  3 */
		"Reserved",	/*  4 */
		"CND",		/*  5 */
		"CMP",		/*  6 */
		"FRC",		/*  7 */
		"EX2",		/*  8 */
		"LN2",		/*  9 */
		"RCP",		/* 10 */
		"RSQ",		/* 11 */
		"SIN",		/* 12 */
		"COS",		/* 13 */
		"MDH",		/* 14 */
		"MDV",		/* 15 */
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	/* Always hand back a valid string: the result is printed with %s. */
	if (op_val < 0 || op_val >= count)
		return "???";

	/* The return type stays char * for existing callers. */
	return (char *)names[op_val];
}
/**
 * Render a 4-bit channel mask as text.
 *
 * Bit 0 is R, bit 1 is G, bit 2 is B and bit 3 is A; alpha is printed first
 * when present (for example 9 -> "AR"), and an empty mask prints as "NONE".
 *
 * \param val  mask value (dump_program passes four bits of inst0)
 * \return  pointer to a static string, never NULL; values outside 0..15
 *          yield "???".  The string must not be modified by the caller.
 */
static char *to_mask(int val)
{
	static const char *const names[] = {
		"NONE",	/*  0 */
		"R",	/*  1 */
		"G",	/*  2 */
		"RG",	/*  3 */
		"B",	/*  4 */
		"RB",	/*  5 */
		"GB",	/*  6 */
		"RGB",	/*  7 */
		"A",	/*  8 */
		"AR",	/*  9 */
		"AG",	/* 10 */
		"ARG",	/* 11 */
		"AB",	/* 12 */
		"ARB",	/* 13 */
		"AGB",	/* 14 */
		"ARGB",	/* 15 */
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	/* Always hand back a valid string: the result is printed with %s. */
	if (val < 0 || val >= count)
		return "???";

	/* The return type stays char * for existing callers. */
	return (char *)names[val];
}
/**
 * Translate a texture opcode value into a printable mnemonic.
 *
 * dump_program extracts a three-bit field, so the value 7 can occur even
 * though it has no mnemonic in the table.
 *
 * \param val  texture opcode value to name
 * \return  pointer to a static string, never NULL; values without a table
 *          entry (7 and above, or negative) yield "???".  The string must
 *          not be modified by the caller.
 */
static char *to_texop(int val)
{
	static const char *const names[] = {
		"NOP",		/* 0 */
		"LD",		/* 1 */
		"TEXKILL",	/* 2 */
		"PROJ",		/* 3 */
		"LODBIAS",	/* 4 */
		"LOD",		/* 5 */
		"DXDY",		/* 6 */
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	/* Always hand back a valid string: the result is printed with %s. */
	if (val < 0 || val >= count)
		return "???";

	/* The return type stays char * for existing callers. */
	return (char *)names[val];
}
/*
 * dump_program - print a decoded listing of the translated program to
 * stderr.
 *
 * fp: fragment program whose instructions fp->inst[0 .. fp->inst_end] are
 *     printed.
 *
 * For every instruction the common word (inst0) is printed first: its type
 * from the low two bits, the TEX_WAIT / LAST / NOP / ALU WAIT flags, and the
 * write and output masks.  A second switch on the same two type bits then
 * decodes the remaining words, either as address/RGB/alpha/RGBA fields or
 * as texture fields.  NOTE(review): the case labels selecting each group
 * are not visible in this excerpt.
 */
1361 static void dump_program(struct r500_fragment_program
*fp
)
1369 for (n
= 0; n
< fp
->inst_end
+1; n
++) {
/* inst0 keeps the common word for the type switch further down, while inst
 * is reused for each word being decoded. */
1370 inst0
= inst
= fp
->inst
[n
].inst0
;
1371 fprintf(stderr
,"%d\t0:CMN_INST 0x%08x:", n
, inst
);
1372 switch(inst
& 0x3) {
1373 case R500_INST_TYPE_ALU
: str
= "ALU"; break;
1374 case R500_INST_TYPE_OUT
: str
= "OUT"; break;
1375 case R500_INST_TYPE_FC
: str
= "FC"; break;
1376 case R500_INST_TYPE_TEX
: str
= "TEX"; break;
1378 fprintf(stderr
,"%s %s %s %s %s ", str
,
1379 inst
& R500_INST_TEX_SEM_WAIT
? "TEX_WAIT" : "",
1380 inst
& R500_INST_LAST
? "LAST" : "",
1381 inst
& R500_INST_NOP
? "NOP" : "",
1382 inst
& R500_INST_ALU_WAIT
? "ALU WAIT" : "");
/* Write mask is read from bits 11..14 and output mask from bits 15..18. */
1383 fprintf(stderr
,"wmask: %s omask: %s\n", to_mask((inst
>> 11) & 0xf),
1384 to_mask((inst
>> 15) & 0xf));
1386 switch(inst0
& 0x3) {
/* Address words: each operand prints an 8-bit index (shifts 0, 10, 20) and
 * a flag bit (8, 18, 28) shown as 'c' when set and 't' when clear. */
1389 fprintf(stderr
,"\t1:RGB_ADDR 0x%08x:", fp
->inst
[n
].inst1
);
1390 inst
= fp
->inst
[n
].inst1
;
1392 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1393 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1394 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1395 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1398 fprintf(stderr
,"\t2:ALPHA_ADDR 0x%08x:", fp
->inst
[n
].inst2
);
1399 inst
= fp
->inst
[n
].inst2
;
1400 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1401 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1402 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1403 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
/* RGB word: source select plus three 3-bit swizzles for operand A
 * (from bit 0) and for operand B (from bit 13). */
1405 fprintf(stderr
,"\t3 RGB_INST: 0x%08x:", fp
->inst
[n
].inst3
);
1406 inst
= fp
->inst
[n
].inst3
;
1407 fprintf(stderr
,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1408 (inst
) & 0x3, toswiz((inst
>> 2) & 0x7), toswiz((inst
>> 5) & 0x7), toswiz((inst
>> 8) & 0x7),
1410 (inst
>> 13) & 0x3, toswiz((inst
>> 15) & 0x7), toswiz((inst
>> 18) & 0x7), toswiz((inst
>> 21) & 0x7),
1411 (inst
>> 24) & 0x3);
/* Alpha word: opcode in the low four bits, destination from bit 4, and bit
 * 11 printed as "(rel)". */
1414 fprintf(stderr
,"\t4 ALPHA_INST:0x%08x:", fp
->inst
[n
].inst4
);
1415 inst
= fp
->inst
[n
].inst4
;
1416 fprintf(stderr
,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst
& 0xf),
1417 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1418 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), (inst
>> 17) & 0x3,
1419 (inst
>> 19) & 0x3, toswiz((inst
>> 21) & 0x7), (inst
>> 24) & 0x3,
1420 (inst
>> 31) & 0x1);
/* RGBA word: opcode in the low four bits, destination from bit 4, then the
 * operand C select and swizzles for RGB and alpha. */
1422 fprintf(stderr
,"\t5 RGBA_INST: 0x%08x:", fp
->inst
[n
].inst5
);
1423 inst
= fp
->inst
[n
].inst5
;
1424 fprintf(stderr
,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst
& 0xf),
1425 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1426 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), toswiz((inst
>> 17) & 0x7), toswiz((inst
>> 20) & 0x7),
1428 (inst
>> 25) & 0x3, toswiz((inst
>> 27) & 0x7), (inst
>> 30) & 0x3);
/* Texture decode: id from bits 16..19 and opcode from bits 22..24 of inst1;
 * bits 25, 26 and 27 print as ACQ, IGNUNC and UNSCALED/SCALED. */
1433 inst
= fp
->inst
[n
].inst1
;
1434 fprintf(stderr
,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst
, (inst
>> 16) & 0xf,
1435 to_texop((inst
>> 22) & 0x7), (inst
& (1<<25)) ? "ACQ" : "",
1436 (inst
& (1<<26)) ? "IGNUNC" : "", (inst
& (1<<27)) ? "UNSCALED" : "SCALED");
/* Texture address word: 7-bit source index at bit 0 and destination index
 * at bit 16, each followed by four 2-bit swizzle selectors. */
1437 inst
= fp
->inst
[n
].inst2
;
1438 fprintf(stderr
,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst
,
1439 inst
& 127, inst
& (1<<7) ? "(rel)" : "",
1440 toswiz((inst
>> 8) & 0x3), toswiz((inst
>> 10) & 0x3),
1441 toswiz((inst
>> 12) & 0x3), toswiz((inst
>> 14) & 0x3),
1442 (inst
>> 16) & 127, inst
& (1<<23) ? "(rel)" : "",
1443 toswiz((inst
>> 24) & 0x3), toswiz((inst
>> 26) & 0x3),
1444 toswiz((inst
>> 28) & 0x3), toswiz((inst
>> 30) & 0x3));
/* The DXDY word is printed raw, without field decoding. */
1446 fprintf(stderr
,"\t3:TEX_DXDY: 0x%08x\n", fp
->inst
[n
].inst3
);
1449 fprintf(stderr
,"\n");