2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
58 * Useful macros and values
/* ERROR(fmt, args...): prints "<file>::<function>(): <message>" followed by a
 * newline to stderr, then sets fp->error to GL_TRUE. It relies on a variable
 * named `fp` being in scope at the call site. The macro already appends a
 * newline, so messages passed to it do not need their own. */
60 #define ERROR(fmt, args...) do { \
61 fprintf(stderr, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
63 fp->error = GL_TRUE; \
66 #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
/* Register-file limits. emit_const4fv() checks constant indices against
 * R500_US_NUM_CONST_REGS before storing a new constant. */
68 #define R500_US_NUM_TEMP_REGS 128
69 #define R500_US_NUM_CONST_REGS 256
71 /* "Register" flags */
/* emit_const4fv() ORs REG_CONSTANT into the register index it produces. */
72 #define REG_CONSTANT (1 << 8)
73 #define REG_SRC_REL (1 << 9)
74 #define REG_DEST_REL (1 << 7)
/* Swizzle selector values. make_rgb_swizzle()/make_alpha_swizzle() turn an
 * incoming selector of 5 into 6, i.e. into R500_SWIZZLE_ONE. */
77 #define R500_SWIZZLE_ZERO 4
78 #define R500_SWIZZLE_HALF 5
79 #define R500_SWIZZLE_ONE 6
/* Pre-packed RGB swizzles: one selector at each of bit offsets 0, 3 and 6. */
80 #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
81 #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
82 #define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
/* The MAKE_SWIZ_* macros shift a swizzle value into place for one instruction
 * word. The argument is not parenthesised in the expansion, so pass only a
 * single identifier, call or already-parenthesised expression. */
83 /* Swizzles for inst2 */
84 #define MAKE_SWIZ_TEX_STRQ(x) (x << 8)
85 #define MAKE_SWIZ_TEX_RGBA(x) (x << 24)
86 /* Swizzles for inst3 */
87 #define MAKE_SWIZ_RGB_A(x) (x << 2)
88 #define MAKE_SWIZ_RGB_B(x) (x << 15)
89 /* Swizzles for inst4 */
90 #define MAKE_SWIZ_ALPHA_A(x) (x << 14)
91 #define MAKE_SWIZ_ALPHA_B(x) (x << 21)
92 /* Swizzle for inst5 */
93 #define MAKE_SWIZ_RGBA_C(x) (x << 14)
94 #define MAKE_SWIZ_ALPHA_C(x) (x << 27)
/* Forward declaration so dump_program() can be referenced before its
 * definition; it takes the fragment program and returns nothing. */
96 static void dump_program(struct r500_fragment_program
*fp
);
/* Builds the RGB swizzle bits for source register `src`.
 * Walks swizzle components 0..2 of src.Swizzle (read via GET_SWZ) and turns a
 * selector of 5 into 6, which is R500_SWIZZLE_ONE in this file's macros.
 * NOTE(review): the local declarations, the statement that accumulates each
 * selector into the result, and the return are not visible here. */
98 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
101 /* This could be optimized, but it should be plenty fast already. */
103 for (i
= 0; i
< 3; i
++) {
104 temp
= GET_SWZ(src
.Swizzle
, i
);
105 /* Fix SWIZZLE_ONE */
106 if (temp
== 5) temp
++;
/* Builds the alpha swizzle selector for source register `src`.
 * Reads swizzle component 3 of src.Swizzle and turns a selector of 5 into 6
 * (R500_SWIZZLE_ONE), the same fix-up make_rgb_swizzle() applies.
 * NOTE(review): the return statement is not visible here; presumably the
 * adjusted `swiz` is returned. */
112 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
113 GLuint swiz
= GET_SWZ(src
.Swizzle
, 3);
115 if (swiz
== 5) swiz
++;
/* Builds a texture-coordinate swizzle from src.Swizzle.
 * On each of four iterations the low two bits of `temp` are added into `swiz`
 * at bit offset i*2. The only reference to this helper in emit_tex() is
 * commented out.
 * NOTE(review): no visible statement advances `temp` between iterations, and
 * the declaration of `swiz` and the return are not shown; confirm against the
 * full source. */
119 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
121 GLuint temp
= src
.Swizzle
;
123 for (i
= 0; i
< 4; i
++) {
124 swiz
+= (temp
& 0x3) << i
*2;
/* Returns fp->max_temp_idx + 1 as a scratch register index.
 * emit_tex() uses it as the TEX destination when the instruction writes to a
 * PROGRAM_OUTPUT register. The visible code does not modify max_temp_idx, so
 * repeated calls yield the same index. */
130 static int get_temp(struct r500_fragment_program
*fp
) {
131 return fp
->max_temp_idx
+ 1;
134 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
/* Maps the constant pointer `cp` to a constant register for `fp`.
 * Scans fp->constant[0 .. const_nr) for an entry equal to `cp`. If the scan
 * runs off the end, the index is checked against R500_US_NUM_CONST_REGS
 * (ERROR on overflow) and `cp` is stored at that index. The register value is
 * the index tagged with REG_CONSTANT.
 * NOTE(review): the remaining parameters, the local declarations, the update
 * of const_nr and the return statement are not visible here.
 * NOTE(review): the ERROR message ends in a newline and the ERROR macro
 * appends another, so this prints a blank line. */
135 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
141 for (index
= 0; index
< fp
->const_nr
; ++index
) {
142 if (fp
->constant
[index
] == cp
)
146 if (index
>= fp
->const_nr
) {
147 if (index
>= R500_US_NUM_CONST_REGS
) {
148 ERROR("Out of hw constants!\n");
153 fp
->constant
[index
] = cp
;
156 reg
= index
| REG_CONSTANT
;
/* Translates a Mesa source register into a hardware register value.
 * The visible cases, keyed on src.File (the switch header is not shown):
 *   PROGRAM_TEMPORARY    -> src.Index + fp->temp_reg_offset
 *   (label not visible)  -> cs->inputs[src.Index].reg; presumably the
 *                           PROGRAM_INPUT case
 *   PROGRAM_LOCAL_PARAM  -> emit_const4fv() on the program's LocalParams
 *   PROGRAM_ENV_PARAM    -> emit_const4fv() on the context's
 *                           FragmentProgram.Parameters
 *   PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, PROGRAM_CONSTANT
 *                        -> emit_const4fv() on the program's ParameterValues
 *   anything else        -> ERROR
 * NOTE(review): `cs` and `reg` are declared in lines not visible here, as is
 * the return statement. */
160 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
164 case PROGRAM_TEMPORARY
:
165 reg
= src
.Index
+ fp
->temp_reg_offset
;
168 reg
= cs
->inputs
[src
.Index
].reg
;
170 case PROGRAM_LOCAL_PARAM
:
171 reg
= emit_const4fv(fp
,
172 fp
->mesa_program
.Base
.LocalParams
[src
.
175 case PROGRAM_ENV_PARAM
:
176 reg
= emit_const4fv(fp
,
177 fp
->ctx
->FragmentProgram
.Parameters
[src
.
180 case PROGRAM_STATE_VAR
:
181 case PROGRAM_NAMED_PARAM
:
182 case PROGRAM_CONSTANT
:
183 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
184 ParameterValues
[src
.Index
]);
187 ERROR("Can't handle src.File %x\n", src
.File
);
/* Translates a Mesa destination register into a hardware register value.
 * PROGRAM_TEMPORARY maps to dest.Index + fp->temp_reg_offset. Any file the
 * switch does not handle is reported through ERROR.
 * NOTE(review): the case that the "multiple rendering targets" remark belongs
 * to, and the return statement, are not visible here. */
194 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
197 case PROGRAM_TEMPORARY
:
198 reg
= dest
.Index
+ fp
->temp_reg_offset
;
201 /* Eventually we may need to handle multiple
202 * rendering targets... */
206 ERROR("Can't handle dest.File %x\n", dest
.File
);
/* Emits a texture instruction into fp->inst[counter].
 *   fp      - program being built
 *   fpi     - Mesa instruction supplying DstReg, SrcReg[0], TexSrcUnit and
 *             TexSrcTarget
 *   opcode  - selects the hardware texture op; parse_program() passes
 *             OPCODE_KIL, OPCODE_TEX, OPCODE_TXB or OPCODE_TXP
 *   dest    - hardware destination register
 *   counter - slot in fp->inst[] to write
 * When the Mesa destination is a PROGRAM_OUTPUT, the texture result goes to
 * the scratch register from get_temp() and a further OUT instruction copies
 * it to `dest`.
 * NOTE(review): local declarations, the switch on `opcode`, and any update of
 * `counter` between the two instructions are not visible here. */
213 static void emit_tex(struct r500_fragment_program
*fp
,
214 struct prog_instruction
*fpi
, int opcode
, int dest
, int counter
)
/* Write mask shifted by 11, the same shift emit_alu() uses for non-output
 * destinations. */
219 mask
= fpi
->DstReg
.WriteMask
<< 11;
220 hwsrc
= make_src(fp
, fpi
->SrcReg
[0]);
/* Outputs are written through a scratch temporary. */
222 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
223 hwdest
= get_temp(fp
);
228 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
229 | R500_INST_TEX_SEM_WAIT
;
231 fp
->inst
[counter
].inst1
= R500_TEX_ID(fpi
->TexSrcUnit
)
232 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
/* Rectangle textures get the UNSCALED flag. */
234 if (fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
235 fp
->inst
[counter
].inst1
|= R500_TEX_UNSCALED
;
/* One of the following four texture ops is selected per `opcode`; the case
 * labels are not visible here. */
239 fp
->inst
[counter
].inst1
|= R500_TEX_INST_TEXKILL
;
242 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
245 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LODBIAS
;
248 fp
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
251 ERROR("emit_tex can't handle opcode %x\n", opcode
);
/* Source and destination both use identity swizzles; the source-derived
 * swizzle is deliberately commented out. */
254 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
255 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
256 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
257 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
258 | R500_TEX_DST_ADDR(hwdest
)
259 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
260 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
262 fp
->inst
[counter
].inst3
= 0x0;
263 fp
->inst
[counter
].inst4
= 0x0;
264 fp
->inst
[counter
].inst5
= 0x0;
/* For outputs, emit an OUT instruction that moves the scratch temporary to
 * `dest`, using the same CMP-based move as emit_mov(). mask << 4 equals
 * WriteMask << 15, the shift emit_alu() uses for outputs. */
266 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
268 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
269 | R500_INST_TEX_SEM_WAIT
| (mask
<< 4);
270 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(get_temp(fp
));
271 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(get_temp(fp
));
272 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
273 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
274 | R500_ALU_RGB_SEL_B_SRC0
275 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB
)
276 | R500_ALU_RGB_OMOD_DISABLE
;
277 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
278 | R500_ALPHA_ADDRD(dest
)
279 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A
)
280 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A
)
281 | R500_ALPHA_OMOD_DISABLE
;
282 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
283 | R500_ALU_RGBA_ADDRD(dest
)
284 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
285 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
/* Fills `fp` with a fixed two-instruction program and marks it translated.
 * inst[0] is a TEX instruction on texture unit 0 that reads register 0 and
 * writes register 0 with all write masks and clamps set. inst[1] is an OUT
 * instruction with all output masks set, using the MAD op in both the alpha
 * and RGBA words. It does not consult the Mesa program at all.
 * NOTE(review): several continuation lines of inst[0].inst3, inst[1].inst0,
 * inst[1].inst1 and inst[1].inst4 are not visible here. */
289 static void dumb_shader(struct r500_fragment_program
*fp
)
/* Instruction 0: texture fetch. */
291 fp
->inst
[0].inst0
= R500_INST_TYPE_TEX
292 | R500_INST_TEX_SEM_WAIT
293 | R500_INST_RGB_WMASK_R
294 | R500_INST_RGB_WMASK_G
295 | R500_INST_RGB_WMASK_B
296 | R500_INST_ALPHA_WMASK
297 | R500_INST_RGB_CLAMP
298 | R500_INST_ALPHA_CLAMP
;
299 fp
->inst
[0].inst1
= R500_TEX_ID(0)
301 | R500_TEX_SEM_ACQUIRE
302 | R500_TEX_IGNORE_UNCOVERED
;
303 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
304 | R500_TEX_SRC_S_SWIZ_R
305 | R500_TEX_SRC_T_SWIZ_G
306 | R500_TEX_DST_ADDR(0)
307 | R500_TEX_DST_R_SWIZ_R
308 | R500_TEX_DST_G_SWIZ_G
309 | R500_TEX_DST_B_SWIZ_B
310 | R500_TEX_DST_A_SWIZ_A
;
311 fp
->inst
[0].inst3
= R500_DX_ADDR(0)
321 fp
->inst
[0].inst4
= 0x0;
322 fp
->inst
[0].inst5
= 0x0;
/* Instruction 1: output write. */
324 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
325 R500_INST_TEX_SEM_WAIT
|
327 R500_INST_RGB_OMASK_R
|
328 R500_INST_RGB_OMASK_G
|
329 R500_INST_RGB_OMASK_B
|
330 R500_INST_ALPHA_OMASK
;
331 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
333 R500_RGB_ADDR1_CONST
|
335 R500_RGB_ADDR2_CONST
|
336 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
337 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
338 R500_ALPHA_ADDR1(0) |
339 R500_ALPHA_ADDR1_CONST
|
340 R500_ALPHA_ADDR2(0) |
341 R500_ALPHA_ADDR2_CONST
|
342 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
/* Operand A takes source 0 unswizzled; operand B is swizzled to 1. */
343 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
344 R500_ALU_RGB_R_SWIZ_A_R
|
345 R500_ALU_RGB_G_SWIZ_A_G
|
346 R500_ALU_RGB_B_SWIZ_A_B
|
347 R500_ALU_RGB_SEL_B_SRC0
|
348 R500_ALU_RGB_R_SWIZ_B_1
|
349 R500_ALU_RGB_B_SWIZ_B_1
|
350 R500_ALU_RGB_G_SWIZ_B_1
;
351 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
352 R500_ALPHA_SWIZ_A_A
|
/* Operand C is swizzled to 0 on every channel. */
354 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
355 R500_ALU_RGBA_R_SWIZ_0
|
356 R500_ALU_RGBA_G_SWIZ_0
|
357 R500_ALU_RGBA_B_SWIZ_0
|
358 R500_ALU_RGBA_A_SWIZ_0
;
361 fp
->translated
= GL_TRUE
;
/* Initialises inst0 of fp->inst[counter] for an ALU-class instruction.
 * A PROGRAM_OUTPUT destination gets type OUT with the write mask shifted by
 * 15; any other destination gets type ALU with the write mask shifted by 11.
 * R500_INST_TEX_SEM_WAIT is then ORed in for both.
 * Callers must invoke this before ORing further flags into inst0, because it
 * assigns the word rather than merging into it.
 * NOTE(review): one continuation line in each branch is not visible here. */
364 static void emit_alu(struct r500_fragment_program
*fp
, int counter
, struct prog_instruction
*fpi
) {
365 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
366 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
368 | (fpi
->DstReg
.WriteMask
<< 15);
370 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
372 | (fpi
->DstReg
.WriteMask
<< 11);
375 fp
->inst
[counter
].inst0
|= R500_INST_TEX_SEM_WAIT
;
/* Fills inst1..inst5 of fp->inst[counter] with a move of `src` into `dest`.
 *   src  - Mesa source register; its swizzle is applied to operands A and B
 *   dest - hardware destination register, as produced by make_dest()
 * The move is encoded with the CMP op: A and B both select source 0 with the
 * source's swizzle, and C is swizzled to zero.
 * inst0 is not written here; callers pair this with emit_alu(). */
378 static void emit_mov(struct r500_fragment_program
*fp
, int counter
, struct prog_src_register src
, GLuint dest
) {
379 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
380 * it is technically more accurate and recommended by ATI/AMD. */
381 GLuint src_reg
= make_src(fp
, src
);
382 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src_reg
);
383 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src_reg
);
384 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
385 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src
))
386 | R500_ALU_RGB_SEL_B_SRC0
387 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src
))
388 | R500_ALU_RGB_OMOD_DISABLE
;
389 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
390 | R500_ALPHA_ADDRD(dest
)
391 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src
))
392 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src
))
393 | R500_ALPHA_OMOD_DISABLE
;
394 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
395 | R500_ALU_RGBA_ADDRD(dest
)
396 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
397 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
/* Translates the Mesa fragment program in fp->mesa_program into hardware
 * instructions stored in fp->inst[].
 * Reports an empty program through ERROR. Otherwise it walks the instructions
 * up to OPCODE_END, switches on the opcode and fills inst0..inst5 of
 * fp->inst[counter] for each one. After the loop it flags the last emitted
 * instruction with R500_INST_LAST when its type bits equal 1, warns
 * otherwise, and records the instruction count in fp->cs->nrslots.
 * NOTE(review): the case labels, the breaks, the updates of `counter` and the
 * return statements are not visible here. The opcode names in the comments
 * below are inferred from the hardware op each handler selects. */
400 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
402 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
403 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
404 struct prog_instruction
*fpi
;
405 GLuint src
[3], dest
, temp
[2];
406 int flags
, pixel_mask
= 0, output_mask
= 0, counter
= 0;
408 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
409 ERROR("The program is empty!\n");
413 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
/* KIL is the only opcode for which no destination is computed. */
415 if (fpi
->Opcode
!= OPCODE_KIL
) {
416 dest
= make_dest(fp
, fpi
->DstReg
);
418 pixel_mask
= fpi
->DstReg
.WriteMask
<< 11;
419 output_mask
= fpi
->DstReg
.WriteMask
<< 15;
422 switch (fpi
->Opcode
) {
/* Move with the ABS modifier set on operands A and B (presumably
 * OPCODE_ABS). */
424 emit_alu(fp
, counter
, fpi
);
425 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
426 fp
->inst
[counter
].inst3
|= R500_ALU_RGB_MOD_A_ABS
427 | R500_ALU_RGB_MOD_B_ABS
;
428 fp
->inst
[counter
].inst4
|= R500_ALPHA_MOD_A_ABS
429 | R500_ALPHA_MOD_B_ABS
;
/* Addition via MAD: A is the constant one, B is source 0, C is source 1
 * (presumably OPCODE_ADD). */
432 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
433 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
434 /* Variation on MAD: 1*src0+src1 */
435 emit_alu(fp
, counter
, fpi
);
436 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
437 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(0);
438 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
439 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(0);
440 fp
->inst
[counter
].inst3
= /* 1 */
441 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
442 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
443 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
444 | R500_ALPHA_ADDRD(dest
)
445 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
446 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
447 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
448 | R500_ALU_RGBA_ADDRD(dest
)
449 | R500_ALU_RGBA_SEL_C_SRC1
450 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
451 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
452 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
/* Three-operand compare with the operand order reversed: Mesa source 2 goes
 * to address 0 / operand A, source 1 to B, source 0 to C (presumably
 * OPCODE_CMP). */
455 /* This inst's selects need to be swapped as follows:
456 * 0 -> C ; 1 -> B ; 2 -> A */
457 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
458 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
459 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
460 emit_alu(fp
, counter
, fpi
);
461 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[2])
462 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[0]);
463 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[2])
464 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[0]);
465 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
466 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[2]))
467 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
468 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
469 | R500_ALPHA_ADDRD(dest
)
470 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[2]))
471 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
472 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
473 | R500_ALU_RGBA_ADDRD(dest
)
474 | R500_ALU_RGBA_SEL_C_SRC2
475 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
476 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
477 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
/* Single-operand op: COS in the alpha word, SOP in the RGBA word (presumably
 * OPCODE_COS). */
480 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
481 emit_alu(fp
, counter
, fpi
);
482 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
483 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
484 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
485 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
486 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
487 | R500_ALPHA_ADDRD(dest
)
488 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
489 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
490 | R500_ALU_RGBA_ADDRD(dest
);
/* Two-operand dot product using the DP3 RGBA op (presumably OPCODE_DP3). */
493 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
494 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
495 emit_alu(fp
, counter
, fpi
);
496 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
497 | R500_RGB_ADDR1(src
[1]);
498 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
499 | R500_ALPHA_ADDR1(src
[1]);
500 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
501 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
502 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
503 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
504 | R500_ALPHA_ADDRD(dest
)
505 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
506 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
507 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
508 | R500_ALU_RGBA_ADDRD(dest
);
/* Two-operand dot product using the DP4 RGBA op (presumably OPCODE_DP4). */
511 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
512 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
514 emit_alu(fp
, counter
, fpi
);
515 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
516 | R500_RGB_ADDR1(src
[1]);
517 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
518 | R500_ALPHA_ADDR1(src
[1]);
519 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
520 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
521 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
522 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
523 | R500_ALPHA_ADDRD(dest
)
524 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
525 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
526 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
527 | R500_ALU_RGBA_ADDRD(dest
);
/* DP4 variant in which the alpha of operand A is forced to one instead of
 * using source 0's alpha swizzle (presumably OPCODE_DPH). */
530 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
531 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
533 emit_alu(fp
, counter
, fpi
);
534 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
535 | R500_RGB_ADDR1(src
[1]);
536 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
537 | R500_ALPHA_ADDR1(src
[1]);
538 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
539 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
540 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
541 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
542 | R500_ALPHA_ADDRD(dest
)
543 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
544 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
545 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
546 | R500_ALU_RGBA_ADDRD(dest
);
/* Single-operand op: EX2 in the alpha word, SOP in the RGBA word (presumably
 * OPCODE_EX2). */
549 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
550 emit_alu(fp
, counter
, fpi
);
551 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
552 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
553 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
554 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
555 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_EX2
556 | R500_ALPHA_ADDRD(dest
)
557 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
558 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
559 | R500_ALU_RGBA_ADDRD(dest
);
/* Single-operand op: FRC in both the alpha and RGBA words (presumably
 * OPCODE_FRC). */
562 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
563 emit_alu(fp
, counter
, fpi
);
564 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
565 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
566 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
567 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
568 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
569 | R500_ALPHA_ADDRD(dest
)
570 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
571 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
572 | R500_ALU_RGBA_ADDRD(dest
);
/* Texture kill, delegated to emit_tex().
 * NOTE(review): `dest` is only assigned for non-KIL opcodes at the top of the
 * loop, yet it is passed here. If KIL is the first instruction it would be
 * read uninitialised, unless a line not visible here assigns it. */
575 emit_tex(fp
, fpi
, OPCODE_KIL
, dest
, counter
);
/* Single-operand op: LN2 in the alpha word, SOP in the RGBA word (presumably
 * OPCODE_LG2). */
578 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
579 emit_alu(fp
, counter
, fpi
);
580 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
581 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
582 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
583 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
584 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_LN2
585 | R500_ALPHA_ADDRD(dest
)
586 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
587 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
588 | R500_ALU_RGBA_ADDRD(dest
);
/* Full three-operand MAD: A is source 0, B is source 1, C is source 2
 * (presumably OPCODE_MAD). */
591 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
592 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
593 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
594 emit_alu(fp
, counter
, fpi
);
595 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
596 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
597 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
598 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
599 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
600 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
601 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
602 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
603 | R500_ALPHA_ADDRD(dest
)
604 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
605 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
606 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
607 | R500_ALU_RGBA_ADDRD(dest
)
608 | R500_ALU_RGBA_SEL_C_SRC2
609 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
610 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
611 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
/* Two-operand MAX (presumably OPCODE_MAX). */
614 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
615 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
616 emit_alu(fp
, counter
, fpi
);
617 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
618 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
619 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
620 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
621 | R500_ALU_RGB_SEL_B_SRC1
622 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
623 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
624 | R500_ALPHA_ADDRD(dest
)
625 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
626 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
627 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
628 | R500_ALU_RGBA_ADDRD(dest
);
/* Two-operand MIN (presumably OPCODE_MIN). */
631 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
632 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
633 emit_alu(fp
, counter
, fpi
);
634 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
635 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
636 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
637 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
638 | R500_ALU_RGB_SEL_B_SRC1
639 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
640 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MIN
641 | R500_ALPHA_ADDRD(dest
)
642 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
643 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
644 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
645 | R500_ALU_RGBA_ADDRD(dest
);
/* Plain move of source 0 (presumably OPCODE_MOV). */
648 emit_alu(fp
, counter
, fpi
);
649 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
/* Multiplication via MAD with operand C swizzled to zero; the C source
 * selects are deliberately commented out (presumably OPCODE_MUL). */
652 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
653 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
654 /* Variation on MAD: src0*src1+0 */
655 emit_alu(fp
, counter
, fpi
);
656 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
657 | R500_RGB_ADDR1(src
[1]);
658 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
659 | R500_ALPHA_ADDR1(src
[1]);
660 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
661 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
662 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
663 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
664 | R500_ALPHA_ADDRD(dest
)
665 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
666 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
667 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
668 | R500_ALU_RGBA_ADDRD(dest
)
669 // | R500_ALU_RGBA_SEL_C_SRC2
670 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
671 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
672 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
/* Single-operand op: RCP in the alpha word, SOP in the RGBA word (presumably
 * OPCODE_RCP). */
675 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
676 emit_alu(fp
, counter
, fpi
);
677 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
678 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
679 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
680 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
681 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RCP
682 | R500_ALPHA_ADDRD(dest
)
683 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
684 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
685 | R500_ALU_RGBA_ADDRD(dest
);
/* Single-operand op: RSQ in the alpha word, SOP in the RGBA word (presumably
 * OPCODE_RSQ). */
688 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
689 emit_alu(fp
, counter
, fpi
);
690 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
691 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
692 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
693 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
694 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RSQ
695 | R500_ALPHA_ADDRD(dest
)
696 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
697 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
698 | R500_ALU_RGBA_ADDRD(dest
);
/* Combined cosine and sine, emitted as two instructions (presumably
 * OPCODE_SCS).
 * NOTE(review): this overwrites fpi->DstReg.WriteMask in the Mesa program
 * itself (0x1, then 0x2) and no restore is visible, so the program's original
 * mask is lost after translation.
 * NOTE(review): `counter` is presumably advanced between the two
 * instructions in a line not visible here. */
701 /* TODO: Make this elegant! */
702 /* Do a cosine, then a sine, masking out the channels we want to protect. */
703 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
704 /* Cosine only goes in R (x) channel. */
705 fpi
->DstReg
.WriteMask
= 0x1;
706 emit_alu(fp
, counter
, fpi
);
/* inst0 from emit_alu() is replaced here.
 * NOTE(review): the output path shifts by 14 while emit_alu() shifts output
 * write masks by 15; confirm which bit is intended. */
707 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
708 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
709 | R500_INST_TEX_SEM_WAIT
| 0x1 << 14;
711 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
712 | R500_INST_TEX_SEM_WAIT
| 0x1 << 11;
714 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
715 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
716 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
717 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
718 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
719 | R500_ALPHA_ADDRD(dest
)
720 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
721 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
722 | R500_ALU_RGBA_ADDRD(dest
);
724 /* Sine only goes in G (y) channel. */
725 fpi
->DstReg
.WriteMask
= 0x2;
726 emit_alu(fp
, counter
, fpi
);
727 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
728 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
729 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
730 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
731 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
732 | R500_ALPHA_ADDRD(dest
)
733 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
734 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
735 | R500_ALU_RGBA_ADDRD(dest
);
736 /* Put 0 into B,A (z,w) channels.
738 fpi->DstReg.WriteMask = 0xC;
739 emit_alu(fp, counter, fpi);
740 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
741 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
742 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
743 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
744 | R500_ALU_RGB_SEL_B_SRC0
745 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
746 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
747 | R500_ALPHA_ADDRD(dest)
748 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
749 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
750 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
751 | R500_ALU_RGBA_ADDRD(dest)
752 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
753 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); */
/* Comparison built on CMP with the pre-subtract result (source 1 minus
 * source 0) as operand C; A is the constant one and B is zero. Presumably
 * OPCODE_SGE, since the otherwise identical handler further down swaps the
 * two constants. */
756 /* We use SRCP, so as a precaution we're
757 * going to set NOP in previous inst, if possible. */
758 /* This inst's selects need to be swapped as follows:
759 * 0 -> C ; 1 -> B ; 2 -> A */
760 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
761 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
762 emit_alu(fp
, counter
, fpi
);
763 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
764 | R500_RGB_ADDR1(src
[1])
765 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
766 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
767 | R500_ALPHA_ADDR1(src
[1])
768 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
769 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
770 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
771 | R500_ALU_RGB_SEL_B_SRC1
772 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
773 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
774 | R500_ALPHA_ADDRD(dest
)
775 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
776 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO
);
777 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
778 | R500_ALU_RGBA_ADDRD(dest
)
779 | R500_ALU_RGBA_SEL_C_SRCP
780 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
781 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
782 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
/* Single-operand op: SIN in the alpha word, SOP in the RGBA word (presumably
 * OPCODE_SIN). */
785 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
786 emit_alu(fp
, counter
, fpi
);
787 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
788 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
789 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
790 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
791 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
792 | R500_ALPHA_ADDRD(dest
)
793 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
794 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
795 | R500_ALU_RGBA_ADDRD(dest
);
/* Same construction as the earlier pre-subtract comparison, with the
 * constants swapped: A is zero and B is one (presumably OPCODE_SLT). */
798 /* We use SRCP, so as a precaution we're
799 * going to set NOP in previous inst, if possible. */
800 /* This inst's selects need to be swapped as follows:
801 * 0 -> C ; 1 -> B ; 2 -> A */
802 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
803 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
804 emit_alu(fp
, counter
, fpi
);
805 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
806 | R500_RGB_ADDR1(src
[1])
807 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
808 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
809 | R500_ALPHA_ADDR1(src
[1])
810 | R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
811 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
812 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO
)
813 | R500_ALU_RGB_SEL_B_SRC1
814 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
815 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
816 | R500_ALPHA_ADDRD(dest
)
817 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO
)
818 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE
);
819 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
820 | R500_ALU_RGBA_ADDRD(dest
)
821 | R500_ALU_RGBA_SEL_C_SRCP
822 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
823 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
824 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
/* Subtraction via MAD: A is the constant one, B is source 0 (placed in
 * address slot 1), C is source 1 (address slot 2) with the negate modifier
 * (presumably OPCODE_SUB). */
827 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
828 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
829 /* Variation on MAD: 1*src0-src1 */
830 emit_alu(fp
, counter
, fpi
);
831 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
832 | R500_RGB_ADDR2(src
[1]);
833 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
834 | R500_ALPHA_ADDR2(src
[1]);
835 fp
->inst
[counter
].inst3
= /* 1 */
836 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
837 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
838 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
839 | R500_ALPHA_ADDRD(dest
)
840 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
841 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
842 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
843 | R500_ALU_RGBA_ADDRD(dest
)
844 | R500_ALU_RGBA_SEL_C_SRC2
845 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
846 | R500_ALU_RGBA_MOD_C_NEG
847 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
848 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
849 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
/* Move that relies on emit_mov() applying the source swizzle; negation is
 * not handled yet, per the TODO (presumably OPCODE_SWZ). */
852 /* TODO: Negation masks! */
853 emit_alu(fp
, counter
, fpi
);
854 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
/* Texture opcodes are delegated to emit_tex(). The statement guarded by each
 * PROGRAM_OUTPUT test is not visible here; presumably it advances `counter`
 * past the extra OUT instruction emit_tex() writes for outputs. */
857 emit_tex(fp
, fpi
, OPCODE_TEX
, dest
, counter
);
858 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
862 emit_tex(fp
, fpi
, OPCODE_TXB
, dest
, counter
);
863 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
867 emit_tex(fp
, fpi
, OPCODE_TXP
, dest
, counter
);
868 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
)
/* Any other opcode is reported as an error. */
872 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
/* Saturating instructions get both clamp flags ORed into inst0. */
876 /* Finishing touches */
877 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
878 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
/* After the loop: flag the last emitted instruction when the low two bits of
 * its inst0 equal 1, otherwise warn; then record the instruction count. */
888 /* Finish him! (If it's an ALU/OUT instruction...) */
889 if ((fp
->inst
[counter
-1].inst0
& 0x3) == 1) {
890 fp
->inst
[counter
-1].inst0
|= R500_INST_LAST
;
892 /* We still need to put an output inst, right? */
893 WARN_ONCE("Final FP instruction is not an OUT.\n");
899 fp
->cs
->nrslots
= counter
;
/*
 * init_program() - reset per-compile tracking state before a fragment
 * program is translated.
 *
 * r300: driver context; its option cache is queried for "fp_optimization".
 * fp:   fragment program whose bookkeeping fields are reset.
 *
 * What the visible code does:
 *  - clears fp->translated and fp->error, and points both fp->cs and the
 *    local cs at the pfs_compile state held in the context;
 *  - zeroes that compile state and marks every vsrc/ssrc entry of every
 *    ALU slot as SRC_CONST;
 *  - for each input named in InputsRead (texcoords, WPOS, COL0, COL1)
 *    clears the refcount, assigns a register and bumps
 *    fp->temp_reg_offset, removing the handled bits from InputsRead;
 *  - warns about any other inputs and forces them to read register 0;
 *  - reports an ERROR when the Mesa program has no instruction array;
 *  - finally derives fp->max_temp_idx from fp->temp_reg_offset and
 *    stores temps_used (still 0 here) into cs->temp_in_use.
 *
 * NOTE(review): this listing omits several original lines (the values
 * assigned to the .reg fields, the guard around the "unknown inputs"
 * warning, and whatever follows the ERROR call) - confirm against the
 * full file before relying on this summary.
 */
906 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
908 struct r300_pfs_compile_state
*cs
= NULL
;
909 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
910 struct prog_instruction
*fpi
;
911 GLuint InputsRead
= mp
->Base
.InputsRead
;
912 GLuint temps_used
= 0; /* for fp->temps[] */
915 /* New compile, reset tracking data */
917 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
918 fp
->translated
= GL_FALSE
;
919 fp
->error
= GL_FALSE
;
920 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
922 fp
->first_node_has_tex
= 0;
924 /* Size of pixel stack, plus 1. */
925 fp
->max_temp_idx
= 1;
926 /* Temp register offset. */
927 fp
->temp_reg_offset
= 0;
928 fp
->node
[0].alu_end
= -1;
929 fp
->node
[0].tex_end
= -1;
/* Wipe the shared compile state, then default every slot source to SRC_CONST. */
931 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
932 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
933 for (j
= 0; j
< 3; j
++) {
934 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
935 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
939 /* Work out what temps the Mesa inputs correspond to, this must match
940 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
941 * configures itself based on the fragprog's InputsRead
943 * NOTE: this depends on get_hw_temp() allocating registers in order,
944 * starting from register 0, so we're just going to do that instead.
947 /* Texcoords come first */
948 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
949 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
950 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
951 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
953 fp
->temp_reg_offset
++;
956 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
958 /* fragment position treated as a texcoord */
959 if (InputsRead
& FRAG_BIT_WPOS
) {
960 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
961 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
=
963 fp
->temp_reg_offset
++;
965 InputsRead
&= ~FRAG_BIT_WPOS
;
967 /* Then primary colour */
968 if (InputsRead
& FRAG_BIT_COL0
) {
969 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
970 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
=
972 fp
->temp_reg_offset
++;
974 InputsRead
&= ~FRAG_BIT_COL0
;
976 /* Secondary color */
977 if (InputsRead
& FRAG_BIT_COL1
) {
978 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
979 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
=
981 fp
->temp_reg_offset
++;
983 InputsRead
&= ~FRAG_BIT_COL1
;
987 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
988 /* force read from hwreg 0 for now */
989 for (i
= 0; i
< 32; i
++)
990 if (InputsRead
& (1 << i
))
991 cs
->inputs
[i
].reg
= 0;
994 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
995 * That way, we can free up the reg when it's no longer needed
997 if (!mp
->Base
.Instructions
) {
998 ERROR("No instructions found in program, going to go die now.\n");
1002 fp
->max_temp_idx
= fp
->temp_reg_offset
+ 1;
1004 cs
->temp_in_use
= temps_used
;
1007 static void update_params(struct r500_fragment_program
*fp
)
1009 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
1011 /* Ask Mesa nicely to fill in ParameterValues for us */
1012 if (mp
->Base
.Parameters
)
1013 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
/*
 * r500TranslateFragmentShader() - translate a Mesa fragment program into
 * R500 instructions the first time it is needed.
 *
 * r300: driver context, forwarded to init_program().
 * fp:   fragment program to translate; skipped when fp->translated is
 *       already set.
 *
 * Visible steps when translation is required: print the Mesa program to
 * stderr, call init_program(), run parse_program(), set fp->inst_offset to
 * 0 and fp->inst_end to cs->nrslots - 1 (on both the failure and success
 * paths), mark the program translated and notify
 * r300UpdateStateParameters() with _NEW_PROGRAM.
 *
 * NOTE(review): cs is initialised to NULL and later dereferenced
 * (cs->nrslots); the assignment that makes it valid is not visible in this
 * listing - confirm it exists in the full file.
 * NOTE(review): the statement guarded by the RADEON_DEBUG & DEBUG_PIXEL
 * test, and anything after the closing of the !fp->translated branch, are
 * likewise not visible here.
 */
1016 void r500TranslateFragmentShader(r300ContextPtr r300
,
1017 struct r500_fragment_program
*fp
)
1020 struct r300_pfs_compile_state
*cs
= NULL
;
1022 if (!fp
->translated
) {
1024 /* I need to see what I'm working with! */
1025 fprintf(stderr
, "Mesa program:\n");
1026 fprintf(stderr
, "-------------\n");
1027 _mesa_print_program(&fp
->mesa_program
.Base
);
1030 init_program(r300
, fp
);
1033 if (parse_program(fp
) == GL_FALSE
) {
1034 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1036 fp
->inst_offset
= 0;
1037 fp
->inst_end
= cs
->nrslots
- 1;
1040 fp
->inst_offset
= 0;
1041 fp
->inst_end
= cs
->nrslots
- 1;
1043 fp
->translated
= GL_TRUE
;
1044 if (RADEON_DEBUG
& DEBUG_PIXEL
)
1048 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);
/*
 * toswiz() - printable name for a swizzle selector, used by the debug dump.
 *
 * swiz_val: selector value; the dump passes 3-bit fields (masked with 0x7).
 *
 * Only the mapping 5 -> "1/2" is visible in this listing; the remaining
 * cases and the fall-through return value are not shown here.
 */
1055 static char *toswiz(int swiz_val
) {
1062 case 5: return "1/2";
/*
 * toop() - printable name of an RGB ALU opcode, used by the debug dump.
 *
 * op_val: opcode field value; the dump passes a 4-bit field (0..15).
 *
 * Returns a pointer to a static string and never NULL.  Only opcodes 0..12
 * have names; any other value (13..15 from the 4-bit field, or anything out
 * of range) yields "Unknown" so the result is always safe to hand to a
 * "%s" conversion.
 */
static char *toop(int op_val)
{
	static char *const names[] = {
		"MAD", "DP3", "DP4", "D2A", "MIN", "MAX", "Reserved",
		"CND", "CMP", "FRC", "SOP", "MDH", "MDV",
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (op_val < 0 || op_val >= count)
		return "Unknown";

	return names[op_val];
}
/*
 * to_alpha_op() - printable name of an alpha ALU opcode, used by the debug
 * dump.
 *
 * op_val: opcode field value; the dump passes a 4-bit field (0..15), all of
 *         which have names.
 *
 * Returns a pointer to a static string and never NULL.  A value outside
 * 0..15 yields "Unknown" rather than an unset pointer, so the result is
 * always safe to hand to a "%s" conversion.
 */
static char *to_alpha_op(int op_val)
{
	static char *const names[] = {
		"MAD", "DP", "MIN", "MAX", "Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ", "SIN", "COS", "MDH", "MDV",
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (op_val < 0 || op_val >= count)
		return "Unknown";

	return names[op_val];
}
/*
 * to_mask() - printable form of a 4-bit channel mask, used by the debug
 * dump for the write mask and output mask fields.
 *
 * val: mask value; bit 0 is R, bit 1 is G, bit 2 is B and bit 3 is A.
 *      Alpha, when present, is listed first (e.g. 9 -> "AR").
 *
 * Returns a pointer to a static string and never NULL.  A value outside
 * 0..15 yields "UNKNOWN" rather than an unset pointer, so the result is
 * always safe to hand to a "%s" conversion.
 */
static char *to_mask(int val)
{
	static char *const names[] = {
		"NONE", "R", "G", "RG", "B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG", "AB", "ARB", "AGB", "ARGB",
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (val < 0 || val >= count)
		return "UNKNOWN";

	return names[val];
}
/*
 * dump_program() - print a human-readable decode of the translated
 * instructions to stderr (debug aid).
 *
 * fp: fragment program whose inst[0 .. inst_end] entries are printed.
 *
 * For every instruction the common word (inst0) is printed with its type
 * taken from the low two bits (ALU/OUT/FC/TEX), the TEX_WAIT/LAST/NOP/
 * ALU WAIT flags, and the two 4-bit masks at bits 11 and 15 rendered by
 * to_mask().  A second switch on the same two bits then prints the
 * remaining words: RGB_ADDR (inst1), ALPHA_ADDR (inst2), RGB_INST (inst3),
 * ALPHA_INST (inst4) and RGBA_INST (inst5), or the three texture words
 * inst1..inst3.
 *
 * NOTE(review): the case labels of that second switch, a few printf
 * arguments and the end of the function are not visible in this listing -
 * confirm which instruction types take which branch.
 * NOTE(review): the third texture line is labelled "2:" although it prints
 * inst3 - looks like a typo in the label; confirm before changing.
 */
1138 static void dump_program(struct r500_fragment_program
*fp
)
1146 for (n
= 0; n
< fp
->inst_end
+1; n
++) {
1147 inst0
= inst
= fp
->inst
[n
].inst0
;
1148 fprintf(stderr
,"%d\t0:CMN_INST 0x%08x:", n
, inst
);
1149 switch(inst
& 0x3) {
1150 case R500_INST_TYPE_ALU
: str
= "ALU"; break;
1151 case R500_INST_TYPE_OUT
: str
= "OUT"; break;
1152 case R500_INST_TYPE_FC
: str
= "FC"; break;
1153 case R500_INST_TYPE_TEX
: str
= "TEX"; break;
1155 fprintf(stderr
,"%s %s %s %s %s ", str
,
1156 inst
& R500_INST_TEX_SEM_WAIT
? "TEX_WAIT" : "",
1157 inst
& R500_INST_LAST
? "LAST" : "",
1158 inst
& R500_INST_NOP
? "NOP" : "",
1159 inst
& R500_INST_ALU_WAIT
? "ALU WAIT" : "");
1160 fprintf(stderr
,"wmask: %s omask: %s\n", to_mask((inst
>> 11) & 0xf),
1161 to_mask((inst
>> 15) & 0xf));
1163 switch(inst0
& 0x3) {
1166 fprintf(stderr
,"\t1:RGB_ADDR 0x%08x:", fp
->inst
[n
].inst1
);
1167 inst
= fp
->inst
[n
].inst1
;
1169 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1170 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1171 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1172 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1175 fprintf(stderr
,"\t2:ALPHA_ADDR 0x%08x:", fp
->inst
[n
].inst2
);
1176 inst
= fp
->inst
[n
].inst2
;
1177 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1178 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1179 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1180 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1182 fprintf(stderr
,"\t3 RGB_INST: 0x%08x:", fp
->inst
[n
].inst3
);
1183 inst
= fp
->inst
[n
].inst3
;
1184 fprintf(stderr
,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1185 (inst
) & 0x3, toswiz((inst
>> 2) & 0x7), toswiz((inst
>> 5) & 0x7), toswiz((inst
>> 8) & 0x7),
1187 (inst
>> 13) & 0x3, toswiz((inst
>> 15) & 0x7), toswiz((inst
>> 18) & 0x7), toswiz((inst
>> 21) & 0x7),
1188 (inst
>> 24) & 0x3);
1191 fprintf(stderr
,"\t4 ALPHA_INST:0x%08x:", fp
->inst
[n
].inst4
);
1192 inst
= fp
->inst
[n
].inst4
;
1193 fprintf(stderr
,"%s dest:%d%s alp_A_src:%d %s %d alp_b_src:%d %s %d\n", to_alpha_op(inst
& 0xf),
1194 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1195 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), (inst
>> 17) & 0x3,
1196 (inst
>> 19) & 0x3, toswiz((inst
>> 21) & 0x7), (inst
>> 24) & 0x3);
1198 fprintf(stderr
,"\t5 RGBA_INST: 0x%08x:", fp
->inst
[n
].inst5
);
1199 inst
= fp
->inst
[n
].inst5
;
1200 fprintf(stderr
,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst
& 0xf),
1201 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1202 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), toswiz((inst
>> 17) & 0x7), toswiz((inst
>> 20) & 0x7),
1204 (inst
>> 25) & 0x3, toswiz((inst
>> 27) & 0x7), (inst
>> 30) & 0x3);
1209 fprintf(stderr
,"1: TEX INST 0x%08x\n", fp
->inst
[n
].inst1
);
1210 fprintf(stderr
,"2: TEX ADDR 0x%08x\n", fp
->inst
[n
].inst2
);
1211 fprintf(stderr
,"2: TEX ADDR DXDY 0x%08x\n", fp
->inst
[n
].inst3
);
1214 fprintf(stderr
,"\n");