2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
58 * Useful macros and values
60 #define ERROR(fmt, args...) do { \
61 fprintf(stderr, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
63 fp->error = GL_TRUE; \
66 #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
68 #define R500_US_NUM_TEMP_REGS 128
69 #define R500_US_NUM_CONST_REGS 256
71 /* "Register" flags */
72 #define REG_CONSTANT (1 << 8)
73 #define REG_SRC_REL (1 << 9)
74 #define REG_DEST_REL (1 << 7)
77 #define R500_SWIZZLE_ZERO 4
78 #define R500_SWIZZLE_HALF 5
79 #define R500_SWIZZLE_ONE 6
80 #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
81 #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
82 #define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
/*
 * Swizzle placement helpers: each macro shifts an already-encoded swizzle
 * value to the bit position of the named field inside one instruction dword.
 * The argument is parenthesised so that compound expressions such as
 * MAKE_SWIZ_RGB_A(a | b) shift the whole value rather than only the last
 * operand.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
96 static void dump_program(struct r500_fragment_program
*fp
);
98 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
101 /* This could be optimized, but it should be plenty fast already. */
103 for (i
= 0; i
< 3; i
++) {
104 temp
= GET_SWZ(src
.Swizzle
, i
);
105 /* Fix SWIZZLE_ONE */
106 if (temp
== 5) temp
++;
112 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
113 GLuint swiz
= GET_SWZ(src
.Swizzle
, 3);
115 if (swiz
== 5) swiz
++;
119 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
121 GLuint temp
= src
.Swizzle
;
123 for (i
= 0; i
< 4; i
++) {
124 swiz
+= (temp
& 0x3) << i
*2;
130 static int get_temp(struct r500_fragment_program
*fp
, int slot
) {
136 while (cs
->inputs
[r
].refcount
!= 0) {
141 fp
->temp_reg_offset
= r
- slot
;
143 if (r
>= R500_US_NUM_TEMP_REGS
) {
144 ERROR("Out of hardware temps!\n");
148 if (r
> fp
->max_temp_idx
)
149 fp
->max_temp_idx
= r
;
154 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
155 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
161 for (index
= 0; index
< fp
->const_nr
; ++index
) {
162 if (fp
->constant
[index
] == cp
)
166 if (index
>= fp
->const_nr
) {
167 if (index
>= R500_US_NUM_CONST_REGS
) {
168 ERROR("Out of hw constants!\n");
173 fp
->constant
[index
] = cp
;
176 reg
= index
| REG_CONSTANT
;
180 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
184 case PROGRAM_TEMPORARY
:
185 reg
= src
.Index
+ fp
->temp_reg_offset
;
188 reg
= cs
->inputs
[src
.Index
].reg
;
190 case PROGRAM_STATE_VAR
:
191 case PROGRAM_NAMED_PARAM
:
192 case PROGRAM_CONSTANT
:
193 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
194 ParameterValues
[src
.Index
]);
197 ERROR("Can't handle src.File %x\n", src
.File
);
204 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
207 case PROGRAM_TEMPORARY
:
208 reg
= dest
.Index
+ fp
->temp_reg_offset
;
211 /* Eventually we may need to handle multiple
212 * rendering targets... */
216 ERROR("Can't handle dest.File %x\n", dest
.File
);
223 static void emit_tex(struct r500_fragment_program
*fp
,
224 struct prog_instruction
*fpi
, int opcode
, int dest
, int counter
)
229 mask
= fpi
->DstReg
.WriteMask
<< 11;
230 hwsrc
= make_src(fp
, fpi
->SrcReg
[0]);
232 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
233 | R500_INST_TEX_SEM_WAIT
;
235 fp
->inst
[counter
].inst1
= R500_TEX_ID(fpi
->TexSrcUnit
)
236 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
238 if (fpi
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
239 fp
->inst
[counter
].inst1
|= R500_TEX_UNSCALED
;
243 fp
->inst
[counter
].inst1
|= R500_TEX_INST_TEXKILL
;
246 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
249 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LODBIAS
;
252 fp
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
255 ERROR("emit_tex can't handle opcode %x\n", opcode
);
258 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
259 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
260 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
261 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
262 | R500_TEX_DST_ADDR(dest
)
263 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
264 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
268 fp
->inst
[counter
].inst3
= 0x0;
269 fp
->inst
[counter
].inst4
= 0x0;
270 fp
->inst
[counter
].inst5
= 0x0;
273 static void dumb_shader(struct r500_fragment_program
*fp
)
275 fp
->inst
[0].inst0
= R500_INST_TYPE_TEX
276 | R500_INST_TEX_SEM_WAIT
277 | R500_INST_RGB_WMASK_R
278 | R500_INST_RGB_WMASK_G
279 | R500_INST_RGB_WMASK_B
280 | R500_INST_ALPHA_WMASK
281 | R500_INST_RGB_CLAMP
282 | R500_INST_ALPHA_CLAMP
;
283 fp
->inst
[0].inst1
= R500_TEX_ID(0)
285 | R500_TEX_SEM_ACQUIRE
286 | R500_TEX_IGNORE_UNCOVERED
;
287 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
288 | R500_TEX_SRC_S_SWIZ_R
289 | R500_TEX_SRC_T_SWIZ_G
290 | R500_TEX_DST_ADDR(0)
291 | R500_TEX_DST_R_SWIZ_R
292 | R500_TEX_DST_G_SWIZ_G
293 | R500_TEX_DST_B_SWIZ_B
294 | R500_TEX_DST_A_SWIZ_A
;
295 fp
->inst
[0].inst3
= R500_DX_ADDR(0)
305 fp
->inst
[0].inst4
= 0x0;
306 fp
->inst
[0].inst5
= 0x0;
308 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
309 R500_INST_TEX_SEM_WAIT
|
311 R500_INST_RGB_OMASK_R
|
312 R500_INST_RGB_OMASK_G
|
313 R500_INST_RGB_OMASK_B
|
314 R500_INST_ALPHA_OMASK
;
315 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
317 R500_RGB_ADDR1_CONST
|
319 R500_RGB_ADDR2_CONST
|
320 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
321 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
322 R500_ALPHA_ADDR1(0) |
323 R500_ALPHA_ADDR1_CONST
|
324 R500_ALPHA_ADDR2(0) |
325 R500_ALPHA_ADDR2_CONST
|
326 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
327 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
328 R500_ALU_RGB_R_SWIZ_A_R
|
329 R500_ALU_RGB_G_SWIZ_A_G
|
330 R500_ALU_RGB_B_SWIZ_A_B
|
331 R500_ALU_RGB_SEL_B_SRC0
|
332 R500_ALU_RGB_R_SWIZ_B_1
|
333 R500_ALU_RGB_B_SWIZ_B_1
|
334 R500_ALU_RGB_G_SWIZ_B_1
;
335 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
336 R500_ALPHA_SWIZ_A_A
|
338 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
339 R500_ALU_RGBA_R_SWIZ_0
|
340 R500_ALU_RGBA_G_SWIZ_0
|
341 R500_ALU_RGBA_B_SWIZ_0
|
342 R500_ALU_RGBA_A_SWIZ_0
;
345 fp
->translated
= GL_TRUE
;
348 static void emit_alu(struct r500_fragment_program
*fp
, int counter
, struct prog_instruction
*fpi
) {
349 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
350 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
352 | (fpi
->DstReg
.WriteMask
<< 15);
354 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
356 | (fpi
->DstReg
.WriteMask
<< 11);
359 fp
->inst
[counter
].inst0
|= R500_INST_TEX_SEM_WAIT
;
362 static void emit_mov(struct r500_fragment_program
*fp
, int counter
, struct prog_src_register src
, GLuint dest
) {
363 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
364 * it is technically more accurate and recommended by ATI/AMD. */
365 GLuint src_reg
= make_src(fp
, src
);
366 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src_reg
);
367 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src_reg
);
368 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
369 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src
))
370 | R500_ALU_RGB_SEL_B_SRC0
371 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src
))
372 | R500_ALU_RGB_OMOD_DISABLE
;
373 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
374 | R500_ALPHA_ADDRD(dest
)
375 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src
))
376 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src
))
377 | R500_ALPHA_OMOD_DISABLE
;
378 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
379 | R500_ALU_RGBA_ADDRD(dest
)
380 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
381 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
384 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
386 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
387 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
388 struct prog_instruction
*fpi
;
389 GLuint src
[3], dest
, temp
[2];
390 int flags
, pixel_mask
= 0, output_mask
= 0, counter
= 0;
392 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
393 ERROR("The program is empty!\n");
397 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
399 if (fpi
->Opcode
!= OPCODE_KIL
) {
400 dest
= make_dest(fp
, fpi
->DstReg
);
402 pixel_mask
= fpi
->DstReg
.WriteMask
<< 11;
403 output_mask
= fpi
->DstReg
.WriteMask
<< 15;
406 switch (fpi
->Opcode
) {
408 emit_alu(fp
, counter
, fpi
);
409 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
410 fp
->inst
[counter
].inst3
|= R500_ALU_RGB_MOD_A_ABS
411 | R500_ALU_RGB_MOD_B_ABS
;
412 fp
->inst
[counter
].inst4
|= R500_ALPHA_MOD_A_ABS
413 | R500_ALPHA_MOD_B_ABS
;
416 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
417 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
418 /* Variation on MAD: 1*src0+src1 */
419 emit_alu(fp
, counter
, fpi
);
420 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
421 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(0);
422 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
423 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(0);
424 fp
->inst
[counter
].inst3
= /* 1 */
425 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
426 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
427 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
428 | R500_ALPHA_ADDRD(dest
)
429 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
430 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
431 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
432 | R500_ALU_RGBA_ADDRD(dest
)
433 | R500_ALU_RGBA_SEL_C_SRC1
434 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
435 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
436 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
439 /* This inst's selects need to be swapped as follows:
440 * 0 -> C ; 1 -> B ; 2 -> A */
441 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
442 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
443 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
444 emit_alu(fp
, counter
, fpi
);
445 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[2])
446 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[0]);
447 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[2])
448 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[0]);
449 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
450 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[2]))
451 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
452 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
453 | R500_ALPHA_ADDRD(dest
)
454 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[2]))
455 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
456 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
457 | R500_ALU_RGBA_ADDRD(dest
)
458 | R500_ALU_RGBA_SEL_C_SRC2
459 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[0]))
460 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
461 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[0]));
464 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
465 emit_alu(fp
, counter
, fpi
);
466 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
467 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
468 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
469 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
470 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
471 | R500_ALPHA_ADDRD(dest
)
472 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
473 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
474 | R500_ALU_RGBA_ADDRD(dest
);
477 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
478 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
479 emit_alu(fp
, counter
, fpi
);
480 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
481 | R500_RGB_ADDR1(src
[1]);
482 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
483 | R500_ALPHA_ADDR1(src
[1]);
484 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
485 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
486 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
487 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
488 | R500_ALPHA_ADDRD(dest
)
489 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
490 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
491 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
492 | R500_ALU_RGBA_ADDRD(dest
);
495 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
496 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
498 emit_alu(fp
, counter
, fpi
);
499 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
500 | R500_RGB_ADDR1(src
[1]);
501 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
502 | R500_ALPHA_ADDR1(src
[1]);
503 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
504 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
505 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
506 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
507 | R500_ALPHA_ADDRD(dest
)
508 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
509 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
510 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
511 | R500_ALU_RGBA_ADDRD(dest
);
514 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
515 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
517 emit_alu(fp
, counter
, fpi
);
518 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
519 | R500_RGB_ADDR1(src
[1]);
520 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
521 | R500_ALPHA_ADDR1(src
[1]);
522 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
523 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
524 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
525 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
526 | R500_ALPHA_ADDRD(dest
)
527 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
528 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
529 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
530 | R500_ALU_RGBA_ADDRD(dest
);
533 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
534 emit_alu(fp
, counter
, fpi
);
535 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
536 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
537 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
538 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
539 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_EX2
540 | R500_ALPHA_ADDRD(dest
)
541 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
542 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
543 | R500_ALU_RGBA_ADDRD(dest
);
546 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
547 emit_alu(fp
, counter
, fpi
);
548 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
549 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
550 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
551 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
552 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_FRC
553 | R500_ALPHA_ADDRD(dest
)
554 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
555 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_FRC
556 | R500_ALU_RGBA_ADDRD(dest
);
559 emit_tex(fp
, fpi
, OPCODE_KIL
, dest
, counter
);
562 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
563 emit_alu(fp
, counter
, fpi
);
564 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
565 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
566 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
567 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
568 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_LN2
569 | R500_ALPHA_ADDRD(dest
)
570 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
571 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
572 | R500_ALU_RGBA_ADDRD(dest
);
575 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
576 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
577 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
578 emit_alu(fp
, counter
, fpi
);
579 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
580 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
581 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
582 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
583 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
584 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
585 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
586 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
587 | R500_ALPHA_ADDRD(dest
)
588 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
589 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
590 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
591 | R500_ALU_RGBA_ADDRD(dest
)
592 | R500_ALU_RGBA_SEL_C_SRC2
593 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
594 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
595 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
598 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
599 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
600 emit_alu(fp
, counter
, fpi
);
601 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
602 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
603 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
604 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
605 | R500_ALU_RGB_SEL_B_SRC1
606 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
607 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
608 | R500_ALPHA_ADDRD(dest
)
609 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
610 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
611 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
612 | R500_ALU_RGBA_ADDRD(dest
);
615 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
616 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
617 emit_alu(fp
, counter
, fpi
);
618 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
619 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
620 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
621 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
622 | R500_ALU_RGB_SEL_B_SRC1
623 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
624 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MIN
625 | R500_ALPHA_ADDRD(dest
)
626 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
627 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
628 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
629 | R500_ALU_RGBA_ADDRD(dest
);
632 emit_alu(fp
, counter
, fpi
);
633 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
636 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
637 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
638 /* Variation on MAD: src0*src1+0 */
639 emit_alu(fp
, counter
, fpi
);
640 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
641 | R500_RGB_ADDR1(src
[1]);
642 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
643 | R500_ALPHA_ADDR1(src
[1]);
644 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
645 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
646 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
647 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
648 | R500_ALPHA_ADDRD(dest
)
649 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
650 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
651 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
652 | R500_ALU_RGBA_ADDRD(dest
)
653 // | R500_ALU_RGBA_SEL_C_SRC2
654 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
655 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
656 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
659 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
660 emit_alu(fp
, counter
, fpi
);
661 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
662 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
663 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
664 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
665 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RCP
666 | R500_ALPHA_ADDRD(dest
)
667 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
668 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
669 | R500_ALU_RGBA_ADDRD(dest
);
672 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
673 emit_alu(fp
, counter
, fpi
);
674 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
675 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
676 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
677 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
678 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_RSQ
679 | R500_ALPHA_ADDRD(dest
)
680 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
681 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
682 | R500_ALU_RGBA_ADDRD(dest
);
685 /* TODO: Make this elegant! */
686 /* Do a cosine, then a sine, masking out the channels we want to protect. */
687 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
688 /* Cosine only goes in R (x) channel. */
689 fpi
->DstReg
.WriteMask
= 0x1;
690 emit_alu(fp
, counter
, fpi
);
691 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
692 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
693 | R500_INST_TEX_SEM_WAIT
| 0x1 << 14;
695 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
696 | R500_INST_TEX_SEM_WAIT
| 0x1 << 11;
698 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
699 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
700 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
701 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
702 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_COS
703 | R500_ALPHA_ADDRD(dest
)
704 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
705 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
706 | R500_ALU_RGBA_ADDRD(dest
);
708 /* Sine only goes in G (y) channel. */
709 fpi
->DstReg
.WriteMask
= 0x2;
710 emit_alu(fp
, counter
, fpi
);
711 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
712 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
713 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
714 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
715 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
716 | R500_ALPHA_ADDRD(dest
)
717 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
718 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
719 | R500_ALU_RGBA_ADDRD(dest
);
721 /* Put 0 into B,A (z,w) channels. */
722 fpi
->DstReg
.WriteMask
= 0xC;
723 emit_alu(fp
, counter
, fpi
);
724 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
725 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
726 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
727 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO
)
728 | R500_ALU_RGB_SEL_B_SRC0
729 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO
);
730 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_CMP
731 | R500_ALPHA_ADDRD(dest
)
732 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO
)
733 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO
);
734 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_CMP
735 | R500_ALU_RGBA_ADDRD(dest
)
736 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
737 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
740 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
741 emit_alu(fp
, counter
, fpi
);
742 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
743 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
744 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
745 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]));
746 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_SIN
747 | R500_ALPHA_ADDRD(dest
)
748 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]));
749 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_SOP
750 | R500_ALU_RGBA_ADDRD(dest
);
753 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
754 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
755 /* Variation on MAD: 1*src0-src1 */
756 emit_alu(fp
, counter
, fpi
);
757 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
758 | R500_RGB_ADDR2(src
[1]);
759 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
760 | R500_ALPHA_ADDR2(src
[1]);
761 fp
->inst
[counter
].inst3
= /* 1 */
762 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
763 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
764 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
765 | R500_ALPHA_ADDRD(dest
)
766 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
767 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
768 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
769 | R500_ALU_RGBA_ADDRD(dest
)
770 | R500_ALU_RGBA_SEL_C_SRC2
771 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
772 | R500_ALU_RGBA_MOD_C_NEG
773 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
774 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
775 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
778 /* TODO: Negation masks! */
779 emit_alu(fp
, counter
, fpi
);
780 emit_mov(fp
, counter
, fpi
->SrcReg
[0], dest
);
783 emit_tex(fp
, fpi
, OPCODE_TEX
, dest
, counter
);
786 emit_tex(fp
, fpi
, OPCODE_TXB
, dest
, counter
);
789 emit_tex(fp
, fpi
, OPCODE_TXP
, dest
, counter
);
792 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
796 /* Finishing touches */
797 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
798 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
808 /* Finish him! (If it's an ALU/OUT instruction...) */
809 if ((fp
->inst
[counter
-1].inst0
& 0x3) == 1) {
810 fp
->inst
[counter
-1].inst0
|= R500_INST_LAST
;
812 /* We still need to put an output inst, right? */
813 WARN_ONCE("Final FP instruction is not an OUT.\n");
815 fp
->inst
[counter
].inst0
= R500_INST_TYPE_OUT
816 | R500_INST_TEX_SEM_WAIT
| R500_INST_LAST
|
818 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(dest
);
819 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(dest
);
820 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
821 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
822 | R500_ALU_RGB_SEL_B_SRC0
823 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
824 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
825 | R500_ALPHA_ADDRD(0)
826 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SEL_B_SRC0
827 | R500_ALPHA_SWIZ_A_A
| R500_ALPHA_SWIZ_B_1
;
828 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
829 | R500_ALU_RGBA_ADDRD(0)
830 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
831 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
836 fp
->cs
->nrslots
= counter
;
843 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
845 struct r300_pfs_compile_state
*cs
= NULL
;
846 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
847 struct prog_instruction
*fpi
;
848 GLuint InputsRead
= mp
->Base
.InputsRead
;
849 GLuint temps_used
= 0; /* for fp->temps[] */
852 /* New compile, reset tracking data */
854 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
855 fp
->translated
= GL_FALSE
;
856 fp
->error
= GL_FALSE
;
857 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
859 fp
->first_node_has_tex
= 0;
861 /* Size of pixel stack, plus 1. */
862 fp
->max_temp_idx
= 1;
863 /* Temp register offset. */
864 fp
->temp_reg_offset
= 0;
865 fp
->node
[0].alu_end
= -1;
866 fp
->node
[0].tex_end
= -1;
868 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
869 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
870 for (j
= 0; j
< 3; j
++) {
871 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
872 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
876 /* Work out what temps the Mesa inputs correspond to, this must match
877 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
878 * configures itself based on the fragprog's InputsRead
880 * NOTE: this depends on get_hw_temp() allocating registers in order,
881 * starting from register 0, so we're just going to do that instead.
884 /* Texcoords come first */
885 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
886 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
887 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
888 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
890 fp
->temp_reg_offset
++;
893 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
895 /* fragment position treated as a texcoord */
896 if (InputsRead
& FRAG_BIT_WPOS
) {
897 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
898 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
=
900 fp
->temp_reg_offset
++;
902 InputsRead
&= ~FRAG_BIT_WPOS
;
904 /* Then primary colour */
905 if (InputsRead
& FRAG_BIT_COL0
) {
906 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
907 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
=
909 fp
->temp_reg_offset
++;
911 InputsRead
&= ~FRAG_BIT_COL0
;
913 /* Secondary color */
914 if (InputsRead
& FRAG_BIT_COL1
) {
915 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
916 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
=
918 fp
->temp_reg_offset
++;
920 InputsRead
&= ~FRAG_BIT_COL1
;
924 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
925 /* force read from hwreg 0 for now */
926 for (i
= 0; i
< 32; i
++)
927 if (InputsRead
& (1 << i
))
928 cs
->inputs
[i
].reg
= 0;
931 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
932 * That way, we can free up the reg when it's no longer needed
934 if (!mp
->Base
.Instructions
) {
935 ERROR("No instructions found in program, going to go die now.\n");
940 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
942 for (i
= 0; i
< 3; i
++) {
943 idx
= fpi
->SrcReg
[i
].Index
;
944 if (fpi
->SrcReg
[i
].File
== PROGRAM_INPUT
) {
945 cs
->inputs
[idx
].refcount
++;
946 if (fp
->max_temp_idx
< idx
)
947 fp
->max_temp_idx
= idx
;
953 fp
->max_temp_idx
= fp
->temp_reg_offset
+ 1;
955 cs
->temp_in_use
= temps_used
;
958 static void update_params(struct r500_fragment_program
*fp
)
960 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
962 /* Ask Mesa nicely to fill in ParameterValues for us */
963 if (mp
->Base
.Parameters
)
964 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
/*
 * Translate the Mesa fragment program held in fp.
 *
 * Everything inside the "!fp->translated" guard runs only while the flag is
 * clear: the Mesa program is printed to stderr, init_program() and
 * parse_program() are called, fp->inst_end is set from cs->nrslots, the
 * translated flag is raised and r300UpdateStateParameters() is invoked with
 * _NEW_PROGRAM.
 *
 * NOTE(review): several statements of this function (braces, the statement
 * guarded by the RADEON_DEBUG test, whatever follows the ERROR() call) are
 * not visible in this excerpt; the comments below describe only what is.
 */
967 void r500TranslateFragmentShader(r300ContextPtr r300
,
968 struct r500_fragment_program
*fp
)
/* NOTE(review): cs starts out NULL and is dereferenced further down; the
 * assignment that makes it valid is not visible here - confirm it exists. */
971 struct r300_pfs_compile_state
*cs
= NULL
;
973 if (!fp
->translated
) {
975 /* I need to see what I'm working with! */
976 fprintf(stderr
, "Mesa program:\n");
977 fprintf(stderr
, "-------------\n");
978 _mesa_print_program(&fp
->mesa_program
.Base
);
981 init_program(r300
, fp
);
/* A GL_FALSE result is reported through ERROR(), which also sets
 * fp->error (see the macro definition near the top of the file). */
984 if (parse_program(fp
) == GL_FALSE
) {
985 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
/* Index of the last instruction slot, set on the failure path as well. */
988 fp
->inst_end
= cs
->nrslots
- 1;
/* Index of the last instruction slot: slot count minus one. */
992 fp
->inst_end
= cs
->nrslots
- 1;
/* Raise the flag tested by the guard above so this block is not re-run. */
994 fp
->translated
= GL_TRUE
;
/* Only taken when the DEBUG_PIXEL bit is set in RADEON_DEBUG; the guarded
 * statement is not visible in this excerpt. */
995 if (RADEON_DEBUG
& DEBUG_PIXEL
)
998 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);
/*
 * Map a swizzle selector value to a printable string; dump_program() feeds
 * it 3-bit fields extracted from instruction words.
 * NOTE(review): only the mapping for value 5 ("1/2") is visible in this
 * excerpt - the remaining cases and the fall-back return are not shown.
 */
1005 static char *toswiz(int swiz_val
) {
1012 case 5: return "1/2";
/**
 * Return the mnemonic for an opcode value, for debug output.
 *
 * dump_program() calls this with a 4-bit field (inst & 0xf) taken from the
 * word it labels RGBA_INST, so values 13..15 can reach this function even
 * though only 0..12 have names. Those, and any other out-of-range value,
 * yield "Unknown" so that the caller never hands an uninitialised pointer to
 * fprintf("%s").
 *
 * \param op_val opcode field value
 * \return pointer to a static string; never NULL, must not be modified
 */
static char *toop(int op_val)
{
	static char *const op_names[] = {
		"MAD",		/*  0 */
		"DP3",		/*  1 */
		"DP4",		/*  2 */
		"D2A",		/*  3 */
		"MIN",		/*  4 */
		"MAX",		/*  5 */
		"Reserved",	/*  6 */
		"CND",		/*  7 */
		"CMP",		/*  8 */
		"FRC",		/*  9 */
		"SOP",		/* 10 */
		"MDH",		/* 11 */
		"MDV",		/* 12 */
	};
	const int count = (int)(sizeof(op_names) / sizeof(op_names[0]));

	if (op_val < 0 || op_val >= count)
		return "Unknown";

	return op_names[op_val];
}

/**
 * Return the mnemonic for an alpha opcode value, for debug output.
 *
 * dump_program() calls this with a 4-bit field (inst & 0xf) taken from the
 * word it labels ALPHA_INST; all sixteen values have a name. Any value
 * outside 0..15 yields "Unknown" instead of an uninitialised pointer.
 *
 * \param op_val alpha opcode field value
 * \return pointer to a static string; never NULL, must not be modified
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[] = {
		"MAD",		/*  0 */
		"DP",		/*  1 */
		"MIN",		/*  2 */
		"MAX",		/*  3 */
		"Reserved",	/*  4 */
		"CND",		/*  5 */
		"CMP",		/*  6 */
		"FRC",		/*  7 */
		"EX2",		/*  8 */
		"LN2",		/*  9 */
		"RCP",		/* 10 */
		"RSQ",		/* 11 */
		"SIN",		/* 12 */
		"COS",		/* 13 */
		"MDH",		/* 14 */
		"MDV",		/* 15 */
	};
	const int count =
		(int)(sizeof(alpha_op_names) / sizeof(alpha_op_names[0]));

	if (op_val < 0 || op_val >= count)
		return "Unknown";

	return alpha_op_names[op_val];
}

/**
 * Return a printable name for a 4-bit channel mask, for debug output.
 *
 * Bit 0 selects R, bit 1 G, bit 2 B and bit 3 A; in combined names the alpha
 * channel is written first (e.g. 9 -> "AR", 15 -> "ARGB"). dump_program()
 * passes 4-bit fields here, so every value it produces has a name. Any
 * value outside 0..15 yields "Unknown" instead of an uninitialised pointer.
 *
 * \param val channel mask value
 * \return pointer to a static string; never NULL, must not be modified
 */
static char *to_mask(int val)
{
	static char *const mask_names[] = {
		"NONE",	/*  0 */
		"R",	/*  1 */
		"G",	/*  2 */
		"RG",	/*  3 */
		"B",	/*  4 */
		"RB",	/*  5 */
		"GB",	/*  6 */
		"RGB",	/*  7 */
		"A",	/*  8 */
		"AR",	/*  9 */
		"AG",	/* 10 */
		"ARG",	/* 11 */
		"AB",	/* 12 */
		"ARB",	/* 13 */
		"AGB",	/* 14 */
		"ARGB",	/* 15 */
	};
	const int count = (int)(sizeof(mask_names) / sizeof(mask_names[0]));

	if (val < 0 || val >= count)
		return "Unknown";

	return mask_names[val];
}

/*
 * Print a decoded listing of fp's instructions to stderr.
 *
 * For every slot n in 0..fp->inst_end the raw inst0 word is printed along
 * with its type (low two bits), the TEX_WAIT / LAST / NOP / ALU WAIT flags
 * and the two 4-bit masks at bits 11 and 15. The remaining words of the slot
 * are then printed, selected by the same two type bits.
 *
 * NOTE(review): declarations, case labels, closing braces and a few argument
 * lines of this function are not visible in this excerpt; the comments below
 * describe only the statements that are.
 */
1088 static void dump_program(struct r500_fragment_program
*fp
)
/* Inclusive upper bound: inst_end is the index of the last slot. */
1096 for (n
= 0; n
< fp
->inst_end
+1; n
++) {
/* inst0 keeps the first word for the type switch further down; inst is
 * reused as scratch for each following word. */
1097 inst0
= inst
= fp
->inst
[n
].inst0
;
1098 fprintf(stderr
,"%d\t0:CMN_INST 0x%08x:", n
, inst
);
/* Instruction type lives in the low two bits of the first word. */
1099 switch(inst
& 0x3) {
1100 case R500_INST_TYPE_ALU
: str
= "ALU"; break;
1101 case R500_INST_TYPE_OUT
: str
= "OUT"; break;
1102 case R500_INST_TYPE_FC
: str
= "FC"; break;
1103 case R500_INST_TYPE_TEX
: str
= "TEX"; break;
1105 fprintf(stderr
,"%s %s %s %s %s ", str
,
1106 inst
& R500_INST_TEX_SEM_WAIT
? "TEX_WAIT" : "",
1107 inst
& R500_INST_LAST
? "LAST" : "",
1108 inst
& R500_INST_NOP
? "NOP" : "",
1109 inst
& R500_INST_ALU_WAIT
? "ALU WAIT" : "");
/* Two 4-bit masks: bits 11..14 ("wmask") and bits 15..18 ("omask"). */
1110 fprintf(stderr
,"wmask: %s omask: %s\n", to_mask((inst
>> 11) & 0xf),
1111 to_mask((inst
>> 15) & 0xf));
/* Select how the remaining words are printed from the saved type bits.
 * NOTE(review): the case labels of this switch are not visible here. */
1113 switch(inst0
& 0x3) {
1116 fprintf(stderr
,"\t1:RGB_ADDR 0x%08x:", fp
->inst
[n
].inst1
);
1117 inst
= fp
->inst
[n
].inst1
;
/* Three 8-bit addresses at bits 0, 10 and 20; bits 8, 18 and 28 pick the
 * 'c' or 't' suffix.
 * NOTE(review): the argument for the trailing "srcp:%d" is not visible in
 * this excerpt - confirm it is supplied. */
1119 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1120 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1121 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1122 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1125 fprintf(stderr
,"\t2:ALPHA_ADDR 0x%08x:", fp
->inst
[n
].inst2
);
1126 inst
= fp
->inst
[n
].inst2
;
/* Same field layout as the RGB_ADDR word above.
 * NOTE(review): the "srcp:%d" argument is likewise not visible here. */
1127 fprintf(stderr
,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1128 inst
& 0xff, (inst
& (1<<8)) ? 'c' : 't',
1129 (inst
>> 10) & 0xff, (inst
& (1<<18)) ? 'c' : 't',
1130 (inst
>> 20) & 0xff, (inst
& (1<<28)) ? 'c' : 't',
1132 fprintf(stderr
,"\t3 RGB_INST: 0x%08x:", fp
->inst
[n
].inst3
);
1133 inst
= fp
->inst
[n
].inst3
;
/* A operand fields start at bit 0, B operand fields at bit 13: a 2-bit
 * source select followed by three 3-bit swizzle selectors each.
 * NOTE(review): the argument for the first trailing %d is not visible in
 * this excerpt. */
1134 fprintf(stderr
,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1135 (inst
) & 0x3, toswiz((inst
>> 2) & 0x7), toswiz((inst
>> 5) & 0x7), toswiz((inst
>> 8) & 0x7),
1137 (inst
>> 13) & 0x3, toswiz((inst
>> 15) & 0x7), toswiz((inst
>> 18) & 0x7), toswiz((inst
>> 21) & 0x7),
1138 (inst
>> 24) & 0x3);
1141 fprintf(stderr
,"\t4 ALPHA_INST:0x%08x:", fp
->inst
[n
].inst4
);
1142 inst
= fp
->inst
[n
].inst4
;
/* Low 4 bits: alpha opcode; bits 4..10: destination; bit 11: "(rel)". */
1143 fprintf(stderr
,"%s dest:%d%s alp_A_src:%d %s %d alp_b_src:%d %s %d\n", to_alpha_op(inst
& 0xf),
1144 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1145 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), (inst
>> 17) & 0x3,
1146 (inst
>> 19) & 0x3, toswiz((inst
>> 21) & 0x7), (inst
>> 24) & 0x3);
1148 fprintf(stderr
,"\t5 RGBA_INST: 0x%08x:", fp
->inst
[n
].inst5
);
1149 inst
= fp
->inst
[n
].inst5
;
/* Low 4 bits: opcode (values 0..15 are passed to toop());
 * bits 4..10: destination; bit 11: "(rel)".
 * NOTE(review): the argument for the %d after the rgb_C swizzles is not
 * visible in this excerpt. */
1150 fprintf(stderr
,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst
& 0xf),
1151 (inst
>> 4) & 0x7f, inst
& (1<<11) ? "(rel)":"",
1152 (inst
>> 12) & 0x3, toswiz((inst
>> 14) & 0x7), toswiz((inst
>> 17) & 0x7), toswiz((inst
>> 20) & 0x7),
1154 (inst
>> 25) & 0x3, toswiz((inst
>> 27) & 0x7), (inst
>> 30) & 0x3);
/* These three words are printed raw, without field decoding.
 * NOTE(review): the label "2:" is used for both inst2 and inst3; the
 * second one looks like it was meant to be "3:". */
1159 fprintf(stderr
,"1: TEX INST 0x%08x\n", fp
->inst
[n
].inst1
);
1160 fprintf(stderr
,"2: TEX ADDR 0x%08x\n", fp
->inst
[n
].inst2
);
1161 fprintf(stderr
,"2: TEX ADDR DXDY 0x%08x\n", fp
->inst
[n
].inst3
);
/* Blank line between slots. */
1164 fprintf(stderr
,"\n");