2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 * Ben Skeggs <darktama@iinet.net.au>
35 * - COS/SIN/SCS/LIT instructions
36 * - Depth write, WPOS/FOGC inputs
38 * - Negate on individual components (implement in swizzle code?)
39 * - Verify results of opcodes for accuracy, I've only checked them
49 #include "program_instruction.h"
50 #include "r300_context.h"
51 #include "r300_fragprog.h"
54 #define PFS_INVAL 0xFFFFFFFF
55 #define COMPILE_STATE struct r300_pfs_compile_state *cs = rp->cs
57 static void dump_program(struct r300_fragment_program
*rp
);
58 static void emit_arith(struct r300_fragment_program
*rp
, int op
,
59 pfs_reg_t dest
, int mask
,
60 pfs_reg_t src0
, pfs_reg_t src1
, pfs_reg_t src2
,
63 /***************************************
64 * begin: useful data structures for fragment program generation
65 ***************************************/
67 /* description of r300 native hw instructions */
74 { "MAD", 3, R300_FPI0_OUTC_MAD
, R300_FPI2_OUTA_MAD
},
75 { "DP3", 2, R300_FPI0_OUTC_DP3
, R300_FPI2_OUTA_DP4
},
76 { "DP4", 2, R300_FPI0_OUTC_DP4
, R300_FPI2_OUTA_DP4
},
77 { "MIN", 2, R300_FPI0_OUTC_MIN
, R300_FPI2_OUTA_MIN
},
78 { "MAX", 2, R300_FPI0_OUTC_MAX
, R300_FPI2_OUTA_MAX
},
79 { "CMP", 3, R300_FPI0_OUTC_CMP
, R300_FPI2_OUTA_CMP
},
80 { "FRC", 1, R300_FPI0_OUTC_FRC
, R300_FPI2_OUTA_FRC
},
81 { "EX2", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_EX2
},
82 { "LG2", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_LG2
},
83 { "RCP", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_RCP
},
84 { "RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_RSQ
},
85 { "REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA
, PFS_INVAL
},
86 { "CMPH", 3, R300_FPI0_OUTC_CMPH
, PFS_INVAL
},
89 #define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
94 #define SLOT_VECTOR (1<<0)
95 #define SLOT_SCALAR (1<<3)
96 #define SLOT_BOTH (SLOT_VECTOR|SLOT_SCALAR)
98 /* vector swizzles r300 can support natively, with a couple of
99 * cases we handle specially
101 * pfs_reg_t.v_swz/pfs_reg_t.s_swz is an index into this table
103 static const struct r300_pfs_swizzle
{
104 GLuint hash
; /* swizzle value this matches */
105 GLuint base
; /* base value for hw swizzle */
106 GLuint stride
; /* difference in base between arg0/1/2 */
109 /* native swizzles */
110 { MAKE_SWZ3(X
, Y
, Z
), R300_FPI0_ARGC_SRC0C_XYZ
, 4, SLOT_VECTOR
},
111 { MAKE_SWZ3(X
, X
, X
), R300_FPI0_ARGC_SRC0C_XXX
, 4, SLOT_VECTOR
},
112 { MAKE_SWZ3(Y
, Y
, Y
), R300_FPI0_ARGC_SRC0C_YYY
, 4, SLOT_VECTOR
},
113 { MAKE_SWZ3(Z
, Z
, Z
), R300_FPI0_ARGC_SRC0C_ZZZ
, 4, SLOT_VECTOR
},
114 { MAKE_SWZ3(W
, W
, W
), R300_FPI0_ARGC_SRC0A
, 1, SLOT_SCALAR
},
115 { MAKE_SWZ3(Y
, Z
, X
), R300_FPI0_ARGC_SRC0C_YZX
, 1, SLOT_VECTOR
},
116 { MAKE_SWZ3(Z
, X
, Y
), R300_FPI0_ARGC_SRC0C_ZXY
, 1, SLOT_VECTOR
},
117 { MAKE_SWZ3(W
, Z
, Y
), R300_FPI0_ARGC_SRC0CA_WZY
, 1, SLOT_BOTH
},
118 { MAKE_SWZ3(ONE
, ONE
, ONE
), R300_FPI0_ARGC_ONE
, 0, 0},
119 { MAKE_SWZ3(ZERO
, ZERO
, ZERO
), R300_FPI0_ARGC_ZERO
, 0, 0},
120 { PFS_INVAL
, R300_FPI0_ARGC_HALF
, 0, 0},
121 { PFS_INVAL
, 0, 0, 0},
123 #define SWIZZLE_XYZ 0
124 #define SWIZZLE_XXX 1
125 #define SWIZZLE_YYY 2
126 #define SWIZZLE_ZZZ 3
127 #define SWIZZLE_WWW 4
128 #define SWIZZLE_YZX 5
129 #define SWIZZLE_ZXY 6
130 #define SWIZZLE_WZY 7
131 #define SWIZZLE_111 8
132 #define SWIZZLE_000 9
133 #define SWIZZLE_HHH 10
135 #define SWZ_X_MASK (7 << 0)
136 #define SWZ_Y_MASK (7 << 3)
137 #define SWZ_Z_MASK (7 << 6)
138 #define SWZ_W_MASK (7 << 9)
139 /* used during matching of non-native swizzles */
140 static const struct {
141 GLuint hash
; /* used to mask matching swizzle components */
142 int mask
; /* actual outmask */
143 int count
; /* count of components matched */
145 { SWZ_X_MASK
|SWZ_Y_MASK
|SWZ_Z_MASK
, 1|2|4, 3},
146 { SWZ_X_MASK
|SWZ_Y_MASK
, 1|2, 2},
147 { SWZ_X_MASK
|SWZ_Z_MASK
, 1|4, 2},
148 { SWZ_Y_MASK
|SWZ_Z_MASK
, 2|4, 2},
152 { PFS_INVAL
, PFS_INVAL
, PFS_INVAL
}
155 /* mapping from SWIZZLE_* to r300 native values for scalar insns */
156 static const struct {
157 int base
; /* hw value of swizzle */
158 int stride
; /* difference between SRC0/1/2 */
161 { R300_FPI2_ARGA_SRC0C_X
, 3, SLOT_VECTOR
},
162 { R300_FPI2_ARGA_SRC0C_Y
, 3, SLOT_VECTOR
},
163 { R300_FPI2_ARGA_SRC0C_Z
, 3, SLOT_VECTOR
},
164 { R300_FPI2_ARGA_SRC0A
, 1, SLOT_SCALAR
},
165 { R300_FPI2_ARGA_ZERO
, 0, 0 },
166 { R300_FPI2_ARGA_ONE
, 0, 0 },
167 { R300_FPI2_ARGA_HALF
, 0, 0 }
169 #define SWIZZLE_HALF 6
171 /* boiler-plate reg, for convenience */
172 static const pfs_reg_t undef
= {
184 /* constant one source */
185 static const pfs_reg_t pfs_one
= {
186 type
: REG_TYPE_CONST
,
193 /* constant half source */
194 static const pfs_reg_t pfs_half
= {
195 type
: REG_TYPE_CONST
,
202 /* constant zero source */
203 static const pfs_reg_t pfs_zero
= {
204 type
: REG_TYPE_CONST
,
211 /***************************************
212 * end: data structures
213 ***************************************/
215 #define ERROR(fmt, args...) do { \
216 fprintf(stderr, "%s::%s(): " fmt "\n",\
217 __FILE__, __func__, ##args); \
218 rp->error = GL_TRUE; \
221 static int get_hw_temp(struct r300_fragment_program
*rp
)
224 int r
= ffs(~cs
->hwreg_in_use
);
226 ERROR("Out of hardware temps\n");
230 cs
->hwreg_in_use
|= (1 << --r
);
231 if (r
> rp
->max_temp_idx
)
232 rp
->max_temp_idx
= r
;
237 static int get_hw_temp_tex(struct r300_fragment_program
*rp
)
242 r
= ffs(~(cs
->hwreg_in_use
| cs
->used_in_node
));
244 return get_hw_temp(rp
); /* Will cause an indirection */
246 cs
->hwreg_in_use
|= (1 << --r
);
247 if (r
> rp
->max_temp_idx
)
248 rp
->max_temp_idx
= r
;
253 static void free_hw_temp(struct r300_fragment_program
*rp
, int idx
)
256 cs
->hwreg_in_use
&= ~(1<<idx
);
259 static pfs_reg_t
get_temp_reg(struct r300_fragment_program
*rp
)
264 r
.index
= ffs(~cs
->temp_in_use
);
266 ERROR("Out of program temps\n");
269 cs
->temp_in_use
|= (1 << --r
.index
);
271 cs
->temps
[r
.index
].refcount
= 0xFFFFFFFF;
272 cs
->temps
[r
.index
].reg
= -1;
277 static pfs_reg_t
get_temp_reg_tex(struct r300_fragment_program
*rp
)
282 r
.index
= ffs(~cs
->temp_in_use
);
284 ERROR("Out of program temps\n");
287 cs
->temp_in_use
|= (1 << --r
.index
);
289 cs
->temps
[r
.index
].refcount
= 0xFFFFFFFF;
290 cs
->temps
[r
.index
].reg
= get_hw_temp_tex(rp
);
295 static void free_temp(struct r300_fragment_program
*rp
, pfs_reg_t r
)
298 if (!(cs
->temp_in_use
& (1<<r
.index
))) return;
300 if (r
.type
== REG_TYPE_TEMP
) {
301 free_hw_temp(rp
, cs
->temps
[r
.index
].reg
);
302 cs
->temps
[r
.index
].reg
= -1;
303 cs
->temp_in_use
&= ~(1<<r
.index
);
304 } else if (r
.type
== REG_TYPE_INPUT
) {
305 free_hw_temp(rp
, cs
->inputs
[r
.index
].reg
);
306 cs
->inputs
[r
.index
].reg
= -1;
310 static pfs_reg_t
emit_param4fv(struct r300_fragment_program
*rp
,
314 r
.type
= REG_TYPE_CONST
;
317 pidx
= rp
->param_nr
++;
318 r
.index
= rp
->const_nr
++;
319 if (pidx
>= PFS_NUM_CONST_REGS
|| r
.index
>= PFS_NUM_CONST_REGS
) {
320 ERROR("Out of const/param slots!\n");
324 rp
->param
[pidx
].idx
= r
.index
;
325 rp
->param
[pidx
].values
= values
;
326 rp
->params_uptodate
= GL_FALSE
;
332 static pfs_reg_t
emit_const4fv(struct r300_fragment_program
*rp
, GLfloat
*cp
)
335 r
.type
= REG_TYPE_CONST
;
337 r
.index
= rp
->const_nr
++;
338 if (r
.index
>= PFS_NUM_CONST_REGS
) {
339 ERROR("Out of hw constants!\n");
343 COPY_4V(rp
->constant
[r
.index
], cp
);
348 static __inline pfs_reg_t
negate(pfs_reg_t r
)
355 /* Hack, to prevent clobbering sources used multiple times when
356 * emulating non-native instructions
358 static __inline pfs_reg_t
keep(pfs_reg_t r
)
364 static __inline pfs_reg_t
absolute(pfs_reg_t r
)
370 static int swz_native(struct r300_fragment_program
*rp
,
371 pfs_reg_t src
, pfs_reg_t
*r
, GLuint arbneg
)
373 /* Native swizzle, nothing to see here */
374 src
.negate_s
= (arbneg
>> 3) & 1;
376 if ((arbneg
& 0x7) == 0x0) {
379 } else if ((arbneg
& 0x7) == 0x7) {
384 *r
= get_temp_reg(rp
);
386 emit_arith(rp
, PFS_OP_MAD
, *r
, arbneg
& 0x7,
387 keep(src
), pfs_one
, pfs_zero
, 0);
389 emit_arith(rp
, PFS_OP_MAD
, *r
,
390 (arbneg
^ 0x7) | WRITEMASK_W
,
391 src
, pfs_one
, pfs_zero
, 0);
397 static int swz_emit_partial(struct r300_fragment_program
*rp
, pfs_reg_t src
,
398 pfs_reg_t
*r
, int mask
, int mc
, GLuint arbneg
)
404 *r
= get_temp_reg(rp
);
406 /* A partial match, src.v_swz/mask define what parts of the
407 * desired swizzle we match */
408 if (mc
+ s_mask
[mask
].count
== 3) {
410 src
.negate_s
= (arbneg
>> 3) & 1;
413 tmp
= arbneg
& s_mask
[mask
].mask
;
415 tmp
= tmp
^ s_mask
[mask
].mask
;
418 emit_arith(rp
, PFS_OP_MAD
, *r
,
419 arbneg
& s_mask
[mask
].mask
,
420 keep(src
), pfs_one
, pfs_zero
, 0);
422 if (!wmask
) src
.no_use
= GL_TRUE
;
423 else src
.no_use
= GL_FALSE
;
424 emit_arith(rp
, PFS_OP_MAD
, *r
, tmp
| wmask
,
425 src
, pfs_one
, pfs_zero
, 0);
428 if (!wmask
) src
.no_use
= GL_TRUE
;
429 else src
.no_use
= GL_FALSE
;
430 emit_arith(rp
, PFS_OP_MAD
, *r
,
431 (arbneg
& s_mask
[mask
].mask
) | wmask
,
432 src
, pfs_one
, pfs_zero
, 0);
436 if (!wmask
) src
.no_use
= GL_TRUE
;
437 else src
.no_use
= GL_FALSE
;
438 emit_arith(rp
, PFS_OP_MAD
, *r
,
439 s_mask
[mask
].mask
| wmask
,
440 src
, pfs_one
, pfs_zero
, 0);
443 return s_mask
[mask
].count
;
446 #define swizzle(r, x, y, z, w) do_swizzle(rp, r, \
453 static pfs_reg_t
do_swizzle(struct r300_fragment_program
*rp
,
454 pfs_reg_t src
, GLuint arbswz
, GLuint arbneg
)
461 /* If swizzling from something without an XYZW native swizzle,
462 * emit result to a temp, and do new swizzle from the temp.
464 if (src
.v_swz
!= SWIZZLE_XYZ
|| src
.s_swz
!= SWIZZLE_W
) {
465 pfs_reg_t temp
= get_temp_reg(rp
);
466 emit_arith(rp
, PFS_OP_MAD
, temp
, WRITEMASK_XYZW
, src
, pfs_one
,
470 src
.s_swz
= GET_SWZ(arbswz
, 3);
474 #define CUR_HASH (v_swiz[src.v_swz].hash & s_mask[c_mask].hash)
475 if (CUR_HASH
== (arbswz
& s_mask
[c_mask
].hash
)) {
476 if (s_mask
[c_mask
].count
== 3)
477 v_matched
+= swz_native(rp
, src
, &r
,
480 v_matched
+= swz_emit_partial(rp
, src
,
489 /* Fill with something invalid.. all 0's was
490 * wrong before, matched SWIZZLE_X. So all
491 * 1's will be okay for now */
492 arbswz
|= (PFS_INVAL
& s_mask
[c_mask
].hash
);
494 } while(v_swiz
[++src
.v_swz
].hash
!= PFS_INVAL
);
495 src
.v_swz
= SWIZZLE_XYZ
;
496 } while (s_mask
[++c_mask
].hash
!= PFS_INVAL
);
498 ERROR("should NEVER get here\n");
502 static pfs_reg_t
t_src(struct r300_fragment_program
*rp
,
503 struct prog_src_register fpsrc
)
510 switch (fpsrc
.File
) {
511 case PROGRAM_TEMPORARY
:
512 r
.index
= fpsrc
.Index
;
516 r
.index
= fpsrc
.Index
;
517 r
.type
= REG_TYPE_INPUT
;
520 case PROGRAM_LOCAL_PARAM
:
521 r
= emit_param4fv(rp
,
522 rp
->mesa_program
.Base
.LocalParams
[fpsrc
.Index
]);
524 case PROGRAM_ENV_PARAM
:
525 r
= emit_param4fv(rp
,
526 rp
->ctx
->FragmentProgram
.Parameters
[fpsrc
.Index
]);
528 case PROGRAM_STATE_VAR
:
529 case PROGRAM_NAMED_PARAM
:
530 r
= emit_param4fv(rp
,
531 rp
->mesa_program
.Base
.Parameters
->ParameterValues
[fpsrc
.Index
]);
534 ERROR("unknown SrcReg->File %x\n", fpsrc
.File
);
538 /* no point swizzling ONE/ZERO/HALF constants... */
539 if (r
.v_swz
< SWIZZLE_111
|| r
.s_swz
< SWIZZLE_ZERO
)
540 r
= do_swizzle(rp
, r
, fpsrc
.Swizzle
, fpsrc
.NegateBase
);
542 /* WRONG! Need to be able to do individual component negation,
543 * should probably handle this in the swizzling code unless
544 * all components are negated, then we can do this natively */
545 if ((fpsrc
.NegateBase
& 0xf) == 0xf)
548 r
.negate_s
= (fpsrc
.NegateBase
>> 3) & 1;
550 if ((fpsrc
.NegateBase
& 0x7) == 0x0) {
552 } else if ((fpsrc
.NegateBase
& 0x7) == 0x7) {
555 if (r
.type
!= REG_TYPE_TEMP
) {
556 n
= get_temp_reg(rp
);
557 emit_arith(rp
, PFS_OP_MAD
, n
, 0x7 ^ fpsrc
.NegateBase
,
558 keep(r
), pfs_one
, pfs_zero
, 0);
560 emit_arith(rp
, PFS_OP_MAD
, n
,
561 fpsrc
.NegateBase
& 0x7 | WRITEMASK_W
,
562 r
, pfs_one
, pfs_zero
, 0);
567 emit_arith(rp
, PFS_OP_MAD
, r
,
568 fpsrc
.NegateBase
& 0x7 | WRITEMASK_W
,
569 r
, pfs_one
, pfs_zero
, 0);
578 static pfs_reg_t
t_scalar_src(struct r300_fragment_program
*rp
,
579 struct prog_src_register fpsrc
)
581 struct prog_src_register src
= fpsrc
;
582 int sc
= GET_SWZ(fpsrc
.Swizzle
, 0); /* X */
584 src
.Swizzle
= ((sc
<<0)|(sc
<<3)|(sc
<<6)|(sc
<<9));
586 return t_src(rp
, src
);
589 static pfs_reg_t
t_dst(struct r300_fragment_program
*rp
,
590 struct prog_dst_register dest
) {
594 case PROGRAM_TEMPORARY
:
595 r
.index
= dest
.Index
;
599 r
.type
= REG_TYPE_OUTPUT
;
600 switch (dest
.Index
) {
601 case FRAG_RESULT_COLR
:
602 case FRAG_RESULT_DEPR
:
603 r
.index
= dest
.Index
;
607 ERROR("Bad DstReg->Index 0x%x\n", dest
.Index
);
611 ERROR("Bad DstReg->File 0x%x\n", dest
.File
);
616 static int t_hw_src(struct r300_fragment_program
*rp
, pfs_reg_t src
,
624 /* NOTE: if reg==-1 here, a source is being read that
625 * hasn't been written to. Undefined results */
626 if (cs
->temps
[src
.index
].reg
== -1)
627 cs
->temps
[src
.index
].reg
= get_hw_temp(rp
);
628 idx
= cs
->temps
[src
.index
].reg
;
630 if (!src
.no_use
&& (--cs
->temps
[src
.index
].refcount
== 0))
634 idx
= cs
->inputs
[src
.index
].reg
;
636 if (!src
.no_use
&& (--cs
->inputs
[src
.index
].refcount
== 0))
637 free_hw_temp(rp
, cs
->inputs
[src
.index
].reg
);
640 return (src
.index
| SRC_CONST
);
642 ERROR("Invalid type for source reg\n");
643 return (0 | SRC_CONST
);
646 if (!tex
) cs
->used_in_node
|= (1 << idx
);
651 static int t_hw_dst(struct r300_fragment_program
*rp
, pfs_reg_t dest
,
660 if (cs
->temps
[dest
.index
].reg
== -1) {
662 cs
->temps
[dest
.index
].reg
= get_hw_temp(rp
);
664 cs
->temps
[dest
.index
].reg
= get_hw_temp_tex(rp
);
666 idx
= cs
->temps
[dest
.index
].reg
;
668 if (!dest
.no_use
&& (--cs
->temps
[dest
.index
].refcount
== 0))
671 cs
->dest_in_node
|= (1 << idx
);
672 cs
->used_in_node
|= (1 << idx
);
674 case REG_TYPE_OUTPUT
:
675 switch (dest
.index
) {
676 case FRAG_RESULT_COLR
:
677 rp
->node
[rp
->cur_node
].flags
|= R300_PFS_NODE_OUTPUT_COLOR
;
679 case FRAG_RESULT_DEPR
:
680 rp
->node
[rp
->cur_node
].flags
|= R300_PFS_NODE_OUTPUT_DEPTH
;
686 ERROR("invalid dest reg type %d\n", dest
.type
);
693 static void emit_nop(struct r300_fragment_program
*rp
, GLuint mask
,
699 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
701 if (mask
& WRITEMASK_XYZ
) {
702 rp
->alu
.inst
[cs
->v_pos
].inst0
= NOP_INST0
;
703 rp
->alu
.inst
[cs
->v_pos
].inst1
= NOP_INST1
;
707 if (mask
& WRITEMASK_W
) {
708 rp
->alu
.inst
[cs
->s_pos
].inst2
= NOP_INST2
;
709 rp
->alu
.inst
[cs
->s_pos
].inst3
= NOP_INST3
;
714 static void emit_tex(struct r300_fragment_program
*rp
,
715 struct prog_instruction
*fpi
,
719 pfs_reg_t coord
= t_src(rp
, fpi
->SrcReg
[0]);
720 pfs_reg_t dest
= undef
, rdest
= undef
;
721 GLuint din
= cs
->dest_in_node
, uin
= cs
->used_in_node
;
722 int unit
= fpi
->TexSrcUnit
;
725 /* Resolve source/dest to hardware registers */
726 hwsrc
= t_hw_src(rp
, coord
, GL_TRUE
);
727 if (opcode
!= R300_FPITX_OP_KIL
) {
728 dest
= t_dst(rp
, fpi
->DstReg
);
730 /* r300 doesn't seem to be able to do TEX->output reg */
731 if (dest
.type
== REG_TYPE_OUTPUT
) {
733 dest
= get_temp_reg_tex(rp
);
735 hwdest
= t_hw_dst(rp
, dest
, GL_TRUE
);
737 /* Use a temp that hasn't been used in this node, rather
738 * than causing an indirection
740 if (uin
& (1 << hwdest
)) {
741 free_hw_temp(rp
, hwdest
);
742 hwdest
= get_hw_temp_tex(rp
);
743 cs
->temps
[dest
.index
].reg
= hwdest
;
750 /* Indirection if source has been written in this node, or if the
751 * dest has been read/written in this node
753 if ((coord
.type
!= REG_TYPE_CONST
&& (din
& (1<<hwsrc
))) ||
754 (uin
& (1<<hwdest
))) {
756 /* Finish off current node */
757 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
758 if (rp
->node
[rp
->cur_node
].alu_offset
== cs
->v_pos
) {
759 /* No alu instructions in the node? Emit a NOP. */
760 emit_nop(rp
, WRITEMASK_XYZW
, GL_TRUE
);
761 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
764 rp
->node
[rp
->cur_node
].alu_end
=
765 cs
->v_pos
- rp
->node
[rp
->cur_node
].alu_offset
- 1;
766 assert(rp
->node
[rp
->cur_node
].alu_end
>= 0);
768 if (++rp
->cur_node
>= PFS_MAX_TEX_INDIRECT
) {
769 ERROR("too many levels of texture indirection\n");
774 rp
->node
[rp
->cur_node
].tex_offset
= rp
->tex
.length
;
775 rp
->node
[rp
->cur_node
].alu_offset
= cs
->v_pos
;
776 rp
->node
[rp
->cur_node
].tex_end
= -1;
777 rp
->node
[rp
->cur_node
].alu_end
= -1;
778 rp
->node
[rp
->cur_node
].flags
= 0;
779 cs
->used_in_node
= 0;
780 cs
->dest_in_node
= 0;
783 if (rp
->cur_node
== 0)
784 rp
->first_node_has_tex
= 1;
786 rp
->tex
.inst
[rp
->tex
.length
++] = 0
787 | (hwsrc
<< R300_FPITX_SRC_SHIFT
)
788 | (hwdest
<< R300_FPITX_DST_SHIFT
)
789 | (unit
<< R300_FPITX_IMAGE_SHIFT
)
790 /* not entirely sure about this */
791 | (opcode
<< R300_FPITX_OPCODE_SHIFT
);
793 cs
->dest_in_node
|= (1 << hwdest
);
794 if (coord
.type
!= REG_TYPE_CONST
)
795 cs
->used_in_node
|= (1 << hwsrc
);
797 rp
->node
[rp
->cur_node
].tex_end
++;
799 /* Copy from temp to output if needed */
801 emit_arith(rp
, PFS_OP_MAD
, rdest
, WRITEMASK_XYZW
, dest
,
802 pfs_one
, pfs_zero
, 0);
807 /* Add sources to FPI1/FPI3 lists. If source is already on list,
808 * reuse the index instead of wasting a source.
810 static int add_src(struct r300_fragment_program
*rp
, int reg
, int pos
,
816 /* Look for matches */
817 for (i
=0,csm
=srcmask
; i
<3; i
++,csm
=csm
<<1) {
818 /* If sources have been allocated in this position(s)... */
819 if ((cs
->slot
[pos
].umask
& csm
) == csm
) {
820 /* ... and the register number(s) match, re-use the
822 if (srcmask
== SLOT_VECTOR
&&
823 cs
->slot
[pos
].vsrc
[i
] == reg
)
825 if (srcmask
== SLOT_SCALAR
&&
826 cs
->slot
[pos
].ssrc
[i
] == reg
)
828 if (srcmask
== SLOT_BOTH
&&
829 cs
->slot
[pos
].vsrc
[i
] == reg
&&
830 cs
->slot
[pos
].ssrc
[i
] == reg
)
835 /* Look for free spaces */
836 for (i
=0,csm
=srcmask
; i
<3; i
++,csm
=csm
<<1) {
837 /* If the position(s) haven't been allocated */
838 if ((cs
->slot
[pos
].umask
& csm
) == 0) {
839 cs
->slot
[pos
].umask
|= csm
;
841 if (srcmask
& SLOT_VECTOR
)
842 cs
->slot
[pos
].vsrc
[i
] = reg
;
843 if (srcmask
& SLOT_SCALAR
)
844 cs
->slot
[pos
].ssrc
[i
] = reg
;
849 //ERROR("Failed to allocate sources in FPI1/FPI3!\n");
853 /* Determine whether or not to position opcode in the same ALU slot for both
854 * vector and scalar portions of an instruction.
856 * It's not necessary to force the first case, but it makes disassembled
857 * shaders easier to read.
859 static GLboolean
force_same_slot(int vop
, int sop
,
860 GLboolean emit_vop
, GLboolean emit_sop
,
861 int argc
, pfs_reg_t
*src
)
865 if (emit_vop
&& emit_sop
)
868 if (emit_vop
&& vop
== R300_FPI0_OUTC_REPL_ALPHA
)
873 if (src
[i
].v_swz
== SWIZZLE_WZY
)
880 static void emit_arith(struct r300_fragment_program
*rp
, int op
,
881 pfs_reg_t dest
, int mask
,
882 pfs_reg_t src0
, pfs_reg_t src1
, pfs_reg_t src2
,
886 pfs_reg_t src
[3] = { src0
, src1
, src2
};
887 int hwsrc
[3], sswz
[3], vswz
[3];
889 GLboolean emit_vop
= GL_FALSE
, emit_sop
= GL_FALSE
;
894 vop
= r300_fpop
[op
].v_op
;
895 sop
= r300_fpop
[op
].s_op
;
896 argc
= r300_fpop
[op
].argc
;
898 if ((mask
& WRITEMASK_XYZ
) || vop
== R300_FPI0_OUTC_DP3
)
900 if ((mask
& WRITEMASK_W
) || vop
== R300_FPI0_OUTC_REPL_ALPHA
)
903 if (dest
.type
== REG_TYPE_OUTPUT
&& dest
.index
== FRAG_RESULT_DEPR
)
906 if (force_same_slot(vop
, sop
, emit_vop
, emit_sop
, argc
, src
)) {
907 vpos
= spos
= MAX2(cs
->v_pos
, cs
->s_pos
);
911 /* Here is where we'd decide on where a safe place is to
912 * combine this instruction with a previous one.
914 * This is extremely simple for now.. if a source depends
915 * on the opposite stream, force the same instruction.
919 (v_swiz
[src
[i
].v_swz
].flags
& SLOT_SCALAR
)) {
920 vpos
= spos
= MAX2(vpos
, spos
);
924 (s_swiz
[src
[i
].s_swz
].flags
& SLOT_VECTOR
)) {
925 vpos
= spos
= MAX2(vpos
, spos
);
931 /* - Convert src->hwsrc, record for FPI1/FPI3
932 * - Determine ARG parts of FPI0/FPI2, unused args are filled
939 vswz
[i
] = R300_FPI0_ARGC_ZERO
;
940 sswz
[i
] = R300_FPI2_ARGA_ZERO
;
944 hwsrc
[i
] = t_hw_src(rp
, src
[i
], GL_FALSE
);
946 if (emit_vop
&& vop
!= R300_FPI0_OUTC_REPL_ALPHA
) {
947 srcpos
= add_src(rp
, hwsrc
[i
], vpos
,
948 v_swiz
[src
[i
].v_swz
].flags
);
949 vswz
[i
] = (v_swiz
[src
[i
].v_swz
].base
+
950 (srcpos
* v_swiz
[src
[i
].v_swz
].stride
)) |
951 (src
[i
].negate_v
? ARG_NEG
: 0) |
952 (src
[i
].absolute
? ARG_ABS
: 0);
953 } else vswz
[i
] = R300_FPI0_ARGC_ZERO
;
956 srcpos
= add_src(rp
, hwsrc
[i
], spos
,
957 s_swiz
[src
[i
].s_swz
].flags
);
958 sswz
[i
] = (s_swiz
[src
[i
].s_swz
].base
+
959 (srcpos
* s_swiz
[src
[i
].s_swz
].stride
)) |
960 (src
[i
].negate_s
? ARG_NEG
: 0) |
961 (src
[i
].absolute
? ARG_ABS
: 0);
962 } else sswz
[i
] = R300_FPI2_ARGA_ZERO
;
964 hwdest
= t_hw_dst(rp
, dest
, GL_FALSE
);
966 if (flags
& PFS_FLAG_SAT
) {
967 vop
|= R300_FPI0_OUTC_SAT
;
968 sop
|= R300_FPI2_OUTA_SAT
;
971 /* Throw the pieces together and get FPI0/1 */
972 rp
->alu
.inst
[vpos
].inst1
=
973 ((cs
->slot
[vpos
].vsrc
[0] << R300_FPI1_SRC0C_SHIFT
) |
974 (cs
->slot
[vpos
].vsrc
[1] << R300_FPI1_SRC1C_SHIFT
) |
975 (cs
->slot
[vpos
].vsrc
[2] << R300_FPI1_SRC2C_SHIFT
));
977 rp
->alu
.inst
[vpos
].inst0
= vop
|
978 (vswz
[0] << R300_FPI0_ARG0C_SHIFT
) |
979 (vswz
[1] << R300_FPI0_ARG1C_SHIFT
) |
980 (vswz
[2] << R300_FPI0_ARG2C_SHIFT
);
982 rp
->alu
.inst
[vpos
].inst1
|= hwdest
<< R300_FPI1_DSTC_SHIFT
;
983 if (dest
.type
== REG_TYPE_OUTPUT
) {
984 if (dest
.index
== FRAG_RESULT_COLR
) {
985 rp
->alu
.inst
[vpos
].inst1
|=
986 (mask
& WRITEMASK_XYZ
) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT
;
989 rp
->alu
.inst
[vpos
].inst1
|=
990 (mask
& WRITEMASK_XYZ
) << R300_FPI1_DSTC_REG_MASK_SHIFT
;
993 } else if (spos
>= vpos
)
994 rp
->alu
.inst
[spos
].inst0
= NOP_INST0
;
997 rp
->alu
.inst
[spos
].inst3
=
998 ((cs
->slot
[spos
].ssrc
[0] << R300_FPI3_SRC0A_SHIFT
) |
999 (cs
->slot
[spos
].ssrc
[1] << R300_FPI3_SRC1A_SHIFT
) |
1000 (cs
->slot
[spos
].ssrc
[2] << R300_FPI3_SRC2A_SHIFT
));
1002 rp
->alu
.inst
[spos
].inst2
= sop
|
1003 sswz
[0] << R300_FPI2_ARG0A_SHIFT
|
1004 sswz
[1] << R300_FPI2_ARG1A_SHIFT
|
1005 sswz
[2] << R300_FPI2_ARG2A_SHIFT
;
1007 if (mask
& WRITEMASK_W
) {
1008 if (dest
.type
== REG_TYPE_OUTPUT
) {
1009 if (dest
.index
== FRAG_RESULT_COLR
) {
1010 rp
->alu
.inst
[spos
].inst3
|=
1011 (hwdest
<< R300_FPI3_DSTA_SHIFT
) | R300_FPI3_DSTA_OUTPUT
;
1012 } else if (dest
.index
== FRAG_RESULT_DEPR
) {
1013 rp
->alu
.inst
[spos
].inst3
|= R300_FPI3_DSTA_DEPTH
;
1016 rp
->alu
.inst
[spos
].inst3
|=
1017 (hwdest
<< R300_FPI3_DSTA_SHIFT
) | R300_FPI3_DSTA_REG
;
1021 } else if (vpos
>= spos
)
1022 rp
->alu
.inst
[vpos
].inst2
= NOP_INST2
;
1028 static pfs_reg_t
get_attrib(struct r300_fragment_program
*rp
, GLuint attr
)
1030 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1031 pfs_reg_t r
= undef
;
1033 if (!(mp
->Base
.InputsRead
& (1<<attr
))) {
1034 ERROR("Attribute %d was not provided!\n", attr
);
1038 r
.type
= REG_TYPE_INPUT
;
1045 static GLboolean
parse_program(struct r300_fragment_program
*rp
)
1047 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1048 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
1049 struct prog_instruction
*fpi
;
1050 pfs_reg_t src
[3], dest
, temp
;
1052 int flags
, mask
= 0;
1053 GLfloat cnstv
[4] = {0.0, 0.0, 0.0, 0.0};
1055 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
1056 ERROR("empty program?\n");
1060 for (fpi
=mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1061 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
)
1062 flags
= PFS_FLAG_SAT
;
1066 if (fpi
->Opcode
!= OPCODE_KIL
) {
1067 dest
= t_dst(rp
, fpi
->DstReg
);
1068 mask
= fpi
->DstReg
.WriteMask
;
1071 switch (fpi
->Opcode
) {
1073 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1074 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1075 absolute(src
[0]), pfs_one
, pfs_zero
,
1079 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1080 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1081 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1082 src
[0], pfs_one
, src
[1],
1086 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1087 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1088 src
[2] = t_src(rp
, fpi
->SrcReg
[2]);
1089 /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c
1090 * r300 - if src2.c < 0.0 ? src1.c : src0.c
1092 emit_arith(rp
, PFS_OP_CMP
, dest
, mask
,
1093 src
[2], src
[1], src
[0],
1097 ERROR("COS not implemented\n");
1100 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1101 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1102 emit_arith(rp
, PFS_OP_DP3
, dest
, mask
,
1103 src
[0], src
[1], undef
,
1107 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1108 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1109 emit_arith(rp
, PFS_OP_DP4
, dest
, mask
,
1110 src
[0], src
[1], undef
,
1114 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1115 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1116 /* src0.xyz1 -> temp
1117 * DP4 dest, temp, src1
1120 temp
= get_temp_reg(rp
);
1121 src
[0].s_swz
= SWIZZLE_ONE
;
1122 emit_arith(rp
, PFS_OP_MAD
, temp
, mask
,
1123 src
[0], pfs_one
, pfs_zero
,
1125 emit_arith(rp
, PFS_OP_DP4
, dest
, mask
,
1126 temp
, src
[1], undef
,
1128 free_temp(rp
, temp
);
1130 emit_arith(rp
, PFS_OP_DP4
, dest
, mask
,
1131 swizzle(src
[0], X
, Y
, Z
, ONE
), src
[1],
1136 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1137 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1138 /* dest.y = src0.y * src1.y */
1139 if (mask
& WRITEMASK_Y
)
1140 emit_arith(rp
, PFS_OP_MAD
, dest
, WRITEMASK_Y
,
1141 keep(src
[0]), keep(src
[1]),
1143 /* dest.z = src0.z */
1144 if (mask
& WRITEMASK_Z
)
1145 emit_arith(rp
, PFS_OP_MAD
, dest
, WRITEMASK_Z
,
1146 src
[0], pfs_one
, pfs_zero
, flags
);
1148 * result.w = src1.w */
1149 if (mask
& WRITEMASK_XW
) {
1150 src
[1].v_swz
= SWIZZLE_111
; /* Cheat.. */
1151 emit_arith(rp
, PFS_OP_MAD
, dest
,
1152 mask
& WRITEMASK_XW
,
1153 src
[1], pfs_one
, pfs_zero
,
1158 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1159 emit_arith(rp
, PFS_OP_EX2
, dest
, mask
,
1160 src
[0], undef
, undef
,
1164 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1165 temp
= get_temp_reg(rp
);
1167 * MAD dest, src0, 1.0, -temp
1169 emit_arith(rp
, PFS_OP_FRC
, temp
, mask
,
1170 keep(src
[0]), undef
, undef
,
1172 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1173 src
[0], pfs_one
, negate(temp
),
1175 free_temp(rp
, temp
);
1178 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1179 emit_arith(rp
, PFS_OP_FRC
, dest
, mask
,
1180 src
[0], undef
, undef
,
1184 emit_tex(rp
, fpi
, R300_FPITX_OP_KIL
);
1187 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1188 emit_arith(rp
, PFS_OP_LG2
, dest
, mask
,
1189 src
[0], undef
, undef
,
1194 * if (s.x < 0) t.x = 0; else t.x = s.x;
1195 * if (s.y < 0) t.y = 0; else t.y = s.y;
1196 * if (s.w > 128.0) t.w = 128.0; else t.w = s.w;
1197 * if (s.w < -128.0) t.w = -128.0; else t.w = s.w;
1199 * if (t.x > 0) r.y = pow(t.y, t.w); else r.y = 0;
1200 * Also r.y = 0 if t.y < 0
1201 * For the t.x > 0 test, FGLRX uses the CMPH opcode, which
1202 * changes the compare to (t.x + 0.5) > 0.5; we may
1203 * save one instruction by doing CMP -t.x instead
1205 cnstv
[0] = cnstv
[1] = cnstv
[2] = cnstv
[4] = 0.50001;
1206 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1207 temp
= get_temp_reg(rp
);
1208 cnst
= emit_const4fv(rp
, cnstv
);
1209 emit_arith(rp
, PFS_OP_CMP
, temp
,
1210 WRITEMASK_X
| WRITEMASK_Y
,
1211 src
[0], pfs_zero
, src
[0], flags
);
1212 emit_arith(rp
, PFS_OP_MIN
, temp
, WRITEMASK_Z
,
1213 swizzle(keep(src
[0]), W
, W
, W
, W
),
1214 cnst
, undef
, flags
);
1215 emit_arith(rp
, PFS_OP_LG2
, temp
, WRITEMASK_W
,
1216 swizzle(temp
, Y
, Y
, Y
, Y
),
1217 undef
, undef
, flags
);
1218 emit_arith(rp
, PFS_OP_MAX
, temp
, WRITEMASK_Z
,
1219 temp
, negate(cnst
), undef
, flags
);
1220 emit_arith(rp
, PFS_OP_MAD
, temp
, WRITEMASK_W
,
1221 temp
, swizzle(temp
, Z
, Z
, Z
, Z
),
1223 emit_arith(rp
, PFS_OP_EX2
, temp
, WRITEMASK_W
,
1224 temp
, undef
, undef
, flags
);
1225 emit_arith(rp
, PFS_OP_MAD
, dest
, WRITEMASK_Y
,
1226 swizzle(keep(temp
), X
, X
, X
, X
),
1227 pfs_one
, pfs_zero
, flags
);
1229 emit_arith(rp
, PFS_OP_MAD
, temp
, WRITEMASK_X
,
1230 temp
, pfs_one
, pfs_half
, flags
);
1231 emit_arith(rp
, PFS_OP_CMPH
, temp
, WRITEMASK_Z
,
1232 swizzle(keep(temp
), W
, W
, W
, W
),
1233 pfs_zero
, swizzle(keep(temp
), X
, X
, X
, X
),
1236 emit_arith(rp
, PFS_OP_CMP
, temp
, WRITEMASK_Z
,
1238 swizzle(keep(temp
), W
, W
, W
, W
),
1239 negate(swizzle(keep(temp
), X
, X
, X
, X
)),
1242 emit_arith(rp
, PFS_OP_CMP
, dest
, WRITEMASK_Z
,
1244 negate(swizzle(keep(temp
), Y
, Y
, Y
, Y
)),
1246 emit_arith(rp
, PFS_OP_MAD
, dest
,
1247 WRITEMASK_X
| WRITEMASK_W
,
1252 free_temp(rp
, temp
);
1255 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1256 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1257 src
[2] = t_src(rp
, fpi
->SrcReg
[2]);
1258 /* result = tmp0tmp1 + (1 - tmp0)tmp2
1259 * = tmp0tmp1 + tmp2 + (-tmp0)tmp2
1260 * MAD temp, -tmp0, tmp2, tmp2
1261 * MAD result, tmp0, tmp1, temp
1263 temp
= get_temp_reg(rp
);
1264 emit_arith(rp
, PFS_OP_MAD
, temp
, mask
,
1265 negate(keep(src
[0])), keep(src
[2]), src
[2],
1267 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1268 src
[0], src
[1], temp
,
1270 free_temp(rp
, temp
);
1273 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1274 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1275 src
[2] = t_src(rp
, fpi
->SrcReg
[2]);
1276 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1277 src
[0], src
[1], src
[2],
1281 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1282 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1283 emit_arith(rp
, PFS_OP_MAX
, dest
, mask
,
1284 src
[0], src
[1], undef
,
1288 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1289 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1290 emit_arith(rp
, PFS_OP_MIN
, dest
, mask
,
1291 src
[0], src
[1], undef
,
1296 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1297 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1298 src
[0], pfs_one
, pfs_zero
,
1302 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1303 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1304 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1305 src
[0], src
[1], pfs_zero
,
1309 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1310 src
[1] = t_scalar_src(rp
, fpi
->SrcReg
[1]);
1311 temp
= get_temp_reg(rp
);
1312 emit_arith(rp
, PFS_OP_LG2
, temp
, WRITEMASK_W
,
1313 src
[0], undef
, undef
,
1315 emit_arith(rp
, PFS_OP_MAD
, temp
, WRITEMASK_W
,
1316 temp
, src
[1], pfs_zero
,
1318 emit_arith(rp
, PFS_OP_EX2
, dest
, fpi
->DstReg
.WriteMask
,
1321 free_temp(rp
, temp
);
1324 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1325 emit_arith(rp
, PFS_OP_RCP
, dest
, mask
,
1326 src
[0], undef
, undef
,
1330 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1331 emit_arith(rp
, PFS_OP_RSQ
, dest
, mask
,
1332 absolute(src
[0]), pfs_zero
, pfs_zero
,
1336 ERROR("SCS not implemented\n");
1339 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1340 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1341 temp
= get_temp_reg(rp
);
1342 /* temp = src0 - src1
1343 * dest.c = (temp.c < 0.0) ? 0 : 1
1345 emit_arith(rp
, PFS_OP_MAD
, temp
, mask
,
1346 src
[0], pfs_one
, negate(src
[1]),
1348 emit_arith(rp
, PFS_OP_CMP
, dest
, mask
,
1349 pfs_one
, pfs_zero
, temp
,
1351 free_temp(rp
, temp
);
1354 ERROR("SIN not implemented\n");
1357 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1358 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1359 temp
= get_temp_reg(rp
);
1360 /* temp = src0 - src1
1361 * dest.c = (temp.c < 0.0) ? 1 : 0
1363 emit_arith(rp
, PFS_OP_MAD
, temp
, mask
,
1364 src
[0], pfs_one
, negate(src
[1]),
1366 emit_arith(rp
, PFS_OP_CMP
, dest
, mask
,
1367 pfs_zero
, pfs_one
, temp
,
1369 free_temp(rp
, temp
);
1372 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1373 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1374 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1375 src
[0], pfs_one
, negate(src
[1]),
1379 emit_tex(rp
, fpi
, R300_FPITX_OP_TEX
);
1382 emit_tex(rp
, fpi
, R300_FPITX_OP_TXB
);
1385 emit_tex(rp
, fpi
, R300_FPITX_OP_TXP
);
1388 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1389 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1390 temp
= get_temp_reg(rp
);
1391 /* temp = src0.zxy * src1.yzx */
1392 emit_arith(rp
, PFS_OP_MAD
, temp
, WRITEMASK_XYZ
,
1393 swizzle(keep(src
[0]), Z
, X
, Y
, W
),
1394 swizzle(keep(src
[1]), Y
, Z
, X
, W
),
1397 /* dest.xyz = src0.yzx * src1.zxy - temp
1398 * dest.w = undefined
1400 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
& WRITEMASK_XYZ
,
1401 swizzle(src
[0], Y
, Z
, X
, W
),
1402 swizzle(src
[1], Z
, X
, Y
, W
),
1406 free_temp(rp
, temp
);
1410 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
1422 /* - Initialise the compile-state structures
1423 * - Determine which hardware registers each input corresponds to
1424 */
/* init_program: start a fresh compile of the fragment program rp.
 *
 * Steps visible in this listing:
 *  - clear the translated, error and params_uptodate flags and reset
 *    first_node_has_tex, max_temp_idx and the end markers of node 0;
 *  - point rp->cs at the pfs_compile state of the context and zero it;
 *  - set the vsrc and ssrc entries of each ALU slot to SRC_CONST;
 *  - give each texcoord, COL0 and COL1 input flagged in InputsRead a
 *    hardware temp from get_hw_temp(), in that order;
 *  - warn once about any remaining input bits and map them to hwreg 0;
 *  - walk the Mesa instructions up to OPCODE_END, counting references
 *    to temporaries and inputs, and store the mask of temporaries seen
 *    in cs->temp_in_use.
 *
 * NOTE(review): the embedded line numbering has gaps (for example
 * 1432-1433, 1438-1439, 1441-1442, 1450, 1505-1507), so the
 * declarations of i, j and idx, some loop headers, case labels and all
 * closing braces are not shown here. Confirm against the complete file.
 * NOTE(review): temps_used is tested with (1 << idx); this assumes the
 * temporary index always fits the width of the mask. TODO confirm.
 */
1425 static void init_program(struct r300_fragment_program
*rp
)
1427 struct r300_pfs_compile_state
*cs
= NULL
;
1428 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1429 struct prog_instruction
*fpi
;
1430 GLuint InputsRead
= mp
->Base
.InputsRead
;
1431 GLuint temps_used
= 0; /* bitmask of Mesa temporaries seen so far; bit idx guards cs->temps[idx] */
1434 /* New compile, reset tracking data */
1435 rp
->translated
= GL_FALSE
;
1436 rp
->error
= GL_FALSE
;
1437 rp
->cs
= cs
= &(R300_CONTEXT(rp
->ctx
)->state
.pfs_compile
);
1440 rp
->first_node_has_tex
= 0;
1443 rp
->params_uptodate
= GL_FALSE
;
1444 rp
->max_temp_idx
= 0;
1445 rp
->node
[0].alu_end
= -1;
1446 rp
->node
[0].tex_end
= -1;
/* cs and rp->cs point at the same object, so this zeroes the whole compile state */
1448 _mesa_memset(cs
, 0, sizeof(*rp
->cs
));
1449 for (i
=0;i
<PFS_MAX_ALU_INST
;i
++) {
1451 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
1452 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
1456 /* Work out what temps the Mesa inputs correspond to, this must match
1457 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1458 * configures itself based on the fragprog's InputsRead
1460 * NOTE: this depends on get_hw_temp() allocating registers in order,
1461 * starting from register 0.
1464 /* Texcoords come first */
1465 for (i
=0;i
<rp
->ctx
->Const
.MaxTextureUnits
;i
++) {
1466 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1467 cs
->inputs
[FRAG_ATTRIB_TEX0
+i
].refcount
= 0;
1468 cs
->inputs
[FRAG_ATTRIB_TEX0
+i
].reg
= get_hw_temp(rp
);
/* Handled bits are cleared from the local copy so that only unhandled inputs remain at the end */
1471 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
1473 /* Then primary colour */
1474 if (InputsRead
& FRAG_BIT_COL0
) {
1475 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
1476 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
= get_hw_temp(rp
);
1478 InputsRead
&= ~FRAG_BIT_COL0
;
1480 /* Secondary color */
1481 if (InputsRead
& FRAG_BIT_COL1
) {
1482 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
1483 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
= get_hw_temp(rp
);
1485 InputsRead
&= ~FRAG_BIT_COL1
;
1489 WARN_ONCE("Don't know how to handle inputs 0x%x\n",
1491 /* force read from hwreg 0 for now */
1493 if (InputsRead
& (1<<i
)) cs
->inputs
[i
].reg
= 0;
1496 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
1497 * That way, we can free up the reg when it's no longer needed
1499 if (!mp
->Base
.Instructions
) {
1500 ERROR("No instructions found in program\n");
1504 for (fpi
=mp
->Base
.Instructions
;fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1508 idx
= fpi
->SrcReg
[i
].Index
;
1509 switch (fpi
->SrcReg
[i
].File
) {
1510 case PROGRAM_TEMPORARY
:
1511 if (!(temps_used
& (1<<idx
))) {
1512 cs
->temps
[idx
].reg
= -1;
1513 cs
->temps
[idx
].refcount
= 1;
1514 temps_used
|= (1 << idx
);
1516 cs
->temps
[idx
].refcount
++;
1519 cs
->inputs
[idx
].refcount
++;
1525 idx
= fpi
->DstReg
.Index
;
1526 if (fpi
->DstReg
.File
== PROGRAM_TEMPORARY
) {
1527 if (!(temps_used
& (1<<idx
))) {
1528 cs
->temps
[idx
].reg
= -1;
1529 cs
->temps
[idx
].refcount
= 1;
1530 temps_used
|= (1 << idx
);
1532 cs
->temps
[idx
].refcount
++;
1535 cs
->temp_in_use
= temps_used
;
/* update_params: refresh the constants that are backed by Mesa program
 * parameters.
 *
 * Calls _mesa_load_state_parameters() on mp->Base.Parameters, copies the
 * 4-vector rp->param[i].values into rp->constant[rp->param[i].idx] for
 * every i below rp->param_nr, then sets rp->params_uptodate to GL_TRUE.
 *
 * NOTE(review): original lines 1539, 1541-1542, 1544, 1546 and 1549 are
 * missing from this listing, so the declaration of i and any condition
 * on the load call at 1545 are not shown. Confirm against the full file.
 */
1538 static void update_params(struct r300_fragment_program
*rp
)
1540 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1543 /* Ask Mesa nicely to fill in ParameterValues for us */
1545 _mesa_load_state_parameters(rp
->ctx
, mp
->Base
.Parameters
);
/* param[i].idx selects the destination slot in rp->constant for parameter i */
1547 for (i
=0;i
<rp
->param_nr
;i
++)
1548 COPY_4V(rp
->constant
[rp
->param
[i
].idx
], rp
->param
[i
].values
);
1550 rp
->params_uptodate
= GL_TRUE
;
/* r300_translate_fragment_shader: translate rp into its hardware form
 * unless rp->translated is already set.
 *
 * Visible work when a translation is needed: run parse_program(), bring
 * the vector and scalar positions to their common maximum, derive the
 * alu_end marker of the current node and of the whole program, clamp a
 * negative tex_end of the current node to 0, derive rp->tex_end from
 * rp->tex.length, assert that both alu_end values are non-negative and
 * finally set rp->translated to GL_TRUE.
 *
 * NOTE(review): cs starts as NULL and is dereferenced at 1568 with no
 * visible assignment; original lines 1558-1561 are missing from this
 * listing and presumably set it. The handling of a GL_FALSE result from
 * parse_program() (1563-1567) and whatever follows 1581 are also not
 * shown. Confirm against the complete file.
 */
1553 void r300_translate_fragment_shader(struct r300_fragment_program
*rp
)
1555 struct r300_pfs_compile_state
*cs
= NULL
;
1557 if (!rp
->translated
) {
1562 if (parse_program(rp
) == GL_FALSE
) {
/* Align the vector and scalar streams on one slot before computing end markers */
1568 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
/* alu_end of a node is relative to the alu_offset of that node */
1569 rp
->node
[rp
->cur_node
].alu_end
=
1570 cs
->v_pos
- rp
->node
[rp
->cur_node
].alu_offset
- 1;
1571 if (rp
->node
[rp
->cur_node
].tex_end
< 0)
1572 rp
->node
[rp
->cur_node
].tex_end
= 0;
1574 rp
->alu_end
= cs
->v_pos
- 1;
/* An empty tex program still yields tex_end 0 rather than -1 */
1576 rp
->tex_end
= rp
->tex
.length
? rp
->tex
.length
- 1 : 0;
1577 assert(rp
->node
[rp
->cur_node
].alu_end
>= 0);
1578 assert(rp
->alu_end
>= 0);
1580 rp
->translated
= GL_TRUE
;
/* Debug dump, disabled by the constant-false condition */
1581 if (0) dump_program(rp
);
1587 /* just some random things... */
/* dump_program: debugging aid that writes the Mesa program and the
 * generated hardware program for rp to stderr.
 *
 * Output order: a banner with an incrementing pc counter, the Mesa
 * program via _mesa_print_program(), the raw tex instruction words, one
 * summary line per node from 0 to rp->cur_node, and then five groups
 * (tex, inst0, inst1, inst2, inst3). Each group is a header word built
 * from the end index shifted left by 16 and a register offset shifted
 * right by 2, followed by the instruction words. A line of zeros ends
 * the dump.
 *
 * The group loops run to tex_end or alu_end inclusive, so the tex group
 * prints one word even when rp->tex.length is 0 and tex_end is 0.
 *
 * NOTE(review): the declarations of i and pc are on lines missing from
 * this listing, and the header words presumably mimic command-stream
 * packet headers. Confirm both against the complete file.
 */
1588 static void dump_program(struct r300_fragment_program
*rp
)
1593 fprintf(stderr
, "pc=%d*************************************\n", pc
++);
1595 fprintf(stderr
, "Mesa program:\n");
1596 fprintf(stderr
, "-------------\n");
1597 _mesa_print_program(&rp
->mesa_program
.Base
);
1600 fprintf(stderr
, "Hardware program\n");
1601 fprintf(stderr
, "----------------\n");
1603 fprintf(stderr
, "tex:\n");
/* Raw tex instruction words, one per line, bounded by tex.length */
1605 for(i
=0;i
<rp
->tex
.length
;i
++) {
1606 fprintf(stderr
, "%08x\n", rp
->tex
.inst
[i
]);
/* One summary line per node, for nodes 0 to cur_node inclusive */
1609 for (i
=0;i
<(rp
->cur_node
+1);i
++) {
1610 fprintf(stderr
, "NODE %d: alu_offset: %d, tex_offset: %d, "\
1611 "alu_end: %d, tex_end: %d\n", i
,
1612 rp
->node
[i
].alu_offset
,
1613 rp
->node
[i
].tex_offset
,
1614 rp
->node
[i
].alu_end
,
1615 rp
->node
[i
].tex_end
);
/* Tex group: header word, then words 0 to tex_end inclusive */
1618 fprintf(stderr
, "%08x\n",
1619 ((rp
->tex_end
<< 16) | (R300_PFS_TEXI_0
>> 2)));
1620 for (i
=0;i
<=rp
->tex_end
;i
++)
1621 fprintf(stderr
, "%08x\n", rp
->tex
.inst
[i
]);
1623 /* dump program in pretty_print_command_stream.tcl-readable format */
1624 fprintf(stderr
, "%08x\n",
1625 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR0_0
>> 2)));
1626 for (i
=0;i
<=rp
->alu_end
;i
++)
1627 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst0
);
1629 fprintf(stderr
, "%08x\n",
1630 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR1_0
>> 2)));
1631 for (i
=0;i
<=rp
->alu_end
;i
++)
1632 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst1
);
1634 fprintf(stderr
, "%08x\n",
1635 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR2_0
>> 2)));
1636 for (i
=0;i
<=rp
->alu_end
;i
++)
1637 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst2
);
1639 fprintf(stderr
, "%08x\n",
1640 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR3_0
>> 2)));
1641 for (i
=0;i
<=rp
->alu_end
;i
++)
1642 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst3
);
1644 fprintf(stderr
, "00000000\n");