2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 * Ben Skeggs <darktama@iinet.net.au>
31 * Jerome Glisse <j.glisse@gmail.com>
36 * - COS/SIN/SCS instructions
37 * - Depth write, WPOS/FOGC inputs
39 * - Verify results of opcodes for accuracy, I've only checked them
49 #include "program_instruction.h"
50 #include "r300_context.h"
51 #include "r300_fragprog.h"
55 * Useful macros and values
57 #define ERROR(fmt, args...) do { \
58 fprintf(stderr, "%s::%s(): " fmt "\n", \
59 __FILE__, __func__, ##args); \
60 rp->error = GL_TRUE; \
63 #define PFS_INVAL 0xFFFFFFFF
64 #define COMPILE_STATE struct r300_pfs_compile_state *cs = rp->cs
76 #define SWIZZLE_HHH 10
78 #define swizzle(r, x, y, z, w) do_swizzle(rp, r, \
/*
 * Internal register descriptor, packed into one unsigned word.
 *
 *   bits  0- 1  register file (one of REG_TYPE_*)
 *   bits  2- 7  register index
 *   bits  8-12  vector swizzle
 *   bits 13-17  scalar swizzle
 *   bit  18     negate vector part
 *   bit  19     negate scalar part
 *   bit  20     absolute value
 *   bit  21     no-use flag
 *   bit  22     descriptor is valid
 *
 * Every macro parameter is parenthesized so that expression arguments
 * (e.g. "a | b", "c ? x : y") are evaluated as a unit before being
 * shifted or masked.
 */

/* Register files */
#define REG_TYPE_INPUT		0
#define REG_TYPE_OUTPUT		1
#define REG_TYPE_TEMP		2
#define REG_TYPE_CONST		3

/* Field positions */
#define REG_TYPE_SHIFT		0
#define REG_INDEX_SHIFT		2
#define REG_VSWZ_SHIFT		8
#define REG_SSWZ_SHIFT		13
#define REG_NEGV_SHIFT		18
#define REG_NEGS_SHIFT		19
#define REG_ABS_SHIFT		20
#define REG_NO_USE_SHIFT	21
#define REG_VALID_SHIFT		22

/* Field masks (already shifted into place) */
#define REG_TYPE_MASK		(0x03 << REG_TYPE_SHIFT)
#define REG_INDEX_MASK		(0x3F << REG_INDEX_SHIFT)
#define REG_VSWZ_MASK		(0x1F << REG_VSWZ_SHIFT)
#define REG_SSWZ_MASK		(0x1F << REG_SSWZ_SHIFT)
#define REG_NEGV_MASK		(0x01 << REG_NEGV_SHIFT)
#define REG_NEGS_MASK		(0x01 << REG_NEGS_SHIFT)
#define REG_ABS_MASK		(0x01 << REG_ABS_SHIFT)
#define REG_NO_USE_MASK		(0x01 << REG_NO_USE_SHIFT)
#define REG_VALID_MASK		(0x01 << REG_VALID_SHIFT)

/* Build a descriptor from its fields; negate/abs bits start cleared. */
#define REG(type, index, vswz, sswz, nouse, valid)			\
	((((type)  << REG_TYPE_SHIFT)   & REG_TYPE_MASK)   |		\
	 (((index) << REG_INDEX_SHIFT)  & REG_INDEX_MASK)  |		\
	 (((nouse) << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) |		\
	 (((valid) << REG_VALID_SHIFT)  & REG_VALID_MASK)  |		\
	 (((vswz)  << REG_VSWZ_SHIFT)   & REG_VSWZ_MASK)   |		\
	 (((sswz)  << REG_SSWZ_SHIFT)   & REG_SSWZ_MASK))

/* Field extractors: return the field value shifted down to bit 0. */
#define REG_GET_TYPE(reg)						\
	(((reg) & REG_TYPE_MASK) >> REG_TYPE_SHIFT)
#define REG_GET_INDEX(reg)						\
	(((reg) & REG_INDEX_MASK) >> REG_INDEX_SHIFT)
#define REG_GET_VSWZ(reg)						\
	(((reg) & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT)
#define REG_GET_SSWZ(reg)						\
	(((reg) & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT)
#define REG_GET_NO_USE(reg)						\
	(((reg) & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
#define REG_GET_VALID(reg)						\
	(((reg) & REG_VALID_MASK) >> REG_VALID_SHIFT)

/*
 * Field setters: "reg" must be a modifiable lvalue; it is assigned in
 * place with the named field replaced and all other bits preserved.
 */
#define REG_SET_TYPE(reg, type)						\
	((reg) = (((reg) & ~REG_TYPE_MASK) |				\
		  (((type) << REG_TYPE_SHIFT) & REG_TYPE_MASK)))
#define REG_SET_INDEX(reg, index)					\
	((reg) = (((reg) & ~REG_INDEX_MASK) |				\
		  (((index) << REG_INDEX_SHIFT) & REG_INDEX_MASK)))
#define REG_SET_VSWZ(reg, vswz)						\
	((reg) = (((reg) & ~REG_VSWZ_MASK) |				\
		  (((vswz) << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)))
#define REG_SET_SSWZ(reg, sswz)						\
	((reg) = (((reg) & ~REG_SSWZ_MASK) |				\
		  (((sswz) << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)))
#define REG_SET_NO_USE(reg, nouse)					\
	((reg) = (((reg) & ~REG_NO_USE_MASK) |				\
		  (((nouse) << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)))
#define REG_SET_VALID(reg, valid)					\
	((reg) = (((reg) & ~REG_VALID_MASK) |				\
		  (((valid) << REG_VALID_SHIFT) & REG_VALID_MASK)))

/* Modifier setters: OR the corresponding flag bit into "reg" in place. */
#define REG_ABS(reg)							\
	((reg) = ((reg) | REG_ABS_MASK))
#define REG_NEGV(reg)							\
	((reg) = ((reg) | REG_NEGV_MASK))
#define REG_NEGS(reg)							\
	((reg) = ((reg) | REG_NEGS_MASK))
156 * Data structures for fragment program generation
159 /* description of r300 native hw instructions */
160 static const struct {
166 { "MAD", 3, R300_FPI0_OUTC_MAD
, R300_FPI2_OUTA_MAD
},
167 { "DP3", 2, R300_FPI0_OUTC_DP3
, R300_FPI2_OUTA_DP4
},
168 { "DP4", 2, R300_FPI0_OUTC_DP4
, R300_FPI2_OUTA_DP4
},
169 { "MIN", 2, R300_FPI0_OUTC_MIN
, R300_FPI2_OUTA_MIN
},
170 { "MAX", 2, R300_FPI0_OUTC_MAX
, R300_FPI2_OUTA_MAX
},
171 { "CMP", 3, R300_FPI0_OUTC_CMP
, R300_FPI2_OUTA_CMP
},
172 { "FRC", 1, R300_FPI0_OUTC_FRC
, R300_FPI2_OUTA_FRC
},
173 { "EX2", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_EX2
},
174 { "LG2", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_LG2
},
175 { "RCP", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_RCP
},
176 { "RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_RSQ
},
177 { "REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA
, PFS_INVAL
},
178 { "CMPH", 3, R300_FPI0_OUTC_CMPH
, PFS_INVAL
},
182 /* vector swizzles r300 can support natively, with a couple of
183 * cases we handle specially
185 * REG_VSWZ/REG_SSWZ is an index into this table
187 #define SLOT_VECTOR (1<<0)
188 #define SLOT_SCALAR (1<<3)
189 #define SLOT_BOTH (SLOT_VECTOR | SLOT_SCALAR)
190 #define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
194 static const struct r300_pfs_swizzle
{
195 GLuint hash
; /* swizzle value this matches */
196 GLuint base
; /* base value for hw swizzle */
197 GLuint stride
; /* difference in base between arg0/1/2 */
200 /* native swizzles */
201 { MAKE_SWZ3(X
, Y
, Z
), R300_FPI0_ARGC_SRC0C_XYZ
, 4, SLOT_VECTOR
},
202 { MAKE_SWZ3(X
, X
, X
), R300_FPI0_ARGC_SRC0C_XXX
, 4, SLOT_VECTOR
},
203 { MAKE_SWZ3(Y
, Y
, Y
), R300_FPI0_ARGC_SRC0C_YYY
, 4, SLOT_VECTOR
},
204 { MAKE_SWZ3(Z
, Z
, Z
), R300_FPI0_ARGC_SRC0C_ZZZ
, 4, SLOT_VECTOR
},
205 { MAKE_SWZ3(W
, W
, W
), R300_FPI0_ARGC_SRC0A
, 1, SLOT_SCALAR
},
206 { MAKE_SWZ3(Y
, Z
, X
), R300_FPI0_ARGC_SRC0C_YZX
, 1, SLOT_VECTOR
},
207 { MAKE_SWZ3(Z
, X
, Y
), R300_FPI0_ARGC_SRC0C_ZXY
, 1, SLOT_VECTOR
},
208 { MAKE_SWZ3(W
, Z
, Y
), R300_FPI0_ARGC_SRC0CA_WZY
, 1, SLOT_BOTH
},
209 { MAKE_SWZ3(ONE
, ONE
, ONE
), R300_FPI0_ARGC_ONE
, 0, 0},
210 { MAKE_SWZ3(ZERO
, ZERO
, ZERO
), R300_FPI0_ARGC_ZERO
, 0, 0},
211 { PFS_INVAL
, R300_FPI0_ARGC_HALF
, 0, 0},
212 { PFS_INVAL
, 0, 0, 0},
215 /* used during matching of non-native swizzles */
216 #define SWZ_X_MASK (7 << 0)
217 #define SWZ_Y_MASK (7 << 3)
218 #define SWZ_Z_MASK (7 << 6)
219 #define SWZ_W_MASK (7 << 9)
220 static const struct {
221 GLuint hash
; /* used to mask matching swizzle components */
222 int mask
; /* actual outmask */
223 int count
; /* count of components matched */
225 { SWZ_X_MASK
|SWZ_Y_MASK
|SWZ_Z_MASK
, 1|2|4, 3},
226 { SWZ_X_MASK
|SWZ_Y_MASK
, 1|2, 2},
227 { SWZ_X_MASK
|SWZ_Z_MASK
, 1|4, 2},
228 { SWZ_Y_MASK
|SWZ_Z_MASK
, 2|4, 2},
232 { PFS_INVAL
, PFS_INVAL
, PFS_INVAL
}
235 /* mapping from SWIZZLE_* to r300 native values for scalar insns */
236 #define SWIZZLE_HALF 6
237 static const struct {
238 int base
; /* hw value of swizzle */
239 int stride
; /* difference between SRC0/1/2 */
242 { R300_FPI2_ARGA_SRC0C_X
, 3, SLOT_VECTOR
},
243 { R300_FPI2_ARGA_SRC0C_Y
, 3, SLOT_VECTOR
},
244 { R300_FPI2_ARGA_SRC0C_Z
, 3, SLOT_VECTOR
},
245 { R300_FPI2_ARGA_SRC0A
, 1, SLOT_SCALAR
},
246 { R300_FPI2_ARGA_ZERO
, 0, 0 },
247 { R300_FPI2_ARGA_ONE
, 0, 0 },
248 { R300_FPI2_ARGA_HALF
, 0, 0 }
251 /* boiler-plate reg, for convenience */
252 static const GLuint undef
= REG(REG_TYPE_TEMP
,
259 /* constant one source */
260 static const GLuint pfs_one
= REG(REG_TYPE_CONST
,
267 /* constant half source */
268 static const GLuint pfs_half
= REG(REG_TYPE_CONST
,
275 /* constant zero source */
276 static const GLuint pfs_zero
= REG(REG_TYPE_CONST
,
284 * Common functions prototypes
286 static void dump_program(struct r300_fragment_program
*rp
);
287 static void emit_arith(struct r300_fragment_program
*rp
, int op
,
288 GLuint dest
, int mask
,
289 GLuint src0
, GLuint src1
, GLuint src2
,
293 * Helper functions prototypes
295 static int get_hw_temp(struct r300_fragment_program
*rp
)
298 int r
= ffs(~cs
->hwreg_in_use
);
300 ERROR("Out of hardware temps\n");
304 cs
->hwreg_in_use
|= (1 << --r
);
305 if (r
> rp
->max_temp_idx
)
306 rp
->max_temp_idx
= r
;
311 static int get_hw_temp_tex(struct r300_fragment_program
*rp
)
316 r
= ffs(~(cs
->hwreg_in_use
| cs
->used_in_node
));
318 return get_hw_temp(rp
); /* Will cause an indirection */
320 cs
->hwreg_in_use
|= (1 << --r
);
321 if (r
> rp
->max_temp_idx
)
322 rp
->max_temp_idx
= r
;
327 static void free_hw_temp(struct r300_fragment_program
*rp
, int idx
)
330 cs
->hwreg_in_use
&= ~(1<<idx
);
333 static GLuint
get_temp_reg(struct r300_fragment_program
*rp
)
339 index
= ffs(~cs
->temp_in_use
);
341 ERROR("Out of program temps\n");
345 cs
->temp_in_use
|= (1 << --index
);
346 cs
->temps
[index
].refcount
= 0xFFFFFFFF;
347 cs
->temps
[index
].reg
= -1;
349 REG_SET_TYPE(r
, REG_TYPE_TEMP
);
350 REG_SET_INDEX(r
, index
);
351 REG_SET_VALID(r
, GL_TRUE
);
355 static GLuint
get_temp_reg_tex(struct r300_fragment_program
*rp
)
361 index
= ffs(~cs
->temp_in_use
);
363 ERROR("Out of program temps\n");
367 cs
->temp_in_use
|= (1 << --index
);
368 cs
->temps
[index
].refcount
= 0xFFFFFFFF;
369 cs
->temps
[index
].reg
= get_hw_temp_tex(rp
);
371 REG_SET_TYPE(r
, REG_TYPE_TEMP
);
372 REG_SET_INDEX(r
, index
);
373 REG_SET_VALID(r
, GL_TRUE
);
377 static void free_temp(struct r300_fragment_program
*rp
, GLuint r
)
380 GLuint index
= REG_GET_INDEX(r
);
382 if (!(cs
->temp_in_use
& (1 << index
)))
385 if (REG_GET_TYPE(r
) == REG_TYPE_TEMP
) {
386 free_hw_temp(rp
, cs
->temps
[index
].reg
);
387 cs
->temps
[index
].reg
= -1;
388 cs
->temp_in_use
&= ~(1 << index
);
389 } else if (REG_GET_TYPE(r
) == REG_TYPE_INPUT
) {
390 free_hw_temp(rp
, cs
->inputs
[index
].reg
);
391 cs
->inputs
[index
].reg
= -1;
395 static GLuint
emit_param4fv(struct r300_fragment_program
*rp
,
402 pidx
= rp
->param_nr
++;
403 index
= rp
->const_nr
++;
404 if (pidx
>= PFS_NUM_CONST_REGS
|| index
>= PFS_NUM_CONST_REGS
) {
405 ERROR("Out of const/param slots!\n");
409 rp
->param
[pidx
].idx
= index
;
410 rp
->param
[pidx
].values
= values
;
411 rp
->params_uptodate
= GL_FALSE
;
413 REG_SET_TYPE(r
, REG_TYPE_CONST
);
414 REG_SET_INDEX(r
, index
);
415 REG_SET_VALID(r
, GL_TRUE
);
419 static GLuint
emit_const4fv(struct r300_fragment_program
*rp
, GLfloat
*cp
)
424 index
= rp
->const_nr
++;
425 if (index
>= PFS_NUM_CONST_REGS
) {
426 ERROR("Out of hw constants!\n");
430 COPY_4V(rp
->constant
[index
], cp
);
432 REG_SET_TYPE(r
, REG_TYPE_CONST
);
433 REG_SET_INDEX(r
, index
);
434 REG_SET_VALID(r
, GL_TRUE
);
438 static inline GLuint
negate(GLuint r
)
445 /* Hack, to prevent clobbering sources used multiple times when
446 * emulating non-native instructions
448 static inline GLuint
keep(GLuint r
)
450 REG_SET_NO_USE(r
, GL_TRUE
);
454 static inline GLuint
absolute(GLuint r
)
/*
 * swz_native: apply the ARB negate mask "arbneg" to a source register
 * descriptor "src" whose swizzle is already natively supported.
 *
 * Bit 3 of arbneg is copied into the scalar-negate bit of src.  The low
 * three bits select the vector negate: all clear -> REG_NEGV_MASK is
 * cleared, all set -> REG_NEGV_MASK is set.  For a mixed pattern a
 * temporary is obtained through get_temp_reg() when *r is not yet valid,
 * and src is used once with REG_NEGV_MASK set and once with it cleared
 * (the second time under write mask (arbneg ^ 0x7) | WRITEMASK_W).
 *
 * NOTE(review): the first statement masks src with ~REG_NEGS_SHIFT.
 * REG_NEGS_SHIFT is the bit position (19), not a bit mask, so
 * "src & ~19" clears bits 0, 1 and 4 of the descriptor -- i.e. it
 * damages the TYPE field (bits 0-1) and one bit of the INDEX field
 * (bits 2-7) and leaves the old scalar-negate bit untouched.
 * Presumably ~REG_NEGS_MASK was intended; confirm and fix.
 */
460 static int swz_native(struct r300_fragment_program
*rp
,
465 /* Native swizzle, handle negation */
466 src
= (src
& ~REG_NEGS_SHIFT
) |
467 (((arbneg
>> 3) & 1) << REG_NEGS_SHIFT
);
469 if ((arbneg
& 0x7) == 0x0) {
470 src
= src
& ~REG_NEGV_MASK
;
472 } else if ((arbneg
& 0x7) == 0x7) {
473 src
|= REG_NEGV_MASK
;
476 if (!REG_GET_VALID(*r
))
477 *r
= get_temp_reg(rp
);
478 src
|= REG_NEGV_MASK
;
487 src
= src
& ~REG_NEGV_MASK
;
491 (arbneg
^ 0x7) | WRITEMASK_W
,
501 static int swz_emit_partial(struct r300_fragment_program
*rp
,
511 if (!REG_GET_VALID(*r
))
512 *r
= get_temp_reg(rp
);
514 /* A partial match, VSWZ/mask define what parts of the
515 * desired swizzle we match
517 if (mc
+ s_mask
[mask
].count
== 3) {
519 src
|= ((arbneg
>> 3) & 1) << REG_NEGS_SHIFT
;
522 tmp
= arbneg
& s_mask
[mask
].mask
;
524 tmp
= tmp
^ s_mask
[mask
].mask
;
529 arbneg
& s_mask
[mask
].mask
,
530 keep(src
) | REG_NEGV_MASK
,
535 REG_SET_NO_USE(src
, GL_TRUE
);
537 REG_SET_NO_USE(src
, GL_FALSE
);
549 REG_SET_NO_USE(src
, GL_TRUE
);
551 REG_SET_NO_USE(src
, GL_FALSE
);
556 (arbneg
& s_mask
[mask
].mask
) | wmask
,
564 REG_SET_NO_USE(src
, GL_TRUE
);
566 REG_SET_NO_USE(src
, GL_FALSE
);
568 emit_arith(rp
, PFS_OP_MAD
,
570 s_mask
[mask
].mask
| wmask
,
577 return s_mask
[mask
].count
;
580 static GLuint
do_swizzle(struct r300_fragment_program
*rp
,
590 /* If swizzling from something without an XYZW native swizzle,
591 * emit result to a temp, and do new swizzle from the temp.
593 if (REG_GET_VSWZ(src
) != SWIZZLE_XYZ
||
594 REG_GET_SSWZ(src
) != SWIZZLE_W
) {
595 GLuint temp
= get_temp_reg(rp
);
607 /* set scalar swizzling */
608 REG_SET_SSWZ(src
, GET_SWZ(arbswz
, 3));
611 vswz
= REG_GET_VSWZ(src
);
615 REG_SET_VSWZ(src
, vswz
);
616 chash
= v_swiz
[REG_GET_VSWZ(src
)].hash
&
619 if (chash
== (arbswz
& s_mask
[c_mask
].hash
)) {
620 if (s_mask
[c_mask
].count
== 3) {
621 v_match
+= swz_native(rp
,
626 v_match
+= swz_emit_partial(rp
,
637 /* Fill with something invalid.. all 0's was
638 * wrong before, matched SWIZZLE_X. So all
639 * 1's will be okay for now
641 arbswz
|= (PFS_INVAL
& s_mask
[c_mask
].hash
);
643 } while(v_swiz
[++vswz
].hash
!= PFS_INVAL
);
644 REG_SET_VSWZ(src
, SWIZZLE_XYZ
);
645 } while (s_mask
[++c_mask
].hash
!= PFS_INVAL
);
647 ERROR("should NEVER get here\n");
651 static GLuint
t_src(struct r300_fragment_program
*rp
,
652 struct prog_src_register fpsrc
)
656 switch (fpsrc
.File
) {
657 case PROGRAM_TEMPORARY
:
658 REG_SET_INDEX(r
, fpsrc
.Index
);
659 REG_SET_VALID(r
, GL_TRUE
);
660 REG_SET_TYPE(r
, REG_TYPE_TEMP
);
663 REG_SET_INDEX(r
, fpsrc
.Index
);
664 REG_SET_VALID(r
, GL_TRUE
);
665 REG_SET_TYPE(r
, REG_TYPE_INPUT
);
667 case PROGRAM_LOCAL_PARAM
:
668 r
= emit_param4fv(rp
,
669 rp
->mesa_program
.Base
.LocalParams
[fpsrc
.Index
]);
671 case PROGRAM_ENV_PARAM
:
672 r
= emit_param4fv(rp
,
673 rp
->ctx
->FragmentProgram
.Parameters
[fpsrc
.Index
]);
675 case PROGRAM_STATE_VAR
:
676 case PROGRAM_NAMED_PARAM
:
677 r
= emit_param4fv(rp
,
678 rp
->mesa_program
.Base
.Parameters
->ParameterValues
[fpsrc
.Index
]);
681 ERROR("unknown SrcReg->File %x\n", fpsrc
.File
);
685 /* no point swizzling ONE/ZERO/HALF constants... */
686 if (REG_GET_VSWZ(r
) < SWIZZLE_111
|| REG_GET_SSWZ(r
) < SWIZZLE_ZERO
)
687 r
= do_swizzle(rp
, r
, fpsrc
.Swizzle
, fpsrc
.NegateBase
);
691 static GLuint
t_scalar_src(struct r300_fragment_program
*rp
,
692 struct prog_src_register fpsrc
)
694 struct prog_src_register src
= fpsrc
;
695 int sc
= GET_SWZ(fpsrc
.Swizzle
, 0); /* X */
697 src
.Swizzle
= ((sc
<<0)|(sc
<<3)|(sc
<<6)|(sc
<<9));
699 return t_src(rp
, src
);
702 static GLuint
t_dst(struct r300_fragment_program
*rp
,
703 struct prog_dst_register dest
)
708 case PROGRAM_TEMPORARY
:
709 REG_SET_INDEX(r
, dest
.Index
);
710 REG_SET_VALID(r
, GL_TRUE
);
711 REG_SET_TYPE(r
, REG_TYPE_TEMP
);
714 REG_SET_TYPE(r
, REG_TYPE_OUTPUT
);
715 switch (dest
.Index
) {
716 case FRAG_RESULT_COLR
:
717 case FRAG_RESULT_DEPR
:
718 REG_SET_INDEX(r
, dest
.Index
);
719 REG_SET_VALID(r
, GL_TRUE
);
722 ERROR("Bad DstReg->Index 0x%x\n", dest
.Index
);
726 ERROR("Bad DstReg->File 0x%x\n", dest
.File
);
731 static int t_hw_src(struct r300_fragment_program
*rp
,
737 int index
= REG_GET_INDEX(src
);
739 switch(REG_GET_TYPE(src
)) {
741 /* NOTE: if reg==-1 here, a source is being read that
742 * hasn't been written to. Undefined results
744 if (cs
->temps
[index
].reg
== -1)
745 cs
->temps
[index
].reg
= get_hw_temp(rp
);
747 idx
= cs
->temps
[index
].reg
;
749 if (!REG_GET_NO_USE(src
) &&
750 (--cs
->temps
[index
].refcount
== 0))
754 idx
= cs
->inputs
[index
].reg
;
756 if (!REG_GET_NO_USE(src
) &&
757 (--cs
->inputs
[index
].refcount
== 0))
758 free_hw_temp(rp
, cs
->inputs
[index
].reg
);
761 return (index
| SRC_CONST
);
763 ERROR("Invalid type for source reg\n");
764 return (0 | SRC_CONST
);
768 cs
->used_in_node
|= (1 << idx
);
773 static int t_hw_dst(struct r300_fragment_program
*rp
,
779 GLuint index
= REG_GET_INDEX(dest
);
780 assert(REG_GET_VALID(dest
));
782 switch(REG_GET_TYPE(dest
)) {
784 if (cs
->temps
[REG_GET_INDEX(dest
)].reg
== -1) {
786 cs
->temps
[index
].reg
= get_hw_temp(rp
);
788 cs
->temps
[index
].reg
= get_hw_temp_tex(rp
);
791 idx
= cs
->temps
[index
].reg
;
793 if (!REG_GET_NO_USE(dest
) &&
794 (--cs
->temps
[index
].refcount
== 0))
797 cs
->dest_in_node
|= (1 << idx
);
798 cs
->used_in_node
|= (1 << idx
);
800 case REG_TYPE_OUTPUT
:
802 case FRAG_RESULT_COLR
:
803 rp
->node
[rp
->cur_node
].flags
|= R300_PFS_NODE_OUTPUT_COLOR
;
805 case FRAG_RESULT_DEPR
:
806 rp
->node
[rp
->cur_node
].flags
|= R300_PFS_NODE_OUTPUT_DEPTH
;
812 ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest
));
819 static void emit_nop(struct r300_fragment_program
*rp
,
826 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
828 if (mask
& WRITEMASK_XYZ
) {
829 rp
->alu
.inst
[cs
->v_pos
].inst0
= NOP_INST0
;
830 rp
->alu
.inst
[cs
->v_pos
].inst1
= NOP_INST1
;
834 if (mask
& WRITEMASK_W
) {
835 rp
->alu
.inst
[cs
->s_pos
].inst2
= NOP_INST2
;
836 rp
->alu
.inst
[cs
->s_pos
].inst3
= NOP_INST3
;
841 static void emit_tex(struct r300_fragment_program
*rp
,
842 struct prog_instruction
*fpi
,
846 GLuint coord
= t_src(rp
, fpi
->SrcReg
[0]);
847 GLuint dest
= undef
, rdest
= undef
;
848 GLuint din
= cs
->dest_in_node
, uin
= cs
->used_in_node
;
849 int unit
= fpi
->TexSrcUnit
;
852 /* Resolve source/dest to hardware registers */
853 hwsrc
= t_hw_src(rp
, coord
, GL_TRUE
);
854 if (opcode
!= R300_FPITX_OP_KIL
) {
855 dest
= t_dst(rp
, fpi
->DstReg
);
857 /* r300 doesn't seem to be able to do TEX->output reg */
858 if (REG_GET_TYPE(dest
) == REG_TYPE_OUTPUT
) {
860 dest
= get_temp_reg_tex(rp
);
862 hwdest
= t_hw_dst(rp
, dest
, GL_TRUE
);
864 /* Use a temp that hasn't been used in this node, rather
865 * than causing an indirection
867 if (uin
& (1 << hwdest
)) {
868 free_hw_temp(rp
, hwdest
);
869 hwdest
= get_hw_temp_tex(rp
);
870 cs
->temps
[REG_GET_INDEX(dest
)].reg
= hwdest
;
877 /* Indirection if source has been written in this node, or if the
878 * dest has been read/written in this node
880 if ((REG_GET_TYPE(coord
) != REG_TYPE_CONST
&&
881 (din
& (1<<hwsrc
))) || (uin
& (1<<hwdest
))) {
883 /* Finish off current node */
884 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
885 if (rp
->node
[rp
->cur_node
].alu_offset
== cs
->v_pos
) {
886 /* No alu instructions in the node? Emit a NOP. */
887 emit_nop(rp
, WRITEMASK_XYZW
, GL_TRUE
);
888 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
891 rp
->node
[rp
->cur_node
].alu_end
=
892 cs
->v_pos
- rp
->node
[rp
->cur_node
].alu_offset
- 1;
893 assert(rp
->node
[rp
->cur_node
].alu_end
>= 0);
895 if (++rp
->cur_node
>= PFS_MAX_TEX_INDIRECT
) {
896 ERROR("too many levels of texture indirection\n");
901 rp
->node
[rp
->cur_node
].tex_offset
= rp
->tex
.length
;
902 rp
->node
[rp
->cur_node
].alu_offset
= cs
->v_pos
;
903 rp
->node
[rp
->cur_node
].tex_end
= -1;
904 rp
->node
[rp
->cur_node
].alu_end
= -1;
905 rp
->node
[rp
->cur_node
].flags
= 0;
906 cs
->used_in_node
= 0;
907 cs
->dest_in_node
= 0;
910 if (rp
->cur_node
== 0)
911 rp
->first_node_has_tex
= 1;
913 rp
->tex
.inst
[rp
->tex
.length
++] = 0
914 | (hwsrc
<< R300_FPITX_SRC_SHIFT
)
915 | (hwdest
<< R300_FPITX_DST_SHIFT
)
916 | (unit
<< R300_FPITX_IMAGE_SHIFT
)
917 /* not entirely sure about this */
918 | (opcode
<< R300_FPITX_OPCODE_SHIFT
);
920 cs
->dest_in_node
|= (1 << hwdest
);
921 if (REG_GET_TYPE(coord
) != REG_TYPE_CONST
)
922 cs
->used_in_node
|= (1 << hwsrc
);
924 rp
->node
[rp
->cur_node
].tex_end
++;
926 /* Copy from temp to output if needed */
927 if (REG_GET_VALID(rdest
)) {
928 emit_arith(rp
, PFS_OP_MAD
, rdest
, WRITEMASK_XYZW
, dest
,
929 pfs_one
, pfs_zero
, 0);
934 /* Add sources to FPI1/FPI3 lists. If source is already on list,
935 * reuse the index instead of wasting a source.
937 static int add_src(struct r300_fragment_program
*rp
,
945 /* Look for matches */
946 for (i
=0,csm
=srcmask
; i
<3; i
++,csm
=csm
<<1) {
947 /* If sources have been allocated in this position(s)... */
948 if ((cs
->slot
[pos
].umask
& csm
) == csm
) {
949 /* ... and the register number(s) match, re-use the
951 if (srcmask
== SLOT_VECTOR
&&
952 cs
->slot
[pos
].vsrc
[i
] == reg
)
954 if (srcmask
== SLOT_SCALAR
&&
955 cs
->slot
[pos
].ssrc
[i
] == reg
)
957 if (srcmask
== SLOT_BOTH
&&
958 cs
->slot
[pos
].vsrc
[i
] == reg
&&
959 cs
->slot
[pos
].ssrc
[i
] == reg
)
964 /* Look for free spaces */
965 for (i
=0,csm
=srcmask
; i
<3; i
++,csm
=csm
<<1) {
966 /* If the position(s) haven't been allocated */
967 if ((cs
->slot
[pos
].umask
& csm
) == 0) {
968 cs
->slot
[pos
].umask
|= csm
;
970 if (srcmask
& SLOT_VECTOR
)
971 cs
->slot
[pos
].vsrc
[i
] = reg
;
972 if (srcmask
& SLOT_SCALAR
)
973 cs
->slot
[pos
].ssrc
[i
] = reg
;
978 //ERROR("Failed to allocate sources in FPI1/FPI3!\n");
982 /* Determine whether or not to position opcode in the same ALU slot for both
983 * vector and scalar portions of an instruction.
985 * It's not necessary to force the first case, but it makes disassembled
986 * shaders easier to read.
988 static GLboolean
force_same_slot(int vop
,
997 if (emit_vop
&& emit_sop
)
1000 if (emit_vop
&& vop
== R300_FPI0_OUTC_REPL_ALPHA
)
1004 for (i
=0;i
<argc
;i
++)
1005 if (REG_GET_VSWZ(src
[i
]) == SWIZZLE_WZY
)
/*
 * emit_arith: encode one ALU operation into rp->alu.inst[].
 *
 * The vector opcode, scalar opcode and argument count are looked up in
 * r300_fpop[op].  The write mask and opcode are tested to decide which
 * halves are needed (XYZ bits or a DP3 vector opcode for the vector half,
 * the W bit or a REPL_ALPHA vector opcode for the scalar half); the
 * vector half is switched off when the destination is the depth output.
 *
 * Each source is resolved to a hardware index with t_hw_src() and entered
 * into the per-slot source lists with add_src(); the swizzle tables
 * v_swiz[] / s_swiz[] supply the argument encoding, to which ARG_NEG and
 * ARG_ABS are OR-ed according to the descriptor's negate/abs bits.
 * Argument words default to the hardware ZERO selectors.
 *
 * The destination is resolved with t_hw_dst().  PFS_FLAG_SAT in "flags"
 * adds the saturate bit to both opcodes.  inst0/inst1 are written at
 * vpos and inst2/inst3 at spos; the half that is not emitted in a slot
 * is filled with NOP_INST0 / NOP_INST2.
 *
 * NOTE(review): in the "source depends on the opposite stream" check,
 * s_swiz[] is indexed with REG_GET_VSWZ(src[i]).  Everywhere else in
 * this function s_swiz[] is indexed with REG_GET_SSWZ(), and the vector
 * swizzle index ranges over the (larger) v_swiz[] table, so this looks
 * like a typo that can read past the end of s_swiz[] -- presumably
 * REG_GET_SSWZ(src[i]) was intended; confirm and fix.
 */
1012 static void emit_arith(struct r300_fragment_program
*rp
,
1022 GLuint src
[3] = { src0
, src1
, src2
};
1023 int hwsrc
[3], sswz
[3], vswz
[3];
1025 GLboolean emit_vop
= GL_FALSE
, emit_sop
= GL_FALSE
;
1030 vop
= r300_fpop
[op
].v_op
;
1031 sop
= r300_fpop
[op
].s_op
;
1032 argc
= r300_fpop
[op
].argc
;
1034 if ((mask
& WRITEMASK_XYZ
) || vop
== R300_FPI0_OUTC_DP3
)
1036 if ((mask
& WRITEMASK_W
) || vop
== R300_FPI0_OUTC_REPL_ALPHA
)
1039 if (REG_GET_TYPE(dest
) == REG_TYPE_OUTPUT
&&
1040 REG_GET_INDEX(dest
) == FRAG_RESULT_DEPR
)
1041 emit_vop
= GL_FALSE
;
1043 if (force_same_slot(vop
, sop
, emit_vop
, emit_sop
, argc
, src
)) {
1044 vpos
= spos
= MAX2(cs
->v_pos
, cs
->s_pos
);
1048 /* Here is where we'd decide on where a safe place is to
1049 * combine this instruction with a previous one.
1051 * This is extremely simple for now.. if a source depends
1052 * on the opposite stream, force the same instruction.
1056 (v_swiz
[REG_GET_VSWZ(src
[i
])].flags
& SLOT_SCALAR
)) {
1057 vpos
= spos
= MAX2(vpos
, spos
);
1061 (s_swiz
[REG_GET_VSWZ(src
[i
])].flags
& SLOT_VECTOR
)) {
1062 vpos
= spos
= MAX2(vpos
, spos
);
1068 /* - Convert src->hwsrc, record for FPI1/FPI3
1069 * - Determine ARG parts of FPI0/FPI2, unused args are filled
1076 vswz
[i
] = R300_FPI0_ARGC_ZERO
;
1077 sswz
[i
] = R300_FPI2_ARGA_ZERO
;
1081 hwsrc
[i
] = t_hw_src(rp
, src
[i
], GL_FALSE
);
1083 if (emit_vop
&& vop
!= R300_FPI0_OUTC_REPL_ALPHA
) {
1084 srcpos
= add_src(rp
, hwsrc
[i
], vpos
,
1085 v_swiz
[REG_GET_VSWZ(src
[i
])].flags
);
1086 vswz
[i
] = (v_swiz
[REG_GET_VSWZ(src
[i
])].base
+
1088 v_swiz
[REG_GET_VSWZ(src
[i
])].stride
)) |
1089 ((src
[i
] & REG_NEGV_MASK
) ? ARG_NEG
: 0) |
1090 ((src
[i
] & REG_ABS_MASK
) ? ARG_ABS
: 0);
1091 } else vswz
[i
] = R300_FPI0_ARGC_ZERO
;
1094 srcpos
= add_src(rp
, hwsrc
[i
], spos
,
1095 s_swiz
[REG_GET_SSWZ(src
[i
])].flags
);
1096 sswz
[i
] = (s_swiz
[REG_GET_SSWZ(src
[i
])].base
+
1098 s_swiz
[REG_GET_SSWZ(src
[i
])].stride
)) |
1099 ((src
[i
] & REG_NEGS_MASK
) ? ARG_NEG
: 0) |
1100 ((src
[i
] & REG_ABS_MASK
) ? ARG_ABS
: 0);
1101 } else sswz
[i
] = R300_FPI2_ARGA_ZERO
;
1103 hwdest
= t_hw_dst(rp
, dest
, GL_FALSE
);
1105 if (flags
& PFS_FLAG_SAT
) {
1106 vop
|= R300_FPI0_OUTC_SAT
;
1107 sop
|= R300_FPI2_OUTA_SAT
;
1110 /* Throw the pieces together and get FPI0/1 */
1111 rp
->alu
.inst
[vpos
].inst1
=
1112 ((cs
->slot
[vpos
].vsrc
[0] << R300_FPI1_SRC0C_SHIFT
) |
1113 (cs
->slot
[vpos
].vsrc
[1] << R300_FPI1_SRC1C_SHIFT
) |
1114 (cs
->slot
[vpos
].vsrc
[2] << R300_FPI1_SRC2C_SHIFT
));
1116 rp
->alu
.inst
[vpos
].inst0
= vop
|
1117 (vswz
[0] << R300_FPI0_ARG0C_SHIFT
) |
1118 (vswz
[1] << R300_FPI0_ARG1C_SHIFT
) |
1119 (vswz
[2] << R300_FPI0_ARG2C_SHIFT
);
1121 rp
->alu
.inst
[vpos
].inst1
|= hwdest
<< R300_FPI1_DSTC_SHIFT
;
1122 if (REG_GET_TYPE(dest
) == REG_TYPE_OUTPUT
) {
1123 if (REG_GET_INDEX(dest
) == FRAG_RESULT_COLR
) {
1124 rp
->alu
.inst
[vpos
].inst1
|=
1125 (mask
& WRITEMASK_XYZ
) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT
;
1128 rp
->alu
.inst
[vpos
].inst1
|=
1129 (mask
& WRITEMASK_XYZ
) << R300_FPI1_DSTC_REG_MASK_SHIFT
;
1132 } else if (spos
>= vpos
)
1133 rp
->alu
.inst
[spos
].inst0
= NOP_INST0
;
1135 /* And now FPI2/3 */
1136 rp
->alu
.inst
[spos
].inst3
=
1137 ((cs
->slot
[spos
].ssrc
[0] << R300_FPI3_SRC0A_SHIFT
) |
1138 (cs
->slot
[spos
].ssrc
[1] << R300_FPI3_SRC1A_SHIFT
) |
1139 (cs
->slot
[spos
].ssrc
[2] << R300_FPI3_SRC2A_SHIFT
));
1141 rp
->alu
.inst
[spos
].inst2
= sop
|
1142 sswz
[0] << R300_FPI2_ARG0A_SHIFT
|
1143 sswz
[1] << R300_FPI2_ARG1A_SHIFT
|
1144 sswz
[2] << R300_FPI2_ARG2A_SHIFT
;
1146 if (mask
& WRITEMASK_W
) {
1147 if (REG_GET_TYPE(dest
) == REG_TYPE_OUTPUT
) {
1148 if (REG_GET_INDEX(dest
) == FRAG_RESULT_COLR
) {
1149 rp
->alu
.inst
[spos
].inst3
|=
1150 (hwdest
<< R300_FPI3_DSTA_SHIFT
) | R300_FPI3_DSTA_OUTPUT
;
1151 } else if (REG_GET_INDEX(dest
) == FRAG_RESULT_DEPR
) {
1152 rp
->alu
.inst
[spos
].inst3
|= R300_FPI3_DSTA_DEPTH
;
1155 rp
->alu
.inst
[spos
].inst3
|=
1156 (hwdest
<< R300_FPI3_DSTA_SHIFT
) | R300_FPI3_DSTA_REG
;
1160 } else if (vpos
>= spos
)
1161 rp
->alu
.inst
[vpos
].inst2
= NOP_INST2
;
1167 static GLuint
get_attrib(struct r300_fragment_program
*rp
, GLuint attr
)
1169 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1172 if (!(mp
->Base
.InputsRead
& (1<<attr
))) {
1173 ERROR("Attribute %d was not provided!\n", attr
);
1177 REG_SET_TYPE(r
, REG_TYPE_INPUT
);
1178 REG_SET_INDEX(r
, attr
);
1179 REG_SET_VALID(r
, GL_TRUE
);
1184 static GLboolean
parse_program(struct r300_fragment_program
*rp
)
1186 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1187 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
1188 struct prog_instruction
*fpi
;
1189 GLuint src
[3], dest
, temp
;
1191 int flags
, mask
= 0;
1192 GLfloat cnstv
[4] = {0.0, 0.0, 0.0, 0.0};
1194 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
1195 ERROR("empty program?\n");
1199 for (fpi
=mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1200 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
)
1201 flags
= PFS_FLAG_SAT
;
1205 if (fpi
->Opcode
!= OPCODE_KIL
) {
1206 dest
= t_dst(rp
, fpi
->DstReg
);
1207 mask
= fpi
->DstReg
.WriteMask
;
1210 switch (fpi
->Opcode
) {
1212 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1213 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1214 absolute(src
[0]), pfs_one
, pfs_zero
,
1218 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1219 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1220 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1221 src
[0], pfs_one
, src
[1],
1225 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1226 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1227 src
[2] = t_src(rp
, fpi
->SrcReg
[2]);
1228 /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c
1229 * r300 - if src2.c < 0.0 ? src1.c : src0.c
1231 emit_arith(rp
, PFS_OP_CMP
, dest
, mask
,
1232 src
[2], src
[1], src
[0],
1237 * cos using Taylor series:
1238 * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6!
1240 temp
= get_temp_reg(rp
);
1242 cnstv
[1] = 0.041666667;
1243 cnstv
[2] = 0.001388889;
1245 cnst
= emit_const4fv(rp
, cnstv
);
1246 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1248 emit_arith(rp
, PFS_OP_MAD
, temp
,
1254 emit_arith(rp
, PFS_OP_MAD
, temp
,
1255 WRITEMASK_Y
| WRITEMASK_Z
,
1259 emit_arith(rp
, PFS_OP_MAD
, temp
,
1262 swizzle(temp
, X
, X
, X
, W
),
1265 emit_arith(rp
, PFS_OP_MAD
, temp
,
1270 emit_arith(rp
, PFS_OP_MAD
, temp
,
1276 emit_arith(rp
, PFS_OP_MAD
, temp
,
1280 swizzle(temp
, Y
, Y
, Y
, W
),
1282 emit_arith(rp
, PFS_OP_MAD
, temp
,
1286 negate(swizzle(temp
, Z
, Z
, Z
, W
)),
1288 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1289 swizzle(temp
, X
, X
, X
, X
),
1293 free_temp(rp
, temp
);
1296 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1297 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1298 emit_arith(rp
, PFS_OP_DP3
, dest
, mask
,
1299 src
[0], src
[1], undef
,
1303 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1304 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1305 emit_arith(rp
, PFS_OP_DP4
, dest
, mask
,
1306 src
[0], src
[1], undef
,
1310 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1311 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1312 /* src0.xyz1 -> temp
1313 * DP4 dest, temp, src1
1316 temp
= get_temp_reg(rp
);
1317 src
[0].s_swz
= SWIZZLE_ONE
;
1318 emit_arith(rp
, PFS_OP_MAD
, temp
, mask
,
1319 src
[0], pfs_one
, pfs_zero
,
1321 emit_arith(rp
, PFS_OP_DP4
, dest
, mask
,
1322 temp
, src
[1], undef
,
1324 free_temp(rp
, temp
);
1326 emit_arith(rp
, PFS_OP_DP4
, dest
, mask
,
1327 swizzle(src
[0], X
, Y
, Z
, ONE
), src
[1],
1332 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1333 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1334 /* dest.y = src0.y * src1.y */
1335 if (mask
& WRITEMASK_Y
)
1336 emit_arith(rp
, PFS_OP_MAD
, dest
, WRITEMASK_Y
,
1337 keep(src
[0]), keep(src
[1]),
1339 /* dest.z = src0.z */
1340 if (mask
& WRITEMASK_Z
)
1341 emit_arith(rp
, PFS_OP_MAD
, dest
, WRITEMASK_Z
,
1342 src
[0], pfs_one
, pfs_zero
, flags
);
1344 * result.w = src1.w */
1345 if (mask
& WRITEMASK_XW
) {
1346 REG_SET_VSWZ(src
[1], SWIZZLE_111
); /*Cheat*/
1347 emit_arith(rp
, PFS_OP_MAD
, dest
,
1348 mask
& WRITEMASK_XW
,
1349 src
[1], pfs_one
, pfs_zero
,
1354 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1355 emit_arith(rp
, PFS_OP_EX2
, dest
, mask
,
1356 src
[0], undef
, undef
,
1360 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1361 temp
= get_temp_reg(rp
);
1363 * MAD dest, src0, 1.0, -temp
1365 emit_arith(rp
, PFS_OP_FRC
, temp
, mask
,
1366 keep(src
[0]), undef
, undef
,
1368 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1369 src
[0], pfs_one
, negate(temp
),
1371 free_temp(rp
, temp
);
1374 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1375 emit_arith(rp
, PFS_OP_FRC
, dest
, mask
,
1376 src
[0], undef
, undef
,
1380 emit_tex(rp
, fpi
, R300_FPITX_OP_KIL
);
1383 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1384 emit_arith(rp
, PFS_OP_LG2
, dest
, mask
,
1385 src
[0], undef
, undef
,
1390 * if (s.x < 0) t.x = 0; else t.x = s.x;
1391 * if (s.y < 0) t.y = 0; else t.y = s.y;
1392 * if (s.w > 128.0) t.w = 128.0; else t.w = s.w;
1393 * if (s.w < -128.0) t.w = -128.0; else t.w = s.w;
1395 * if (t.x > 0) r.y = pow(t.y, t.w); else r.y = 0;
1396 * Also r.y = 0 if t.y < 0
1397 * For the t.x > 0 FGLRX use the CMPH opcode which
1398 * change the compare to (t.x + 0.5) > 0.5 we may
1399 * save one instruction by doing CMP -t.x
1401 cnstv
[0] = cnstv
[1] = cnstv
[2] = cnstv
[4] = 0.50001;
1402 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1403 temp
= get_temp_reg(rp
);
1404 cnst
= emit_const4fv(rp
, cnstv
);
1405 emit_arith(rp
, PFS_OP_CMP
, temp
,
1406 WRITEMASK_X
| WRITEMASK_Y
,
1407 src
[0], pfs_zero
, src
[0], flags
);
1408 emit_arith(rp
, PFS_OP_MIN
, temp
, WRITEMASK_Z
,
1409 swizzle(keep(src
[0]), W
, W
, W
, W
),
1410 cnst
, undef
, flags
);
1411 emit_arith(rp
, PFS_OP_LG2
, temp
, WRITEMASK_W
,
1412 swizzle(temp
, Y
, Y
, Y
, Y
),
1413 undef
, undef
, flags
);
1414 emit_arith(rp
, PFS_OP_MAX
, temp
, WRITEMASK_Z
,
1415 temp
, negate(cnst
), undef
, flags
);
1416 emit_arith(rp
, PFS_OP_MAD
, temp
, WRITEMASK_W
,
1417 temp
, swizzle(temp
, Z
, Z
, Z
, Z
),
1419 emit_arith(rp
, PFS_OP_EX2
, temp
, WRITEMASK_W
,
1420 temp
, undef
, undef
, flags
);
1421 emit_arith(rp
, PFS_OP_MAD
, dest
, WRITEMASK_Y
,
1422 swizzle(keep(temp
), X
, X
, X
, X
),
1423 pfs_one
, pfs_zero
, flags
);
1425 emit_arith(rp
, PFS_OP_MAD
, temp
, WRITEMASK_X
,
1426 temp
, pfs_one
, pfs_half
, flags
);
1427 emit_arith(rp
, PFS_OP_CMPH
, temp
, WRITEMASK_Z
,
1428 swizzle(keep(temp
), W
, W
, W
, W
),
1429 pfs_zero
, swizzle(keep(temp
), X
, X
, X
, X
),
1432 emit_arith(rp
, PFS_OP_CMP
, temp
, WRITEMASK_Z
,
1434 swizzle(keep(temp
), W
, W
, W
, W
),
1435 negate(swizzle(keep(temp
), X
, X
, X
, X
)),
1438 emit_arith(rp
, PFS_OP_CMP
, dest
, WRITEMASK_Z
,
1440 negate(swizzle(keep(temp
), Y
, Y
, Y
, Y
)),
1442 emit_arith(rp
, PFS_OP_MAD
, dest
,
1443 WRITEMASK_X
| WRITEMASK_W
,
1448 free_temp(rp
, temp
);
1451 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1452 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1453 src
[2] = t_src(rp
, fpi
->SrcReg
[2]);
1454 /* result = tmp0tmp1 + (1 - tmp0)tmp2
1455 * = tmp0tmp1 + tmp2 + (-tmp0)tmp2
1456 * MAD temp, -tmp0, tmp2, tmp2
1457 * MAD result, tmp0, tmp1, temp
1459 temp
= get_temp_reg(rp
);
1460 emit_arith(rp
, PFS_OP_MAD
, temp
, mask
,
1461 negate(keep(src
[0])), keep(src
[2]), src
[2],
1463 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1464 src
[0], src
[1], temp
,
1466 free_temp(rp
, temp
);
1469 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1470 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1471 src
[2] = t_src(rp
, fpi
->SrcReg
[2]);
1472 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1473 src
[0], src
[1], src
[2],
1477 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1478 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1479 emit_arith(rp
, PFS_OP_MAX
, dest
, mask
,
1480 src
[0], src
[1], undef
,
1484 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1485 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1486 emit_arith(rp
, PFS_OP_MIN
, dest
, mask
,
1487 src
[0], src
[1], undef
,
1492 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1493 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1494 src
[0], pfs_one
, pfs_zero
,
1498 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1499 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1500 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1501 src
[0], src
[1], pfs_zero
,
1505 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1506 src
[1] = t_scalar_src(rp
, fpi
->SrcReg
[1]);
1507 temp
= get_temp_reg(rp
);
1508 emit_arith(rp
, PFS_OP_LG2
, temp
, WRITEMASK_W
,
1509 src
[0], undef
, undef
,
1511 emit_arith(rp
, PFS_OP_MAD
, temp
, WRITEMASK_W
,
1512 temp
, src
[1], pfs_zero
,
1514 emit_arith(rp
, PFS_OP_EX2
, dest
, fpi
->DstReg
.WriteMask
,
1517 free_temp(rp
, temp
);
1520 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1521 emit_arith(rp
, PFS_OP_RCP
, dest
, mask
,
1522 src
[0], undef
, undef
,
1526 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1527 emit_arith(rp
, PFS_OP_RSQ
, dest
, mask
,
1528 absolute(src
[0]), pfs_zero
, pfs_zero
,
1532 ERROR("SCS not implemented\n");
1535 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1536 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1537 temp
= get_temp_reg(rp
);
1538 /* temp = src0 - src1
1539 * dest.c = (temp.c < 0.0) ? 0 : 1
1541 emit_arith(rp
, PFS_OP_MAD
, temp
, mask
,
1542 src
[0], pfs_one
, negate(src
[1]),
1544 emit_arith(rp
, PFS_OP_CMP
, dest
, mask
,
1545 pfs_one
, pfs_zero
, temp
,
1547 free_temp(rp
, temp
);
1551 * sin using Taylor series:
1552 * sin(x) = x - x^3/3! + x^5/5! - x^7/7!
1554 temp
= get_temp_reg(rp
);
1555 cnstv
[0] = 0.333333333;
1556 cnstv
[1] = 0.008333333;
1557 cnstv
[2] = 0.000198413;
1559 cnst
= emit_const4fv(rp
, cnstv
);
1560 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1562 emit_arith(rp
, PFS_OP_MAD
, temp
,
1568 emit_arith(rp
, PFS_OP_MAD
, temp
,
1569 WRITEMASK_Y
| WRITEMASK_Z
,
1573 emit_arith(rp
, PFS_OP_MAD
, temp
,
1576 swizzle(temp
, X
, X
, X
, W
),
1579 emit_arith(rp
, PFS_OP_MAD
, temp
,
1585 emit_arith(rp
, PFS_OP_MAD
, temp
,
1590 emit_arith(rp
, PFS_OP_MAD
, temp
,
1596 emit_arith(rp
, PFS_OP_MAD
, temp
,
1600 swizzle(temp
, Y
, Y
, Y
, W
),
1602 emit_arith(rp
, PFS_OP_MAD
, temp
,
1606 negate(swizzle(temp
, Z
, Z
, Z
, W
)),
1608 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1609 swizzle(temp
, X
, X
, X
, X
),
1613 free_temp(rp
, temp
);
1616 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1617 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1618 temp
= get_temp_reg(rp
);
1619 /* temp = src0 - src1
1620 * dest.c = (temp.c < 0.0) ? 1 : 0
1622 emit_arith(rp
, PFS_OP_MAD
, temp
, mask
,
1623 src
[0], pfs_one
, negate(src
[1]),
1625 emit_arith(rp
, PFS_OP_CMP
, dest
, mask
,
1626 pfs_zero
, pfs_one
, temp
,
1628 free_temp(rp
, temp
);
1631 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1632 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1633 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1634 src
[0], pfs_one
, negate(src
[1]),
1638 emit_tex(rp
, fpi
, R300_FPITX_OP_TEX
);
1641 emit_tex(rp
, fpi
, R300_FPITX_OP_TXB
);
1644 emit_tex(rp
, fpi
, R300_FPITX_OP_TXP
);
1647 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1648 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1649 temp
= get_temp_reg(rp
);
1650 /* temp = src0.zxy * src1.yzx */
1651 emit_arith(rp
, PFS_OP_MAD
, temp
, WRITEMASK_XYZ
,
1652 swizzle(keep(src
[0]), Z
, X
, Y
, W
),
1653 swizzle(keep(src
[1]), Y
, Z
, X
, W
),
1656 /* dest.xyz = src0.yzx * src1.zxy - temp
1657 * dest.w = undefined
1659 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
& WRITEMASK_XYZ
,
1660 swizzle(src
[0], Y
, Z
, X
, W
),
1661 swizzle(src
[1], Z
, X
, Y
, W
),
1665 free_temp(rp
, temp
);
1669 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
1681 /* - Init structures
1682 * - Determine what hwregs each input corresponds to
/* init_program - reset per-compile state before a program is translated.
 *
 * Steps visible in this listing, in order:
 *  - clears rp->translated and rp->error, and points rp->cs (and the
 *    local cs) at the context's state.pfs_compile structure;
 *  - resets first_node_has_tex, params_uptodate and max_temp_idx, and
 *    sets node[0].alu_end / node[0].tex_end to -1;
 *  - clears *cs with _mesa_memset(), then sets the vsrc[] / ssrc[]
 *    entries of every one of the PFS_MAX_ALU_INST slots to SRC_CONST;
 *  - gives each fragment input named in InputsRead a register from
 *    get_hw_temp(), in the order texcoords, WPOS, COL0, COL1, clearing
 *    the handled bits as it goes; leftover input bits are reported with
 *    WARN_ONCE and pointed at hwreg 0;
 *  - reports ERROR when the Mesa program has no instruction array;
 *  - walks the instructions up to OPCODE_END, counting references to
 *    temporaries (as source and as destination) and bumping the
 *    refcount of referenced inputs; a temporary seen for the first time
 *    gets reg = -1, refcount = 1 and its bit set in temps_used;
 *  - stores the temps_used bitmask in cs->temp_in_use.
 *
 * NOTE(review): several short lines (braces, else/case labels, local
 * declarations) are not visible here, so the exact branch structure of
 * the refcount code should be confirmed against the full file.
 * NOTE(review): temps_used is a GLuint bitmask tested with (1 << idx),
 * which presumably assumes temporary indices below 32 - confirm.
 */
1684 static void init_program(struct r300_fragment_program
*rp
)
1686 struct r300_pfs_compile_state
*cs
= NULL
;
1687 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1688 struct prog_instruction
*fpi
;
1689 GLuint InputsRead
= mp
->Base
.InputsRead
;
1690 GLuint temps_used
= 0; /* for rp->temps[] */
1693 /* New compile, reset tracking data */
1694 rp
->translated
= GL_FALSE
;
1695 rp
->error
= GL_FALSE
;
1696 rp
->cs
= cs
= &(R300_CONTEXT(rp
->ctx
)->state
.pfs_compile
);
1699 rp
->first_node_has_tex
= 0;
1702 rp
->params_uptodate
= GL_FALSE
;
1703 rp
->max_temp_idx
= 0;
1704 rp
->node
[0].alu_end
= -1;
1705 rp
->node
[0].tex_end
= -1;
1707 _mesa_memset(cs
, 0, sizeof(*rp
->cs
));
1708 for (i
=0;i
<PFS_MAX_ALU_INST
;i
++) {
1710 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
1711 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
1715 /* Work out what temps the Mesa inputs correspond to, this must match
1716 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1717 * configures itself based on the fragprog's InputsRead
1719 * NOTE: this depends on get_hw_temp() allocating registers in order,
1720 * starting from register 0.
1723 /* Texcoords come first */
1724 for (i
=0;i
<rp
->ctx
->Const
.MaxTextureUnits
;i
++) {
1725 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1726 cs
->inputs
[FRAG_ATTRIB_TEX0
+i
].refcount
= 0;
1727 cs
->inputs
[FRAG_ATTRIB_TEX0
+i
].reg
= get_hw_temp(rp
);
1730 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
1732 /* fragment position treated as a texcoord */
1733 if (InputsRead
& FRAG_BIT_WPOS
) {
1734 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
1735 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
= get_hw_temp(rp
);
1737 InputsRead
&= ~FRAG_BIT_WPOS
;
1739 /* Then primary colour */
1740 if (InputsRead
& FRAG_BIT_COL0
) {
1741 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
1742 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
= get_hw_temp(rp
);
1744 InputsRead
&= ~FRAG_BIT_COL0
;
1746 /* Secondary color */
1747 if (InputsRead
& FRAG_BIT_COL1
) {
1748 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
1749 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
= get_hw_temp(rp
);
1751 InputsRead
&= ~FRAG_BIT_COL1
;
1755 WARN_ONCE("Don't know how to handle inputs 0x%x\n",
1757 /* force read from hwreg 0 for now */
1759 if (InputsRead
& (1<<i
)) cs
->inputs
[i
].reg
= 0;
1762 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
1763 * That way, we can free up the reg when it's no longer needed
1765 if (!mp
->Base
.Instructions
) {
1766 ERROR("No instructions found in program\n");
1770 for (fpi
=mp
->Base
.Instructions
;fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1774 idx
= fpi
->SrcReg
[i
].Index
;
1775 switch (fpi
->SrcReg
[i
].File
) {
1776 case PROGRAM_TEMPORARY
:
1777 if (!(temps_used
& (1<<idx
))) {
1778 cs
->temps
[idx
].reg
= -1;
1779 cs
->temps
[idx
].refcount
= 1;
1780 temps_used
|= (1 << idx
);
1782 cs
->temps
[idx
].refcount
++;
1785 cs
->inputs
[idx
].refcount
++;
1791 idx
= fpi
->DstReg
.Index
;
1792 if (fpi
->DstReg
.File
== PROGRAM_TEMPORARY
) {
1793 if (!(temps_used
& (1<<idx
))) {
1794 cs
->temps
[idx
].reg
= -1;
1795 cs
->temps
[idx
].refcount
= 1;
1796 temps_used
|= (1 << idx
);
1798 cs
->temps
[idx
].refcount
++;
1801 cs
->temp_in_use
= temps_used
;
/* update_params - refresh rp->constant[] from the Mesa parameter values.
 *
 * Calls _mesa_load_state_parameters() on the program's parameter list,
 * then for each of the rp->param_nr tracked parameters copies
 * rp->param[i].values (via COPY_4V) into rp->constant[] at the slot
 * given by rp->param[i].idx.  Finally marks rp->params_uptodate as
 * GL_TRUE.
 *
 * NOTE(review): the line between the comment and the
 * _mesa_load_state_parameters() call is not visible in this listing;
 * confirm whether the call is guarded by a condition.
 */
1804 static void update_params(struct r300_fragment_program
*rp
)
1806 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1809 /* Ask Mesa nicely to fill in ParameterValues for us */
1811 _mesa_load_state_parameters(rp
->ctx
, mp
->Base
.Parameters
);
1813 for (i
=0;i
<rp
->param_nr
;i
++)
1814 COPY_4V(rp
->constant
[rp
->param
[i
].idx
], rp
->param
[i
].values
);
1816 rp
->params_uptodate
= GL_TRUE
;
/* r300_translate_fragment_shader - translate rp unless already done.
 *
 * When rp->translated is unset, the program is run through
 * parse_program(); the body of the GL_FALSE branch (presumably the
 * failure path) is not visible in this listing.  Afterwards the end
 * markers of the current node and of the whole program are computed
 * from the compile state and rp->translated is set to GL_TRUE.
 *
 * NOTE(review): cs is initialised to NULL and dereferenced below; the
 * assignment that makes it valid is not visible here - confirm it is
 * set (presumably from rp->cs) before first use.
 */
1819 void r300_translate_fragment_shader(struct r300_fragment_program
*rp
)
1821 struct r300_pfs_compile_state
*cs
= NULL
;
1823 if (!rp
->translated
) {
1828 if (parse_program(rp
) == GL_FALSE
) {
/* Bring both position counters up to the larger of the two. */
1834 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
1835 rp
->node
[rp
->cur_node
].alu_end
=
1836 cs
->v_pos
- rp
->node
[rp
->cur_node
].alu_offset
- 1;
/* A still-negative tex_end on the current node is clamped to 0. */
1837 if (rp
->node
[rp
->cur_node
].tex_end
< 0)
1838 rp
->node
[rp
->cur_node
].tex_end
= 0;
1840 rp
->alu_end
= cs
->v_pos
- 1;
/* tex_end is length - 1, or 0 when rp->tex.length is zero. */
1842 rp
->tex_end
= rp
->tex
.length
? rp
->tex
.length
- 1 : 0;
1843 assert(rp
->node
[rp
->cur_node
].alu_end
>= 0);
1844 assert(rp
->alu_end
>= 0);
1846 rp
->translated
= GL_TRUE
;
/* Debug dump, compiled in but disabled by the constant-false test. */
1847 if (0) dump_program(rp
);
1853 /* just some random things... */
/* dump_program - print a debugging dump of rp to stderr.
 *
 * Output visible in this listing, in order:
 *  - a "pc=" banner (the counter pc is post-incremented on each print);
 *  - the Mesa program, via _mesa_print_program();
 *  - a "Hardware program" heading, then the rp->tex.inst[] words for
 *    indices below rp->tex.length;
 *  - one line per node, for nodes 0..rp->cur_node, with its alu_offset,
 *    tex_offset, alu_end and tex_end;
 *  - a header word built from rp->tex_end and R300_PFS_TEXI_0, followed
 *    by the tex words up to and including rp->tex_end;
 *  - for each of inst0..inst3: a header word built from rp->alu_end and
 *    the matching R300_PFS_INSTRn_0 value, followed by that word of
 *    every ALU instruction up to and including rp->alu_end;
 *  - a final "00000000" line.
 *
 * NOTE(review): the declaration of pc and any guards or loop braces
 * between these prints are not visible in this listing.
 */
1854 static void dump_program(struct r300_fragment_program
*rp
)
1859 fprintf(stderr
, "pc=%d*************************************\n", pc
++);
1861 fprintf(stderr
, "Mesa program:\n");
1862 fprintf(stderr
, "-------------\n");
1863 _mesa_print_program(&rp
->mesa_program
.Base
);
1866 fprintf(stderr
, "Hardware program\n");
1867 fprintf(stderr
, "----------------\n");
1869 fprintf(stderr
, "tex:\n");
1871 for(i
=0;i
<rp
->tex
.length
;i
++) {
1872 fprintf(stderr
, "%08x\n", rp
->tex
.inst
[i
]);
1875 for (i
=0;i
<(rp
->cur_node
+1);i
++) {
1876 fprintf(stderr
, "NODE %d: alu_offset: %d, tex_offset: %d, "\
1877 "alu_end: %d, tex_end: %d\n", i
,
1878 rp
->node
[i
].alu_offset
,
1879 rp
->node
[i
].tex_offset
,
1880 rp
->node
[i
].alu_end
,
1881 rp
->node
[i
].tex_end
);
1884 fprintf(stderr
, "%08x\n",
1885 ((rp
->tex_end
<< 16) | (R300_PFS_TEXI_0
>> 2)));
1886 for (i
=0;i
<=rp
->tex_end
;i
++)
1887 fprintf(stderr
, "%08x\n", rp
->tex
.inst
[i
]);
1889 /* dump program in pretty_print_command_stream.tcl-readable format */
1890 fprintf(stderr
, "%08x\n",
1891 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR0_0
>> 2)));
1892 for (i
=0;i
<=rp
->alu_end
;i
++)
1893 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst0
);
1895 fprintf(stderr
, "%08x\n",
1896 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR1_0
>> 2)));
1897 for (i
=0;i
<=rp
->alu_end
;i
++)
1898 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst1
);
1900 fprintf(stderr
, "%08x\n",
1901 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR2_0
>> 2)));
1902 for (i
=0;i
<=rp
->alu_end
;i
++)
1903 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst2
);
1905 fprintf(stderr
, "%08x\n",
1906 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR3_0
>> 2)));
1907 for (i
=0;i
<=rp
->alu_end
;i
++)
1908 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst3
);
1910 fprintf(stderr
, "00000000\n");