2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 * Ben Skeggs <darktama@iinet.net.au>
31 * Jerome Glisse <j.glisse@gmail.com>
36 * - Depth write, WPOS/FOGC inputs
38 * - Verify results of opcodes for accuracy, I've only checked them
48 #include "program_instruction.h"
49 #include "r300_context.h"
50 #include "r300_fragprog.h"
52 #include "r300_state.h"
55 * Useful macros and values
57 #define ERROR(fmt, args...) do { \
58 fprintf(stderr, "%s::%s(): " fmt "\n", \
59 __FILE__, __func__, ##args); \
60 rp->error = GL_TRUE; \
63 #define PFS_INVAL 0xFFFFFFFF
64 #define COMPILE_STATE struct r300_pfs_compile_state *cs = rp->cs
76 #define SWIZZLE_HHH 10
78 #define swizzle(r, x, y, z, w) do_swizzle(rp, r, \
/*
 * Packed register descriptor.
 *
 * A register reference is carried around as one unsigned integer made of
 * the bitfields below (bit positions follow from the *_SHIFT / *_MASK
 * values):
 *
 *   bits  0-1   TYPE    one of REG_TYPE_INPUT/OUTPUT/TEMP/CONST
 *   bits  2-7   INDEX   register index within its type
 *   bits  8-12  VSWZ    vector swizzle selector
 *   bits 13-17  SSWZ    scalar swizzle selector
 *   bit  18     NEGV    negate flag for the vector part
 *   bit  19     NEGS    negate flag for the scalar part
 *   bit  20     ABS     absolute-value flag
 *   bit  21     NO_USE  "no use" flag
 *   bit  22     VALID   descriptor holds a valid register
 *
 * Every macro parameter is parenthesized in the expansion so that callers
 * may pass arbitrary expressions (e.g. "a | b") without the argument being
 * re-associated with the surrounding "<<" or "&" operators.
 */
#define REG_TYPE_INPUT		0
#define REG_TYPE_OUTPUT		1
#define REG_TYPE_TEMP		2
#define REG_TYPE_CONST		3

#define REG_TYPE_SHIFT		0
#define REG_INDEX_SHIFT		2
#define REG_VSWZ_SHIFT		8
#define REG_SSWZ_SHIFT		13
#define REG_NEGV_SHIFT		18
#define REG_NEGS_SHIFT		19
#define REG_ABS_SHIFT		20
#define REG_NO_USE_SHIFT	21
#define REG_VALID_SHIFT		22

#define REG_TYPE_MASK		(0x03 << REG_TYPE_SHIFT)
#define REG_INDEX_MASK		(0x3F << REG_INDEX_SHIFT)
#define REG_VSWZ_MASK		(0x1F << REG_VSWZ_SHIFT)
#define REG_SSWZ_MASK		(0x1F << REG_SSWZ_SHIFT)
#define REG_NEGV_MASK		(0x01 << REG_NEGV_SHIFT)
#define REG_NEGS_MASK		(0x01 << REG_NEGS_SHIFT)
#define REG_ABS_MASK		(0x01 << REG_ABS_SHIFT)
#define REG_NO_USE_MASK		(0x01 << REG_NO_USE_SHIFT)
#define REG_VALID_MASK		(0x01 << REG_VALID_SHIFT)

/* Build a descriptor; the NEGV/NEGS/ABS flags start out cleared. */
#define REG(type, index, vswz, sswz, nouse, valid)			\
	((((type)  << REG_TYPE_SHIFT)   & REG_TYPE_MASK)   |		\
	 (((index) << REG_INDEX_SHIFT)  & REG_INDEX_MASK)  |		\
	 (((nouse) << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) |		\
	 (((valid) << REG_VALID_SHIFT)  & REG_VALID_MASK)  |		\
	 (((vswz)  << REG_VSWZ_SHIFT)   & REG_VSWZ_MASK)   |		\
	 (((sswz)  << REG_SSWZ_SHIFT)   & REG_SSWZ_MASK))

/* Field extractors: yield the field value shifted down to bit 0. */
#define REG_GET_TYPE(reg)						\
	(((reg) & REG_TYPE_MASK) >> REG_TYPE_SHIFT)
#define REG_GET_INDEX(reg)						\
	(((reg) & REG_INDEX_MASK) >> REG_INDEX_SHIFT)
#define REG_GET_VSWZ(reg)						\
	(((reg) & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT)
#define REG_GET_SSWZ(reg)						\
	(((reg) & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT)
#define REG_GET_NO_USE(reg)						\
	(((reg) & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
#define REG_GET_VALID(reg)						\
	(((reg) & REG_VALID_MASK) >> REG_VALID_SHIFT)

/*
 * Field setters: "reg" must be a modifiable lvalue; it is assigned in
 * place with the named field replaced and all other bits preserved.
 * Values wider than the field are truncated by the mask.
 */
#define REG_SET_TYPE(reg, type)						\
	((reg) = (((reg) & ~REG_TYPE_MASK) |				\
		  (((type) << REG_TYPE_SHIFT) & REG_TYPE_MASK)))
#define REG_SET_INDEX(reg, index)					\
	((reg) = (((reg) & ~REG_INDEX_MASK) |				\
		  (((index) << REG_INDEX_SHIFT) & REG_INDEX_MASK)))
#define REG_SET_VSWZ(reg, vswz)						\
	((reg) = (((reg) & ~REG_VSWZ_MASK) |				\
		  (((vswz) << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)))
#define REG_SET_SSWZ(reg, sswz)						\
	((reg) = (((reg) & ~REG_SSWZ_MASK) |				\
		  (((sswz) << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)))
#define REG_SET_NO_USE(reg, nouse)					\
	((reg) = (((reg) & ~REG_NO_USE_MASK) |				\
		  (((nouse) << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)))
#define REG_SET_VALID(reg, valid)					\
	((reg) = (((reg) & ~REG_VALID_MASK) |				\
		  (((valid) << REG_VALID_SHIFT) & REG_VALID_MASK)))

/* Flag helpers: set the flag bit in place (they never clear it). */
#define REG_ABS(reg)							\
	((reg) = ((reg) | REG_ABS_MASK))
#define REG_NEGV(reg)							\
	((reg) = ((reg) | REG_NEGV_MASK))
#define REG_NEGS(reg)							\
	((reg) = ((reg) | REG_NEGS_MASK))
156 * Data structures for fragment program generation
159 /* description of r300 native hw instructions */
160 static const struct {
166 { "MAD", 3, R300_FPI0_OUTC_MAD
, R300_FPI2_OUTA_MAD
},
167 { "DP3", 2, R300_FPI0_OUTC_DP3
, R300_FPI2_OUTA_DP4
},
168 { "DP4", 2, R300_FPI0_OUTC_DP4
, R300_FPI2_OUTA_DP4
},
169 { "MIN", 2, R300_FPI0_OUTC_MIN
, R300_FPI2_OUTA_MIN
},
170 { "MAX", 2, R300_FPI0_OUTC_MAX
, R300_FPI2_OUTA_MAX
},
171 { "CMP", 3, R300_FPI0_OUTC_CMP
, R300_FPI2_OUTA_CMP
},
172 { "FRC", 1, R300_FPI0_OUTC_FRC
, R300_FPI2_OUTA_FRC
},
173 { "EX2", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_EX2
},
174 { "LG2", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_LG2
},
175 { "RCP", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_RCP
},
176 { "RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA
, R300_FPI2_OUTA_RSQ
},
177 { "REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA
, PFS_INVAL
},
178 { "CMPH", 3, R300_FPI0_OUTC_CMPH
, PFS_INVAL
},
182 /* vector swizzles r300 can support natively, with a couple of
183 * cases we handle specially
185 * REG_VSWZ/REG_SSWZ is an index into this table
187 #define SLOT_VECTOR (1<<0)
188 #define SLOT_SCALAR (1<<3)
189 #define SLOT_BOTH (SLOT_VECTOR | SLOT_SCALAR)
191 /* mapping from SWIZZLE_* to r300 native values for scalar insns */
192 #define SWIZZLE_HALF 6
194 #define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
198 static const struct r300_pfs_swizzle
{
199 GLuint hash
; /* swizzle value this matches */
200 GLuint base
; /* base value for hw swizzle */
201 GLuint stride
; /* difference in base between arg0/1/2 */
204 /* native swizzles */
205 { MAKE_SWZ3(X
, Y
, Z
), R300_FPI0_ARGC_SRC0C_XYZ
, 4, SLOT_VECTOR
},
206 { MAKE_SWZ3(X
, X
, X
), R300_FPI0_ARGC_SRC0C_XXX
, 4, SLOT_VECTOR
},
207 { MAKE_SWZ3(Y
, Y
, Y
), R300_FPI0_ARGC_SRC0C_YYY
, 4, SLOT_VECTOR
},
208 { MAKE_SWZ3(Z
, Z
, Z
), R300_FPI0_ARGC_SRC0C_ZZZ
, 4, SLOT_VECTOR
},
209 { MAKE_SWZ3(W
, W
, W
), R300_FPI0_ARGC_SRC0A
, 1, SLOT_SCALAR
},
210 { MAKE_SWZ3(Y
, Z
, X
), R300_FPI0_ARGC_SRC0C_YZX
, 1, SLOT_VECTOR
},
211 { MAKE_SWZ3(Z
, X
, Y
), R300_FPI0_ARGC_SRC0C_ZXY
, 1, SLOT_VECTOR
},
212 { MAKE_SWZ3(W
, Z
, Y
), R300_FPI0_ARGC_SRC0CA_WZY
, 1, SLOT_BOTH
},
213 { MAKE_SWZ3(ONE
, ONE
, ONE
), R300_FPI0_ARGC_ONE
, 0, 0},
214 { MAKE_SWZ3(ZERO
, ZERO
, ZERO
), R300_FPI0_ARGC_ZERO
, 0, 0},
215 { MAKE_SWZ3(HALF
, HALF
, HALF
), R300_FPI0_ARGC_HALF
, 0, 0},
216 { PFS_INVAL
, 0, 0, 0},
219 /* used during matching of non-native swizzles */
220 #define SWZ_X_MASK (7 << 0)
221 #define SWZ_Y_MASK (7 << 3)
222 #define SWZ_Z_MASK (7 << 6)
223 #define SWZ_W_MASK (7 << 9)
224 static const struct {
225 GLuint hash
; /* used to mask matching swizzle components */
226 int mask
; /* actual outmask */
227 int count
; /* count of components matched */
229 { SWZ_X_MASK
|SWZ_Y_MASK
|SWZ_Z_MASK
, 1|2|4, 3},
230 { SWZ_X_MASK
|SWZ_Y_MASK
, 1|2, 2},
231 { SWZ_X_MASK
|SWZ_Z_MASK
, 1|4, 2},
232 { SWZ_Y_MASK
|SWZ_Z_MASK
, 2|4, 2},
236 { PFS_INVAL
, PFS_INVAL
, PFS_INVAL
}
239 static const struct {
240 int base
; /* hw value of swizzle */
241 int stride
; /* difference between SRC0/1/2 */
244 { R300_FPI2_ARGA_SRC0C_X
, 3, SLOT_VECTOR
},
245 { R300_FPI2_ARGA_SRC0C_Y
, 3, SLOT_VECTOR
},
246 { R300_FPI2_ARGA_SRC0C_Z
, 3, SLOT_VECTOR
},
247 { R300_FPI2_ARGA_SRC0A
, 1, SLOT_SCALAR
},
248 { R300_FPI2_ARGA_ZERO
, 0, 0 },
249 { R300_FPI2_ARGA_ONE
, 0, 0 },
250 { R300_FPI2_ARGA_HALF
, 0, 0 }
253 /* boiler-plate reg, for convenience */
254 static const GLuint undef
= REG(REG_TYPE_TEMP
,
261 /* constant one source */
262 static const GLuint pfs_one
= REG(REG_TYPE_CONST
,
269 /* constant half source */
270 static const GLuint pfs_half
= REG(REG_TYPE_CONST
,
277 /* constant zero source */
278 static const GLuint pfs_zero
= REG(REG_TYPE_CONST
,
286 * Common functions prototypes
288 static void dump_program(struct r300_fragment_program
*rp
);
289 static void emit_arith(struct r300_fragment_program
*rp
, int op
,
290 GLuint dest
, int mask
,
291 GLuint src0
, GLuint src1
, GLuint src2
,
295 * Helper functions prototypes
297 static int get_hw_temp(struct r300_fragment_program
*rp
)
300 int r
= ffs(~cs
->hwreg_in_use
);
302 ERROR("Out of hardware temps\n");
306 cs
->hwreg_in_use
|= (1 << --r
);
307 if (r
> rp
->max_temp_idx
)
308 rp
->max_temp_idx
= r
;
313 static int get_hw_temp_tex(struct r300_fragment_program
*rp
)
318 r
= ffs(~(cs
->hwreg_in_use
| cs
->used_in_node
));
320 return get_hw_temp(rp
); /* Will cause an indirection */
322 cs
->hwreg_in_use
|= (1 << --r
);
323 if (r
> rp
->max_temp_idx
)
324 rp
->max_temp_idx
= r
;
329 static void free_hw_temp(struct r300_fragment_program
*rp
, int idx
)
332 cs
->hwreg_in_use
&= ~(1<<idx
);
335 static GLuint
get_temp_reg(struct r300_fragment_program
*rp
)
341 index
= ffs(~cs
->temp_in_use
);
343 ERROR("Out of program temps\n");
347 cs
->temp_in_use
|= (1 << --index
);
348 cs
->temps
[index
].refcount
= 0xFFFFFFFF;
349 cs
->temps
[index
].reg
= -1;
351 REG_SET_TYPE(r
, REG_TYPE_TEMP
);
352 REG_SET_INDEX(r
, index
);
353 REG_SET_VALID(r
, GL_TRUE
);
357 static GLuint
get_temp_reg_tex(struct r300_fragment_program
*rp
)
363 index
= ffs(~cs
->temp_in_use
);
365 ERROR("Out of program temps\n");
369 cs
->temp_in_use
|= (1 << --index
);
370 cs
->temps
[index
].refcount
= 0xFFFFFFFF;
371 cs
->temps
[index
].reg
= get_hw_temp_tex(rp
);
373 REG_SET_TYPE(r
, REG_TYPE_TEMP
);
374 REG_SET_INDEX(r
, index
);
375 REG_SET_VALID(r
, GL_TRUE
);
379 static void free_temp(struct r300_fragment_program
*rp
, GLuint r
)
382 GLuint index
= REG_GET_INDEX(r
);
384 if (!(cs
->temp_in_use
& (1 << index
)))
387 if (REG_GET_TYPE(r
) == REG_TYPE_TEMP
) {
388 free_hw_temp(rp
, cs
->temps
[index
].reg
);
389 cs
->temps
[index
].reg
= -1;
390 cs
->temp_in_use
&= ~(1 << index
);
391 } else if (REG_GET_TYPE(r
) == REG_TYPE_INPUT
) {
392 free_hw_temp(rp
, cs
->inputs
[index
].reg
);
393 cs
->inputs
[index
].reg
= -1;
397 static GLuint
emit_param4fv(struct r300_fragment_program
*rp
,
404 pidx
= rp
->param_nr
++;
405 index
= rp
->const_nr
++;
406 if (pidx
>= PFS_NUM_CONST_REGS
|| index
>= PFS_NUM_CONST_REGS
) {
407 ERROR("Out of const/param slots!\n");
411 rp
->param
[pidx
].idx
= index
;
412 rp
->param
[pidx
].values
= values
;
413 rp
->params_uptodate
= GL_FALSE
;
415 REG_SET_TYPE(r
, REG_TYPE_CONST
);
416 REG_SET_INDEX(r
, index
);
417 REG_SET_VALID(r
, GL_TRUE
);
421 static GLuint
emit_const4fv(struct r300_fragment_program
*rp
, GLfloat
*cp
)
426 index
= rp
->const_nr
++;
427 if (index
>= PFS_NUM_CONST_REGS
) {
428 ERROR("Out of hw constants!\n");
432 COPY_4V(rp
->constant
[index
], cp
);
434 REG_SET_TYPE(r
, REG_TYPE_CONST
);
435 REG_SET_INDEX(r
, index
);
436 REG_SET_VALID(r
, GL_TRUE
);
440 static inline GLuint
negate(GLuint r
)
447 /* Hack, to prevent clobbering sources used multiple times when
448 * emulating non-native instructions
450 static inline GLuint
keep(GLuint r
)
452 REG_SET_NO_USE(r
, GL_TRUE
);
456 static inline GLuint
absolute(GLuint r
)
462 static int swz_native(struct r300_fragment_program
*rp
,
467 /* Native swizzle, handle negation */
468 src
= (src
& ~REG_NEGS_MASK
) |
469 (((arbneg
>> 3) & 1) << REG_NEGS_SHIFT
);
471 if ((arbneg
& 0x7) == 0x0) {
472 src
= src
& ~REG_NEGV_MASK
;
474 } else if ((arbneg
& 0x7) == 0x7) {
475 src
|= REG_NEGV_MASK
;
478 if (!REG_GET_VALID(*r
))
479 *r
= get_temp_reg(rp
);
480 src
|= REG_NEGV_MASK
;
489 src
= src
& ~REG_NEGV_MASK
;
493 (arbneg
^ 0x7) | WRITEMASK_W
,
503 static int swz_emit_partial(struct r300_fragment_program
*rp
,
513 if (!REG_GET_VALID(*r
))
514 *r
= get_temp_reg(rp
);
516 /* A partial match, VSWZ/mask define what parts of the
517 * desired swizzle we match
519 if (mc
+ s_mask
[mask
].count
== 3) {
521 src
|= ((arbneg
>> 3) & 1) << REG_NEGS_SHIFT
;
524 tmp
= arbneg
& s_mask
[mask
].mask
;
526 tmp
= tmp
^ s_mask
[mask
].mask
;
531 arbneg
& s_mask
[mask
].mask
,
532 keep(src
) | REG_NEGV_MASK
,
537 REG_SET_NO_USE(src
, GL_TRUE
);
539 REG_SET_NO_USE(src
, GL_FALSE
);
551 REG_SET_NO_USE(src
, GL_TRUE
);
553 REG_SET_NO_USE(src
, GL_FALSE
);
558 (arbneg
& s_mask
[mask
].mask
) | wmask
,
566 REG_SET_NO_USE(src
, GL_TRUE
);
568 REG_SET_NO_USE(src
, GL_FALSE
);
570 emit_arith(rp
, PFS_OP_MAD
,
572 s_mask
[mask
].mask
| wmask
,
579 return s_mask
[mask
].count
;
582 static GLuint
do_swizzle(struct r300_fragment_program
*rp
,
592 /* If swizzling from something without an XYZW native swizzle,
593 * emit result to a temp, and do new swizzle from the temp.
596 if (REG_GET_VSWZ(src
) != SWIZZLE_XYZ
||
597 REG_GET_SSWZ(src
) != SWIZZLE_W
) {
598 GLuint temp
= get_temp_reg(rp
);
611 if (REG_GET_VSWZ(src
) != SWIZZLE_XYZ
||
612 REG_GET_SSWZ(src
) != SWIZZLE_W
) {
613 GLuint vsrcswz
= (v_swiz
[REG_GET_VSWZ(src
)].hash
& (SWZ_X_MASK
|SWZ_Y_MASK
|SWZ_Z_MASK
)) | REG_GET_SSWZ(src
) << 9;
618 for(i
=0; i
< 4; ++i
){
619 offset
= GET_SWZ(arbswz
, i
);
621 newswz
|= (offset
<= 3)?GET_SWZ(vsrcswz
, offset
) << i
*3:offset
<< i
*3;
624 arbswz
= newswz
& (SWZ_X_MASK
|SWZ_Y_MASK
|SWZ_Z_MASK
);
625 REG_SET_SSWZ(src
, GET_SWZ(newswz
, 3));
629 /* set scalar swizzling */
630 REG_SET_SSWZ(src
, GET_SWZ(arbswz
, 3));
634 vswz
= REG_GET_VSWZ(src
);
638 REG_SET_VSWZ(src
, vswz
);
639 chash
= v_swiz
[REG_GET_VSWZ(src
)].hash
&
642 if (chash
== (arbswz
& s_mask
[c_mask
].hash
)) {
643 if (s_mask
[c_mask
].count
== 3) {
644 v_match
+= swz_native(rp
,
649 v_match
+= swz_emit_partial(rp
,
660 /* Fill with something invalid.. all 0's was
661 * wrong before, matched SWIZZLE_X. So all
662 * 1's will be okay for now
664 arbswz
|= (PFS_INVAL
& s_mask
[c_mask
].hash
);
666 } while(v_swiz
[++vswz
].hash
!= PFS_INVAL
);
667 REG_SET_VSWZ(src
, SWIZZLE_XYZ
);
668 } while (s_mask
[++c_mask
].hash
!= PFS_INVAL
);
670 ERROR("should NEVER get here\n");
674 static GLuint
t_src(struct r300_fragment_program
*rp
,
675 struct prog_src_register fpsrc
)
679 switch (fpsrc
.File
) {
680 case PROGRAM_TEMPORARY
:
681 REG_SET_INDEX(r
, fpsrc
.Index
);
682 REG_SET_VALID(r
, GL_TRUE
);
683 REG_SET_TYPE(r
, REG_TYPE_TEMP
);
686 REG_SET_INDEX(r
, fpsrc
.Index
);
687 REG_SET_VALID(r
, GL_TRUE
);
688 REG_SET_TYPE(r
, REG_TYPE_INPUT
);
690 case PROGRAM_LOCAL_PARAM
:
691 r
= emit_param4fv(rp
,
692 rp
->mesa_program
.Base
.LocalParams
[fpsrc
.Index
]);
694 case PROGRAM_ENV_PARAM
:
695 r
= emit_param4fv(rp
,
696 rp
->ctx
->FragmentProgram
.Parameters
[fpsrc
.Index
]);
698 case PROGRAM_STATE_VAR
:
699 case PROGRAM_NAMED_PARAM
:
700 r
= emit_param4fv(rp
,
701 rp
->mesa_program
.Base
.Parameters
->ParameterValues
[fpsrc
.Index
]);
704 ERROR("unknown SrcReg->File %x\n", fpsrc
.File
);
708 /* no point swizzling ONE/ZERO/HALF constants... */
709 if (REG_GET_VSWZ(r
) < SWIZZLE_111
|| REG_GET_SSWZ(r
) < SWIZZLE_ZERO
)
710 r
= do_swizzle(rp
, r
, fpsrc
.Swizzle
, fpsrc
.NegateBase
);
714 static GLuint
t_scalar_src(struct r300_fragment_program
*rp
,
715 struct prog_src_register fpsrc
)
717 struct prog_src_register src
= fpsrc
;
718 int sc
= GET_SWZ(fpsrc
.Swizzle
, 0); /* X */
720 src
.Swizzle
= ((sc
<<0)|(sc
<<3)|(sc
<<6)|(sc
<<9));
722 return t_src(rp
, src
);
725 static GLuint
t_dst(struct r300_fragment_program
*rp
,
726 struct prog_dst_register dest
)
731 case PROGRAM_TEMPORARY
:
732 REG_SET_INDEX(r
, dest
.Index
);
733 REG_SET_VALID(r
, GL_TRUE
);
734 REG_SET_TYPE(r
, REG_TYPE_TEMP
);
737 REG_SET_TYPE(r
, REG_TYPE_OUTPUT
);
738 switch (dest
.Index
) {
739 case FRAG_RESULT_COLR
:
740 case FRAG_RESULT_DEPR
:
741 REG_SET_INDEX(r
, dest
.Index
);
742 REG_SET_VALID(r
, GL_TRUE
);
745 ERROR("Bad DstReg->Index 0x%x\n", dest
.Index
);
749 ERROR("Bad DstReg->File 0x%x\n", dest
.File
);
754 static int t_hw_src(struct r300_fragment_program
*rp
,
760 int index
= REG_GET_INDEX(src
);
762 switch(REG_GET_TYPE(src
)) {
764 /* NOTE: if reg==-1 here, a source is being read that
765 * hasn't been written to. Undefined results
767 if (cs
->temps
[index
].reg
== -1)
768 cs
->temps
[index
].reg
= get_hw_temp(rp
);
770 idx
= cs
->temps
[index
].reg
;
772 if (!REG_GET_NO_USE(src
) &&
773 (--cs
->temps
[index
].refcount
== 0))
777 idx
= cs
->inputs
[index
].reg
;
779 if (!REG_GET_NO_USE(src
) &&
780 (--cs
->inputs
[index
].refcount
== 0))
781 free_hw_temp(rp
, cs
->inputs
[index
].reg
);
784 return (index
| SRC_CONST
);
786 ERROR("Invalid type for source reg\n");
787 return (0 | SRC_CONST
);
791 cs
->used_in_node
|= (1 << idx
);
796 static int t_hw_dst(struct r300_fragment_program
*rp
,
802 GLuint index
= REG_GET_INDEX(dest
);
803 assert(REG_GET_VALID(dest
));
805 switch(REG_GET_TYPE(dest
)) {
807 if (cs
->temps
[REG_GET_INDEX(dest
)].reg
== -1) {
809 cs
->temps
[index
].reg
= get_hw_temp(rp
);
811 cs
->temps
[index
].reg
= get_hw_temp_tex(rp
);
814 idx
= cs
->temps
[index
].reg
;
816 if (!REG_GET_NO_USE(dest
) &&
817 (--cs
->temps
[index
].refcount
== 0))
820 cs
->dest_in_node
|= (1 << idx
);
821 cs
->used_in_node
|= (1 << idx
);
823 case REG_TYPE_OUTPUT
:
825 case FRAG_RESULT_COLR
:
826 rp
->node
[rp
->cur_node
].flags
|= R300_PFS_NODE_OUTPUT_COLOR
;
828 case FRAG_RESULT_DEPR
:
829 rp
->node
[rp
->cur_node
].flags
|= R300_PFS_NODE_OUTPUT_DEPTH
;
835 ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest
));
842 static void emit_nop(struct r300_fragment_program
*rp
,
849 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
851 if (mask
& WRITEMASK_XYZ
) {
852 rp
->alu
.inst
[cs
->v_pos
].inst0
= NOP_INST0
;
853 rp
->alu
.inst
[cs
->v_pos
].inst1
= NOP_INST1
;
857 if (mask
& WRITEMASK_W
) {
858 rp
->alu
.inst
[cs
->s_pos
].inst2
= NOP_INST2
;
859 rp
->alu
.inst
[cs
->s_pos
].inst3
= NOP_INST3
;
864 static void emit_tex(struct r300_fragment_program
*rp
,
865 struct prog_instruction
*fpi
,
869 GLuint coord
= t_src(rp
, fpi
->SrcReg
[0]);
870 GLuint dest
= undef
, rdest
= undef
;
871 GLuint din
= cs
->dest_in_node
, uin
= cs
->used_in_node
;
872 int unit
= fpi
->TexSrcUnit
;
875 /* Resolve source/dest to hardware registers */
876 hwsrc
= t_hw_src(rp
, coord
, GL_TRUE
);
877 if (opcode
!= R300_FPITX_OP_KIL
) {
878 dest
= t_dst(rp
, fpi
->DstReg
);
880 /* r300 doesn't seem to be able to do TEX->output reg */
881 if (REG_GET_TYPE(dest
) == REG_TYPE_OUTPUT
) {
883 dest
= get_temp_reg_tex(rp
);
885 hwdest
= t_hw_dst(rp
, dest
, GL_TRUE
);
887 /* Use a temp that hasn't been used in this node, rather
888 * than causing an indirection
890 if (uin
& (1 << hwdest
)) {
891 free_hw_temp(rp
, hwdest
);
892 hwdest
= get_hw_temp_tex(rp
);
893 cs
->temps
[REG_GET_INDEX(dest
)].reg
= hwdest
;
900 /* Indirection if source has been written in this node, or if the
901 * dest has been read/written in this node
903 if ((REG_GET_TYPE(coord
) != REG_TYPE_CONST
&&
904 (din
& (1<<hwsrc
))) || (uin
& (1<<hwdest
))) {
906 /* Finish off current node */
907 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
908 if (rp
->node
[rp
->cur_node
].alu_offset
== cs
->v_pos
) {
909 /* No alu instructions in the node? Emit a NOP. */
910 emit_nop(rp
, WRITEMASK_XYZW
, GL_TRUE
);
911 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
914 rp
->node
[rp
->cur_node
].alu_end
=
915 cs
->v_pos
- rp
->node
[rp
->cur_node
].alu_offset
- 1;
916 assert(rp
->node
[rp
->cur_node
].alu_end
>= 0);
918 if (++rp
->cur_node
>= PFS_MAX_TEX_INDIRECT
) {
919 ERROR("too many levels of texture indirection\n");
924 rp
->node
[rp
->cur_node
].tex_offset
= rp
->tex
.length
;
925 rp
->node
[rp
->cur_node
].alu_offset
= cs
->v_pos
;
926 rp
->node
[rp
->cur_node
].tex_end
= -1;
927 rp
->node
[rp
->cur_node
].alu_end
= -1;
928 rp
->node
[rp
->cur_node
].flags
= 0;
929 cs
->used_in_node
= 0;
930 cs
->dest_in_node
= 0;
933 if (rp
->cur_node
== 0)
934 rp
->first_node_has_tex
= 1;
936 rp
->tex
.inst
[rp
->tex
.length
++] = 0
937 | (hwsrc
<< R300_FPITX_SRC_SHIFT
)
938 | (hwdest
<< R300_FPITX_DST_SHIFT
)
939 | (unit
<< R300_FPITX_IMAGE_SHIFT
)
940 /* not entirely sure about this */
941 | (opcode
<< R300_FPITX_OPCODE_SHIFT
);
943 cs
->dest_in_node
|= (1 << hwdest
);
944 if (REG_GET_TYPE(coord
) != REG_TYPE_CONST
)
945 cs
->used_in_node
|= (1 << hwsrc
);
947 rp
->node
[rp
->cur_node
].tex_end
++;
949 /* Copy from temp to output if needed */
950 if (REG_GET_VALID(rdest
)) {
951 emit_arith(rp
, PFS_OP_MAD
, rdest
, WRITEMASK_XYZW
, dest
,
952 pfs_one
, pfs_zero
, 0);
957 /* Add sources to FPI1/FPI3 lists. If source is already on list,
958 * reuse the index instead of wasting a source.
960 static int add_src(struct r300_fragment_program
*rp
,
968 /* Look for matches */
969 for (i
=0,csm
=srcmask
; i
<3; i
++,csm
=csm
<<1) {
970 /* If sources have been allocated in this position(s)... */
971 if ((cs
->slot
[pos
].umask
& csm
) == csm
) {
972 /* ... and the register number(s) match, re-use the
974 if (srcmask
== SLOT_VECTOR
&&
975 cs
->slot
[pos
].vsrc
[i
] == reg
)
977 if (srcmask
== SLOT_SCALAR
&&
978 cs
->slot
[pos
].ssrc
[i
] == reg
)
980 if (srcmask
== SLOT_BOTH
&&
981 cs
->slot
[pos
].vsrc
[i
] == reg
&&
982 cs
->slot
[pos
].ssrc
[i
] == reg
)
987 /* Look for free spaces */
988 for (i
=0,csm
=srcmask
; i
<3; i
++,csm
=csm
<<1) {
989 /* If the position(s) haven't been allocated */
990 if ((cs
->slot
[pos
].umask
& csm
) == 0) {
991 cs
->slot
[pos
].umask
|= csm
;
993 if (srcmask
& SLOT_VECTOR
)
994 cs
->slot
[pos
].vsrc
[i
] = reg
;
995 if (srcmask
& SLOT_SCALAR
)
996 cs
->slot
[pos
].ssrc
[i
] = reg
;
1001 //ERROR("Failed to allocate sources in FPI1/FPI3!\n");
1005 /* Determine whether or not to position opcode in the same ALU slot for both
1006 * vector and scalar portions of an instruction.
1008 * It's not necessary to force the first case, but it makes disassembled
1009 * shaders easier to read.
1011 static GLboolean
force_same_slot(int vop
,
1020 if (emit_vop
&& emit_sop
)
1023 if (emit_vop
&& vop
== R300_FPI0_OUTC_REPL_ALPHA
)
1027 for (i
=0;i
<argc
;i
++)
1028 if (REG_GET_VSWZ(src
[i
]) == SWIZZLE_WZY
)
1035 static void emit_arith(struct r300_fragment_program
*rp
,
1045 GLuint src
[3] = { src0
, src1
, src2
};
1046 int hwsrc
[3], sswz
[3], vswz
[3];
1048 GLboolean emit_vop
= GL_FALSE
, emit_sop
= GL_FALSE
;
1053 vop
= r300_fpop
[op
].v_op
;
1054 sop
= r300_fpop
[op
].s_op
;
1055 argc
= r300_fpop
[op
].argc
;
1057 if ((mask
& WRITEMASK_XYZ
) || vop
== R300_FPI0_OUTC_DP3
)
1059 if ((mask
& WRITEMASK_W
) || vop
== R300_FPI0_OUTC_REPL_ALPHA
)
1062 if (REG_GET_TYPE(dest
) == REG_TYPE_OUTPUT
&&
1063 REG_GET_INDEX(dest
) == FRAG_RESULT_DEPR
)
1064 emit_vop
= GL_FALSE
;
1066 if (force_same_slot(vop
, sop
, emit_vop
, emit_sop
, argc
, src
)) {
1067 vpos
= spos
= MAX2(cs
->v_pos
, cs
->s_pos
);
1071 /* Here is where we'd decide on where a safe place is to
1072 * combine this instruction with a previous one.
1074 * This is extremely simple for now.. if a source depends
1075 * on the opposite stream, force the same instruction.
1079 (v_swiz
[REG_GET_VSWZ(src
[i
])].flags
& SLOT_SCALAR
)) {
1080 vpos
= spos
= MAX2(vpos
, spos
);
1084 (s_swiz
[REG_GET_SSWZ(src
[i
])].flags
& SLOT_VECTOR
)) {
1085 vpos
= spos
= MAX2(vpos
, spos
);
1091 /* - Convert src->hwsrc, record for FPI1/FPI3
1092 * - Determine ARG parts of FPI0/FPI2, unused args are filled
1099 vswz
[i
] = R300_FPI0_ARGC_ZERO
;
1100 sswz
[i
] = R300_FPI2_ARGA_ZERO
;
1104 hwsrc
[i
] = t_hw_src(rp
, src
[i
], GL_FALSE
);
1106 if (emit_vop
&& vop
!= R300_FPI0_OUTC_REPL_ALPHA
) {
1107 srcpos
= add_src(rp
, hwsrc
[i
], vpos
,
1108 v_swiz
[REG_GET_VSWZ(src
[i
])].flags
);
1109 vswz
[i
] = (v_swiz
[REG_GET_VSWZ(src
[i
])].base
+
1111 v_swiz
[REG_GET_VSWZ(src
[i
])].stride
)) |
1112 ((src
[i
] & REG_NEGV_MASK
) ? ARG_NEG
: 0) |
1113 ((src
[i
] & REG_ABS_MASK
) ? ARG_ABS
: 0);
1114 } else vswz
[i
] = R300_FPI0_ARGC_ZERO
;
1117 srcpos
= add_src(rp
, hwsrc
[i
], spos
,
1118 s_swiz
[REG_GET_SSWZ(src
[i
])].flags
);
1119 sswz
[i
] = (s_swiz
[REG_GET_SSWZ(src
[i
])].base
+
1121 s_swiz
[REG_GET_SSWZ(src
[i
])].stride
)) |
1122 ((src
[i
] & REG_NEGS_MASK
) ? ARG_NEG
: 0) |
1123 ((src
[i
] & REG_ABS_MASK
) ? ARG_ABS
: 0);
1124 } else sswz
[i
] = R300_FPI2_ARGA_ZERO
;
1126 hwdest
= t_hw_dst(rp
, dest
, GL_FALSE
);
1128 if (flags
& PFS_FLAG_SAT
) {
1129 vop
|= R300_FPI0_OUTC_SAT
;
1130 sop
|= R300_FPI2_OUTA_SAT
;
1133 /* Throw the pieces together and get FPI0/1 */
1134 rp
->alu
.inst
[vpos
].inst1
=
1135 ((cs
->slot
[vpos
].vsrc
[0] << R300_FPI1_SRC0C_SHIFT
) |
1136 (cs
->slot
[vpos
].vsrc
[1] << R300_FPI1_SRC1C_SHIFT
) |
1137 (cs
->slot
[vpos
].vsrc
[2] << R300_FPI1_SRC2C_SHIFT
));
1139 rp
->alu
.inst
[vpos
].inst0
= vop
|
1140 (vswz
[0] << R300_FPI0_ARG0C_SHIFT
) |
1141 (vswz
[1] << R300_FPI0_ARG1C_SHIFT
) |
1142 (vswz
[2] << R300_FPI0_ARG2C_SHIFT
);
1144 rp
->alu
.inst
[vpos
].inst1
|= hwdest
<< R300_FPI1_DSTC_SHIFT
;
1145 if (REG_GET_TYPE(dest
) == REG_TYPE_OUTPUT
) {
1146 if (REG_GET_INDEX(dest
) == FRAG_RESULT_COLR
) {
1147 rp
->alu
.inst
[vpos
].inst1
|=
1148 (mask
& WRITEMASK_XYZ
) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT
;
1151 rp
->alu
.inst
[vpos
].inst1
|=
1152 (mask
& WRITEMASK_XYZ
) << R300_FPI1_DSTC_REG_MASK_SHIFT
;
1155 } else if (spos
>= vpos
)
1156 rp
->alu
.inst
[spos
].inst0
= NOP_INST0
;
1158 /* And now FPI2/3 */
1159 rp
->alu
.inst
[spos
].inst3
=
1160 ((cs
->slot
[spos
].ssrc
[0] << R300_FPI3_SRC0A_SHIFT
) |
1161 (cs
->slot
[spos
].ssrc
[1] << R300_FPI3_SRC1A_SHIFT
) |
1162 (cs
->slot
[spos
].ssrc
[2] << R300_FPI3_SRC2A_SHIFT
));
1164 rp
->alu
.inst
[spos
].inst2
= sop
|
1165 sswz
[0] << R300_FPI2_ARG0A_SHIFT
|
1166 sswz
[1] << R300_FPI2_ARG1A_SHIFT
|
1167 sswz
[2] << R300_FPI2_ARG2A_SHIFT
;
1169 if (mask
& WRITEMASK_W
) {
1170 if (REG_GET_TYPE(dest
) == REG_TYPE_OUTPUT
) {
1171 if (REG_GET_INDEX(dest
) == FRAG_RESULT_COLR
) {
1172 rp
->alu
.inst
[spos
].inst3
|=
1173 (hwdest
<< R300_FPI3_DSTA_SHIFT
) | R300_FPI3_DSTA_OUTPUT
;
1174 } else if (REG_GET_INDEX(dest
) == FRAG_RESULT_DEPR
) {
1175 rp
->alu
.inst
[spos
].inst3
|= R300_FPI3_DSTA_DEPTH
;
1178 rp
->alu
.inst
[spos
].inst3
|=
1179 (hwdest
<< R300_FPI3_DSTA_SHIFT
) | R300_FPI3_DSTA_REG
;
1183 } else if (vpos
>= spos
)
1184 rp
->alu
.inst
[vpos
].inst2
= NOP_INST2
;
1190 static GLuint
get_attrib(struct r300_fragment_program
*rp
, GLuint attr
)
1192 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1195 if (!(mp
->Base
.InputsRead
& (1<<attr
))) {
1196 ERROR("Attribute %d was not provided!\n", attr
);
1200 REG_SET_TYPE(r
, REG_TYPE_INPUT
);
1201 REG_SET_INDEX(r
, attr
);
1202 REG_SET_VALID(r
, GL_TRUE
);
1207 static void make_sin_const(struct r300_fragment_program
*rp
)
1209 if(rp
->const_sin
[0] == -1){
1212 cnstv
[0] = 1.273239545; // 4/PI
1213 cnstv
[1] =-0.405284735; // -4/(PI*PI)
1214 cnstv
[2] = 3.141592654; // PI
1215 cnstv
[3] = 0.2225; // weight
1216 rp
->const_sin
[0] = emit_const4fv(rp
, cnstv
);
1220 cnstv
[2] = 0.159154943; // 1/(2*PI)
1221 cnstv
[3] = 6.283185307; // 2*PI
1222 rp
->const_sin
[1] = emit_const4fv(rp
, cnstv
);
1226 static GLboolean
parse_program(struct r300_fragment_program
*rp
)
1228 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1229 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
1230 struct prog_instruction
*fpi
;
1231 GLuint src
[3], dest
, temp
[2];
1233 int flags
, mask
= 0;
1234 GLfloat cnstv
[4] = {0.0, 0.0, 0.0, 0.0};
1236 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
1237 ERROR("empty program?\n");
1241 for (fpi
=mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1242 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
)
1243 flags
= PFS_FLAG_SAT
;
1247 if (fpi
->Opcode
!= OPCODE_KIL
) {
1248 dest
= t_dst(rp
, fpi
->DstReg
);
1249 mask
= fpi
->DstReg
.WriteMask
;
1252 switch (fpi
->Opcode
) {
1254 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1255 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1256 absolute(src
[0]), pfs_one
, pfs_zero
,
1260 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1261 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1262 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1263 src
[0], pfs_one
, src
[1],
1267 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1268 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1269 src
[2] = t_src(rp
, fpi
->SrcReg
[2]);
1270 /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c
1271 * r300 - if src2.c < 0.0 ? src1.c : src0.c
1273 emit_arith(rp
, PFS_OP_CMP
, dest
, mask
,
1274 src
[2], src
[1], src
[0],
1279 * cos using a parabola (see SIN):
1281 * x = (x/(2*PI))+0.75
1286 temp
[0] = get_temp_reg(rp
);
1288 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1290 /* add 0.5*PI and do range reduction */
1292 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_X
,
1293 swizzle(src
[0], X
, X
, X
, X
),
1294 swizzle(rp
->const_sin
[1], Z
, Z
, Z
, Z
),
1295 swizzle(rp
->const_sin
[1], X
, X
, X
, X
),
1298 emit_arith(rp
, PFS_OP_FRC
, temp
[0], WRITEMASK_X
,
1299 swizzle(temp
[0], X
, X
, X
, X
),
1304 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_Z
,
1305 swizzle(temp
[0], X
, X
, X
, X
),
1306 swizzle(rp
->const_sin
[1], W
, W
, W
, W
), //2*PI
1307 negate(swizzle(rp
->const_sin
[0], Z
, Z
, Z
, Z
)), //-PI
1312 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_X
| WRITEMASK_Y
,
1313 swizzle(temp
[0], Z
, Z
, Z
, Z
),
1318 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_X
,
1319 swizzle(temp
[0], Y
, Y
, Y
, Y
),
1320 absolute(swizzle(temp
[0], Z
, Z
, Z
, Z
)),
1321 swizzle(temp
[0], X
, X
, X
, X
),
1324 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_Y
,
1325 swizzle(temp
[0], X
, X
, X
, X
),
1326 absolute(swizzle(temp
[0], X
, X
, X
, X
)),
1327 negate(swizzle(temp
[0], X
, X
, X
, X
)),
1331 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1332 swizzle(temp
[0], Y
, Y
, Y
, Y
),
1333 swizzle(rp
->const_sin
[0], W
, W
, W
, W
),
1334 swizzle(temp
[0], X
, X
, X
, X
),
1337 free_temp(rp
, temp
[0]);
1340 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1341 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1342 emit_arith(rp
, PFS_OP_DP3
, dest
, mask
,
1343 src
[0], src
[1], undef
,
1347 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1348 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1349 emit_arith(rp
, PFS_OP_DP4
, dest
, mask
,
1350 src
[0], src
[1], undef
,
1354 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1355 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1356 /* src0.xyz1 -> temp
1357 * DP4 dest, temp, src1
1360 temp
[0] = get_temp_reg(rp
);
1361 src
[0].s_swz
= SWIZZLE_ONE
;
1362 emit_arith(rp
, PFS_OP_MAD
, temp
[0], mask
,
1363 src
[0], pfs_one
, pfs_zero
,
1365 emit_arith(rp
, PFS_OP_DP4
, dest
, mask
,
1366 temp
[0], src
[1], undef
,
1368 free_temp(rp
, temp
[0]);
1370 emit_arith(rp
, PFS_OP_DP4
, dest
, mask
,
1371 swizzle(src
[0], X
, Y
, Z
, ONE
), src
[1],
1376 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1377 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1378 /* dest.y = src0.y * src1.y */
1379 if (mask
& WRITEMASK_Y
)
1380 emit_arith(rp
, PFS_OP_MAD
, dest
, WRITEMASK_Y
,
1381 keep(src
[0]), keep(src
[1]),
1383 /* dest.z = src0.z */
1384 if (mask
& WRITEMASK_Z
)
1385 emit_arith(rp
, PFS_OP_MAD
, dest
, WRITEMASK_Z
,
1386 src
[0], pfs_one
, pfs_zero
, flags
);
1388 * result.w = src1.w */
1389 if (mask
& WRITEMASK_XW
) {
1390 REG_SET_VSWZ(src
[1], SWIZZLE_111
); /*Cheat*/
1391 emit_arith(rp
, PFS_OP_MAD
, dest
,
1392 mask
& WRITEMASK_XW
,
1393 src
[1], pfs_one
, pfs_zero
,
1398 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1399 emit_arith(rp
, PFS_OP_EX2
, dest
, mask
,
1400 src
[0], undef
, undef
,
1404 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1405 temp
[0] = get_temp_reg(rp
);
1407 * MAD dest, src0, 1.0, -temp
1409 emit_arith(rp
, PFS_OP_FRC
, temp
[0], mask
,
1410 keep(src
[0]), undef
, undef
,
1412 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1413 src
[0], pfs_one
, negate(temp
[0]),
1415 free_temp(rp
, temp
[0]);
1418 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1419 emit_arith(rp
, PFS_OP_FRC
, dest
, mask
,
1420 src
[0], undef
, undef
,
1424 emit_tex(rp
, fpi
, R300_FPITX_OP_KIL
);
1427 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1428 emit_arith(rp
, PFS_OP_LG2
, dest
, mask
,
1429 src
[0], undef
, undef
,
1434 * if (s.x < 0) t.x = 0; else t.x = s.x;
1435 * if (s.y < 0) t.y = 0; else t.y = s.y;
1436 * if (s.w > 128.0) t.w = 128.0; else t.w = s.w;
1437 * if (s.w < -128.0) t.w = -128.0; else t.w = s.w;
1439 * if (t.x > 0) r.y = pow(t.y, t.w); else r.y = 0;
1440 * Also r.y = 0 if t.y < 0
1441 * For the t.x > 0 FGLRX use the CMPH opcode which
1442 * change the compare to (t.x + 0.5) > 0.5 we may
1443 * save one instruction by doing CMP -t.x
1445 cnstv
[0] = cnstv
[1] = cnstv
[2] = cnstv
[3] = 0.50001;
1446 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1447 temp
[0] = get_temp_reg(rp
);
1448 cnst
= emit_const4fv(rp
, cnstv
);
1449 emit_arith(rp
, PFS_OP_CMP
, temp
[0],
1450 WRITEMASK_X
| WRITEMASK_Y
,
1451 src
[0], pfs_zero
, src
[0], flags
);
1452 emit_arith(rp
, PFS_OP_MIN
, temp
[0], WRITEMASK_Z
,
1453 swizzle(keep(src
[0]), W
, W
, W
, W
),
1454 cnst
, undef
, flags
);
1455 emit_arith(rp
, PFS_OP_LG2
, temp
[0], WRITEMASK_W
,
1456 swizzle(temp
[0], Y
, Y
, Y
, Y
),
1457 undef
, undef
, flags
);
1458 emit_arith(rp
, PFS_OP_MAX
, temp
[0], WRITEMASK_Z
,
1459 temp
[0], negate(cnst
), undef
, flags
);
1460 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_W
,
1461 temp
[0], swizzle(temp
[0], Z
, Z
, Z
, Z
),
1463 emit_arith(rp
, PFS_OP_EX2
, temp
[0], WRITEMASK_W
,
1464 temp
[0], undef
, undef
, flags
);
1465 emit_arith(rp
, PFS_OP_MAD
, dest
, WRITEMASK_Y
,
1466 swizzle(keep(temp
[0]), X
, X
, X
, X
),
1467 pfs_one
, pfs_zero
, flags
);
1469 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_X
,
1470 temp
[0], pfs_one
, pfs_half
, flags
);
1471 emit_arith(rp
, PFS_OP_CMPH
, temp
[0], WRITEMASK_Z
,
1472 swizzle(keep(temp
[0]), W
, W
, W
, W
),
1473 pfs_zero
, swizzle(keep(temp
[0]), X
, X
, X
, X
),
1476 emit_arith(rp
, PFS_OP_CMP
, temp
[0], WRITEMASK_Z
,
1478 swizzle(keep(temp
[0]), W
, W
, W
, W
),
1479 negate(swizzle(keep(temp
[0]), X
, X
, X
, X
)),
1482 emit_arith(rp
, PFS_OP_CMP
, dest
, WRITEMASK_Z
,
1484 negate(swizzle(keep(temp
[0]), Y
, Y
, Y
, Y
)),
1486 emit_arith(rp
, PFS_OP_MAD
, dest
,
1487 WRITEMASK_X
| WRITEMASK_W
,
1492 free_temp(rp
, temp
[0]);
1495 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1496 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1497 src
[2] = t_src(rp
, fpi
->SrcReg
[2]);
1498 /* result = tmp0tmp1 + (1 - tmp0)tmp2
1499 * = tmp0tmp1 + tmp2 + (-tmp0)tmp2
1500 * MAD temp, -tmp0, tmp2, tmp2
1501 * MAD result, tmp0, tmp1, temp
1503 temp
[0] = get_temp_reg(rp
);
1504 emit_arith(rp
, PFS_OP_MAD
, temp
[0], mask
,
1505 negate(keep(src
[0])), keep(src
[2]), src
[2],
1507 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1508 src
[0], src
[1], temp
[0],
1510 free_temp(rp
, temp
[0]);
1513 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1514 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1515 src
[2] = t_src(rp
, fpi
->SrcReg
[2]);
1516 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1517 src
[0], src
[1], src
[2],
1521 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1522 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1523 emit_arith(rp
, PFS_OP_MAX
, dest
, mask
,
1524 src
[0], src
[1], undef
,
1528 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1529 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1530 emit_arith(rp
, PFS_OP_MIN
, dest
, mask
,
1531 src
[0], src
[1], undef
,
1536 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1537 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1538 src
[0], pfs_one
, pfs_zero
,
1542 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1543 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1544 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1545 src
[0], src
[1], pfs_zero
,
1549 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1550 src
[1] = t_scalar_src(rp
, fpi
->SrcReg
[1]);
1551 temp
[0] = get_temp_reg(rp
);
1552 emit_arith(rp
, PFS_OP_LG2
, temp
[0], WRITEMASK_W
,
1553 src
[0], undef
, undef
,
1555 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_W
,
1556 temp
[0], src
[1], pfs_zero
,
1558 emit_arith(rp
, PFS_OP_EX2
, dest
, fpi
->DstReg
.WriteMask
,
1559 temp
[0], undef
, undef
,
1561 free_temp(rp
, temp
[0]);
1564 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1565 emit_arith(rp
, PFS_OP_RCP
, dest
, mask
,
1566 src
[0], undef
, undef
,
1570 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1571 emit_arith(rp
, PFS_OP_RSQ
, dest
, mask
,
1572 absolute(src
[0]), pfs_zero
, pfs_zero
,
1577 * scs using a parabola :
1579 * result.x = sin(-abs(x)+0.5*PI) (cos)
1580 * result.y = sin(x) (sin)
1583 temp
[0] = get_temp_reg(rp
);
1584 temp
[1] = get_temp_reg(rp
);
1586 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1588 /* x = -abs(x)+0.5*PI */
1589 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_Z
,
1590 swizzle(rp
->const_sin
[0], Z
, Z
, Z
, Z
), //PI
1592 negate(abs(swizzle(keep(src
[0]), X
, X
, X
, X
))),
1596 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_W
,
1597 swizzle(rp
->const_sin
[0], Y
, Y
, Y
, Y
),
1598 swizzle(keep(src
[0]), X
, X
, X
, X
),
1602 /* B*x, C*x (cos) */
1603 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_X
| WRITEMASK_Y
,
1604 swizzle(temp
[0], Z
, Z
, Z
, Z
),
1610 emit_arith(rp
, PFS_OP_MAD
, temp
[1], WRITEMASK_W
,
1611 swizzle(rp
->const_sin
[0], X
, X
, X
, X
),
1616 /* y = B*x + C*x*abs(x) (sin)*/
1617 emit_arith(rp
, PFS_OP_MAD
, temp
[1], WRITEMASK_Z
,
1619 swizzle(temp
[0], W
, W
, W
, W
),
1620 swizzle(temp
[1], W
, W
, W
, W
),
1623 /* y = B*x + C*x*abs(x) (cos)*/
1624 emit_arith(rp
, PFS_OP_MAD
, temp
[1], WRITEMASK_W
,
1625 swizzle(temp
[0], Y
, Y
, Y
, Y
),
1626 absolute(swizzle(temp
[0], Z
, Z
, Z
, Z
)),
1627 swizzle(temp
[0], X
, X
, X
, X
),
1630 /* y*abs(y) - y (cos), y*abs(y) - y (sin) */
1631 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_X
| WRITEMASK_Y
,
1632 swizzle(temp
[1], W
, Z
, Y
, X
),
1633 absolute(swizzle(temp
[1], W
, Z
, Y
, X
)),
1634 negate(swizzle(temp
[1], W
, Z
, Y
, X
)),
1638 /* dest.xy = mad(temp.xy, P, temp2.wz) */
1639 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
& (WRITEMASK_X
| WRITEMASK_Y
),
1641 swizzle(rp
->const_sin
[0], W
, W
, W
, W
),
1642 swizzle(temp
[1], W
, Z
, Y
, X
),
1645 free_temp(rp
, temp
[0]);
1646 free_temp(rp
, temp
[1]);
1649 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1650 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1651 temp
[0] = get_temp_reg(rp
);
1652 /* temp = src0 - src1
1653 * dest.c = (temp.c < 0.0) ? 0 : 1
1655 emit_arith(rp
, PFS_OP_MAD
, temp
[0], mask
,
1656 src
[0], pfs_one
, negate(src
[1]),
1658 emit_arith(rp
, PFS_OP_CMP
, dest
, mask
,
1659 pfs_one
, pfs_zero
, temp
[0],
1661 free_temp(rp
, temp
[0]);
1666 * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x)
1667 * extra precision is obtained by weighting against
1671 temp
[0] = get_temp_reg(rp
);
1673 src
[0] = t_scalar_src(rp
, fpi
->SrcReg
[0]);
1676 /* do range reduction */
1678 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_X
,
1679 swizzle(keep(src
[0]), X
, X
, X
, X
),
1680 swizzle(rp
->const_sin
[1], Z
, Z
, Z
, Z
),
1684 emit_arith(rp
, PFS_OP_FRC
, temp
[0], WRITEMASK_X
,
1685 swizzle(temp
[0], X
, X
, X
, X
),
1690 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_Z
,
1691 swizzle(temp
[0], X
, X
, X
, X
),
1692 swizzle(rp
->const_sin
[1], W
, W
, W
, W
), //2*PI
1693 negate(swizzle(rp
->const_sin
[0], Z
, Z
, Z
, Z
)), //PI
1698 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_X
| WRITEMASK_Y
,
1699 swizzle(temp
[0], Z
, Z
, Z
, Z
),
1704 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_X
,
1705 swizzle(temp
[0], Y
, Y
, Y
, Y
),
1706 absolute(swizzle(temp
[0], Z
, Z
, Z
, Z
)),
1707 swizzle(temp
[0], X
, X
, X
, X
),
1710 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_Y
,
1711 swizzle(temp
[0], X
, X
, X
, X
),
1712 absolute(swizzle(temp
[0], X
, X
, X
, X
)),
1713 negate(swizzle(temp
[0], X
, X
, X
, X
)),
1717 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1718 swizzle(temp
[0], Y
, Y
, Y
, Y
),
1719 swizzle(rp
->const_sin
[0], W
, W
, W
, W
),
1720 swizzle(temp
[0], X
, X
, X
, X
),
1723 free_temp(rp
, temp
[0]);
1726 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1727 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1728 temp
[0] = get_temp_reg(rp
);
1729 /* temp = src0 - src1
1730 * dest.c = (temp.c < 0.0) ? 1 : 0
1732 emit_arith(rp
, PFS_OP_MAD
, temp
[0], mask
,
1733 src
[0], pfs_one
, negate(src
[1]),
1735 emit_arith(rp
, PFS_OP_CMP
, dest
, mask
,
1736 pfs_zero
, pfs_one
, temp
[0],
1738 free_temp(rp
, temp
[0]);
1741 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1742 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1743 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
,
1744 src
[0], pfs_one
, negate(src
[1]),
1748 emit_tex(rp
, fpi
, R300_FPITX_OP_TEX
);
1751 emit_tex(rp
, fpi
, R300_FPITX_OP_TXB
);
1754 emit_tex(rp
, fpi
, R300_FPITX_OP_TXP
);
1757 src
[0] = t_src(rp
, fpi
->SrcReg
[0]);
1758 src
[1] = t_src(rp
, fpi
->SrcReg
[1]);
1759 temp
[0] = get_temp_reg(rp
);
1760 /* temp = src0.zxy * src1.yzx */
1761 emit_arith(rp
, PFS_OP_MAD
, temp
[0], WRITEMASK_XYZ
,
1762 swizzle(keep(src
[0]), Z
, X
, Y
, W
),
1763 swizzle(keep(src
[1]), Y
, Z
, X
, W
),
1766 /* dest.xyz = src0.yzx * src1.zxy - temp
1767 * dest.w = undefined
1769 emit_arith(rp
, PFS_OP_MAD
, dest
, mask
& WRITEMASK_XYZ
,
1770 swizzle(src
[0], Y
, Z
, X
, W
),
1771 swizzle(src
[1], Z
, X
, Y
, W
),
1775 free_temp(rp
, temp
[0]);
1779 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
1791 static void insert_wpos(struct gl_program
*prog
)
1793 GLint tokens
[6] = { STATE_INTERNAL
, STATE_R300_WINDOW_DIMENSION
, 0, 0, 0, 0 };
1794 struct prog_instruction
*fpi
;
1795 GLuint window_index
;
1797 GLuint tempregi
= prog
->NumTemporaries
;
1798 /* should do something else if no temps left... */
1799 prog
->NumTemporaries
++;
1802 fpi
= malloc((prog
->NumInstructions
+ 3) * sizeof(struct prog_instruction
));
1803 /* all including END */
1804 memcpy(&fpi
[3], prog
->Instructions
, prog
->NumInstructions
* sizeof(struct prog_instruction
));
1806 memset(fpi
, 0, 3 * sizeof(struct prog_instruction
));
1808 /* perspective divide */
1809 fpi
[i
].Opcode
= OPCODE_RCP
;
1811 fpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
1812 fpi
[i
].DstReg
.Index
= tempregi
;
1813 fpi
[i
].DstReg
.WriteMask
= WRITEMASK_W
;
1814 fpi
[i
].DstReg
.CondMask
= COND_TR
;
1816 fpi
[i
].SrcReg
[0].File
= PROGRAM_INPUT
;
1817 fpi
[i
].SrcReg
[0].Index
= FRAG_ATTRIB_WPOS
;
1818 fpi
[i
].SrcReg
[0].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
);
1821 fpi
[i
].Opcode
= OPCODE_MUL
;
1823 fpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
1824 fpi
[i
].DstReg
.Index
= tempregi
;
1825 fpi
[i
].DstReg
.WriteMask
= WRITEMASK_XYZ
;
1826 fpi
[i
].DstReg
.CondMask
= COND_TR
;
1828 fpi
[i
].SrcReg
[0].File
= PROGRAM_INPUT
;
1829 fpi
[i
].SrcReg
[0].Index
= FRAG_ATTRIB_WPOS
;
1830 fpi
[i
].SrcReg
[0].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
1832 fpi
[i
].SrcReg
[1].File
= PROGRAM_TEMPORARY
;
1833 fpi
[i
].SrcReg
[1].Index
= tempregi
;
1834 fpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
, SWIZZLE_W
);
1837 /* viewport transformation */
1838 window_index
= _mesa_add_state_reference(prog
->Parameters
, tokens
);
1840 fpi
[i
].Opcode
= OPCODE_MAD
;
1842 fpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
1843 fpi
[i
].DstReg
.Index
= tempregi
;
1844 fpi
[i
].DstReg
.WriteMask
= WRITEMASK_XYZ
;
1845 fpi
[i
].DstReg
.CondMask
= COND_TR
;
1847 fpi
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1848 fpi
[i
].SrcReg
[0].Index
= tempregi
;
1849 fpi
[i
].SrcReg
[0].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ZERO
);
1851 fpi
[i
].SrcReg
[1].File
= PROGRAM_STATE_VAR
;
1852 fpi
[i
].SrcReg
[1].Index
= window_index
;
1853 fpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ZERO
);
1855 fpi
[i
].SrcReg
[2].File
= PROGRAM_STATE_VAR
;
1856 fpi
[i
].SrcReg
[2].Index
= window_index
;
1857 fpi
[i
].SrcReg
[2].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ZERO
);
1860 free(prog
->Instructions
);
1862 prog
->Instructions
= fpi
;
1864 prog
->NumInstructions
+= i
;
1865 fpi
= &prog
->Instructions
[prog
->NumInstructions
-1];
1867 assert(fpi
->Opcode
== OPCODE_END
);
1869 for(fpi
= &prog
->Instructions
[3]; fpi
->Opcode
!= OPCODE_END
; fpi
++){
1871 if( fpi
->SrcReg
[i
].File
== PROGRAM_INPUT
&&
1872 fpi
->SrcReg
[i
].Index
== FRAG_ATTRIB_WPOS
){
1873 fpi
->SrcReg
[i
].File
= PROGRAM_TEMPORARY
;
1874 fpi
->SrcReg
[i
].Index
= tempregi
;
1879 /* - Init structures
1880 * - Determine what hwregs each input corresponds to
1882 static void init_program(r300ContextPtr r300
, struct r300_fragment_program
*rp
)
1884 struct r300_pfs_compile_state
*cs
= NULL
;
1885 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
1886 struct prog_instruction
*fpi
;
1887 GLuint InputsRead
= mp
->Base
.InputsRead
;
1888 GLuint temps_used
= 0; /* for rp->temps[] */
1891 /* New compile, reset tracking data */
1892 rp
->optimization
= driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
1893 rp
->translated
= GL_FALSE
;
1894 rp
->error
= GL_FALSE
;
1895 rp
->cs
= cs
= &(R300_CONTEXT(rp
->ctx
)->state
.pfs_compile
);
1898 rp
->first_node_has_tex
= 0;
1901 rp
->params_uptodate
= GL_FALSE
;
1902 rp
->max_temp_idx
= 0;
1903 rp
->node
[0].alu_end
= -1;
1904 rp
->node
[0].tex_end
= -1;
1905 rp
->const_sin
[0] = -1;
1907 _mesa_memset(cs
, 0, sizeof(*rp
->cs
));
1908 for (i
=0;i
<PFS_MAX_ALU_INST
;i
++) {
1910 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
1911 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
1915 /* Work out what temps the Mesa inputs correspond to, this must match
1916 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1917 * configures itself based on the fragprog's InputsRead
1919 * NOTE: this depends on get_hw_temp() allocating registers in order,
1920 * starting from register 0.
1923 /* Texcoords come first */
1924 for (i
=0;i
<rp
->ctx
->Const
.MaxTextureUnits
;i
++) {
1925 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1926 cs
->inputs
[FRAG_ATTRIB_TEX0
+i
].refcount
= 0;
1927 cs
->inputs
[FRAG_ATTRIB_TEX0
+i
].reg
= get_hw_temp(rp
);
1930 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
1932 /* fragment position treated as a texcoord */
1933 if (InputsRead
& FRAG_BIT_WPOS
) {
1934 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
1935 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
= get_hw_temp(rp
);
1936 insert_wpos(&mp
->Base
);
1938 InputsRead
&= ~FRAG_BIT_WPOS
;
1940 /* Then primary colour */
1941 if (InputsRead
& FRAG_BIT_COL0
) {
1942 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
1943 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
= get_hw_temp(rp
);
1945 InputsRead
&= ~FRAG_BIT_COL0
;
1947 /* Secondary color */
1948 if (InputsRead
& FRAG_BIT_COL1
) {
1949 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
1950 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
= get_hw_temp(rp
);
1952 InputsRead
&= ~FRAG_BIT_COL1
;
1956 WARN_ONCE("Don't know how to handle inputs 0x%x\n",
1958 /* force read from hwreg 0 for now */
1960 if (InputsRead
& (1<<i
)) cs
->inputs
[i
].reg
= 0;
1963 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
1964 * That way, we can free up the reg when it's no longer needed
1966 if (!mp
->Base
.Instructions
) {
1967 ERROR("No instructions found in program\n");
1971 for (fpi
=mp
->Base
.Instructions
;fpi
->Opcode
!= OPCODE_END
; fpi
++) {
1975 idx
= fpi
->SrcReg
[i
].Index
;
1976 switch (fpi
->SrcReg
[i
].File
) {
1977 case PROGRAM_TEMPORARY
:
1978 if (!(temps_used
& (1<<idx
))) {
1979 cs
->temps
[idx
].reg
= -1;
1980 cs
->temps
[idx
].refcount
= 1;
1981 temps_used
|= (1 << idx
);
1983 cs
->temps
[idx
].refcount
++;
1986 cs
->inputs
[idx
].refcount
++;
1992 idx
= fpi
->DstReg
.Index
;
1993 if (fpi
->DstReg
.File
== PROGRAM_TEMPORARY
) {
1994 if (!(temps_used
& (1<<idx
))) {
1995 cs
->temps
[idx
].reg
= -1;
1996 cs
->temps
[idx
].refcount
= 1;
1997 temps_used
|= (1 << idx
);
1999 cs
->temps
[idx
].refcount
++;
2002 cs
->temp_in_use
= temps_used
;
2005 static void update_params(struct r300_fragment_program
*rp
)
2007 struct gl_fragment_program
*mp
= &rp
->mesa_program
;
2010 /* Ask Mesa nicely to fill in ParameterValues for us */
2012 _mesa_load_state_parameters(rp
->ctx
, mp
->Base
.Parameters
);
2014 for (i
=0;i
<rp
->param_nr
;i
++)
2015 COPY_4V(rp
->constant
[rp
->param
[i
].idx
], rp
->param
[i
].values
);
2017 rp
->params_uptodate
= GL_TRUE
;
2020 void r300_translate_fragment_shader(r300ContextPtr r300
, struct r300_fragment_program
*rp
)
2022 struct r300_pfs_compile_state
*cs
= NULL
;
2024 if (!rp
->translated
) {
2026 init_program(r300
, rp
);
2029 if (parse_program(rp
) == GL_FALSE
) {
2035 cs
->v_pos
= cs
->s_pos
= MAX2(cs
->v_pos
, cs
->s_pos
);
2036 rp
->node
[rp
->cur_node
].alu_end
=
2037 cs
->v_pos
- rp
->node
[rp
->cur_node
].alu_offset
- 1;
2038 if (rp
->node
[rp
->cur_node
].tex_end
< 0)
2039 rp
->node
[rp
->cur_node
].tex_end
= 0;
2041 rp
->alu_end
= cs
->v_pos
- 1;
2043 rp
->tex_end
= rp
->tex
.length
? rp
->tex
.length
- 1 : 0;
2044 assert(rp
->node
[rp
->cur_node
].alu_end
>= 0);
2045 assert(rp
->alu_end
>= 0);
2047 rp
->translated
= GL_TRUE
;
2048 if (0) dump_program(rp
);
2049 r300UpdateStateParameters(rp
->ctx
, _NEW_PROGRAM
);
2055 /* just some random things... */
2056 static void dump_program(struct r300_fragment_program
*rp
)
2061 fprintf(stderr
, "pc=%d*************************************\n", pc
++);
2063 fprintf(stderr
, "Mesa program:\n");
2064 fprintf(stderr
, "-------------\n");
2065 _mesa_print_program(&rp
->mesa_program
.Base
);
2068 fprintf(stderr
, "Hardware program\n");
2069 fprintf(stderr
, "----------------\n");
2071 fprintf(stderr
, "tex:\n");
2073 for(i
=0;i
<rp
->tex
.length
;i
++) {
2074 fprintf(stderr
, "%08x\n", rp
->tex
.inst
[i
]);
2077 for (i
=0;i
<(rp
->cur_node
+1);i
++) {
2078 fprintf(stderr
, "NODE %d: alu_offset: %d, tex_offset: %d, "\
2079 "alu_end: %d, tex_end: %d\n", i
,
2080 rp
->node
[i
].alu_offset
,
2081 rp
->node
[i
].tex_offset
,
2082 rp
->node
[i
].alu_end
,
2083 rp
->node
[i
].tex_end
);
2086 fprintf(stderr
, "%08x\n",
2087 ((rp
->tex_end
<< 16) | (R300_PFS_TEXI_0
>> 2)));
2088 for (i
=0;i
<=rp
->tex_end
;i
++)
2089 fprintf(stderr
, "%08x\n", rp
->tex
.inst
[i
]);
2091 /* dump program in pretty_print_command_stream.tcl-readable format */
2092 fprintf(stderr
, "%08x\n",
2093 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR0_0
>> 2)));
2094 for (i
=0;i
<=rp
->alu_end
;i
++)
2095 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst0
);
2097 fprintf(stderr
, "%08x\n",
2098 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR1_0
>> 2)));
2099 for (i
=0;i
<=rp
->alu_end
;i
++)
2100 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst1
);
2102 fprintf(stderr
, "%08x\n",
2103 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR2_0
>> 2)));
2104 for (i
=0;i
<=rp
->alu_end
;i
++)
2105 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst2
);
2107 fprintf(stderr
, "%08x\n",
2108 ((rp
->alu_end
<< 16) | (R300_PFS_INSTR3_0
>> 2)));
2109 for (i
=0;i
<=rp
->alu_end
;i
++)
2110 fprintf(stderr
, "%08x\n", rp
->alu
.inst
[i
].inst3
);
2112 fprintf(stderr
, "00000000\n");