1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "program/program.h"
37 #include "program/prog_instruction.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_statevars.h"
40 #include "program/programopt.h"
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
/* Mesa swizzle selectors and write masks are passed to the hardware encoding
 * unchanged (see t_swizzle() and t_dst_mask()), so the Mesa and R200 numeric
 * encodings must be identical. Refuse to build if either side is renumbered. */
#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
    SWIZZLE_W != VSF_IN_COMPONENT_W || \
    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
    WRITEMASK_X != VSF_FLAG_X || \
    WRITEMASK_Y != VSF_FLAG_Y || \
    WRITEMASK_Z != VSF_FLAG_Z || \
    WRITEMASK_W != VSF_FLAG_W
#error Cannot change these!
#endif
/* Flag bit kept next to the operand count in op_names[].ip; the translator
 * tests it to decide whether an opcode's sources are scalar.
 * The constant is unsigned on purpose: shifting a signed 1 into bit 31 is
 * undefined behaviour in C and would sign-extend when stored in the
 * unsigned long ip field. */
#define SCALAR_FLAG (1u<<31)
#define FLAG_MASK (1u<<31)
/* Bits of op_names[].ip that hold the number of input operands. */
#define OP_MASK	(0xf)  /* we are unlikely to have more than 15 */
/* One op_names[] initializer: stringified opcode name, the matching
 * OPCODE_* value, and the operand-count/flags word. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
69 unsigned long ip
; /* number of input operands and flags */
73 OPN(ARL
, 1|SCALAR_FLAG
),
78 OPN(EX2
, 1|SCALAR_FLAG
),
79 OPN(EXP
, 1|SCALAR_FLAG
),
82 OPN(LG2
, 1|SCALAR_FLAG
),
84 OPN(LOG
, 1|SCALAR_FLAG
),
90 OPN(POW
, 2|SCALAR_FLAG
),
91 OPN(RCP
, 1|SCALAR_FLAG
),
92 OPN(RSQ
, 1|SCALAR_FLAG
),
103 static GLboolean
r200VertexProgUpdateParams(struct gl_context
*ctx
, struct r200_vertex_program
*vp
)
105 r200ContextPtr rmesa
= R200_CONTEXT( ctx
);
106 GLfloat
*fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
+ 1];
108 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
109 struct gl_program_parameter_list
*paramList
;
110 drm_radeon_cmd_header_t tmp
;
112 R200_STATECHANGE( rmesa
, vpp
[0] );
113 R200_STATECHANGE( rmesa
, vpp
[1] );
114 assert(mesa_vp
->Base
.Parameters
);
115 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
116 paramList
= mesa_vp
->Base
.Parameters
;
118 if(paramList
->NumParameters
> R200_VSF_MAX_PARAM
){
119 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
123 for(pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
124 switch(paramList
->Parameters
[pi
].Type
) {
125 case PROGRAM_STATE_VAR
:
126 case PROGRAM_NAMED_PARAM
:
127 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
128 case PROGRAM_CONSTANT
:
129 *fcmd
++ = paramList
->ParameterValues
[pi
][0].f
;
130 *fcmd
++ = paramList
->ParameterValues
[pi
][1].f
;
131 *fcmd
++ = paramList
->ParameterValues
[pi
][2].f
;
132 *fcmd
++ = paramList
->ParameterValues
[pi
][3].f
;
135 _mesa_problem(NULL
, "Bad param type in %s", __FUNCTION__
);
139 fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
+ 1];
142 /* hack up the cmd_size so not the whole state atom is emitted always. */
143 rmesa
->hw
.vpp
[0].cmd_size
=
144 1 + 4 * ((paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
);
145 tmp
.i
= rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
];
146 tmp
.veclinear
.count
= (paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
;
147 rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
] = tmp
.i
;
148 if (paramList
->NumParameters
> 96) {
149 rmesa
->hw
.vpp
[1].cmd_size
= 1 + 4 * (paramList
->NumParameters
- 96);
150 tmp
.i
= rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
];
151 tmp
.veclinear
.count
= paramList
->NumParameters
- 96;
152 rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
] = tmp
.i
;
157 static INLINE
unsigned long t_dst_mask(GLuint mask
)
159 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160 return mask
& VSF_FLAG_ALL
;
163 static unsigned long t_dst(struct prog_dst_register
*dst
)
166 case PROGRAM_TEMPORARY
:
167 return ((dst
->Index
<< R200_VPI_OUT_REG_INDEX_SHIFT
)
168 | R200_VSF_OUT_CLASS_TMP
);
170 switch (dst
->Index
) {
171 case VERT_RESULT_HPOS
:
172 return R200_VSF_OUT_CLASS_RESULT_POS
;
173 case VERT_RESULT_COL0
:
174 return R200_VSF_OUT_CLASS_RESULT_COLOR
;
175 case VERT_RESULT_COL1
:
176 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT
)
177 | R200_VSF_OUT_CLASS_RESULT_COLOR
);
178 case VERT_RESULT_FOGC
:
179 return R200_VSF_OUT_CLASS_RESULT_FOGC
;
180 case VERT_RESULT_TEX0
:
181 case VERT_RESULT_TEX1
:
182 case VERT_RESULT_TEX2
:
183 case VERT_RESULT_TEX3
:
184 case VERT_RESULT_TEX4
:
185 case VERT_RESULT_TEX5
:
186 return (((dst
->Index
- VERT_RESULT_TEX0
) << R200_VPI_OUT_REG_INDEX_SHIFT
)
187 | R200_VSF_OUT_CLASS_RESULT_TEXC
);
188 case VERT_RESULT_PSIZ
:
189 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE
;
191 fprintf(stderr
, "problem in %s, unknown dst output reg %d\n", __FUNCTION__
, dst
->Index
);
195 case PROGRAM_ADDRESS
:
196 assert (dst
->Index
== 0);
197 return R200_VSF_OUT_CLASS_ADDR
;
199 fprintf(stderr
, "problem in %s, unknown register type %d\n", __FUNCTION__
, dst
->File
);
205 static unsigned long t_src_class(gl_register_file file
)
209 case PROGRAM_TEMPORARY
:
210 return VSF_IN_CLASS_TMP
;
213 return VSF_IN_CLASS_ATTR
;
215 case PROGRAM_LOCAL_PARAM
:
216 case PROGRAM_ENV_PARAM
:
217 case PROGRAM_NAMED_PARAM
:
218 case PROGRAM_CONSTANT
:
219 case PROGRAM_STATE_VAR
:
220 return VSF_IN_CLASS_PARAM
;
223 case PROGRAM_WRITE_ONLY:
224 case PROGRAM_ADDRESS:
227 fprintf(stderr
, "problem in %s", __FUNCTION__
);
232 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
234 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
239 static void vp_dump_inputs(struct r200_vertex_program
*vp
, char *caller
)
244 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
, caller
);
248 fprintf(stderr
, "%s:<", caller
);
249 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
250 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
251 fprintf(stderr
, ">\n");
256 static unsigned long t_src_index(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
262 if(src
->File
== PROGRAM_INPUT
){
263 /* if(vp->inputs[src->Index] != -1)
264 return vp->inputs[src->Index];
266 for(i=0; i < VERT_ATTRIB_MAX; i++)
267 if(vp->inputs[i] > max_reg)
268 max_reg = vp->inputs[i];
270 vp->inputs[src->Index] = max_reg+1;*/
272 //vp_dump_inputs(vp, __FUNCTION__);
273 assert(vp
->inputs
[src
->Index
] != -1);
274 return vp
->inputs
[src
->Index
];
276 if (src
->Index
< 0) {
277 fprintf(stderr
, "WARNING negative offsets for indirect addressing do not work\n");
284 static unsigned long t_src(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
287 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
288 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
289 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
290 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
291 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
292 t_src_class(src
->File
),
293 src
->Negate
) | (src
->RelAddr
<< 4);
296 static unsigned long t_src_scalar(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
299 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
300 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
301 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
302 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
303 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
304 t_src_class(src
->File
),
305 src
->Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
->RelAddr
<< 4);
308 static unsigned long t_opcode(enum prog_opcode opcode
)
312 case OPCODE_ADD
: return R200_VPI_OUT_OP_ADD
;
313 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
314 * seems to ignore neg offsets which isn't quite correct...
316 case OPCODE_ARL
: return R200_VPI_OUT_OP_ARL
;
317 case OPCODE_DP4
: return R200_VPI_OUT_OP_DOT
;
318 case OPCODE_DST
: return R200_VPI_OUT_OP_DST
;
319 case OPCODE_EX2
: return R200_VPI_OUT_OP_EX2
;
320 case OPCODE_EXP
: return R200_VPI_OUT_OP_EXP
;
321 case OPCODE_FRC
: return R200_VPI_OUT_OP_FRC
;
322 case OPCODE_LG2
: return R200_VPI_OUT_OP_LG2
;
323 case OPCODE_LIT
: return R200_VPI_OUT_OP_LIT
;
324 case OPCODE_LOG
: return R200_VPI_OUT_OP_LOG
;
325 case OPCODE_MAX
: return R200_VPI_OUT_OP_MAX
;
326 case OPCODE_MIN
: return R200_VPI_OUT_OP_MIN
;
327 case OPCODE_MUL
: return R200_VPI_OUT_OP_MUL
;
328 case OPCODE_RCP
: return R200_VPI_OUT_OP_RCP
;
329 case OPCODE_RSQ
: return R200_VPI_OUT_OP_RSQ
;
330 case OPCODE_SGE
: return R200_VPI_OUT_OP_SGE
;
331 case OPCODE_SLT
: return R200_VPI_OUT_OP_SLT
;
334 fprintf(stderr
, "%s: Should not be called with opcode %d!", __FUNCTION__
, opcode
);
340 static unsigned long op_operands(enum prog_opcode opcode
)
344 /* Can we trust mesas opcodes to be in order ? */
345 for(i
=0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
346 if(op_names
[i
].opcode
== opcode
)
347 return op_names
[i
].ip
;
349 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
/* TODO: Get rid of t_src_class call */
/* Non-zero when sources a and b name different registers (Index or RelAddr
 * differs) while both are in the parameter class or both are in the attribute
 * class. The translator copies one of the two into a temporary when this
 * holds. Arguments are parenthesised so any lvalue expression can be passed. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
                        ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
                          t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
                         (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
                          t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
/* fglrx on rv250 encodes unused sources as follows:
   sources that are unused but still required are the same as the previous source, zeroed out.
   Sources that are not required are the same as the previous source but with VSF_IN_CLASS_NONE set.
   For example, an add (2 args) used as a mov has its 2nd arg zeroed out and its 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if this is strictly necessary. */
/* Use these simpler definitions. They obviously must not be used with source regs that
   have not been set up yet. They are NOT semantically equivalent to the r300 ones;
   code changes are required. */
/* All of the macros below read a source word of the instruction currently
 * being built through a local pointer named o_inst, so that word must already
 * have been written. */

/* Copy of a source word with the 12 selector bits starting at
 * R200_VPI_IN_X_SHIFT cleared and the X, Y, Z and W selectors all set to
 * R200_VPI_IN_SELECT_ZERO; every other bit is kept. */
#define ZERO_SRC_WORD(word) \
   (((word) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
    | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))

#define ZERO_SRC_0 ZERO_SRC_WORD(o_inst->src0)

#define ZERO_SRC_1 ZERO_SRC_WORD(o_inst->src1)

#define ZERO_SRC_2 ZERO_SRC_WORD(o_inst->src2)

/* Copy of a source word with its low four bits replaced by 9.
 * NOTE(review): 9 is presumably the VSF_IN_CLASS_NONE register class -
 * confirm against the register header. */
#define UNUSED_SRC_WORD(word) (((word) & ~15) | 9)

#define UNUSED_SRC_0 UNUSED_SRC_WORD(o_inst->src0)

#define UNUSED_SRC_1 UNUSED_SRC_WORD(o_inst->src1)

#define UNUSED_SRC_2 UNUSED_SRC_WORD(o_inst->src2)
395 * Generate an R200 vertex program from Mesa's internal representation.
397 * \return GL_TRUE for success, GL_FALSE for failure.
399 static GLboolean
r200_translate_vertex_program(struct gl_context
*ctx
, struct r200_vertex_program
*vp
)
401 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
402 struct prog_instruction
*vpi
;
404 VERTEX_SHADER_INSTRUCTION
*o_inst
;
405 unsigned long operands
;
414 vp
->native
= GL_FALSE
;
415 vp
->translated
= GL_TRUE
;
416 vp
->fogmode
= ctx
->Fog
.Mode
;
418 if (mesa_vp
->Base
.NumInstructions
== 0)
422 if ((mesa_vp
->Base
.InputsRead
&
423 ~(VERT_BIT_POS
| VERT_BIT_NORMAL
| VERT_BIT_COLOR0
| VERT_BIT_COLOR1
|
424 VERT_BIT_FOG
| VERT_BIT_TEX0
| VERT_BIT_TEX1
| VERT_BIT_TEX2
|
425 VERT_BIT_TEX3
| VERT_BIT_TEX4
| VERT_BIT_TEX5
)) != 0) {
426 if (R200_DEBUG
& RADEON_FALLBACKS
) {
427 fprintf(stderr
, "can't handle vert prog inputs 0x%x\n",
428 mesa_vp
->Base
.InputsRead
);
434 if ((mesa_vp
->Base
.OutputsWritten
&
435 ~((1 << VERT_RESULT_HPOS
) | (1 << VERT_RESULT_COL0
) | (1 << VERT_RESULT_COL1
) |
436 (1 << VERT_RESULT_FOGC
) | (1 << VERT_RESULT_TEX0
) | (1 << VERT_RESULT_TEX1
) |
437 (1 << VERT_RESULT_TEX2
) | (1 << VERT_RESULT_TEX3
) | (1 << VERT_RESULT_TEX4
) |
438 (1 << VERT_RESULT_TEX5
) | (1 << VERT_RESULT_PSIZ
))) != 0) {
439 if (R200_DEBUG
& RADEON_FALLBACKS
) {
440 fprintf(stderr
, "can't handle vert prog outputs 0x%llx\n",
441 (unsigned long long) mesa_vp
->Base
.OutputsWritten
);
446 if (mesa_vp
->IsNVProgram
) {
447 /* subtle differences in spec like guaranteed initialized regs could cause
448 headaches. Might want to remove the driconf option to enable it completely */
451 /* Initial value should be last tmp reg that hw supports.
452 Strangely enough r300 doesnt mind even though these would be out of range.
453 Smart enough to realize that it doesnt need it? */
454 int u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
455 struct prog_src_register src
[3];
456 struct prog_dst_register dst
;
458 /* FIXME: is changing the prog safe to do here? */
459 if (mesa_vp
->IsPositionInvariant
&&
460 /* make sure we only do this once */
461 !(mesa_vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_HPOS
))) {
462 _mesa_insert_mvp_code(ctx
, mesa_vp
);
465 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
466 base e isn't directly available neither. */
467 if ((mesa_vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_FOGC
)) && !vp
->fogpidx
) {
468 struct gl_program_parameter_list
*paramList
;
469 gl_state_index tokens
[STATE_LENGTH
] = { STATE_FOG_PARAMS
, 0, 0, 0, 0 };
470 paramList
= mesa_vp
->Base
.Parameters
;
471 vp
->fogpidx
= _mesa_add_state_reference(paramList
, tokens
);
475 mesa_vp
->Base
.NumNativeInstructions
= 0;
476 if (mesa_vp
->Base
.Parameters
)
477 mesa_vp
->Base
.NumNativeParameters
= mesa_vp
->Base
.Parameters
->NumParameters
;
479 mesa_vp
->Base
.NumNativeParameters
= 0;
481 for(i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
483 for(i
= 0; i
< 15; i
++)
484 vp
->inputmap_rev
[i
] = 255;
485 free_inputs
= 0x2ffd;
487 /* fglrx uses fixed inputs as follows for conventional attribs.
488 generic attribs use non-fixed assignment, fglrx will always use the
489 lowest attrib values available. We'll just do the same.
490 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
491 and 13 in a hw vertex prog.
492 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
493 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
494 Additionally, not more than 12 arrays in total are possible I think.
495 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
496 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
497 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
498 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
501 /* attr 4,5 and 13 are only used with generic attribs.
502 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
503 not possibe to use with vertex progs as it is lacking in vert prog specification) */
504 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
505 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_POS
) {
506 vp
->inputs
[VERT_ATTRIB_POS
] = 0;
507 vp
->inputmap_rev
[0] = VERT_ATTRIB_POS
;
508 free_inputs
&= ~(1 << 0);
511 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_WEIGHT
) {
512 vp
->inputs
[VERT_ATTRIB_WEIGHT
] = 12;
513 vp
->inputmap_rev
[1] = VERT_ATTRIB_WEIGHT
;
516 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_NORMAL
) {
517 vp
->inputs
[VERT_ATTRIB_NORMAL
] = 1;
518 vp
->inputmap_rev
[2] = VERT_ATTRIB_NORMAL
;
521 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_COLOR0
) {
522 vp
->inputs
[VERT_ATTRIB_COLOR0
] = 2;
523 vp
->inputmap_rev
[4] = VERT_ATTRIB_COLOR0
;
524 free_inputs
&= ~(1 << 2);
527 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_COLOR1
) {
528 vp
->inputs
[VERT_ATTRIB_COLOR1
] = 3;
529 vp
->inputmap_rev
[5] = VERT_ATTRIB_COLOR1
;
530 free_inputs
&= ~(1 << 3);
533 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_FOG
) {
534 vp
->inputs
[VERT_ATTRIB_FOG
] = 15; array_count
++;
535 vp
->inputmap_rev
[3] = VERT_ATTRIB_FOG
;
538 /* VERT_ATTRIB_TEX0-5 */
539 for (i
= 0; i
<= 5; i
++) {
540 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_TEX(i
)) {
541 vp
->inputs
[VERT_ATTRIB_TEX(i
)] = i
+ 6;
542 vp
->inputmap_rev
[8 + i
] = VERT_ATTRIB_TEX(i
);
543 free_inputs
&= ~(1 << (i
+ 6));
547 /* using VERT_ATTRIB_TEX6/7 would be illegal */
548 for (; i
< VERT_ATTRIB_TEX_MAX
; i
++) {
549 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_TEX(i
)) {
550 if (R200_DEBUG
& RADEON_FALLBACKS
) {
551 fprintf(stderr
, "texture attribute %d in vert prog\n", i
);
556 /* completely ignore aliasing? */
557 for (i
= 0; i
< VERT_ATTRIB_GENERIC_MAX
; i
++) {
559 /* completely ignore aliasing? */
560 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_GENERIC(i
)) {
562 if (array_count
> 12) {
563 if (R200_DEBUG
& RADEON_FALLBACKS
) {
564 fprintf(stderr
, "more than 12 attribs used in vert prog\n");
568 for (j
= 0; j
< 14; j
++) {
569 /* will always find one due to limited array_count */
570 if (free_inputs
& (1 << j
)) {
571 free_inputs
&= ~(1 << j
);
572 vp
->inputs
[VERT_ATTRIB_GENERIC(i
)] = j
;
575 vp
->inputmap_rev
[j
] = VERT_ATTRIB_GENERIC(i
);
577 /* mapped to col/tex */
578 vp
->inputmap_rev
[j
+ 2] = VERT_ATTRIB_GENERIC(i
);
581 vp
->inputmap_rev
[j
+ 1] = VERT_ATTRIB_GENERIC(i
);
589 if (!(mesa_vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_HPOS
))) {
590 if (R200_DEBUG
& RADEON_FALLBACKS
) {
591 fprintf(stderr
, "can't handle vert prog without position output\n");
595 if (free_inputs
& 1) {
596 if (R200_DEBUG
& RADEON_FALLBACKS
) {
597 fprintf(stderr
, "can't handle vert prog without position input\n");
603 for (vpi
= mesa_vp
->Base
.Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++){
604 operands
= op_operands(vpi
->Opcode
);
605 are_srcs_scalar
= operands
& SCALAR_FLAG
;
608 for(i
= 0; i
< operands
; i
++) {
609 src
[i
] = vpi
->SrcReg
[i
];
610 /* hack up default attrib values as per spec as swizzling.
611 normal, fog, secondary color. Crazy?
612 May need more if we don't submit vec4 elements? */
613 if (src
[i
].File
== PROGRAM_INPUT
) {
614 if (src
[i
].Index
== VERT_ATTRIB_NORMAL
) {
616 for (j
= 0; j
< 4; j
++) {
617 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
618 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
619 src
[i
].Swizzle
|= SWIZZLE_ONE
<< (j
*3);
623 else if (src
[i
].Index
== VERT_ATTRIB_COLOR1
) {
625 for (j
= 0; j
< 4; j
++) {
626 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
627 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
628 src
[i
].Swizzle
|= SWIZZLE_ZERO
<< (j
*3);
632 else if (src
[i
].Index
== VERT_ATTRIB_FOG
) {
634 for (j
= 0; j
< 4; j
++) {
635 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
636 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
637 src
[i
].Swizzle
|= SWIZZLE_ONE
<< (j
*3);
639 else if ((GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_Y
) ||
640 GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_Z
) {
641 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
642 src
[i
].Swizzle
|= SWIZZLE_ZERO
<< (j
*3);
650 if( CMP_SRCS(src
[1], src
[2]) || CMP_SRCS(src
[0], src
[2]) ){
651 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
652 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
655 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
656 SWIZZLE_X
, SWIZZLE_Y
,
657 SWIZZLE_Z
, SWIZZLE_W
,
658 t_src_class(src
[2].File
), VSF_FLAG_NONE
) | (src
[2].RelAddr
<< 4);
660 o_inst
->src1
= ZERO_SRC_0
;
661 o_inst
->src2
= UNUSED_SRC_1
;
664 src
[2].File
= PROGRAM_TEMPORARY
;
665 src
[2].Index
= u_temp_i
;
672 if( CMP_SRCS(src
[1], src
[0]) ){
673 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
674 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
677 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
678 SWIZZLE_X
, SWIZZLE_Y
,
679 SWIZZLE_Z
, SWIZZLE_W
,
680 t_src_class(src
[0].File
), VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
682 o_inst
->src1
= ZERO_SRC_0
;
683 o_inst
->src2
= UNUSED_SRC_1
;
686 src
[0].File
= PROGRAM_TEMPORARY
;
687 src
[0].Index
= u_temp_i
;
694 if (dst
.File
== PROGRAM_OUTPUT
&&
695 dst
.Index
== VERT_RESULT_FOGC
&&
696 dst
.WriteMask
& WRITEMASK_X
) {
697 fog_temp_i
= u_temp_i
;
698 dst
.File
= PROGRAM_TEMPORARY
;
699 dst
.Index
= fog_temp_i
;
704 /* These ops need special handling. */
707 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
708 So may need to insert additional instruction */
709 if ((src
[0].File
== src
[1].File
) &&
710 (src
[0].Index
== src
[1].Index
)) {
711 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&dst
),
712 t_dst_mask(dst
.WriteMask
));
713 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
714 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
716 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
718 t_src_class(src
[0].File
),
719 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
720 o_inst
->src1
= UNUSED_SRC_0
;
721 o_inst
->src2
= UNUSED_SRC_0
;
724 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
725 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
727 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
728 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
729 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
,
730 t_src_class(src
[0].File
),
731 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
732 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
733 SWIZZLE_ZERO
, SWIZZLE_ZERO
,
734 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), SWIZZLE_ZERO
,
735 t_src_class(src
[1].File
),
736 src
[1].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
737 o_inst
->src2
= UNUSED_SRC_1
;
740 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&dst
),
741 t_dst_mask(dst
.WriteMask
));
742 o_inst
->src0
= MAKE_VSF_SOURCE(u_temp_i
,
749 o_inst
->src1
= UNUSED_SRC_0
;
750 o_inst
->src2
= UNUSED_SRC_0
;
755 case OPCODE_MOV
://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
757 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
758 t_dst_mask(dst
.WriteMask
));
759 o_inst
->src0
= t_src(vp
, &src
[0]);
760 o_inst
->src1
= ZERO_SRC_0
;
761 o_inst
->src2
= UNUSED_SRC_1
;
765 /* only 2 read ports into temp memory thus may need the macro op MAD_2
766 instead (requiring 2 clocks) if all inputs are in temp memory
767 (and, only if they actually reference 3 distinct temps) */
768 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
769 src
[1].File
== PROGRAM_TEMPORARY
&&
770 src
[2].File
== PROGRAM_TEMPORARY
&&
771 (((src
[0].RelAddr
<< 8) | src
[0].Index
) != ((src
[1].RelAddr
<< 8) | src
[1].Index
)) &&
772 (((src
[0].RelAddr
<< 8) | src
[0].Index
) != ((src
[2].RelAddr
<< 8) | src
[2].Index
)) &&
773 (((src
[1].RelAddr
<< 8) | src
[1].Index
) != ((src
[2].RelAddr
<< 8) | src
[2].Index
))) ?
774 R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
776 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&dst
),
777 t_dst_mask(dst
.WriteMask
));
778 o_inst
->src0
= t_src(vp
, &src
[0]);
780 if ((o_inst
- vp
->instr
) == 31) {
781 /* fix up the broken vertex program of quake4 demo... */
782 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
783 SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
,
784 t_src_class(src
[1].File
),
785 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
786 o_inst
->src2
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
787 SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
,
788 t_src_class(src
[1].File
),
789 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
792 o_inst
->src1
= t_src(vp
, &src
[1]);
793 o_inst
->src2
= t_src(vp
, &src
[2]);
796 o_inst
->src1
= t_src(vp
, &src
[1]);
797 o_inst
->src2
= t_src(vp
, &src
[2]);
801 case OPCODE_DP3
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
802 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&dst
),
803 t_dst_mask(dst
.WriteMask
));
805 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
806 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
807 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
808 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
810 t_src_class(src
[0].File
),
811 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
813 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
814 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
815 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
816 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
818 t_src_class(src
[1].File
),
819 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
821 o_inst
->src2
= UNUSED_SRC_1
;
824 case OPCODE_DPH
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
825 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&dst
),
826 t_dst_mask(dst
.WriteMask
));
828 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
829 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
830 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
831 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
832 VSF_IN_COMPONENT_ONE
,
833 t_src_class(src
[0].File
),
834 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
835 o_inst
->src1
= t_src(vp
, &src
[1]);
836 o_inst
->src2
= UNUSED_SRC_1
;
839 case OPCODE_SUB
://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
840 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
841 t_dst_mask(dst
.WriteMask
));
843 o_inst
->src0
= t_src(vp
, &src
[0]);
844 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
845 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
846 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
847 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
848 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
849 t_src_class(src
[1].File
),
850 (!src
[1].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
851 o_inst
->src2
= UNUSED_SRC_1
;
854 case OPCODE_ABS
://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
855 o_inst
->op
=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX
, t_dst(&dst
),
856 t_dst_mask(dst
.WriteMask
));
858 o_inst
->src0
=t_src(vp
, &src
[0]);
859 o_inst
->src1
=MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
860 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
861 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
862 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
863 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
864 t_src_class(src
[0].File
),
865 (!src
[0].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
866 o_inst
->src2
= UNUSED_SRC_1
;
870 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
871 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
873 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_FRC
,
874 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
875 t_dst_mask(dst
.WriteMask
));
877 o_inst
->src0
= t_src(vp
, &src
[0]);
878 o_inst
->src1
= UNUSED_SRC_0
;
879 o_inst
->src2
= UNUSED_SRC_1
;
882 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
883 t_dst_mask(dst
.WriteMask
));
885 o_inst
->src0
= t_src(vp
, &src
[0]);
886 o_inst
->src1
= MAKE_VSF_SOURCE(u_temp_i
,
892 /* Not 100% sure about this */
893 (!src
[0].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
/*VSF_FLAG_ALL*/);
895 o_inst
->src2
= UNUSED_SRC_0
;
900 /* mul r0, r1.yzxw, r2.zxyw
901 mad r0, -r2.yzxw, r1.zxyw, r0
903 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
904 src
[1].File
== PROGRAM_TEMPORARY
&&
905 (((src
[0].RelAddr
<< 8) | src
[0].Index
) != ((src
[1].RelAddr
<< 8) | src
[1].Index
))) ?
906 R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
908 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
909 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
910 t_dst_mask(dst
.WriteMask
));
912 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
913 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
914 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
915 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
916 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
917 t_src_class(src
[0].File
),
918 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
920 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
921 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
922 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
923 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
924 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
925 t_src_class(src
[1].File
),
926 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
928 o_inst
->src2
= UNUSED_SRC_1
;
932 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&dst
),
933 t_dst_mask(dst
.WriteMask
));
935 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
936 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
937 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
938 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
939 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
940 t_src_class(src
[1].File
),
941 (!src
[1].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
943 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
944 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
945 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
946 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
947 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
948 t_src_class(src
[0].File
),
949 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
951 o_inst
->src2
= MAKE_VSF_SOURCE(u_temp_i
+1,
966 o_inst
->op
= MAKE_VSF_OP(t_opcode(vpi
->Opcode
), t_dst(&dst
),
967 t_dst_mask(dst
.WriteMask
));
972 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
973 o_inst
->src1
= UNUSED_SRC_0
;
974 o_inst
->src2
= UNUSED_SRC_1
;
978 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
979 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
980 o_inst
->src2
= UNUSED_SRC_1
;
984 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
985 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
986 o_inst
->src2
= t_src_scalar(vp
, &src
[2]);
990 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
997 o_inst
->src0
= t_src(vp
, &src
[0]);
998 o_inst
->src1
= UNUSED_SRC_0
;
999 o_inst
->src2
= UNUSED_SRC_1
;
1003 o_inst
->src0
= t_src(vp
, &src
[0]);
1004 o_inst
->src1
= t_src(vp
, &src
[1]);
1005 o_inst
->src2
= UNUSED_SRC_1
;
1009 o_inst
->src0
= t_src(vp
, &src
[0]);
1010 o_inst
->src1
= t_src(vp
, &src
[1]);
1011 o_inst
->src2
= t_src(vp
, &src
[2]);
1015 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
1024 if (vp
->fogmode
== GL_EXP
) {
1025 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1026 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1028 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1029 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, X
, X
, X
, X
, PARAM
, NONE
);
1030 o_inst
->src2
= UNUSED_SRC_1
;
1032 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E
,
1033 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1035 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1036 o_inst
->src1
= UNUSED_SRC_0
;
1037 o_inst
->src2
= UNUSED_SRC_1
;
1039 else if (vp
->fogmode
== GL_EXP2
) {
1040 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1041 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1043 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1044 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, X
, X
, X
, X
, PARAM
, NONE
);
1045 o_inst
->src2
= UNUSED_SRC_1
;
1047 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1048 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1050 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1051 o_inst
->src1
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1052 o_inst
->src2
= UNUSED_SRC_1
;
1054 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E
,
1055 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1057 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1058 o_inst
->src1
= UNUSED_SRC_0
;
1059 o_inst
->src2
= UNUSED_SRC_1
;
1061 else { /* fogmode == GL_LINEAR */
1062 /* could do that with single op (dot) if using params like
1063 with fixed function pipeline fog */
1064 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
1065 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1067 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1068 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, Z
, Z
, Z
, Z
, PARAM
, NONE
);
1069 o_inst
->src2
= UNUSED_SRC_1
;
1071 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1072 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1074 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1075 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, W
, W
, W
, W
, PARAM
, NONE
);
1076 o_inst
->src2
= UNUSED_SRC_1
;
1082 u_temp_used
= (R200_VSF_MAX_TEMPS
- 1) - u_temp_i
;
1083 if (mesa_vp
->Base
.NumNativeTemporaries
<
1084 (mesa_vp
->Base
.NumTemporaries
+ u_temp_used
)) {
1085 mesa_vp
->Base
.NumNativeTemporaries
=
1086 mesa_vp
->Base
.NumTemporaries
+ u_temp_used
;
1088 if ((mesa_vp
->Base
.NumTemporaries
+ u_temp_used
) > R200_VSF_MAX_TEMPS
) {
1089 if (R200_DEBUG
& RADEON_FALLBACKS
) {
1090 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", mesa_vp
->Base
.NumTemporaries
, u_temp_used
);
1094 u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
1095 if(o_inst
- vp
->instr
>= R200_VSF_MAX_INST
) {
1096 mesa_vp
->Base
.NumNativeInstructions
= 129;
1097 if (R200_DEBUG
& RADEON_FALLBACKS
) {
1098 fprintf(stderr
, "more than 128 native instructions\n");
1102 if ((o_inst
->op
& R200_VSF_OUT_CLASS_MASK
) == R200_VSF_OUT_CLASS_RESULT_POS
) {
1103 vp
->pos_end
= (o_inst
- vp
->instr
);
1107 vp
->native
= GL_TRUE
;
1108 mesa_vp
->Base
.NumNativeInstructions
= (o_inst
- vp
->instr
);
1110 fprintf(stderr
, "hw program:\n");
1111 for(i
=0; i
< vp
->program
.length
; i
++)
1112 fprintf(stderr
, "%08x\n", vp
->instr
[i
]);
1117 void r200SetupVertexProg( struct gl_context
*ctx
) {
1118 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1119 struct r200_vertex_program
*vp
= (struct r200_vertex_program
*)ctx
->VertexProgram
.Current
;
1123 if (!vp
->translated
|| (ctx
->Fog
.Enabled
&& ctx
->Fog
.Mode
!= vp
->fogmode
)) {
1124 rmesa
->curr_vp_hw
= NULL
;
1125 r200_translate_vertex_program(ctx
, vp
);
1127 /* could optimize setting up vertex progs away for non-tcl hw */
1128 fallback
= !(vp
->native
&& r200VertexProgUpdateParams(ctx
, vp
));
1129 TCL_FALLBACK(ctx
, R200_TCL_FALLBACK_VERTEX_PROGRAM
, fallback
);
1130 if (rmesa
->radeon
.TclFallback
) return;
1132 R200_STATECHANGE( rmesa
, vap
);
1133 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1134 maybe only when using more than 64 inst / 96 param? */
1135 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] |= R200_VAP_PROG_VTX_SHADER_ENABLE
/*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1137 R200_STATECHANGE( rmesa
, pvs
);
1139 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_1
] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT
) |
1140 ((vp
->mesa_program
.Base
.NumNativeInstructions
- 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT
) |
1141 (vp
->pos_end
<< R200_PVS_CNTL_1_POS_END_SHIFT
);
1142 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_2
] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT
) |
1143 (vp
->mesa_program
.Base
.NumNativeParameters
<< R200_PVS_CNTL_2_PARAM_COUNT_SHIFT
);
1145 /* maybe user clip planes just work with vertex progs... untested */
1146 if (ctx
->Transform
.ClipPlanesEnabled
) {
1147 R200_STATECHANGE( rmesa
, tcl
);
1148 if (vp
->mesa_program
.IsPositionInvariant
) {
1149 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] |= (ctx
->Transform
.ClipPlanesEnabled
<< 2);
1152 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] &= ~(0xfc);
1156 if (vp
!= rmesa
->curr_vp_hw
) {
1157 GLuint count
= vp
->mesa_program
.Base
.NumNativeInstructions
;
1158 drm_radeon_cmd_header_t tmp
;
1160 R200_STATECHANGE( rmesa
, vpi
[0] );
1161 R200_STATECHANGE( rmesa
, vpi
[1] );
1163 /* FIXME: what about using a memcopy... */
1164 for (i
= 0; (i
< 64) && i
< count
; i
++) {
1165 rmesa
->hw
.vpi
[0].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
].op
;
1166 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
].src0
;
1167 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
].src1
;
1168 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
].src2
;
1170 /* hack up the cmd_size so not the whole state atom is emitted always.
1171 This may require some more thought, we may emit half progs on lost state, but
1172 hopefully it won't matter?
1173 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1174 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1175 rmesa
->hw
.vpi
[0].cmd_size
= 1 + 4 * ((count
> 64) ? 64 : count
);
1176 tmp
.i
= rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
];
1177 tmp
.veclinear
.count
= (count
> 64) ? 64 : count
;
1178 rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
] = tmp
.i
;
1180 for (i
= 0; i
< (count
- 64); i
++) {
1181 rmesa
->hw
.vpi
[1].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
+ 64].op
;
1182 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
+ 64].src0
;
1183 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
+ 64].src1
;
1184 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
+ 64].src2
;
1186 rmesa
->hw
.vpi
[1].cmd_size
= 1 + 4 * (count
- 64);
1187 tmp
.i
= rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
];
1188 tmp
.veclinear
.count
= count
- 64;
1189 rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
] = tmp
.i
;
1191 rmesa
->curr_vp_hw
= vp
;
1197 r200BindProgram(struct gl_context
*ctx
, GLenum target
, struct gl_program
*prog
)
1199 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1202 case GL_VERTEX_PROGRAM_ARB
:
1203 rmesa
->curr_vp_hw
= NULL
;
1206 _mesa_problem(ctx
, "Target not supported yet!");
1211 static struct gl_program
*
1212 r200NewProgram(struct gl_context
*ctx
, GLenum target
, GLuint id
)
1214 struct r200_vertex_program
*vp
;
1217 case GL_VERTEX_PROGRAM_ARB
:
1218 vp
= CALLOC_STRUCT(r200_vertex_program
);
1219 return _mesa_init_vertex_program(ctx
, &vp
->mesa_program
, target
, id
);
1220 case GL_FRAGMENT_PROGRAM_ARB
:
1221 case GL_FRAGMENT_PROGRAM_NV
:
1222 return _mesa_init_fragment_program( ctx
, CALLOC_STRUCT(gl_fragment_program
), target
, id
);
1224 _mesa_problem(ctx
, "Bad target in r200NewProgram");
/**
 * DeleteProgram driver hook.
 *
 * Passes the program straight on to _mesa_delete_program(); no
 * driver-specific teardown is done here.
 *
 * \param ctx   GL context.
 * \param prog  program to delete.
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
/*
 * ProgramStringNotify driver hook: called with the target and the program
 * whose string was (re)specified.
 *
 * NOTE(review): this view of the function is missing lines (the switch
 * header, break statements, the return, and one statement between the
 * `translated` reset and the commented-out memset) -- confirm against the
 * complete file before changing any logic here.
 */
1237 r200ProgramStringNotify(struct gl_context
*ctx
, GLenum target
, struct gl_program
*prog
)
/* prog is reinterpreted as the driver's vertex-program wrapper; vp is only
   used in the GL_VERTEX_PROGRAM_ARB case below */
1239 struct r200_vertex_program
*vp
= (void *)prog
;
1240 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
/* vertex program: mark the cached translation stale, translate again right
   away, and clear curr_vp_hw so r200SetupVertexProg uploads the new
   instructions */
1243 case GL_VERTEX_PROGRAM_ARB
:
1244 vp
->translated
= GL_FALSE
;
1246 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1247 r200_translate_vertex_program(ctx
, vp
);
1248 rmesa
->curr_vp_hw
= NULL
;
/* ATI fragment shader: clear afs_loaded -- presumably forces the shader to
   be loaded again; verify against the code that reads afs_loaded */
1250 case GL_FRAGMENT_SHADER_ATI
:
1251 rmesa
->afs_loaded
= NULL
;
1254 /* need this for tcl fallbacks */
/* the return value of _tnl_program_string is deliberately discarded */
1255 (void) _tnl_program_string(ctx
, target
, prog
);
1257 /* XXX check if program is legal, within limits */
1262 r200IsProgramNative(struct gl_context
*ctx
, GLenum target
, struct gl_program
*prog
)
1264 struct r200_vertex_program
*vp
= (void *)prog
;
1267 case GL_VERTEX_STATE_PROGRAM_NV
:
1268 case GL_VERTEX_PROGRAM_ARB
:
1269 if (!vp
->translated
) {
1270 r200_translate_vertex_program(ctx
, vp
);
1272 /* does not take parameters etc. into account */
1275 _mesa_problem(ctx
, "Bad target in r200NewProgram");
1280 void r200InitShaderFuncs(struct dd_function_table
*functions
)
1282 functions
->NewProgram
= r200NewProgram
;
1283 functions
->BindProgram
= r200BindProgram
;
1284 functions
->DeleteProgram
= r200DeleteProgram
;
1285 functions
->ProgramStringNotify
= r200ProgramStringNotify
;
1286 functions
->IsProgramNative
= r200IsProgramNative
;