1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "program/program.h"
37 #include "program/prog_instruction.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_statevars.h"
40 #include "program/programopt.h"
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
/* Compile-time guard: the translation helpers below hand Mesa's SWIZZLE_* and
 * WRITEMASK_* values to the hardware unchanged, so the build is stopped if any
 * of them stops matching its VSF_IN_COMPONENT_* / VSF_FLAG_* counterpart. */
48 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
49 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
50 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
51 SWIZZLE_W != VSF_IN_COMPONENT_W || \
52 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
53 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
54 WRITEMASK_X != VSF_FLAG_X || \
55 WRITEMASK_Y != VSF_FLAG_Y || \
56 WRITEMASK_Z != VSF_FLAG_Z || \
57 WRITEMASK_W != VSF_FLAG_W
58 #error Cannot change these!
/* Flag OR'd into an opcode table entry's operand count; the translator tests
 * it to decide whether an opcode's sources are scalar.
 * NOTE(review): 1<<31 shifts into the sign bit of a 32-bit int; 1u<<31 would
 * avoid relying on that - confirm before changing. */
61 #define SCALAR_FLAG (1<<31)
/* Same bit as SCALAR_FLAG. */
62 #define FLAG_MASK (1<<31)
/* NOTE(review): presumably isolates the operand count from the flag bits -
 * the statement applying it is not visible in this excerpt. */
63 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
/* Builds one table initializer: the opcode's name as a string, its
 * OPCODE_<name> value, and the operand count/flags word. */
64 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
/* Fragment of the opcode table that op_operands() searches: each OPN() entry
 * pairs a Mesa opcode with its number of input operands.  Every entry visible
 * here also carries SCALAR_FLAG; POW is the only one with two operands. */
69 unsigned long ip
; /* number of input operands and flags */
73 OPN(ARL
, 1|SCALAR_FLAG
),
78 OPN(EX2
, 1|SCALAR_FLAG
),
79 OPN(EXP
, 1|SCALAR_FLAG
),
82 OPN(LG2
, 1|SCALAR_FLAG
),
84 OPN(LOG
, 1|SCALAR_FLAG
),
90 OPN(POW
, 2|SCALAR_FLAG
),
91 OPN(RCP
, 1|SCALAR_FLAG
),
92 OPN(RSQ
, 1|SCALAR_FLAG
),
/*
 * Copy the program's parameter values into the two vpp state atoms.
 *
 * Both atoms are flagged as changed, then the four float components of every
 * PROGRAM_STATE_VAR / PROGRAM_CONSTANT parameter are written to the command
 * buffer, starting at vpp[0].cmd[VPP_CMD_0 + 1].  Afterwards the cmd_size and
 * the veclinear count of each atom are set from the number of parameters in
 * use: at most 96 are accounted to vpp[0], any remainder to vpp[1].
 *
 * ctx: GL context, passed to R200_CONTEXT() and _mesa_load_state_parameters().
 * vp:  driver vertex program; vp->mesa_program.Base.Parameters must be set
 *      (asserted).
 * Returns a GLboolean.  A list longer than R200_VSF_MAX_PARAM is reported as
 * "Params exhausted" on stderr.
 */
103 static GLboolean
r200VertexProgUpdateParams(struct gl_context
*ctx
, struct r200_vertex_program
*vp
)
105 r200ContextPtr rmesa
= R200_CONTEXT( ctx
);
106 GLfloat
*fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
+ 1];
108 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
109 struct gl_program_parameter_list
*paramList
;
110 drm_radeon_cmd_header_t tmp
;
/* Both parameter atoms are about to be rewritten. */
112 R200_STATECHANGE( rmesa
, vpp
[0] );
113 R200_STATECHANGE( rmesa
, vpp
[1] );
114 assert(mesa_vp
->Base
.Parameters
);
/* NOTE(review): presumably refreshes the values of state-derived parameters
 * before they are copied - confirm against Mesa's prog_statevars. */
115 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
116 paramList
= mesa_vp
->Base
.Parameters
;
/* More parameters than the hardware limit allows are reported. */
118 if(paramList
->NumParameters
> R200_VSF_MAX_PARAM
){
119 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
/* Write each parameter as four consecutive floats. */
123 for(pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
124 switch(paramList
->Parameters
[pi
].Type
) {
/* State variables share the copy code of constants: no statement sits
 * between the two case labels. */
125 case PROGRAM_STATE_VAR
:
126 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
127 case PROGRAM_CONSTANT
:
128 *fcmd
++ = paramList
->ParameterValues
[pi
][0].f
;
129 *fcmd
++ = paramList
->ParameterValues
[pi
][1].f
;
130 *fcmd
++ = paramList
->ParameterValues
[pi
][2].f
;
131 *fcmd
++ = paramList
->ParameterValues
[pi
][3].f
;
134 _mesa_problem(NULL
, "Bad param type in %s", __FUNCTION__
);
/* Redirect the write pointer to the payload of the second atom.
 * NOTE(review): the condition guarding this switch is not visible in this
 * excerpt. */
138 fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
+ 1];
141 /* hack up the cmd_size so not the whole state atom is emitted always. */
142 rmesa
->hw
.vpp
[0].cmd_size
=
143 1 + 4 * ((paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
);
144 tmp
.i
= rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
];
145 tmp
.veclinear
.count
= (paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
;
146 rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
] = tmp
.i
;
/* Parameters beyond the first 96 are accounted to the second atom. */
147 if (paramList
->NumParameters
> 96) {
148 rmesa
->hw
.vpp
[1].cmd_size
= 1 + 4 * (paramList
->NumParameters
- 96);
149 tmp
.i
= rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
];
150 tmp
.veclinear
.count
= paramList
->NumParameters
- 96;
151 rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
] = tmp
.i
;
/* Convert a Mesa write mask into the hardware write-mask flags by keeping
 * only the bits covered by VSF_FLAG_ALL. */
156 static INLINE
unsigned long t_dst_mask(GLuint mask
)
158 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
159 return mask
& VSF_FLAG_ALL
;
/*
 * Translate a Mesa destination register into the hardware output class,
 * combined with a register index where one is needed.
 *
 * dst: destination register; its Index (and File, in the final diagnostic)
 *      are read.
 * Returns the class/index bits.  Unknown output registers and unknown
 * register types are reported on stderr.
 */
162 static unsigned long t_dst(struct prog_dst_register
*dst
)
/* Temporaries keep their own index. */
165 case PROGRAM_TEMPORARY
:
166 return ((dst
->Index
<< R200_VPI_OUT_REG_INDEX_SHIFT
)
167 | R200_VSF_OUT_CLASS_TMP
);
/* Select the result class by varying slot. */
169 switch (dst
->Index
) {
170 case VARYING_SLOT_POS
:
171 return R200_VSF_OUT_CLASS_RESULT_POS
;
172 case VARYING_SLOT_COL0
:
173 return R200_VSF_OUT_CLASS_RESULT_COLOR
;
/* Secondary color is the color class with register index 1. */
174 case VARYING_SLOT_COL1
:
175 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT
)
176 | R200_VSF_OUT_CLASS_RESULT_COLOR
);
177 case VARYING_SLOT_FOGC
:
178 return R200_VSF_OUT_CLASS_RESULT_FOGC
;
/* Texcoord outputs: the register index is the offset from VARYING_SLOT_TEX0. */
179 case VARYING_SLOT_TEX0
:
180 case VARYING_SLOT_TEX1
:
181 case VARYING_SLOT_TEX2
:
182 case VARYING_SLOT_TEX3
:
183 case VARYING_SLOT_TEX4
:
184 case VARYING_SLOT_TEX5
:
185 return (((dst
->Index
- VARYING_SLOT_TEX0
) << R200_VPI_OUT_REG_INDEX_SHIFT
)
186 | R200_VSF_OUT_CLASS_RESULT_TEXC
);
187 case VARYING_SLOT_PSIZ
:
188 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE
;
190 fprintf(stderr
, "problem in %s, unknown dst output reg %d\n", __FUNCTION__
, dst
->Index
);
/* Only address register 0 is expected. */
194 case PROGRAM_ADDRESS
:
195 assert (dst
->Index
== 0);
196 return R200_VSF_OUT_CLASS_ADDR
;
198 fprintf(stderr
, "problem in %s, unknown register type %d\n", __FUNCTION__
, dst
->File
);
/*
 * Map a Mesa register file to the hardware source class.
 *
 * Temporaries map to VSF_IN_CLASS_TMP; local/env parameters, constants and
 * state variables all map to VSF_IN_CLASS_PARAM.
 * NOTE(review): the case label leading to VSF_IN_CLASS_ATTR is not visible in
 * this excerpt - presumably PROGRAM_INPUT, confirm against the full file.
 */
204 static unsigned long t_src_class(gl_register_file file
)
208 case PROGRAM_TEMPORARY
:
209 return VSF_IN_CLASS_TMP
;
212 return VSF_IN_CLASS_ATTR
;
214 case PROGRAM_LOCAL_PARAM
:
215 case PROGRAM_ENV_PARAM
:
216 case PROGRAM_CONSTANT
:
217 case PROGRAM_STATE_VAR
:
218 return VSF_IN_CLASS_PARAM
;
221 case PROGRAM_ADDRESS:
224 fprintf(stderr
, "problem in %s", __FUNCTION__
);
/* Translate one Mesa swizzle component into the hardware component select.
 * NOTE(review): the function body is not visible in this excerpt. */
229 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
231 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
/*
 * Debug helper: print the attribute-to-hardware-input table of a program.
 *
 * vp:     program whose inputs[] array is dumped; a null program is reported.
 * caller: name printed in front of the dump.
 * Writes every entry of vp->inputs[0 .. VERT_ATTRIB_MAX-1] to stderr between
 * '<' and '>'.
 */
236 static void vp_dump_inputs(struct r200_vertex_program
*vp
, char *caller
)
241 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
, caller
);
245 fprintf(stderr
, "%s:<", caller
);
246 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
247 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
248 fprintf(stderr
, ">\n");
/*
 * Return the hardware register index for a source operand.
 *
 * vp:  program whose inputs[] table maps Mesa vertex attributes to hardware
 *      input slots.
 * src: source register; File and Index are read.
 * For PROGRAM_INPUT the mapped slot vp->inputs[src->Index] is returned; the
 * mapping must already exist (asserted != -1).  A negative index triggers a
 * warning on stderr.
 * NOTE(review): the return for the remaining register files is not visible
 * in this excerpt.
 */
253 static unsigned long t_src_index(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
259 if(src
->File
== PROGRAM_INPUT
){
260 /* if(vp->inputs[src->Index] != -1)
261 return vp->inputs[src->Index];
263 for(i=0; i < VERT_ATTRIB_MAX; i++)
264 if(vp->inputs[i] > max_reg)
265 max_reg = vp->inputs[i];
267 vp->inputs[src->Index] = max_reg+1;*/
269 //vp_dump_inputs(vp, __FUNCTION__);
270 assert(vp
->inputs
[src
->Index
] != -1);
271 return vp
->inputs
[src
->Index
];
273 if (src
->Index
< 0) {
274 fprintf(stderr
, "WARNING negative offsets for indirect addressing do not work\n");
/*
 * Build the hardware source word for a vector operand.
 *
 * Combines the register index from t_src_index(), the four swizzle
 * components of src->Swizzle in order, the source class of src->File and
 * src->Negate as the negate flags; src->RelAddr is OR'd in shifted left by 4.
 */
281 static unsigned long t_src(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
284 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
285 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
286 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
287 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
288 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
289 t_src_class(src
->File
),
290 src
->Negate
) | (src
->RelAddr
<< 4);
/*
 * Build the hardware source word for a scalar operand.
 *
 * Like t_src(), except that swizzle component 0 of src->Swizzle is used for
 * all four channels, and any non-zero src->Negate becomes VSF_FLAG_ALL
 * (VSF_FLAG_NONE otherwise).
 */
293 static unsigned long t_src_scalar(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
296 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
297 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
298 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
299 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
300 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
301 t_src_class(src
->File
),
302 src
->Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
->RelAddr
<< 4);
// Map a Mesa opcode onto the R200_VPI_OUT_OP_* value with the matching name.
// DP4 is the one visible exception: it maps to R200_VPI_OUT_OP_DOT.
// An opcode without a mapping is reported on stderr ("Should not be called
// with opcode ...").
305 static unsigned long t_opcode(enum prog_opcode opcode
)
309 case OPCODE_ADD
: return R200_VPI_OUT_OP_ADD
;
310 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
311 * seems to ignore neg offsets which isn't quite correct...
313 case OPCODE_ARL
: return R200_VPI_OUT_OP_ARL
;
314 case OPCODE_DP4
: return R200_VPI_OUT_OP_DOT
;
315 case OPCODE_DST
: return R200_VPI_OUT_OP_DST
;
316 case OPCODE_EX2
: return R200_VPI_OUT_OP_EX2
;
317 case OPCODE_EXP
: return R200_VPI_OUT_OP_EXP
;
318 case OPCODE_FRC
: return R200_VPI_OUT_OP_FRC
;
319 case OPCODE_LG2
: return R200_VPI_OUT_OP_LG2
;
320 case OPCODE_LIT
: return R200_VPI_OUT_OP_LIT
;
321 case OPCODE_LOG
: return R200_VPI_OUT_OP_LOG
;
322 case OPCODE_MAX
: return R200_VPI_OUT_OP_MAX
;
323 case OPCODE_MIN
: return R200_VPI_OUT_OP_MIN
;
324 case OPCODE_MUL
: return R200_VPI_OUT_OP_MUL
;
325 case OPCODE_RCP
: return R200_VPI_OUT_OP_RCP
;
326 case OPCODE_RSQ
: return R200_VPI_OUT_OP_RSQ
;
327 case OPCODE_SGE
: return R200_VPI_OUT_OP_SGE
;
328 case OPCODE_SLT
: return R200_VPI_OUT_OP_SLT
;
331 fprintf(stderr
, "%s: Should not be called with opcode %d!", __FUNCTION__
, opcode
);
// Look up the operand count/flags word of a Mesa opcode.
// Scans op_names[] linearly and returns the ip field of the first entry whose
// opcode matches; an opcode that is not in the table is reported on stderr.
337 static unsigned long op_operands(enum prog_opcode opcode
)
341 /* Can we trust mesas opcodes to be in order ? */
342 for(i
=0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
343 if(op_names
[i
].opcode
== opcode
)
344 return op_names
[i
].ip
;
346 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
/* CMP_SRCS(a, b): true when the two source registers differ in RelAddr or
 * Index while both are of class PARAM or both are of class ATTR.  The
 * translator uses it to decide when one source must first be copied into a
 * temporary.  The arguments are expanded unparenthesized and more than once,
 * so pass plain lvalues only. */
351 /* TODO: Get rid of t_src_class call */
352 #define CMP_SRCS(a, b) (((a.RelAddr != b.RelAddr) || (a.Index != b.Index)) && \
353 ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
354 t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
355 (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
356 t_src_class(b.File) == VSF_IN_CLASS_ATTR))) \
358 /* fglrx on rv250 codes up unused sources as follows:
359 unused but necessary sources are same as previous source, zero-ed out.
360 unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
361 i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
362 set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
364 /* use these simpler definitions. Must obviously not be used with not yet set up regs.
365 Those are NOT semantically equivalent to the r300 ones, requires code changes */
/* ZERO_SRC_n: source word n of the instruction at o_inst with the 12 bits
 * starting at R200_VPI_IN_X_SHIFT cleared and all four component selects set
 * to R200_VPI_IN_SELECT_ZERO.  Requires a variable named o_inst in scope. */
366 #define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
367 | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
368 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
369 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
370 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
372 #define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
373 | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
374 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
375 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
376 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
378 #define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
379 | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
380 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
381 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
382 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
/* UNUSED_SRC_n: source word n of the instruction at o_inst with its low four
 * bits replaced by 9.
 * NOTE(review): 9 presumably encodes VSF_IN_CLASS_NONE as described in the
 * fglrx note - confirm against the register header. */
384 #define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)
386 #define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)
388 #define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)
392 * Generate an R200 vertex program from Mesa's internal representation.
394 * \return GL_TRUE for success, GL_FALSE for failure.
396 static GLboolean
r200_translate_vertex_program(struct gl_context
*ctx
, struct r200_vertex_program
*vp
)
398 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
399 struct prog_instruction
*vpi
;
401 VERTEX_SHADER_INSTRUCTION
*o_inst
;
402 unsigned long operands
;
411 vp
->native
= GL_FALSE
;
412 vp
->translated
= GL_TRUE
;
413 vp
->fogmode
= ctx
->Fog
.Mode
;
415 if (mesa_vp
->Base
.NumInstructions
== 0)
419 if ((mesa_vp
->Base
.InputsRead
&
420 ~(VERT_BIT_POS
| VERT_BIT_NORMAL
| VERT_BIT_COLOR0
| VERT_BIT_COLOR1
|
421 VERT_BIT_FOG
| VERT_BIT_TEX0
| VERT_BIT_TEX1
| VERT_BIT_TEX2
|
422 VERT_BIT_TEX3
| VERT_BIT_TEX4
| VERT_BIT_TEX5
)) != 0) {
423 if (R200_DEBUG
& RADEON_FALLBACKS
) {
424 fprintf(stderr
, "can't handle vert prog inputs 0x%x\n",
425 mesa_vp
->Base
.InputsRead
);
431 if ((mesa_vp
->Base
.OutputsWritten
&
432 ~((1 << VARYING_SLOT_POS
) | (1 << VARYING_SLOT_COL0
) | (1 << VARYING_SLOT_COL1
) |
433 (1 << VARYING_SLOT_FOGC
) | (1 << VARYING_SLOT_TEX0
) | (1 << VARYING_SLOT_TEX1
) |
434 (1 << VARYING_SLOT_TEX2
) | (1 << VARYING_SLOT_TEX3
) | (1 << VARYING_SLOT_TEX4
) |
435 (1 << VARYING_SLOT_TEX5
) | (1 << VARYING_SLOT_PSIZ
))) != 0) {
436 if (R200_DEBUG
& RADEON_FALLBACKS
) {
437 fprintf(stderr
, "can't handle vert prog outputs 0x%llx\n",
438 (unsigned long long) mesa_vp
->Base
.OutputsWritten
);
443 /* Initial value should be last tmp reg that hw supports.
444 Strangely enough r300 doesn't mind even though these would be out of range.
445 Smart enough to realize that it doesn't need it? */
446 int u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
447 struct prog_src_register src
[3];
448 struct prog_dst_register dst
;
450 /* FIXME: is changing the prog safe to do here? */
451 if (mesa_vp
->IsPositionInvariant
&&
452 /* make sure we only do this once */
453 !(mesa_vp
->Base
.OutputsWritten
& (1 << VARYING_SLOT_POS
))) {
454 _mesa_insert_mvp_code(ctx
, mesa_vp
);
457 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
458 base e isn't directly available either. */
459 if ((mesa_vp
->Base
.OutputsWritten
& (1 << VARYING_SLOT_FOGC
)) && !vp
->fogpidx
) {
460 struct gl_program_parameter_list
*paramList
;
461 gl_state_index tokens
[STATE_LENGTH
] = { STATE_FOG_PARAMS
, 0, 0, 0, 0 };
462 paramList
= mesa_vp
->Base
.Parameters
;
463 vp
->fogpidx
= _mesa_add_state_reference(paramList
, tokens
);
467 mesa_vp
->Base
.NumNativeInstructions
= 0;
468 if (mesa_vp
->Base
.Parameters
)
469 mesa_vp
->Base
.NumNativeParameters
= mesa_vp
->Base
.Parameters
->NumParameters
;
471 mesa_vp
->Base
.NumNativeParameters
= 0;
473 for(i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
475 for(i
= 0; i
< 15; i
++)
476 vp
->inputmap_rev
[i
] = 255;
477 free_inputs
= 0x2ffd;
479 /* fglrx uses fixed inputs as follows for conventional attribs.
480 generic attribs use non-fixed assignment, fglrx will always use the
481 lowest attrib values available. We'll just do the same.
482 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
483 and 13 in a hw vertex prog.
484 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
485 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
486 Additionally, not more than 12 arrays in total are possible I think.
487 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
488 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
489 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
490 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
493 /* attr 4,5 and 13 are only used with generic attribs.
494 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
495 not possible to use with vertex progs as it is lacking in vert prog specification) */
496 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
497 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_POS
) {
498 vp
->inputs
[VERT_ATTRIB_POS
] = 0;
499 vp
->inputmap_rev
[0] = VERT_ATTRIB_POS
;
500 free_inputs
&= ~(1 << 0);
503 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_WEIGHT
) {
504 vp
->inputs
[VERT_ATTRIB_WEIGHT
] = 12;
505 vp
->inputmap_rev
[1] = VERT_ATTRIB_WEIGHT
;
508 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_NORMAL
) {
509 vp
->inputs
[VERT_ATTRIB_NORMAL
] = 1;
510 vp
->inputmap_rev
[2] = VERT_ATTRIB_NORMAL
;
513 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_COLOR0
) {
514 vp
->inputs
[VERT_ATTRIB_COLOR0
] = 2;
515 vp
->inputmap_rev
[4] = VERT_ATTRIB_COLOR0
;
516 free_inputs
&= ~(1 << 2);
519 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_COLOR1
) {
520 vp
->inputs
[VERT_ATTRIB_COLOR1
] = 3;
521 vp
->inputmap_rev
[5] = VERT_ATTRIB_COLOR1
;
522 free_inputs
&= ~(1 << 3);
525 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_FOG
) {
526 vp
->inputs
[VERT_ATTRIB_FOG
] = 15; array_count
++;
527 vp
->inputmap_rev
[3] = VERT_ATTRIB_FOG
;
530 /* VERT_ATTRIB_TEX0-5 */
531 for (i
= 0; i
<= 5; i
++) {
532 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_TEX(i
)) {
533 vp
->inputs
[VERT_ATTRIB_TEX(i
)] = i
+ 6;
534 vp
->inputmap_rev
[8 + i
] = VERT_ATTRIB_TEX(i
);
535 free_inputs
&= ~(1 << (i
+ 6));
539 /* using VERT_ATTRIB_TEX6/7 would be illegal */
540 for (; i
< VERT_ATTRIB_TEX_MAX
; i
++) {
541 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_TEX(i
)) {
542 if (R200_DEBUG
& RADEON_FALLBACKS
) {
543 fprintf(stderr
, "texture attribute %d in vert prog\n", i
);
548 /* completely ignore aliasing? */
549 for (i
= 0; i
< VERT_ATTRIB_GENERIC_MAX
; i
++) {
551 /* completely ignore aliasing? */
552 if (mesa_vp
->Base
.InputsRead
& VERT_BIT_GENERIC(i
)) {
554 if (array_count
> 12) {
555 if (R200_DEBUG
& RADEON_FALLBACKS
) {
556 fprintf(stderr
, "more than 12 attribs used in vert prog\n");
560 for (j
= 0; j
< 14; j
++) {
561 /* will always find one due to limited array_count */
562 if (free_inputs
& (1 << j
)) {
563 free_inputs
&= ~(1 << j
);
564 vp
->inputs
[VERT_ATTRIB_GENERIC(i
)] = j
;
567 vp
->inputmap_rev
[j
] = VERT_ATTRIB_GENERIC(i
);
569 /* mapped to col/tex */
570 vp
->inputmap_rev
[j
+ 2] = VERT_ATTRIB_GENERIC(i
);
573 vp
->inputmap_rev
[j
+ 1] = VERT_ATTRIB_GENERIC(i
);
581 if (!(mesa_vp
->Base
.OutputsWritten
& (1 << VARYING_SLOT_POS
))) {
582 if (R200_DEBUG
& RADEON_FALLBACKS
) {
583 fprintf(stderr
, "can't handle vert prog without position output\n");
587 if (free_inputs
& 1) {
588 if (R200_DEBUG
& RADEON_FALLBACKS
) {
589 fprintf(stderr
, "can't handle vert prog without position input\n");
595 for (vpi
= mesa_vp
->Base
.Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++){
596 operands
= op_operands(vpi
->Opcode
);
597 are_srcs_scalar
= operands
& SCALAR_FLAG
;
600 for(i
= 0; i
< operands
; i
++) {
601 src
[i
] = vpi
->SrcReg
[i
];
602 /* hack up default attrib values as per spec as swizzling.
603 normal, fog, secondary color. Crazy?
604 May need more if we don't submit vec4 elements? */
605 if (src
[i
].File
== PROGRAM_INPUT
) {
606 if (src
[i
].Index
== VERT_ATTRIB_NORMAL
) {
608 for (j
= 0; j
< 4; j
++) {
609 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
610 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
611 src
[i
].Swizzle
|= SWIZZLE_ONE
<< (j
*3);
615 else if (src
[i
].Index
== VERT_ATTRIB_COLOR1
) {
617 for (j
= 0; j
< 4; j
++) {
618 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
619 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
620 src
[i
].Swizzle
|= SWIZZLE_ZERO
<< (j
*3);
624 else if (src
[i
].Index
== VERT_ATTRIB_FOG
) {
626 for (j
= 0; j
< 4; j
++) {
627 if (GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_W
) {
628 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
629 src
[i
].Swizzle
|= SWIZZLE_ONE
<< (j
*3);
631 else if ((GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_Y
) ||
632 GET_SWZ(src
[i
].Swizzle
, j
) == SWIZZLE_Z
) {
633 src
[i
].Swizzle
&= ~(SWIZZLE_W
<< (j
*3));
634 src
[i
].Swizzle
|= SWIZZLE_ZERO
<< (j
*3);
642 if( CMP_SRCS(src
[1], src
[2]) || CMP_SRCS(src
[0], src
[2]) ){
643 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
644 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
647 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
648 SWIZZLE_X
, SWIZZLE_Y
,
649 SWIZZLE_Z
, SWIZZLE_W
,
650 t_src_class(src
[2].File
), VSF_FLAG_NONE
) | (src
[2].RelAddr
<< 4);
652 o_inst
->src1
= ZERO_SRC_0
;
653 o_inst
->src2
= UNUSED_SRC_1
;
656 src
[2].File
= PROGRAM_TEMPORARY
;
657 src
[2].Index
= u_temp_i
;
664 if( CMP_SRCS(src
[1], src
[0]) ){
665 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
666 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
669 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
670 SWIZZLE_X
, SWIZZLE_Y
,
671 SWIZZLE_Z
, SWIZZLE_W
,
672 t_src_class(src
[0].File
), VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
674 o_inst
->src1
= ZERO_SRC_0
;
675 o_inst
->src2
= UNUSED_SRC_1
;
678 src
[0].File
= PROGRAM_TEMPORARY
;
679 src
[0].Index
= u_temp_i
;
686 if (dst
.File
== PROGRAM_OUTPUT
&&
687 dst
.Index
== VARYING_SLOT_FOGC
&&
688 dst
.WriteMask
& WRITEMASK_X
) {
689 fog_temp_i
= u_temp_i
;
690 dst
.File
= PROGRAM_TEMPORARY
;
691 dst
.Index
= fog_temp_i
;
696 /* These ops need special handling. */
699 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
700 So may need to insert additional instruction */
701 if ((src
[0].File
== src
[1].File
) &&
702 (src
[0].Index
== src
[1].Index
)) {
703 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&dst
),
704 t_dst_mask(dst
.WriteMask
));
705 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
706 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
708 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
710 t_src_class(src
[0].File
),
711 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
712 o_inst
->src1
= UNUSED_SRC_0
;
713 o_inst
->src2
= UNUSED_SRC_0
;
716 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
717 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
719 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
720 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
721 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
,
722 t_src_class(src
[0].File
),
723 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
724 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
725 SWIZZLE_ZERO
, SWIZZLE_ZERO
,
726 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), SWIZZLE_ZERO
,
727 t_src_class(src
[1].File
),
728 src
[1].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
729 o_inst
->src2
= UNUSED_SRC_1
;
732 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&dst
),
733 t_dst_mask(dst
.WriteMask
));
734 o_inst
->src0
= MAKE_VSF_SOURCE(u_temp_i
,
741 o_inst
->src1
= UNUSED_SRC_0
;
742 o_inst
->src2
= UNUSED_SRC_0
;
747 case OPCODE_MOV
://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
749 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
750 t_dst_mask(dst
.WriteMask
));
751 o_inst
->src0
= t_src(vp
, &src
[0]);
752 o_inst
->src1
= ZERO_SRC_0
;
753 o_inst
->src2
= UNUSED_SRC_1
;
757 /* only 2 read ports into temp memory thus may need the macro op MAD_2
758 instead (requiring 2 clocks) if all inputs are in temp memory
759 (and, only if they actually reference 3 distinct temps) */
760 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
761 src
[1].File
== PROGRAM_TEMPORARY
&&
762 src
[2].File
== PROGRAM_TEMPORARY
&&
763 (((src
[0].RelAddr
<< 8) | src
[0].Index
) != ((src
[1].RelAddr
<< 8) | src
[1].Index
)) &&
764 (((src
[0].RelAddr
<< 8) | src
[0].Index
) != ((src
[2].RelAddr
<< 8) | src
[2].Index
)) &&
765 (((src
[1].RelAddr
<< 8) | src
[1].Index
) != ((src
[2].RelAddr
<< 8) | src
[2].Index
))) ?
766 R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
768 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&dst
),
769 t_dst_mask(dst
.WriteMask
));
770 o_inst
->src0
= t_src(vp
, &src
[0]);
772 if ((o_inst
- vp
->instr
) == 31) {
773 /* fix up the broken vertex program of quake4 demo... */
774 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
775 SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
,
776 t_src_class(src
[1].File
),
777 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
778 o_inst
->src2
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
779 SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
,
780 t_src_class(src
[1].File
),
781 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
784 o_inst
->src1
= t_src(vp
, &src
[1]);
785 o_inst
->src2
= t_src(vp
, &src
[2]);
788 o_inst
->src1
= t_src(vp
, &src
[1]);
789 o_inst
->src2
= t_src(vp
, &src
[2]);
793 case OPCODE_DP3
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
794 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&dst
),
795 t_dst_mask(dst
.WriteMask
));
797 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
798 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
799 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
800 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
802 t_src_class(src
[0].File
),
803 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
805 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
806 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
807 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
808 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
810 t_src_class(src
[1].File
),
811 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
813 o_inst
->src2
= UNUSED_SRC_1
;
816 case OPCODE_DPH
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
817 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&dst
),
818 t_dst_mask(dst
.WriteMask
));
820 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
821 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
822 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
823 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
824 VSF_IN_COMPONENT_ONE
,
825 t_src_class(src
[0].File
),
826 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
827 o_inst
->src1
= t_src(vp
, &src
[1]);
828 o_inst
->src2
= UNUSED_SRC_1
;
831 case OPCODE_SUB
://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
832 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
833 t_dst_mask(dst
.WriteMask
));
835 o_inst
->src0
= t_src(vp
, &src
[0]);
836 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
837 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
838 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
839 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
840 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
841 t_src_class(src
[1].File
),
842 (!src
[1].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
843 o_inst
->src2
= UNUSED_SRC_1
;
846 case OPCODE_ABS
://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
847 o_inst
->op
=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX
, t_dst(&dst
),
848 t_dst_mask(dst
.WriteMask
));
850 o_inst
->src0
=t_src(vp
, &src
[0]);
851 o_inst
->src1
=MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
852 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
853 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
854 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
855 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
856 t_src_class(src
[0].File
),
857 (!src
[0].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
858 o_inst
->src2
= UNUSED_SRC_1
;
862 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
863 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
865 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_FRC
,
866 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
867 t_dst_mask(dst
.WriteMask
));
869 o_inst
->src0
= t_src(vp
, &src
[0]);
870 o_inst
->src1
= UNUSED_SRC_0
;
871 o_inst
->src2
= UNUSED_SRC_1
;
874 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&dst
),
875 t_dst_mask(dst
.WriteMask
));
877 o_inst
->src0
= t_src(vp
, &src
[0]);
878 o_inst
->src1
= MAKE_VSF_SOURCE(u_temp_i
,
884 /* Not 100% sure about this */
885 (!src
[0].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
/*VSF_FLAG_ALL*/);
887 o_inst
->src2
= UNUSED_SRC_0
;
892 /* mul r0, r1.yzxw, r2.zxyw
893 mad r0, -r2.yzxw, r1.zxyw, r0
895 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
896 src
[1].File
== PROGRAM_TEMPORARY
&&
897 (((src
[0].RelAddr
<< 8) | src
[0].Index
) != ((src
[1].RelAddr
<< 8) | src
[1].Index
))) ?
898 R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
900 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
901 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
902 t_dst_mask(dst
.WriteMask
));
904 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
905 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
906 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
907 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
908 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
909 t_src_class(src
[0].File
),
910 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
912 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
913 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
914 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
915 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
916 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
917 t_src_class(src
[1].File
),
918 src
[1].Negate
) | (src
[1].RelAddr
<< 4);
920 o_inst
->src2
= UNUSED_SRC_1
;
924 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&dst
),
925 t_dst_mask(dst
.WriteMask
));
927 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
928 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
929 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
930 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
931 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
932 t_src_class(src
[1].File
),
933 (!src
[1].Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
935 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
936 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
937 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
938 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
939 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
940 t_src_class(src
[0].File
),
941 src
[0].Negate
) | (src
[0].RelAddr
<< 4);
943 o_inst
->src2
= MAKE_VSF_SOURCE(u_temp_i
+1,
958 o_inst
->op
= MAKE_VSF_OP(t_opcode(vpi
->Opcode
), t_dst(&dst
),
959 t_dst_mask(dst
.WriteMask
));
964 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
965 o_inst
->src1
= UNUSED_SRC_0
;
966 o_inst
->src2
= UNUSED_SRC_1
;
970 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
971 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
972 o_inst
->src2
= UNUSED_SRC_1
;
976 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
977 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
978 o_inst
->src2
= t_src_scalar(vp
, &src
[2]);
982 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
989 o_inst
->src0
= t_src(vp
, &src
[0]);
990 o_inst
->src1
= UNUSED_SRC_0
;
991 o_inst
->src2
= UNUSED_SRC_1
;
995 o_inst
->src0
= t_src(vp
, &src
[0]);
996 o_inst
->src1
= t_src(vp
, &src
[1]);
997 o_inst
->src2
= UNUSED_SRC_1
;
1001 o_inst
->src0
= t_src(vp
, &src
[0]);
1002 o_inst
->src1
= t_src(vp
, &src
[1]);
1003 o_inst
->src2
= t_src(vp
, &src
[2]);
1007 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
1016 if (vp
->fogmode
== GL_EXP
) {
1017 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1018 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1020 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1021 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, X
, X
, X
, X
, PARAM
, NONE
);
1022 o_inst
->src2
= UNUSED_SRC_1
;
1024 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E
,
1025 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1027 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1028 o_inst
->src1
= UNUSED_SRC_0
;
1029 o_inst
->src2
= UNUSED_SRC_1
;
1031 else if (vp
->fogmode
== GL_EXP2
) {
1032 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1033 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1035 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1036 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, X
, X
, X
, X
, PARAM
, NONE
);
1037 o_inst
->src2
= UNUSED_SRC_1
;
1039 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1040 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1042 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1043 o_inst
->src1
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1044 o_inst
->src2
= UNUSED_SRC_1
;
1046 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E
,
1047 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1049 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1050 o_inst
->src1
= UNUSED_SRC_0
;
1051 o_inst
->src2
= UNUSED_SRC_1
;
1053 else { /* fogmode == GL_LINEAR */
1054 /* could do that with single op (dot) if using params like
1055 with fixed function pipeline fog */
1056 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
1057 (fog_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
1059 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, ALL
);
1060 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, Z
, Z
, Z
, Z
, PARAM
, NONE
);
1061 o_inst
->src2
= UNUSED_SRC_1
;
1063 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
1064 R200_VSF_OUT_CLASS_RESULT_FOGC
,
1066 o_inst
->src0
= EASY_VSF_SOURCE(fog_temp_i
, X
, X
, X
, X
, TMP
, NONE
);
1067 o_inst
->src1
= EASY_VSF_SOURCE(vp
->fogpidx
, W
, W
, W
, W
, PARAM
, NONE
);
1068 o_inst
->src2
= UNUSED_SRC_1
;
1074 u_temp_used
= (R200_VSF_MAX_TEMPS
- 1) - u_temp_i
;
1075 if (mesa_vp
->Base
.NumNativeTemporaries
<
1076 (mesa_vp
->Base
.NumTemporaries
+ u_temp_used
)) {
1077 mesa_vp
->Base
.NumNativeTemporaries
=
1078 mesa_vp
->Base
.NumTemporaries
+ u_temp_used
;
1080 if ((mesa_vp
->Base
.NumTemporaries
+ u_temp_used
) > R200_VSF_MAX_TEMPS
) {
1081 if (R200_DEBUG
& RADEON_FALLBACKS
) {
1082 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", mesa_vp
->Base
.NumTemporaries
, u_temp_used
);
1086 u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
1087 if(o_inst
- vp
->instr
>= R200_VSF_MAX_INST
) {
1088 mesa_vp
->Base
.NumNativeInstructions
= 129;
1089 if (R200_DEBUG
& RADEON_FALLBACKS
) {
1090 fprintf(stderr
, "more than 128 native instructions\n");
1094 if ((o_inst
->op
& R200_VSF_OUT_CLASS_MASK
) == R200_VSF_OUT_CLASS_RESULT_POS
) {
1095 vp
->pos_end
= (o_inst
- vp
->instr
);
1099 vp
->native
= GL_TRUE
;
1100 mesa_vp
->Base
.NumNativeInstructions
= (o_inst
- vp
->instr
);
1102 fprintf(stderr
, "hw program:\n");
1103 for(i
=0; i
< vp
->program
.length
; i
++)
1104 fprintf(stderr
, "%08x\n", vp
->instr
[i
]);
1109 void r200SetupVertexProg( struct gl_context
*ctx
) {
1110 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1111 struct r200_vertex_program
*vp
= (struct r200_vertex_program
*)ctx
->VertexProgram
.Current
;
1115 if (!vp
->translated
|| (ctx
->Fog
.Enabled
&& ctx
->Fog
.Mode
!= vp
->fogmode
)) {
1116 rmesa
->curr_vp_hw
= NULL
;
1117 r200_translate_vertex_program(ctx
, vp
);
1119 /* could optimize setting up vertex progs away for non-tcl hw */
1120 fallback
= !(vp
->native
&& r200VertexProgUpdateParams(ctx
, vp
));
1121 TCL_FALLBACK(ctx
, R200_TCL_FALLBACK_VERTEX_PROGRAM
, fallback
);
1122 if (rmesa
->radeon
.TclFallback
) return;
1124 R200_STATECHANGE( rmesa
, vap
);
1125 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1126 maybe only when using more than 64 inst / 96 param? */
1127 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] |= R200_VAP_PROG_VTX_SHADER_ENABLE
/*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1129 R200_STATECHANGE( rmesa
, pvs
);
1131 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_1
] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT
) |
1132 ((vp
->mesa_program
.Base
.NumNativeInstructions
- 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT
) |
1133 (vp
->pos_end
<< R200_PVS_CNTL_1_POS_END_SHIFT
);
1134 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_2
] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT
) |
1135 (vp
->mesa_program
.Base
.NumNativeParameters
<< R200_PVS_CNTL_2_PARAM_COUNT_SHIFT
);
1137 /* maybe user clip planes just work with vertex progs... untested */
1138 if (ctx
->Transform
.ClipPlanesEnabled
) {
1139 R200_STATECHANGE( rmesa
, tcl
);
1140 if (vp
->mesa_program
.IsPositionInvariant
) {
1141 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] |= (ctx
->Transform
.ClipPlanesEnabled
<< 2);
1144 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] &= ~(0xfc);
1148 if (vp
!= rmesa
->curr_vp_hw
) {
1149 GLuint count
= vp
->mesa_program
.Base
.NumNativeInstructions
;
1150 drm_radeon_cmd_header_t tmp
;
1152 R200_STATECHANGE( rmesa
, vpi
[0] );
1153 R200_STATECHANGE( rmesa
, vpi
[1] );
1155 /* FIXME: what about using a memcopy... */
1156 for (i
= 0; (i
< 64) && i
< count
; i
++) {
1157 rmesa
->hw
.vpi
[0].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
].op
;
1158 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
].src0
;
1159 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
].src1
;
1160 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
].src2
;
1162 /* hack up the cmd_size so not the whole state atom is emitted always.
1163 This may require some more thought, we may emit half progs on lost state, but
1164 hopefully it won't matter?
1165 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1166 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1167 rmesa
->hw
.vpi
[0].cmd_size
= 1 + 4 * ((count
> 64) ? 64 : count
);
1168 tmp
.i
= rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
];
1169 tmp
.veclinear
.count
= (count
> 64) ? 64 : count
;
1170 rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
] = tmp
.i
;
1172 for (i
= 0; i
< (count
- 64); i
++) {
1173 rmesa
->hw
.vpi
[1].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
+ 64].op
;
1174 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
+ 64].src0
;
1175 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
+ 64].src1
;
1176 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
+ 64].src2
;
1178 rmesa
->hw
.vpi
[1].cmd_size
= 1 + 4 * (count
- 64);
1179 tmp
.i
= rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
];
1180 tmp
.veclinear
.count
= count
- 64;
1181 rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
] = tmp
.i
;
1183 rmesa
->curr_vp_hw
= vp
;
1189 r200BindProgram(struct gl_context
*ctx
, GLenum target
, struct gl_program
*prog
)
1191 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1194 case GL_VERTEX_PROGRAM_ARB
:
1195 rmesa
->curr_vp_hw
= NULL
;
1198 _mesa_problem(ctx
, "Target not supported yet!");
1203 static struct gl_program
*
1204 r200NewProgram(struct gl_context
*ctx
, GLenum target
, GLuint id
)
1206 struct r200_vertex_program
*vp
;
1209 case GL_VERTEX_PROGRAM_ARB
:
1210 vp
= CALLOC_STRUCT(r200_vertex_program
);
1211 return _mesa_init_vertex_program(ctx
, &vp
->mesa_program
, target
, id
);
1212 case GL_FRAGMENT_PROGRAM_ARB
:
1213 return _mesa_init_fragment_program( ctx
, CALLOC_STRUCT(gl_fragment_program
), target
, id
);
1215 _mesa_problem(ctx
, "Bad target in r200NewProgram");
/**
 * DeleteProgram driver hook; simply forwards to _mesa_delete_program().
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
/*
 * Installed as the ProgramStringNotify driver hook (see r200InitShaderFuncs).
 *
 * GL_VERTEX_PROGRAM_ARB: marks the program as untranslated, translates it
 * again and clears rmesa->curr_vp_hw.
 * GL_FRAGMENT_SHADER_ATI: clears rmesa->afs_loaded.
 * Afterwards _tnl_program_string() is called and its result is discarded.
 *
 * NOTE(review): some short lines of this function (braces, switch/break,
 * return, and the line right after the `translated` reset) are not visible
 * in this view - confirm against the full file before reordering anything.
 */
1228 r200ProgramStringNotify(struct gl_context
*ctx
, GLenum target
, struct gl_program
*prog
)
/* prog is reinterpreted as the driver's vertex program struct; only the
   vertex program case below reads through vp */
1230 struct r200_vertex_program
*vp
= (void *)prog
;
1231 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1234 case GL_VERTEX_PROGRAM_ARB
:
/* drop the old translation state before translating again */
1235 vp
->translated
= GL_FALSE
;
1237 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1238 r200_translate_vertex_program(ctx
, vp
);
/* r200SetupVertexProg re-uploads instructions when vp != curr_vp_hw */
1239 rmesa
->curr_vp_hw
= NULL
;
1241 case GL_FRAGMENT_SHADER_ATI
:
1242 rmesa
->afs_loaded
= NULL
;
1245 /* need this for tcl fallbacks */
1246 (void) _tnl_program_string(ctx
, target
, prog
);
1248 /* XXX check if program is legal, within limits */
1253 r200IsProgramNative(struct gl_context
*ctx
, GLenum target
, struct gl_program
*prog
)
1255 struct r200_vertex_program
*vp
= (void *)prog
;
1258 case GL_VERTEX_PROGRAM_ARB
:
1259 if (!vp
->translated
) {
1260 r200_translate_vertex_program(ctx
, vp
);
1262 /* does not take parameters etc. into account */
1265 _mesa_problem(ctx
, "Bad target in r200NewProgram");
1270 void r200InitShaderFuncs(struct dd_function_table
*functions
)
1272 functions
->NewProgram
= r200NewProgram
;
1273 functions
->BindProgram
= r200BindProgram
;
1274 functions
->DeleteProgram
= r200DeleteProgram
;
1275 functions
->ProgramStringNotify
= r200ProgramStringNotify
;
1276 functions
->IsProgramNative
= r200IsProgramNative
;