1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Aapo Tahkola <aet@rasterburn.org>
37 #include "r200_context.h"
38 #include "r200_vertprog.h"
39 #include "r200_ioctl.h"
41 #include "program_instruction.h"
/* Bit 31 flags an operand word as "scalar"; the low nibble (OP_MASK) carries
 * the operand count.  The constants are unsigned so that shifting into the
 * top bit is well defined and the value does not sign-extend when it is
 * stored in an unsigned long. */
#define SCALAR_FLAG (1u << 31)
#define FLAG_MASK   (1u << 31)
#define OP_MASK     (0xf) /* we are unlikely to have more than 15 */
/* Expands to one brace-enclosed initializer: the opcode name as a string,
 * the matching OPCODE_* constant, then the ip and op words as given. */
#define OPN(operator, ip, op) {#operator, OPCODE_##operator, ip, op}
52 unsigned long ip
; /* number of input operands and flags */
57 OPN(ARL
, 1, 1|SCALAR_FLAG
),
58 OPN(DP3
, 2, 3|SCALAR_FLAG
),
59 OPN(DP4
, 2, 3|SCALAR_FLAG
),
60 OPN(DPH
, 2, 3|SCALAR_FLAG
),
62 OPN(EX2
, 1|SCALAR_FLAG
, 4|SCALAR_FLAG
),
63 OPN(EXP
, 1|SCALAR_FLAG
, 1),
66 OPN(LG2
, 1|SCALAR_FLAG
, 4|SCALAR_FLAG
),
68 OPN(LOG
, 1|SCALAR_FLAG
, 1),
74 OPN(POW
, 2|SCALAR_FLAG
, 4|SCALAR_FLAG
),
75 OPN(RCP
, 1|SCALAR_FLAG
, 4|SCALAR_FLAG
),
76 OPN(RSQ
, 1|SCALAR_FLAG
, 4|SCALAR_FLAG
),
82 OPN(RCC
, 0, 0), //extra
88 static GLboolean
r200VertexProgUpdateParams(GLcontext
*ctx
, struct r200_vertex_program
*vp
)
90 r200ContextPtr rmesa
= R200_CONTEXT( ctx
);
91 GLfloat
*fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
+ 1];
93 struct vertex_program
*mesa_vp
= (void *)vp
;
94 struct program_parameter_list
*paramList
;
95 drm_radeon_cmd_header_t tmp
;
97 R200_STATECHANGE( rmesa
, vpp
[0] );
98 R200_STATECHANGE( rmesa
, vpp
[1] );
99 assert(mesa_vp
->Base
.Parameters
);
100 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
101 paramList
= mesa_vp
->Base
.Parameters
;
103 if(paramList
->NumParameters
> R200_VSF_MAX_PARAM
){
104 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
108 for(pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
109 switch(paramList
->Parameters
[pi
].Type
) {
110 case PROGRAM_STATE_VAR
:
111 case PROGRAM_NAMED_PARAM
:
112 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
113 case PROGRAM_CONSTANT
:
114 *fcmd
++ = paramList
->ParameterValues
[pi
][0];
115 *fcmd
++ = paramList
->ParameterValues
[pi
][1];
116 *fcmd
++ = paramList
->ParameterValues
[pi
][2];
117 *fcmd
++ = paramList
->ParameterValues
[pi
][3];
120 _mesa_problem(NULL
, "Bad param type in %s", __FUNCTION__
);
124 fcmd
= (GLfloat
*)rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
+ 1];
127 /* hack up the cmd_size so not the whole state atom is emitted always. */
128 rmesa
->hw
.vpp
[0].cmd_size
=
129 1 + 4 * ((paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
);
130 tmp
.i
= rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
];
131 tmp
.veclinear
.count
= (paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
;
132 rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
] = tmp
.i
;
133 if (paramList
->NumParameters
> 96) {
134 rmesa
->hw
.vpp
[1].cmd_size
= 1 + 4 * (paramList
->NumParameters
- 96);
135 tmp
.i
= rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
];
136 tmp
.veclinear
.count
= paramList
->NumParameters
- 96;
137 rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
] = tmp
.i
;
142 static unsigned long t_dst_mask(GLuint mask
)
144 unsigned long flags
= 0;
146 if(mask
& WRITEMASK_X
) flags
|= VSF_FLAG_X
;
147 if(mask
& WRITEMASK_Y
) flags
|= VSF_FLAG_Y
;
148 if(mask
& WRITEMASK_Z
) flags
|= VSF_FLAG_Z
;
149 if(mask
& WRITEMASK_W
) flags
|= VSF_FLAG_W
;
154 static unsigned long t_dst(struct prog_dst_register
*dst
)
157 case PROGRAM_TEMPORARY
:
158 return ((dst
->Index
<< R200_VPI_OUT_REG_INDEX_SHIFT
)
159 | R200_VSF_OUT_CLASS_TMP
);
161 switch (dst
->Index
) {
162 case VERT_RESULT_HPOS
:
163 return R200_VSF_OUT_CLASS_RESULT_POS
;
164 case VERT_RESULT_COL0
:
165 return R200_VSF_OUT_CLASS_RESULT_COLOR
;
166 case VERT_RESULT_COL1
:
167 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT
)
168 | R200_VSF_OUT_CLASS_RESULT_COLOR
);
169 case VERT_RESULT_FOGC
:
170 return R200_VSF_OUT_CLASS_RESULT_FOGC
;
171 case VERT_RESULT_TEX0
:
172 case VERT_RESULT_TEX1
:
173 case VERT_RESULT_TEX2
:
174 case VERT_RESULT_TEX3
:
175 case VERT_RESULT_TEX4
:
176 case VERT_RESULT_TEX5
:
177 return (((dst
->Index
- VERT_RESULT_TEX0
) << R200_VPI_OUT_REG_INDEX_SHIFT
)
178 | R200_VSF_OUT_CLASS_RESULT_TEXC
);
179 case VERT_RESULT_PSIZ
:
180 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE
;
182 fprintf(stderr
, "problem in %s, unknown dst output reg %d\n", __FUNCTION__
, dst
->Index
);
186 case PROGRAM_ADDRESS
:
187 assert (dst
->Index
== 0);
188 return R200_VSF_OUT_CLASS_ADDR
;
190 fprintf(stderr
, "problem in %s, unknown register type %d\n", __FUNCTION__
, dst
->File
);
196 static unsigned long t_src_class(enum register_file file
)
200 case PROGRAM_TEMPORARY
:
201 return VSF_IN_CLASS_TMP
;
204 return VSF_IN_CLASS_ATTR
;
206 case PROGRAM_LOCAL_PARAM
:
207 case PROGRAM_ENV_PARAM
:
208 case PROGRAM_NAMED_PARAM
:
209 case PROGRAM_STATE_VAR
:
210 return VSF_IN_CLASS_PARAM
;
213 case PROGRAM_WRITE_ONLY:
214 case PROGRAM_ADDRESS:
217 fprintf(stderr
, "problem in %s", __FUNCTION__
);
222 static __inline
unsigned long t_swizzle(GLubyte swizzle
)
224 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
227 case SWIZZLE_X: return VSF_IN_COMPONENT_X;
228 case SWIZZLE_Y: return VSF_IN_COMPONENT_Y;
229 case SWIZZLE_Z: return VSF_IN_COMPONENT_Z;
230 case SWIZZLE_W: return VSF_IN_COMPONENT_W;
231 case SWIZZLE_ZERO: return VSF_IN_COMPONENT_ZERO;
232 case SWIZZLE_ONE: return VSF_IN_COMPONENT_ONE;
234 fprintf(stderr, "problem in %s", __FUNCTION__);
242 static void vp_dump_inputs(struct r200_vertex_program
*vp
, char *caller
)
247 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
, caller
);
251 fprintf(stderr
, "%s:<", caller
);
252 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
253 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
254 fprintf(stderr
, ">\n");
259 static unsigned long t_src_index(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
265 if(src
->File
== PROGRAM_INPUT
){
266 /* if(vp->inputs[src->Index] != -1)
267 return vp->inputs[src->Index];
269 for(i=0; i < VERT_ATTRIB_MAX; i++)
270 if(vp->inputs[i] > max_reg)
271 max_reg = vp->inputs[i];
273 vp->inputs[src->Index] = max_reg+1;*/
275 //vp_dump_inputs(vp, __FUNCTION__);
276 assert(vp
->inputs
[src
->Index
] != -1);
277 return vp
->inputs
[src
->Index
];
279 if (src
->Index
< 0) {
280 fprintf(stderr
, "WARNING negative offsets for indirect addressing do not work\n");
287 static unsigned long t_src(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
290 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
291 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
292 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
293 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
294 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
295 t_src_class(src
->File
),
296 src
->NegateBase
) | (src
->RelAddr
<< 4);
299 static unsigned long t_src_scalar(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
302 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
303 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
304 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
305 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
306 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
307 t_src_class(src
->File
),
308 src
->NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
->RelAddr
<< 4);
311 static unsigned long t_opcode(enum prog_opcode opcode
)
315 case OPCODE_DST
: return R200_VPI_OUT_OP_DST
;
316 case OPCODE_EX2
: return R200_VPI_OUT_OP_EX2
;
317 case OPCODE_EXP
: return R200_VPI_OUT_OP_EXP
;
318 case OPCODE_FRC
: return R200_VPI_OUT_OP_FRC
;
319 case OPCODE_LG2
: return R200_VPI_OUT_OP_LG2
;
320 case OPCODE_LOG
: return R200_VPI_OUT_OP_LOG
;
321 case OPCODE_MAX
: return R200_VPI_OUT_OP_MAX
;
322 case OPCODE_MIN
: return R200_VPI_OUT_OP_MIN
;
323 case OPCODE_MUL
: return R200_VPI_OUT_OP_MUL
;
324 case OPCODE_RCP
: return R200_VPI_OUT_OP_RCP
;
325 case OPCODE_RSQ
: return R200_VPI_OUT_OP_RSQ
;
326 case OPCODE_SGE
: return R200_VPI_OUT_OP_SGE
;
327 case OPCODE_SLT
: return R200_VPI_OUT_OP_SLT
;
328 case OPCODE_DP4
: return R200_VPI_OUT_OP_DOT
;
329 case OPCODE_ADD
: return R200_VPI_OUT_OP_ADD
;
332 fprintf(stderr
, "%s: Should not be called with opcode %d!", __FUNCTION__
, opcode
);
338 static unsigned long op_operands(enum prog_opcode opcode
)
   /* Can we trust Mesa's opcodes to be in order? */
343 for(i
=0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
344 if(op_names
[i
].opcode
== opcode
)
345 return op_names
[i
].ip
;
347 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
/* TODO: Get rid of t_src_class call */
/* Evaluates true when sources a and b differ in RelAddr, or when they have
   different indices while both are of class PARAM or both of class ATTR.
   a and b are struct prog_src_register lvalues; each may be evaluated more
   than once, so do not pass expressions with side effects. */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR))))
359 /* fglrx on rv250 codes up unused sources as follows:
360 unused but necessary sources are same as previous source, zero-ed out.
361 unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
362 i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
363 set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
365 /* use these simpler definitions. Must obviously not be used with not yet set up regs.
366 Those are NOT semantically equivalent to the r300 ones, requires code changes */
/* Clear the 12 bits starting at R200_VPI_IN_X_SHIFT of an encoded source
   word and set its X, Y, Z and W selects to R200_VPI_IN_SELECT_ZERO; all
   other bits are kept. */
#define VP_ZERO_SWIZZLE(word) \
   (((word) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
    | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))

/* Replace the low four bits of an encoded source word with 9
   (presumably the "no class" marker the fglrx note refers to - confirm). */
#define VP_MARK_UNUSED(word) (((word) & ~15) | 9)

/* All of these read the instruction currently being built through the local
   pointer o_inst, so the referenced source must already be set up. */
#define ZERO_SRC_0 VP_ZERO_SWIZZLE(o_inst->src0)
#define ZERO_SRC_1 VP_ZERO_SWIZZLE(o_inst->src1)
#define ZERO_SRC_2 VP_ZERO_SWIZZLE(o_inst->src2)

#define UNUSED_SRC_0 VP_MARK_UNUSED(o_inst->src0)
#define UNUSED_SRC_1 VP_MARK_UNUSED(o_inst->src1)
#define UNUSED_SRC_2 VP_MARK_UNUSED(o_inst->src2)
392 /* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */
395 static GLboolean
r200_translate_vertex_program(struct r200_vertex_program
*vp
)
397 struct vertex_program
*mesa_vp
= (void *)vp
;
398 struct prog_instruction
*vpi
;
400 VERTEX_SHADER_INSTRUCTION
*o_inst
;
401 unsigned long operands
;
405 vp
->native
= GL_FALSE
;
407 if ((mesa_vp
->Base
.InputsRead
&
408 ~(VERT_BIT_POS
| VERT_BIT_NORMAL
| VERT_BIT_COLOR0
| VERT_BIT_COLOR1
|
409 VERT_BIT_FOG
| VERT_BIT_TEX0
| VERT_BIT_TEX1
| VERT_BIT_TEX2
|
410 VERT_BIT_TEX3
| VERT_BIT_TEX4
| VERT_BIT_TEX5
)) != 0) {
411 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
412 fprintf(stderr
, "can't handle vert prog inputs 0x%x\n",
413 mesa_vp
->Base
.InputsRead
);
   /* Initial value should be last tmp reg that hw supports.
      Strangely enough r300 doesn't mind even though these would be out of range.
      Smart enough to realize that it doesn't need it? */
421 int u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
422 struct prog_src_register src
[3];
424 /* if (getenv("R300_VP_SAFETY")) {
425 WARN_ONCE("R300_VP_SAFETY enabled.\n");
427 vpi = malloc((mesa_vp->Base.NumInstructions + VSF_MAX_FRAGMENT_TEMPS) * sizeof(struct prog_instruction));
428 memset(vpi, 0, VSF_MAX_FRAGMENT_TEMPS * sizeof(struct prog_instruction));
430 for (i=0; i < VSF_MAX_FRAGMENT_TEMPS; i++) {
431 vpi[i].Opcode = OPCODE_MOV;
432 vpi[i].StringPos = 0;
435 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
436 vpi[i].DstReg.Index = i;
437 vpi[i].DstReg.WriteMask = WRITEMASK_XYZW;
438 vpi[i].DstReg.CondMask = COND_TR;
440 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
441 vpi[i].SrcReg[0].Index = 0;
442 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
445 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
447 free(mesa_vp->Base.Instructions);
449 mesa_vp->Base.Instructions = vpi;
451 mesa_vp->Base.NumInstructions += VSF_MAX_FRAGMENT_TEMPS;
452 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
454 assert(vpi->Opcode == OPCODE_END);
456 /* FIXME: is changing the prog safe to do here? */
457 if (mesa_vp
->IsPositionInvariant
) {
458 struct program_parameter_list
*paramList
;
459 GLint tokens
[6] = { STATE_MATRIX
, STATE_MVP
, 0, 0, 0, STATE_MATRIX
};
462 tokens
[5] = STATE_MATRIX
;
464 tokens
[5] = STATE_MATRIX_TRANSPOSE
;
466 paramList
= mesa_vp
->Base
.Parameters
;
468 vpi
= malloc((mesa_vp
->Base
.NumInstructions
+ 4) * sizeof(struct prog_instruction
));
469 memset(vpi
, 0, 4 * sizeof(struct prog_instruction
));
471 for (i
=0; i
< 4; i
++) {
473 tokens
[3] = tokens
[4] = i
;
474 idx
= _mesa_add_state_reference(paramList
, tokens
);
476 vpi
[i
].Opcode
= OPCODE_DP4
;
477 vpi
[i
].StringPos
= 0;
480 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
481 vpi
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
482 vpi
[i
].DstReg
.WriteMask
= 1 << i
;
483 vpi
[i
].DstReg
.CondMask
= COND_TR
;
485 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
486 vpi
[i
].SrcReg
[0].Index
= idx
;
487 vpi
[i
].SrcReg
[0].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
489 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
490 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
491 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
494 vpi
[i
].Opcode
= OPCODE_MUL
;
496 vpi
[i
].Opcode
= OPCODE_MAD
;
498 vpi
[i
].StringPos
= 0;
502 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
504 vpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
505 vpi
[i
].DstReg
.Index
= 0;
506 vpi
[i
].DstReg
.WriteMask
= 0xf;
507 vpi
[i
].DstReg
.CondMask
= COND_TR
;
509 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
510 vpi
[i
].SrcReg
[0].Index
= idx
;
511 vpi
[i
].SrcReg
[0].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
513 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
514 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
515 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(i
, i
, i
, i
);
518 vpi
[i
].SrcReg
[2].File
= PROGRAM_TEMPORARY
;
519 vpi
[i
].SrcReg
[2].Index
= 0;
520 vpi
[i
].SrcReg
[2].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
525 memcpy(&vpi
[i
], mesa_vp
->Base
.Instructions
, mesa_vp
->Base
.NumInstructions
* sizeof(struct prog_instruction
));
527 free(mesa_vp
->Base
.Instructions
);
529 mesa_vp
->Base
.Instructions
= vpi
;
531 mesa_vp
->Base
.NumInstructions
+= 4;
532 vpi
= &mesa_vp
->Base
.Instructions
[mesa_vp
->Base
.NumInstructions
-1];
534 assert(vpi
->Opcode
== OPCODE_END
);
536 mesa_vp
->Base
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
537 mesa_vp
->Base
.OutputsWritten
|= (1 << VERT_RESULT_HPOS
);
539 //fprintf(stderr, "IsPositionInvariant is set!\n");
540 //_mesa_print_program(&mesa_vp->Base);
544 mesa_vp
->Base
.NumNativeInstructions
= 0;
545 mesa_vp
->Base
.NumNativeParameters
= mesa_vp
->Base
.Parameters
->NumParameters
;
547 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
549 /* fglrx uses fixed inputs as follows for conventional attribs.
550 generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
551 There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
552 attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
553 vertex normal/weight)
554 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
555 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
556 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
557 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
558 generic attribs would require some more work (dma regions, renaming). */
560 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
561 vp
->inputs
[VERT_ATTRIB_POS
] = 0;
562 vp
->inputs
[VERT_ATTRIB_WEIGHT
] = 12;
563 vp
->inputs
[VERT_ATTRIB_NORMAL
] = 1;
564 vp
->inputs
[VERT_ATTRIB_COLOR0
] = 2;
565 vp
->inputs
[VERT_ATTRIB_COLOR1
] = 3;
566 vp
->inputs
[VERT_ATTRIB_FOG
] = 15;
567 vp
->inputs
[VERT_ATTRIB_TEX0
] = 6;
568 vp
->inputs
[VERT_ATTRIB_TEX1
] = 7;
569 vp
->inputs
[VERT_ATTRIB_TEX2
] = 8;
570 vp
->inputs
[VERT_ATTRIB_TEX3
] = 9;
571 vp
->inputs
[VERT_ATTRIB_TEX4
] = 10;
572 vp
->inputs
[VERT_ATTRIB_TEX5
] = 11;
   /* attr 4,5 and 13 are only used with generic attribs.
      Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
      not possible to use with vertex progs as it is lacking in vert prog specification) */
577 assert(mesa_vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
579 vp
->translated
= GL_TRUE
;
582 for(vpi
= mesa_vp
->Base
.Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++){
583 if (u_temp_i
< mesa_vp
->Base
.NumTemporaries
) {
584 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
585 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", mesa_vp
->Base
.NumTemporaries
, u_temp_i
);
589 u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
590 if(o_inst
- vp
->instr
>= R200_VSF_MAX_INST
) {
591 mesa_vp
->Base
.NumNativeInstructions
= 129;
592 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
593 fprintf(stderr
, "more than 128 native instructions\n");
598 operands
= op_operands(vpi
->Opcode
);
599 are_srcs_scalar
= operands
& SCALAR_FLAG
;
602 for(i
=0; i
< operands
; i
++)
603 src
[i
] = vpi
->SrcReg
[i
];
605 if(operands
== 3){ /* TODO: scalars */
606 if( CMP_SRCS(src
[1], src
[2]) || CMP_SRCS(src
[0], src
[2]) ){
607 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
608 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
611 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
612 SWIZZLE_X
, SWIZZLE_Y
,
613 SWIZZLE_Z
, SWIZZLE_W
,
614 t_src_class(src
[2].File
), VSF_FLAG_NONE
) | (src
[2].RelAddr
<< 4);
616 o_inst
->src1
= ZERO_SRC_0
;
617 o_inst
->src2
= UNUSED_SRC_1
;
620 src
[2].File
= PROGRAM_TEMPORARY
;
621 src
[2].Index
= u_temp_i
;
628 if( CMP_SRCS(src
[1], src
[0]) ){
629 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
630 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
633 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
634 SWIZZLE_X
, SWIZZLE_Y
,
635 SWIZZLE_Z
, SWIZZLE_W
,
636 t_src_class(src
[0].File
), VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
638 o_inst
->src1
= ZERO_SRC_0
;
639 o_inst
->src2
= UNUSED_SRC_1
;
642 src
[0].File
= PROGRAM_TEMPORARY
;
643 src
[0].Index
= u_temp_i
;
649 /* These ops need special handling. */
      /* FIXME: ARL works fine, but negative offsets won't work - fglrx just seems to ignore neg offsets
	 which isn't quite correct... */
654 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ARL
, t_dst(&vpi
->DstReg
),
655 t_dst_mask(vpi
->DstReg
.WriteMask
));
656 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
657 o_inst
->src1
= UNUSED_SRC_0
;
658 o_inst
->src2
= UNUSED_SRC_1
;
662 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
663 So may need to insert additional instruction */
664 /* this appears to be different to r300 */
665 if ((src
[0].File
== src
[1].File
) &&
666 (src
[0].Index
== src
[1].Index
)) {
667 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&vpi
->DstReg
),
668 t_dst_mask(vpi
->DstReg
.WriteMask
));
669 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
670 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
672 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
674 t_src_class(src
[0].File
),
675 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
676 o_inst
->src1
= UNUSED_SRC_0
;
677 o_inst
->src2
= UNUSED_SRC_0
;
680 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
681 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
683 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
684 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
685 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
,
686 t_src_class(src
[0].File
),
687 src
[0].NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
688 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
689 SWIZZLE_ZERO
, SWIZZLE_ZERO
,
690 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), SWIZZLE_ZERO
,
691 t_src_class(src
[1].File
),
692 src
[1].NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
693 o_inst
->src2
= UNUSED_SRC_1
;
696 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&vpi
->DstReg
),
697 t_dst_mask(vpi
->DstReg
.WriteMask
));
698 o_inst
->src0
= MAKE_VSF_SOURCE(u_temp_i
,
705 o_inst
->src1
= UNUSED_SRC_0
;
706 o_inst
->src2
= UNUSED_SRC_0
;
711 case OPCODE_MOV
://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
712 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
713 t_dst_mask(vpi
->DstReg
.WriteMask
));
714 o_inst
->src0
= t_src(vp
, &src
[0]);
715 o_inst
->src1
= ZERO_SRC_0
;
716 o_inst
->src2
= UNUSED_SRC_1
;
719 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
720 src
[1].File
== PROGRAM_TEMPORARY
&&
721 src
[2].File
== PROGRAM_TEMPORARY
) ? R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
723 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&vpi
->DstReg
),
724 t_dst_mask(vpi
->DstReg
.WriteMask
));
725 o_inst
->src0
= t_src(vp
, &src
[0]);
727 if ((o_inst
- vp
->instr
) == 31) {
728 /* fix up the broken vertex program of quake4 demo... */
729 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
730 SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
,
731 t_src_class(src
[1].File
),
732 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
733 o_inst
->src2
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
734 SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
,
735 t_src_class(src
[1].File
),
736 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
739 o_inst
->src1
= t_src(vp
, &src
[1]);
740 o_inst
->src2
= t_src(vp
, &src
[2]);
743 o_inst
->src1
= t_src(vp
, &src
[1]);
744 o_inst
->src2
= t_src(vp
, &src
[2]);
748 case OPCODE_DP3
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
749 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&vpi
->DstReg
),
750 t_dst_mask(vpi
->DstReg
.WriteMask
));
752 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
753 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
754 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
755 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
757 t_src_class(src
[0].File
),
758 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
760 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
761 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
762 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
763 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
765 t_src_class(src
[1].File
),
766 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
768 o_inst
->src2
= UNUSED_SRC_1
;
771 case OPCODE_SUB
://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
772 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
773 t_dst_mask(vpi
->DstReg
.WriteMask
));
775 o_inst
->src0
= t_src(vp
, &src
[0]);
776 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
777 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
778 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
779 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
780 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
781 t_src_class(src
[1].File
),
782 (!src
[1].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
783 o_inst
->src2
= UNUSED_SRC_1
;
786 case OPCODE_ABS
://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
787 o_inst
->op
=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX
, t_dst(&vpi
->DstReg
),
788 t_dst_mask(vpi
->DstReg
.WriteMask
));
790 o_inst
->src0
=t_src(vp
, &src
[0]);
791 o_inst
->src1
=MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
792 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
793 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
794 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
795 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
796 t_src_class(src
[0].File
),
797 (!src
[0].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
798 o_inst
->src2
= UNUSED_SRC_1
;
802 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
803 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
805 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_FRC
,
806 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
807 t_dst_mask(vpi
->DstReg
.WriteMask
));
809 o_inst
->src0
= t_src(vp
, &src
[0]);
810 o_inst
->src1
= UNUSED_SRC_0
;
811 o_inst
->src2
= UNUSED_SRC_1
;
814 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
815 t_dst_mask(vpi
->DstReg
.WriteMask
));
817 o_inst
->src0
= t_src(vp
, &src
[0]);
818 o_inst
->src1
= MAKE_VSF_SOURCE(u_temp_i
,
824 /* Not 100% sure about this */
825 (!src
[0].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
/*VSF_FLAG_ALL*/);
827 o_inst
->src2
= UNUSED_SRC_0
;
831 case OPCODE_LG2
:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
832 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_LG2
, t_dst(&vpi
->DstReg
),
833 t_dst_mask(vpi
->DstReg
.WriteMask
));
835 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
836 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
837 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
838 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
839 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
840 t_src_class(src
[0].File
),
841 src
[0].NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
842 o_inst
->src1
= UNUSED_SRC_0
;
843 o_inst
->src2
= UNUSED_SRC_0
;
846 case OPCODE_LIT
://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
847 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_LIT
, t_dst(&vpi
->DstReg
),
848 t_dst_mask(vpi
->DstReg
.WriteMask
));
849 /* r200 in contrast to r300 does not seem to need any complicated setup,
850 its LIT instruction is "more native" */
851 o_inst
->src0
= t_src(vp
, &src
[0]);
852 o_inst
->src1
= UNUSED_SRC_0
;
853 o_inst
->src2
= UNUSED_SRC_0
;
856 case OPCODE_DPH
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
857 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&vpi
->DstReg
),
858 t_dst_mask(vpi
->DstReg
.WriteMask
));
860 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
861 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
862 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
863 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
864 VSF_IN_COMPONENT_ONE
,
865 t_src_class(src
[0].File
),
866 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
867 o_inst
->src1
= t_src(vp
, &src
[1]);
868 o_inst
->src2
= UNUSED_SRC_1
;
872 /* mul r0, r1.yzxw, r2.zxyw
873 mad r0, -r2.yzxw, r1.zxyw, r0
874 NOTE: might need MAD_2
877 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
878 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
879 t_dst_mask(vpi
->DstReg
.WriteMask
));
881 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
882 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
883 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
884 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
885 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
886 t_src_class(src
[0].File
),
887 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
889 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
890 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
891 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
892 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
893 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
894 t_src_class(src
[1].File
),
895 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
897 o_inst
->src2
= UNUSED_SRC_1
;
901 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MAD
, t_dst(&vpi
->DstReg
),
902 t_dst_mask(vpi
->DstReg
.WriteMask
));
904 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
905 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
906 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
907 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
908 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
909 t_src_class(src
[1].File
),
910 (!src
[1].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
912 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
913 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
914 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
915 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
916 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
917 t_src_class(src
[0].File
),
918 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
920 o_inst
->src2
= MAKE_VSF_SOURCE(u_temp_i
+1,
931 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
932 t_dst_mask(vpi
->DstReg
.WriteMask
));
933 o_inst
->src0
= t_src(vp
, &src
[0]);
934 o_inst
->src1
= ZERO_SRC_0
;
935 o_inst
->src2
= UNUSED_SRC_1
;
939 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
940 fprintf(stderr
, "Don't know how to handle op %d yet\n", vpi
->Opcode
);
950 o_inst
->op
= MAKE_VSF_OP(t_opcode(vpi
->Opcode
), t_dst(&vpi
->DstReg
),
951 t_dst_mask(vpi
->DstReg
.WriteMask
));
956 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
957 o_inst
->src1
= UNUSED_SRC_0
;
958 o_inst
->src2
= UNUSED_SRC_1
;
962 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
963 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
964 o_inst
->src2
= UNUSED_SRC_1
;
968 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
969 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
970 o_inst
->src2
= t_src_scalar(vp
, &src
[2]);
974 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
981 o_inst
->src0
= t_src(vp
, &src
[0]);
982 o_inst
->src1
= UNUSED_SRC_0
;
983 o_inst
->src2
= UNUSED_SRC_1
;
987 o_inst
->src0
= t_src(vp
, &src
[0]);
988 o_inst
->src1
= t_src(vp
, &src
[1]);
989 o_inst
->src2
= UNUSED_SRC_1
;
993 o_inst
->src0
= t_src(vp
, &src
[0]);
994 o_inst
->src1
= t_src(vp
, &src
[1]);
995 o_inst
->src2
= t_src(vp
, &src
[2]);
999 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
1005 if ((o_inst
->op
& R200_VSF_OUT_CLASS_MASK
) == R200_VSF_OUT_CLASS_RESULT_POS
) {
1006 vp
->pos_end
= (o_inst
- vp
->instr
);
1010 /* need to test again since some instructions require more than one (up to 3) native inst */
1011 if(o_inst
- vp
->instr
> R200_VSF_MAX_INST
) {
1012 mesa_vp
->Base
.NumNativeInstructions
= 129;
1013 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
1014 fprintf(stderr
, "more than 128 native instructions\n");
1018 vp
->native
= GL_TRUE
;
1019 mesa_vp
->Base
.NumNativeInstructions
= (o_inst
- vp
->instr
);
1021 fprintf(stderr
, "hw program:\n");
1022 for(i
=0; i
< vp
->program
.length
; i
++)
1023 fprintf(stderr
, "%08x\n", vp
->instr
[i
]);
/*
 * Set up hardware state for the currently bound vertex program
 * (ctx->VertexProgram.Current).
 *
 * Visible steps:
 *  - translate the program if vp->translated is not set;
 *  - compute the TCL fallback flag and hand it to TCL_FALLBACK(); return
 *    early when falling back;
 *  - fill the pvs atom (PVS_CNTL_1 / PVS_CNTL_2);
 *  - adjust TCL_UCP_VERT_BLEND_CTL when user clip planes are enabled;
 *  - copy the native instructions into the vpi[0] / vpi[1] atoms when
 *    vp differs from rmesa->curr_vp_hw.
 */
1028 void r200SetupVertexProg( GLcontext
*ctx
) {
1029 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1030 struct r200_vertex_program
*vp
= (struct r200_vertex_program
*)ctx
->VertexProgram
.Current
;
/* Lazy translation; curr_vp_hw is cleared first so the
   "vp != rmesa->curr_vp_hw" test further down is true afterwards. */
1034 if (!vp
->translated
) {
1035 rmesa
->curr_vp_hw
= NULL
;
1036 r200_translate_vertex_program(vp
);
1038 /* could optimize setting up vertex progs away for non-tcl hw */
/* fallback is set unless vp->native, the result of
   r200VertexProgUpdateParams() and drmSupportsVertexProgram are all non-zero. */
1039 fallback
= !(vp
->native
&& r200VertexProgUpdateParams(ctx
, vp
) &&
1040 rmesa
->r200Screen
->drmSupportsVertexProgram
);
1041 TCL_FALLBACK(ctx
, R200_TCL_FALLBACK_VERTEX_PROGRAM
, fallback
);
1042 if (fallback
) return;
1044 R200_STATECHANGE( rmesa
, pvs
);
/* PVS_CNTL_1: program start 0, program end = NumNativeInstructions - 1,
   plus vp->pos_end in the POS_END field. */
1046 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_1
] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT
) |
1047 ((vp
->mesa_program
.Base
.NumNativeInstructions
- 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT
) |
1048 (vp
->pos_end
<< R200_PVS_CNTL_1_POS_END_SHIFT
);
/* PVS_CNTL_2: parameter offset 0, parameter count = NumNativeParameters. */
1049 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_2
] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT
) |
1050 (vp
->mesa_program
.Base
.NumNativeParameters
<< R200_PVS_CNTL_2_PARAM_COUNT_SHIFT
);
1052 /* maybe user clip planes just work with vertex progs... untested */
1053 if (ctx
->Transform
.ClipPlanesEnabled
) {
1054 R200_STATECHANGE( rmesa
, tcl
);
1055 if (vp
->mesa_program
.IsPositionInvariant
) {
1056 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] |= (ctx
->Transform
.ClipPlanesEnabled
<< 2);
/* NOTE(review): the branch structure around the clear below is not visible
   in this excerpt; presumably it is the else arm of the
   IsPositionInvariant test - confirm. It clears bits 2..7 (mask 0xfc). */
1059 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] &= ~(0xfc);
/* Upload the instructions only when the program recorded in curr_vp_hw
   is not this one. */
1063 if (vp
!= rmesa
->curr_vp_hw
) {
1064 GLuint count
= vp
->mesa_program
.Base
.NumNativeInstructions
;
1065 drm_radeon_cmd_header_t tmp
;
1067 R200_STATECHANGE( rmesa
, vpi
[0] );
1068 R200_STATECHANGE( rmesa
, vpi
[1] );
1070 /* FIXME: what about using a memcopy... */
/* First atom: at most 64 instructions, four dwords (op, src0..src2) each. */
1071 for (i
= 0; (i
< 64) && i
< count
; i
++) {
1072 rmesa
->hw
.vpi
[0].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
].op
;
1073 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
].src0
;
1074 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
].src1
;
1075 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
].src2
;
1077 /* hack up the cmd_size so not the whole state atom is emitted always.
1078 This may require some more thought, we may emit half progs on lost state, but
1079 hopefully it won't matter?
1080 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1081 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1082 rmesa
->hw
.vpi
[0].cmd_size
= 1 + 4 * ((count
> 64) ? 64 : count
);
/* Patch the veclinear count in the atom's command header to match. */
1083 tmp
.i
= rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
];
1084 tmp
.veclinear
.count
= (count
> 64) ? 64 : count
;
1085 rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
] = tmp
.i
;
/* Second atom: instructions 64 and up.
   NOTE(review): count is GLuint, so (count - 64) wraps when count < 64.
   No guard is visible in this excerpt - confirm that one (e.g. a
   count > 64 test) encloses this loop and the cmd_size update below. */
1087 for (i
= 0; i
< (count
- 64); i
++) {
1088 rmesa
->hw
.vpi
[1].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
+ 64].op
;
1089 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
+ 64].src0
;
1090 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
+ 64].src1
;
1091 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
+ 64].src2
;
1093 rmesa
->hw
.vpi
[1].cmd_size
= 1 + 4 * (count
- 64);
1094 tmp
.i
= rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
];
1095 tmp
.veclinear
.count
= count
- 64;
1096 rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
] = tmp
.i
;
/* Record which program the vpi atoms now hold. */
1098 rmesa
->curr_vp_hw
= vp
;
/*
 * BindProgram driver hook (installed by r200InitShaderFuncs).
 *
 * For GL_VERTEX_PROGRAM_ARB it clears rmesa->curr_vp_hw, which makes the
 * "vp != rmesa->curr_vp_hw" test in r200SetupVertexProg true on the next
 * setup. The _mesa_problem() call reports an unsupported target.
 * NOTE(review): the selecting switch/default is not visible in this
 * excerpt - confirm which targets reach the _mesa_problem() call.
 */
1103 static void r200BindProgram(GLcontext
*ctx
, GLenum target
, struct program
*prog
)
1105 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1108 case GL_VERTEX_PROGRAM_ARB
:
1109 rmesa
->curr_vp_hw
= NULL
;
1112 _mesa_problem(ctx
, "Target not supported yet!");
/*
 * NewProgram driver hook (installed by r200InitShaderFuncs).
 *
 * GL_VERTEX_PROGRAM_ARB: allocates a struct r200_vertex_program and returns
 * the result of _mesa_init_vertex_program() on its mesa_program member.
 * GL_FRAGMENT_PROGRAM_ARB / GL_FRAGMENT_PROGRAM_NV: allocates a
 * struct fragment_program and returns the result of
 * _mesa_init_fragment_program().
 * The _mesa_problem() call reports a bad target.
 */
1117 static struct program
*r200NewProgram(GLcontext
*ctx
, GLenum target
, GLuint id
)
1119 struct r200_vertex_program
*vp
;
1122 case GL_VERTEX_PROGRAM_ARB
:
/* NOTE(review): the CALLOC_STRUCT result is not checked before
   &vp->mesa_program is formed - confirm the init helper tolerates a
   failed allocation. */
1123 vp
= CALLOC_STRUCT(r200_vertex_program
);
1124 return _mesa_init_vertex_program(ctx
, &vp
->mesa_program
, target
, id
);
1125 case GL_FRAGMENT_PROGRAM_ARB
:
1126 case GL_FRAGMENT_PROGRAM_NV
:
1127 return _mesa_init_fragment_program( ctx
, CALLOC_STRUCT(fragment_program
), target
, id
);
1129 _mesa_problem(ctx
, "Bad target in r200NewProgram");
/*
 * DeleteProgram driver hook (installed by r200InitShaderFuncs).
 * Forwards directly to _mesa_delete_program(); no driver-specific cleanup
 * is visible here.
 */
1135 static void r200DeleteProgram(GLcontext
*ctx
, struct program
*prog
)
1137 _mesa_delete_program(ctx
, prog
);
/*
 * ProgramStringNotify driver hook (installed by r200InitShaderFuncs).
 *
 * For GL_VERTEX_PROGRAM_ARB the translated flag is cleared and the memory
 * starting at vp->translated is zeroed, so the next r200SetupVertexProg /
 * r200IsProgramNative call translates the program again. The program is
 * then passed to _tnl_program_string().
 */
1140 static void r200ProgramStringNotify(GLcontext
*ctx
, GLenum target
, struct program
*prog
)
1142 struct r200_vertex_program
*vp
= (void *)prog
;
1145 case GL_VERTEX_PROGRAM_ARB
:
1146 vp
->translated
= GL_FALSE
;
/* Zeroes sizeof(r200_vertex_program) - sizeof(vertex_program) bytes starting
   at &vp->translated.
   NOTE(review): this assumes translated is the first member following an
   embedded struct vertex_program at offset 0 - confirm against
   r200_vertprog.h. */
1147 memset(&vp
->translated
, 0, sizeof(struct r200_vertex_program
) - sizeof(struct vertex_program
));
1148 /*r200_translate_vertex_shader(vp);*/
1151 /* need this for tcl fallbacks */
1152 _tnl_program_string(ctx
, target
, prog
);
/*
 * IsProgramNative driver hook (installed by r200InitShaderFuncs).
 *
 * For GL_VERTEX_STATE_PROGRAM_NV and GL_VERTEX_PROGRAM_ARB the program is
 * translated first if vp->translated is not set. The _mesa_problem() call
 * reports a bad target; its message now names this function instead of
 * r200NewProgram, from which it had been copied.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
1155 static GLboolean
r200IsProgramNative(GLcontext
*ctx
, GLenum target
, struct program
*prog
)
1157 struct r200_vertex_program
*vp
= (void *)prog
;
1160 case GL_VERTEX_STATE_PROGRAM_NV
:
1161 case GL_VERTEX_PROGRAM_ARB
:
1162 if (!vp
->translated
) {
1163 r200_translate_vertex_program(vp
);
1165 /* does not take parameters etc. into account */
1168 _mesa_problem(ctx
, "Bad target in r200IsProgramNative");
/*
 * Install this file's program-related driver hooks into the given
 * dd_function_table: NewProgram, BindProgram, DeleteProgram,
 * ProgramStringNotify and IsProgramNative.
 */
1173 void r200InitShaderFuncs(struct dd_function_table
*functions
)
1175 functions
->NewProgram
= r200NewProgram
;
1176 functions
->BindProgram
= r200BindProgram
;
1177 functions
->DeleteProgram
= r200DeleteProgram
;
1178 functions
->ProgramStringNotify
= r200ProgramStringNotify
;
1179 functions
->IsProgramNative
= r200IsProgramNative
;