1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
38 #include "r200_context.h"
39 #include "r200_vertprog.h"
40 #include "r200_ioctl.h"
42 #include "program_instruction.h"
45 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
46 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
47 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
48 SWIZZLE_W != VSF_IN_COMPONENT_W || \
49 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
50 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
51 WRITEMASK_X != VSF_FLAG_X || \
52 WRITEMASK_Y != VSF_FLAG_Y || \
53 WRITEMASK_Z != VSF_FLAG_Z || \
54 WRITEMASK_W != VSF_FLAG_W
55 #error Cannot change these!
/*
 * Flag bits that share the op_names[].ip word with the operand count.
 * SCALAR_FLAG marks opcodes whose sources are scalars; OP_MASK extracts the
 * operand count.  The flags are unsigned constants because shifting a signed
 * 1 into bit 31 is undefined behaviour in C.
 */
#define SCALAR_FLAG (1u << 31)
#define FLAG_MASK (1u << 31)
#define OP_MASK	(0xf)  /* we are unlikely to have more than 15 */
/* One op_names[] initializer: stringized name, OPCODE_<name>, operand count and flags. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
66 unsigned long ip
; /* number of input operands and flags */
70 OPN(ARL
, 1|SCALAR_FLAG
),
75 OPN(EX2
, 1|SCALAR_FLAG
),
76 OPN(EXP
, 1|SCALAR_FLAG
),
79 OPN(LG2
, 1|SCALAR_FLAG
),
81 OPN(LOG
, 1|SCALAR_FLAG
),
87 OPN(POW
, 2|SCALAR_FLAG
),
88 OPN(RCP
, 1|SCALAR_FLAG
),
89 OPN(RSQ
, 1|SCALAR_FLAG
),
100 static GLboolean
r200VertexProgUpdateParams(GLcontext
*ctx
, struct r200_vertex_program
*vp
)
102 r200ContextPtr rmesa
= R200_CONTEXT( ctx
);
103 GLfloat
*fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
+ 1];
105 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
106 struct gl_program_parameter_list
*paramList
;
107 drm_radeon_cmd_header_t tmp
;
109 R200_STATECHANGE( rmesa
, vpp
[0] );
110 R200_STATECHANGE( rmesa
, vpp
[1] );
111 assert(mesa_vp
->Base
.Parameters
);
112 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
113 paramList
= mesa_vp
->Base
.Parameters
;
115 if(paramList
->NumParameters
> R200_VSF_MAX_PARAM
){
116 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
120 for(pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
121 switch(paramList
->Parameters
[pi
].Type
) {
122 case PROGRAM_STATE_VAR
:
123 case PROGRAM_NAMED_PARAM
:
124 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
125 case PROGRAM_CONSTANT
:
126 *fcmd
++ = paramList
->ParameterValues
[pi
][0];
127 *fcmd
++ = paramList
->ParameterValues
[pi
][1];
128 *fcmd
++ = paramList
->ParameterValues
[pi
][2];
129 *fcmd
++ = paramList
->ParameterValues
[pi
][3];
132 _mesa_problem(NULL
, "Bad param type in %s", __FUNCTION__
);
136 fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
+ 1];
139 /* hack up the cmd_size so not the whole state atom is emitted always. */
140 rmesa
->hw
.vpp
[0].cmd_size
=
141 1 + 4 * ((paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
);
142 tmp
.i
= rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
];
143 tmp
.veclinear
.count
= (paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
;
144 rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
] = tmp
.i
;
145 if (paramList
->NumParameters
> 96) {
146 rmesa
->hw
.vpp
[1].cmd_size
= 1 + 4 * (paramList
->NumParameters
- 96);
147 tmp
.i
= rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
];
148 tmp
.veclinear
.count
= paramList
->NumParameters
- 96;
149 rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
] = tmp
.i
;
154 static __inline
unsigned long t_dst_mask(GLuint mask
)
156 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
157 return mask
& VSF_FLAG_ALL
;
160 static unsigned long t_dst(struct prog_dst_register
*dst
)
163 case PROGRAM_TEMPORARY
:
164 return ((dst
->Index
<< R200_VPI_OUT_REG_INDEX_SHIFT
)
165 | R200_VSF_OUT_CLASS_TMP
);
167 switch (dst
->Index
) {
168 case VERT_RESULT_HPOS
:
169 return R200_VSF_OUT_CLASS_RESULT_POS
;
170 case VERT_RESULT_COL0
:
171 return R200_VSF_OUT_CLASS_RESULT_COLOR
;
172 case VERT_RESULT_COL1
:
173 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT
)
174 | R200_VSF_OUT_CLASS_RESULT_COLOR
);
175 case VERT_RESULT_FOGC
:
176 return R200_VSF_OUT_CLASS_RESULT_FOGC
;
177 case VERT_RESULT_TEX0
:
178 case VERT_RESULT_TEX1
:
179 case VERT_RESULT_TEX2
:
180 case VERT_RESULT_TEX3
:
181 case VERT_RESULT_TEX4
:
182 case VERT_RESULT_TEX5
:
183 return (((dst
->Index
- VERT_RESULT_TEX0
) << R200_VPI_OUT_REG_INDEX_SHIFT
)
184 | R200_VSF_OUT_CLASS_RESULT_TEXC
);
185 case VERT_RESULT_PSIZ
:
186 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE
;
188 fprintf(stderr
, "problem in %s, unknown dst output reg %d\n", __FUNCTION__
, dst
->Index
);
192 case PROGRAM_ADDRESS
:
193 assert (dst
->Index
== 0);
194 return R200_VSF_OUT_CLASS_ADDR
;
196 fprintf(stderr
, "problem in %s, unknown register type %d\n", __FUNCTION__
, dst
->File
);
202 static unsigned long t_src_class(enum register_file file
)
206 case PROGRAM_TEMPORARY
:
207 return VSF_IN_CLASS_TMP
;
210 return VSF_IN_CLASS_ATTR
;
212 case PROGRAM_LOCAL_PARAM
:
213 case PROGRAM_ENV_PARAM
:
214 case PROGRAM_NAMED_PARAM
:
215 case PROGRAM_STATE_VAR
:
216 return VSF_IN_CLASS_PARAM
;
219 case PROGRAM_WRITE_ONLY:
220 case PROGRAM_ADDRESS:
223 fprintf(stderr
, "problem in %s", __FUNCTION__
);
228 static __inline
unsigned long t_swizzle(GLubyte swizzle
)
230 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
235 static void vp_dump_inputs(struct r200_vertex_program
*vp
, char *caller
)
240 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
, caller
);
244 fprintf(stderr
, "%s:<", caller
);
245 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
246 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
247 fprintf(stderr
, ">\n");
252 static unsigned long t_src_index(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
258 if(src
->File
== PROGRAM_INPUT
){
259 /* if(vp->inputs[src->Index] != -1)
260 return vp->inputs[src->Index];
262 for(i=0; i < VERT_ATTRIB_MAX; i++)
263 if(vp->inputs[i] > max_reg)
264 max_reg = vp->inputs[i];
266 vp->inputs[src->Index] = max_reg+1;*/
268 //vp_dump_inputs(vp, __FUNCTION__);
269 assert(vp
->inputs
[src
->Index
] != -1);
270 return vp
->inputs
[src
->Index
];
272 if (src
->Index
< 0) {
273 fprintf(stderr
, "WARNING negative offsets for indirect addressing do not work\n");
280 static unsigned long t_src(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
283 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
284 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
285 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
286 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
287 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
288 t_src_class(src
->File
),
289 src
->NegateBase
) | (src
->RelAddr
<< 4);
292 static unsigned long t_src_scalar(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
295 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
296 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
297 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
298 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
299 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
300 t_src_class(src
->File
),
301 src
->NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
->RelAddr
<< 4);
304 static unsigned long t_opcode(enum prog_opcode opcode
)
308 case OPCODE_ADD
: return R200_VPI_OUT_OP_ADD
;
309 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
310 * seems to ignore neg offsets which isn't quite correct...
312 case OPCODE_ARL
: return R200_VPI_OUT_OP_ARL
;
313 case OPCODE_DP4
: return R200_VPI_OUT_OP_DOT
;
314 case OPCODE_DST
: return R200_VPI_OUT_OP_DST
;
315 case OPCODE_EX2
: return R200_VPI_OUT_OP_EX2
;
316 case OPCODE_EXP
: return R200_VPI_OUT_OP_EXP
;
317 case OPCODE_FRC
: return R200_VPI_OUT_OP_FRC
;
318 case OPCODE_LG2
: return R200_VPI_OUT_OP_LG2
;
319 case OPCODE_LIT
: return R200_VPI_OUT_OP_LIT
;
320 case OPCODE_LOG
: return R200_VPI_OUT_OP_LOG
;
321 case OPCODE_MAX
: return R200_VPI_OUT_OP_MAX
;
322 case OPCODE_MIN
: return R200_VPI_OUT_OP_MIN
;
323 case OPCODE_MUL
: return R200_VPI_OUT_OP_MUL
;
324 case OPCODE_RCP
: return R200_VPI_OUT_OP_RCP
;
325 case OPCODE_RSQ
: return R200_VPI_OUT_OP_RSQ
;
326 case OPCODE_SGE
: return R200_VPI_OUT_OP_SGE
;
327 case OPCODE_SLT
: return R200_VPI_OUT_OP_SLT
;
330 fprintf(stderr
, "%s: Should not be called with opcode %d!", __FUNCTION__
, opcode
);
336 static unsigned long op_operands(enum prog_opcode opcode
)
340 /* Can we trust Mesa's opcodes to be in order? */
341 for(i
=0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
342 if(op_names
[i
].opcode
== opcode
)
343 return op_names
[i
].ip
;
345 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
/*
 * CMP_SRCS(a, b) is true when the two source operands name different
 * registers (different Index or different RelAddr flag) while both belong to
 * the parameter class, or both to the attribute class.
 *
 * The arguments are parenthesized so that any lvalue expression (e.g. *ptr)
 * can be passed, and the definition no longer ends in a line continuation
 * that would splice the following source line into the macro.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
357 /* fglrx on rv250 codes up unused sources as follows:
358 unused but necessary sources are same as previous source, zero-ed out.
359 unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
360 i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
361 set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
363 /* Use these simpler definitions. They must obviously not be used with registers that are not yet set up.
364 They are NOT semantically equivalent to the r300 ones; using them requires code changes. */
/* Select bits that pick R200_VPI_IN_SELECT_ZERO for the x, y, z and w slots. */
#define R200_VPI_IN_ALL_ZERO \
	((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
	 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
	 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
	 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* An already encoded source word with the 12 bits starting at
 * R200_VPI_IN_X_SHIFT replaced by the all-zero selects. */
#define R200_ZEROED_SRC(word) \
	((((word) & ~(0xfff << R200_VPI_IN_X_SHIFT)) | R200_VPI_IN_ALL_ZERO))

/* An already encoded source word with its low four bits replaced by 9
 * (presumably the VSF_IN_CLASS_NONE encoding - TODO confirm). */
#define R200_UNUSED_SRC(word) (((word) & ~15) | 9)

/* All of the following read `o_inst` from the scope in which they are expanded. */
#define ZERO_SRC_0 R200_ZEROED_SRC(o_inst->src0)

#define ZERO_SRC_1 R200_ZEROED_SRC(o_inst->src1)

#define ZERO_SRC_2 R200_ZEROED_SRC(o_inst->src2)

#define UNUSED_SRC_0 R200_UNUSED_SRC(o_inst->src0)

#define UNUSED_SRC_1 R200_UNUSED_SRC(o_inst->src1)

#define UNUSED_SRC_2 R200_UNUSED_SRC(o_inst->src2)
390 /* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */
395 * Generate an R200 vertex program from Mesa's internal representation.
397 * \return GL_TRUE for success, GL_FALSE for failure.
399 static GLboolean
r200_translate_vertex_program(struct r200_vertex_program
*vp
)
401 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
402 struct prog_instruction
*vpi
;
404 VERTEX_SHADER_INSTRUCTION
*o_inst
;
405 unsigned long operands
;
409 vp
->native
= GL_FALSE
;
411 if (mesa_vp
->Base
.NumInstructions
== 0)
414 if ((mesa_vp
->Base
.InputsRead
&
415 ~(VERT_BIT_POS
| VERT_BIT_NORMAL
| VERT_BIT_COLOR0
| VERT_BIT_COLOR1
|
416 VERT_BIT_FOG
| VERT_BIT_TEX0
| VERT_BIT_TEX1
| VERT_BIT_TEX2
|
417 VERT_BIT_TEX3
| VERT_BIT_TEX4
| VERT_BIT_TEX5
)) != 0) {
418 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
419 fprintf(stderr
, "can't handle vert prog inputs 0x%x\n",
420 mesa_vp
->Base
.InputsRead
);
425 if (mesa_vp
->IsNVProgram
) {
426 /* subtle differences in spec like guaranteed initialized regs could cause
427 headaches. Might want to remove the driconf option to enable it completely */
430 /* Initial value should be last tmp reg that hw supports.
431 Strangely enough r300 doesn't mind even though these would be out of range.
432 Smart enough to realize that it doesn't need it? */
433 int u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
434 struct prog_src_register src
[3];
436 /* FIXME: is changing the prog safe to do here? */
437 if (mesa_vp
->IsPositionInvariant
) {
438 struct gl_program_parameter_list
*paramList
;
439 GLint tokens
[6] = { STATE_MATRIX
, STATE_MVP
, 0, 0, 0, STATE_MATRIX
};
442 tokens
[5] = STATE_MATRIX
;
444 tokens
[5] = STATE_MATRIX_TRANSPOSE
;
446 paramList
= mesa_vp
->Base
.Parameters
;
448 vpi
= malloc((mesa_vp
->Base
.NumInstructions
+ 4) * sizeof(struct prog_instruction
));
449 memset(vpi
, 0, 4 * sizeof(struct prog_instruction
));
451 /* emit four dot product instructions to do MVP transformation */
452 for (i
=0; i
< 4; i
++) {
454 tokens
[3] = tokens
[4] = i
;
455 idx
= _mesa_add_state_reference(paramList
, tokens
);
457 vpi
[i
].Opcode
= OPCODE_DP4
;
458 vpi
[i
].StringPos
= 0;
461 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
462 vpi
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
463 vpi
[i
].DstReg
.WriteMask
= 1 << i
;
464 vpi
[i
].DstReg
.CondMask
= COND_TR
;
466 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
467 vpi
[i
].SrcReg
[0].Index
= idx
;
468 vpi
[i
].SrcReg
[0].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
470 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
471 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
472 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
475 vpi
[i
].Opcode
= OPCODE_MUL
;
477 vpi
[i
].Opcode
= OPCODE_MAD
;
479 vpi
[i
].StringPos
= 0;
483 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
485 vpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
486 vpi
[i
].DstReg
.Index
= 0;
487 vpi
[i
].DstReg
.WriteMask
= 0xf;
488 vpi
[i
].DstReg
.CondMask
= COND_TR
;
490 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
491 vpi
[i
].SrcReg
[0].Index
= idx
;
492 vpi
[i
].SrcReg
[0].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
494 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
495 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
496 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(i
, i
, i
, i
);
499 vpi
[i
].SrcReg
[2].File
= PROGRAM_TEMPORARY
;
500 vpi
[i
].SrcReg
[2].Index
= 0;
501 vpi
[i
].SrcReg
[2].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
506 /* now append original program after our new instructions */
507 memcpy(&vpi
[i
], mesa_vp
->Base
.Instructions
, mesa_vp
->Base
.NumInstructions
* sizeof(struct prog_instruction
));
509 /* deallocate original program */
510 free(mesa_vp
->Base
.Instructions
);
512 /* install new program */
513 mesa_vp
->Base
.Instructions
= vpi
;
515 mesa_vp
->Base
.NumInstructions
+= 4;
516 vpi
= &mesa_vp
->Base
.Instructions
[mesa_vp
->Base
.NumInstructions
-1];
518 assert(vpi
->Opcode
== OPCODE_END
);
520 mesa_vp
->Base
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
521 mesa_vp
->Base
.OutputsWritten
|= (1 << VERT_RESULT_HPOS
);
523 //fprintf(stderr, "IsPositionInvariant is set!\n");
524 //_mesa_print_program(&mesa_vp->Base);
528 mesa_vp
->Base
.NumNativeInstructions
= 0;
529 if (mesa_vp
->Base
.Parameters
)
530 mesa_vp
->Base
.NumNativeParameters
= mesa_vp
->Base
.Parameters
->NumParameters
;
532 mesa_vp
->Base
.NumNativeParameters
= 0;
534 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
536 /* fglrx uses fixed inputs as follows for conventional attribs.
537 generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
538 There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
539 attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
540 vertex normal/weight)
541 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
542 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
543 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
544 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
545 generic attribs would require some more work (dma regions, renaming). */
547 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
548 vp
->inputs
[VERT_ATTRIB_POS
] = 0;
549 vp
->inputs
[VERT_ATTRIB_WEIGHT
] = 12;
550 vp
->inputs
[VERT_ATTRIB_NORMAL
] = 1;
551 vp
->inputs
[VERT_ATTRIB_COLOR0
] = 2;
552 vp
->inputs
[VERT_ATTRIB_COLOR1
] = 3;
553 vp
->inputs
[VERT_ATTRIB_FOG
] = 15;
554 vp
->inputs
[VERT_ATTRIB_TEX0
] = 6;
555 vp
->inputs
[VERT_ATTRIB_TEX1
] = 7;
556 vp
->inputs
[VERT_ATTRIB_TEX2
] = 8;
557 vp
->inputs
[VERT_ATTRIB_TEX3
] = 9;
558 vp
->inputs
[VERT_ATTRIB_TEX4
] = 10;
559 vp
->inputs
[VERT_ATTRIB_TEX5
] = 11;
560 /* attr 4,5 and 13 are only used with generic attribs.
561 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
562 not possible to use with vertex progs as it is lacking in vert prog specification) */
564 assert(mesa_vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
566 vp
->translated
= GL_TRUE
;
569 for(vpi
= mesa_vp
->Base
.Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++){
570 operands
= op_operands(vpi
->Opcode
);
571 are_srcs_scalar
= operands
& SCALAR_FLAG
;
574 for(i
= 0; i
< operands
; i
++)
575 src
[i
] = vpi
->SrcReg
[i
];
578 if( CMP_SRCS(src
[1], src
[2]) || CMP_SRCS(src
[0], src
[2]) ){
579 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
580 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
583 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
584 SWIZZLE_X
, SWIZZLE_Y
,
585 SWIZZLE_Z
, SWIZZLE_W
,
586 t_src_class(src
[2].File
), VSF_FLAG_NONE
) | (src
[2].RelAddr
<< 4);
588 o_inst
->src1
= ZERO_SRC_0
;
589 o_inst
->src2
= UNUSED_SRC_1
;
592 src
[2].File
= PROGRAM_TEMPORARY
;
593 src
[2].Index
= u_temp_i
;
600 if( CMP_SRCS(src
[1], src
[0]) ){
601 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
602 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
605 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
606 SWIZZLE_X
, SWIZZLE_Y
,
607 SWIZZLE_Z
, SWIZZLE_W
,
608 t_src_class(src
[0].File
), VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
610 o_inst
->src1
= ZERO_SRC_0
;
611 o_inst
->src2
= UNUSED_SRC_1
;
614 src
[0].File
= PROGRAM_TEMPORARY
;
615 src
[0].Index
= u_temp_i
;
621 /* These ops need special handling. */
624 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
625 So may need to insert additional instruction */
626 if ((src
[0].File
== src
[1].File
) &&
627 (src
[0].Index
== src
[1].Index
)) {
628 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&vpi
->DstReg
),
629 t_dst_mask(vpi
->DstReg
.WriteMask
));
630 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
631 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
633 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
635 t_src_class(src
[0].File
),
636 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
637 o_inst
->src1
= UNUSED_SRC_0
;
638 o_inst
->src2
= UNUSED_SRC_0
;
641 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
642 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
644 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
645 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
646 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
,
647 t_src_class(src
[0].File
),
648 src
[0].NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
649 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
650 SWIZZLE_ZERO
, SWIZZLE_ZERO
,
651 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), SWIZZLE_ZERO
,
652 t_src_class(src
[1].File
),
653 src
[1].NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
654 o_inst
->src2
= UNUSED_SRC_1
;
657 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&vpi
->DstReg
),
658 t_dst_mask(vpi
->DstReg
.WriteMask
));
659 o_inst
->src0
= MAKE_VSF_SOURCE(u_temp_i
,
666 o_inst
->src1
= UNUSED_SRC_0
;
667 o_inst
->src2
= UNUSED_SRC_0
;
672 case OPCODE_MOV
://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
674 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
675 t_dst_mask(vpi
->DstReg
.WriteMask
));
676 o_inst
->src0
= t_src(vp
, &src
[0]);
677 o_inst
->src1
= ZERO_SRC_0
;
678 o_inst
->src2
= UNUSED_SRC_1
;
682 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
683 src
[1].File
== PROGRAM_TEMPORARY
&&
684 src
[2].File
== PROGRAM_TEMPORARY
) ? R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
686 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&vpi
->DstReg
),
687 t_dst_mask(vpi
->DstReg
.WriteMask
));
688 o_inst
->src0
= t_src(vp
, &src
[0]);
690 if ((o_inst
- vp
->instr
) == 31) {
691 /* fix up the broken vertex program of quake4 demo... */
692 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
693 SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
,
694 t_src_class(src
[1].File
),
695 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
696 o_inst
->src2
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
697 SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
,
698 t_src_class(src
[1].File
),
699 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
702 o_inst
->src1
= t_src(vp
, &src
[1]);
703 o_inst
->src2
= t_src(vp
, &src
[2]);
706 o_inst
->src1
= t_src(vp
, &src
[1]);
707 o_inst
->src2
= t_src(vp
, &src
[2]);
711 case OPCODE_DP3
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
712 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&vpi
->DstReg
),
713 t_dst_mask(vpi
->DstReg
.WriteMask
));
715 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
716 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
717 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
718 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
720 t_src_class(src
[0].File
),
721 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
723 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
724 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
725 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
726 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
728 t_src_class(src
[1].File
),
729 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
731 o_inst
->src2
= UNUSED_SRC_1
;
734 case OPCODE_DPH
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
735 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&vpi
->DstReg
),
736 t_dst_mask(vpi
->DstReg
.WriteMask
));
738 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
739 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
740 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
741 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
742 VSF_IN_COMPONENT_ONE
,
743 t_src_class(src
[0].File
),
744 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
745 o_inst
->src1
= t_src(vp
, &src
[1]);
746 o_inst
->src2
= UNUSED_SRC_1
;
749 case OPCODE_SUB
://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
750 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
751 t_dst_mask(vpi
->DstReg
.WriteMask
));
753 o_inst
->src0
= t_src(vp
, &src
[0]);
754 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
755 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
756 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
757 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
758 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
759 t_src_class(src
[1].File
),
760 (!src
[1].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
761 o_inst
->src2
= UNUSED_SRC_1
;
764 case OPCODE_ABS
://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
765 o_inst
->op
=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX
, t_dst(&vpi
->DstReg
),
766 t_dst_mask(vpi
->DstReg
.WriteMask
));
768 o_inst
->src0
=t_src(vp
, &src
[0]);
769 o_inst
->src1
=MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
770 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
771 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
772 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
773 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
774 t_src_class(src
[0].File
),
775 (!src
[0].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
776 o_inst
->src2
= UNUSED_SRC_1
;
780 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
781 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
783 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_FRC
,
784 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
785 t_dst_mask(vpi
->DstReg
.WriteMask
));
787 o_inst
->src0
= t_src(vp
, &src
[0]);
788 o_inst
->src1
= UNUSED_SRC_0
;
789 o_inst
->src2
= UNUSED_SRC_1
;
792 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
793 t_dst_mask(vpi
->DstReg
.WriteMask
));
795 o_inst
->src0
= t_src(vp
, &src
[0]);
796 o_inst
->src1
= MAKE_VSF_SOURCE(u_temp_i
,
802 /* Not 100% sure about this */
803 (!src
[0].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
/*VSF_FLAG_ALL*/);
805 o_inst
->src2
= UNUSED_SRC_0
;
810 /* mul r0, r1.yzxw, r2.zxyw
811 mad r0, -r2.yzxw, r1.zxyw, r0
812 NOTE: might need MAD_2
815 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
816 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
817 t_dst_mask(vpi
->DstReg
.WriteMask
));
819 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
820 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
821 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
822 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
823 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
824 t_src_class(src
[0].File
),
825 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
827 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
828 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
829 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
830 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
831 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
832 t_src_class(src
[1].File
),
833 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
835 o_inst
->src2
= UNUSED_SRC_1
;
839 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MAD
, t_dst(&vpi
->DstReg
),
840 t_dst_mask(vpi
->DstReg
.WriteMask
));
842 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
843 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
844 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
845 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
846 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
847 t_src_class(src
[1].File
),
848 (!src
[1].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
850 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
851 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
852 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
853 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
854 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
855 t_src_class(src
[0].File
),
856 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
858 o_inst
->src2
= MAKE_VSF_SOURCE(u_temp_i
+1,
873 o_inst
->op
= MAKE_VSF_OP(t_opcode(vpi
->Opcode
), t_dst(&vpi
->DstReg
),
874 t_dst_mask(vpi
->DstReg
.WriteMask
));
879 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
880 o_inst
->src1
= UNUSED_SRC_0
;
881 o_inst
->src2
= UNUSED_SRC_1
;
885 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
886 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
887 o_inst
->src2
= UNUSED_SRC_1
;
891 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
892 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
893 o_inst
->src2
= t_src_scalar(vp
, &src
[2]);
897 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
904 o_inst
->src0
= t_src(vp
, &src
[0]);
905 o_inst
->src1
= UNUSED_SRC_0
;
906 o_inst
->src2
= UNUSED_SRC_1
;
910 o_inst
->src0
= t_src(vp
, &src
[0]);
911 o_inst
->src1
= t_src(vp
, &src
[1]);
912 o_inst
->src2
= UNUSED_SRC_1
;
916 o_inst
->src0
= t_src(vp
, &src
[0]);
917 o_inst
->src1
= t_src(vp
, &src
[1]);
918 o_inst
->src2
= t_src(vp
, &src
[2]);
922 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
928 if (mesa_vp
->Base
.NumNativeTemporaries
<
929 (mesa_vp
->Base
.NumTemporaries
+ (R200_VSF_MAX_TEMPS
- 1 - u_temp_i
))) {
930 mesa_vp
->Base
.NumNativeTemporaries
=
931 mesa_vp
->Base
.NumTemporaries
+ (R200_VSF_MAX_TEMPS
- 1 - u_temp_i
);
933 if (u_temp_i
< mesa_vp
->Base
.NumTemporaries
) {
934 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
935 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", mesa_vp
->Base
.NumTemporaries
, u_temp_i
);
939 u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
940 if(o_inst
- vp
->instr
>= R200_VSF_MAX_INST
) {
941 mesa_vp
->Base
.NumNativeInstructions
= 129;
942 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
943 fprintf(stderr
, "more than 128 native instructions\n");
947 if ((o_inst
->op
& R200_VSF_OUT_CLASS_MASK
) == R200_VSF_OUT_CLASS_RESULT_POS
) {
948 vp
->pos_end
= (o_inst
- vp
->instr
);
952 vp
->native
= GL_TRUE
;
953 mesa_vp
->Base
.NumNativeInstructions
= (o_inst
- vp
->instr
);
955 fprintf(stderr
, "hw program:\n");
956 for(i
=0; i
< vp
->program
.length
; i
++)
957 fprintf(stderr
, "%08x\n", vp
->instr
[i
]);
962 void r200SetupVertexProg( GLcontext
*ctx
) {
963 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
964 struct r200_vertex_program
*vp
= (struct r200_vertex_program
*)ctx
->VertexProgram
.Current
;
968 if (!vp
->translated
) {
969 rmesa
->curr_vp_hw
= NULL
;
970 r200_translate_vertex_program(vp
);
972 /* could optimize setting up vertex progs away for non-tcl hw */
973 fallback
= !(vp
->native
&& r200VertexProgUpdateParams(ctx
, vp
) &&
974 rmesa
->r200Screen
->drmSupportsVertexProgram
);
975 TCL_FALLBACK(ctx
, R200_TCL_FALLBACK_VERTEX_PROGRAM
, fallback
);
976 if (fallback
) return;
978 R200_STATECHANGE( rmesa
, pvs
);
980 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_1
] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT
) |
981 ((vp
->mesa_program
.Base
.NumNativeInstructions
- 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT
) |
982 (vp
->pos_end
<< R200_PVS_CNTL_1_POS_END_SHIFT
);
983 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_2
] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT
) |
984 (vp
->mesa_program
.Base
.NumNativeParameters
<< R200_PVS_CNTL_2_PARAM_COUNT_SHIFT
);
986 /* maybe user clip planes just work with vertex progs... untested */
987 if (ctx
->Transform
.ClipPlanesEnabled
) {
988 R200_STATECHANGE( rmesa
, tcl
);
989 if (vp
->mesa_program
.IsPositionInvariant
) {
990 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] |= (ctx
->Transform
.ClipPlanesEnabled
<< 2);
993 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] &= ~(0xfc);
997 if (vp
!= rmesa
->curr_vp_hw
) {
998 GLuint count
= vp
->mesa_program
.Base
.NumNativeInstructions
;
999 drm_radeon_cmd_header_t tmp
;
1001 R200_STATECHANGE( rmesa
, vpi
[0] );
1002 R200_STATECHANGE( rmesa
, vpi
[1] );
1004 /* FIXME: what about using a memcopy... */
1005 for (i
= 0; (i
< 64) && i
< count
; i
++) {
1006 rmesa
->hw
.vpi
[0].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
].op
;
1007 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
].src0
;
1008 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
].src1
;
1009 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
].src2
;
1011 /* hack up the cmd_size so not the whole state atom is emitted always.
1012 This may require some more thought, we may emit half progs on lost state, but
1013 hopefully it won't matter?
1014 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1015 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1016 rmesa
->hw
.vpi
[0].cmd_size
= 1 + 4 * ((count
> 64) ? 64 : count
);
1017 tmp
.i
= rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
];
1018 tmp
.veclinear
.count
= (count
> 64) ? 64 : count
;
1019 rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
] = tmp
.i
;
1021 for (i
= 0; i
< (count
- 64); i
++) {
1022 rmesa
->hw
.vpi
[1].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
+ 64].op
;
1023 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
+ 64].src0
;
1024 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
+ 64].src1
;
1025 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
+ 64].src2
;
1027 rmesa
->hw
.vpi
[1].cmd_size
= 1 + 4 * (count
- 64);
1028 tmp
.i
= rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
];
1029 tmp
.veclinear
.count
= count
- 64;
1030 rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
] = tmp
.i
;
1032 rmesa
->curr_vp_hw
= vp
;
1038 r200BindProgram(GLcontext
*ctx
, GLenum target
, struct gl_program
*prog
)
1040 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1043 case GL_VERTEX_PROGRAM_ARB
:
1044 rmesa
->curr_vp_hw
= NULL
;
1047 _mesa_problem(ctx
, "Target not supported yet!");
1052 static struct gl_program
*
1053 r200NewProgram(GLcontext
*ctx
, GLenum target
, GLuint id
)
1055 struct r200_vertex_program
*vp
;
1058 case GL_VERTEX_PROGRAM_ARB
:
1059 vp
= CALLOC_STRUCT(r200_vertex_program
);
1060 return _mesa_init_vertex_program(ctx
, &vp
->mesa_program
, target
, id
);
1061 case GL_FRAGMENT_PROGRAM_ARB
:
1062 case GL_FRAGMENT_PROGRAM_NV
:
1063 return _mesa_init_fragment_program( ctx
, CALLOC_STRUCT(gl_fragment_program
), target
, id
);
1065 _mesa_problem(ctx
, "Bad target in r200NewProgram");
1072 r200DeleteProgram(GLcontext
*ctx
, struct gl_program
*prog
)
1074 _mesa_delete_program(ctx
, prog
);
1078 r200ProgramStringNotify(GLcontext
*ctx
, GLenum target
, struct gl_program
*prog
)
1080 struct r200_vertex_program
*vp
= (void *)prog
;
1083 case GL_VERTEX_PROGRAM_ARB
:
1084 vp
->translated
= GL_FALSE
;
1085 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1086 r200_translate_vertex_program(vp
);
1089 /* need this for tcl fallbacks */
1090 _tnl_program_string(ctx
, target
, prog
);
1094 r200IsProgramNative(GLcontext
*ctx
, GLenum target
, struct gl_program
*prog
)
1096 struct r200_vertex_program
*vp
= (void *)prog
;
1099 case GL_VERTEX_STATE_PROGRAM_NV
:
1100 case GL_VERTEX_PROGRAM_ARB
:
1101 if (!vp
->translated
) {
1102 r200_translate_vertex_program(vp
);
1104 /* does not take parameters etc. into account */
1107 _mesa_problem(ctx
, "Bad target in r200NewProgram");
1112 void r200InitShaderFuncs(struct dd_function_table
*functions
)
1114 functions
->NewProgram
= r200NewProgram
;
1115 functions
->BindProgram
= r200BindProgram
;
1116 functions
->DeleteProgram
= r200DeleteProgram
;
1117 functions
->ProgramStringNotify
= r200ProgramStringNotify
;
1118 functions
->IsProgramNative
= r200IsProgramNative
;