1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Aapo Tahkola <aet@rasterburn.org>
37 #include "r200_context.h"
38 #include "r200_vertprog.h"
39 #include "r200_ioctl.h"
41 #include "program_instruction.h"
44 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
45 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
46 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
47 SWIZZLE_W != VSF_IN_COMPONENT_W || \
48 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
49 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
50 WRITEMASK_X != VSF_FLAG_X || \
51 WRITEMASK_Y != VSF_FLAG_Y || \
52 WRITEMASK_Z != VSF_FLAG_Z || \
53 WRITEMASK_W != VSF_FLAG_W
54 #error Cannot change these!
/* Flags and helpers for the op_names[] operand descriptors.
 *
 * SCALAR_FLAG is OR-ed into the operand count of opcodes whose sources are
 * scalar; OP_MASK extracts the operand count again.  The flag constants
 * are unsigned long to match the 'ip' field they are stored in: (1<<31)
 * shifts into the sign bit of int, which is undefined behaviour and
 * sign-extends when widened to a 64-bit unsigned long. */
#define SCALAR_FLAG (1UL<<31)
#define FLAG_MASK (1UL<<31)
#define OP_MASK	(0xf)  /* we are unlikely to have more than 15 */
/* Build one op_names[] entry: printable name, Mesa opcode, operand info. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65 unsigned long ip
; /* number of input operands and flags */
69 OPN(ARL
, 1|SCALAR_FLAG
),
74 OPN(EX2
, 1|SCALAR_FLAG
),
75 OPN(EXP
, 1|SCALAR_FLAG
),
78 OPN(LG2
, 1|SCALAR_FLAG
),
80 OPN(LOG
, 1|SCALAR_FLAG
),
86 OPN(POW
, 2|SCALAR_FLAG
),
87 OPN(RCP
, 1|SCALAR_FLAG
),
88 OPN(RSQ
, 1|SCALAR_FLAG
),
99 static GLboolean
r200VertexProgUpdateParams(GLcontext
*ctx
, struct r200_vertex_program
*vp
)
101 r200ContextPtr rmesa
= R200_CONTEXT( ctx
);
102 GLfloat
*fcmd
= (GLfloat
*)&rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
+ 1];
104 struct gl_vertex_program
*mesa_vp
= (void *)vp
;
105 struct gl_program_parameter_list
*paramList
;
106 drm_radeon_cmd_header_t tmp
;
108 R200_STATECHANGE( rmesa
, vpp
[0] );
109 R200_STATECHANGE( rmesa
, vpp
[1] );
110 assert(mesa_vp
->Base
.Parameters
);
111 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
112 paramList
= mesa_vp
->Base
.Parameters
;
114 if(paramList
->NumParameters
> R200_VSF_MAX_PARAM
){
115 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
119 for(pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
120 switch(paramList
->Parameters
[pi
].Type
) {
121 case PROGRAM_STATE_VAR
:
122 case PROGRAM_NAMED_PARAM
:
123 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
124 case PROGRAM_CONSTANT
:
125 *fcmd
++ = paramList
->ParameterValues
[pi
][0];
126 *fcmd
++ = paramList
->ParameterValues
[pi
][1];
127 *fcmd
++ = paramList
->ParameterValues
[pi
][2];
128 *fcmd
++ = paramList
->ParameterValues
[pi
][3];
131 _mesa_problem(NULL
, "Bad param type in %s", __FUNCTION__
);
135 fcmd
= (GLfloat
*)rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
+ 1];
138 /* hack up the cmd_size so not the whole state atom is emitted always. */
139 rmesa
->hw
.vpp
[0].cmd_size
=
140 1 + 4 * ((paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
);
141 tmp
.i
= rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
];
142 tmp
.veclinear
.count
= (paramList
->NumParameters
> 96) ? 96 : paramList
->NumParameters
;
143 rmesa
->hw
.vpp
[0].cmd
[VPP_CMD_0
] = tmp
.i
;
144 if (paramList
->NumParameters
> 96) {
145 rmesa
->hw
.vpp
[1].cmd_size
= 1 + 4 * (paramList
->NumParameters
- 96);
146 tmp
.i
= rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
];
147 tmp
.veclinear
.count
= paramList
->NumParameters
- 96;
148 rmesa
->hw
.vpp
[1].cmd
[VPP_CMD_0
] = tmp
.i
;
153 static __inline
unsigned long t_dst_mask(GLuint mask
)
155 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
156 return mask
& VSF_FLAG_ALL
;
// Translate a Mesa destination register into the destination bits of a
// hardware instruction: an R200_VSF_OUT_CLASS_* value, OR-ed with a
// register index shifted by R200_VPI_OUT_REG_INDEX_SHIFT for the classes
// that are indexed here (temporaries, second color, texcoords).
// NOTE(review): the switch header, some labels and the statements that
// follow the two error messages are not visible in this view.
159 static unsigned long t_dst(struct prog_dst_register
*dst
)
// Temporaries: the Mesa index is used directly as the register index.
162 case PROGRAM_TEMPORARY
:
163 return ((dst
->Index
<< R200_VPI_OUT_REG_INDEX_SHIFT
)
164 | R200_VSF_OUT_CLASS_TMP
);
// Vertex results are selected by their VERT_RESULT_* slot.
166 switch (dst
->Index
) {
167 case VERT_RESULT_HPOS
:
168 return R200_VSF_OUT_CLASS_RESULT_POS
;
169 case VERT_RESULT_COL0
:
170 return R200_VSF_OUT_CLASS_RESULT_COLOR
;
// The secondary color uses the same class as COL0, with register index 1.
171 case VERT_RESULT_COL1
:
172 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT
)
173 | R200_VSF_OUT_CLASS_RESULT_COLOR
);
174 case VERT_RESULT_FOGC
:
175 return R200_VSF_OUT_CLASS_RESULT_FOGC
;
// Texcoord sets 0-5: the register index is the offset from VERT_RESULT_TEX0.
176 case VERT_RESULT_TEX0
:
177 case VERT_RESULT_TEX1
:
178 case VERT_RESULT_TEX2
:
179 case VERT_RESULT_TEX3
:
180 case VERT_RESULT_TEX4
:
181 case VERT_RESULT_TEX5
:
182 return (((dst
->Index
- VERT_RESULT_TEX0
) << R200_VPI_OUT_REG_INDEX_SHIFT
)
183 | R200_VSF_OUT_CLASS_RESULT_TEXC
);
184 case VERT_RESULT_PSIZ
:
185 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE
;
// Any other result slot is reported on stderr.
187 fprintf(stderr
, "problem in %s, unknown dst output reg %d\n", __FUNCTION__
, dst
->Index
);
// Address register: only index 0 is accepted (checked by the assert).
191 case PROGRAM_ADDRESS
:
192 assert (dst
->Index
== 0);
193 return R200_VSF_OUT_CLASS_ADDR
;
// Any other register file is reported on stderr.
195 fprintf(stderr
, "problem in %s, unknown register type %d\n", __FUNCTION__
, dst
->File
);
// Map a Mesa register file to the hardware source class (VSF_IN_CLASS_*)
// that a source operand must be read from.
// NOTE(review): the switch header and the label(s) in front of the
// VSF_IN_CLASS_ATTR return are not visible in this view.
201 static unsigned long t_src_class(enum register_file file
)
205 case PROGRAM_TEMPORARY
:
206 return VSF_IN_CLASS_TMP
;
209 return VSF_IN_CLASS_ATTR
;
// Local, environment, named and state parameters all read from the
// parameter class.
211 case PROGRAM_LOCAL_PARAM
:
212 case PROGRAM_ENV_PARAM
:
213 case PROGRAM_NAMED_PARAM
:
214 case PROGRAM_STATE_VAR
:
215 return VSF_IN_CLASS_PARAM
;
// NOTE(review): the next two labels have no visible body of their own;
// presumably they fall into the error report below or belong to a
// commented-out list - confirm against the full file.
218 case PROGRAM_WRITE_ONLY:
219 case PROGRAM_ADDRESS:
// Unsupported file: report on stderr (the message has no trailing newline).
222 fprintf(stderr
, "problem in %s", __FUNCTION__
);
227 static __inline
unsigned long t_swizzle(GLubyte swizzle
)
229 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
234 static void vp_dump_inputs(struct r200_vertex_program
*vp
, char *caller
)
239 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
, caller
);
243 fprintf(stderr
, "%s:<", caller
);
244 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
245 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
246 fprintf(stderr
, ">\n");
// Compute the hardware register index for a source operand.
// For PROGRAM_INPUT sources the index comes from the vp->inputs[] mapping
// table, which must already hold an assignment (checked by the assert).
// NOTE(review): the local declarations and the return statements of the
// non-input branch are not visible in this view.
251 static unsigned long t_src_index(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
257 if(src
->File
== PROGRAM_INPUT
){
258 /* if(vp->inputs[src->Index] != -1)
259 return vp->inputs[src->Index];
261 for(i=0; i < VERT_ATTRIB_MAX; i++)
262 if(vp->inputs[i] > max_reg)
263 max_reg = vp->inputs[i];
265 vp->inputs[src->Index] = max_reg+1;*/
267 //vp_dump_inputs(vp, __FUNCTION__);
// A value of -1 marks an attribute without a hardware input slot.
268 assert(vp
->inputs
[src
->Index
] != -1);
269 return vp
->inputs
[src
->Index
];
// Other register files: a negative index is warned about on stderr.
271 if (src
->Index
< 0) {
272 fprintf(stderr
, "WARNING negative offsets for indirect addressing do not work\n");
279 static unsigned long t_src(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
282 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
283 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
284 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
285 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
286 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
287 t_src_class(src
->File
),
288 src
->NegateBase
) | (src
->RelAddr
<< 4);
291 static unsigned long t_src_scalar(struct r200_vertex_program
*vp
, struct prog_src_register
*src
)
294 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
295 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
296 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
297 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
298 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
299 t_src_class(src
->File
),
300 src
->NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
->RelAddr
<< 4);
// Map a Mesa opcode to the hardware opcode (R200_VPI_OUT_OP_*) for the
// opcodes listed below; OPCODE_DP4 maps to the hardware DOT operation.
// Any other opcode is reported on stderr with "Should not be called with
// opcode".
// NOTE(review): the switch header and the statements after the error
// message are not visible in this view.
303 static unsigned long t_opcode(enum prog_opcode opcode
)
307 case OPCODE_ADD
: return R200_VPI_OUT_OP_ADD
;
308 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
309 * seems to ignore neg offsets which isn't quite correct...
311 case OPCODE_ARL
: return R200_VPI_OUT_OP_ARL
;
312 case OPCODE_DP4
: return R200_VPI_OUT_OP_DOT
;
313 case OPCODE_DST
: return R200_VPI_OUT_OP_DST
;
314 case OPCODE_EX2
: return R200_VPI_OUT_OP_EX2
;
315 case OPCODE_EXP
: return R200_VPI_OUT_OP_EXP
;
316 case OPCODE_FRC
: return R200_VPI_OUT_OP_FRC
;
317 case OPCODE_LG2
: return R200_VPI_OUT_OP_LG2
;
318 case OPCODE_LIT
: return R200_VPI_OUT_OP_LIT
;
319 case OPCODE_LOG
: return R200_VPI_OUT_OP_LOG
;
320 case OPCODE_MAX
: return R200_VPI_OUT_OP_MAX
;
321 case OPCODE_MIN
: return R200_VPI_OUT_OP_MIN
;
322 case OPCODE_MUL
: return R200_VPI_OUT_OP_MUL
;
323 case OPCODE_RCP
: return R200_VPI_OUT_OP_RCP
;
324 case OPCODE_RSQ
: return R200_VPI_OUT_OP_RSQ
;
325 case OPCODE_SGE
: return R200_VPI_OUT_OP_SGE
;
326 case OPCODE_SLT
: return R200_VPI_OUT_OP_SLT
;
329 fprintf(stderr
, "%s: Should not be called with opcode %d!", __FUNCTION__
, opcode
);
// Look up the operand descriptor of an opcode: scan the op_names[] table
// linearly and return the 'ip' field of the first entry whose opcode
// matches.  A miss is reported on stderr.
// NOTE(review): the declaration of i and the statements after the error
// message are not visible in this view.
335 static unsigned long op_operands(enum prog_opcode opcode
)
339 /* Can we trust mesas opcodes to be in order ? */
340 for(i
=0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
341 if(op_names
[i
].opcode
== opcode
)
342 return op_names
[i
].ip
;
344 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
/* TODO: Get rid of t_src_class call */
/* True when sources a and b address different registers (RelAddr or Index
 * differs) while both are of the parameter class, or both of the attribute
 * class.  The arguments are parenthesized so that any lvalue expression
 * (e.g. *ptr) can be passed, and the definition no longer ends in a line
 * continuation that would pull the following line into the macro. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
356 /* fglrx on rv250 codes up unused sources as follows:
357 unused but necessary sources are same as previous source, zero-ed out.
358 unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
359 i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
360 set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
/* Simplified helpers for filling in source slots an instruction does not
   really need.  They derive the value from a source word of the current
   o_inst that has already been set up, so they must not be used on a
   source that has not been written yet.
   These are NOT semantically equivalent to the r300 ones; porting code
   between the drivers requires changes. */

/* swizzle field with all four component selectors set to ZERO */
#define R200_VP_SWIZZLE_ALL_ZERO \
   ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* source word s with its 12 swizzle bits replaced by all-ZERO selectors */
#define R200_VP_ZERO_SRC(s) \
   ((((s) & ~(0xfff << R200_VPI_IN_X_SHIFT)) | R200_VP_SWIZZLE_ALL_ZERO))

/* source word s with its low four bits replaced by the value 9
   (NOTE(review): presumably the "no class" encoding - confirm) */
#define R200_VP_UNUSED_SRC(s) (((s) & ~15) | 9)

#define ZERO_SRC_0 R200_VP_ZERO_SRC(o_inst->src0)

#define ZERO_SRC_1 R200_VP_ZERO_SRC(o_inst->src1)

#define ZERO_SRC_2 R200_VP_ZERO_SRC(o_inst->src2)

#define UNUSED_SRC_0 R200_VP_UNUSED_SRC(o_inst->src0)

#define UNUSED_SRC_1 R200_VP_UNUSED_SRC(o_inst->src1)

#define UNUSED_SRC_2 R200_VP_UNUSED_SRC(o_inst->src2)
389 /* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */
392 static GLboolean
r200_translate_vertex_program(struct r200_vertex_program
*vp
)
394 struct gl_vertex_program
*mesa_vp
= (void *)vp
;
395 struct prog_instruction
*vpi
;
397 VERTEX_SHADER_INSTRUCTION
*o_inst
;
398 unsigned long operands
;
402 vp
->native
= GL_FALSE
;
404 if ((mesa_vp
->Base
.InputsRead
&
405 ~(VERT_BIT_POS
| VERT_BIT_NORMAL
| VERT_BIT_COLOR0
| VERT_BIT_COLOR1
|
406 VERT_BIT_FOG
| VERT_BIT_TEX0
| VERT_BIT_TEX1
| VERT_BIT_TEX2
|
407 VERT_BIT_TEX3
| VERT_BIT_TEX4
| VERT_BIT_TEX5
)) != 0) {
408 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
409 fprintf(stderr
, "can't handle vert prog inputs 0x%x\n",
410 mesa_vp
->Base
.InputsRead
);
415 if (mesa_vp
->IsNVProgram
) {
416 /* subtle differences in spec like guaranteed initialized regs could cause
417 headaches. Might want to remove the driconf option to enable it completely */
/* Initial value should be the last tmp reg that the hw supports.
   Strangely enough r300 doesn't mind even though these would be out of range.
   Is it smart enough to realize that it doesn't need it? */
423 int u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
424 struct prog_src_register src
[3];
426 /* if (getenv("R300_VP_SAFETY")) {
427 WARN_ONCE("R300_VP_SAFETY enabled.\n");
429 vpi = malloc((mesa_vp->Base.NumInstructions + VSF_MAX_FRAGMENT_TEMPS) * sizeof(struct prog_instruction));
430 memset(vpi, 0, VSF_MAX_FRAGMENT_TEMPS * sizeof(struct prog_instruction));
432 for (i=0; i < VSF_MAX_FRAGMENT_TEMPS; i++) {
433 vpi[i].Opcode = OPCODE_MOV;
434 vpi[i].StringPos = 0;
437 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
438 vpi[i].DstReg.Index = i;
439 vpi[i].DstReg.WriteMask = WRITEMASK_XYZW;
440 vpi[i].DstReg.CondMask = COND_TR;
442 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
443 vpi[i].SrcReg[0].Index = 0;
444 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
447 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
449 free(mesa_vp->Base.Instructions);
451 mesa_vp->Base.Instructions = vpi;
453 mesa_vp->Base.NumInstructions += VSF_MAX_FRAGMENT_TEMPS;
454 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
456 assert(vpi->Opcode == OPCODE_END);
458 /* FIXME: is changing the prog safe to do here? */
459 if (mesa_vp
->IsPositionInvariant
) {
460 struct gl_program_parameter_list
*paramList
;
461 GLint tokens
[6] = { STATE_MATRIX
, STATE_MVP
, 0, 0, 0, STATE_MATRIX
};
464 tokens
[5] = STATE_MATRIX
;
466 tokens
[5] = STATE_MATRIX_TRANSPOSE
;
468 paramList
= mesa_vp
->Base
.Parameters
;
470 vpi
= malloc((mesa_vp
->Base
.NumInstructions
+ 4) * sizeof(struct prog_instruction
));
471 memset(vpi
, 0, 4 * sizeof(struct prog_instruction
));
473 for (i
=0; i
< 4; i
++) {
475 tokens
[3] = tokens
[4] = i
;
476 idx
= _mesa_add_state_reference(paramList
, tokens
);
478 vpi
[i
].Opcode
= OPCODE_DP4
;
479 vpi
[i
].StringPos
= 0;
482 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
483 vpi
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
484 vpi
[i
].DstReg
.WriteMask
= 1 << i
;
485 vpi
[i
].DstReg
.CondMask
= COND_TR
;
487 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
488 vpi
[i
].SrcReg
[0].Index
= idx
;
489 vpi
[i
].SrcReg
[0].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
491 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
492 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
493 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
496 vpi
[i
].Opcode
= OPCODE_MUL
;
498 vpi
[i
].Opcode
= OPCODE_MAD
;
500 vpi
[i
].StringPos
= 0;
504 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
506 vpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
507 vpi
[i
].DstReg
.Index
= 0;
508 vpi
[i
].DstReg
.WriteMask
= 0xf;
509 vpi
[i
].DstReg
.CondMask
= COND_TR
;
511 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
512 vpi
[i
].SrcReg
[0].Index
= idx
;
513 vpi
[i
].SrcReg
[0].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
515 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
516 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
517 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(i
, i
, i
, i
);
520 vpi
[i
].SrcReg
[2].File
= PROGRAM_TEMPORARY
;
521 vpi
[i
].SrcReg
[2].Index
= 0;
522 vpi
[i
].SrcReg
[2].Swizzle
= MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_W
);
527 memcpy(&vpi
[i
], mesa_vp
->Base
.Instructions
, mesa_vp
->Base
.NumInstructions
* sizeof(struct prog_instruction
));
529 free(mesa_vp
->Base
.Instructions
);
531 mesa_vp
->Base
.Instructions
= vpi
;
533 mesa_vp
->Base
.NumInstructions
+= 4;
534 vpi
= &mesa_vp
->Base
.Instructions
[mesa_vp
->Base
.NumInstructions
-1];
536 assert(vpi
->Opcode
== OPCODE_END
);
538 mesa_vp
->Base
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
539 mesa_vp
->Base
.OutputsWritten
|= (1 << VERT_RESULT_HPOS
);
541 //fprintf(stderr, "IsPositionInvariant is set!\n");
542 //_mesa_print_program(&mesa_vp->Base);
546 mesa_vp
->Base
.NumNativeInstructions
= 0;
547 mesa_vp
->Base
.NumNativeParameters
= mesa_vp
->Base
.Parameters
->NumParameters
;
549 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++)
551 /* fglrx uses fixed inputs as follows for conventional attribs.
552 generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
553 There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
554 attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
555 vertex normal/weight)
556 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
557 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
558 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
559 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
560 generic attribs would require some more work (dma regions, renaming). */
562 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
563 vp
->inputs
[VERT_ATTRIB_POS
] = 0;
564 vp
->inputs
[VERT_ATTRIB_WEIGHT
] = 12;
565 vp
->inputs
[VERT_ATTRIB_NORMAL
] = 1;
566 vp
->inputs
[VERT_ATTRIB_COLOR0
] = 2;
567 vp
->inputs
[VERT_ATTRIB_COLOR1
] = 3;
568 vp
->inputs
[VERT_ATTRIB_FOG
] = 15;
569 vp
->inputs
[VERT_ATTRIB_TEX0
] = 6;
570 vp
->inputs
[VERT_ATTRIB_TEX1
] = 7;
571 vp
->inputs
[VERT_ATTRIB_TEX2
] = 8;
572 vp
->inputs
[VERT_ATTRIB_TEX3
] = 9;
573 vp
->inputs
[VERT_ATTRIB_TEX4
] = 10;
574 vp
->inputs
[VERT_ATTRIB_TEX5
] = 11;
/* attr 4, 5 and 13 are only used with generic attribs.
   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
   not possible to use with vertex progs as it is lacking in the vertex program specification) */
579 assert(mesa_vp
->Base
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
581 vp
->translated
= GL_TRUE
;
584 for(vpi
= mesa_vp
->Base
.Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++){
585 if (u_temp_i
< mesa_vp
->Base
.NumTemporaries
) {
586 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
587 fprintf(stderr
, "Ran out of temps, num temps %d, us %d\n", mesa_vp
->Base
.NumTemporaries
, u_temp_i
);
591 u_temp_i
= R200_VSF_MAX_TEMPS
- 1;
592 if(o_inst
- vp
->instr
>= R200_VSF_MAX_INST
) {
593 mesa_vp
->Base
.NumNativeInstructions
= 129;
594 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
595 fprintf(stderr
, "more than 128 native instructions\n");
600 operands
= op_operands(vpi
->Opcode
);
601 are_srcs_scalar
= operands
& SCALAR_FLAG
;
604 for(i
= 0; i
< operands
; i
++)
605 src
[i
] = vpi
->SrcReg
[i
];
608 if( CMP_SRCS(src
[1], src
[2]) || CMP_SRCS(src
[0], src
[2]) ){
609 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
610 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
613 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
614 SWIZZLE_X
, SWIZZLE_Y
,
615 SWIZZLE_Z
, SWIZZLE_W
,
616 t_src_class(src
[2].File
), VSF_FLAG_NONE
) | (src
[2].RelAddr
<< 4);
618 o_inst
->src1
= ZERO_SRC_0
;
619 o_inst
->src2
= UNUSED_SRC_1
;
622 src
[2].File
= PROGRAM_TEMPORARY
;
623 src
[2].Index
= u_temp_i
;
630 if( CMP_SRCS(src
[1], src
[0]) ){
631 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
632 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
635 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
636 SWIZZLE_X
, SWIZZLE_Y
,
637 SWIZZLE_Z
, SWIZZLE_W
,
638 t_src_class(src
[0].File
), VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
640 o_inst
->src1
= ZERO_SRC_0
;
641 o_inst
->src2
= UNUSED_SRC_1
;
644 src
[0].File
= PROGRAM_TEMPORARY
;
645 src
[0].Index
= u_temp_i
;
651 /* These ops need special handling. */
654 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
655 So may need to insert additional instruction */
656 if ((src
[0].File
== src
[1].File
) &&
657 (src
[0].Index
== src
[1].Index
)) {
658 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&vpi
->DstReg
),
659 t_dst_mask(vpi
->DstReg
.WriteMask
));
660 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
661 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
663 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
665 t_src_class(src
[0].File
),
666 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
667 o_inst
->src1
= UNUSED_SRC_0
;
668 o_inst
->src2
= UNUSED_SRC_0
;
671 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
,
672 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
674 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
675 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
676 SWIZZLE_ZERO
, SWIZZLE_ZERO
, SWIZZLE_ZERO
,
677 t_src_class(src
[0].File
),
678 src
[0].NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
679 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
680 SWIZZLE_ZERO
, SWIZZLE_ZERO
,
681 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), SWIZZLE_ZERO
,
682 t_src_class(src
[1].File
),
683 src
[1].NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
684 o_inst
->src2
= UNUSED_SRC_1
;
687 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_POW
, t_dst(&vpi
->DstReg
),
688 t_dst_mask(vpi
->DstReg
.WriteMask
));
689 o_inst
->src0
= MAKE_VSF_SOURCE(u_temp_i
,
696 o_inst
->src1
= UNUSED_SRC_0
;
697 o_inst
->src2
= UNUSED_SRC_0
;
702 case OPCODE_MOV
://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
704 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
705 t_dst_mask(vpi
->DstReg
.WriteMask
));
706 o_inst
->src0
= t_src(vp
, &src
[0]);
707 o_inst
->src1
= ZERO_SRC_0
;
708 o_inst
->src2
= UNUSED_SRC_1
;
712 hw_op
=(src
[0].File
== PROGRAM_TEMPORARY
&&
713 src
[1].File
== PROGRAM_TEMPORARY
&&
714 src
[2].File
== PROGRAM_TEMPORARY
) ? R200_VPI_OUT_OP_MAD_2
: R200_VPI_OUT_OP_MAD
;
716 o_inst
->op
= MAKE_VSF_OP(hw_op
, t_dst(&vpi
->DstReg
),
717 t_dst_mask(vpi
->DstReg
.WriteMask
));
718 o_inst
->src0
= t_src(vp
, &src
[0]);
720 if ((o_inst
- vp
->instr
) == 31) {
721 /* fix up the broken vertex program of quake4 demo... */
722 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
723 SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
, SWIZZLE_X
,
724 t_src_class(src
[1].File
),
725 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
726 o_inst
->src2
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
727 SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
, SWIZZLE_Y
,
728 t_src_class(src
[1].File
),
729 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
732 o_inst
->src1
= t_src(vp
, &src
[1]);
733 o_inst
->src2
= t_src(vp
, &src
[2]);
736 o_inst
->src1
= t_src(vp
, &src
[1]);
737 o_inst
->src2
= t_src(vp
, &src
[2]);
741 case OPCODE_DP3
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
742 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&vpi
->DstReg
),
743 t_dst_mask(vpi
->DstReg
.WriteMask
));
745 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
746 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
747 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
748 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
750 t_src_class(src
[0].File
),
751 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
753 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
754 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
755 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
756 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
758 t_src_class(src
[1].File
),
759 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
761 o_inst
->src2
= UNUSED_SRC_1
;
764 case OPCODE_DPH
://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
765 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_DOT
, t_dst(&vpi
->DstReg
),
766 t_dst_mask(vpi
->DstReg
.WriteMask
));
768 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
769 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
770 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
771 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
772 VSF_IN_COMPONENT_ONE
,
773 t_src_class(src
[0].File
),
774 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
775 o_inst
->src1
= t_src(vp
, &src
[1]);
776 o_inst
->src2
= UNUSED_SRC_1
;
779 case OPCODE_SUB
://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
780 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
781 t_dst_mask(vpi
->DstReg
.WriteMask
));
783 o_inst
->src0
= t_src(vp
, &src
[0]);
784 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
785 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
786 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
787 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
788 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
789 t_src_class(src
[1].File
),
790 (!src
[1].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
791 o_inst
->src2
= UNUSED_SRC_1
;
794 case OPCODE_ABS
://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
795 o_inst
->op
=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX
, t_dst(&vpi
->DstReg
),
796 t_dst_mask(vpi
->DstReg
.WriteMask
));
798 o_inst
->src0
=t_src(vp
, &src
[0]);
799 o_inst
->src1
=MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
800 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
801 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
802 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
803 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
804 t_src_class(src
[0].File
),
805 (!src
[0].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
806 o_inst
->src2
= UNUSED_SRC_1
;
810 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
811 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
813 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_FRC
,
814 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
815 t_dst_mask(vpi
->DstReg
.WriteMask
));
817 o_inst
->src0
= t_src(vp
, &src
[0]);
818 o_inst
->src1
= UNUSED_SRC_0
;
819 o_inst
->src2
= UNUSED_SRC_1
;
822 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_ADD
, t_dst(&vpi
->DstReg
),
823 t_dst_mask(vpi
->DstReg
.WriteMask
));
825 o_inst
->src0
= t_src(vp
, &src
[0]);
826 o_inst
->src1
= MAKE_VSF_SOURCE(u_temp_i
,
832 /* Not 100% sure about this */
833 (!src
[0].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
/*VSF_FLAG_ALL*/);
835 o_inst
->src2
= UNUSED_SRC_0
;
840 /* mul r0, r1.yzxw, r2.zxyw
841 mad r0, -r2.yzxw, r1.zxyw, r0
842 NOTE: might need MAD_2
845 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MUL
,
846 (u_temp_i
<< R200_VPI_OUT_REG_INDEX_SHIFT
) | R200_VSF_OUT_CLASS_TMP
,
847 t_dst_mask(vpi
->DstReg
.WriteMask
));
849 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
850 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
851 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
852 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
853 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
854 t_src_class(src
[0].File
),
855 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
857 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
858 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
859 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
860 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
861 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
862 t_src_class(src
[1].File
),
863 src
[1].NegateBase
) | (src
[1].RelAddr
<< 4);
865 o_inst
->src2
= UNUSED_SRC_1
;
869 o_inst
->op
= MAKE_VSF_OP(R200_VPI_OUT_OP_MAD
, t_dst(&vpi
->DstReg
),
870 t_dst_mask(vpi
->DstReg
.WriteMask
));
872 o_inst
->src0
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
873 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
874 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
875 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
876 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
877 t_src_class(src
[1].File
),
878 (!src
[1].NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
880 o_inst
->src1
= MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
881 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
882 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
883 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
884 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
885 t_src_class(src
[0].File
),
886 src
[0].NegateBase
) | (src
[0].RelAddr
<< 4);
888 o_inst
->src2
= MAKE_VSF_SOURCE(u_temp_i
+1,
903 o_inst
->op
= MAKE_VSF_OP(t_opcode(vpi
->Opcode
), t_dst(&vpi
->DstReg
),
904 t_dst_mask(vpi
->DstReg
.WriteMask
));
909 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
910 o_inst
->src1
= UNUSED_SRC_0
;
911 o_inst
->src2
= UNUSED_SRC_1
;
915 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
916 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
917 o_inst
->src2
= UNUSED_SRC_1
;
921 o_inst
->src0
= t_src_scalar(vp
, &src
[0]);
922 o_inst
->src1
= t_src_scalar(vp
, &src
[1]);
923 o_inst
->src2
= t_src_scalar(vp
, &src
[2]);
927 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
934 o_inst
->src0
= t_src(vp
, &src
[0]);
935 o_inst
->src1
= UNUSED_SRC_0
;
936 o_inst
->src2
= UNUSED_SRC_1
;
940 o_inst
->src0
= t_src(vp
, &src
[0]);
941 o_inst
->src1
= t_src(vp
, &src
[1]);
942 o_inst
->src2
= UNUSED_SRC_1
;
946 o_inst
->src0
= t_src(vp
, &src
[0]);
947 o_inst
->src1
= t_src(vp
, &src
[1]);
948 o_inst
->src2
= t_src(vp
, &src
[2]);
952 fprintf(stderr
, "illegal number of operands %lu\n", operands
);
958 if ((o_inst
->op
& R200_VSF_OUT_CLASS_MASK
) == R200_VSF_OUT_CLASS_RESULT_POS
) {
959 vp
->pos_end
= (o_inst
- vp
->instr
);
963 /* need to test again since some instructions require more than one (up to 3) native inst */
964 if(o_inst
- vp
->instr
> R200_VSF_MAX_INST
) {
965 mesa_vp
->Base
.NumNativeInstructions
= 129;
966 if (R200_DEBUG
& DEBUG_FALLBACKS
) {
967 fprintf(stderr
, "more than 128 native instructions\n");
971 vp
->native
= GL_TRUE
;
972 mesa_vp
->Base
.NumNativeInstructions
= (o_inst
- vp
->instr
);
974 fprintf(stderr
, "hw program:\n");
975 for(i
=0; i
< vp
->program
.length
; i
++)
976 fprintf(stderr
, "%08x\n", vp
->instr
[i
]);
981 void r200SetupVertexProg( GLcontext
*ctx
) {
982 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
983 struct r200_vertex_program
*vp
= (struct r200_vertex_program
*)ctx
->VertexProgram
.Current
;
987 if (!vp
->translated
) {
988 rmesa
->curr_vp_hw
= NULL
;
989 r200_translate_vertex_program(vp
);
991 /* could optimize setting up vertex progs away for non-tcl hw */
992 fallback
= !(vp
->native
&& r200VertexProgUpdateParams(ctx
, vp
) &&
993 rmesa
->r200Screen
->drmSupportsVertexProgram
);
994 TCL_FALLBACK(ctx
, R200_TCL_FALLBACK_VERTEX_PROGRAM
, fallback
);
995 if (fallback
) return;
997 R200_STATECHANGE( rmesa
, pvs
);
999 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_1
] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT
) |
1000 ((vp
->mesa_program
.Base
.NumNativeInstructions
- 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT
) |
1001 (vp
->pos_end
<< R200_PVS_CNTL_1_POS_END_SHIFT
);
1002 rmesa
->hw
.pvs
.cmd
[PVS_CNTL_2
] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT
) |
1003 (vp
->mesa_program
.Base
.NumNativeParameters
<< R200_PVS_CNTL_2_PARAM_COUNT_SHIFT
);
1005 /* maybe user clip planes just work with vertex progs... untested */
1006 if (ctx
->Transform
.ClipPlanesEnabled
) {
1007 R200_STATECHANGE( rmesa
, tcl
);
1008 if (vp
->mesa_program
.IsPositionInvariant
) {
1009 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] |= (ctx
->Transform
.ClipPlanesEnabled
<< 2);
1012 rmesa
->hw
.tcl
.cmd
[TCL_UCP_VERT_BLEND_CTL
] &= ~(0xfc);
1016 if (vp
!= rmesa
->curr_vp_hw
) {
1017 GLuint count
= vp
->mesa_program
.Base
.NumNativeInstructions
;
1018 drm_radeon_cmd_header_t tmp
;
1020 R200_STATECHANGE( rmesa
, vpi
[0] );
1021 R200_STATECHANGE( rmesa
, vpi
[1] );
1023 /* FIXME: what about using a memcopy... */
1024 for (i
= 0; (i
< 64) && i
< count
; i
++) {
1025 rmesa
->hw
.vpi
[0].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
].op
;
1026 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
].src0
;
1027 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
].src1
;
1028 rmesa
->hw
.vpi
[0].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
].src2
;
1030 /* hack up the cmd_size so not the whole state atom is emitted always.
1031 This may require some more thought, we may emit half progs on lost state, but
1032 hopefully it won't matter?
1033 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1034 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1035 rmesa
->hw
.vpi
[0].cmd_size
= 1 + 4 * ((count
> 64) ? 64 : count
);
1036 tmp
.i
= rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
];
1037 tmp
.veclinear
.count
= (count
> 64) ? 64 : count
;
1038 rmesa
->hw
.vpi
[0].cmd
[VPI_CMD_0
] = tmp
.i
;
1040 for (i
= 0; i
< (count
- 64); i
++) {
1041 rmesa
->hw
.vpi
[1].cmd
[VPI_OPDST_0
+ 4 * i
] = vp
->instr
[i
+ 64].op
;
1042 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC0_0
+ 4 * i
] = vp
->instr
[i
+ 64].src0
;
1043 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC1_0
+ 4 * i
] = vp
->instr
[i
+ 64].src1
;
1044 rmesa
->hw
.vpi
[1].cmd
[VPI_SRC2_0
+ 4 * i
] = vp
->instr
[i
+ 64].src2
;
1046 rmesa
->hw
.vpi
[1].cmd_size
= 1 + 4 * (count
- 64);
1047 tmp
.i
= rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
];
1048 tmp
.veclinear
.count
= count
- 64;
1049 rmesa
->hw
.vpi
[1].cmd
[VPI_CMD_0
] = tmp
.i
;
1051 rmesa
->curr_vp_hw
= vp
;
1057 r200BindProgram(GLcontext
*ctx
, GLenum target
, struct gl_program
*prog
)
1059 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
1062 case GL_VERTEX_PROGRAM_ARB
:
1063 rmesa
->curr_vp_hw
= NULL
;
1066 _mesa_problem(ctx
, "Target not supported yet!");
1071 static struct gl_program
*
1072 r200NewProgram(GLcontext
*ctx
, GLenum target
, GLuint id
)
1074 struct r200_vertex_program
*vp
;
1077 case GL_VERTEX_PROGRAM_ARB
:
1078 vp
= CALLOC_STRUCT(r200_vertex_program
);
1079 return _mesa_init_vertex_program(ctx
, &vp
->mesa_program
, target
, id
);
1080 case GL_FRAGMENT_PROGRAM_ARB
:
1081 case GL_FRAGMENT_PROGRAM_NV
:
1082 return _mesa_init_fragment_program( ctx
, CALLOC_STRUCT(gl_fragment_program
), target
, id
);
1084 _mesa_problem(ctx
, "Bad target in r200NewProgram");
1091 r200DeleteProgram(GLcontext
*ctx
, struct gl_program
*prog
)
1093 _mesa_delete_program(ctx
, prog
);
1097 r200ProgramStringNotify(GLcontext
*ctx
, GLenum target
, struct gl_program
*prog
)
1099 struct r200_vertex_program
*vp
= (void *)prog
;
1102 case GL_VERTEX_PROGRAM_ARB
:
1103 vp
->translated
= GL_FALSE
;
1104 memset(&vp
->translated
, 0, sizeof(struct r200_vertex_program
) - sizeof(struct gl_vertex_program
));
1105 /*r200_translate_vertex_shader(vp);*/
1108 /* need this for tcl fallbacks */
1109 _tnl_program_string(ctx
, target
, prog
);
1113 r200IsProgramNative(GLcontext
*ctx
, GLenum target
, struct gl_program
*prog
)
1115 struct r200_vertex_program
*vp
= (void *)prog
;
1118 case GL_VERTEX_STATE_PROGRAM_NV
:
1119 case GL_VERTEX_PROGRAM_ARB
:
1120 if (!vp
->translated
) {
1121 r200_translate_vertex_program(vp
);
1123 /* does not take parameters etc. into account */
1126 _mesa_problem(ctx
, "Bad target in r200NewProgram");
1131 void r200InitShaderFuncs(struct dd_function_table
*functions
)
1133 functions
->NewProgram
= r200NewProgram
;
1134 functions
->BindProgram
= r200BindProgram
;
1135 functions
->DeleteProgram
= r200DeleteProgram
;
1136 functions
->ProgramStringNotify
= r200ProgramStringNotify
;
1137 functions
->IsProgramNative
= r200IsProgramNative
;