Minor r200 vertex program cleanups. Remove disabled leftovers from r300 vertex progra...
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "glheader.h"
34 #include "macros.h"
35 #include "enums.h"
36 #include "program.h"
37
38 #include "r200_context.h"
39 #include "r200_vertprog.h"
40 #include "r200_ioctl.h"
41 #include "r200_tcl.h"
42 #include "program_instruction.h"
43 #include "tnl/tnl.h"
44
45 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
46 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
47 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
48 SWIZZLE_W != VSF_IN_COMPONENT_W || \
49 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
50 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
51 WRITEMASK_X != VSF_FLAG_X || \
52 WRITEMASK_Y != VSF_FLAG_Y || \
53 WRITEMASK_Z != VSF_FLAG_Z || \
54 WRITEMASK_W != VSF_FLAG_W
55 #error Cannot change these!
56 #endif
57
/* Flag bits stored in op_names[].ip above the operand count (see OP_MASK).
 * They are built from an unsigned long constant: (1<<31) overflows a 32-bit
 * signed int, and the resulting negative value would be sign-extended when
 * stored in the unsigned long ip field on platforms with a 64-bit long. */
#define SCALAR_FLAG (1UL<<31)
#define FLAG_MASK (1UL<<31)
#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
61 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
62
63 static struct{
64 char *name;
65 int opcode;
66 unsigned long ip; /* number of input operands and flags */
67 }op_names[]={
68 OPN(ABS, 1),
69 OPN(ADD, 2),
70 OPN(ARL, 1|SCALAR_FLAG),
71 OPN(DP3, 2),
72 OPN(DP4, 2),
73 OPN(DPH, 2),
74 OPN(DST, 2),
75 OPN(EX2, 1|SCALAR_FLAG),
76 OPN(EXP, 1|SCALAR_FLAG),
77 OPN(FLR, 1),
78 OPN(FRC, 1),
79 OPN(LG2, 1|SCALAR_FLAG),
80 OPN(LIT, 1),
81 OPN(LOG, 1|SCALAR_FLAG),
82 OPN(MAD, 3),
83 OPN(MAX, 2),
84 OPN(MIN, 2),
85 OPN(MOV, 1),
86 OPN(MUL, 2),
87 OPN(POW, 2|SCALAR_FLAG),
88 OPN(RCP, 1|SCALAR_FLAG),
89 OPN(RSQ, 1|SCALAR_FLAG),
90 OPN(SGE, 2),
91 OPN(SLT, 2),
92 OPN(SUB, 2),
93 OPN(SWZ, 1),
94 OPN(XPD, 2),
95 OPN(PRINT, 0),
96 OPN(END, 0),
97 };
98 #undef OPN
99
100 static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
101 {
102 r200ContextPtr rmesa = R200_CONTEXT( ctx );
103 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
104 int pi;
105 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
106 struct gl_program_parameter_list *paramList;
107 drm_radeon_cmd_header_t tmp;
108
109 R200_STATECHANGE( rmesa, vpp[0] );
110 R200_STATECHANGE( rmesa, vpp[1] );
111 assert(mesa_vp->Base.Parameters);
112 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
113 paramList = mesa_vp->Base.Parameters;
114
115 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
116 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
117 return GL_FALSE;
118 }
119
120 for(pi = 0; pi < paramList->NumParameters; pi++) {
121 switch(paramList->Parameters[pi].Type) {
122 case PROGRAM_STATE_VAR:
123 case PROGRAM_NAMED_PARAM:
124 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
125 case PROGRAM_CONSTANT:
126 *fcmd++ = paramList->ParameterValues[pi][0];
127 *fcmd++ = paramList->ParameterValues[pi][1];
128 *fcmd++ = paramList->ParameterValues[pi][2];
129 *fcmd++ = paramList->ParameterValues[pi][3];
130 break;
131 default:
132 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
133 break;
134 }
135 if (pi == 95) {
136 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
137 }
138 }
139 /* hack up the cmd_size so not the whole state atom is emitted always. */
140 rmesa->hw.vpp[0].cmd_size =
141 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
142 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
143 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
144 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
145 if (paramList->NumParameters > 96) {
146 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
147 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
148 tmp.veclinear.count = paramList->NumParameters - 96;
149 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
150 }
151 return GL_TRUE;
152 }
153
154 static __inline unsigned long t_dst_mask(GLuint mask)
155 {
156 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
157 return mask & VSF_FLAG_ALL;
158 }
159
160 static unsigned long t_dst(struct prog_dst_register *dst)
161 {
162 switch(dst->File) {
163 case PROGRAM_TEMPORARY:
164 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
165 | R200_VSF_OUT_CLASS_TMP);
166 case PROGRAM_OUTPUT:
167 switch (dst->Index) {
168 case VERT_RESULT_HPOS:
169 return R200_VSF_OUT_CLASS_RESULT_POS;
170 case VERT_RESULT_COL0:
171 return R200_VSF_OUT_CLASS_RESULT_COLOR;
172 case VERT_RESULT_COL1:
173 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
174 | R200_VSF_OUT_CLASS_RESULT_COLOR);
175 case VERT_RESULT_FOGC:
176 return R200_VSF_OUT_CLASS_RESULT_FOGC;
177 case VERT_RESULT_TEX0:
178 case VERT_RESULT_TEX1:
179 case VERT_RESULT_TEX2:
180 case VERT_RESULT_TEX3:
181 case VERT_RESULT_TEX4:
182 case VERT_RESULT_TEX5:
183 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
184 | R200_VSF_OUT_CLASS_RESULT_TEXC);
185 case VERT_RESULT_PSIZ:
186 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
187 default:
188 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
189 exit(0);
190 return 0;
191 }
192 case PROGRAM_ADDRESS:
193 assert (dst->Index == 0);
194 return R200_VSF_OUT_CLASS_ADDR;
195 default:
196 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
197 exit(0);
198 return 0;
199 }
200 }
201
202 static unsigned long t_src_class(enum register_file file)
203 {
204
205 switch(file){
206 case PROGRAM_TEMPORARY:
207 return VSF_IN_CLASS_TMP;
208
209 case PROGRAM_INPUT:
210 return VSF_IN_CLASS_ATTR;
211
212 case PROGRAM_LOCAL_PARAM:
213 case PROGRAM_ENV_PARAM:
214 case PROGRAM_NAMED_PARAM:
215 case PROGRAM_STATE_VAR:
216 return VSF_IN_CLASS_PARAM;
217 /*
218 case PROGRAM_OUTPUT:
219 case PROGRAM_WRITE_ONLY:
220 case PROGRAM_ADDRESS:
221 */
222 default:
223 fprintf(stderr, "problem in %s", __FUNCTION__);
224 exit(0);
225 }
226 }
227
228 static __inline unsigned long t_swizzle(GLubyte swizzle)
229 {
230 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
231 return swizzle;
232 }
233
#if 0
/* Debug helper, currently compiled out: prints the vp->inputs[] table of a
 * vertex program to stderr, prefixed with the name of the caller. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr = 0;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
251
252 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
253 {
254 /*
255 int i;
256 int max_reg = -1;
257 */
258 if(src->File == PROGRAM_INPUT){
259 /* if(vp->inputs[src->Index] != -1)
260 return vp->inputs[src->Index];
261
262 for(i=0; i < VERT_ATTRIB_MAX; i++)
263 if(vp->inputs[i] > max_reg)
264 max_reg = vp->inputs[i];
265
266 vp->inputs[src->Index] = max_reg+1;*/
267
268 //vp_dump_inputs(vp, __FUNCTION__);
269 assert(vp->inputs[src->Index] != -1);
270 return vp->inputs[src->Index];
271 } else {
272 if (src->Index < 0) {
273 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
274 return 0;
275 }
276 return src->Index;
277 }
278 }
279
280 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
281 {
282
283 return MAKE_VSF_SOURCE(t_src_index(vp, src),
284 t_swizzle(GET_SWZ(src->Swizzle, 0)),
285 t_swizzle(GET_SWZ(src->Swizzle, 1)),
286 t_swizzle(GET_SWZ(src->Swizzle, 2)),
287 t_swizzle(GET_SWZ(src->Swizzle, 3)),
288 t_src_class(src->File),
289 src->NegateBase) | (src->RelAddr << 4);
290 }
291
292 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
293 {
294
295 return MAKE_VSF_SOURCE(t_src_index(vp, src),
296 t_swizzle(GET_SWZ(src->Swizzle, 0)),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_src_class(src->File),
301 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
302 }
303
304 static unsigned long t_opcode(enum prog_opcode opcode)
305 {
306
307 switch(opcode){
308 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
309 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
310 * seems to ignore neg offsets which isn't quite correct...
311 */
312 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
313 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
314 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
315 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
316 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
317 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
318 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
319 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
320 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
321 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
322 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
323 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
324 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
325 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
326 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
327 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
328
329 default:
330 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
331 }
332 exit(-1);
333 return 0;
334 }
335
336 static unsigned long op_operands(enum prog_opcode opcode)
337 {
338 int i;
339
340 /* Can we trust mesas opcodes to be in order ? */
341 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
342 if(op_names[i].opcode == opcode)
343 return op_names[i].ip;
344
345 fprintf(stderr, "op %d not found in op_names\n", opcode);
346 exit(-1);
347 return 0;
348 }
349
/* TODO: Get rid of t_src_class call */
/* True if the two source registers differ in index or relative addressing
 * and are both of the parameter class or both of the attribute class.  The
 * translator then copies one of them to a temporary first.  Arguments are
 * parenthesized so any lvalue expression can be passed, and the definition
 * no longer ends in a line continuation that would splice the following
 * line into the macro. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
                        ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
                          t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
                         (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
                          t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
356
357 /* fglrx on rv250 codes up unused sources as follows:
358 unused but necessary sources are same as previous source, zero-ed out.
359 unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
360 i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
361 set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
362
/* Simplified helpers that derive an operand word from a source word of the
   instruction currently being built (o_inst), so they must obviously not be
   used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* all four component selectors set to "zero" */
#define VP_ZERO_SELECT_XYZW \
   ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* hw_src with its 12 selector bits replaced by the "zero" selectors */
#define VP_ZEROED_SRC(hw_src) \
   (((hw_src) & ~(0xfff << R200_VPI_IN_X_SHIFT)) | VP_ZERO_SELECT_XYZW)

/* hw_src with its low four bits replaced by the value 9 */
#define VP_UNUSED_SRC(hw_src) (((hw_src) & ~15) | 9)

#define ZERO_SRC_0 VP_ZEROED_SRC(o_inst->src0)
#define ZERO_SRC_1 VP_ZEROED_SRC(o_inst->src1)
#define ZERO_SRC_2 VP_ZEROED_SRC(o_inst->src2)

#define UNUSED_SRC_0 VP_UNUSED_SRC(o_inst->src0)
#define UNUSED_SRC_1 VP_UNUSED_SRC(o_inst->src1)
#define UNUSED_SRC_2 VP_UNUSED_SRC(o_inst->src2)
388
389
390 /* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */
391 #define PREFER_DP4
392
393
394 /**
395 * Generate an R200 vertex program from Mesa's internal representation.
396 *
397 * \return GL_TRUE for success, GL_FALSE for failure.
398 */
399 static GLboolean r200_translate_vertex_program(struct r200_vertex_program *vp)
400 {
401 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
402 struct prog_instruction *vpi;
403 int i;
404 VERTEX_SHADER_INSTRUCTION *o_inst;
405 unsigned long operands;
406 int are_srcs_scalar;
407 unsigned long hw_op;
408
409 vp->native = GL_FALSE;
410
411 if (mesa_vp->Base.NumInstructions == 0)
412 return GL_FALSE;
413
414 if ((mesa_vp->Base.InputsRead &
415 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
416 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
417 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
418 if (R200_DEBUG & DEBUG_FALLBACKS) {
419 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
420 mesa_vp->Base.InputsRead);
421 }
422 return GL_FALSE;
423 }
424
425 if (mesa_vp->IsNVProgram) {
426 /* subtle differences in spec like guaranteed initialized regs could cause
427 headaches. Might want to remove the driconf option to enable it completely */
428 return GL_FALSE;
429 }
430 /* Initial value should be last tmp reg that hw supports.
431 Strangely enough r300 doesnt mind even though these would be out of range.
432 Smart enough to realize that it doesnt need it? */
433 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
434 struct prog_src_register src[3];
435
436 /* FIXME: is changing the prog safe to do here? */
437 if (mesa_vp->IsPositionInvariant) {
438 struct gl_program_parameter_list *paramList;
439 GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX };
440
441 #ifdef PREFER_DP4
442 tokens[5] = STATE_MATRIX;
443 #else
444 tokens[5] = STATE_MATRIX_TRANSPOSE;
445 #endif
446 paramList = mesa_vp->Base.Parameters;
447
448 vpi = malloc((mesa_vp->Base.NumInstructions + 4) * sizeof(struct prog_instruction));
449 memset(vpi, 0, 4 * sizeof(struct prog_instruction));
450
451 /* emit four dot product instructions to do MVP transformation */
452 for (i=0; i < 4; i++) {
453 GLint idx;
454 tokens[3] = tokens[4] = i;
455 idx = _mesa_add_state_reference(paramList, tokens);
456 #ifdef PREFER_DP4
457 vpi[i].Opcode = OPCODE_DP4;
458 vpi[i].StringPos = 0;
459 vpi[i].Data = 0;
460
461 vpi[i].DstReg.File = PROGRAM_OUTPUT;
462 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
463 vpi[i].DstReg.WriteMask = 1 << i;
464 vpi[i].DstReg.CondMask = COND_TR;
465
466 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
467 vpi[i].SrcReg[0].Index = idx;
468 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
469
470 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
471 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
472 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
473 #else
474 if (i == 0)
475 vpi[i].Opcode = OPCODE_MUL;
476 else
477 vpi[i].Opcode = OPCODE_MAD;
478
479 vpi[i].StringPos = 0;
480 vpi[i].Data = 0;
481
482 if (i == 3)
483 vpi[i].DstReg.File = PROGRAM_OUTPUT;
484 else
485 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
486 vpi[i].DstReg.Index = 0;
487 vpi[i].DstReg.WriteMask = 0xf;
488 vpi[i].DstReg.CondMask = COND_TR;
489
490 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
491 vpi[i].SrcReg[0].Index = idx;
492 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
493
494 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
495 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
496 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
497
498 if (i > 0) {
499 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
500 vpi[i].SrcReg[2].Index = 0;
501 vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
502 }
503 #endif
504 }
505
506 /* now append original program after our new instructions */
507 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
508
509 /* deallocate original program */
510 free(mesa_vp->Base.Instructions);
511
512 /* install new program */
513 mesa_vp->Base.Instructions = vpi;
514
515 mesa_vp->Base.NumInstructions += 4;
516 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
517
518 assert(vpi->Opcode == OPCODE_END);
519
520 mesa_vp->Base.InputsRead |= (1 << VERT_ATTRIB_POS);
521 mesa_vp->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS);
522
523 //fprintf(stderr, "IsPositionInvariant is set!\n");
524 //_mesa_print_program(&mesa_vp->Base);
525 }
526
527 vp->pos_end = 0;
528 mesa_vp->Base.NumNativeInstructions = 0;
529 if (mesa_vp->Base.Parameters)
530 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
531 else
532 mesa_vp->Base.NumNativeParameters = 0;
533
534 for(i=0; i < VERT_ATTRIB_MAX; i++)
535 vp->inputs[i] = -1;
536 /* fglrx uses fixed inputs as follows for conventional attribs.
537 generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
538 There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
539 attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
540 vertex normal/weight)
541 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
542 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
543 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
544 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
545 generic attribs would require some more work (dma regions, renaming). */
546
547 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
548 vp->inputs[VERT_ATTRIB_POS] = 0;
549 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
550 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
551 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
552 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
553 vp->inputs[VERT_ATTRIB_FOG] = 15;
554 vp->inputs[VERT_ATTRIB_TEX0] = 6;
555 vp->inputs[VERT_ATTRIB_TEX1] = 7;
556 vp->inputs[VERT_ATTRIB_TEX2] = 8;
557 vp->inputs[VERT_ATTRIB_TEX3] = 9;
558 vp->inputs[VERT_ATTRIB_TEX4] = 10;
559 vp->inputs[VERT_ATTRIB_TEX5] = 11;
560 /* attr 4,5 and 13 are only used with generic attribs.
561 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
562 not possibe to use with vertex progs as it is lacking in vert prog specification) */
563
564 assert(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS));
565
566 vp->translated = GL_TRUE;
567
568 o_inst = vp->instr;
569 for(vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
570 operands = op_operands(vpi->Opcode);
571 are_srcs_scalar = operands & SCALAR_FLAG;
572 operands &= OP_MASK;
573
574 for(i = 0; i < operands; i++)
575 src[i] = vpi->SrcReg[i];
576
577 if(operands == 3){
578 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
579 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
580 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
581 VSF_FLAG_ALL);
582
583 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
584 SWIZZLE_X, SWIZZLE_Y,
585 SWIZZLE_Z, SWIZZLE_W,
586 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
587
588 o_inst->src1 = ZERO_SRC_0;
589 o_inst->src2 = UNUSED_SRC_1;
590 o_inst++;
591
592 src[2].File = PROGRAM_TEMPORARY;
593 src[2].Index = u_temp_i;
594 src[2].RelAddr = 0;
595 u_temp_i--;
596 }
597 }
598
599 if(operands >= 2){
600 if( CMP_SRCS(src[1], src[0]) ){
601 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
602 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
603 VSF_FLAG_ALL);
604
605 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
606 SWIZZLE_X, SWIZZLE_Y,
607 SWIZZLE_Z, SWIZZLE_W,
608 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
609
610 o_inst->src1 = ZERO_SRC_0;
611 o_inst->src2 = UNUSED_SRC_1;
612 o_inst++;
613
614 src[0].File = PROGRAM_TEMPORARY;
615 src[0].Index = u_temp_i;
616 src[0].RelAddr = 0;
617 u_temp_i--;
618 }
619 }
620
621 /* These ops need special handling. */
622 switch(vpi->Opcode){
623 case OPCODE_POW:
624 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
625 So may need to insert additional instruction */
626 if ((src[0].File == src[1].File) &&
627 (src[0].Index == src[1].Index)) {
628 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
629 t_dst_mask(vpi->DstReg.WriteMask));
630 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
631 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
632 SWIZZLE_ZERO,
633 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
634 SWIZZLE_ZERO,
635 t_src_class(src[0].File),
636 src[0].NegateBase) | (src[0].RelAddr << 4);
637 o_inst->src1 = UNUSED_SRC_0;
638 o_inst->src2 = UNUSED_SRC_0;
639 }
640 else {
641 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
642 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
643 VSF_FLAG_ALL);
644 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
645 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
646 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
647 t_src_class(src[0].File),
648 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
649 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
650 SWIZZLE_ZERO, SWIZZLE_ZERO,
651 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
652 t_src_class(src[1].File),
653 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
654 o_inst->src2 = UNUSED_SRC_1;
655 o_inst++;
656
657 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
658 t_dst_mask(vpi->DstReg.WriteMask));
659 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
660 VSF_IN_COMPONENT_X,
661 VSF_IN_COMPONENT_Y,
662 VSF_IN_COMPONENT_Z,
663 VSF_IN_COMPONENT_W,
664 VSF_IN_CLASS_TMP,
665 VSF_FLAG_NONE);
666 o_inst->src1 = UNUSED_SRC_0;
667 o_inst->src2 = UNUSED_SRC_0;
668 u_temp_i--;
669 }
670 goto next;
671
672 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
673 case OPCODE_SWZ:
674 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
675 t_dst_mask(vpi->DstReg.WriteMask));
676 o_inst->src0 = t_src(vp, &src[0]);
677 o_inst->src1 = ZERO_SRC_0;
678 o_inst->src2 = UNUSED_SRC_1;
679 goto next;
680
681 case OPCODE_MAD:
682 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
683 src[1].File == PROGRAM_TEMPORARY &&
684 src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
685
686 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&vpi->DstReg),
687 t_dst_mask(vpi->DstReg.WriteMask));
688 o_inst->src0 = t_src(vp, &src[0]);
689 #if 0
690 if ((o_inst - vp->instr) == 31) {
691 /* fix up the broken vertex program of quake4 demo... */
692 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
693 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
694 t_src_class(src[1].File),
695 src[1].NegateBase) | (src[1].RelAddr << 4);
696 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
697 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
698 t_src_class(src[1].File),
699 src[1].NegateBase) | (src[1].RelAddr << 4);
700 }
701 else {
702 o_inst->src1 = t_src(vp, &src[1]);
703 o_inst->src2 = t_src(vp, &src[2]);
704 }
705 #else
706 o_inst->src1 = t_src(vp, &src[1]);
707 o_inst->src2 = t_src(vp, &src[2]);
708 #endif
709 goto next;
710
711 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
712 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
713 t_dst_mask(vpi->DstReg.WriteMask));
714
715 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
716 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
717 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
718 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
719 SWIZZLE_ZERO,
720 t_src_class(src[0].File),
721 src[0].NegateBase) | (src[0].RelAddr << 4);
722
723 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
724 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
725 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
726 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
727 SWIZZLE_ZERO,
728 t_src_class(src[1].File),
729 src[1].NegateBase) | (src[1].RelAddr << 4);
730
731 o_inst->src2 = UNUSED_SRC_1;
732 goto next;
733
734 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
735 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
736 t_dst_mask(vpi->DstReg.WriteMask));
737
738 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
739 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
740 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
741 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
742 VSF_IN_COMPONENT_ONE,
743 t_src_class(src[0].File),
744 src[0].NegateBase) | (src[0].RelAddr << 4);
745 o_inst->src1 = t_src(vp, &src[1]);
746 o_inst->src2 = UNUSED_SRC_1;
747 goto next;
748
749 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
750 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
751 t_dst_mask(vpi->DstReg.WriteMask));
752
753 o_inst->src0 = t_src(vp, &src[0]);
754 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
755 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
756 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
757 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
758 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
759 t_src_class(src[1].File),
760 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
761 o_inst->src2 = UNUSED_SRC_1;
762 goto next;
763
764 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
765 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&vpi->DstReg),
766 t_dst_mask(vpi->DstReg.WriteMask));
767
768 o_inst->src0=t_src(vp, &src[0]);
769 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
770 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
771 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
772 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
773 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
774 t_src_class(src[0].File),
775 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
776 o_inst->src2 = UNUSED_SRC_1;
777 goto next;
778
779 case OPCODE_FLR:
780 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
781 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
782
783 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
784 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
785 t_dst_mask(vpi->DstReg.WriteMask));
786
787 o_inst->src0 = t_src(vp, &src[0]);
788 o_inst->src1 = UNUSED_SRC_0;
789 o_inst->src2 = UNUSED_SRC_1;
790 o_inst++;
791
792 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
793 t_dst_mask(vpi->DstReg.WriteMask));
794
795 o_inst->src0 = t_src(vp, &src[0]);
796 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
797 VSF_IN_COMPONENT_X,
798 VSF_IN_COMPONENT_Y,
799 VSF_IN_COMPONENT_Z,
800 VSF_IN_COMPONENT_W,
801 VSF_IN_CLASS_TMP,
802 /* Not 100% sure about this */
803 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
804
805 o_inst->src2 = UNUSED_SRC_0;
806 u_temp_i--;
807 goto next;
808
809 case OPCODE_XPD:
810 /* mul r0, r1.yzxw, r2.zxyw
811 mad r0, -r2.yzxw, r1.zxyw, r0
812 NOTE: might need MAD_2
813 */
814
815 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
816 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
817 t_dst_mask(vpi->DstReg.WriteMask));
818
819 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
820 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
821 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
822 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
823 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
824 t_src_class(src[0].File),
825 src[0].NegateBase) | (src[0].RelAddr << 4);
826
827 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
828 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
829 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
830 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
831 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
832 t_src_class(src[1].File),
833 src[1].NegateBase) | (src[1].RelAddr << 4);
834
835 o_inst->src2 = UNUSED_SRC_1;
836 o_inst++;
837 u_temp_i--;
838
839 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&vpi->DstReg),
840 t_dst_mask(vpi->DstReg.WriteMask));
841
842 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
843 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
844 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
845 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
846 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
847 t_src_class(src[1].File),
848 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
849
850 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
851 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
852 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
853 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
854 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
855 t_src_class(src[0].File),
856 src[0].NegateBase) | (src[0].RelAddr << 4);
857
858 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
859 VSF_IN_COMPONENT_X,
860 VSF_IN_COMPONENT_Y,
861 VSF_IN_COMPONENT_Z,
862 VSF_IN_COMPONENT_W,
863 VSF_IN_CLASS_TMP,
864 VSF_FLAG_NONE);
865 goto next;
866
867 case OPCODE_END:
868 assert(0);
869 default:
870 break;
871 }
872
873 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&vpi->DstReg),
874 t_dst_mask(vpi->DstReg.WriteMask));
875
876 if(are_srcs_scalar){
877 switch(operands){
878 case 1:
879 o_inst->src0 = t_src_scalar(vp, &src[0]);
880 o_inst->src1 = UNUSED_SRC_0;
881 o_inst->src2 = UNUSED_SRC_1;
882 break;
883
884 case 2:
885 o_inst->src0 = t_src_scalar(vp, &src[0]);
886 o_inst->src1 = t_src_scalar(vp, &src[1]);
887 o_inst->src2 = UNUSED_SRC_1;
888 break;
889
890 case 3:
891 o_inst->src0 = t_src_scalar(vp, &src[0]);
892 o_inst->src1 = t_src_scalar(vp, &src[1]);
893 o_inst->src2 = t_src_scalar(vp, &src[2]);
894 break;
895
896 default:
897 fprintf(stderr, "illegal number of operands %lu\n", operands);
898 exit(-1);
899 break;
900 }
901 } else {
902 switch(operands){
903 case 1:
904 o_inst->src0 = t_src(vp, &src[0]);
905 o_inst->src1 = UNUSED_SRC_0;
906 o_inst->src2 = UNUSED_SRC_1;
907 break;
908
909 case 2:
910 o_inst->src0 = t_src(vp, &src[0]);
911 o_inst->src1 = t_src(vp, &src[1]);
912 o_inst->src2 = UNUSED_SRC_1;
913 break;
914
915 case 3:
916 o_inst->src0 = t_src(vp, &src[0]);
917 o_inst->src1 = t_src(vp, &src[1]);
918 o_inst->src2 = t_src(vp, &src[2]);
919 break;
920
921 default:
922 fprintf(stderr, "illegal number of operands %lu\n", operands);
923 exit(-1);
924 break;
925 }
926 }
927 next:
928 if (mesa_vp->Base.NumNativeTemporaries <
929 (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) {
930 mesa_vp->Base.NumNativeTemporaries =
931 mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i);
932 }
933 if (u_temp_i < mesa_vp->Base.NumTemporaries) {
934 if (R200_DEBUG & DEBUG_FALLBACKS) {
935 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
936 }
937 return GL_FALSE;
938 }
939 u_temp_i = R200_VSF_MAX_TEMPS - 1;
940 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
941 mesa_vp->Base.NumNativeInstructions = 129;
942 if (R200_DEBUG & DEBUG_FALLBACKS) {
943 fprintf(stderr, "more than 128 native instructions\n");
944 }
945 return GL_FALSE;
946 }
947 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
948 vp->pos_end = (o_inst - vp->instr);
949 }
950 }
951
952 vp->native = GL_TRUE;
953 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
954 #if 0
955 fprintf(stderr, "hw program:\n");
956 for(i=0; i < vp->program.length; i++)
957 fprintf(stderr, "%08x\n", vp->instr[i]);
958 #endif
959 return GL_TRUE;
960 }
961
962 void r200SetupVertexProg( GLcontext *ctx ) {
963 r200ContextPtr rmesa = R200_CONTEXT(ctx);
964 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
965 GLboolean fallback;
966 GLint i;
967
968 if (!vp->translated) {
969 rmesa->curr_vp_hw = NULL;
970 r200_translate_vertex_program(vp);
971 }
972 /* could optimize setting up vertex progs away for non-tcl hw */
973 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
974 rmesa->r200Screen->drmSupportsVertexProgram);
975 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
976 if (fallback) return;
977
978 R200_STATECHANGE( rmesa, pvs );
979
980 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
981 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
982 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
983 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
984 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
985
986 /* maybe user clip planes just work with vertex progs... untested */
987 if (ctx->Transform.ClipPlanesEnabled) {
988 R200_STATECHANGE( rmesa, tcl );
989 if (vp->mesa_program.IsPositionInvariant) {
990 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
991 }
992 else {
993 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
994 }
995 }
996
997 if (vp != rmesa->curr_vp_hw) {
998 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
999 drm_radeon_cmd_header_t tmp;
1000
1001 R200_STATECHANGE( rmesa, vpi[0] );
1002 R200_STATECHANGE( rmesa, vpi[1] );
1003
1004 /* FIXME: what about using a memcopy... */
1005 for (i = 0; (i < 64) && i < count; i++) {
1006 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1007 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1008 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1009 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1010 }
1011 /* hack up the cmd_size so not the whole state atom is emitted always.
1012 This may require some more thought, we may emit half progs on lost state, but
1013 hopefully it won't matter?
1014 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1015 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1016 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1017 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1018 tmp.veclinear.count = (count > 64) ? 64 : count;
1019 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1020 if (count > 64) {
1021 for (i = 0; i < (count - 64); i++) {
1022 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1023 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1024 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1025 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1026 }
1027 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1028 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1029 tmp.veclinear.count = count - 64;
1030 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1031 }
1032 rmesa->curr_vp_hw = vp;
1033 }
1034 }
1035
1036
1037 static void
1038 r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1039 {
1040 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1041
1042 switch(target){
1043 case GL_VERTEX_PROGRAM_ARB:
1044 rmesa->curr_vp_hw = NULL;
1045 break;
1046 default:
1047 _mesa_problem(ctx, "Target not supported yet!");
1048 break;
1049 }
1050 }
1051
1052 static struct gl_program *
1053 r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1054 {
1055 struct r200_vertex_program *vp;
1056
1057 switch(target){
1058 case GL_VERTEX_PROGRAM_ARB:
1059 vp = CALLOC_STRUCT(r200_vertex_program);
1060 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1061 case GL_FRAGMENT_PROGRAM_ARB:
1062 case GL_FRAGMENT_PROGRAM_NV:
1063 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1064 default:
1065 _mesa_problem(ctx, "Bad target in r200NewProgram");
1066 }
1067 return NULL;
1068 }
1069
1070
1071 static void
1072 r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1073 {
1074 _mesa_delete_program(ctx, prog);
1075 }
1076
1077 static void
1078 r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1079 {
1080 struct r200_vertex_program *vp = (void *)prog;
1081
1082 switch(target) {
1083 case GL_VERTEX_PROGRAM_ARB:
1084 vp->translated = GL_FALSE;
1085 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1086 r200_translate_vertex_program(vp);
1087 break;
1088 }
1089 /* need this for tcl fallbacks */
1090 _tnl_program_string(ctx, target, prog);
1091 }
1092
1093 static GLboolean
1094 r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1095 {
1096 struct r200_vertex_program *vp = (void *)prog;
1097
1098 switch(target){
1099 case GL_VERTEX_STATE_PROGRAM_NV:
1100 case GL_VERTEX_PROGRAM_ARB:
1101 if (!vp->translated) {
1102 r200_translate_vertex_program(vp);
1103 }
1104 /* does not take parameters etc. into account */
1105 return vp->native;
1106 default:
1107 _mesa_problem(ctx, "Bad target in r200NewProgram");
1108 }
1109 return 0;
1110 }
1111
1112 void r200InitShaderFuncs(struct dd_function_table *functions)
1113 {
1114 functions->NewProgram = r200NewProgram;
1115 functions->BindProgram = r200BindProgram;
1116 functions->DeleteProgram = r200DeleteProgram;
1117 functions->ProgramStringNotify = r200ProgramStringNotify;
1118 functions->IsProgramNative = r200IsProgramNative;
1119 }