Check if mesa_vp->Base.Parameters is null before dereferencing (bug report
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 */
32 #include "glheader.h"
33 #include "macros.h"
34 #include "enums.h"
35 #include "program.h"
36
37 #include "r200_context.h"
38 #include "r200_vertprog.h"
39 #include "r200_ioctl.h"
40 #include "r200_tcl.h"
41 #include "program_instruction.h"
42 #include "tnl/tnl.h"
43
/*
 * Compile-time sanity check: t_swizzle() and t_dst_mask() pass Mesa swizzle
 * selectors and write masks straight through to the hardware encoding, so the
 * two sets of constants have to be numerically identical.
 */
#if !(SWIZZLE_X == VSF_IN_COMPONENT_X && \
      SWIZZLE_Y == VSF_IN_COMPONENT_Y && \
      SWIZZLE_Z == VSF_IN_COMPONENT_Z && \
      SWIZZLE_W == VSF_IN_COMPONENT_W && \
      SWIZZLE_ZERO == VSF_IN_COMPONENT_ZERO && \
      SWIZZLE_ONE == VSF_IN_COMPONENT_ONE && \
      WRITEMASK_X == VSF_FLAG_X && \
      WRITEMASK_Y == VSF_FLAG_Y && \
      WRITEMASK_Z == VSF_FLAG_Z && \
      WRITEMASK_W == VSF_FLAG_W)
#error Cannot change these!
#endif
56
/*
 * Layout of the per-opcode operand descriptor (the "ip" field of op_names[]):
 * the low bits selected by OP_MASK hold the number of source operands and
 * bit 31 (SCALAR_FLAG) marks an opcode whose sources are scalar.
 *
 * The flag bits are built from an unsigned long constant: shifting a signed 1
 * into bit 31 is undefined behaviour, and the resulting negative int would be
 * sign-extended when stored in the unsigned long descriptor on LP64 targets.
 */
#define SCALAR_FLAG (1UL << 31)
#define FLAG_MASK (1UL << 31)
#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
/* Build one op_names[] entry: opcode name string, Mesa opcode, descriptor. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
61
62 static struct{
63 char *name;
64 int opcode;
65 unsigned long ip; /* number of input operands and flags */
66 }op_names[]={
67 OPN(ABS, 1),
68 OPN(ADD, 2),
69 OPN(ARL, 1|SCALAR_FLAG),
70 OPN(DP3, 2),
71 OPN(DP4, 2),
72 OPN(DPH, 2),
73 OPN(DST, 2),
74 OPN(EX2, 1|SCALAR_FLAG),
75 OPN(EXP, 1|SCALAR_FLAG),
76 OPN(FLR, 1),
77 OPN(FRC, 1),
78 OPN(LG2, 1|SCALAR_FLAG),
79 OPN(LIT, 1),
80 OPN(LOG, 1|SCALAR_FLAG),
81 OPN(MAD, 3),
82 OPN(MAX, 2),
83 OPN(MIN, 2),
84 OPN(MOV, 1),
85 OPN(MUL, 2),
86 OPN(POW, 2|SCALAR_FLAG),
87 OPN(RCP, 1|SCALAR_FLAG),
88 OPN(RSQ, 1|SCALAR_FLAG),
89 OPN(SGE, 2),
90 OPN(SLT, 2),
91 OPN(SUB, 2),
92 OPN(SWZ, 1),
93 OPN(XPD, 2),
94 OPN(PRINT, 0),
95 OPN(END, 0),
96 };
97 #undef OPN
98
99 static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
100 {
101 r200ContextPtr rmesa = R200_CONTEXT( ctx );
102 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
103 int pi;
104 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
105 struct gl_program_parameter_list *paramList;
106 drm_radeon_cmd_header_t tmp;
107
108 R200_STATECHANGE( rmesa, vpp[0] );
109 R200_STATECHANGE( rmesa, vpp[1] );
110 assert(mesa_vp->Base.Parameters);
111 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
112 paramList = mesa_vp->Base.Parameters;
113
114 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
115 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
116 return GL_FALSE;
117 }
118
119 for(pi = 0; pi < paramList->NumParameters; pi++) {
120 switch(paramList->Parameters[pi].Type) {
121 case PROGRAM_STATE_VAR:
122 case PROGRAM_NAMED_PARAM:
123 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
124 case PROGRAM_CONSTANT:
125 *fcmd++ = paramList->ParameterValues[pi][0];
126 *fcmd++ = paramList->ParameterValues[pi][1];
127 *fcmd++ = paramList->ParameterValues[pi][2];
128 *fcmd++ = paramList->ParameterValues[pi][3];
129 break;
130 default:
131 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
132 break;
133 }
134 if (pi == 95) {
135 fcmd = (GLfloat *)rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
136 }
137 }
138 /* hack up the cmd_size so not the whole state atom is emitted always. */
139 rmesa->hw.vpp[0].cmd_size =
140 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
141 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
142 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
143 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
144 if (paramList->NumParameters > 96) {
145 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
146 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
147 tmp.veclinear.count = paramList->NumParameters - 96;
148 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
149 }
150 return GL_TRUE;
151 }
152
153 static __inline unsigned long t_dst_mask(GLuint mask)
154 {
155 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
156 return mask & VSF_FLAG_ALL;
157 }
158
159 static unsigned long t_dst(struct prog_dst_register *dst)
160 {
161 switch(dst->File) {
162 case PROGRAM_TEMPORARY:
163 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
164 | R200_VSF_OUT_CLASS_TMP);
165 case PROGRAM_OUTPUT:
166 switch (dst->Index) {
167 case VERT_RESULT_HPOS:
168 return R200_VSF_OUT_CLASS_RESULT_POS;
169 case VERT_RESULT_COL0:
170 return R200_VSF_OUT_CLASS_RESULT_COLOR;
171 case VERT_RESULT_COL1:
172 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
173 | R200_VSF_OUT_CLASS_RESULT_COLOR);
174 case VERT_RESULT_FOGC:
175 return R200_VSF_OUT_CLASS_RESULT_FOGC;
176 case VERT_RESULT_TEX0:
177 case VERT_RESULT_TEX1:
178 case VERT_RESULT_TEX2:
179 case VERT_RESULT_TEX3:
180 case VERT_RESULT_TEX4:
181 case VERT_RESULT_TEX5:
182 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
183 | R200_VSF_OUT_CLASS_RESULT_TEXC);
184 case VERT_RESULT_PSIZ:
185 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
186 default:
187 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
188 exit(0);
189 return 0;
190 }
191 case PROGRAM_ADDRESS:
192 assert (dst->Index == 0);
193 return R200_VSF_OUT_CLASS_ADDR;
194 default:
195 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
196 exit(0);
197 return 0;
198 }
199 }
200
201 static unsigned long t_src_class(enum register_file file)
202 {
203
204 switch(file){
205 case PROGRAM_TEMPORARY:
206 return VSF_IN_CLASS_TMP;
207
208 case PROGRAM_INPUT:
209 return VSF_IN_CLASS_ATTR;
210
211 case PROGRAM_LOCAL_PARAM:
212 case PROGRAM_ENV_PARAM:
213 case PROGRAM_NAMED_PARAM:
214 case PROGRAM_STATE_VAR:
215 return VSF_IN_CLASS_PARAM;
216 /*
217 case PROGRAM_OUTPUT:
218 case PROGRAM_WRITE_ONLY:
219 case PROGRAM_ADDRESS:
220 */
221 default:
222 fprintf(stderr, "problem in %s", __FUNCTION__);
223 exit(0);
224 }
225 }
226
227 static __inline unsigned long t_swizzle(GLubyte swizzle)
228 {
229 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
230 return swizzle;
231 }
232
#if 0
/* Debug helper (disabled): print the vp->inputs[] entry of every vertex
   attribute, prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
   }
   fprintf(stderr, ">\n");
}
#endif
250
251 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
252 {
253 /*
254 int i;
255 int max_reg = -1;
256 */
257 if(src->File == PROGRAM_INPUT){
258 /* if(vp->inputs[src->Index] != -1)
259 return vp->inputs[src->Index];
260
261 for(i=0; i < VERT_ATTRIB_MAX; i++)
262 if(vp->inputs[i] > max_reg)
263 max_reg = vp->inputs[i];
264
265 vp->inputs[src->Index] = max_reg+1;*/
266
267 //vp_dump_inputs(vp, __FUNCTION__);
268 assert(vp->inputs[src->Index] != -1);
269 return vp->inputs[src->Index];
270 } else {
271 if (src->Index < 0) {
272 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
273 return 0;
274 }
275 return src->Index;
276 }
277 }
278
279 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
280 {
281
282 return MAKE_VSF_SOURCE(t_src_index(vp, src),
283 t_swizzle(GET_SWZ(src->Swizzle, 0)),
284 t_swizzle(GET_SWZ(src->Swizzle, 1)),
285 t_swizzle(GET_SWZ(src->Swizzle, 2)),
286 t_swizzle(GET_SWZ(src->Swizzle, 3)),
287 t_src_class(src->File),
288 src->NegateBase) | (src->RelAddr << 4);
289 }
290
291 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
292 {
293
294 return MAKE_VSF_SOURCE(t_src_index(vp, src),
295 t_swizzle(GET_SWZ(src->Swizzle, 0)),
296 t_swizzle(GET_SWZ(src->Swizzle, 0)),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_src_class(src->File),
300 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
301 }
302
303 static unsigned long t_opcode(enum prog_opcode opcode)
304 {
305
306 switch(opcode){
307 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
308 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
309 * seems to ignore neg offsets which isn't quite correct...
310 */
311 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
312 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
313 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
314 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
315 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
316 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
317 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
318 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
319 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
320 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
321 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
322 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
323 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
324 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
325 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
326 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
327
328 default:
329 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
330 }
331 exit(-1);
332 return 0;
333 }
334
335 static unsigned long op_operands(enum prog_opcode opcode)
336 {
337 int i;
338
339 /* Can we trust mesas opcodes to be in order ? */
340 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
341 if(op_names[i].opcode == opcode)
342 return op_names[i].ip;
343
344 fprintf(stderr, "op %d not found in op_names\n", opcode);
345 exit(-1);
346 return 0;
347 }
348
/* TODO: Get rid of t_src_class call */
/*
 * True when sources a and b differ in index or relative addressing while
 * both are parameters or both are vertex attributes.
 * Arguments are parenthesized so any lvalue expression can be passed, and the
 * definition no longer ends in a line continuation that would splice the
 * following source line into the macro.
 */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
                        ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
                          t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
                         (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
                          t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
355
/* fglrx on rv250 encodes unused sources as follows:
   - unused but necessary sources repeat the previous source, zeroed out;
   - unnecessary sources repeat the previous source with VSF_IN_CLASS_NONE set.
   E.g. an add (2 args) used as a mov has its 2nd arg zeroed out and its 3rd
   arg set to VSF_IN_CLASS_NONE. Not sure if this is strictly necessary. */

/* Simplified definitions: they read the already encoded sources of the
   instruction *o_inst, so they must not be used before those are set up.
   They are NOT semantically equivalent to the r300 ones (code changes
   would be required). */

/* all four swizzle selector fields of an encoded source */
#define VP_SRC_SWIZZLE_BITS (0xfff << R200_VPI_IN_X_SHIFT)

/* all four selector fields selecting the constant zero */
#define VP_SRC_SWIZZLE_ZERO ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
                           | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
                           | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
                           | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* encoded source "reg" with every component replaced by zero */
#define VP_ZERO_SRC(reg) (((reg) & ~VP_SRC_SWIZZLE_BITS) | VP_SRC_SWIZZLE_ZERO)

/* encoded source "reg" with its low 4 bits replaced by 9
   (presumably the VSF_IN_CLASS_NONE encoding mentioned above - confirm) */
#define VP_UNUSED_SRC(reg) (((reg) & ~15) | 9)

#define ZERO_SRC_0 VP_ZERO_SRC(o_inst->src0)
#define ZERO_SRC_1 VP_ZERO_SRC(o_inst->src1)
#define ZERO_SRC_2 VP_ZERO_SRC(o_inst->src2)

#define UNUSED_SRC_0 VP_UNUSED_SRC(o_inst->src0)
#define UNUSED_SRC_1 VP_UNUSED_SRC(o_inst->src1)
#define UNUSED_SRC_2 VP_UNUSED_SRC(o_inst->src2)


/* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */
#define PREFER_DP4
391
392 static GLboolean r200_translate_vertex_program(struct r200_vertex_program *vp)
393 {
394 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
395 struct prog_instruction *vpi;
396 int i;
397 VERTEX_SHADER_INSTRUCTION *o_inst;
398 unsigned long operands;
399 int are_srcs_scalar;
400 unsigned long hw_op;
401
402 vp->native = GL_FALSE;
403
404 if ((mesa_vp->Base.InputsRead &
405 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
406 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
407 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
408 if (R200_DEBUG & DEBUG_FALLBACKS) {
409 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
410 mesa_vp->Base.InputsRead);
411 }
412 return GL_FALSE;
413 }
414
415 if (mesa_vp->IsNVProgram) {
416 /* subtle differences in spec like guaranteed initialized regs could cause
417 headaches. Might want to remove the driconf option to enable it completely */
418 return GL_FALSE;
419 }
420 /* Initial value should be last tmp reg that hw supports.
421 Strangely enough r300 doesnt mind even though these would be out of range.
422 Smart enough to realize that it doesnt need it? */
423 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
424 struct prog_src_register src[3];
425
426 /* if (getenv("R300_VP_SAFETY")) {
427 WARN_ONCE("R300_VP_SAFETY enabled.\n");
428
429 vpi = malloc((mesa_vp->Base.NumInstructions + VSF_MAX_FRAGMENT_TEMPS) * sizeof(struct prog_instruction));
430 memset(vpi, 0, VSF_MAX_FRAGMENT_TEMPS * sizeof(struct prog_instruction));
431
432 for (i=0; i < VSF_MAX_FRAGMENT_TEMPS; i++) {
433 vpi[i].Opcode = OPCODE_MOV;
434 vpi[i].StringPos = 0;
435 vpi[i].Data = 0;
436
437 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
438 vpi[i].DstReg.Index = i;
439 vpi[i].DstReg.WriteMask = WRITEMASK_XYZW;
440 vpi[i].DstReg.CondMask = COND_TR;
441
442 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
443 vpi[i].SrcReg[0].Index = 0;
444 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
445 }
446
447 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
448
449 free(mesa_vp->Base.Instructions);
450
451 mesa_vp->Base.Instructions = vpi;
452
453 mesa_vp->Base.NumInstructions += VSF_MAX_FRAGMENT_TEMPS;
454 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
455
456 assert(vpi->Opcode == OPCODE_END);
457 }*/
458 /* FIXME: is changing the prog safe to do here? */
459 if (mesa_vp->IsPositionInvariant) {
460 struct gl_program_parameter_list *paramList;
461 GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX };
462
463 #ifdef PREFER_DP4
464 tokens[5] = STATE_MATRIX;
465 #else
466 tokens[5] = STATE_MATRIX_TRANSPOSE;
467 #endif
468 paramList = mesa_vp->Base.Parameters;
469
470 vpi = malloc((mesa_vp->Base.NumInstructions + 4) * sizeof(struct prog_instruction));
471 memset(vpi, 0, 4 * sizeof(struct prog_instruction));
472
473 for (i=0; i < 4; i++) {
474 GLint idx;
475 tokens[3] = tokens[4] = i;
476 idx = _mesa_add_state_reference(paramList, tokens);
477 #ifdef PREFER_DP4
478 vpi[i].Opcode = OPCODE_DP4;
479 vpi[i].StringPos = 0;
480 vpi[i].Data = 0;
481
482 vpi[i].DstReg.File = PROGRAM_OUTPUT;
483 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
484 vpi[i].DstReg.WriteMask = 1 << i;
485 vpi[i].DstReg.CondMask = COND_TR;
486
487 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
488 vpi[i].SrcReg[0].Index = idx;
489 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
490
491 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
492 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
493 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
494 #else
495 if (i == 0)
496 vpi[i].Opcode = OPCODE_MUL;
497 else
498 vpi[i].Opcode = OPCODE_MAD;
499
500 vpi[i].StringPos = 0;
501 vpi[i].Data = 0;
502
503 if (i == 3)
504 vpi[i].DstReg.File = PROGRAM_OUTPUT;
505 else
506 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
507 vpi[i].DstReg.Index = 0;
508 vpi[i].DstReg.WriteMask = 0xf;
509 vpi[i].DstReg.CondMask = COND_TR;
510
511 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
512 vpi[i].SrcReg[0].Index = idx;
513 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
514
515 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
516 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
517 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
518
519 if (i > 0) {
520 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
521 vpi[i].SrcReg[2].Index = 0;
522 vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
523 }
524 #endif
525 }
526
527 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
528
529 free(mesa_vp->Base.Instructions);
530
531 mesa_vp->Base.Instructions = vpi;
532
533 mesa_vp->Base.NumInstructions += 4;
534 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
535
536 assert(vpi->Opcode == OPCODE_END);
537
538 mesa_vp->Base.InputsRead |= (1 << VERT_ATTRIB_POS);
539 mesa_vp->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS);
540
541 //fprintf(stderr, "IsPositionInvariant is set!\n");
542 //_mesa_print_program(&mesa_vp->Base);
543 }
544
545 vp->pos_end = 0;
546 mesa_vp->Base.NumNativeInstructions = 0;
547 if (mesa_vp->Base.Parameters)
548 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
549 else
550 mesa_vp->Base.NumNativeParameters = 0;
551
552 for(i=0; i < VERT_ATTRIB_MAX; i++)
553 vp->inputs[i] = -1;
554 /* fglrx uses fixed inputs as follows for conventional attribs.
555 generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
556 There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
557 attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
558 vertex normal/weight)
559 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
560 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
561 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
562 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
563 generic attribs would require some more work (dma regions, renaming). */
564
565 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
566 vp->inputs[VERT_ATTRIB_POS] = 0;
567 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
568 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
569 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
570 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
571 vp->inputs[VERT_ATTRIB_FOG] = 15;
572 vp->inputs[VERT_ATTRIB_TEX0] = 6;
573 vp->inputs[VERT_ATTRIB_TEX1] = 7;
574 vp->inputs[VERT_ATTRIB_TEX2] = 8;
575 vp->inputs[VERT_ATTRIB_TEX3] = 9;
576 vp->inputs[VERT_ATTRIB_TEX4] = 10;
577 vp->inputs[VERT_ATTRIB_TEX5] = 11;
578 /* attr 4,5 and 13 are only used with generic attribs.
579 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
580 not possibe to use with vertex progs as it is lacking in vert prog specification) */
581
582 assert(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS));
583
584 vp->translated = GL_TRUE;
585
586 o_inst = vp->instr;
587 for(vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
588 if (u_temp_i < mesa_vp->Base.NumTemporaries) {
589 if (R200_DEBUG & DEBUG_FALLBACKS) {
590 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
591 }
592 return GL_FALSE;
593 }
594 u_temp_i = R200_VSF_MAX_TEMPS - 1;
595 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
596 mesa_vp->Base.NumNativeInstructions = 129;
597 if (R200_DEBUG & DEBUG_FALLBACKS) {
598 fprintf(stderr, "more than 128 native instructions\n");
599 }
600 return GL_FALSE;
601 }
602
603 operands = op_operands(vpi->Opcode);
604 are_srcs_scalar = operands & SCALAR_FLAG;
605 operands &= OP_MASK;
606
607 for(i = 0; i < operands; i++)
608 src[i] = vpi->SrcReg[i];
609
610 if(operands == 3){
611 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
612 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
613 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
614 VSF_FLAG_ALL);
615
616 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
617 SWIZZLE_X, SWIZZLE_Y,
618 SWIZZLE_Z, SWIZZLE_W,
619 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
620
621 o_inst->src1 = ZERO_SRC_0;
622 o_inst->src2 = UNUSED_SRC_1;
623 o_inst++;
624
625 src[2].File = PROGRAM_TEMPORARY;
626 src[2].Index = u_temp_i;
627 src[2].RelAddr = 0;
628 u_temp_i--;
629 }
630 }
631
632 if(operands >= 2){
633 if( CMP_SRCS(src[1], src[0]) ){
634 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
635 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
636 VSF_FLAG_ALL);
637
638 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
639 SWIZZLE_X, SWIZZLE_Y,
640 SWIZZLE_Z, SWIZZLE_W,
641 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
642
643 o_inst->src1 = ZERO_SRC_0;
644 o_inst->src2 = UNUSED_SRC_1;
645 o_inst++;
646
647 src[0].File = PROGRAM_TEMPORARY;
648 src[0].Index = u_temp_i;
649 src[0].RelAddr = 0;
650 u_temp_i--;
651 }
652 }
653
654 /* These ops need special handling. */
655 switch(vpi->Opcode){
656 case OPCODE_POW:
657 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
658 So may need to insert additional instruction */
659 if ((src[0].File == src[1].File) &&
660 (src[0].Index == src[1].Index)) {
661 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
662 t_dst_mask(vpi->DstReg.WriteMask));
663 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
664 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
665 SWIZZLE_ZERO,
666 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
667 SWIZZLE_ZERO,
668 t_src_class(src[0].File),
669 src[0].NegateBase) | (src[0].RelAddr << 4);
670 o_inst->src1 = UNUSED_SRC_0;
671 o_inst->src2 = UNUSED_SRC_0;
672 }
673 else {
674 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
675 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
676 VSF_FLAG_ALL);
677 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
678 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
679 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
680 t_src_class(src[0].File),
681 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
682 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
683 SWIZZLE_ZERO, SWIZZLE_ZERO,
684 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
685 t_src_class(src[1].File),
686 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
687 o_inst->src2 = UNUSED_SRC_1;
688 o_inst++;
689
690 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
691 t_dst_mask(vpi->DstReg.WriteMask));
692 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
693 VSF_IN_COMPONENT_X,
694 VSF_IN_COMPONENT_Y,
695 VSF_IN_COMPONENT_Z,
696 VSF_IN_COMPONENT_W,
697 VSF_IN_CLASS_TMP,
698 VSF_FLAG_NONE);
699 o_inst->src1 = UNUSED_SRC_0;
700 o_inst->src2 = UNUSED_SRC_0;
701 u_temp_i--;
702 }
703 goto next;
704
705 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
706 case OPCODE_SWZ:
707 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
708 t_dst_mask(vpi->DstReg.WriteMask));
709 o_inst->src0 = t_src(vp, &src[0]);
710 o_inst->src1 = ZERO_SRC_0;
711 o_inst->src2 = UNUSED_SRC_1;
712 goto next;
713
714 case OPCODE_MAD:
715 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
716 src[1].File == PROGRAM_TEMPORARY &&
717 src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
718
719 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&vpi->DstReg),
720 t_dst_mask(vpi->DstReg.WriteMask));
721 o_inst->src0 = t_src(vp, &src[0]);
722 #if 0
723 if ((o_inst - vp->instr) == 31) {
724 /* fix up the broken vertex program of quake4 demo... */
725 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
726 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
727 t_src_class(src[1].File),
728 src[1].NegateBase) | (src[1].RelAddr << 4);
729 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
730 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
731 t_src_class(src[1].File),
732 src[1].NegateBase) | (src[1].RelAddr << 4);
733 }
734 else {
735 o_inst->src1 = t_src(vp, &src[1]);
736 o_inst->src2 = t_src(vp, &src[2]);
737 }
738 #else
739 o_inst->src1 = t_src(vp, &src[1]);
740 o_inst->src2 = t_src(vp, &src[2]);
741 #endif
742 goto next;
743
744 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
745 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
746 t_dst_mask(vpi->DstReg.WriteMask));
747
748 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
749 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
750 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
751 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
752 SWIZZLE_ZERO,
753 t_src_class(src[0].File),
754 src[0].NegateBase) | (src[0].RelAddr << 4);
755
756 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
757 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
758 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
759 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
760 SWIZZLE_ZERO,
761 t_src_class(src[1].File),
762 src[1].NegateBase) | (src[1].RelAddr << 4);
763
764 o_inst->src2 = UNUSED_SRC_1;
765 goto next;
766
767 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
768 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
769 t_dst_mask(vpi->DstReg.WriteMask));
770
771 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
772 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
773 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
774 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
775 VSF_IN_COMPONENT_ONE,
776 t_src_class(src[0].File),
777 src[0].NegateBase) | (src[0].RelAddr << 4);
778 o_inst->src1 = t_src(vp, &src[1]);
779 o_inst->src2 = UNUSED_SRC_1;
780 goto next;
781
782 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
783 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
784 t_dst_mask(vpi->DstReg.WriteMask));
785
786 o_inst->src0 = t_src(vp, &src[0]);
787 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
788 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
789 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
790 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
791 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
792 t_src_class(src[1].File),
793 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
794 o_inst->src2 = UNUSED_SRC_1;
795 goto next;
796
797 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
798 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&vpi->DstReg),
799 t_dst_mask(vpi->DstReg.WriteMask));
800
801 o_inst->src0=t_src(vp, &src[0]);
802 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
803 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
804 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
805 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
806 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
807 t_src_class(src[0].File),
808 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
809 o_inst->src2 = UNUSED_SRC_1;
810 goto next;
811
812 case OPCODE_FLR:
813 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
814 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
815
816 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
817 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
818 t_dst_mask(vpi->DstReg.WriteMask));
819
820 o_inst->src0 = t_src(vp, &src[0]);
821 o_inst->src1 = UNUSED_SRC_0;
822 o_inst->src2 = UNUSED_SRC_1;
823 o_inst++;
824
825 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
826 t_dst_mask(vpi->DstReg.WriteMask));
827
828 o_inst->src0 = t_src(vp, &src[0]);
829 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
830 VSF_IN_COMPONENT_X,
831 VSF_IN_COMPONENT_Y,
832 VSF_IN_COMPONENT_Z,
833 VSF_IN_COMPONENT_W,
834 VSF_IN_CLASS_TMP,
835 /* Not 100% sure about this */
836 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
837
838 o_inst->src2 = UNUSED_SRC_0;
839 u_temp_i--;
840 goto next;
841
842 case OPCODE_XPD:
843 /* mul r0, r1.yzxw, r2.zxyw
844 mad r0, -r2.yzxw, r1.zxyw, r0
845 NOTE: might need MAD_2
846 */
847
848 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
849 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
850 t_dst_mask(vpi->DstReg.WriteMask));
851
852 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
853 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
854 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
855 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
856 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
857 t_src_class(src[0].File),
858 src[0].NegateBase) | (src[0].RelAddr << 4);
859
860 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
861 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
862 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
863 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
864 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
865 t_src_class(src[1].File),
866 src[1].NegateBase) | (src[1].RelAddr << 4);
867
868 o_inst->src2 = UNUSED_SRC_1;
869 o_inst++;
870 u_temp_i--;
871
872 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&vpi->DstReg),
873 t_dst_mask(vpi->DstReg.WriteMask));
874
875 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
876 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
877 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
878 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
879 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
880 t_src_class(src[1].File),
881 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
882
883 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
884 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
885 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
886 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
887 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
888 t_src_class(src[0].File),
889 src[0].NegateBase) | (src[0].RelAddr << 4);
890
891 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
892 VSF_IN_COMPONENT_X,
893 VSF_IN_COMPONENT_Y,
894 VSF_IN_COMPONENT_Z,
895 VSF_IN_COMPONENT_W,
896 VSF_IN_CLASS_TMP,
897 VSF_FLAG_NONE);
898 goto next;
899
900 case OPCODE_END:
901 break;
902 default:
903 break;
904 }
905
906 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&vpi->DstReg),
907 t_dst_mask(vpi->DstReg.WriteMask));
908
909 if(are_srcs_scalar){
910 switch(operands){
911 case 1:
912 o_inst->src0 = t_src_scalar(vp, &src[0]);
913 o_inst->src1 = UNUSED_SRC_0;
914 o_inst->src2 = UNUSED_SRC_1;
915 break;
916
917 case 2:
918 o_inst->src0 = t_src_scalar(vp, &src[0]);
919 o_inst->src1 = t_src_scalar(vp, &src[1]);
920 o_inst->src2 = UNUSED_SRC_1;
921 break;
922
923 case 3:
924 o_inst->src0 = t_src_scalar(vp, &src[0]);
925 o_inst->src1 = t_src_scalar(vp, &src[1]);
926 o_inst->src2 = t_src_scalar(vp, &src[2]);
927 break;
928
929 default:
930 fprintf(stderr, "illegal number of operands %lu\n", operands);
931 exit(-1);
932 break;
933 }
934 } else {
935 switch(operands){
936 case 1:
937 o_inst->src0 = t_src(vp, &src[0]);
938 o_inst->src1 = UNUSED_SRC_0;
939 o_inst->src2 = UNUSED_SRC_1;
940 break;
941
942 case 2:
943 o_inst->src0 = t_src(vp, &src[0]);
944 o_inst->src1 = t_src(vp, &src[1]);
945 o_inst->src2 = UNUSED_SRC_1;
946 break;
947
948 case 3:
949 o_inst->src0 = t_src(vp, &src[0]);
950 o_inst->src1 = t_src(vp, &src[1]);
951 o_inst->src2 = t_src(vp, &src[2]);
952 break;
953
954 default:
955 fprintf(stderr, "illegal number of operands %lu\n", operands);
956 exit(-1);
957 break;
958 }
959 }
960 next:
961 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
962 vp->pos_end = (o_inst - vp->instr);
963 }
964 }
965
966 /* need to test again since some instructions require more than one (up to 3) native inst */
967 if(o_inst - vp->instr > R200_VSF_MAX_INST) {
968 mesa_vp->Base.NumNativeInstructions = 129;
969 if (R200_DEBUG & DEBUG_FALLBACKS) {
970 fprintf(stderr, "more than 128 native instructions\n");
971 }
972 return GL_FALSE;
973 }
974 vp->native = GL_TRUE;
975 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
976 #if 0
977 fprintf(stderr, "hw program:\n");
978 for(i=0; i < vp->program.length; i++)
979 fprintf(stderr, "%08x\n", vp->instr[i]);
980 #endif
981 return GL_TRUE;
982 }
983
984 void r200SetupVertexProg( GLcontext *ctx ) {
985 r200ContextPtr rmesa = R200_CONTEXT(ctx);
986 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
987 GLboolean fallback;
988 GLint i;
989
990 if (!vp->translated) {
991 rmesa->curr_vp_hw = NULL;
992 r200_translate_vertex_program(vp);
993 }
994 /* could optimize setting up vertex progs away for non-tcl hw */
995 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
996 rmesa->r200Screen->drmSupportsVertexProgram);
997 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
998 if (fallback) return;
999
1000 R200_STATECHANGE( rmesa, pvs );
1001
1002 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1003 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1004 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1005 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1006 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1007
1008 /* maybe user clip planes just work with vertex progs... untested */
1009 if (ctx->Transform.ClipPlanesEnabled) {
1010 R200_STATECHANGE( rmesa, tcl );
1011 if (vp->mesa_program.IsPositionInvariant) {
1012 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1013 }
1014 else {
1015 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1016 }
1017 }
1018
1019 if (vp != rmesa->curr_vp_hw) {
1020 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1021 drm_radeon_cmd_header_t tmp;
1022
1023 R200_STATECHANGE( rmesa, vpi[0] );
1024 R200_STATECHANGE( rmesa, vpi[1] );
1025
1026 /* FIXME: what about using a memcopy... */
1027 for (i = 0; (i < 64) && i < count; i++) {
1028 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1029 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1030 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1031 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1032 }
1033 /* hack up the cmd_size so not the whole state atom is emitted always.
1034 This may require some more thought, we may emit half progs on lost state, but
1035 hopefully it won't matter?
1036 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1037 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1038 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1039 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1040 tmp.veclinear.count = (count > 64) ? 64 : count;
1041 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1042 if (count > 64) {
1043 for (i = 0; i < (count - 64); i++) {
1044 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1045 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1046 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1047 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1048 }
1049 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1050 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1051 tmp.veclinear.count = count - 64;
1052 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1053 }
1054 rmesa->curr_vp_hw = vp;
1055 }
1056 }
1057
1058
1059 static void
1060 r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1061 {
1062 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1063
1064 switch(target){
1065 case GL_VERTEX_PROGRAM_ARB:
1066 rmesa->curr_vp_hw = NULL;
1067 break;
1068 default:
1069 _mesa_problem(ctx, "Target not supported yet!");
1070 break;
1071 }
1072 }
1073
1074 static struct gl_program *
1075 r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1076 {
1077 struct r200_vertex_program *vp;
1078
1079 switch(target){
1080 case GL_VERTEX_PROGRAM_ARB:
1081 vp = CALLOC_STRUCT(r200_vertex_program);
1082 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1083 case GL_FRAGMENT_PROGRAM_ARB:
1084 case GL_FRAGMENT_PROGRAM_NV:
1085 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1086 default:
1087 _mesa_problem(ctx, "Bad target in r200NewProgram");
1088 }
1089 return NULL;
1090 }
1091
1092
1093 static void
1094 r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1095 {
1096 _mesa_delete_program(ctx, prog);
1097 }
1098
1099 static void
1100 r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1101 {
1102 struct r200_vertex_program *vp = (void *)prog;
1103
1104 switch(target) {
1105 case GL_VERTEX_PROGRAM_ARB:
1106 vp->translated = GL_FALSE;
1107 memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));
1108 /*r200_translate_vertex_shader(vp);*/
1109 break;
1110 }
1111 /* need this for tcl fallbacks */
1112 _tnl_program_string(ctx, target, prog);
1113 }
1114
1115 static GLboolean
1116 r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1117 {
1118 struct r200_vertex_program *vp = (void *)prog;
1119
1120 switch(target){
1121 case GL_VERTEX_STATE_PROGRAM_NV:
1122 case GL_VERTEX_PROGRAM_ARB:
1123 if (!vp->translated) {
1124 r200_translate_vertex_program(vp);
1125 }
1126 /* does not take parameters etc. into account */
1127 return vp->native;
1128 default:
1129 _mesa_problem(ctx, "Bad target in r200NewProgram");
1130 }
1131 return 0;
1132 }
1133
1134 void r200InitShaderFuncs(struct dd_function_table *functions)
1135 {
1136 functions->NewProgram = r200NewProgram;
1137 functions->BindProgram = r200BindProgram;
1138 functions->DeleteProgram = r200DeleteProgram;
1139 functions->ProgramStringNotify = r200ProgramStringNotify;
1140 functions->IsProgramNative = r200IsProgramNative;
1141 }