b0f13e5f0a3c76ab1b3913608b4e20976a02db9b
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 */
32 #include "glheader.h"
33 #include "macros.h"
34 #include "enums.h"
35 #include "program.h"
36
37 #include "r200_context.h"
38 #include "r200_vertprog.h"
39 #include "r200_ioctl.h"
40 #include "r200_tcl.h"
41 #include "program_instruction.h"
42 #include "tnl/tnl.h"
43
44 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
45 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
46 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
47 SWIZZLE_W != VSF_IN_COMPONENT_W || \
48 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
49 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
50 WRITEMASK_X != VSF_FLAG_X || \
51 WRITEMASK_Y != VSF_FLAG_Y || \
52 WRITEMASK_Z != VSF_FLAG_Z || \
53 WRITEMASK_W != VSF_FLAG_W
54 #error Cannot change these!
55 #endif
56
57 #define SCALAR_FLAG (1<<31)
58 #define FLAG_MASK (1<<31)
59 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
60 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
61
62 static struct{
63 char *name;
64 int opcode;
65 unsigned long ip; /* number of input operands and flags */
66 }op_names[]={
67 OPN(ABS, 1),
68 OPN(ADD, 2),
69 OPN(ARL, 1|SCALAR_FLAG),
70 OPN(DP3, 2),
71 OPN(DP4, 2),
72 OPN(DPH, 2),
73 OPN(DST, 2),
74 OPN(EX2, 1|SCALAR_FLAG),
75 OPN(EXP, 1|SCALAR_FLAG),
76 OPN(FLR, 1),
77 OPN(FRC, 1),
78 OPN(LG2, 1|SCALAR_FLAG),
79 OPN(LIT, 1),
80 OPN(LOG, 1|SCALAR_FLAG),
81 OPN(MAD, 3),
82 OPN(MAX, 2),
83 OPN(MIN, 2),
84 OPN(MOV, 1),
85 OPN(MUL, 2),
86 OPN(POW, 2|SCALAR_FLAG),
87 OPN(RCP, 1|SCALAR_FLAG),
88 OPN(RSQ, 1|SCALAR_FLAG),
89 OPN(SGE, 2),
90 OPN(SLT, 2),
91 OPN(SUB, 2),
92 OPN(SWZ, 1),
93 OPN(XPD, 2),
94 OPN(PRINT, 0),
95 OPN(END, 0),
96 };
97 #undef OPN
98
99 static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
100 {
101 r200ContextPtr rmesa = R200_CONTEXT( ctx );
102 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
103 int pi;
104 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
105 struct gl_program_parameter_list *paramList;
106 drm_radeon_cmd_header_t tmp;
107
108 R200_STATECHANGE( rmesa, vpp[0] );
109 R200_STATECHANGE( rmesa, vpp[1] );
110 assert(mesa_vp->Base.Parameters);
111 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
112 paramList = mesa_vp->Base.Parameters;
113
114 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
115 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
116 return GL_FALSE;
117 }
118
119 for(pi = 0; pi < paramList->NumParameters; pi++) {
120 switch(paramList->Parameters[pi].Type) {
121 case PROGRAM_STATE_VAR:
122 case PROGRAM_NAMED_PARAM:
123 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
124 case PROGRAM_CONSTANT:
125 *fcmd++ = paramList->ParameterValues[pi][0];
126 *fcmd++ = paramList->ParameterValues[pi][1];
127 *fcmd++ = paramList->ParameterValues[pi][2];
128 *fcmd++ = paramList->ParameterValues[pi][3];
129 break;
130 default:
131 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
132 break;
133 }
134 if (pi == 95) {
135 fcmd = (GLfloat *)rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
136 }
137 }
138 /* hack up the cmd_size so not the whole state atom is emitted always. */
139 rmesa->hw.vpp[0].cmd_size =
140 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
141 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
142 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
143 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
144 if (paramList->NumParameters > 96) {
145 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
146 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
147 tmp.veclinear.count = paramList->NumParameters - 96;
148 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
149 }
150 return GL_TRUE;
151 }
152
153 static __inline unsigned long t_dst_mask(GLuint mask)
154 {
155 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
156 return mask & VSF_FLAG_ALL;
157 }
158
159 static unsigned long t_dst(struct prog_dst_register *dst)
160 {
161 switch(dst->File) {
162 case PROGRAM_TEMPORARY:
163 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
164 | R200_VSF_OUT_CLASS_TMP);
165 case PROGRAM_OUTPUT:
166 switch (dst->Index) {
167 case VERT_RESULT_HPOS:
168 return R200_VSF_OUT_CLASS_RESULT_POS;
169 case VERT_RESULT_COL0:
170 return R200_VSF_OUT_CLASS_RESULT_COLOR;
171 case VERT_RESULT_COL1:
172 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
173 | R200_VSF_OUT_CLASS_RESULT_COLOR);
174 case VERT_RESULT_FOGC:
175 return R200_VSF_OUT_CLASS_RESULT_FOGC;
176 case VERT_RESULT_TEX0:
177 case VERT_RESULT_TEX1:
178 case VERT_RESULT_TEX2:
179 case VERT_RESULT_TEX3:
180 case VERT_RESULT_TEX4:
181 case VERT_RESULT_TEX5:
182 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
183 | R200_VSF_OUT_CLASS_RESULT_TEXC);
184 case VERT_RESULT_PSIZ:
185 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
186 default:
187 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
188 exit(0);
189 return 0;
190 }
191 case PROGRAM_ADDRESS:
192 assert (dst->Index == 0);
193 return R200_VSF_OUT_CLASS_ADDR;
194 default:
195 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
196 exit(0);
197 return 0;
198 }
199 }
200
201 static unsigned long t_src_class(enum register_file file)
202 {
203
204 switch(file){
205 case PROGRAM_TEMPORARY:
206 return VSF_IN_CLASS_TMP;
207
208 case PROGRAM_INPUT:
209 return VSF_IN_CLASS_ATTR;
210
211 case PROGRAM_LOCAL_PARAM:
212 case PROGRAM_ENV_PARAM:
213 case PROGRAM_NAMED_PARAM:
214 case PROGRAM_STATE_VAR:
215 return VSF_IN_CLASS_PARAM;
216 /*
217 case PROGRAM_OUTPUT:
218 case PROGRAM_WRITE_ONLY:
219 case PROGRAM_ADDRESS:
220 */
221 default:
222 fprintf(stderr, "problem in %s", __FUNCTION__);
223 exit(0);
224 }
225 }
226
227 static __inline unsigned long t_swizzle(GLubyte swizzle)
228 {
229 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
230 return swizzle;
231 }
232
#if 0
/* Debugging aid (compiled out): print the input register assignment table
   of vp to stderr, prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   attr = 0;
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
250
251 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
252 {
253 /*
254 int i;
255 int max_reg = -1;
256 */
257 if(src->File == PROGRAM_INPUT){
258 /* if(vp->inputs[src->Index] != -1)
259 return vp->inputs[src->Index];
260
261 for(i=0; i < VERT_ATTRIB_MAX; i++)
262 if(vp->inputs[i] > max_reg)
263 max_reg = vp->inputs[i];
264
265 vp->inputs[src->Index] = max_reg+1;*/
266
267 //vp_dump_inputs(vp, __FUNCTION__);
268 assert(vp->inputs[src->Index] != -1);
269 return vp->inputs[src->Index];
270 } else {
271 if (src->Index < 0) {
272 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
273 return 0;
274 }
275 return src->Index;
276 }
277 }
278
279 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
280 {
281
282 return MAKE_VSF_SOURCE(t_src_index(vp, src),
283 t_swizzle(GET_SWZ(src->Swizzle, 0)),
284 t_swizzle(GET_SWZ(src->Swizzle, 1)),
285 t_swizzle(GET_SWZ(src->Swizzle, 2)),
286 t_swizzle(GET_SWZ(src->Swizzle, 3)),
287 t_src_class(src->File),
288 src->NegateBase) | (src->RelAddr << 4);
289 }
290
291 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
292 {
293
294 return MAKE_VSF_SOURCE(t_src_index(vp, src),
295 t_swizzle(GET_SWZ(src->Swizzle, 0)),
296 t_swizzle(GET_SWZ(src->Swizzle, 0)),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_src_class(src->File),
300 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
301 }
302
303 static unsigned long t_opcode(enum prog_opcode opcode)
304 {
305
306 switch(opcode){
307 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
308 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
309 * seems to ignore neg offsets which isn't quite correct...
310 */
311 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
312 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
313 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
314 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
315 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
316 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
317 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
318 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
319 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
320 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
321 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
322 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
323 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
324 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
325 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
326 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
327
328 default:
329 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
330 }
331 exit(-1);
332 return 0;
333 }
334
335 static unsigned long op_operands(enum prog_opcode opcode)
336 {
337 int i;
338
339 /* Can we trust mesas opcodes to be in order ? */
340 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
341 if(op_names[i].opcode == opcode)
342 return op_names[i].ip;
343
344 fprintf(stderr, "op %d not found in op_names\n", opcode);
345 exit(-1);
346 return 0;
347 }
348
/* TODO: Get rid of t_src_class call */
/* True when a and b name different registers (different index or different
   relative-addressing flag) and are either both parameters or both vertex
   attributes.  The translator copies one of the two into a temporary when
   this holds.  Arguments and the whole expansion are parenthesized so the
   macro is safe inside larger expressions. */
#define CMP_SRCS(a, b) \
   ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
    ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
      t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
     (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
      t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are same as previous source, zero-ed out.
   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* ZERO_SRC_n: source n of the instruction at o_inst with all four swizzle
   selectors replaced by "zero".  Relies on a variable named o_inst being in
   scope at the point of use. */
#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
                    | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
                    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
                    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
                    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
                    | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
                    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
                    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
                    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
                    | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
                    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
                    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
                    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

/* UNUSED_SRC_n: source n of the instruction at o_inst with its low four
   bits replaced by 9 (presumably the VSF_IN_CLASS_NONE encoding mentioned
   above - TODO confirm against the register headers). */
#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)

#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)

#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)


/* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */
#define PREFER_DP4
391
392
393 /**
394 * Generate an R200 vertex program from Mesa's internal representation.
395 *
396 * \return GL_TRUE for success, GL_FALSE for failure.
397 */
398 static GLboolean r200_translate_vertex_program(struct r200_vertex_program *vp)
399 {
400 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
401 struct prog_instruction *vpi;
402 int i;
403 VERTEX_SHADER_INSTRUCTION *o_inst;
404 unsigned long operands;
405 int are_srcs_scalar;
406 unsigned long hw_op;
407
408 vp->native = GL_FALSE;
409
410 if (mesa_vp->Base.NumInstructions == 0)
411 return GL_FALSE;
412
413 if ((mesa_vp->Base.InputsRead &
414 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
415 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
416 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
417 if (R200_DEBUG & DEBUG_FALLBACKS) {
418 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
419 mesa_vp->Base.InputsRead);
420 }
421 return GL_FALSE;
422 }
423
424 if (mesa_vp->IsNVProgram) {
425 /* subtle differences in spec like guaranteed initialized regs could cause
426 headaches. Might want to remove the driconf option to enable it completely */
427 return GL_FALSE;
428 }
429 /* Initial value should be last tmp reg that hw supports.
430 Strangely enough r300 doesnt mind even though these would be out of range.
431 Smart enough to realize that it doesnt need it? */
432 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
433 struct prog_src_register src[3];
434
435 #if 0
436 if (getenv("R300_VP_SAFETY")) {
437 WARN_ONCE("R300_VP_SAFETY enabled.\n");
438
439 vpi = malloc((mesa_vp->Base.NumInstructions + VSF_MAX_FRAGMENT_TEMPS) * sizeof(struct prog_instruction));
440 memset(vpi, 0, VSF_MAX_FRAGMENT_TEMPS * sizeof(struct prog_instruction));
441
442 for (i=0; i < VSF_MAX_FRAGMENT_TEMPS; i++) {
443 vpi[i].Opcode = OPCODE_MOV;
444 vpi[i].StringPos = 0;
445 vpi[i].Data = 0;
446
447 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
448 vpi[i].DstReg.Index = i;
449 vpi[i].DstReg.WriteMask = WRITEMASK_XYZW;
450 vpi[i].DstReg.CondMask = COND_TR;
451
452 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
453 vpi[i].SrcReg[0].Index = 0;
454 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
455 }
456
457 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
458
459 free(mesa_vp->Base.Instructions);
460
461 mesa_vp->Base.Instructions = vpi;
462
463 mesa_vp->Base.NumInstructions += VSF_MAX_FRAGMENT_TEMPS;
464 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
465
466 assert(vpi->Opcode == OPCODE_END);
467 }
468 #endif
469
470 /* FIXME: is changing the prog safe to do here? */
471 if (mesa_vp->IsPositionInvariant) {
472 struct gl_program_parameter_list *paramList;
473 GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX };
474
475 #ifdef PREFER_DP4
476 tokens[5] = STATE_MATRIX;
477 #else
478 tokens[5] = STATE_MATRIX_TRANSPOSE;
479 #endif
480 paramList = mesa_vp->Base.Parameters;
481
482 vpi = malloc((mesa_vp->Base.NumInstructions + 4) * sizeof(struct prog_instruction));
483 memset(vpi, 0, 4 * sizeof(struct prog_instruction));
484
485 /* emit four dot product instructions to do MVP transformation */
486 for (i=0; i < 4; i++) {
487 GLint idx;
488 tokens[3] = tokens[4] = i;
489 idx = _mesa_add_state_reference(paramList, tokens);
490 #ifdef PREFER_DP4
491 vpi[i].Opcode = OPCODE_DP4;
492 vpi[i].StringPos = 0;
493 vpi[i].Data = 0;
494
495 vpi[i].DstReg.File = PROGRAM_OUTPUT;
496 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
497 vpi[i].DstReg.WriteMask = 1 << i;
498 vpi[i].DstReg.CondMask = COND_TR;
499
500 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
501 vpi[i].SrcReg[0].Index = idx;
502 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
503
504 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
505 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
506 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
507 #else
508 if (i == 0)
509 vpi[i].Opcode = OPCODE_MUL;
510 else
511 vpi[i].Opcode = OPCODE_MAD;
512
513 vpi[i].StringPos = 0;
514 vpi[i].Data = 0;
515
516 if (i == 3)
517 vpi[i].DstReg.File = PROGRAM_OUTPUT;
518 else
519 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
520 vpi[i].DstReg.Index = 0;
521 vpi[i].DstReg.WriteMask = 0xf;
522 vpi[i].DstReg.CondMask = COND_TR;
523
524 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
525 vpi[i].SrcReg[0].Index = idx;
526 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
527
528 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
529 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
530 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
531
532 if (i > 0) {
533 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
534 vpi[i].SrcReg[2].Index = 0;
535 vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
536 }
537 #endif
538 }
539
540 /* now append original program after our new instructions */
541 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction));
542
543 /* deallocate original program */
544 free(mesa_vp->Base.Instructions);
545
546 /* install new program */
547 mesa_vp->Base.Instructions = vpi;
548
549 mesa_vp->Base.NumInstructions += 4;
550 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1];
551
552 assert(vpi->Opcode == OPCODE_END);
553
554 mesa_vp->Base.InputsRead |= (1 << VERT_ATTRIB_POS);
555 mesa_vp->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS);
556
557 //fprintf(stderr, "IsPositionInvariant is set!\n");
558 //_mesa_print_program(&mesa_vp->Base);
559 }
560
561 vp->pos_end = 0;
562 mesa_vp->Base.NumNativeInstructions = 0;
563 if (mesa_vp->Base.Parameters)
564 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
565 else
566 mesa_vp->Base.NumNativeParameters = 0;
567
568 for(i=0; i < VERT_ATTRIB_MAX; i++)
569 vp->inputs[i] = -1;
570 /* fglrx uses fixed inputs as follows for conventional attribs.
571 generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available.
572 There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog.
573 attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to
574 vertex normal/weight)
575 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
576 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
577 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
578 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
579 generic attribs would require some more work (dma regions, renaming). */
580
581 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
582 vp->inputs[VERT_ATTRIB_POS] = 0;
583 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
584 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
585 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
586 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
587 vp->inputs[VERT_ATTRIB_FOG] = 15;
588 vp->inputs[VERT_ATTRIB_TEX0] = 6;
589 vp->inputs[VERT_ATTRIB_TEX1] = 7;
590 vp->inputs[VERT_ATTRIB_TEX2] = 8;
591 vp->inputs[VERT_ATTRIB_TEX3] = 9;
592 vp->inputs[VERT_ATTRIB_TEX4] = 10;
593 vp->inputs[VERT_ATTRIB_TEX5] = 11;
594 /* attr 4,5 and 13 are only used with generic attribs.
595 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
596 not possibe to use with vertex progs as it is lacking in vert prog specification) */
597
598 assert(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS));
599
600 vp->translated = GL_TRUE;
601
602 o_inst = vp->instr;
603 for(vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
604 if (u_temp_i < mesa_vp->Base.NumTemporaries) {
605 if (R200_DEBUG & DEBUG_FALLBACKS) {
606 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
607 }
608 return GL_FALSE;
609 }
610 u_temp_i = R200_VSF_MAX_TEMPS - 1;
611 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
612 mesa_vp->Base.NumNativeInstructions = 129;
613 if (R200_DEBUG & DEBUG_FALLBACKS) {
614 fprintf(stderr, "more than 128 native instructions\n");
615 }
616 return GL_FALSE;
617 }
618
619 operands = op_operands(vpi->Opcode);
620 are_srcs_scalar = operands & SCALAR_FLAG;
621 operands &= OP_MASK;
622
623 for(i = 0; i < operands; i++)
624 src[i] = vpi->SrcReg[i];
625
626 if(operands == 3){
627 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
628 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
629 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
630 VSF_FLAG_ALL);
631
632 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
633 SWIZZLE_X, SWIZZLE_Y,
634 SWIZZLE_Z, SWIZZLE_W,
635 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
636
637 o_inst->src1 = ZERO_SRC_0;
638 o_inst->src2 = UNUSED_SRC_1;
639 o_inst++;
640
641 src[2].File = PROGRAM_TEMPORARY;
642 src[2].Index = u_temp_i;
643 src[2].RelAddr = 0;
644 u_temp_i--;
645 }
646 }
647
648 if(operands >= 2){
649 if( CMP_SRCS(src[1], src[0]) ){
650 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
651 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
652 VSF_FLAG_ALL);
653
654 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
655 SWIZZLE_X, SWIZZLE_Y,
656 SWIZZLE_Z, SWIZZLE_W,
657 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
658
659 o_inst->src1 = ZERO_SRC_0;
660 o_inst->src2 = UNUSED_SRC_1;
661 o_inst++;
662
663 src[0].File = PROGRAM_TEMPORARY;
664 src[0].Index = u_temp_i;
665 src[0].RelAddr = 0;
666 u_temp_i--;
667 }
668 }
669
670 /* These ops need special handling. */
671 switch(vpi->Opcode){
672 case OPCODE_POW:
673 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
674 So may need to insert additional instruction */
675 if ((src[0].File == src[1].File) &&
676 (src[0].Index == src[1].Index)) {
677 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
678 t_dst_mask(vpi->DstReg.WriteMask));
679 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
680 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
681 SWIZZLE_ZERO,
682 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
683 SWIZZLE_ZERO,
684 t_src_class(src[0].File),
685 src[0].NegateBase) | (src[0].RelAddr << 4);
686 o_inst->src1 = UNUSED_SRC_0;
687 o_inst->src2 = UNUSED_SRC_0;
688 }
689 else {
690 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
691 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
692 VSF_FLAG_ALL);
693 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
694 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
695 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
696 t_src_class(src[0].File),
697 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
698 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
699 SWIZZLE_ZERO, SWIZZLE_ZERO,
700 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
701 t_src_class(src[1].File),
702 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
703 o_inst->src2 = UNUSED_SRC_1;
704 o_inst++;
705
706 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&vpi->DstReg),
707 t_dst_mask(vpi->DstReg.WriteMask));
708 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
709 VSF_IN_COMPONENT_X,
710 VSF_IN_COMPONENT_Y,
711 VSF_IN_COMPONENT_Z,
712 VSF_IN_COMPONENT_W,
713 VSF_IN_CLASS_TMP,
714 VSF_FLAG_NONE);
715 o_inst->src1 = UNUSED_SRC_0;
716 o_inst->src2 = UNUSED_SRC_0;
717 u_temp_i--;
718 }
719 goto next;
720
721 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
722 case OPCODE_SWZ:
723 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
724 t_dst_mask(vpi->DstReg.WriteMask));
725 o_inst->src0 = t_src(vp, &src[0]);
726 o_inst->src1 = ZERO_SRC_0;
727 o_inst->src2 = UNUSED_SRC_1;
728 goto next;
729
730 case OPCODE_MAD:
731 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
732 src[1].File == PROGRAM_TEMPORARY &&
733 src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
734
735 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&vpi->DstReg),
736 t_dst_mask(vpi->DstReg.WriteMask));
737 o_inst->src0 = t_src(vp, &src[0]);
738 #if 0
739 if ((o_inst - vp->instr) == 31) {
740 /* fix up the broken vertex program of quake4 demo... */
741 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
742 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
743 t_src_class(src[1].File),
744 src[1].NegateBase) | (src[1].RelAddr << 4);
745 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
746 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
747 t_src_class(src[1].File),
748 src[1].NegateBase) | (src[1].RelAddr << 4);
749 }
750 else {
751 o_inst->src1 = t_src(vp, &src[1]);
752 o_inst->src2 = t_src(vp, &src[2]);
753 }
754 #else
755 o_inst->src1 = t_src(vp, &src[1]);
756 o_inst->src2 = t_src(vp, &src[2]);
757 #endif
758 goto next;
759
760 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
761 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
762 t_dst_mask(vpi->DstReg.WriteMask));
763
764 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
765 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
766 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
767 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
768 SWIZZLE_ZERO,
769 t_src_class(src[0].File),
770 src[0].NegateBase) | (src[0].RelAddr << 4);
771
772 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
773 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
774 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
775 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
776 SWIZZLE_ZERO,
777 t_src_class(src[1].File),
778 src[1].NegateBase) | (src[1].RelAddr << 4);
779
780 o_inst->src2 = UNUSED_SRC_1;
781 goto next;
782
783 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
784 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&vpi->DstReg),
785 t_dst_mask(vpi->DstReg.WriteMask));
786
787 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
788 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
789 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
790 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
791 VSF_IN_COMPONENT_ONE,
792 t_src_class(src[0].File),
793 src[0].NegateBase) | (src[0].RelAddr << 4);
794 o_inst->src1 = t_src(vp, &src[1]);
795 o_inst->src2 = UNUSED_SRC_1;
796 goto next;
797
798 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
799 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
800 t_dst_mask(vpi->DstReg.WriteMask));
801
802 o_inst->src0 = t_src(vp, &src[0]);
803 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
804 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
805 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
806 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
807 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
808 t_src_class(src[1].File),
809 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
810 o_inst->src2 = UNUSED_SRC_1;
811 goto next;
812
813 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
814 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&vpi->DstReg),
815 t_dst_mask(vpi->DstReg.WriteMask));
816
817 o_inst->src0=t_src(vp, &src[0]);
818 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
819 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
820 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
821 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
822 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
823 t_src_class(src[0].File),
824 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
825 o_inst->src2 = UNUSED_SRC_1;
826 goto next;
827
828 case OPCODE_FLR:
829 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
830 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
831
832 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
833 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
834 t_dst_mask(vpi->DstReg.WriteMask));
835
836 o_inst->src0 = t_src(vp, &src[0]);
837 o_inst->src1 = UNUSED_SRC_0;
838 o_inst->src2 = UNUSED_SRC_1;
839 o_inst++;
840
841 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&vpi->DstReg),
842 t_dst_mask(vpi->DstReg.WriteMask));
843
844 o_inst->src0 = t_src(vp, &src[0]);
845 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
846 VSF_IN_COMPONENT_X,
847 VSF_IN_COMPONENT_Y,
848 VSF_IN_COMPONENT_Z,
849 VSF_IN_COMPONENT_W,
850 VSF_IN_CLASS_TMP,
851 /* Not 100% sure about this */
852 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
853
854 o_inst->src2 = UNUSED_SRC_0;
855 u_temp_i--;
856 goto next;
857
858 case OPCODE_XPD:
859 /* mul r0, r1.yzxw, r2.zxyw
860 mad r0, -r2.yzxw, r1.zxyw, r0
861 NOTE: might need MAD_2
862 */
863
864 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
865 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
866 t_dst_mask(vpi->DstReg.WriteMask));
867
868 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
869 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
870 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
871 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
872 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
873 t_src_class(src[0].File),
874 src[0].NegateBase) | (src[0].RelAddr << 4);
875
876 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
877 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
878 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
879 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
880 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
881 t_src_class(src[1].File),
882 src[1].NegateBase) | (src[1].RelAddr << 4);
883
884 o_inst->src2 = UNUSED_SRC_1;
885 o_inst++;
886 u_temp_i--;
887
888 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&vpi->DstReg),
889 t_dst_mask(vpi->DstReg.WriteMask));
890
891 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
892 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
893 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
894 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
895 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
896 t_src_class(src[1].File),
897 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
898
899 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
900 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
901 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
902 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
903 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
904 t_src_class(src[0].File),
905 src[0].NegateBase) | (src[0].RelAddr << 4);
906
907 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
908 VSF_IN_COMPONENT_X,
909 VSF_IN_COMPONENT_Y,
910 VSF_IN_COMPONENT_Z,
911 VSF_IN_COMPONENT_W,
912 VSF_IN_CLASS_TMP,
913 VSF_FLAG_NONE);
914 goto next;
915
916 case OPCODE_END:
917 break;
918 default:
919 break;
920 }
921
922 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&vpi->DstReg),
923 t_dst_mask(vpi->DstReg.WriteMask));
924
925 if(are_srcs_scalar){
926 switch(operands){
927 case 1:
928 o_inst->src0 = t_src_scalar(vp, &src[0]);
929 o_inst->src1 = UNUSED_SRC_0;
930 o_inst->src2 = UNUSED_SRC_1;
931 break;
932
933 case 2:
934 o_inst->src0 = t_src_scalar(vp, &src[0]);
935 o_inst->src1 = t_src_scalar(vp, &src[1]);
936 o_inst->src2 = UNUSED_SRC_1;
937 break;
938
939 case 3:
940 o_inst->src0 = t_src_scalar(vp, &src[0]);
941 o_inst->src1 = t_src_scalar(vp, &src[1]);
942 o_inst->src2 = t_src_scalar(vp, &src[2]);
943 break;
944
945 default:
946 fprintf(stderr, "illegal number of operands %lu\n", operands);
947 exit(-1);
948 break;
949 }
950 } else {
951 switch(operands){
952 case 1:
953 o_inst->src0 = t_src(vp, &src[0]);
954 o_inst->src1 = UNUSED_SRC_0;
955 o_inst->src2 = UNUSED_SRC_1;
956 break;
957
958 case 2:
959 o_inst->src0 = t_src(vp, &src[0]);
960 o_inst->src1 = t_src(vp, &src[1]);
961 o_inst->src2 = UNUSED_SRC_1;
962 break;
963
964 case 3:
965 o_inst->src0 = t_src(vp, &src[0]);
966 o_inst->src1 = t_src(vp, &src[1]);
967 o_inst->src2 = t_src(vp, &src[2]);
968 break;
969
970 default:
971 fprintf(stderr, "illegal number of operands %lu\n", operands);
972 exit(-1);
973 break;
974 }
975 }
976 next:
977 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
978 vp->pos_end = (o_inst - vp->instr);
979 }
980 }
981
982 /* need to test again since some instructions require more than one (up to 3) native inst */
983 if(o_inst - vp->instr > R200_VSF_MAX_INST) {
984 mesa_vp->Base.NumNativeInstructions = 129;
985 if (R200_DEBUG & DEBUG_FALLBACKS) {
986 fprintf(stderr, "more than 128 native instructions\n");
987 }
988 return GL_FALSE;
989 }
990 vp->native = GL_TRUE;
991 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
992 #if 0
993 fprintf(stderr, "hw program:\n");
994 for(i=0; i < vp->program.length; i++)
995 fprintf(stderr, "%08x\n", vp->instr[i]);
996 #endif
997 return GL_TRUE;
998 }
999
1000 void r200SetupVertexProg( GLcontext *ctx ) {
1001 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1002 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1003 GLboolean fallback;
1004 GLint i;
1005
1006 if (!vp->translated) {
1007 rmesa->curr_vp_hw = NULL;
1008 r200_translate_vertex_program(vp);
1009 }
1010 /* could optimize setting up vertex progs away for non-tcl hw */
1011 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
1012 rmesa->r200Screen->drmSupportsVertexProgram);
1013 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1014 if (fallback) return;
1015
1016 R200_STATECHANGE( rmesa, pvs );
1017
1018 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1019 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1020 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1021 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1022 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1023
1024 /* maybe user clip planes just work with vertex progs... untested */
1025 if (ctx->Transform.ClipPlanesEnabled) {
1026 R200_STATECHANGE( rmesa, tcl );
1027 if (vp->mesa_program.IsPositionInvariant) {
1028 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1029 }
1030 else {
1031 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1032 }
1033 }
1034
1035 if (vp != rmesa->curr_vp_hw) {
1036 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1037 drm_radeon_cmd_header_t tmp;
1038
1039 R200_STATECHANGE( rmesa, vpi[0] );
1040 R200_STATECHANGE( rmesa, vpi[1] );
1041
1042 /* FIXME: what about using a memcopy... */
1043 for (i = 0; (i < 64) && i < count; i++) {
1044 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1045 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1046 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1047 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1048 }
1049 /* hack up the cmd_size so not the whole state atom is emitted always.
1050 This may require some more thought, we may emit half progs on lost state, but
1051 hopefully it won't matter?
1052 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1053 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1054 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1055 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1056 tmp.veclinear.count = (count > 64) ? 64 : count;
1057 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1058 if (count > 64) {
1059 for (i = 0; i < (count - 64); i++) {
1060 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1061 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1062 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1063 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1064 }
1065 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1066 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1067 tmp.veclinear.count = count - 64;
1068 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1069 }
1070 rmesa->curr_vp_hw = vp;
1071 }
1072 }
1073
1074
1075 static void
1076 r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1077 {
1078 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1079
1080 switch(target){
1081 case GL_VERTEX_PROGRAM_ARB:
1082 rmesa->curr_vp_hw = NULL;
1083 break;
1084 default:
1085 _mesa_problem(ctx, "Target not supported yet!");
1086 break;
1087 }
1088 }
1089
1090 static struct gl_program *
1091 r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1092 {
1093 struct r200_vertex_program *vp;
1094
1095 switch(target){
1096 case GL_VERTEX_PROGRAM_ARB:
1097 vp = CALLOC_STRUCT(r200_vertex_program);
1098 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1099 case GL_FRAGMENT_PROGRAM_ARB:
1100 case GL_FRAGMENT_PROGRAM_NV:
1101 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1102 default:
1103 _mesa_problem(ctx, "Bad target in r200NewProgram");
1104 }
1105 return NULL;
1106 }
1107
1108
1109 static void
1110 r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1111 {
1112 _mesa_delete_program(ctx, prog);
1113 }
1114
1115 static void
1116 r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1117 {
1118 struct r200_vertex_program *vp = (void *)prog;
1119
1120 switch(target) {
1121 case GL_VERTEX_PROGRAM_ARB:
1122 vp->translated = GL_FALSE;
1123 memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));
1124 /*r200_translate_vertex_shader(vp);*/
1125 break;
1126 }
1127 /* need this for tcl fallbacks */
1128 _tnl_program_string(ctx, target, prog);
1129 }
1130
1131 static GLboolean
1132 r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1133 {
1134 struct r200_vertex_program *vp = (void *)prog;
1135
1136 switch(target){
1137 case GL_VERTEX_STATE_PROGRAM_NV:
1138 case GL_VERTEX_PROGRAM_ARB:
1139 if (!vp->translated) {
1140 r200_translate_vertex_program(vp);
1141 }
1142 /* does not take parameters etc. into account */
1143 return vp->native;
1144 default:
1145 _mesa_problem(ctx, "Bad target in r200NewProgram");
1146 }
1147 return 0;
1148 }
1149
1150 void r200InitShaderFuncs(struct dd_function_table *functions)
1151 {
1152 functions->NewProgram = r200NewProgram;
1153 functions->BindProgram = r200BindProgram;
1154 functions->DeleteProgram = r200DeleteProgram;
1155 functions->ProgramStringNotify = r200ProgramStringNotify;
1156 functions->IsProgramNative = r200IsProgramNative;
1157 }