submit vertex weights to make World of Warcraft maybe happy (bug 8250)
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "glheader.h"
34 #include "macros.h"
35 #include "enums.h"
36 #include "program.h"
37
38 #include "r200_context.h"
39 #include "r200_vertprog.h"
40 #include "r200_ioctl.h"
41 #include "r200_tcl.h"
42 #include "program_instruction.h"
43 #include "programopt.h"
44 #include "tnl/tnl.h"
45
/* The translation helpers in this file hand Mesa swizzle selectors and write
 * masks to the hardware encoding macros unchanged, so the Mesa and VSF
 * constants must stay numerically identical.  Fail the build if they drift.
 */
#if (SWIZZLE_X != VSF_IN_COMPONENT_X) || \
    (SWIZZLE_Y != VSF_IN_COMPONENT_Y) || \
    (SWIZZLE_Z != VSF_IN_COMPONENT_Z) || \
    (SWIZZLE_W != VSF_IN_COMPONENT_W) || \
    (SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO) || \
    (SWIZZLE_ONE != VSF_IN_COMPONENT_ONE) || \
    (WRITEMASK_X != VSF_FLAG_X) || \
    (WRITEMASK_Y != VSF_FLAG_Y) || \
    (WRITEMASK_Z != VSF_FLAG_Z) || \
    (WRITEMASK_W != VSF_FLAG_W)
#error Cannot change these!
#endif
58
/* Bit 31 of op_names[].ip marks opcodes whose sources are scalars.
 * The constant is unsigned on purpose: shifting a signed 1 into the sign bit
 * overflows int, and the resulting negative value would sign-extend when
 * stored in the unsigned long ip field. */
#define SCALAR_FLAG (1u << 31)
#define FLAG_MASK (1u << 31)
/* Low bits of op_names[].ip hold the number of input operands. */
#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
/* Builds one op_names[] entry: printable name, Mesa opcode, operand info. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
63
64 static struct{
65 char *name;
66 int opcode;
67 unsigned long ip; /* number of input operands and flags */
68 }op_names[]={
69 OPN(ABS, 1),
70 OPN(ADD, 2),
71 OPN(ARL, 1|SCALAR_FLAG),
72 OPN(DP3, 2),
73 OPN(DP4, 2),
74 OPN(DPH, 2),
75 OPN(DST, 2),
76 OPN(EX2, 1|SCALAR_FLAG),
77 OPN(EXP, 1|SCALAR_FLAG),
78 OPN(FLR, 1),
79 OPN(FRC, 1),
80 OPN(LG2, 1|SCALAR_FLAG),
81 OPN(LIT, 1),
82 OPN(LOG, 1|SCALAR_FLAG),
83 OPN(MAD, 3),
84 OPN(MAX, 2),
85 OPN(MIN, 2),
86 OPN(MOV, 1),
87 OPN(MUL, 2),
88 OPN(POW, 2|SCALAR_FLAG),
89 OPN(RCP, 1|SCALAR_FLAG),
90 OPN(RSQ, 1|SCALAR_FLAG),
91 OPN(SGE, 2),
92 OPN(SLT, 2),
93 OPN(SUB, 2),
94 OPN(SWZ, 1),
95 OPN(XPD, 2),
96 OPN(PRINT, 0),
97 OPN(END, 0),
98 };
99 #undef OPN
100
101 static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
102 {
103 r200ContextPtr rmesa = R200_CONTEXT( ctx );
104 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
105 int pi;
106 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
107 struct gl_program_parameter_list *paramList;
108 drm_radeon_cmd_header_t tmp;
109
110 R200_STATECHANGE( rmesa, vpp[0] );
111 R200_STATECHANGE( rmesa, vpp[1] );
112 assert(mesa_vp->Base.Parameters);
113 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
114 paramList = mesa_vp->Base.Parameters;
115
116 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
117 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
118 return GL_FALSE;
119 }
120
121 for(pi = 0; pi < paramList->NumParameters; pi++) {
122 switch(paramList->Parameters[pi].Type) {
123 case PROGRAM_STATE_VAR:
124 case PROGRAM_NAMED_PARAM:
125 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
126 case PROGRAM_CONSTANT:
127 *fcmd++ = paramList->ParameterValues[pi][0];
128 *fcmd++ = paramList->ParameterValues[pi][1];
129 *fcmd++ = paramList->ParameterValues[pi][2];
130 *fcmd++ = paramList->ParameterValues[pi][3];
131 break;
132 default:
133 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
134 break;
135 }
136 if (pi == 95) {
137 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
138 }
139 }
140 /* hack up the cmd_size so not the whole state atom is emitted always. */
141 rmesa->hw.vpp[0].cmd_size =
142 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
143 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
144 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
145 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
146 if (paramList->NumParameters > 96) {
147 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
148 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
149 tmp.veclinear.count = paramList->NumParameters - 96;
150 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
151 }
152 return GL_TRUE;
153 }
154
155 static __inline unsigned long t_dst_mask(GLuint mask)
156 {
157 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
158 return mask & VSF_FLAG_ALL;
159 }
160
161 static unsigned long t_dst(struct prog_dst_register *dst)
162 {
163 switch(dst->File) {
164 case PROGRAM_TEMPORARY:
165 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
166 | R200_VSF_OUT_CLASS_TMP);
167 case PROGRAM_OUTPUT:
168 switch (dst->Index) {
169 case VERT_RESULT_HPOS:
170 return R200_VSF_OUT_CLASS_RESULT_POS;
171 case VERT_RESULT_COL0:
172 return R200_VSF_OUT_CLASS_RESULT_COLOR;
173 case VERT_RESULT_COL1:
174 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
175 | R200_VSF_OUT_CLASS_RESULT_COLOR);
176 case VERT_RESULT_FOGC:
177 return R200_VSF_OUT_CLASS_RESULT_FOGC;
178 case VERT_RESULT_TEX0:
179 case VERT_RESULT_TEX1:
180 case VERT_RESULT_TEX2:
181 case VERT_RESULT_TEX3:
182 case VERT_RESULT_TEX4:
183 case VERT_RESULT_TEX5:
184 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
185 | R200_VSF_OUT_CLASS_RESULT_TEXC);
186 case VERT_RESULT_PSIZ:
187 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
188 default:
189 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
190 exit(0);
191 return 0;
192 }
193 case PROGRAM_ADDRESS:
194 assert (dst->Index == 0);
195 return R200_VSF_OUT_CLASS_ADDR;
196 default:
197 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
198 exit(0);
199 return 0;
200 }
201 }
202
203 static unsigned long t_src_class(enum register_file file)
204 {
205
206 switch(file){
207 case PROGRAM_TEMPORARY:
208 return VSF_IN_CLASS_TMP;
209
210 case PROGRAM_INPUT:
211 return VSF_IN_CLASS_ATTR;
212
213 case PROGRAM_LOCAL_PARAM:
214 case PROGRAM_ENV_PARAM:
215 case PROGRAM_NAMED_PARAM:
216 case PROGRAM_STATE_VAR:
217 return VSF_IN_CLASS_PARAM;
218 /*
219 case PROGRAM_OUTPUT:
220 case PROGRAM_WRITE_ONLY:
221 case PROGRAM_ADDRESS:
222 */
223 default:
224 fprintf(stderr, "problem in %s", __FUNCTION__);
225 exit(0);
226 }
227 }
228
229 static __inline unsigned long t_swizzle(GLubyte swizzle)
230 {
231 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
232 return swizzle;
233 }
234
#if 0
/* Debug helper (compiled out): print every vp->inputs[] entry on one line
 * of stderr, prefixed with the caller's name. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr = 0;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
252
253 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
254 {
255 /*
256 int i;
257 int max_reg = -1;
258 */
259 if(src->File == PROGRAM_INPUT){
260 /* if(vp->inputs[src->Index] != -1)
261 return vp->inputs[src->Index];
262
263 for(i=0; i < VERT_ATTRIB_MAX; i++)
264 if(vp->inputs[i] > max_reg)
265 max_reg = vp->inputs[i];
266
267 vp->inputs[src->Index] = max_reg+1;*/
268
269 //vp_dump_inputs(vp, __FUNCTION__);
270 assert(vp->inputs[src->Index] != -1);
271 return vp->inputs[src->Index];
272 } else {
273 if (src->Index < 0) {
274 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
275 return 0;
276 }
277 return src->Index;
278 }
279 }
280
281 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
282 {
283
284 return MAKE_VSF_SOURCE(t_src_index(vp, src),
285 t_swizzle(GET_SWZ(src->Swizzle, 0)),
286 t_swizzle(GET_SWZ(src->Swizzle, 1)),
287 t_swizzle(GET_SWZ(src->Swizzle, 2)),
288 t_swizzle(GET_SWZ(src->Swizzle, 3)),
289 t_src_class(src->File),
290 src->NegateBase) | (src->RelAddr << 4);
291 }
292
293 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
294 {
295
296 return MAKE_VSF_SOURCE(t_src_index(vp, src),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_src_class(src->File),
302 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
303 }
304
305 static unsigned long t_opcode(enum prog_opcode opcode)
306 {
307
308 switch(opcode){
309 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
310 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
311 * seems to ignore neg offsets which isn't quite correct...
312 */
313 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
314 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
315 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
316 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
317 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
318 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
319 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
320 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
321 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
322 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
323 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
324 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
325 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
326 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
327 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
328 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
329
330 default:
331 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
332 }
333 exit(-1);
334 return 0;
335 }
336
337 static unsigned long op_operands(enum prog_opcode opcode)
338 {
339 int i;
340
341 /* Can we trust mesas opcodes to be in order ? */
342 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
343 if(op_names[i].opcode == opcode)
344 return op_names[i].ip;
345
346 fprintf(stderr, "op %d not found in op_names\n", opcode);
347 exit(-1);
348 return 0;
349 }
350
/* TODO: Get rid of t_src_class call */
/* True when two sources name different registers (RelAddr or Index differ)
 * while both are parameters or both are vertex attributes.  Arguments are
 * parenthesized so any lvalue expression can be passed safely. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
         t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
        (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
         t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are same as previous source, zero-ed out.
   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* Copy of an already encoded source word with all four component selectors
 * replaced by "zero". */
#define ZERO_SRC(encoded) ((((encoded) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
       | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
        | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
        | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
        | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

/* Copy of an already encoded source word with its low four bits replaced
 * by 9 (presumably the VSF_IN_CLASS_NONE encoding described above -
 * TODO confirm). */
#define UNUSED_SRC(encoded) (((encoded) & ~15) | 9)

/* These expand in terms of the local o_inst pointer of the caller. */
#define ZERO_SRC_0 ZERO_SRC(o_inst->src0)

#define ZERO_SRC_1 ZERO_SRC(o_inst->src1)

#define ZERO_SRC_2 ZERO_SRC(o_inst->src2)

#define UNUSED_SRC_0 UNUSED_SRC(o_inst->src0)

#define UNUSED_SRC_1 UNUSED_SRC(o_inst->src1)

#define UNUSED_SRC_2 UNUSED_SRC(o_inst->src2)
389
390
391 /**
392 * Generate an R200 vertex program from Mesa's internal representation.
393 *
394 * \return GL_TRUE for success, GL_FALSE for failure.
395 */
396 static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_vertex_program *vp)
397 {
398 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
399 struct prog_instruction *vpi;
400 int i;
401 VERTEX_SHADER_INSTRUCTION *o_inst;
402 unsigned long operands;
403 int are_srcs_scalar;
404 unsigned long hw_op;
405 int dofogfix = 0;
406 int fog_temp_i = 0;
407 int free_inputs;
408 int free_inputs_conv;
409 int array_count = 0;
410
411 vp->native = GL_FALSE;
412 vp->translated = GL_TRUE;
413 vp->fogmode = ctx->Fog.Mode;
414
415 if (mesa_vp->Base.NumInstructions == 0)
416 return GL_FALSE;
417
418 #if 0
419 if ((mesa_vp->Base.InputsRead &
420 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
421 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
422 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
423 if (R200_DEBUG & DEBUG_FALLBACKS) {
424 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
425 mesa_vp->Base.InputsRead);
426 }
427 return GL_FALSE;
428 }
429 #endif
430
431 if ((mesa_vp->Base.OutputsWritten &
432 ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) |
433 (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
434 (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
435 (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
436 if (R200_DEBUG & DEBUG_FALLBACKS) {
437 fprintf(stderr, "can't handle vert prog outputs 0x%x\n",
438 mesa_vp->Base.OutputsWritten);
439 }
440 return GL_FALSE;
441 }
442
443 if (mesa_vp->IsNVProgram) {
444 /* subtle differences in spec like guaranteed initialized regs could cause
445 headaches. Might want to remove the driconf option to enable it completely */
446 return GL_FALSE;
447 }
448 /* Initial value should be last tmp reg that hw supports.
449 Strangely enough r300 doesnt mind even though these would be out of range.
450 Smart enough to realize that it doesnt need it? */
451 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
452 struct prog_src_register src[3];
453 struct prog_dst_register dst;
454
455 /* FIXME: is changing the prog safe to do here? */
456 if (mesa_vp->IsPositionInvariant &&
457 /* make sure we only do this once */
458 !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
459 _mesa_insert_mvp_code(ctx, mesa_vp);
460 }
461
462 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
463 base e isn't directly available neither. */
464 if (mesa_vp->Base.OutputsWritten & VERT_RESULT_FOGC && !vp->fogpidx) {
465 struct gl_program_parameter_list *paramList;
466 GLint tokens[6] = { STATE_FOG_PARAMS, 0, 0, 0, 0, 0 };
467 paramList = mesa_vp->Base.Parameters;
468 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
469 }
470
471 vp->pos_end = 0;
472 mesa_vp->Base.NumNativeInstructions = 0;
473 if (mesa_vp->Base.Parameters)
474 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
475 else
476 mesa_vp->Base.NumNativeParameters = 0;
477
478 for(i = 0; i < VERT_ATTRIB_MAX; i++)
479 vp->inputs[i] = -1;
480 free_inputs = 0x2ffd;
481
482 /* fglrx uses fixed inputs as follows for conventional attribs.
483 generic attribs use non-fixed assignment, fglrx will always use the
484 lowest attrib values available. We'll just do the same.
485 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
486 and 13 in a hw vertex prog.
487 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
488 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
489 Additionally, not more than 12 arrays in total are possible I think.
490 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
491 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
492 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
493 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
494 */
495
496 /* attr 4,5 and 13 are only used with generic attribs.
497 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
498 not possibe to use with vertex progs as it is lacking in vert prog specification) */
499 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
500 if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
501 vp->inputs[VERT_ATTRIB_POS] = 0;
502 free_inputs &= ~(1 << 0);
503 array_count++;
504 }
505 if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
506 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
507 array_count++;
508 }
509 if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
510 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
511 array_count++;
512 }
513 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
514 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
515 free_inputs &= ~(1 << 2);
516 array_count++;
517 }
518 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
519 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
520 free_inputs &= ~(1 << 3);
521 array_count++;
522 }
523 if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
524 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
525 }
526 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) {
527 if (mesa_vp->Base.InputsRead & (1 << i)) {
528 vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6;
529 free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6));
530 array_count++;
531 }
532 }
533 free_inputs_conv = free_inputs;
534 /* using VERT_ATTRIB_TEX6/7 would be illegal */
535 /* completely ignore aliasing? */
536 for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
537 int j;
538 /* completely ignore aliasing? */
539 if (mesa_vp->Base.InputsRead & (1 << i)) {
540 array_count++;
541 if (array_count > 12) {
542 if (R200_DEBUG & DEBUG_FALLBACKS) {
543 fprintf(stderr, "more than 12 attribs used in vert prog\n");
544 }
545 return GL_FALSE;
546 }
547 for (j = 0; j < 14; j++) {
548 /* will always find one due to limited array_count */
549 if (free_inputs & (1 << j)) {
550 free_inputs &= ~(1 << j);
551 vp->inputs[i] = j;
552 vp->rev_inputs[j] = i;
553 break;
554 }
555 }
556 }
557 }
558 vp->gen_inputs_mapped = free_inputs ^ free_inputs_conv;
559
560 if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
561 if (R200_DEBUG & DEBUG_FALLBACKS) {
562 fprintf(stderr, "can't handle vert prog without position output\n");
563 }
564 return GL_FALSE;
565 }
566 if (free_inputs & 1) {
567 if (R200_DEBUG & DEBUG_FALLBACKS) {
568 fprintf(stderr, "can't handle vert prog without position input\n");
569 }
570 return GL_FALSE;
571 }
572
573 o_inst = vp->instr;
574 for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
575 operands = op_operands(vpi->Opcode);
576 are_srcs_scalar = operands & SCALAR_FLAG;
577 operands &= OP_MASK;
578
579 for(i = 0; i < operands; i++) {
580 src[i] = vpi->SrcReg[i];
581 /* hack up default attrib values as per spec as swizzling.
582 normal, fog, secondary color. Crazy?
583 May need more if we don't submit vec4 elements? */
584 if (src[i].File == PROGRAM_INPUT) {
585 if (src[i].Index == VERT_ATTRIB_NORMAL) {
586 int j;
587 for (j = 0; j < 4; j++) {
588 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
589 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
590 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
591 }
592 }
593 }
594 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
595 int j;
596 for (j = 0; j < 4; j++) {
597 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
598 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
599 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
600 }
601 }
602 }
603 else if (src[i].Index == VERT_ATTRIB_FOG) {
604 int j;
605 for (j = 0; j < 4; j++) {
606 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
607 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
608 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
609 }
610 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
611 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
612 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
613 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
614 }
615 }
616 }
617 }
618 }
619
620 if(operands == 3){
621 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
622 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
623 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
624 VSF_FLAG_ALL);
625
626 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
627 SWIZZLE_X, SWIZZLE_Y,
628 SWIZZLE_Z, SWIZZLE_W,
629 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
630
631 o_inst->src1 = ZERO_SRC_0;
632 o_inst->src2 = UNUSED_SRC_1;
633 o_inst++;
634
635 src[2].File = PROGRAM_TEMPORARY;
636 src[2].Index = u_temp_i;
637 src[2].RelAddr = 0;
638 u_temp_i--;
639 }
640 }
641
642 if(operands >= 2){
643 if( CMP_SRCS(src[1], src[0]) ){
644 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
645 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
646 VSF_FLAG_ALL);
647
648 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
649 SWIZZLE_X, SWIZZLE_Y,
650 SWIZZLE_Z, SWIZZLE_W,
651 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
652
653 o_inst->src1 = ZERO_SRC_0;
654 o_inst->src2 = UNUSED_SRC_1;
655 o_inst++;
656
657 src[0].File = PROGRAM_TEMPORARY;
658 src[0].Index = u_temp_i;
659 src[0].RelAddr = 0;
660 u_temp_i--;
661 }
662 }
663
664 dst = vpi->DstReg;
665 if (dst.File == PROGRAM_OUTPUT &&
666 dst.Index == VERT_RESULT_FOGC &&
667 dst.WriteMask & WRITEMASK_X) {
668 fog_temp_i = u_temp_i;
669 dst.File = PROGRAM_TEMPORARY;
670 dst.Index = fog_temp_i;
671 dofogfix = 1;
672 u_temp_i--;
673 }
674
675 /* These ops need special handling. */
676 switch(vpi->Opcode){
677 case OPCODE_POW:
678 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
679 So may need to insert additional instruction */
680 if ((src[0].File == src[1].File) &&
681 (src[0].Index == src[1].Index)) {
682 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
683 t_dst_mask(dst.WriteMask));
684 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
685 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
686 SWIZZLE_ZERO,
687 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
688 SWIZZLE_ZERO,
689 t_src_class(src[0].File),
690 src[0].NegateBase) | (src[0].RelAddr << 4);
691 o_inst->src1 = UNUSED_SRC_0;
692 o_inst->src2 = UNUSED_SRC_0;
693 }
694 else {
695 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
696 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
697 VSF_FLAG_ALL);
698 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
699 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
700 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
701 t_src_class(src[0].File),
702 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
703 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
704 SWIZZLE_ZERO, SWIZZLE_ZERO,
705 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
706 t_src_class(src[1].File),
707 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
708 o_inst->src2 = UNUSED_SRC_1;
709 o_inst++;
710
711 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
712 t_dst_mask(dst.WriteMask));
713 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
714 VSF_IN_COMPONENT_X,
715 VSF_IN_COMPONENT_Y,
716 VSF_IN_COMPONENT_Z,
717 VSF_IN_COMPONENT_W,
718 VSF_IN_CLASS_TMP,
719 VSF_FLAG_NONE);
720 o_inst->src1 = UNUSED_SRC_0;
721 o_inst->src2 = UNUSED_SRC_0;
722 u_temp_i--;
723 }
724 goto next;
725
726 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
727 case OPCODE_SWZ:
728 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
729 t_dst_mask(dst.WriteMask));
730 o_inst->src0 = t_src(vp, &src[0]);
731 o_inst->src1 = ZERO_SRC_0;
732 o_inst->src2 = UNUSED_SRC_1;
733 goto next;
734
735 case OPCODE_MAD:
736 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
737 src[1].File == PROGRAM_TEMPORARY &&
738 src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
739
740 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
741 t_dst_mask(dst.WriteMask));
742 o_inst->src0 = t_src(vp, &src[0]);
743 #if 0
744 if ((o_inst - vp->instr) == 31) {
745 /* fix up the broken vertex program of quake4 demo... */
746 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
747 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
748 t_src_class(src[1].File),
749 src[1].NegateBase) | (src[1].RelAddr << 4);
750 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
751 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
752 t_src_class(src[1].File),
753 src[1].NegateBase) | (src[1].RelAddr << 4);
754 }
755 else {
756 o_inst->src1 = t_src(vp, &src[1]);
757 o_inst->src2 = t_src(vp, &src[2]);
758 }
759 #else
760 o_inst->src1 = t_src(vp, &src[1]);
761 o_inst->src2 = t_src(vp, &src[2]);
762 #endif
763 goto next;
764
765 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
766 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
767 t_dst_mask(dst.WriteMask));
768
769 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
770 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
771 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
772 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
773 SWIZZLE_ZERO,
774 t_src_class(src[0].File),
775 src[0].NegateBase) | (src[0].RelAddr << 4);
776
777 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
778 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
779 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
780 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
781 SWIZZLE_ZERO,
782 t_src_class(src[1].File),
783 src[1].NegateBase) | (src[1].RelAddr << 4);
784
785 o_inst->src2 = UNUSED_SRC_1;
786 goto next;
787
788 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
789 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
790 t_dst_mask(dst.WriteMask));
791
792 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
793 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
794 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
795 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
796 VSF_IN_COMPONENT_ONE,
797 t_src_class(src[0].File),
798 src[0].NegateBase) | (src[0].RelAddr << 4);
799 o_inst->src1 = t_src(vp, &src[1]);
800 o_inst->src2 = UNUSED_SRC_1;
801 goto next;
802
803 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
804 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
805 t_dst_mask(dst.WriteMask));
806
807 o_inst->src0 = t_src(vp, &src[0]);
808 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
809 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
810 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
811 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
812 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
813 t_src_class(src[1].File),
814 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
815 o_inst->src2 = UNUSED_SRC_1;
816 goto next;
817
818 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
819 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
820 t_dst_mask(dst.WriteMask));
821
822 o_inst->src0=t_src(vp, &src[0]);
823 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
824 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
825 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
826 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
827 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
828 t_src_class(src[0].File),
829 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
830 o_inst->src2 = UNUSED_SRC_1;
831 goto next;
832
833 case OPCODE_FLR:
834 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
835 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
836
837 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
838 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
839 t_dst_mask(dst.WriteMask));
840
841 o_inst->src0 = t_src(vp, &src[0]);
842 o_inst->src1 = UNUSED_SRC_0;
843 o_inst->src2 = UNUSED_SRC_1;
844 o_inst++;
845
846 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
847 t_dst_mask(dst.WriteMask));
848
849 o_inst->src0 = t_src(vp, &src[0]);
850 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
851 VSF_IN_COMPONENT_X,
852 VSF_IN_COMPONENT_Y,
853 VSF_IN_COMPONENT_Z,
854 VSF_IN_COMPONENT_W,
855 VSF_IN_CLASS_TMP,
856 /* Not 100% sure about this */
857 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
858
859 o_inst->src2 = UNUSED_SRC_0;
860 u_temp_i--;
861 goto next;
862
863 case OPCODE_XPD:
864 /* mul r0, r1.yzxw, r2.zxyw
865 mad r0, -r2.yzxw, r1.zxyw, r0
866 NOTE: might need MAD_2
867 */
868
869 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
870 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
871 t_dst_mask(dst.WriteMask));
872
873 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
874 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
875 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
876 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
877 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
878 t_src_class(src[0].File),
879 src[0].NegateBase) | (src[0].RelAddr << 4);
880
881 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
882 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
883 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
884 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
885 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
886 t_src_class(src[1].File),
887 src[1].NegateBase) | (src[1].RelAddr << 4);
888
889 o_inst->src2 = UNUSED_SRC_1;
890 o_inst++;
891 u_temp_i--;
892
893 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst),
894 t_dst_mask(dst.WriteMask));
895
896 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
897 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
898 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
899 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
900 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
901 t_src_class(src[1].File),
902 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
903
904 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
905 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
906 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
907 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
908 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
909 t_src_class(src[0].File),
910 src[0].NegateBase) | (src[0].RelAddr << 4);
911
912 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
913 VSF_IN_COMPONENT_X,
914 VSF_IN_COMPONENT_Y,
915 VSF_IN_COMPONENT_Z,
916 VSF_IN_COMPONENT_W,
917 VSF_IN_CLASS_TMP,
918 VSF_FLAG_NONE);
919 goto next;
920
921 case OPCODE_END:
922 assert(0);
923 default:
924 break;
925 }
926
927 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
928 t_dst_mask(dst.WriteMask));
929
930 if(are_srcs_scalar){
931 switch(operands){
932 case 1:
933 o_inst->src0 = t_src_scalar(vp, &src[0]);
934 o_inst->src1 = UNUSED_SRC_0;
935 o_inst->src2 = UNUSED_SRC_1;
936 break;
937
938 case 2:
939 o_inst->src0 = t_src_scalar(vp, &src[0]);
940 o_inst->src1 = t_src_scalar(vp, &src[1]);
941 o_inst->src2 = UNUSED_SRC_1;
942 break;
943
944 case 3:
945 o_inst->src0 = t_src_scalar(vp, &src[0]);
946 o_inst->src1 = t_src_scalar(vp, &src[1]);
947 o_inst->src2 = t_src_scalar(vp, &src[2]);
948 break;
949
950 default:
951 fprintf(stderr, "illegal number of operands %lu\n", operands);
952 exit(-1);
953 break;
954 }
955 } else {
956 switch(operands){
957 case 1:
958 o_inst->src0 = t_src(vp, &src[0]);
959 o_inst->src1 = UNUSED_SRC_0;
960 o_inst->src2 = UNUSED_SRC_1;
961 break;
962
963 case 2:
964 o_inst->src0 = t_src(vp, &src[0]);
965 o_inst->src1 = t_src(vp, &src[1]);
966 o_inst->src2 = UNUSED_SRC_1;
967 break;
968
969 case 3:
970 o_inst->src0 = t_src(vp, &src[0]);
971 o_inst->src1 = t_src(vp, &src[1]);
972 o_inst->src2 = t_src(vp, &src[2]);
973 break;
974
975 default:
976 fprintf(stderr, "illegal number of operands %lu\n", operands);
977 exit(-1);
978 break;
979 }
980 }
981 next:
982
983 if (dofogfix) {
984 o_inst++;
985 if (vp->fogmode == GL_EXP) {
986 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
987 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
988 VSF_FLAG_X);
989 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
990 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
991 o_inst->src2 = UNUSED_SRC_1;
992 o_inst++;
993 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
994 R200_VSF_OUT_CLASS_RESULT_FOGC,
995 VSF_FLAG_X);
996 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
997 o_inst->src1 = UNUSED_SRC_0;
998 o_inst->src2 = UNUSED_SRC_1;
999 }
1000 else if (vp->fogmode == GL_EXP2) {
1001 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1002 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1003 VSF_FLAG_X);
1004 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1005 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1006 o_inst->src2 = UNUSED_SRC_1;
1007 o_inst++;
1008 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1009 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1010 VSF_FLAG_X);
1011 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1012 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1013 o_inst->src2 = UNUSED_SRC_1;
1014 o_inst++;
1015 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1016 R200_VSF_OUT_CLASS_RESULT_FOGC,
1017 VSF_FLAG_X);
1018 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1019 o_inst->src1 = UNUSED_SRC_0;
1020 o_inst->src2 = UNUSED_SRC_1;
1021 }
1022 else { /* fogmode == GL_LINEAR */
1023 /* could do that with single op (dot) if using params like
1024 with fixed function pipeline fog */
1025 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1026 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1027 VSF_FLAG_X);
1028 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1029 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1030 o_inst->src2 = UNUSED_SRC_1;
1031 o_inst++;
1032 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1033 R200_VSF_OUT_CLASS_RESULT_FOGC,
1034 VSF_FLAG_X);
1035 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1036 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1037 o_inst->src2 = UNUSED_SRC_1;
1038
1039 }
1040 dofogfix = 0;
1041 }
1042
1043 if (mesa_vp->Base.NumNativeTemporaries <
1044 (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) {
1045 mesa_vp->Base.NumNativeTemporaries =
1046 mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i);
1047 }
1048 if (u_temp_i < mesa_vp->Base.NumTemporaries) {
1049 if (R200_DEBUG & DEBUG_FALLBACKS) {
1050 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
1051 }
1052 return GL_FALSE;
1053 }
1054 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1055 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1056 mesa_vp->Base.NumNativeInstructions = 129;
1057 if (R200_DEBUG & DEBUG_FALLBACKS) {
1058 fprintf(stderr, "more than 128 native instructions\n");
1059 }
1060 return GL_FALSE;
1061 }
1062 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1063 vp->pos_end = (o_inst - vp->instr);
1064 }
1065 }
1066
1067 vp->native = GL_TRUE;
1068 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1069 #if 0
1070 fprintf(stderr, "hw program:\n");
1071 for(i=0; i < vp->program.length; i++)
1072 fprintf(stderr, "%08x\n", vp->instr[i]);
1073 #endif
1074 return GL_TRUE;
1075 }
1076
1077 void r200SetupVertexProg( GLcontext *ctx ) {
1078 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1079 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1080 GLboolean fallback;
1081 GLint i;
1082
1083 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1084 rmesa->curr_vp_hw = NULL;
1085 r200_translate_vertex_program(ctx, vp);
1086 }
1087 /* could optimize setting up vertex progs away for non-tcl hw */
1088 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
1089 rmesa->r200Screen->drmSupportsVertexProgram);
1090 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1091 if (rmesa->TclFallback) return;
1092
1093 R200_STATECHANGE( rmesa, vap );
1094 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1095 maybe only when using more than 64 inst / 96 param? */
1096 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1097
1098 R200_STATECHANGE( rmesa, pvs );
1099
1100 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1101 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1102 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1103 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1104 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1105
1106 /* maybe user clip planes just work with vertex progs... untested */
1107 if (ctx->Transform.ClipPlanesEnabled) {
1108 R200_STATECHANGE( rmesa, tcl );
1109 if (vp->mesa_program.IsPositionInvariant) {
1110 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1111 }
1112 else {
1113 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1114 }
1115 }
1116
1117 if (vp != rmesa->curr_vp_hw) {
1118 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1119 drm_radeon_cmd_header_t tmp;
1120
1121 R200_STATECHANGE( rmesa, vpi[0] );
1122 R200_STATECHANGE( rmesa, vpi[1] );
1123
1124 /* FIXME: what about using a memcopy... */
1125 for (i = 0; (i < 64) && i < count; i++) {
1126 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1127 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1128 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1129 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1130 }
1131 /* hack up the cmd_size so not the whole state atom is emitted always.
1132 This may require some more thought, we may emit half progs on lost state, but
1133 hopefully it won't matter?
1134 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1135 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1136 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1137 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1138 tmp.veclinear.count = (count > 64) ? 64 : count;
1139 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1140 if (count > 64) {
1141 for (i = 0; i < (count - 64); i++) {
1142 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1143 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1144 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1145 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1146 }
1147 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1148 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1149 tmp.veclinear.count = count - 64;
1150 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1151 }
1152 rmesa->curr_vp_hw = vp;
1153 }
1154 }
1155
1156
1157 static void
1158 r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1159 {
1160 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1161
1162 switch(target){
1163 case GL_VERTEX_PROGRAM_ARB:
1164 rmesa->curr_vp_hw = NULL;
1165 break;
1166 default:
1167 _mesa_problem(ctx, "Target not supported yet!");
1168 break;
1169 }
1170 }
1171
1172 static struct gl_program *
1173 r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1174 {
1175 struct r200_vertex_program *vp;
1176
1177 switch(target){
1178 case GL_VERTEX_PROGRAM_ARB:
1179 vp = CALLOC_STRUCT(r200_vertex_program);
1180 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1181 case GL_FRAGMENT_PROGRAM_ARB:
1182 case GL_FRAGMENT_PROGRAM_NV:
1183 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1184 default:
1185 _mesa_problem(ctx, "Bad target in r200NewProgram");
1186 }
1187 return NULL;
1188 }
1189
1190
1191 static void
1192 r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1193 {
1194 _mesa_delete_program(ctx, prog);
1195 }
1196
1197 static void
1198 r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1199 {
1200 struct r200_vertex_program *vp = (void *)prog;
1201 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1202
1203 switch(target) {
1204 case GL_VERTEX_PROGRAM_ARB:
1205 vp->translated = GL_FALSE;
1206 vp->fogpidx = 0;
1207 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1208 r200_translate_vertex_program(ctx, vp);
1209 rmesa->curr_vp_hw = NULL;
1210 break;
1211 case GL_FRAGMENT_SHADER_ATI:
1212 rmesa->afs_loaded = NULL;
1213 break;
1214 }
1215 /* need this for tcl fallbacks */
1216 _tnl_program_string(ctx, target, prog);
1217 }
1218
1219 static GLboolean
1220 r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1221 {
1222 struct r200_vertex_program *vp = (void *)prog;
1223
1224 switch(target){
1225 case GL_VERTEX_STATE_PROGRAM_NV:
1226 case GL_VERTEX_PROGRAM_ARB:
1227 if (!vp->translated) {
1228 r200_translate_vertex_program(ctx, vp);
1229 }
1230 /* does not take parameters etc. into account */
1231 return vp->native;
1232 default:
1233 _mesa_problem(ctx, "Bad target in r200NewProgram");
1234 }
1235 return 0;
1236 }
1237
1238 void r200InitShaderFuncs(struct dd_function_table *functions)
1239 {
1240 functions->NewProgram = r200NewProgram;
1241 functions->BindProgram = r200BindProgram;
1242 functions->DeleteProgram = r200DeleteProgram;
1243 functions->ProgramStringNotify = r200ProgramStringNotify;
1244 functions->IsProgramNative = r200IsProgramNative;
1245 }