b5afe51d82468b37ca82a07b34a14e4555f0f04c
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "glheader.h"
34 #include "macros.h"
35 #include "enums.h"
36 #include "program.h"
37
38 #include "r200_context.h"
39 #include "r200_vertprog.h"
40 #include "r200_ioctl.h"
41 #include "r200_tcl.h"
42 #include "program_instruction.h"
43 #include "programopt.h"
44 #include "tnl/tnl.h"
45
/*
 * Compile-time sanity checks.  The translation helpers in this file hand
 * Mesa's SWIZZLE_* and WRITEMASK_* values to the hardware encoding unchanged,
 * which only works while they are numerically equal to the corresponding
 * VSF_IN_COMPONENT_* and VSF_FLAG_* values.
 */
#if (SWIZZLE_X != VSF_IN_COMPONENT_X) || \
    (SWIZZLE_Y != VSF_IN_COMPONENT_Y) || \
    (SWIZZLE_Z != VSF_IN_COMPONENT_Z) || \
    (SWIZZLE_W != VSF_IN_COMPONENT_W) || \
    (SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO) || \
    (SWIZZLE_ONE != VSF_IN_COMPONENT_ONE)
#error Cannot change these!
#endif

#if (WRITEMASK_X != VSF_FLAG_X) || \
    (WRITEMASK_Y != VSF_FLAG_Y) || \
    (WRITEMASK_Z != VSF_FLAG_Z) || \
    (WRITEMASK_W != VSF_FLAG_W)
#error Cannot change these!
#endif
58
59 #define SCALAR_FLAG (1<<31)
60 #define FLAG_MASK (1<<31)
61 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
62 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
63
64 static struct{
65 char *name;
66 int opcode;
67 unsigned long ip; /* number of input operands and flags */
68 }op_names[]={
69 OPN(ABS, 1),
70 OPN(ADD, 2),
71 OPN(ARL, 1|SCALAR_FLAG),
72 OPN(DP3, 2),
73 OPN(DP4, 2),
74 OPN(DPH, 2),
75 OPN(DST, 2),
76 OPN(EX2, 1|SCALAR_FLAG),
77 OPN(EXP, 1|SCALAR_FLAG),
78 OPN(FLR, 1),
79 OPN(FRC, 1),
80 OPN(LG2, 1|SCALAR_FLAG),
81 OPN(LIT, 1),
82 OPN(LOG, 1|SCALAR_FLAG),
83 OPN(MAD, 3),
84 OPN(MAX, 2),
85 OPN(MIN, 2),
86 OPN(MOV, 1),
87 OPN(MUL, 2),
88 OPN(POW, 2|SCALAR_FLAG),
89 OPN(RCP, 1|SCALAR_FLAG),
90 OPN(RSQ, 1|SCALAR_FLAG),
91 OPN(SGE, 2),
92 OPN(SLT, 2),
93 OPN(SUB, 2),
94 OPN(SWZ, 1),
95 OPN(XPD, 2),
96 OPN(PRINT, 0),
97 OPN(END, 0),
98 };
99 #undef OPN
100
101 static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
102 {
103 r200ContextPtr rmesa = R200_CONTEXT( ctx );
104 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
105 int pi;
106 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
107 struct gl_program_parameter_list *paramList;
108 drm_radeon_cmd_header_t tmp;
109
110 R200_STATECHANGE( rmesa, vpp[0] );
111 R200_STATECHANGE( rmesa, vpp[1] );
112 assert(mesa_vp->Base.Parameters);
113 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
114 paramList = mesa_vp->Base.Parameters;
115
116 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
117 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
118 return GL_FALSE;
119 }
120
121 for(pi = 0; pi < paramList->NumParameters; pi++) {
122 switch(paramList->Parameters[pi].Type) {
123 case PROGRAM_STATE_VAR:
124 case PROGRAM_NAMED_PARAM:
125 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
126 case PROGRAM_CONSTANT:
127 *fcmd++ = paramList->ParameterValues[pi][0];
128 *fcmd++ = paramList->ParameterValues[pi][1];
129 *fcmd++ = paramList->ParameterValues[pi][2];
130 *fcmd++ = paramList->ParameterValues[pi][3];
131 break;
132 default:
133 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
134 break;
135 }
136 if (pi == 95) {
137 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
138 }
139 }
140 /* hack up the cmd_size so not the whole state atom is emitted always. */
141 rmesa->hw.vpp[0].cmd_size =
142 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
143 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
144 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
145 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
146 if (paramList->NumParameters > 96) {
147 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
148 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
149 tmp.veclinear.count = paramList->NumParameters - 96;
150 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
151 }
152 return GL_TRUE;
153 }
154
155 static __inline unsigned long t_dst_mask(GLuint mask)
156 {
157 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
158 return mask & VSF_FLAG_ALL;
159 }
160
161 static unsigned long t_dst(struct prog_dst_register *dst)
162 {
163 switch(dst->File) {
164 case PROGRAM_TEMPORARY:
165 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
166 | R200_VSF_OUT_CLASS_TMP);
167 case PROGRAM_OUTPUT:
168 switch (dst->Index) {
169 case VERT_RESULT_HPOS:
170 return R200_VSF_OUT_CLASS_RESULT_POS;
171 case VERT_RESULT_COL0:
172 return R200_VSF_OUT_CLASS_RESULT_COLOR;
173 case VERT_RESULT_COL1:
174 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
175 | R200_VSF_OUT_CLASS_RESULT_COLOR);
176 case VERT_RESULT_FOGC:
177 return R200_VSF_OUT_CLASS_RESULT_FOGC;
178 case VERT_RESULT_TEX0:
179 case VERT_RESULT_TEX1:
180 case VERT_RESULT_TEX2:
181 case VERT_RESULT_TEX3:
182 case VERT_RESULT_TEX4:
183 case VERT_RESULT_TEX5:
184 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
185 | R200_VSF_OUT_CLASS_RESULT_TEXC);
186 case VERT_RESULT_PSIZ:
187 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
188 default:
189 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
190 exit(0);
191 return 0;
192 }
193 case PROGRAM_ADDRESS:
194 assert (dst->Index == 0);
195 return R200_VSF_OUT_CLASS_ADDR;
196 default:
197 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
198 exit(0);
199 return 0;
200 }
201 }
202
203 static unsigned long t_src_class(enum register_file file)
204 {
205
206 switch(file){
207 case PROGRAM_TEMPORARY:
208 return VSF_IN_CLASS_TMP;
209
210 case PROGRAM_INPUT:
211 return VSF_IN_CLASS_ATTR;
212
213 case PROGRAM_LOCAL_PARAM:
214 case PROGRAM_ENV_PARAM:
215 case PROGRAM_NAMED_PARAM:
216 case PROGRAM_STATE_VAR:
217 return VSF_IN_CLASS_PARAM;
218 /*
219 case PROGRAM_OUTPUT:
220 case PROGRAM_WRITE_ONLY:
221 case PROGRAM_ADDRESS:
222 */
223 default:
224 fprintf(stderr, "problem in %s", __FUNCTION__);
225 exit(0);
226 }
227 }
228
229 static __inline unsigned long t_swizzle(GLubyte swizzle)
230 {
231 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
232 return swizzle;
233 }
234
#if 0
/**
 * Debug helper (currently compiled out): print the vp->inputs[] table to
 * stderr as "<caller>:<v0 v1 ... >".
 *
 * \param vp      vertex program to dump; NULL is reported and ignored.
 * \param caller  name printed in front of the dump.  Taken as const so that
 *                __FUNCTION__ can be passed without discarding qualifiers.
 */
static void vp_dump_inputs(struct r200_vertex_program *vp, const char *caller)
{
   int i;

   if (vp == NULL) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   for (i = 0; i < VERT_ATTRIB_MAX; i++)
      fprintf(stderr, "%d ", vp->inputs[i]);
   fprintf(stderr, ">\n");
}
#endif
252
253 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
254 {
255 /*
256 int i;
257 int max_reg = -1;
258 */
259 if(src->File == PROGRAM_INPUT){
260 /* if(vp->inputs[src->Index] != -1)
261 return vp->inputs[src->Index];
262
263 for(i=0; i < VERT_ATTRIB_MAX; i++)
264 if(vp->inputs[i] > max_reg)
265 max_reg = vp->inputs[i];
266
267 vp->inputs[src->Index] = max_reg+1;*/
268
269 //vp_dump_inputs(vp, __FUNCTION__);
270 assert(vp->inputs[src->Index] != -1);
271 return vp->inputs[src->Index];
272 } else {
273 if (src->Index < 0) {
274 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
275 return 0;
276 }
277 return src->Index;
278 }
279 }
280
281 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
282 {
283
284 return MAKE_VSF_SOURCE(t_src_index(vp, src),
285 t_swizzle(GET_SWZ(src->Swizzle, 0)),
286 t_swizzle(GET_SWZ(src->Swizzle, 1)),
287 t_swizzle(GET_SWZ(src->Swizzle, 2)),
288 t_swizzle(GET_SWZ(src->Swizzle, 3)),
289 t_src_class(src->File),
290 src->NegateBase) | (src->RelAddr << 4);
291 }
292
293 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
294 {
295
296 return MAKE_VSF_SOURCE(t_src_index(vp, src),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_src_class(src->File),
302 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
303 }
304
305 static unsigned long t_opcode(enum prog_opcode opcode)
306 {
307
308 switch(opcode){
309 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
310 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
311 * seems to ignore neg offsets which isn't quite correct...
312 */
313 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
314 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
315 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
316 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
317 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
318 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
319 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
320 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
321 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
322 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
323 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
324 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
325 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
326 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
327 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
328 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
329
330 default:
331 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
332 }
333 exit(-1);
334 return 0;
335 }
336
337 static unsigned long op_operands(enum prog_opcode opcode)
338 {
339 int i;
340
341 /* Can we trust mesas opcodes to be in order ? */
342 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
343 if(op_names[i].opcode == opcode)
344 return op_names[i].ip;
345
346 fprintf(stderr, "op %d not found in op_names\n", opcode);
347 exit(-1);
348 return 0;
349 }
350
/* TODO: Get rid of t_src_class call */
/*
 * True when a and b are two different registers (their Index or RelAddr
 * differ) that are both of class PARAM or both of class ATTR.  The translator
 * reacts to this by first copying one of the two into a temporary.
 * Both arguments are evaluated several times, so they must be free of side
 * effects.
 */
#define CMP_SRCS(a, b) \
   ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
    ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
      t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
     (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
      t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are same as previous source, zero-ed out.
   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* reg with all four component selects replaced by R200_VPI_IN_SELECT_ZERO */
#define R200_VP_ZERO_SRC(reg) \
   (((reg) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
    | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
       | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))

/* reg with its low four bits replaced by 9; going by the fglrx note above
   this is presumably VSF_IN_CLASS_NONE - TODO confirm against the header */
#define R200_VP_UNUSED_SRC(reg) (((reg) & ~15) | 9)

/* All of the following read the already written sources of the instruction
   that the local variable o_inst points to. */
#define ZERO_SRC_0 R200_VP_ZERO_SRC(o_inst->src0)

#define ZERO_SRC_1 R200_VP_ZERO_SRC(o_inst->src1)

#define ZERO_SRC_2 R200_VP_ZERO_SRC(o_inst->src2)

#define UNUSED_SRC_0 R200_VP_UNUSED_SRC(o_inst->src0)

#define UNUSED_SRC_1 R200_VP_UNUSED_SRC(o_inst->src1)

#define UNUSED_SRC_2 R200_VP_UNUSED_SRC(o_inst->src2)
389
390
391 /**
392 * Generate an R200 vertex program from Mesa's internal representation.
393 *
394 * \return GL_TRUE for success, GL_FALSE for failure.
395 */
396 static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_vertex_program *vp)
397 {
398 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
399 struct prog_instruction *vpi;
400 int i;
401 VERTEX_SHADER_INSTRUCTION *o_inst;
402 unsigned long operands;
403 int are_srcs_scalar;
404 unsigned long hw_op;
405 int dofogfix = 0;
406 int fog_temp_i = 0;
407 int free_inputs;
408 int array_count = 0;
409
410 vp->native = GL_FALSE;
411 vp->translated = GL_TRUE;
412 vp->fogmode = ctx->Fog.Mode;
413
414 if (mesa_vp->Base.NumInstructions == 0)
415 return GL_FALSE;
416
417 #if 0
418 if ((mesa_vp->Base.InputsRead &
419 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
420 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
421 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
422 if (R200_DEBUG & DEBUG_FALLBACKS) {
423 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
424 mesa_vp->Base.InputsRead);
425 }
426 return GL_FALSE;
427 }
428 #endif
429
430 if ((mesa_vp->Base.OutputsWritten &
431 ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) |
432 (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
433 (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
434 (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
435 if (R200_DEBUG & DEBUG_FALLBACKS) {
436 fprintf(stderr, "can't handle vert prog outputs 0x%x\n",
437 mesa_vp->Base.OutputsWritten);
438 }
439 return GL_FALSE;
440 }
441
442 if (mesa_vp->IsNVProgram) {
443 /* subtle differences in spec like guaranteed initialized regs could cause
444 headaches. Might want to remove the driconf option to enable it completely */
445 return GL_FALSE;
446 }
447 /* Initial value should be last tmp reg that hw supports.
448 Strangely enough r300 doesnt mind even though these would be out of range.
449 Smart enough to realize that it doesnt need it? */
450 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
451 struct prog_src_register src[3];
452 struct prog_dst_register dst;
453
454 /* FIXME: is changing the prog safe to do here? */
455 if (mesa_vp->IsPositionInvariant &&
456 /* make sure we only do this once */
457 !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
458 _mesa_insert_mvp_code(ctx, mesa_vp);
459 }
460
461 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
462 base e isn't directly available neither. */
463 if (mesa_vp->Base.OutputsWritten & VERT_RESULT_FOGC && !vp->fogpidx) {
464 struct gl_program_parameter_list *paramList;
465 GLint tokens[6] = { STATE_FOG_PARAMS, 0, 0, 0, 0, 0 };
466 paramList = mesa_vp->Base.Parameters;
467 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
468 }
469
470 vp->pos_end = 0;
471 mesa_vp->Base.NumNativeInstructions = 0;
472 if (mesa_vp->Base.Parameters)
473 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
474 else
475 mesa_vp->Base.NumNativeParameters = 0;
476
477 for(i = 0; i < VERT_ATTRIB_MAX; i++)
478 vp->inputs[i] = -1;
479 free_inputs = 0x2ffd;
480
481 /* fglrx uses fixed inputs as follows for conventional attribs.
482 generic attribs use non-fixed assignment, fglrx will always use the
483 lowest attrib values available. We'll just do the same.
484 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
485 and 13 in a hw vertex prog.
486 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
487 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
488 Additionally, not more than 12 arrays in total are possible I think.
489 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
490 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
491 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
492 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
493 */
494
495 /* attr 4,5 and 13 are only used with generic attribs.
496 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
497 not possibe to use with vertex progs as it is lacking in vert prog specification) */
498 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
499 if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
500 vp->inputs[VERT_ATTRIB_POS] = 0;
501 free_inputs &= ~(1 << 0);
502 array_count++;
503 }
504 if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
505 /* we don't actually handle that later. Then again, we don't have to... */
506 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
507 array_count++;
508 }
509 if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
510 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
511 array_count++;
512 }
513 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
514 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
515 free_inputs &= ~(1 << 2);
516 array_count++;
517 }
518 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
519 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
520 free_inputs &= ~(1 << 3);
521 array_count++;
522 }
523 if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
524 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
525 }
526 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) {
527 if (mesa_vp->Base.InputsRead & (1 << i)) {
528 vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6;
529 free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6));
530 array_count++;
531 }
532 }
533 /* using VERT_ATTRIB_TEX6/7 would be illegal */
534 /* completely ignore aliasing? */
535 for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
536 int j;
537 /* completely ignore aliasing? */
538 if (mesa_vp->Base.InputsRead & (1 << i)) {
539 array_count++;
540 if (array_count > 12) {
541 if (R200_DEBUG & DEBUG_FALLBACKS) {
542 fprintf(stderr, "more than 12 attribs used in vert prog\n");
543 }
544 return GL_FALSE;
545 }
546 for (j = 0; j < 14; j++) {
547 /* will always find one due to limited array_count */
548 if (free_inputs & (1 << j)) {
549 free_inputs &= ~(1 << j);
550 vp->inputs[i] = j;
551 break;
552 }
553 }
554 }
555 }
556
557 if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
558 if (R200_DEBUG & DEBUG_FALLBACKS) {
559 fprintf(stderr, "can't handle vert prog without position output\n");
560 }
561 return GL_FALSE;
562 }
563 if (free_inputs & 1) {
564 if (R200_DEBUG & DEBUG_FALLBACKS) {
565 fprintf(stderr, "can't handle vert prog without position input\n");
566 }
567 return GL_FALSE;
568 }
569
570 o_inst = vp->instr;
571 for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
572 operands = op_operands(vpi->Opcode);
573 are_srcs_scalar = operands & SCALAR_FLAG;
574 operands &= OP_MASK;
575
576 for(i = 0; i < operands; i++) {
577 src[i] = vpi->SrcReg[i];
578 /* hack up default attrib values as per spec as swizzling.
579 normal, fog, secondary color. Crazy?
580 May need more if we don't submit vec4 elements? */
581 if (src[i].File == PROGRAM_INPUT) {
582 if (src[i].Index == VERT_ATTRIB_NORMAL) {
583 int j;
584 for (j = 0; j < 4; j++) {
585 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
586 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
587 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
588 }
589 }
590 }
591 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
592 int j;
593 for (j = 0; j < 4; j++) {
594 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
595 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
596 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
597 }
598 }
599 }
600 else if (src[i].Index == VERT_ATTRIB_FOG) {
601 int j;
602 for (j = 0; j < 4; j++) {
603 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
604 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
605 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
606 }
607 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
608 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
609 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
610 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
611 }
612 }
613 }
614 }
615 }
616
617 if(operands == 3){
618 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
619 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
620 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
621 VSF_FLAG_ALL);
622
623 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
624 SWIZZLE_X, SWIZZLE_Y,
625 SWIZZLE_Z, SWIZZLE_W,
626 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
627
628 o_inst->src1 = ZERO_SRC_0;
629 o_inst->src2 = UNUSED_SRC_1;
630 o_inst++;
631
632 src[2].File = PROGRAM_TEMPORARY;
633 src[2].Index = u_temp_i;
634 src[2].RelAddr = 0;
635 u_temp_i--;
636 }
637 }
638
639 if(operands >= 2){
640 if( CMP_SRCS(src[1], src[0]) ){
641 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
642 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
643 VSF_FLAG_ALL);
644
645 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
646 SWIZZLE_X, SWIZZLE_Y,
647 SWIZZLE_Z, SWIZZLE_W,
648 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
649
650 o_inst->src1 = ZERO_SRC_0;
651 o_inst->src2 = UNUSED_SRC_1;
652 o_inst++;
653
654 src[0].File = PROGRAM_TEMPORARY;
655 src[0].Index = u_temp_i;
656 src[0].RelAddr = 0;
657 u_temp_i--;
658 }
659 }
660
661 dst = vpi->DstReg;
662 if (dst.File == PROGRAM_OUTPUT &&
663 dst.Index == VERT_RESULT_FOGC &&
664 dst.WriteMask & WRITEMASK_X) {
665 fog_temp_i = u_temp_i;
666 dst.File = PROGRAM_TEMPORARY;
667 dst.Index = fog_temp_i;
668 dofogfix = 1;
669 u_temp_i--;
670 }
671
672 /* These ops need special handling. */
673 switch(vpi->Opcode){
674 case OPCODE_POW:
675 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
676 So may need to insert additional instruction */
677 if ((src[0].File == src[1].File) &&
678 (src[0].Index == src[1].Index)) {
679 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
680 t_dst_mask(dst.WriteMask));
681 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
682 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
683 SWIZZLE_ZERO,
684 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
685 SWIZZLE_ZERO,
686 t_src_class(src[0].File),
687 src[0].NegateBase) | (src[0].RelAddr << 4);
688 o_inst->src1 = UNUSED_SRC_0;
689 o_inst->src2 = UNUSED_SRC_0;
690 }
691 else {
692 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
693 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
694 VSF_FLAG_ALL);
695 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
696 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
697 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
698 t_src_class(src[0].File),
699 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
700 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
701 SWIZZLE_ZERO, SWIZZLE_ZERO,
702 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
703 t_src_class(src[1].File),
704 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
705 o_inst->src2 = UNUSED_SRC_1;
706 o_inst++;
707
708 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
709 t_dst_mask(dst.WriteMask));
710 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
711 VSF_IN_COMPONENT_X,
712 VSF_IN_COMPONENT_Y,
713 VSF_IN_COMPONENT_Z,
714 VSF_IN_COMPONENT_W,
715 VSF_IN_CLASS_TMP,
716 VSF_FLAG_NONE);
717 o_inst->src1 = UNUSED_SRC_0;
718 o_inst->src2 = UNUSED_SRC_0;
719 u_temp_i--;
720 }
721 goto next;
722
723 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
724 case OPCODE_SWZ:
725 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
726 t_dst_mask(dst.WriteMask));
727 o_inst->src0 = t_src(vp, &src[0]);
728 o_inst->src1 = ZERO_SRC_0;
729 o_inst->src2 = UNUSED_SRC_1;
730 goto next;
731
732 case OPCODE_MAD:
733 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
734 src[1].File == PROGRAM_TEMPORARY &&
735 src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
736
737 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
738 t_dst_mask(dst.WriteMask));
739 o_inst->src0 = t_src(vp, &src[0]);
740 #if 0
741 if ((o_inst - vp->instr) == 31) {
742 /* fix up the broken vertex program of quake4 demo... */
743 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
744 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
745 t_src_class(src[1].File),
746 src[1].NegateBase) | (src[1].RelAddr << 4);
747 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
748 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
749 t_src_class(src[1].File),
750 src[1].NegateBase) | (src[1].RelAddr << 4);
751 }
752 else {
753 o_inst->src1 = t_src(vp, &src[1]);
754 o_inst->src2 = t_src(vp, &src[2]);
755 }
756 #else
757 o_inst->src1 = t_src(vp, &src[1]);
758 o_inst->src2 = t_src(vp, &src[2]);
759 #endif
760 goto next;
761
762 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
763 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
764 t_dst_mask(dst.WriteMask));
765
766 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
767 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
768 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
769 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
770 SWIZZLE_ZERO,
771 t_src_class(src[0].File),
772 src[0].NegateBase) | (src[0].RelAddr << 4);
773
774 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
775 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
776 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
777 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
778 SWIZZLE_ZERO,
779 t_src_class(src[1].File),
780 src[1].NegateBase) | (src[1].RelAddr << 4);
781
782 o_inst->src2 = UNUSED_SRC_1;
783 goto next;
784
785 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
786 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
787 t_dst_mask(dst.WriteMask));
788
789 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
790 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
791 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
792 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
793 VSF_IN_COMPONENT_ONE,
794 t_src_class(src[0].File),
795 src[0].NegateBase) | (src[0].RelAddr << 4);
796 o_inst->src1 = t_src(vp, &src[1]);
797 o_inst->src2 = UNUSED_SRC_1;
798 goto next;
799
800 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
801 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
802 t_dst_mask(dst.WriteMask));
803
804 o_inst->src0 = t_src(vp, &src[0]);
805 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
806 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
807 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
808 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
809 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
810 t_src_class(src[1].File),
811 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
812 o_inst->src2 = UNUSED_SRC_1;
813 goto next;
814
815 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
816 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
817 t_dst_mask(dst.WriteMask));
818
819 o_inst->src0=t_src(vp, &src[0]);
820 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
821 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
822 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
823 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
824 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
825 t_src_class(src[0].File),
826 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
827 o_inst->src2 = UNUSED_SRC_1;
828 goto next;
829
830 case OPCODE_FLR:
831 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
832 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
833
834 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
835 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
836 t_dst_mask(dst.WriteMask));
837
838 o_inst->src0 = t_src(vp, &src[0]);
839 o_inst->src1 = UNUSED_SRC_0;
840 o_inst->src2 = UNUSED_SRC_1;
841 o_inst++;
842
843 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
844 t_dst_mask(dst.WriteMask));
845
846 o_inst->src0 = t_src(vp, &src[0]);
847 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
848 VSF_IN_COMPONENT_X,
849 VSF_IN_COMPONENT_Y,
850 VSF_IN_COMPONENT_Z,
851 VSF_IN_COMPONENT_W,
852 VSF_IN_CLASS_TMP,
853 /* Not 100% sure about this */
854 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
855
856 o_inst->src2 = UNUSED_SRC_0;
857 u_temp_i--;
858 goto next;
859
860 case OPCODE_XPD:
861 /* mul r0, r1.yzxw, r2.zxyw
862 mad r0, -r2.yzxw, r1.zxyw, r0
863 NOTE: might need MAD_2
864 */
865
866 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
867 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
868 t_dst_mask(dst.WriteMask));
869
870 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
871 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
872 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
873 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
874 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
875 t_src_class(src[0].File),
876 src[0].NegateBase) | (src[0].RelAddr << 4);
877
878 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
879 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
880 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
881 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
882 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
883 t_src_class(src[1].File),
884 src[1].NegateBase) | (src[1].RelAddr << 4);
885
886 o_inst->src2 = UNUSED_SRC_1;
887 o_inst++;
888 u_temp_i--;
889
890 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst),
891 t_dst_mask(dst.WriteMask));
892
893 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
894 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
895 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
896 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
897 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
898 t_src_class(src[1].File),
899 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
900
901 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
902 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
903 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
904 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
905 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
906 t_src_class(src[0].File),
907 src[0].NegateBase) | (src[0].RelAddr << 4);
908
909 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
910 VSF_IN_COMPONENT_X,
911 VSF_IN_COMPONENT_Y,
912 VSF_IN_COMPONENT_Z,
913 VSF_IN_COMPONENT_W,
914 VSF_IN_CLASS_TMP,
915 VSF_FLAG_NONE);
916 goto next;
917
918 case OPCODE_END:
919 assert(0);
920 default:
921 break;
922 }
923
924 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
925 t_dst_mask(dst.WriteMask));
926
927 if(are_srcs_scalar){
928 switch(operands){
929 case 1:
930 o_inst->src0 = t_src_scalar(vp, &src[0]);
931 o_inst->src1 = UNUSED_SRC_0;
932 o_inst->src2 = UNUSED_SRC_1;
933 break;
934
935 case 2:
936 o_inst->src0 = t_src_scalar(vp, &src[0]);
937 o_inst->src1 = t_src_scalar(vp, &src[1]);
938 o_inst->src2 = UNUSED_SRC_1;
939 break;
940
941 case 3:
942 o_inst->src0 = t_src_scalar(vp, &src[0]);
943 o_inst->src1 = t_src_scalar(vp, &src[1]);
944 o_inst->src2 = t_src_scalar(vp, &src[2]);
945 break;
946
947 default:
948 fprintf(stderr, "illegal number of operands %lu\n", operands);
949 exit(-1);
950 break;
951 }
952 } else {
953 switch(operands){
954 case 1:
955 o_inst->src0 = t_src(vp, &src[0]);
956 o_inst->src1 = UNUSED_SRC_0;
957 o_inst->src2 = UNUSED_SRC_1;
958 break;
959
960 case 2:
961 o_inst->src0 = t_src(vp, &src[0]);
962 o_inst->src1 = t_src(vp, &src[1]);
963 o_inst->src2 = UNUSED_SRC_1;
964 break;
965
966 case 3:
967 o_inst->src0 = t_src(vp, &src[0]);
968 o_inst->src1 = t_src(vp, &src[1]);
969 o_inst->src2 = t_src(vp, &src[2]);
970 break;
971
972 default:
973 fprintf(stderr, "illegal number of operands %lu\n", operands);
974 exit(-1);
975 break;
976 }
977 }
978 next:
979
980 if (dofogfix) {
981 o_inst++;
982 if (vp->fogmode == GL_EXP) {
983 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
984 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
985 VSF_FLAG_X);
986 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
987 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
988 o_inst->src2 = UNUSED_SRC_1;
989 o_inst++;
990 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
991 R200_VSF_OUT_CLASS_RESULT_FOGC,
992 VSF_FLAG_X);
993 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
994 o_inst->src1 = UNUSED_SRC_0;
995 o_inst->src2 = UNUSED_SRC_1;
996 }
997 else if (vp->fogmode == GL_EXP2) {
998 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
999 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1000 VSF_FLAG_X);
1001 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1002 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1003 o_inst->src2 = UNUSED_SRC_1;
1004 o_inst++;
1005 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1006 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1007 VSF_FLAG_X);
1008 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1009 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1010 o_inst->src2 = UNUSED_SRC_1;
1011 o_inst++;
1012 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1013 R200_VSF_OUT_CLASS_RESULT_FOGC,
1014 VSF_FLAG_X);
1015 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1016 o_inst->src1 = UNUSED_SRC_0;
1017 o_inst->src2 = UNUSED_SRC_1;
1018 }
1019 else { /* fogmode == GL_LINEAR */
1020 /* could do that with single op (dot) if using params like
1021 with fixed function pipeline fog */
1022 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1023 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1024 VSF_FLAG_X);
1025 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1026 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1027 o_inst->src2 = UNUSED_SRC_1;
1028 o_inst++;
1029 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1030 R200_VSF_OUT_CLASS_RESULT_FOGC,
1031 VSF_FLAG_X);
1032 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1033 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1034 o_inst->src2 = UNUSED_SRC_1;
1035
1036 }
1037 dofogfix = 0;
1038 }
1039
1040 if (mesa_vp->Base.NumNativeTemporaries <
1041 (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) {
1042 mesa_vp->Base.NumNativeTemporaries =
1043 mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i);
1044 }
1045 if (u_temp_i < mesa_vp->Base.NumTemporaries) {
1046 if (R200_DEBUG & DEBUG_FALLBACKS) {
1047 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
1048 }
1049 return GL_FALSE;
1050 }
1051 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1052 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1053 mesa_vp->Base.NumNativeInstructions = 129;
1054 if (R200_DEBUG & DEBUG_FALLBACKS) {
1055 fprintf(stderr, "more than 128 native instructions\n");
1056 }
1057 return GL_FALSE;
1058 }
1059 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1060 vp->pos_end = (o_inst - vp->instr);
1061 }
1062 }
1063
1064 vp->native = GL_TRUE;
1065 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1066 #if 0
1067 fprintf(stderr, "hw program:\n");
1068 for(i=0; i < vp->program.length; i++)
1069 fprintf(stderr, "%08x\n", vp->instr[i]);
1070 #endif
1071 return GL_TRUE;
1072 }
1073
1074 void r200SetupVertexProg( GLcontext *ctx ) {
1075 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1076 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1077 GLboolean fallback;
1078 GLint i;
1079
1080 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1081 rmesa->curr_vp_hw = NULL;
1082 r200_translate_vertex_program(ctx, vp);
1083 }
1084 /* could optimize setting up vertex progs away for non-tcl hw */
1085 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
1086 rmesa->r200Screen->drmSupportsVertexProgram);
1087 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1088 if (rmesa->TclFallback) return;
1089
1090 R200_STATECHANGE( rmesa, vap );
1091 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1092 maybe only when using more than 64 inst / 96 param? */
1093 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1094
1095 R200_STATECHANGE( rmesa, pvs );
1096
1097 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1098 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1099 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1100 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1101 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1102
1103 /* maybe user clip planes just work with vertex progs... untested */
1104 if (ctx->Transform.ClipPlanesEnabled) {
1105 R200_STATECHANGE( rmesa, tcl );
1106 if (vp->mesa_program.IsPositionInvariant) {
1107 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1108 }
1109 else {
1110 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1111 }
1112 }
1113
1114 if (vp != rmesa->curr_vp_hw) {
1115 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1116 drm_radeon_cmd_header_t tmp;
1117
1118 R200_STATECHANGE( rmesa, vpi[0] );
1119 R200_STATECHANGE( rmesa, vpi[1] );
1120
1121 /* FIXME: what about using a memcopy... */
1122 for (i = 0; (i < 64) && i < count; i++) {
1123 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1124 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1125 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1126 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1127 }
1128 /* hack up the cmd_size so not the whole state atom is emitted always.
1129 This may require some more thought, we may emit half progs on lost state, but
1130 hopefully it won't matter?
1131 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1132 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1133 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1134 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1135 tmp.veclinear.count = (count > 64) ? 64 : count;
1136 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1137 if (count > 64) {
1138 for (i = 0; i < (count - 64); i++) {
1139 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1140 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1141 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1142 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1143 }
1144 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1145 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1146 tmp.veclinear.count = count - 64;
1147 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1148 }
1149 rmesa->curr_vp_hw = vp;
1150 }
1151 }
1152
1153
1154 static void
1155 r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1156 {
1157 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1158
1159 switch(target){
1160 case GL_VERTEX_PROGRAM_ARB:
1161 rmesa->curr_vp_hw = NULL;
1162 break;
1163 default:
1164 _mesa_problem(ctx, "Target not supported yet!");
1165 break;
1166 }
1167 }
1168
1169 static struct gl_program *
1170 r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1171 {
1172 struct r200_vertex_program *vp;
1173
1174 switch(target){
1175 case GL_VERTEX_PROGRAM_ARB:
1176 vp = CALLOC_STRUCT(r200_vertex_program);
1177 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1178 case GL_FRAGMENT_PROGRAM_ARB:
1179 case GL_FRAGMENT_PROGRAM_NV:
1180 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1181 default:
1182 _mesa_problem(ctx, "Bad target in r200NewProgram");
1183 }
1184 return NULL;
1185 }
1186
1187
1188 static void
1189 r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1190 {
1191 _mesa_delete_program(ctx, prog);
1192 }
1193
1194 static void
1195 r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1196 {
1197 struct r200_vertex_program *vp = (void *)prog;
1198 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1199
1200 switch(target) {
1201 case GL_VERTEX_PROGRAM_ARB:
1202 vp->translated = GL_FALSE;
1203 vp->fogpidx = 0;
1204 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1205 r200_translate_vertex_program(ctx, vp);
1206 rmesa->curr_vp_hw = NULL;
1207 break;
1208 }
1209 /* need this for tcl fallbacks */
1210 _tnl_program_string(ctx, target, prog);
1211 }
1212
1213 static GLboolean
1214 r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1215 {
1216 struct r200_vertex_program *vp = (void *)prog;
1217
1218 switch(target){
1219 case GL_VERTEX_STATE_PROGRAM_NV:
1220 case GL_VERTEX_PROGRAM_ARB:
1221 if (!vp->translated) {
1222 r200_translate_vertex_program(ctx, vp);
1223 }
1224 /* does not take parameters etc. into account */
1225 return vp->native;
1226 default:
1227 _mesa_problem(ctx, "Bad target in r200NewProgram");
1228 }
1229 return 0;
1230 }
1231
1232 void r200InitShaderFuncs(struct dd_function_table *functions)
1233 {
1234 functions->NewProgram = r200NewProgram;
1235 functions->BindProgram = r200BindProgram;
1236 functions->DeleteProgram = r200DeleteProgram;
1237 functions->ProgramStringNotify = r200ProgramStringNotify;
1238 functions->IsProgramNative = r200IsProgramNative;
1239 }