Merge branch 'master' into i915-unification
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "shader/program.h"
37 #include "shader/prog_instruction.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_statevars.h"
40 #include "shader/programopt.h"
41 #include "tnl/tnl.h"
42
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
46 #include "r200_tcl.h"
47
48 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
49 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
50 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
51 SWIZZLE_W != VSF_IN_COMPONENT_W || \
52 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
53 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
54 WRITEMASK_X != VSF_FLAG_X || \
55 WRITEMASK_Y != VSF_FLAG_Y || \
56 WRITEMASK_Z != VSF_FLAG_Z || \
57 WRITEMASK_W != VSF_FLAG_W
58 #error Cannot change these!
59 #endif
60
61 #define SCALAR_FLAG (1<<31)
62 #define FLAG_MASK (1<<31)
63 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
64 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66 static struct{
67 char *name;
68 int opcode;
69 unsigned long ip; /* number of input operands and flags */
70 }op_names[]={
71 OPN(ABS, 1),
72 OPN(ADD, 2),
73 OPN(ARL, 1|SCALAR_FLAG),
74 OPN(DP3, 2),
75 OPN(DP4, 2),
76 OPN(DPH, 2),
77 OPN(DST, 2),
78 OPN(EX2, 1|SCALAR_FLAG),
79 OPN(EXP, 1|SCALAR_FLAG),
80 OPN(FLR, 1),
81 OPN(FRC, 1),
82 OPN(LG2, 1|SCALAR_FLAG),
83 OPN(LIT, 1),
84 OPN(LOG, 1|SCALAR_FLAG),
85 OPN(MAD, 3),
86 OPN(MAX, 2),
87 OPN(MIN, 2),
88 OPN(MOV, 1),
89 OPN(MUL, 2),
90 OPN(POW, 2|SCALAR_FLAG),
91 OPN(RCP, 1|SCALAR_FLAG),
92 OPN(RSQ, 1|SCALAR_FLAG),
93 OPN(SGE, 2),
94 OPN(SLT, 2),
95 OPN(SUB, 2),
96 OPN(SWZ, 1),
97 OPN(XPD, 2),
98 OPN(PRINT, 0),
99 OPN(END, 0),
100 };
101 #undef OPN
102
103 static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
104 {
105 r200ContextPtr rmesa = R200_CONTEXT( ctx );
106 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
107 int pi;
108 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
109 struct gl_program_parameter_list *paramList;
110 drm_radeon_cmd_header_t tmp;
111
112 R200_STATECHANGE( rmesa, vpp[0] );
113 R200_STATECHANGE( rmesa, vpp[1] );
114 assert(mesa_vp->Base.Parameters);
115 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
116 paramList = mesa_vp->Base.Parameters;
117
118 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
119 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
120 return GL_FALSE;
121 }
122
123 for(pi = 0; pi < paramList->NumParameters; pi++) {
124 switch(paramList->Parameters[pi].Type) {
125 case PROGRAM_STATE_VAR:
126 case PROGRAM_NAMED_PARAM:
127 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
128 case PROGRAM_CONSTANT:
129 *fcmd++ = paramList->ParameterValues[pi][0];
130 *fcmd++ = paramList->ParameterValues[pi][1];
131 *fcmd++ = paramList->ParameterValues[pi][2];
132 *fcmd++ = paramList->ParameterValues[pi][3];
133 break;
134 default:
135 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
136 break;
137 }
138 if (pi == 95) {
139 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
140 }
141 }
142 /* hack up the cmd_size so not the whole state atom is emitted always. */
143 rmesa->hw.vpp[0].cmd_size =
144 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
145 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
146 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
147 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
148 if (paramList->NumParameters > 96) {
149 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
150 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
151 tmp.veclinear.count = paramList->NumParameters - 96;
152 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
153 }
154 return GL_TRUE;
155 }
156
157 static __inline unsigned long t_dst_mask(GLuint mask)
158 {
159 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160 return mask & VSF_FLAG_ALL;
161 }
162
163 static unsigned long t_dst(struct prog_dst_register *dst)
164 {
165 switch(dst->File) {
166 case PROGRAM_TEMPORARY:
167 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
168 | R200_VSF_OUT_CLASS_TMP);
169 case PROGRAM_OUTPUT:
170 switch (dst->Index) {
171 case VERT_RESULT_HPOS:
172 return R200_VSF_OUT_CLASS_RESULT_POS;
173 case VERT_RESULT_COL0:
174 return R200_VSF_OUT_CLASS_RESULT_COLOR;
175 case VERT_RESULT_COL1:
176 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
177 | R200_VSF_OUT_CLASS_RESULT_COLOR);
178 case VERT_RESULT_FOGC:
179 return R200_VSF_OUT_CLASS_RESULT_FOGC;
180 case VERT_RESULT_TEX0:
181 case VERT_RESULT_TEX1:
182 case VERT_RESULT_TEX2:
183 case VERT_RESULT_TEX3:
184 case VERT_RESULT_TEX4:
185 case VERT_RESULT_TEX5:
186 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
187 | R200_VSF_OUT_CLASS_RESULT_TEXC);
188 case VERT_RESULT_PSIZ:
189 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
190 default:
191 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
192 exit(0);
193 return 0;
194 }
195 case PROGRAM_ADDRESS:
196 assert (dst->Index == 0);
197 return R200_VSF_OUT_CLASS_ADDR;
198 default:
199 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
200 exit(0);
201 return 0;
202 }
203 }
204
205 static unsigned long t_src_class(enum register_file file)
206 {
207
208 switch(file){
209 case PROGRAM_TEMPORARY:
210 return VSF_IN_CLASS_TMP;
211
212 case PROGRAM_INPUT:
213 return VSF_IN_CLASS_ATTR;
214
215 case PROGRAM_LOCAL_PARAM:
216 case PROGRAM_ENV_PARAM:
217 case PROGRAM_NAMED_PARAM:
218 case PROGRAM_STATE_VAR:
219 return VSF_IN_CLASS_PARAM;
220 /*
221 case PROGRAM_OUTPUT:
222 case PROGRAM_WRITE_ONLY:
223 case PROGRAM_ADDRESS:
224 */
225 default:
226 fprintf(stderr, "problem in %s", __FUNCTION__);
227 exit(0);
228 }
229 }
230
231 static __inline unsigned long t_swizzle(GLubyte swizzle)
232 {
233 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
234 return swizzle;
235 }
236
237 #if 0
238 static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
239 {
240 int i;
241
242 if(vp == NULL){
243 fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
244 return ;
245 }
246
247 fprintf(stderr, "%s:<", caller);
248 for(i=0; i < VERT_ATTRIB_MAX; i++)
249 fprintf(stderr, "%d ", vp->inputs[i]);
250 fprintf(stderr, ">\n");
251
252 }
253 #endif
254
255 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
256 {
257 /*
258 int i;
259 int max_reg = -1;
260 */
261 if(src->File == PROGRAM_INPUT){
262 /* if(vp->inputs[src->Index] != -1)
263 return vp->inputs[src->Index];
264
265 for(i=0; i < VERT_ATTRIB_MAX; i++)
266 if(vp->inputs[i] > max_reg)
267 max_reg = vp->inputs[i];
268
269 vp->inputs[src->Index] = max_reg+1;*/
270
271 //vp_dump_inputs(vp, __FUNCTION__);
272 assert(vp->inputs[src->Index] != -1);
273 return vp->inputs[src->Index];
274 } else {
275 if (src->Index < 0) {
276 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
277 return 0;
278 }
279 return src->Index;
280 }
281 }
282
283 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
284 {
285
286 return MAKE_VSF_SOURCE(t_src_index(vp, src),
287 t_swizzle(GET_SWZ(src->Swizzle, 0)),
288 t_swizzle(GET_SWZ(src->Swizzle, 1)),
289 t_swizzle(GET_SWZ(src->Swizzle, 2)),
290 t_swizzle(GET_SWZ(src->Swizzle, 3)),
291 t_src_class(src->File),
292 src->NegateBase) | (src->RelAddr << 4);
293 }
294
295 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
296 {
297
298 return MAKE_VSF_SOURCE(t_src_index(vp, src),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_swizzle(GET_SWZ(src->Swizzle, 0)),
302 t_swizzle(GET_SWZ(src->Swizzle, 0)),
303 t_src_class(src->File),
304 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
305 }
306
307 static unsigned long t_opcode(enum prog_opcode opcode)
308 {
309
310 switch(opcode){
311 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
312 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
313 * seems to ignore neg offsets which isn't quite correct...
314 */
315 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
316 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
317 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
318 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
319 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
320 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
321 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
322 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
323 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
324 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
325 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
326 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
327 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
328 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
329 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
330 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
331
332 default:
333 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
334 }
335 exit(-1);
336 return 0;
337 }
338
339 static unsigned long op_operands(enum prog_opcode opcode)
340 {
341 int i;
342
343 /* Can we trust mesas opcodes to be in order ? */
344 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
345 if(op_names[i].opcode == opcode)
346 return op_names[i].ip;
347
348 fprintf(stderr, "op %d not found in op_names\n", opcode);
349 exit(-1);
350 return 0;
351 }
352
/* TODO: Get rid of t_src_class call */
/* Non-zero when two source operands name different registers (their index
 * or relative-addressing flag differs) while both are of the parameter class
 * or both of the attribute class.  The translator copies one of the two into
 * a temporary when this holds -- presumably because one instruction cannot
 * read two different registers of those classes; confirm against hw docs.
 * Both arguments are evaluated several times, so they must be free of side
 * effects. */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* How fglrx on rv250 encodes sources an instruction does not really use:
   - a source that is unused but necessary repeats the previous source with
     all four component selectors set to zero;
   - a source that is unnecessary repeats the previous source with
     VSF_IN_CLASS_NONE set.
   E.g. an ADD (2 args) used as a MOV has its 2nd arg zero-ed out and its
   3rd arg set to VSF_IN_CLASS_NONE.  Not sure if strictly necessary. */

/* Simplified definitions.  They read an already written source word of
   *o_inst, so they must not be used on a source that is not set up yet.
   They are NOT semantically equivalent to the r300 ones; sharing code
   would require changes. */

/* 'word' with the 12 bits starting at R200_VPI_IN_X_SHIFT (the four
   component selectors) replaced by "select zero". */
#define ZEROED_SRC(word) ((((word) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
		   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
		   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
		   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
		   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_0 ZEROED_SRC(o_inst->src0)
#define ZERO_SRC_1 ZEROED_SRC(o_inst->src1)
#define ZERO_SRC_2 ZEROED_SRC(o_inst->src2)

/* 'word' with its low four bits replaced by 9 -- presumably the encoding
   that marks the source as VSF_IN_CLASS_NONE; confirm against the register
   definitions. */
#define DISABLED_SRC(word) (((word) & ~15) | 9)

#define UNUSED_SRC_0 DISABLED_SRC(o_inst->src0)
#define UNUSED_SRC_1 DISABLED_SRC(o_inst->src1)
#define UNUSED_SRC_2 DISABLED_SRC(o_inst->src2)

391
392
393 /**
394 * Generate an R200 vertex program from Mesa's internal representation.
395 *
396 * \return GL_TRUE for success, GL_FALSE for failure.
397 */
398 static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_vertex_program *vp)
399 {
400 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
401 struct prog_instruction *vpi;
402 int i;
403 VERTEX_SHADER_INSTRUCTION *o_inst;
404 unsigned long operands;
405 int are_srcs_scalar;
406 unsigned long hw_op;
407 int dofogfix = 0;
408 int fog_temp_i = 0;
409 int free_inputs;
410 int array_count = 0;
411
412 vp->native = GL_FALSE;
413 vp->translated = GL_TRUE;
414 vp->fogmode = ctx->Fog.Mode;
415
416 if (mesa_vp->Base.NumInstructions == 0)
417 return GL_FALSE;
418
419 #if 0
420 if ((mesa_vp->Base.InputsRead &
421 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
422 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
423 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
424 if (R200_DEBUG & DEBUG_FALLBACKS) {
425 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
426 mesa_vp->Base.InputsRead);
427 }
428 return GL_FALSE;
429 }
430 #endif
431
432 if ((mesa_vp->Base.OutputsWritten &
433 ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) |
434 (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
435 (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
436 (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
437 if (R200_DEBUG & DEBUG_FALLBACKS) {
438 fprintf(stderr, "can't handle vert prog outputs 0x%x\n",
439 mesa_vp->Base.OutputsWritten);
440 }
441 return GL_FALSE;
442 }
443
444 if (mesa_vp->IsNVProgram) {
445 /* subtle differences in spec like guaranteed initialized regs could cause
446 headaches. Might want to remove the driconf option to enable it completely */
447 return GL_FALSE;
448 }
449 /* Initial value should be last tmp reg that hw supports.
450 Strangely enough r300 doesnt mind even though these would be out of range.
451 Smart enough to realize that it doesnt need it? */
452 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
453 struct prog_src_register src[3];
454 struct prog_dst_register dst;
455
456 /* FIXME: is changing the prog safe to do here? */
457 if (mesa_vp->IsPositionInvariant &&
458 /* make sure we only do this once */
459 !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
460 _mesa_insert_mvp_code(ctx, mesa_vp);
461 }
462
463 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
464 base e isn't directly available neither. */
465 if ((mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_FOGC)) && !vp->fogpidx) {
466 struct gl_program_parameter_list *paramList;
467 gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
468 paramList = mesa_vp->Base.Parameters;
469 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
470 }
471
472 vp->pos_end = 0;
473 mesa_vp->Base.NumNativeInstructions = 0;
474 if (mesa_vp->Base.Parameters)
475 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
476 else
477 mesa_vp->Base.NumNativeParameters = 0;
478
479 for(i = 0; i < VERT_ATTRIB_MAX; i++)
480 vp->inputs[i] = -1;
481 for(i = 0; i < 15; i++)
482 vp->inputmap_rev[i] = 255;
483 free_inputs = 0x2ffd;
484
485 /* fglrx uses fixed inputs as follows for conventional attribs.
486 generic attribs use non-fixed assignment, fglrx will always use the
487 lowest attrib values available. We'll just do the same.
488 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
489 and 13 in a hw vertex prog.
490 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
491 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
492 Additionally, not more than 12 arrays in total are possible I think.
493 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
494 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
495 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
496 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
497 */
498
499 /* attr 4,5 and 13 are only used with generic attribs.
500 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
501 not possibe to use with vertex progs as it is lacking in vert prog specification) */
502 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
503 if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
504 vp->inputs[VERT_ATTRIB_POS] = 0;
505 vp->inputmap_rev[0] = VERT_ATTRIB_POS;
506 free_inputs &= ~(1 << 0);
507 array_count++;
508 }
509 if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
510 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
511 vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
512 array_count++;
513 }
514 if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
515 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
516 vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
517 array_count++;
518 }
519 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
520 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
521 vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
522 free_inputs &= ~(1 << 2);
523 array_count++;
524 }
525 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
526 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
527 vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
528 free_inputs &= ~(1 << 3);
529 array_count++;
530 }
531 if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
532 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
533 vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
534 array_count++;
535 }
536 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) {
537 if (mesa_vp->Base.InputsRead & (1 << i)) {
538 vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6;
539 vp->inputmap_rev[8 + i - VERT_ATTRIB_TEX0] = i;
540 free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6));
541 array_count++;
542 }
543 }
544 /* using VERT_ATTRIB_TEX6/7 would be illegal */
545 /* completely ignore aliasing? */
546 for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
547 int j;
548 /* completely ignore aliasing? */
549 if (mesa_vp->Base.InputsRead & (1 << i)) {
550 array_count++;
551 if (array_count > 12) {
552 if (R200_DEBUG & DEBUG_FALLBACKS) {
553 fprintf(stderr, "more than 12 attribs used in vert prog\n");
554 }
555 return GL_FALSE;
556 }
557 for (j = 0; j < 14; j++) {
558 /* will always find one due to limited array_count */
559 if (free_inputs & (1 << j)) {
560 free_inputs &= ~(1 << j);
561 vp->inputs[i] = j;
562 if (j == 0) vp->inputmap_rev[j] = i; /* mapped to pos */
563 else if (j < 12) vp->inputmap_rev[j + 2] = i; /* mapped to col/tex */
564 else vp->inputmap_rev[j + 1] = i; /* mapped to pos1 */
565 break;
566 }
567 }
568 }
569 }
570
571 if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
572 if (R200_DEBUG & DEBUG_FALLBACKS) {
573 fprintf(stderr, "can't handle vert prog without position output\n");
574 }
575 return GL_FALSE;
576 }
577 if (free_inputs & 1) {
578 if (R200_DEBUG & DEBUG_FALLBACKS) {
579 fprintf(stderr, "can't handle vert prog without position input\n");
580 }
581 return GL_FALSE;
582 }
583
584 o_inst = vp->instr;
585 for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
586 operands = op_operands(vpi->Opcode);
587 are_srcs_scalar = operands & SCALAR_FLAG;
588 operands &= OP_MASK;
589
590 for(i = 0; i < operands; i++) {
591 src[i] = vpi->SrcReg[i];
592 /* hack up default attrib values as per spec as swizzling.
593 normal, fog, secondary color. Crazy?
594 May need more if we don't submit vec4 elements? */
595 if (src[i].File == PROGRAM_INPUT) {
596 if (src[i].Index == VERT_ATTRIB_NORMAL) {
597 int j;
598 for (j = 0; j < 4; j++) {
599 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
600 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
601 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
602 }
603 }
604 }
605 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
606 int j;
607 for (j = 0; j < 4; j++) {
608 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
609 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
610 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
611 }
612 }
613 }
614 else if (src[i].Index == VERT_ATTRIB_FOG) {
615 int j;
616 for (j = 0; j < 4; j++) {
617 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
618 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
619 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
620 }
621 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
622 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
623 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
624 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
625 }
626 }
627 }
628 }
629 }
630
631 if(operands == 3){
632 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
633 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
634 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
635 VSF_FLAG_ALL);
636
637 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
638 SWIZZLE_X, SWIZZLE_Y,
639 SWIZZLE_Z, SWIZZLE_W,
640 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
641
642 o_inst->src1 = ZERO_SRC_0;
643 o_inst->src2 = UNUSED_SRC_1;
644 o_inst++;
645
646 src[2].File = PROGRAM_TEMPORARY;
647 src[2].Index = u_temp_i;
648 src[2].RelAddr = 0;
649 u_temp_i--;
650 }
651 }
652
653 if(operands >= 2){
654 if( CMP_SRCS(src[1], src[0]) ){
655 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
656 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
657 VSF_FLAG_ALL);
658
659 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
660 SWIZZLE_X, SWIZZLE_Y,
661 SWIZZLE_Z, SWIZZLE_W,
662 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
663
664 o_inst->src1 = ZERO_SRC_0;
665 o_inst->src2 = UNUSED_SRC_1;
666 o_inst++;
667
668 src[0].File = PROGRAM_TEMPORARY;
669 src[0].Index = u_temp_i;
670 src[0].RelAddr = 0;
671 u_temp_i--;
672 }
673 }
674
675 dst = vpi->DstReg;
676 if (dst.File == PROGRAM_OUTPUT &&
677 dst.Index == VERT_RESULT_FOGC &&
678 dst.WriteMask & WRITEMASK_X) {
679 fog_temp_i = u_temp_i;
680 dst.File = PROGRAM_TEMPORARY;
681 dst.Index = fog_temp_i;
682 dofogfix = 1;
683 u_temp_i--;
684 }
685
686 /* These ops need special handling. */
687 switch(vpi->Opcode){
688 case OPCODE_POW:
689 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
690 So may need to insert additional instruction */
691 if ((src[0].File == src[1].File) &&
692 (src[0].Index == src[1].Index)) {
693 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
694 t_dst_mask(dst.WriteMask));
695 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
696 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
697 SWIZZLE_ZERO,
698 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
699 SWIZZLE_ZERO,
700 t_src_class(src[0].File),
701 src[0].NegateBase) | (src[0].RelAddr << 4);
702 o_inst->src1 = UNUSED_SRC_0;
703 o_inst->src2 = UNUSED_SRC_0;
704 }
705 else {
706 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
707 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
708 VSF_FLAG_ALL);
709 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
710 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
711 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
712 t_src_class(src[0].File),
713 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
714 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
715 SWIZZLE_ZERO, SWIZZLE_ZERO,
716 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
717 t_src_class(src[1].File),
718 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
719 o_inst->src2 = UNUSED_SRC_1;
720 o_inst++;
721
722 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
723 t_dst_mask(dst.WriteMask));
724 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
725 VSF_IN_COMPONENT_X,
726 VSF_IN_COMPONENT_Y,
727 VSF_IN_COMPONENT_Z,
728 VSF_IN_COMPONENT_W,
729 VSF_IN_CLASS_TMP,
730 VSF_FLAG_NONE);
731 o_inst->src1 = UNUSED_SRC_0;
732 o_inst->src2 = UNUSED_SRC_0;
733 u_temp_i--;
734 }
735 goto next;
736
737 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
738 case OPCODE_SWZ:
739 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
740 t_dst_mask(dst.WriteMask));
741 o_inst->src0 = t_src(vp, &src[0]);
742 o_inst->src1 = ZERO_SRC_0;
743 o_inst->src2 = UNUSED_SRC_1;
744 goto next;
745
746 case OPCODE_MAD:
747 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
748 src[1].File == PROGRAM_TEMPORARY &&
749 src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
750
751 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
752 t_dst_mask(dst.WriteMask));
753 o_inst->src0 = t_src(vp, &src[0]);
754 #if 0
755 if ((o_inst - vp->instr) == 31) {
756 /* fix up the broken vertex program of quake4 demo... */
757 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
758 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
759 t_src_class(src[1].File),
760 src[1].NegateBase) | (src[1].RelAddr << 4);
761 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
762 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
763 t_src_class(src[1].File),
764 src[1].NegateBase) | (src[1].RelAddr << 4);
765 }
766 else {
767 o_inst->src1 = t_src(vp, &src[1]);
768 o_inst->src2 = t_src(vp, &src[2]);
769 }
770 #else
771 o_inst->src1 = t_src(vp, &src[1]);
772 o_inst->src2 = t_src(vp, &src[2]);
773 #endif
774 goto next;
775
776 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
777 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
778 t_dst_mask(dst.WriteMask));
779
780 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
781 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
782 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
783 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
784 SWIZZLE_ZERO,
785 t_src_class(src[0].File),
786 src[0].NegateBase) | (src[0].RelAddr << 4);
787
788 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
789 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
790 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
791 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
792 SWIZZLE_ZERO,
793 t_src_class(src[1].File),
794 src[1].NegateBase) | (src[1].RelAddr << 4);
795
796 o_inst->src2 = UNUSED_SRC_1;
797 goto next;
798
799 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
800 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
801 t_dst_mask(dst.WriteMask));
802
803 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
804 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
805 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
806 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
807 VSF_IN_COMPONENT_ONE,
808 t_src_class(src[0].File),
809 src[0].NegateBase) | (src[0].RelAddr << 4);
810 o_inst->src1 = t_src(vp, &src[1]);
811 o_inst->src2 = UNUSED_SRC_1;
812 goto next;
813
814 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
815 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
816 t_dst_mask(dst.WriteMask));
817
818 o_inst->src0 = t_src(vp, &src[0]);
819 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
820 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
821 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
822 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
823 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
824 t_src_class(src[1].File),
825 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
826 o_inst->src2 = UNUSED_SRC_1;
827 goto next;
828
829 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
830 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
831 t_dst_mask(dst.WriteMask));
832
833 o_inst->src0=t_src(vp, &src[0]);
834 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
835 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
836 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
837 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
838 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
839 t_src_class(src[0].File),
840 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
841 o_inst->src2 = UNUSED_SRC_1;
842 goto next;
843
844 case OPCODE_FLR:
845 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
846 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
847
848 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
849 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
850 t_dst_mask(dst.WriteMask));
851
852 o_inst->src0 = t_src(vp, &src[0]);
853 o_inst->src1 = UNUSED_SRC_0;
854 o_inst->src2 = UNUSED_SRC_1;
855 o_inst++;
856
857 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
858 t_dst_mask(dst.WriteMask));
859
860 o_inst->src0 = t_src(vp, &src[0]);
861 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
862 VSF_IN_COMPONENT_X,
863 VSF_IN_COMPONENT_Y,
864 VSF_IN_COMPONENT_Z,
865 VSF_IN_COMPONENT_W,
866 VSF_IN_CLASS_TMP,
867 /* Not 100% sure about this */
868 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
869
870 o_inst->src2 = UNUSED_SRC_0;
871 u_temp_i--;
872 goto next;
873
874 case OPCODE_XPD:
875 /* mul r0, r1.yzxw, r2.zxyw
876 mad r0, -r2.yzxw, r1.zxyw, r0
877 NOTE: might need MAD_2
878 */
879
880 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
881 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
882 t_dst_mask(dst.WriteMask));
883
884 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
885 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
886 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
887 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
888 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
889 t_src_class(src[0].File),
890 src[0].NegateBase) | (src[0].RelAddr << 4);
891
892 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
893 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
894 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
895 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
896 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
897 t_src_class(src[1].File),
898 src[1].NegateBase) | (src[1].RelAddr << 4);
899
900 o_inst->src2 = UNUSED_SRC_1;
901 o_inst++;
902 u_temp_i--;
903
904 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst),
905 t_dst_mask(dst.WriteMask));
906
907 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
908 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
909 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
910 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
911 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
912 t_src_class(src[1].File),
913 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
914
915 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
916 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
917 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
918 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
919 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
920 t_src_class(src[0].File),
921 src[0].NegateBase) | (src[0].RelAddr << 4);
922
923 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
924 VSF_IN_COMPONENT_X,
925 VSF_IN_COMPONENT_Y,
926 VSF_IN_COMPONENT_Z,
927 VSF_IN_COMPONENT_W,
928 VSF_IN_CLASS_TMP,
929 VSF_FLAG_NONE);
930 goto next;
931
932 case OPCODE_END:
933 assert(0);
934 default:
935 break;
936 }
937
938 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
939 t_dst_mask(dst.WriteMask));
940
941 if(are_srcs_scalar){
942 switch(operands){
943 case 1:
944 o_inst->src0 = t_src_scalar(vp, &src[0]);
945 o_inst->src1 = UNUSED_SRC_0;
946 o_inst->src2 = UNUSED_SRC_1;
947 break;
948
949 case 2:
950 o_inst->src0 = t_src_scalar(vp, &src[0]);
951 o_inst->src1 = t_src_scalar(vp, &src[1]);
952 o_inst->src2 = UNUSED_SRC_1;
953 break;
954
955 case 3:
956 o_inst->src0 = t_src_scalar(vp, &src[0]);
957 o_inst->src1 = t_src_scalar(vp, &src[1]);
958 o_inst->src2 = t_src_scalar(vp, &src[2]);
959 break;
960
961 default:
962 fprintf(stderr, "illegal number of operands %lu\n", operands);
963 exit(-1);
964 break;
965 }
966 } else {
967 switch(operands){
968 case 1:
969 o_inst->src0 = t_src(vp, &src[0]);
970 o_inst->src1 = UNUSED_SRC_0;
971 o_inst->src2 = UNUSED_SRC_1;
972 break;
973
974 case 2:
975 o_inst->src0 = t_src(vp, &src[0]);
976 o_inst->src1 = t_src(vp, &src[1]);
977 o_inst->src2 = UNUSED_SRC_1;
978 break;
979
980 case 3:
981 o_inst->src0 = t_src(vp, &src[0]);
982 o_inst->src1 = t_src(vp, &src[1]);
983 o_inst->src2 = t_src(vp, &src[2]);
984 break;
985
986 default:
987 fprintf(stderr, "illegal number of operands %lu\n", operands);
988 exit(-1);
989 break;
990 }
991 }
992 next:
993
994 if (dofogfix) {
995 o_inst++;
996 if (vp->fogmode == GL_EXP) {
997 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
998 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
999 VSF_FLAG_X);
1000 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1001 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1002 o_inst->src2 = UNUSED_SRC_1;
1003 o_inst++;
1004 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1005 R200_VSF_OUT_CLASS_RESULT_FOGC,
1006 VSF_FLAG_X);
1007 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1008 o_inst->src1 = UNUSED_SRC_0;
1009 o_inst->src2 = UNUSED_SRC_1;
1010 }
1011 else if (vp->fogmode == GL_EXP2) {
1012 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1013 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1014 VSF_FLAG_X);
1015 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1016 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1017 o_inst->src2 = UNUSED_SRC_1;
1018 o_inst++;
1019 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1020 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1021 VSF_FLAG_X);
1022 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1023 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1024 o_inst->src2 = UNUSED_SRC_1;
1025 o_inst++;
1026 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1027 R200_VSF_OUT_CLASS_RESULT_FOGC,
1028 VSF_FLAG_X);
1029 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1030 o_inst->src1 = UNUSED_SRC_0;
1031 o_inst->src2 = UNUSED_SRC_1;
1032 }
1033 else { /* fogmode == GL_LINEAR */
1034 /* could do that with single op (dot) if using params like
1035 with fixed function pipeline fog */
1036 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1037 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1038 VSF_FLAG_X);
1039 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1040 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1041 o_inst->src2 = UNUSED_SRC_1;
1042 o_inst++;
1043 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1044 R200_VSF_OUT_CLASS_RESULT_FOGC,
1045 VSF_FLAG_X);
1046 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1047 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1048 o_inst->src2 = UNUSED_SRC_1;
1049
1050 }
1051 dofogfix = 0;
1052 }
1053
1054 if (mesa_vp->Base.NumNativeTemporaries <
1055 (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) {
1056 mesa_vp->Base.NumNativeTemporaries =
1057 mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i);
1058 }
1059 if (u_temp_i < mesa_vp->Base.NumTemporaries) {
1060 if (R200_DEBUG & DEBUG_FALLBACKS) {
1061 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
1062 }
1063 return GL_FALSE;
1064 }
1065 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1066 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1067 mesa_vp->Base.NumNativeInstructions = 129;
1068 if (R200_DEBUG & DEBUG_FALLBACKS) {
1069 fprintf(stderr, "more than 128 native instructions\n");
1070 }
1071 return GL_FALSE;
1072 }
1073 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1074 vp->pos_end = (o_inst - vp->instr);
1075 }
1076 }
1077
1078 vp->native = GL_TRUE;
1079 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1080 #if 0
1081 fprintf(stderr, "hw program:\n");
1082 for(i=0; i < vp->program.length; i++)
1083 fprintf(stderr, "%08x\n", vp->instr[i]);
1084 #endif
1085 return GL_TRUE;
1086 }
1087
1088 void r200SetupVertexProg( GLcontext *ctx ) {
1089 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1090 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1091 GLboolean fallback;
1092 GLint i;
1093
1094 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1095 rmesa->curr_vp_hw = NULL;
1096 r200_translate_vertex_program(ctx, vp);
1097 }
1098 /* could optimize setting up vertex progs away for non-tcl hw */
1099 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
1100 rmesa->r200Screen->drmSupportsVertexProgram);
1101 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1102 if (rmesa->TclFallback) return;
1103
1104 R200_STATECHANGE( rmesa, vap );
1105 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1106 maybe only when using more than 64 inst / 96 param? */
1107 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1108
1109 R200_STATECHANGE( rmesa, pvs );
1110
1111 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1112 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1113 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1114 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1115 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1116
1117 /* maybe user clip planes just work with vertex progs... untested */
1118 if (ctx->Transform.ClipPlanesEnabled) {
1119 R200_STATECHANGE( rmesa, tcl );
1120 if (vp->mesa_program.IsPositionInvariant) {
1121 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1122 }
1123 else {
1124 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1125 }
1126 }
1127
1128 if (vp != rmesa->curr_vp_hw) {
1129 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1130 drm_radeon_cmd_header_t tmp;
1131
1132 R200_STATECHANGE( rmesa, vpi[0] );
1133 R200_STATECHANGE( rmesa, vpi[1] );
1134
1135 /* FIXME: what about using a memcopy... */
1136 for (i = 0; (i < 64) && i < count; i++) {
1137 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1138 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1139 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1140 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1141 }
1142 /* hack up the cmd_size so not the whole state atom is emitted always.
1143 This may require some more thought, we may emit half progs on lost state, but
1144 hopefully it won't matter?
1145 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1146 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1147 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1148 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1149 tmp.veclinear.count = (count > 64) ? 64 : count;
1150 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1151 if (count > 64) {
1152 for (i = 0; i < (count - 64); i++) {
1153 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1154 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1155 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1156 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1157 }
1158 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1159 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1160 tmp.veclinear.count = count - 64;
1161 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1162 }
1163 rmesa->curr_vp_hw = vp;
1164 }
1165 }
1166
1167
1168 static void
1169 r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1170 {
1171 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1172
1173 switch(target){
1174 case GL_VERTEX_PROGRAM_ARB:
1175 rmesa->curr_vp_hw = NULL;
1176 break;
1177 default:
1178 _mesa_problem(ctx, "Target not supported yet!");
1179 break;
1180 }
1181 }
1182
1183 static struct gl_program *
1184 r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1185 {
1186 struct r200_vertex_program *vp;
1187
1188 switch(target){
1189 case GL_VERTEX_PROGRAM_ARB:
1190 vp = CALLOC_STRUCT(r200_vertex_program);
1191 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1192 case GL_FRAGMENT_PROGRAM_ARB:
1193 case GL_FRAGMENT_PROGRAM_NV:
1194 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1195 default:
1196 _mesa_problem(ctx, "Bad target in r200NewProgram");
1197 }
1198 return NULL;
1199 }
1200
1201
1202 static void
1203 r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1204 {
1205 _mesa_delete_program(ctx, prog);
1206 }
1207
1208 static void
1209 r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1210 {
1211 struct r200_vertex_program *vp = (void *)prog;
1212 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1213
1214 switch(target) {
1215 case GL_VERTEX_PROGRAM_ARB:
1216 vp->translated = GL_FALSE;
1217 vp->fogpidx = 0;
1218 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1219 r200_translate_vertex_program(ctx, vp);
1220 rmesa->curr_vp_hw = NULL;
1221 break;
1222 case GL_FRAGMENT_SHADER_ATI:
1223 rmesa->afs_loaded = NULL;
1224 break;
1225 }
1226 /* need this for tcl fallbacks */
1227 _tnl_program_string(ctx, target, prog);
1228 }
1229
1230 static GLboolean
1231 r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1232 {
1233 struct r200_vertex_program *vp = (void *)prog;
1234
1235 switch(target){
1236 case GL_VERTEX_STATE_PROGRAM_NV:
1237 case GL_VERTEX_PROGRAM_ARB:
1238 if (!vp->translated) {
1239 r200_translate_vertex_program(ctx, vp);
1240 }
1241 /* does not take parameters etc. into account */
1242 return vp->native;
1243 default:
1244 _mesa_problem(ctx, "Bad target in r200NewProgram");
1245 }
1246 return 0;
1247 }
1248
1249 void r200InitShaderFuncs(struct dd_function_table *functions)
1250 {
1251 functions->NewProgram = r200NewProgram;
1252 functions->BindProgram = r200BindProgram;
1253 functions->DeleteProgram = r200DeleteProgram;
1254 functions->ProgramStringNotify = r200ProgramStringNotify;
1255 functions->IsProgramNative = r200IsProgramNative;
1256 }