Replace gl_vert_result enum with gl_varying_slot.
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "program/program.h"
37 #include "program/prog_instruction.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_statevars.h"
40 #include "program/programopt.h"
41 #include "tnl/tnl.h"
42
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
46 #include "r200_tcl.h"
47
/*
 * Compile-time guard: the translation helpers in this file hand Mesa's
 * SWIZZLE_* and WRITEMASK_* values to the hardware encoding unchanged
 * (see t_swizzle() and t_dst_mask()), so the two sets of constants must
 * stay numerically identical.
 */
#if (SWIZZLE_X != VSF_IN_COMPONENT_X) || \
    (SWIZZLE_Y != VSF_IN_COMPONENT_Y) || \
    (SWIZZLE_Z != VSF_IN_COMPONENT_Z) || \
    (SWIZZLE_W != VSF_IN_COMPONENT_W) || \
    (SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO) || \
    (SWIZZLE_ONE != VSF_IN_COMPONENT_ONE) || \
    (WRITEMASK_X != VSF_FLAG_X) || \
    (WRITEMASK_Y != VSF_FLAG_Y) || \
    (WRITEMASK_Z != VSF_FLAG_Z) || \
    (WRITEMASK_W != VSF_FLAG_W)
#error Cannot change these!
#endif
60
/*
 * Flag bit OR-ed into the operand count stored in op_names[].ip; it marks
 * opcodes whose sources are translated as scalars.  The literal is unsigned
 * because shifting a signed 1 into bit 31 is undefined behaviour and would
 * sign-extend when widened to the unsigned long 'ip' field.
 */
#define SCALAR_FLAG (1u<<31)
#define FLAG_MASK (1u<<31)
/* Low bits of op_names[].ip that hold the number of input operands. */
#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
/* Builds one op_names[] entry: printable name, Mesa opcode, operand info. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66 static struct{
67 char *name;
68 int opcode;
69 unsigned long ip; /* number of input operands and flags */
70 }op_names[]={
71 OPN(ABS, 1),
72 OPN(ADD, 2),
73 OPN(ARL, 1|SCALAR_FLAG),
74 OPN(DP3, 2),
75 OPN(DP4, 2),
76 OPN(DPH, 2),
77 OPN(DST, 2),
78 OPN(EX2, 1|SCALAR_FLAG),
79 OPN(EXP, 1|SCALAR_FLAG),
80 OPN(FLR, 1),
81 OPN(FRC, 1),
82 OPN(LG2, 1|SCALAR_FLAG),
83 OPN(LIT, 1),
84 OPN(LOG, 1|SCALAR_FLAG),
85 OPN(MAD, 3),
86 OPN(MAX, 2),
87 OPN(MIN, 2),
88 OPN(MOV, 1),
89 OPN(MUL, 2),
90 OPN(POW, 2|SCALAR_FLAG),
91 OPN(RCP, 1|SCALAR_FLAG),
92 OPN(RSQ, 1|SCALAR_FLAG),
93 OPN(SGE, 2),
94 OPN(SLT, 2),
95 OPN(SUB, 2),
96 OPN(SWZ, 1),
97 OPN(XPD, 2),
98 OPN(PRINT, 0),
99 OPN(END, 0),
100 };
101 #undef OPN
102
103 static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
104 {
105 r200ContextPtr rmesa = R200_CONTEXT( ctx );
106 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
107 int pi;
108 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
109 struct gl_program_parameter_list *paramList;
110 drm_radeon_cmd_header_t tmp;
111
112 R200_STATECHANGE( rmesa, vpp[0] );
113 R200_STATECHANGE( rmesa, vpp[1] );
114 assert(mesa_vp->Base.Parameters);
115 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
116 paramList = mesa_vp->Base.Parameters;
117
118 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
119 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
120 return GL_FALSE;
121 }
122
123 for(pi = 0; pi < paramList->NumParameters; pi++) {
124 switch(paramList->Parameters[pi].Type) {
125 case PROGRAM_STATE_VAR:
126 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
127 case PROGRAM_CONSTANT:
128 *fcmd++ = paramList->ParameterValues[pi][0].f;
129 *fcmd++ = paramList->ParameterValues[pi][1].f;
130 *fcmd++ = paramList->ParameterValues[pi][2].f;
131 *fcmd++ = paramList->ParameterValues[pi][3].f;
132 break;
133 default:
134 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
135 break;
136 }
137 if (pi == 95) {
138 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
139 }
140 }
141 /* hack up the cmd_size so not the whole state atom is emitted always. */
142 rmesa->hw.vpp[0].cmd_size =
143 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
144 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
145 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
146 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
147 if (paramList->NumParameters > 96) {
148 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
149 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
150 tmp.veclinear.count = paramList->NumParameters - 96;
151 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
152 }
153 return GL_TRUE;
154 }
155
156 static INLINE unsigned long t_dst_mask(GLuint mask)
157 {
158 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
159 return mask & VSF_FLAG_ALL;
160 }
161
162 static unsigned long t_dst(struct prog_dst_register *dst)
163 {
164 switch(dst->File) {
165 case PROGRAM_TEMPORARY:
166 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
167 | R200_VSF_OUT_CLASS_TMP);
168 case PROGRAM_OUTPUT:
169 switch (dst->Index) {
170 case VARYING_SLOT_POS:
171 return R200_VSF_OUT_CLASS_RESULT_POS;
172 case VARYING_SLOT_COL0:
173 return R200_VSF_OUT_CLASS_RESULT_COLOR;
174 case VARYING_SLOT_COL1:
175 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
176 | R200_VSF_OUT_CLASS_RESULT_COLOR);
177 case VARYING_SLOT_FOGC:
178 return R200_VSF_OUT_CLASS_RESULT_FOGC;
179 case VARYING_SLOT_TEX0:
180 case VARYING_SLOT_TEX1:
181 case VARYING_SLOT_TEX2:
182 case VARYING_SLOT_TEX3:
183 case VARYING_SLOT_TEX4:
184 case VARYING_SLOT_TEX5:
185 return (((dst->Index - VARYING_SLOT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
186 | R200_VSF_OUT_CLASS_RESULT_TEXC);
187 case VARYING_SLOT_PSIZ:
188 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
189 default:
190 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
191 exit(0);
192 return 0;
193 }
194 case PROGRAM_ADDRESS:
195 assert (dst->Index == 0);
196 return R200_VSF_OUT_CLASS_ADDR;
197 default:
198 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
199 exit(0);
200 return 0;
201 }
202 }
203
204 static unsigned long t_src_class(gl_register_file file)
205 {
206
207 switch(file){
208 case PROGRAM_TEMPORARY:
209 return VSF_IN_CLASS_TMP;
210
211 case PROGRAM_INPUT:
212 return VSF_IN_CLASS_ATTR;
213
214 case PROGRAM_LOCAL_PARAM:
215 case PROGRAM_ENV_PARAM:
216 case PROGRAM_CONSTANT:
217 case PROGRAM_STATE_VAR:
218 return VSF_IN_CLASS_PARAM;
219 /*
220 case PROGRAM_OUTPUT:
221 case PROGRAM_ADDRESS:
222 */
223 default:
224 fprintf(stderr, "problem in %s", __FUNCTION__);
225 exit(0);
226 }
227 }
228
229 static INLINE unsigned long t_swizzle(GLubyte swizzle)
230 {
231 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
232 return swizzle;
233 }
234
#if 0
/*
 * Debug helper (compiled out): prints the contents of vp->inputs[] for all
 * VERT_ATTRIB_MAX entries to stderr, prefixed with the caller's name.
 */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr = 0;

   if (!vp) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
252
253 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
254 {
255 /*
256 int i;
257 int max_reg = -1;
258 */
259 if(src->File == PROGRAM_INPUT){
260 /* if(vp->inputs[src->Index] != -1)
261 return vp->inputs[src->Index];
262
263 for(i=0; i < VERT_ATTRIB_MAX; i++)
264 if(vp->inputs[i] > max_reg)
265 max_reg = vp->inputs[i];
266
267 vp->inputs[src->Index] = max_reg+1;*/
268
269 //vp_dump_inputs(vp, __FUNCTION__);
270 assert(vp->inputs[src->Index] != -1);
271 return vp->inputs[src->Index];
272 } else {
273 if (src->Index < 0) {
274 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
275 return 0;
276 }
277 return src->Index;
278 }
279 }
280
281 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
282 {
283
284 return MAKE_VSF_SOURCE(t_src_index(vp, src),
285 t_swizzle(GET_SWZ(src->Swizzle, 0)),
286 t_swizzle(GET_SWZ(src->Swizzle, 1)),
287 t_swizzle(GET_SWZ(src->Swizzle, 2)),
288 t_swizzle(GET_SWZ(src->Swizzle, 3)),
289 t_src_class(src->File),
290 src->Negate) | (src->RelAddr << 4);
291 }
292
293 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
294 {
295
296 return MAKE_VSF_SOURCE(t_src_index(vp, src),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_src_class(src->File),
302 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
303 }
304
305 static unsigned long t_opcode(enum prog_opcode opcode)
306 {
307
308 switch(opcode){
309 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
310 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
311 * seems to ignore neg offsets which isn't quite correct...
312 */
313 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
314 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
315 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
316 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
317 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
318 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
319 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
320 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
321 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
322 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
323 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
324 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
325 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
326 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
327 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
328 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
329
330 default:
331 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
332 }
333 exit(-1);
334 return 0;
335 }
336
337 static unsigned long op_operands(enum prog_opcode opcode)
338 {
339 int i;
340
341 /* Can we trust mesas opcodes to be in order ? */
342 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
343 if(op_names[i].opcode == opcode)
344 return op_names[i].ip;
345
346 fprintf(stderr, "op %d not found in op_names\n", opcode);
347 exit(-1);
348 return 0;
349 }
350
/* TODO: Get rid of t_src_class call */
/*
 * True when sources a and b refer to different registers (their index or
 * relative-addressing flag differs) while both belong to the parameter
 * class, or both to the attribute class.  The translator uses this to
 * decide when one of the sources has to be copied to a temporary first.
 * Arguments are parenthesised so any lvalue expression can be passed.
 */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
   ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
     t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
    (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
     t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are same as previous source, zero-ed out.
   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* ZERO_SRC_n: source n of the instruction at o_inst with the 12 swizzle
   bits starting at R200_VPI_IN_X_SHIFT replaced by the ZERO selector in
   all four components. */
#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

/* UNUSED_SRC_n: source n of the instruction at o_inst with its low four
   bits replaced by 9 (presumably the VSF_IN_CLASS_NONE encoding mentioned
   above - TODO confirm against the register headers). */
#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)

#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)

#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)

389
390
391 /**
392 * Generate an R200 vertex program from Mesa's internal representation.
393 *
394 * \return GL_TRUE for success, GL_FALSE for failure.
395 */
396 static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
397 {
398 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
399 struct prog_instruction *vpi;
400 int i;
401 VERTEX_SHADER_INSTRUCTION *o_inst;
402 unsigned long operands;
403 int are_srcs_scalar;
404 unsigned long hw_op;
405 int dofogfix = 0;
406 int fog_temp_i = 0;
407 int free_inputs;
408 int array_count = 0;
409 int u_temp_used;
410
411 vp->native = GL_FALSE;
412 vp->translated = GL_TRUE;
413 vp->fogmode = ctx->Fog.Mode;
414
415 if (mesa_vp->Base.NumInstructions == 0)
416 return GL_FALSE;
417
418 #if 0
419 if ((mesa_vp->Base.InputsRead &
420 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
421 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
422 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
423 if (R200_DEBUG & RADEON_FALLBACKS) {
424 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
425 mesa_vp->Base.InputsRead);
426 }
427 return GL_FALSE;
428 }
429 #endif
430
431 if ((mesa_vp->Base.OutputsWritten &
432 ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
433 (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
434 (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
435 (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
436 if (R200_DEBUG & RADEON_FALLBACKS) {
437 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
438 (unsigned long long) mesa_vp->Base.OutputsWritten);
439 }
440 return GL_FALSE;
441 }
442
443 /* Initial value should be last tmp reg that hw supports.
444 Strangely enough r300 doesnt mind even though these would be out of range.
445 Smart enough to realize that it doesnt need it? */
446 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
447 struct prog_src_register src[3];
448 struct prog_dst_register dst;
449
450 /* FIXME: is changing the prog safe to do here? */
451 if (mesa_vp->IsPositionInvariant &&
452 /* make sure we only do this once */
453 !(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
454 _mesa_insert_mvp_code(ctx, mesa_vp);
455 }
456
457 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
458 base e isn't directly available neither. */
459 if ((mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_FOGC)) && !vp->fogpidx) {
460 struct gl_program_parameter_list *paramList;
461 gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
462 paramList = mesa_vp->Base.Parameters;
463 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
464 }
465
466 vp->pos_end = 0;
467 mesa_vp->Base.NumNativeInstructions = 0;
468 if (mesa_vp->Base.Parameters)
469 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
470 else
471 mesa_vp->Base.NumNativeParameters = 0;
472
473 for(i = 0; i < VERT_ATTRIB_MAX; i++)
474 vp->inputs[i] = -1;
475 for(i = 0; i < 15; i++)
476 vp->inputmap_rev[i] = 255;
477 free_inputs = 0x2ffd;
478
479 /* fglrx uses fixed inputs as follows for conventional attribs.
480 generic attribs use non-fixed assignment, fglrx will always use the
481 lowest attrib values available. We'll just do the same.
482 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
483 and 13 in a hw vertex prog.
484 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
485 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
486 Additionally, not more than 12 arrays in total are possible I think.
487 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
488 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
489 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
490 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
491 */
492
493 /* attr 4,5 and 13 are only used with generic attribs.
494 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
495 not possibe to use with vertex progs as it is lacking in vert prog specification) */
496 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
497 if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
498 vp->inputs[VERT_ATTRIB_POS] = 0;
499 vp->inputmap_rev[0] = VERT_ATTRIB_POS;
500 free_inputs &= ~(1 << 0);
501 array_count++;
502 }
503 if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
504 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
505 vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
506 array_count++;
507 }
508 if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
509 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
510 vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
511 array_count++;
512 }
513 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
514 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
515 vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
516 free_inputs &= ~(1 << 2);
517 array_count++;
518 }
519 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
520 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
521 vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
522 free_inputs &= ~(1 << 3);
523 array_count++;
524 }
525 if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
526 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
527 vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
528 array_count++;
529 }
530 /* VERT_ATTRIB_TEX0-5 */
531 for (i = 0; i <= 5; i++) {
532 if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
533 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
534 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
535 free_inputs &= ~(1 << (i + 6));
536 array_count++;
537 }
538 }
539 /* using VERT_ATTRIB_TEX6/7 would be illegal */
540 for (; i < VERT_ATTRIB_TEX_MAX; i++) {
541 if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
542 if (R200_DEBUG & RADEON_FALLBACKS) {
543 fprintf(stderr, "texture attribute %d in vert prog\n", i);
544 }
545 return GL_FALSE;
546 }
547 }
548 /* completely ignore aliasing? */
549 for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
550 int j;
551 /* completely ignore aliasing? */
552 if (mesa_vp->Base.InputsRead & VERT_BIT_GENERIC(i)) {
553 array_count++;
554 if (array_count > 12) {
555 if (R200_DEBUG & RADEON_FALLBACKS) {
556 fprintf(stderr, "more than 12 attribs used in vert prog\n");
557 }
558 return GL_FALSE;
559 }
560 for (j = 0; j < 14; j++) {
561 /* will always find one due to limited array_count */
562 if (free_inputs & (1 << j)) {
563 free_inputs &= ~(1 << j);
564 vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
565 if (j == 0) {
566 /* mapped to pos */
567 vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
568 } else if (j < 12) {
569 /* mapped to col/tex */
570 vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
571 } else {
572 /* mapped to pos1 */
573 vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
574 }
575 break;
576 }
577 }
578 }
579 }
580
581 if (!(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
582 if (R200_DEBUG & RADEON_FALLBACKS) {
583 fprintf(stderr, "can't handle vert prog without position output\n");
584 }
585 return GL_FALSE;
586 }
587 if (free_inputs & 1) {
588 if (R200_DEBUG & RADEON_FALLBACKS) {
589 fprintf(stderr, "can't handle vert prog without position input\n");
590 }
591 return GL_FALSE;
592 }
593
594 o_inst = vp->instr;
595 for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
596 operands = op_operands(vpi->Opcode);
597 are_srcs_scalar = operands & SCALAR_FLAG;
598 operands &= OP_MASK;
599
600 for(i = 0; i < operands; i++) {
601 src[i] = vpi->SrcReg[i];
602 /* hack up default attrib values as per spec as swizzling.
603 normal, fog, secondary color. Crazy?
604 May need more if we don't submit vec4 elements? */
605 if (src[i].File == PROGRAM_INPUT) {
606 if (src[i].Index == VERT_ATTRIB_NORMAL) {
607 int j;
608 for (j = 0; j < 4; j++) {
609 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
610 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
611 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
612 }
613 }
614 }
615 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
616 int j;
617 for (j = 0; j < 4; j++) {
618 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
619 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
620 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
621 }
622 }
623 }
624 else if (src[i].Index == VERT_ATTRIB_FOG) {
625 int j;
626 for (j = 0; j < 4; j++) {
627 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
628 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
629 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
630 }
631 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
632 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
633 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
634 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
635 }
636 }
637 }
638 }
639 }
640
641 if(operands == 3){
642 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
643 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
644 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
645 VSF_FLAG_ALL);
646
647 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
648 SWIZZLE_X, SWIZZLE_Y,
649 SWIZZLE_Z, SWIZZLE_W,
650 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
651
652 o_inst->src1 = ZERO_SRC_0;
653 o_inst->src2 = UNUSED_SRC_1;
654 o_inst++;
655
656 src[2].File = PROGRAM_TEMPORARY;
657 src[2].Index = u_temp_i;
658 src[2].RelAddr = 0;
659 u_temp_i--;
660 }
661 }
662
663 if(operands >= 2){
664 if( CMP_SRCS(src[1], src[0]) ){
665 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
666 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
667 VSF_FLAG_ALL);
668
669 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
670 SWIZZLE_X, SWIZZLE_Y,
671 SWIZZLE_Z, SWIZZLE_W,
672 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
673
674 o_inst->src1 = ZERO_SRC_0;
675 o_inst->src2 = UNUSED_SRC_1;
676 o_inst++;
677
678 src[0].File = PROGRAM_TEMPORARY;
679 src[0].Index = u_temp_i;
680 src[0].RelAddr = 0;
681 u_temp_i--;
682 }
683 }
684
685 dst = vpi->DstReg;
686 if (dst.File == PROGRAM_OUTPUT &&
687 dst.Index == VARYING_SLOT_FOGC &&
688 dst.WriteMask & WRITEMASK_X) {
689 fog_temp_i = u_temp_i;
690 dst.File = PROGRAM_TEMPORARY;
691 dst.Index = fog_temp_i;
692 dofogfix = 1;
693 u_temp_i--;
694 }
695
696 /* These ops need special handling. */
697 switch(vpi->Opcode){
698 case OPCODE_POW:
699 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
700 So may need to insert additional instruction */
701 if ((src[0].File == src[1].File) &&
702 (src[0].Index == src[1].Index)) {
703 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
704 t_dst_mask(dst.WriteMask));
705 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
706 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
707 SWIZZLE_ZERO,
708 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
709 SWIZZLE_ZERO,
710 t_src_class(src[0].File),
711 src[0].Negate) | (src[0].RelAddr << 4);
712 o_inst->src1 = UNUSED_SRC_0;
713 o_inst->src2 = UNUSED_SRC_0;
714 }
715 else {
716 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
717 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
718 VSF_FLAG_ALL);
719 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
720 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
721 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
722 t_src_class(src[0].File),
723 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
724 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
725 SWIZZLE_ZERO, SWIZZLE_ZERO,
726 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
727 t_src_class(src[1].File),
728 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
729 o_inst->src2 = UNUSED_SRC_1;
730 o_inst++;
731
732 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
733 t_dst_mask(dst.WriteMask));
734 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
735 VSF_IN_COMPONENT_X,
736 VSF_IN_COMPONENT_Y,
737 VSF_IN_COMPONENT_Z,
738 VSF_IN_COMPONENT_W,
739 VSF_IN_CLASS_TMP,
740 VSF_FLAG_NONE);
741 o_inst->src1 = UNUSED_SRC_0;
742 o_inst->src2 = UNUSED_SRC_0;
743 u_temp_i--;
744 }
745 goto next;
746
747 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
748 case OPCODE_SWZ:
749 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
750 t_dst_mask(dst.WriteMask));
751 o_inst->src0 = t_src(vp, &src[0]);
752 o_inst->src1 = ZERO_SRC_0;
753 o_inst->src2 = UNUSED_SRC_1;
754 goto next;
755
756 case OPCODE_MAD:
757 /* only 2 read ports into temp memory thus may need the macro op MAD_2
758 instead (requiring 2 clocks) if all inputs are in temp memory
759 (and, only if they actually reference 3 distinct temps) */
760 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
761 src[1].File == PROGRAM_TEMPORARY &&
762 src[2].File == PROGRAM_TEMPORARY &&
763 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
764 (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
765 (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
766 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
767
768 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
769 t_dst_mask(dst.WriteMask));
770 o_inst->src0 = t_src(vp, &src[0]);
771 #if 0
772 if ((o_inst - vp->instr) == 31) {
773 /* fix up the broken vertex program of quake4 demo... */
774 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
775 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
776 t_src_class(src[1].File),
777 src[1].Negate) | (src[1].RelAddr << 4);
778 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
779 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
780 t_src_class(src[1].File),
781 src[1].Negate) | (src[1].RelAddr << 4);
782 }
783 else {
784 o_inst->src1 = t_src(vp, &src[1]);
785 o_inst->src2 = t_src(vp, &src[2]);
786 }
787 #else
788 o_inst->src1 = t_src(vp, &src[1]);
789 o_inst->src2 = t_src(vp, &src[2]);
790 #endif
791 goto next;
792
793 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
794 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
795 t_dst_mask(dst.WriteMask));
796
797 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
798 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
799 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
800 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
801 SWIZZLE_ZERO,
802 t_src_class(src[0].File),
803 src[0].Negate) | (src[0].RelAddr << 4);
804
805 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
806 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
807 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
808 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
809 SWIZZLE_ZERO,
810 t_src_class(src[1].File),
811 src[1].Negate) | (src[1].RelAddr << 4);
812
813 o_inst->src2 = UNUSED_SRC_1;
814 goto next;
815
816 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
817 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
818 t_dst_mask(dst.WriteMask));
819
820 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
821 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
822 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
823 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
824 VSF_IN_COMPONENT_ONE,
825 t_src_class(src[0].File),
826 src[0].Negate) | (src[0].RelAddr << 4);
827 o_inst->src1 = t_src(vp, &src[1]);
828 o_inst->src2 = UNUSED_SRC_1;
829 goto next;
830
831 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
832 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
833 t_dst_mask(dst.WriteMask));
834
835 o_inst->src0 = t_src(vp, &src[0]);
836 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
837 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
838 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
839 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
840 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
841 t_src_class(src[1].File),
842 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
843 o_inst->src2 = UNUSED_SRC_1;
844 goto next;
845
846 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
847 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
848 t_dst_mask(dst.WriteMask));
849
850 o_inst->src0=t_src(vp, &src[0]);
851 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
852 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
853 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
854 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
855 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
856 t_src_class(src[0].File),
857 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
858 o_inst->src2 = UNUSED_SRC_1;
859 goto next;
860
861 case OPCODE_FLR:
862 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
863 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
864
865 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
866 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
867 t_dst_mask(dst.WriteMask));
868
869 o_inst->src0 = t_src(vp, &src[0]);
870 o_inst->src1 = UNUSED_SRC_0;
871 o_inst->src2 = UNUSED_SRC_1;
872 o_inst++;
873
874 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
875 t_dst_mask(dst.WriteMask));
876
877 o_inst->src0 = t_src(vp, &src[0]);
878 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
879 VSF_IN_COMPONENT_X,
880 VSF_IN_COMPONENT_Y,
881 VSF_IN_COMPONENT_Z,
882 VSF_IN_COMPONENT_W,
883 VSF_IN_CLASS_TMP,
884 /* Not 100% sure about this */
885 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
886
887 o_inst->src2 = UNUSED_SRC_0;
888 u_temp_i--;
889 goto next;
890
891 case OPCODE_XPD:
892 /* mul r0, r1.yzxw, r2.zxyw
893 mad r0, -r2.yzxw, r1.zxyw, r0
894 */
895 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
896 src[1].File == PROGRAM_TEMPORARY &&
897 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
898 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
899
900 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
901 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
902 t_dst_mask(dst.WriteMask));
903
904 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
905 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
906 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
907 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
908 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
909 t_src_class(src[0].File),
910 src[0].Negate) | (src[0].RelAddr << 4);
911
912 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
913 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
914 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
915 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
916 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
917 t_src_class(src[1].File),
918 src[1].Negate) | (src[1].RelAddr << 4);
919
920 o_inst->src2 = UNUSED_SRC_1;
921 o_inst++;
922 u_temp_i--;
923
924 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
925 t_dst_mask(dst.WriteMask));
926
927 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
928 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
929 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
930 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
931 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
932 t_src_class(src[1].File),
933 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
934
935 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
936 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
937 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
938 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
939 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
940 t_src_class(src[0].File),
941 src[0].Negate) | (src[0].RelAddr << 4);
942
943 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
944 VSF_IN_COMPONENT_X,
945 VSF_IN_COMPONENT_Y,
946 VSF_IN_COMPONENT_Z,
947 VSF_IN_COMPONENT_W,
948 VSF_IN_CLASS_TMP,
949 VSF_FLAG_NONE);
950 goto next;
951
952 case OPCODE_END:
953 assert(0);
954 default:
955 break;
956 }
957
958 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
959 t_dst_mask(dst.WriteMask));
960
961 if(are_srcs_scalar){
962 switch(operands){
963 case 1:
964 o_inst->src0 = t_src_scalar(vp, &src[0]);
965 o_inst->src1 = UNUSED_SRC_0;
966 o_inst->src2 = UNUSED_SRC_1;
967 break;
968
969 case 2:
970 o_inst->src0 = t_src_scalar(vp, &src[0]);
971 o_inst->src1 = t_src_scalar(vp, &src[1]);
972 o_inst->src2 = UNUSED_SRC_1;
973 break;
974
975 case 3:
976 o_inst->src0 = t_src_scalar(vp, &src[0]);
977 o_inst->src1 = t_src_scalar(vp, &src[1]);
978 o_inst->src2 = t_src_scalar(vp, &src[2]);
979 break;
980
981 default:
982 fprintf(stderr, "illegal number of operands %lu\n", operands);
983 exit(-1);
984 break;
985 }
986 } else {
987 switch(operands){
988 case 1:
989 o_inst->src0 = t_src(vp, &src[0]);
990 o_inst->src1 = UNUSED_SRC_0;
991 o_inst->src2 = UNUSED_SRC_1;
992 break;
993
994 case 2:
995 o_inst->src0 = t_src(vp, &src[0]);
996 o_inst->src1 = t_src(vp, &src[1]);
997 o_inst->src2 = UNUSED_SRC_1;
998 break;
999
1000 case 3:
1001 o_inst->src0 = t_src(vp, &src[0]);
1002 o_inst->src1 = t_src(vp, &src[1]);
1003 o_inst->src2 = t_src(vp, &src[2]);
1004 break;
1005
1006 default:
1007 fprintf(stderr, "illegal number of operands %lu\n", operands);
1008 exit(-1);
1009 break;
1010 }
1011 }
1012 next:
1013
1014 if (dofogfix) {
1015 o_inst++;
1016 if (vp->fogmode == GL_EXP) {
1017 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1018 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1019 VSF_FLAG_X);
1020 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1021 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1022 o_inst->src2 = UNUSED_SRC_1;
1023 o_inst++;
1024 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1025 R200_VSF_OUT_CLASS_RESULT_FOGC,
1026 VSF_FLAG_X);
1027 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1028 o_inst->src1 = UNUSED_SRC_0;
1029 o_inst->src2 = UNUSED_SRC_1;
1030 }
1031 else if (vp->fogmode == GL_EXP2) {
1032 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1033 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1034 VSF_FLAG_X);
1035 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1036 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1037 o_inst->src2 = UNUSED_SRC_1;
1038 o_inst++;
1039 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1040 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1041 VSF_FLAG_X);
1042 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1043 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1044 o_inst->src2 = UNUSED_SRC_1;
1045 o_inst++;
1046 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1047 R200_VSF_OUT_CLASS_RESULT_FOGC,
1048 VSF_FLAG_X);
1049 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1050 o_inst->src1 = UNUSED_SRC_0;
1051 o_inst->src2 = UNUSED_SRC_1;
1052 }
1053 else { /* fogmode == GL_LINEAR */
1054 /* could do that with single op (dot) if using params like
1055 with fixed function pipeline fog */
1056 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1057 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1058 VSF_FLAG_X);
1059 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1060 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1061 o_inst->src2 = UNUSED_SRC_1;
1062 o_inst++;
1063 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1064 R200_VSF_OUT_CLASS_RESULT_FOGC,
1065 VSF_FLAG_X);
1066 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1067 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1068 o_inst->src2 = UNUSED_SRC_1;
1069
1070 }
1071 dofogfix = 0;
1072 }
1073
1074 u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1075 if (mesa_vp->Base.NumNativeTemporaries <
1076 (mesa_vp->Base.NumTemporaries + u_temp_used)) {
1077 mesa_vp->Base.NumNativeTemporaries =
1078 mesa_vp->Base.NumTemporaries + u_temp_used;
1079 }
1080 if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1081 if (R200_DEBUG & RADEON_FALLBACKS) {
1082 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
1083 }
1084 return GL_FALSE;
1085 }
1086 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1087 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1088 mesa_vp->Base.NumNativeInstructions = 129;
1089 if (R200_DEBUG & RADEON_FALLBACKS) {
1090 fprintf(stderr, "more than 128 native instructions\n");
1091 }
1092 return GL_FALSE;
1093 }
1094 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1095 vp->pos_end = (o_inst - vp->instr);
1096 }
1097 }
1098
1099 vp->native = GL_TRUE;
1100 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1101 #if 0
1102 fprintf(stderr, "hw program:\n");
1103 for(i=0; i < vp->program.length; i++)
1104 fprintf(stderr, "%08x\n", vp->instr[i]);
1105 #endif
1106 return GL_TRUE;
1107 }
1108
1109 void r200SetupVertexProg( struct gl_context *ctx ) {
1110 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1111 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1112 GLboolean fallback;
1113 GLint i;
1114
1115 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1116 rmesa->curr_vp_hw = NULL;
1117 r200_translate_vertex_program(ctx, vp);
1118 }
1119 /* could optimize setting up vertex progs away for non-tcl hw */
1120 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
1121 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1122 if (rmesa->radeon.TclFallback) return;
1123
1124 R200_STATECHANGE( rmesa, vap );
1125 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1126 maybe only when using more than 64 inst / 96 param? */
1127 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1128
1129 R200_STATECHANGE( rmesa, pvs );
1130
1131 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1132 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1133 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1134 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1135 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1136
1137 /* maybe user clip planes just work with vertex progs... untested */
1138 if (ctx->Transform.ClipPlanesEnabled) {
1139 R200_STATECHANGE( rmesa, tcl );
1140 if (vp->mesa_program.IsPositionInvariant) {
1141 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1142 }
1143 else {
1144 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1145 }
1146 }
1147
1148 if (vp != rmesa->curr_vp_hw) {
1149 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1150 drm_radeon_cmd_header_t tmp;
1151
1152 R200_STATECHANGE( rmesa, vpi[0] );
1153 R200_STATECHANGE( rmesa, vpi[1] );
1154
1155 /* FIXME: what about using a memcopy... */
1156 for (i = 0; (i < 64) && i < count; i++) {
1157 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1158 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1159 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1160 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1161 }
1162 /* hack up the cmd_size so not the whole state atom is emitted always.
1163 This may require some more thought, we may emit half progs on lost state, but
1164 hopefully it won't matter?
1165 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1166 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1167 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1168 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1169 tmp.veclinear.count = (count > 64) ? 64 : count;
1170 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1171 if (count > 64) {
1172 for (i = 0; i < (count - 64); i++) {
1173 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1174 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1175 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1176 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1177 }
1178 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1179 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1180 tmp.veclinear.count = count - 64;
1181 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1182 }
1183 rmesa->curr_vp_hw = vp;
1184 }
1185 }
1186
1187
1188 static void
1189 r200BindProgram(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1190 {
1191 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1192
1193 switch(target){
1194 case GL_VERTEX_PROGRAM_ARB:
1195 rmesa->curr_vp_hw = NULL;
1196 break;
1197 default:
1198 _mesa_problem(ctx, "Target not supported yet!");
1199 break;
1200 }
1201 }
1202
1203 static struct gl_program *
1204 r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id)
1205 {
1206 struct r200_vertex_program *vp;
1207
1208 switch(target){
1209 case GL_VERTEX_PROGRAM_ARB:
1210 vp = CALLOC_STRUCT(r200_vertex_program);
1211 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1212 case GL_FRAGMENT_PROGRAM_ARB:
1213 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1214 default:
1215 _mesa_problem(ctx, "Bad target in r200NewProgram");
1216 }
1217 return NULL;
1218 }
1219
1220
/**
 * Driver hook that destroys a program object.
 *
 * Simply forwards to _mesa_delete_program(); no driver-specific teardown is
 * done here.
 *
 * \param ctx   GL context.
 * \param prog  Program to delete.
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
1226
1227 static GLboolean
1228 r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1229 {
1230 struct r200_vertex_program *vp = (void *)prog;
1231 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1232
1233 switch(target) {
1234 case GL_VERTEX_PROGRAM_ARB:
1235 vp->translated = GL_FALSE;
1236 vp->fogpidx = 0;
1237 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1238 r200_translate_vertex_program(ctx, vp);
1239 rmesa->curr_vp_hw = NULL;
1240 break;
1241 case GL_FRAGMENT_SHADER_ATI:
1242 rmesa->afs_loaded = NULL;
1243 break;
1244 }
1245 /* need this for tcl fallbacks */
1246 (void) _tnl_program_string(ctx, target, prog);
1247
1248 /* XXX check if program is legal, within limits */
1249 return GL_TRUE;
1250 }
1251
1252 static GLboolean
1253 r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1254 {
1255 struct r200_vertex_program *vp = (void *)prog;
1256
1257 switch(target){
1258 case GL_VERTEX_PROGRAM_ARB:
1259 if (!vp->translated) {
1260 r200_translate_vertex_program(ctx, vp);
1261 }
1262 /* does not take parameters etc. into account */
1263 return vp->native;
1264 default:
1265 _mesa_problem(ctx, "Bad target in r200NewProgram");
1266 }
1267 return 0;
1268 }
1269
1270 void r200InitShaderFuncs(struct dd_function_table *functions)
1271 {
1272 functions->NewProgram = r200NewProgram;
1273 functions->BindProgram = r200BindProgram;
1274 functions->DeleteProgram = r200DeleteProgram;
1275 functions->ProgramStringNotify = r200ProgramStringNotify;
1276 functions->IsProgramNative = r200IsProgramNative;
1277 }