draw: corrections to allow for different cliptest cases
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "program/program.h"
37 #include "program/prog_instruction.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_statevars.h"
40 #include "program/programopt.h"
41 #include "tnl/tnl.h"
42
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
46 #include "r200_tcl.h"
47
/*
 * The translator hands Mesa swizzle selectors and writemask bits to the
 * hardware encoding unchanged (see t_swizzle() and t_dst_mask()), so every
 * Mesa constant must be numerically identical to its VSF counterpart.
 * Refuse to build as soon as any pair diverges.
 */
#if !(SWIZZLE_X == VSF_IN_COMPONENT_X && \
      SWIZZLE_Y == VSF_IN_COMPONENT_Y && \
      SWIZZLE_Z == VSF_IN_COMPONENT_Z && \
      SWIZZLE_W == VSF_IN_COMPONENT_W && \
      SWIZZLE_ZERO == VSF_IN_COMPONENT_ZERO && \
      SWIZZLE_ONE == VSF_IN_COMPONENT_ONE && \
      WRITEMASK_X == VSF_FLAG_X && \
      WRITEMASK_Y == VSF_FLAG_Y && \
      WRITEMASK_Z == VSF_FLAG_Z && \
      WRITEMASK_W == VSF_FLAG_W)
#error Cannot change these!
#endif
60
/*
 * Flag bits that share a word with the operand count in op_names[].ip.
 *
 * They are built from an unsigned long constant on purpose: shifting a
 * signed 1 into bit 31 is undefined behaviour, and where it happens to
 * "work" the negative int is sign-extended when it is widened into the
 * unsigned long ip field, setting every bit above 31 as well.
 */
#define SCALAR_FLAG (1UL<<31)
#define FLAG_MASK (1UL<<31)
/* Low bits of op_names[].ip that hold the operand count. */
#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
/* Build one op_names[] entry: mnemonic string, Mesa opcode, operand info. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66 static struct{
67 char *name;
68 int opcode;
69 unsigned long ip; /* number of input operands and flags */
70 }op_names[]={
71 OPN(ABS, 1),
72 OPN(ADD, 2),
73 OPN(ARL, 1|SCALAR_FLAG),
74 OPN(DP3, 2),
75 OPN(DP4, 2),
76 OPN(DPH, 2),
77 OPN(DST, 2),
78 OPN(EX2, 1|SCALAR_FLAG),
79 OPN(EXP, 1|SCALAR_FLAG),
80 OPN(FLR, 1),
81 OPN(FRC, 1),
82 OPN(LG2, 1|SCALAR_FLAG),
83 OPN(LIT, 1),
84 OPN(LOG, 1|SCALAR_FLAG),
85 OPN(MAD, 3),
86 OPN(MAX, 2),
87 OPN(MIN, 2),
88 OPN(MOV, 1),
89 OPN(MUL, 2),
90 OPN(POW, 2|SCALAR_FLAG),
91 OPN(RCP, 1|SCALAR_FLAG),
92 OPN(RSQ, 1|SCALAR_FLAG),
93 OPN(SGE, 2),
94 OPN(SLT, 2),
95 OPN(SUB, 2),
96 OPN(SWZ, 1),
97 OPN(XPD, 2),
98 OPN(PRINT, 0),
99 OPN(END, 0),
100 };
101 #undef OPN
102
103 static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
104 {
105 r200ContextPtr rmesa = R200_CONTEXT( ctx );
106 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
107 int pi;
108 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
109 struct gl_program_parameter_list *paramList;
110 drm_radeon_cmd_header_t tmp;
111
112 R200_STATECHANGE( rmesa, vpp[0] );
113 R200_STATECHANGE( rmesa, vpp[1] );
114 assert(mesa_vp->Base.Parameters);
115 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
116 paramList = mesa_vp->Base.Parameters;
117
118 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
119 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
120 return GL_FALSE;
121 }
122
123 for(pi = 0; pi < paramList->NumParameters; pi++) {
124 switch(paramList->Parameters[pi].Type) {
125 case PROGRAM_STATE_VAR:
126 case PROGRAM_NAMED_PARAM:
127 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
128 case PROGRAM_CONSTANT:
129 *fcmd++ = paramList->ParameterValues[pi][0];
130 *fcmd++ = paramList->ParameterValues[pi][1];
131 *fcmd++ = paramList->ParameterValues[pi][2];
132 *fcmd++ = paramList->ParameterValues[pi][3];
133 break;
134 default:
135 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
136 break;
137 }
138 if (pi == 95) {
139 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
140 }
141 }
142 /* hack up the cmd_size so not the whole state atom is emitted always. */
143 rmesa->hw.vpp[0].cmd_size =
144 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
145 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
146 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
147 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
148 if (paramList->NumParameters > 96) {
149 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
150 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
151 tmp.veclinear.count = paramList->NumParameters - 96;
152 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
153 }
154 return GL_TRUE;
155 }
156
157 static INLINE unsigned long t_dst_mask(GLuint mask)
158 {
159 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160 return mask & VSF_FLAG_ALL;
161 }
162
163 static unsigned long t_dst(struct prog_dst_register *dst)
164 {
165 switch(dst->File) {
166 case PROGRAM_TEMPORARY:
167 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
168 | R200_VSF_OUT_CLASS_TMP);
169 case PROGRAM_OUTPUT:
170 switch (dst->Index) {
171 case VERT_RESULT_HPOS:
172 return R200_VSF_OUT_CLASS_RESULT_POS;
173 case VERT_RESULT_COL0:
174 return R200_VSF_OUT_CLASS_RESULT_COLOR;
175 case VERT_RESULT_COL1:
176 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
177 | R200_VSF_OUT_CLASS_RESULT_COLOR);
178 case VERT_RESULT_FOGC:
179 return R200_VSF_OUT_CLASS_RESULT_FOGC;
180 case VERT_RESULT_TEX0:
181 case VERT_RESULT_TEX1:
182 case VERT_RESULT_TEX2:
183 case VERT_RESULT_TEX3:
184 case VERT_RESULT_TEX4:
185 case VERT_RESULT_TEX5:
186 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
187 | R200_VSF_OUT_CLASS_RESULT_TEXC);
188 case VERT_RESULT_PSIZ:
189 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
190 default:
191 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
192 exit(0);
193 return 0;
194 }
195 case PROGRAM_ADDRESS:
196 assert (dst->Index == 0);
197 return R200_VSF_OUT_CLASS_ADDR;
198 default:
199 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
200 exit(0);
201 return 0;
202 }
203 }
204
205 static unsigned long t_src_class(gl_register_file file)
206 {
207
208 switch(file){
209 case PROGRAM_TEMPORARY:
210 return VSF_IN_CLASS_TMP;
211
212 case PROGRAM_INPUT:
213 return VSF_IN_CLASS_ATTR;
214
215 case PROGRAM_LOCAL_PARAM:
216 case PROGRAM_ENV_PARAM:
217 case PROGRAM_NAMED_PARAM:
218 case PROGRAM_CONSTANT:
219 case PROGRAM_STATE_VAR:
220 return VSF_IN_CLASS_PARAM;
221 /*
222 case PROGRAM_OUTPUT:
223 case PROGRAM_WRITE_ONLY:
224 case PROGRAM_ADDRESS:
225 */
226 default:
227 fprintf(stderr, "problem in %s", __FUNCTION__);
228 exit(0);
229 }
230 }
231
232 static INLINE unsigned long t_swizzle(GLubyte swizzle)
233 {
234 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
235 return swizzle;
236 }
237
#if 0
/* Debug aid (compiled out): print the whole vp->inputs[] table to stderr,
 * prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
   int attr = 0;

   if (vp == NULL) {
      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   while (attr < VERT_ATTRIB_MAX) {
      fprintf(stderr, "%d ", vp->inputs[attr]);
      attr++;
   }
   fprintf(stderr, ">\n");
}
#endif
255
256 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
257 {
258 /*
259 int i;
260 int max_reg = -1;
261 */
262 if(src->File == PROGRAM_INPUT){
263 /* if(vp->inputs[src->Index] != -1)
264 return vp->inputs[src->Index];
265
266 for(i=0; i < VERT_ATTRIB_MAX; i++)
267 if(vp->inputs[i] > max_reg)
268 max_reg = vp->inputs[i];
269
270 vp->inputs[src->Index] = max_reg+1;*/
271
272 //vp_dump_inputs(vp, __FUNCTION__);
273 assert(vp->inputs[src->Index] != -1);
274 return vp->inputs[src->Index];
275 } else {
276 if (src->Index < 0) {
277 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
278 return 0;
279 }
280 return src->Index;
281 }
282 }
283
284 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
285 {
286
287 return MAKE_VSF_SOURCE(t_src_index(vp, src),
288 t_swizzle(GET_SWZ(src->Swizzle, 0)),
289 t_swizzle(GET_SWZ(src->Swizzle, 1)),
290 t_swizzle(GET_SWZ(src->Swizzle, 2)),
291 t_swizzle(GET_SWZ(src->Swizzle, 3)),
292 t_src_class(src->File),
293 src->Negate) | (src->RelAddr << 4);
294 }
295
296 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
297 {
298
299 return MAKE_VSF_SOURCE(t_src_index(vp, src),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_swizzle(GET_SWZ(src->Swizzle, 0)),
302 t_swizzle(GET_SWZ(src->Swizzle, 0)),
303 t_swizzle(GET_SWZ(src->Swizzle, 0)),
304 t_src_class(src->File),
305 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
306 }
307
308 static unsigned long t_opcode(enum prog_opcode opcode)
309 {
310
311 switch(opcode){
312 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
313 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
314 * seems to ignore neg offsets which isn't quite correct...
315 */
316 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
317 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
318 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
319 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
320 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
321 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
322 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
323 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
324 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
325 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
326 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
327 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
328 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
329 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
330 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
331 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
332
333 default:
334 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
335 }
336 exit(-1);
337 return 0;
338 }
339
340 static unsigned long op_operands(enum prog_opcode opcode)
341 {
342 int i;
343
344 /* Can we trust mesas opcodes to be in order ? */
345 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
346 if(op_names[i].opcode == opcode)
347 return op_names[i].ip;
348
349 fprintf(stderr, "op %d not found in op_names\n", opcode);
350 exit(-1);
351 return 0;
352 }
353
/* TODO: Get rid of t_src_class call */
/*
 * True when two source operands name different registers (relative
 * addressing or index differ) and are either both parameters or both
 * vertex attributes.  The translator reacts by first copying one of the
 * two operands into a temporary.
 * Both arguments are evaluated more than once; pass side-effect-free
 * lvalues only.
 */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
                        ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
                          t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
                         (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
                          t_src_class((b).File) == VSF_IN_CLASS_ATTR)))

/* fglrx on rv250 codes up unused sources as follows:
   unused but necessary sources are same as previous source, zero-ed out.
   unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
   set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */

/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* Copy of an already encoded source word with all four component
   selectors replaced by "select zero". */
#define ZERO_SRC_N(srcword) ((((srcword) & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
                             | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
                              | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
                              | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
                              | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))

/* All of the following read the instruction currently pointed to by the
   local variable o_inst. */
#define ZERO_SRC_0 ZERO_SRC_N(o_inst->src0)

#define ZERO_SRC_1 ZERO_SRC_N(o_inst->src1)

#define ZERO_SRC_2 ZERO_SRC_N(o_inst->src2)

/* Copy of an already encoded source word with its low four bits replaced
   by 9 (presumably the VSF_IN_CLASS_NONE encoding mentioned above --
   confirm against r200_vertprog.h). */
#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)

#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)

#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)
392
393
394 /**
395 * Generate an R200 vertex program from Mesa's internal representation.
396 *
397 * \return GL_TRUE for success, GL_FALSE for failure.
398 */
399 static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_vertex_program *vp)
400 {
401 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
402 struct prog_instruction *vpi;
403 int i;
404 VERTEX_SHADER_INSTRUCTION *o_inst;
405 unsigned long operands;
406 int are_srcs_scalar;
407 unsigned long hw_op;
408 int dofogfix = 0;
409 int fog_temp_i = 0;
410 int free_inputs;
411 int array_count = 0;
412 int u_temp_used;
413
414 vp->native = GL_FALSE;
415 vp->translated = GL_TRUE;
416 vp->fogmode = ctx->Fog.Mode;
417
418 if (mesa_vp->Base.NumInstructions == 0)
419 return GL_FALSE;
420
421 #if 0
422 if ((mesa_vp->Base.InputsRead &
423 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
424 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
425 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
426 if (R200_DEBUG & RADEON_FALLBACKS) {
427 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
428 mesa_vp->Base.InputsRead);
429 }
430 return GL_FALSE;
431 }
432 #endif
433
434 if ((mesa_vp->Base.OutputsWritten &
435 ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) |
436 (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
437 (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
438 (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
439 if (R200_DEBUG & RADEON_FALLBACKS) {
440 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
441 mesa_vp->Base.OutputsWritten);
442 }
443 return GL_FALSE;
444 }
445
446 if (mesa_vp->IsNVProgram) {
447 /* subtle differences in spec like guaranteed initialized regs could cause
448 headaches. Might want to remove the driconf option to enable it completely */
449 return GL_FALSE;
450 }
451 /* Initial value should be last tmp reg that hw supports.
452 Strangely enough r300 doesnt mind even though these would be out of range.
453 Smart enough to realize that it doesnt need it? */
454 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
455 struct prog_src_register src[3];
456 struct prog_dst_register dst;
457
458 /* FIXME: is changing the prog safe to do here? */
459 if (mesa_vp->IsPositionInvariant &&
460 /* make sure we only do this once */
461 !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
462 _mesa_insert_mvp_code(ctx, mesa_vp);
463 }
464
465 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
466 base e isn't directly available neither. */
467 if ((mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_FOGC)) && !vp->fogpidx) {
468 struct gl_program_parameter_list *paramList;
469 gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
470 paramList = mesa_vp->Base.Parameters;
471 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
472 }
473
474 vp->pos_end = 0;
475 mesa_vp->Base.NumNativeInstructions = 0;
476 if (mesa_vp->Base.Parameters)
477 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
478 else
479 mesa_vp->Base.NumNativeParameters = 0;
480
481 for(i = 0; i < VERT_ATTRIB_MAX; i++)
482 vp->inputs[i] = -1;
483 for(i = 0; i < 15; i++)
484 vp->inputmap_rev[i] = 255;
485 free_inputs = 0x2ffd;
486
487 /* fglrx uses fixed inputs as follows for conventional attribs.
488 generic attribs use non-fixed assignment, fglrx will always use the
489 lowest attrib values available. We'll just do the same.
490 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
491 and 13 in a hw vertex prog.
492 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
493 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
494 Additionally, not more than 12 arrays in total are possible I think.
495 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
496 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
497 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
498 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
499 */
500
501 /* attr 4,5 and 13 are only used with generic attribs.
502 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
503 not possibe to use with vertex progs as it is lacking in vert prog specification) */
504 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
505 if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
506 vp->inputs[VERT_ATTRIB_POS] = 0;
507 vp->inputmap_rev[0] = VERT_ATTRIB_POS;
508 free_inputs &= ~(1 << 0);
509 array_count++;
510 }
511 if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
512 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
513 vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
514 array_count++;
515 }
516 if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
517 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
518 vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
519 array_count++;
520 }
521 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
522 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
523 vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
524 free_inputs &= ~(1 << 2);
525 array_count++;
526 }
527 if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
528 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
529 vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
530 free_inputs &= ~(1 << 3);
531 array_count++;
532 }
533 if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
534 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
535 vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
536 array_count++;
537 }
538 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) {
539 if (mesa_vp->Base.InputsRead & (1 << i)) {
540 vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6;
541 vp->inputmap_rev[8 + i - VERT_ATTRIB_TEX0] = i;
542 free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6));
543 array_count++;
544 }
545 }
546 /* using VERT_ATTRIB_TEX6/7 would be illegal */
547 /* completely ignore aliasing? */
548 for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
549 int j;
550 /* completely ignore aliasing? */
551 if (mesa_vp->Base.InputsRead & (1 << i)) {
552 array_count++;
553 if (array_count > 12) {
554 if (R200_DEBUG & RADEON_FALLBACKS) {
555 fprintf(stderr, "more than 12 attribs used in vert prog\n");
556 }
557 return GL_FALSE;
558 }
559 for (j = 0; j < 14; j++) {
560 /* will always find one due to limited array_count */
561 if (free_inputs & (1 << j)) {
562 free_inputs &= ~(1 << j);
563 vp->inputs[i] = j;
564 if (j == 0) vp->inputmap_rev[j] = i; /* mapped to pos */
565 else if (j < 12) vp->inputmap_rev[j + 2] = i; /* mapped to col/tex */
566 else vp->inputmap_rev[j + 1] = i; /* mapped to pos1 */
567 break;
568 }
569 }
570 }
571 }
572
573 if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
574 if (R200_DEBUG & RADEON_FALLBACKS) {
575 fprintf(stderr, "can't handle vert prog without position output\n");
576 }
577 return GL_FALSE;
578 }
579 if (free_inputs & 1) {
580 if (R200_DEBUG & RADEON_FALLBACKS) {
581 fprintf(stderr, "can't handle vert prog without position input\n");
582 }
583 return GL_FALSE;
584 }
585
586 o_inst = vp->instr;
587 for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
588 operands = op_operands(vpi->Opcode);
589 are_srcs_scalar = operands & SCALAR_FLAG;
590 operands &= OP_MASK;
591
592 for(i = 0; i < operands; i++) {
593 src[i] = vpi->SrcReg[i];
594 /* hack up default attrib values as per spec as swizzling.
595 normal, fog, secondary color. Crazy?
596 May need more if we don't submit vec4 elements? */
597 if (src[i].File == PROGRAM_INPUT) {
598 if (src[i].Index == VERT_ATTRIB_NORMAL) {
599 int j;
600 for (j = 0; j < 4; j++) {
601 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
602 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
603 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
604 }
605 }
606 }
607 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
608 int j;
609 for (j = 0; j < 4; j++) {
610 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
611 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
612 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
613 }
614 }
615 }
616 else if (src[i].Index == VERT_ATTRIB_FOG) {
617 int j;
618 for (j = 0; j < 4; j++) {
619 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
620 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
621 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
622 }
623 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
624 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
625 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
626 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
627 }
628 }
629 }
630 }
631 }
632
633 if(operands == 3){
634 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
635 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
636 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
637 VSF_FLAG_ALL);
638
639 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
640 SWIZZLE_X, SWIZZLE_Y,
641 SWIZZLE_Z, SWIZZLE_W,
642 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
643
644 o_inst->src1 = ZERO_SRC_0;
645 o_inst->src2 = UNUSED_SRC_1;
646 o_inst++;
647
648 src[2].File = PROGRAM_TEMPORARY;
649 src[2].Index = u_temp_i;
650 src[2].RelAddr = 0;
651 u_temp_i--;
652 }
653 }
654
655 if(operands >= 2){
656 if( CMP_SRCS(src[1], src[0]) ){
657 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
658 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
659 VSF_FLAG_ALL);
660
661 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
662 SWIZZLE_X, SWIZZLE_Y,
663 SWIZZLE_Z, SWIZZLE_W,
664 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
665
666 o_inst->src1 = ZERO_SRC_0;
667 o_inst->src2 = UNUSED_SRC_1;
668 o_inst++;
669
670 src[0].File = PROGRAM_TEMPORARY;
671 src[0].Index = u_temp_i;
672 src[0].RelAddr = 0;
673 u_temp_i--;
674 }
675 }
676
677 dst = vpi->DstReg;
678 if (dst.File == PROGRAM_OUTPUT &&
679 dst.Index == VERT_RESULT_FOGC &&
680 dst.WriteMask & WRITEMASK_X) {
681 fog_temp_i = u_temp_i;
682 dst.File = PROGRAM_TEMPORARY;
683 dst.Index = fog_temp_i;
684 dofogfix = 1;
685 u_temp_i--;
686 }
687
688 /* These ops need special handling. */
689 switch(vpi->Opcode){
690 case OPCODE_POW:
691 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
692 So may need to insert additional instruction */
693 if ((src[0].File == src[1].File) &&
694 (src[0].Index == src[1].Index)) {
695 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
696 t_dst_mask(dst.WriteMask));
697 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
698 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
699 SWIZZLE_ZERO,
700 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
701 SWIZZLE_ZERO,
702 t_src_class(src[0].File),
703 src[0].Negate) | (src[0].RelAddr << 4);
704 o_inst->src1 = UNUSED_SRC_0;
705 o_inst->src2 = UNUSED_SRC_0;
706 }
707 else {
708 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
709 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
710 VSF_FLAG_ALL);
711 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
712 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
713 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
714 t_src_class(src[0].File),
715 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
716 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
717 SWIZZLE_ZERO, SWIZZLE_ZERO,
718 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
719 t_src_class(src[1].File),
720 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
721 o_inst->src2 = UNUSED_SRC_1;
722 o_inst++;
723
724 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
725 t_dst_mask(dst.WriteMask));
726 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
727 VSF_IN_COMPONENT_X,
728 VSF_IN_COMPONENT_Y,
729 VSF_IN_COMPONENT_Z,
730 VSF_IN_COMPONENT_W,
731 VSF_IN_CLASS_TMP,
732 VSF_FLAG_NONE);
733 o_inst->src1 = UNUSED_SRC_0;
734 o_inst->src2 = UNUSED_SRC_0;
735 u_temp_i--;
736 }
737 goto next;
738
739 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
740 case OPCODE_SWZ:
741 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
742 t_dst_mask(dst.WriteMask));
743 o_inst->src0 = t_src(vp, &src[0]);
744 o_inst->src1 = ZERO_SRC_0;
745 o_inst->src2 = UNUSED_SRC_1;
746 goto next;
747
748 case OPCODE_MAD:
749 /* only 2 read ports into temp memory thus may need the macro op MAD_2
750 instead (requiring 2 clocks) if all inputs are in temp memory
751 (and, only if they actually reference 3 distinct temps) */
752 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
753 src[1].File == PROGRAM_TEMPORARY &&
754 src[2].File == PROGRAM_TEMPORARY &&
755 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
756 (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
757 (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
758 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
759
760 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
761 t_dst_mask(dst.WriteMask));
762 o_inst->src0 = t_src(vp, &src[0]);
763 #if 0
764 if ((o_inst - vp->instr) == 31) {
765 /* fix up the broken vertex program of quake4 demo... */
766 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
767 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
768 t_src_class(src[1].File),
769 src[1].Negate) | (src[1].RelAddr << 4);
770 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
771 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
772 t_src_class(src[1].File),
773 src[1].Negate) | (src[1].RelAddr << 4);
774 }
775 else {
776 o_inst->src1 = t_src(vp, &src[1]);
777 o_inst->src2 = t_src(vp, &src[2]);
778 }
779 #else
780 o_inst->src1 = t_src(vp, &src[1]);
781 o_inst->src2 = t_src(vp, &src[2]);
782 #endif
783 goto next;
784
785 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
786 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
787 t_dst_mask(dst.WriteMask));
788
789 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
790 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
791 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
792 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
793 SWIZZLE_ZERO,
794 t_src_class(src[0].File),
795 src[0].Negate) | (src[0].RelAddr << 4);
796
797 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
798 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
799 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
800 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
801 SWIZZLE_ZERO,
802 t_src_class(src[1].File),
803 src[1].Negate) | (src[1].RelAddr << 4);
804
805 o_inst->src2 = UNUSED_SRC_1;
806 goto next;
807
808 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
809 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
810 t_dst_mask(dst.WriteMask));
811
812 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
813 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
814 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
815 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
816 VSF_IN_COMPONENT_ONE,
817 t_src_class(src[0].File),
818 src[0].Negate) | (src[0].RelAddr << 4);
819 o_inst->src1 = t_src(vp, &src[1]);
820 o_inst->src2 = UNUSED_SRC_1;
821 goto next;
822
823 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
824 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
825 t_dst_mask(dst.WriteMask));
826
827 o_inst->src0 = t_src(vp, &src[0]);
828 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
829 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
830 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
831 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
832 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
833 t_src_class(src[1].File),
834 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
835 o_inst->src2 = UNUSED_SRC_1;
836 goto next;
837
838 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
839 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
840 t_dst_mask(dst.WriteMask));
841
842 o_inst->src0=t_src(vp, &src[0]);
843 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
844 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
845 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
846 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
847 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
848 t_src_class(src[0].File),
849 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
850 o_inst->src2 = UNUSED_SRC_1;
851 goto next;
852
853 case OPCODE_FLR:
854 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
855 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
856
857 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
858 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
859 t_dst_mask(dst.WriteMask));
860
861 o_inst->src0 = t_src(vp, &src[0]);
862 o_inst->src1 = UNUSED_SRC_0;
863 o_inst->src2 = UNUSED_SRC_1;
864 o_inst++;
865
866 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
867 t_dst_mask(dst.WriteMask));
868
869 o_inst->src0 = t_src(vp, &src[0]);
870 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
871 VSF_IN_COMPONENT_X,
872 VSF_IN_COMPONENT_Y,
873 VSF_IN_COMPONENT_Z,
874 VSF_IN_COMPONENT_W,
875 VSF_IN_CLASS_TMP,
876 /* Not 100% sure about this */
877 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
878
879 o_inst->src2 = UNUSED_SRC_0;
880 u_temp_i--;
881 goto next;
882
883 case OPCODE_XPD:
884 /* mul r0, r1.yzxw, r2.zxyw
885 mad r0, -r2.yzxw, r1.zxyw, r0
886 */
887 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
888 src[1].File == PROGRAM_TEMPORARY &&
889 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
890 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
891
892 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
893 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
894 t_dst_mask(dst.WriteMask));
895
896 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
897 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
898 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
899 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
900 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
901 t_src_class(src[0].File),
902 src[0].Negate) | (src[0].RelAddr << 4);
903
904 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
905 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
906 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
907 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
908 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
909 t_src_class(src[1].File),
910 src[1].Negate) | (src[1].RelAddr << 4);
911
912 o_inst->src2 = UNUSED_SRC_1;
913 o_inst++;
914 u_temp_i--;
915
916 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
917 t_dst_mask(dst.WriteMask));
918
919 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
920 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
921 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
922 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
923 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
924 t_src_class(src[1].File),
925 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
926
927 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
928 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
929 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
930 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
931 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
932 t_src_class(src[0].File),
933 src[0].Negate) | (src[0].RelAddr << 4);
934
935 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
936 VSF_IN_COMPONENT_X,
937 VSF_IN_COMPONENT_Y,
938 VSF_IN_COMPONENT_Z,
939 VSF_IN_COMPONENT_W,
940 VSF_IN_CLASS_TMP,
941 VSF_FLAG_NONE);
942 goto next;
943
944 case OPCODE_END:
945 assert(0);
946 default:
947 break;
948 }
949
950 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
951 t_dst_mask(dst.WriteMask));
952
953 if(are_srcs_scalar){
954 switch(operands){
955 case 1:
956 o_inst->src0 = t_src_scalar(vp, &src[0]);
957 o_inst->src1 = UNUSED_SRC_0;
958 o_inst->src2 = UNUSED_SRC_1;
959 break;
960
961 case 2:
962 o_inst->src0 = t_src_scalar(vp, &src[0]);
963 o_inst->src1 = t_src_scalar(vp, &src[1]);
964 o_inst->src2 = UNUSED_SRC_1;
965 break;
966
967 case 3:
968 o_inst->src0 = t_src_scalar(vp, &src[0]);
969 o_inst->src1 = t_src_scalar(vp, &src[1]);
970 o_inst->src2 = t_src_scalar(vp, &src[2]);
971 break;
972
973 default:
974 fprintf(stderr, "illegal number of operands %lu\n", operands);
975 exit(-1);
976 break;
977 }
978 } else {
979 switch(operands){
980 case 1:
981 o_inst->src0 = t_src(vp, &src[0]);
982 o_inst->src1 = UNUSED_SRC_0;
983 o_inst->src2 = UNUSED_SRC_1;
984 break;
985
986 case 2:
987 o_inst->src0 = t_src(vp, &src[0]);
988 o_inst->src1 = t_src(vp, &src[1]);
989 o_inst->src2 = UNUSED_SRC_1;
990 break;
991
992 case 3:
993 o_inst->src0 = t_src(vp, &src[0]);
994 o_inst->src1 = t_src(vp, &src[1]);
995 o_inst->src2 = t_src(vp, &src[2]);
996 break;
997
998 default:
999 fprintf(stderr, "illegal number of operands %lu\n", operands);
1000 exit(-1);
1001 break;
1002 }
1003 }
1004 next:
1005
1006 if (dofogfix) {
1007 o_inst++;
1008 if (vp->fogmode == GL_EXP) {
1009 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1010 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1011 VSF_FLAG_X);
1012 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1013 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1014 o_inst->src2 = UNUSED_SRC_1;
1015 o_inst++;
1016 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1017 R200_VSF_OUT_CLASS_RESULT_FOGC,
1018 VSF_FLAG_X);
1019 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1020 o_inst->src1 = UNUSED_SRC_0;
1021 o_inst->src2 = UNUSED_SRC_1;
1022 }
1023 else if (vp->fogmode == GL_EXP2) {
1024 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1025 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1026 VSF_FLAG_X);
1027 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1028 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1029 o_inst->src2 = UNUSED_SRC_1;
1030 o_inst++;
1031 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1032 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1033 VSF_FLAG_X);
1034 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1035 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1036 o_inst->src2 = UNUSED_SRC_1;
1037 o_inst++;
1038 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1039 R200_VSF_OUT_CLASS_RESULT_FOGC,
1040 VSF_FLAG_X);
1041 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1042 o_inst->src1 = UNUSED_SRC_0;
1043 o_inst->src2 = UNUSED_SRC_1;
1044 }
1045 else { /* fogmode == GL_LINEAR */
1046 /* could do that with single op (dot) if using params like
1047 with fixed function pipeline fog */
1048 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1049 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1050 VSF_FLAG_X);
1051 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1052 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1053 o_inst->src2 = UNUSED_SRC_1;
1054 o_inst++;
1055 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1056 R200_VSF_OUT_CLASS_RESULT_FOGC,
1057 VSF_FLAG_X);
1058 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1059 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1060 o_inst->src2 = UNUSED_SRC_1;
1061
1062 }
1063 dofogfix = 0;
1064 }
1065
1066 u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1067 if (mesa_vp->Base.NumNativeTemporaries <
1068 (mesa_vp->Base.NumTemporaries + u_temp_used)) {
1069 mesa_vp->Base.NumNativeTemporaries =
1070 mesa_vp->Base.NumTemporaries + u_temp_used;
1071 }
1072 if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1073 if (R200_DEBUG & RADEON_FALLBACKS) {
1074 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
1075 }
1076 return GL_FALSE;
1077 }
1078 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1079 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1080 mesa_vp->Base.NumNativeInstructions = 129;
1081 if (R200_DEBUG & RADEON_FALLBACKS) {
1082 fprintf(stderr, "more than 128 native instructions\n");
1083 }
1084 return GL_FALSE;
1085 }
1086 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1087 vp->pos_end = (o_inst - vp->instr);
1088 }
1089 }
1090
1091 vp->native = GL_TRUE;
1092 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
1093 #if 0
1094 fprintf(stderr, "hw program:\n");
1095 for(i=0; i < vp->program.length; i++)
1096 fprintf(stderr, "%08x\n", vp->instr[i]);
1097 #endif
1098 return GL_TRUE;
1099 }
1100
1101 void r200SetupVertexProg( GLcontext *ctx ) {
1102 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1103 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1104 GLboolean fallback;
1105 GLint i;
1106
1107 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1108 rmesa->curr_vp_hw = NULL;
1109 r200_translate_vertex_program(ctx, vp);
1110 }
1111 /* could optimize setting up vertex progs away for non-tcl hw */
1112 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
1113 rmesa->radeon.radeonScreen->drmSupportsVertexProgram);
1114 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1115 if (rmesa->radeon.TclFallback) return;
1116
1117 R200_STATECHANGE( rmesa, vap );
1118 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1119 maybe only when using more than 64 inst / 96 param? */
1120 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1121
1122 R200_STATECHANGE( rmesa, pvs );
1123
1124 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1125 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1126 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1127 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1128 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1129
1130 /* maybe user clip planes just work with vertex progs... untested */
1131 if (ctx->Transform.ClipPlanesEnabled) {
1132 R200_STATECHANGE( rmesa, tcl );
1133 if (vp->mesa_program.IsPositionInvariant) {
1134 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1135 }
1136 else {
1137 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1138 }
1139 }
1140
1141 if (vp != rmesa->curr_vp_hw) {
1142 GLuint count = vp->mesa_program.Base.NumNativeInstructions;
1143 drm_radeon_cmd_header_t tmp;
1144
1145 R200_STATECHANGE( rmesa, vpi[0] );
1146 R200_STATECHANGE( rmesa, vpi[1] );
1147
1148 /* FIXME: what about using a memcopy... */
1149 for (i = 0; (i < 64) && i < count; i++) {
1150 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1151 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1152 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1153 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1154 }
1155 /* hack up the cmd_size so not the whole state atom is emitted always.
1156 This may require some more thought, we may emit half progs on lost state, but
1157 hopefully it won't matter?
1158 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1159 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1160 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1161 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1162 tmp.veclinear.count = (count > 64) ? 64 : count;
1163 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1164 if (count > 64) {
1165 for (i = 0; i < (count - 64); i++) {
1166 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1167 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1168 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1169 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1170 }
1171 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1172 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1173 tmp.veclinear.count = count - 64;
1174 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1175 }
1176 rmesa->curr_vp_hw = vp;
1177 }
1178 }
1179
1180
1181 static void
1182 r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
1183 {
1184 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1185
1186 switch(target){
1187 case GL_VERTEX_PROGRAM_ARB:
1188 rmesa->curr_vp_hw = NULL;
1189 break;
1190 default:
1191 _mesa_problem(ctx, "Target not supported yet!");
1192 break;
1193 }
1194 }
1195
1196 static struct gl_program *
1197 r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
1198 {
1199 struct r200_vertex_program *vp;
1200
1201 switch(target){
1202 case GL_VERTEX_PROGRAM_ARB:
1203 vp = CALLOC_STRUCT(r200_vertex_program);
1204 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
1205 case GL_FRAGMENT_PROGRAM_ARB:
1206 case GL_FRAGMENT_PROGRAM_NV:
1207 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
1208 default:
1209 _mesa_problem(ctx, "Bad target in r200NewProgram");
1210 }
1211 return NULL;
1212 }
1213
1214
1215 static void
1216 r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
1217 {
1218 _mesa_delete_program(ctx, prog);
1219 }
1220
1221 static GLboolean
1222 r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
1223 {
1224 struct r200_vertex_program *vp = (void *)prog;
1225 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1226
1227 switch(target) {
1228 case GL_VERTEX_PROGRAM_ARB:
1229 vp->translated = GL_FALSE;
1230 vp->fogpidx = 0;
1231 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
1232 r200_translate_vertex_program(ctx, vp);
1233 rmesa->curr_vp_hw = NULL;
1234 break;
1235 case GL_FRAGMENT_SHADER_ATI:
1236 rmesa->afs_loaded = NULL;
1237 break;
1238 }
1239 /* need this for tcl fallbacks */
1240 (void) _tnl_program_string(ctx, target, prog);
1241
1242 /* XXX check if program is legal, within limits */
1243 return GL_TRUE;
1244 }
1245
1246 static GLboolean
1247 r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
1248 {
1249 struct r200_vertex_program *vp = (void *)prog;
1250
1251 switch(target){
1252 case GL_VERTEX_STATE_PROGRAM_NV:
1253 case GL_VERTEX_PROGRAM_ARB:
1254 if (!vp->translated) {
1255 r200_translate_vertex_program(ctx, vp);
1256 }
1257 /* does not take parameters etc. into account */
1258 return vp->native;
1259 default:
1260 _mesa_problem(ctx, "Bad target in r200NewProgram");
1261 }
1262 return 0;
1263 }
1264
1265 void r200InitShaderFuncs(struct dd_function_table *functions)
1266 {
1267 functions->NewProgram = r200NewProgram;
1268 functions->BindProgram = r200BindProgram;
1269 functions->DeleteProgram = r200DeleteProgram;
1270 functions->ProgramStringNotify = r200ProgramStringNotify;
1271 functions->IsProgramNative = r200IsProgramNative;
1272 }