f69f4a0a1e210b0804fa5d86ddb448eda16bb0e5
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "program/program.h"
37 #include "program/prog_instruction.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_statevars.h"
40 #include "program/programopt.h"
41 #include "tnl/tnl.h"
42
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
46 #include "r200_tcl.h"
47
48 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
49 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
50 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
51 SWIZZLE_W != VSF_IN_COMPONENT_W || \
52 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
53 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
54 WRITEMASK_X != VSF_FLAG_X || \
55 WRITEMASK_Y != VSF_FLAG_Y || \
56 WRITEMASK_Z != VSF_FLAG_Z || \
57 WRITEMASK_W != VSF_FLAG_W
58 #error Cannot change these!
59 #endif
60
61 #define SCALAR_FLAG (1<<31)
62 #define FLAG_MASK (1<<31)
63 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
64 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66 static struct{
67 char *name;
68 int opcode;
69 unsigned long ip; /* number of input operands and flags */
70 }op_names[]={
71 OPN(ABS, 1),
72 OPN(ADD, 2),
73 OPN(ARL, 1|SCALAR_FLAG),
74 OPN(DP3, 2),
75 OPN(DP4, 2),
76 OPN(DPH, 2),
77 OPN(DST, 2),
78 OPN(EX2, 1|SCALAR_FLAG),
79 OPN(EXP, 1|SCALAR_FLAG),
80 OPN(FLR, 1),
81 OPN(FRC, 1),
82 OPN(LG2, 1|SCALAR_FLAG),
83 OPN(LIT, 1),
84 OPN(LOG, 1|SCALAR_FLAG),
85 OPN(MAD, 3),
86 OPN(MAX, 2),
87 OPN(MIN, 2),
88 OPN(MOV, 1),
89 OPN(MUL, 2),
90 OPN(POW, 2|SCALAR_FLAG),
91 OPN(RCP, 1|SCALAR_FLAG),
92 OPN(RSQ, 1|SCALAR_FLAG),
93 OPN(SGE, 2),
94 OPN(SLT, 2),
95 OPN(SUB, 2),
96 OPN(SWZ, 1),
97 OPN(XPD, 2),
98 OPN(END, 0),
99 };
100 #undef OPN
101
102 static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
103 {
104 r200ContextPtr rmesa = R200_CONTEXT( ctx );
105 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
106 int pi;
107 struct gl_program *mesa_vp = &vp->mesa_program;
108 struct gl_program_parameter_list *paramList;
109 drm_radeon_cmd_header_t tmp;
110
111 R200_STATECHANGE( rmesa, vpp[0] );
112 R200_STATECHANGE( rmesa, vpp[1] );
113 assert(mesa_vp->Parameters);
114 _mesa_load_state_parameters(ctx, mesa_vp->Parameters);
115 paramList = mesa_vp->Parameters;
116
117 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
118 fprintf(stderr, "%s:Params exhausted\n", __func__);
119 return GL_FALSE;
120 }
121
122 for(pi = 0; pi < paramList->NumParameters; pi++) {
123 unsigned pvo = paramList->ParameterValueOffset[pi];
124
125 switch(paramList->Parameters[pi].Type) {
126 case PROGRAM_STATE_VAR:
127 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
128 case PROGRAM_CONSTANT:
129 *fcmd++ = paramList->ParameterValues[pvo + 0].f;
130 *fcmd++ = paramList->ParameterValues[pvo + 1].f;
131 *fcmd++ = paramList->ParameterValues[pvo + 2].f;
132 *fcmd++ = paramList->ParameterValues[pvo + 3].f;
133 break;
134 default:
135 _mesa_problem(NULL, "Bad param type in %s", __func__);
136 break;
137 }
138 if (pi == 95) {
139 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
140 }
141 }
142 /* hack up the cmd_size so not the whole state atom is emitted always. */
143 rmesa->hw.vpp[0].cmd_size =
144 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
145 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
146 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
147 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
148 if (paramList->NumParameters > 96) {
149 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
150 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
151 tmp.veclinear.count = paramList->NumParameters - 96;
152 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
153 }
154 return GL_TRUE;
155 }
156
157 static inline unsigned long t_dst_mask(GLuint mask)
158 {
159 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160 return mask & VSF_FLAG_ALL;
161 }
162
163 static unsigned long t_dst(struct prog_dst_register *dst)
164 {
165 switch(dst->File) {
166 case PROGRAM_TEMPORARY:
167 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
168 | R200_VSF_OUT_CLASS_TMP);
169 case PROGRAM_OUTPUT:
170 switch (dst->Index) {
171 case VARYING_SLOT_POS:
172 return R200_VSF_OUT_CLASS_RESULT_POS;
173 case VARYING_SLOT_COL0:
174 return R200_VSF_OUT_CLASS_RESULT_COLOR;
175 case VARYING_SLOT_COL1:
176 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
177 | R200_VSF_OUT_CLASS_RESULT_COLOR);
178 case VARYING_SLOT_FOGC:
179 return R200_VSF_OUT_CLASS_RESULT_FOGC;
180 case VARYING_SLOT_TEX0:
181 case VARYING_SLOT_TEX1:
182 case VARYING_SLOT_TEX2:
183 case VARYING_SLOT_TEX3:
184 case VARYING_SLOT_TEX4:
185 case VARYING_SLOT_TEX5:
186 return (((dst->Index - VARYING_SLOT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
187 | R200_VSF_OUT_CLASS_RESULT_TEXC);
188 case VARYING_SLOT_PSIZ:
189 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
190 default:
191 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __func__, dst->Index);
192 exit(0);
193 return 0;
194 }
195 case PROGRAM_ADDRESS:
196 assert (dst->Index == 0);
197 return R200_VSF_OUT_CLASS_ADDR;
198 default:
199 fprintf(stderr, "problem in %s, unknown register type %d\n", __func__, dst->File);
200 exit(0);
201 return 0;
202 }
203 }
204
205 static unsigned long t_src_class(gl_register_file file)
206 {
207
208 switch(file){
209 case PROGRAM_TEMPORARY:
210 return VSF_IN_CLASS_TMP;
211
212 case PROGRAM_INPUT:
213 return VSF_IN_CLASS_ATTR;
214
215 case PROGRAM_CONSTANT:
216 case PROGRAM_STATE_VAR:
217 return VSF_IN_CLASS_PARAM;
218 /*
219 case PROGRAM_OUTPUT:
220 case PROGRAM_ADDRESS:
221 */
222 default:
223 fprintf(stderr, "problem in %s", __func__);
224 exit(0);
225 }
226 }
227
228 static inline unsigned long t_swizzle(GLubyte swizzle)
229 {
230 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
231 return swizzle;
232 }
233
#if 0
/* Debug helper (compiled out): print every entry of vp->inputs to stderr,
 * prefixed with the name of the calling function.  `caller` is const so
 * that __func__ can be passed directly. */
static void vp_dump_inputs(struct r200_vertex_program *vp, const char *caller)
{
   int i;

   if (vp == NULL) {
      fprintf(stderr, "vp null in call to %s from %s\n", __func__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
      fprintf(stderr, "%d ", vp->inputs[i]);
   }
   fprintf(stderr, ">\n");
}
#endif
251
252 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
253 {
254 /*
255 int i;
256 int max_reg = -1;
257 */
258 if(src->File == PROGRAM_INPUT){
259 /* if(vp->inputs[src->Index] != -1)
260 return vp->inputs[src->Index];
261
262 for(i=0; i < VERT_ATTRIB_MAX; i++)
263 if(vp->inputs[i] > max_reg)
264 max_reg = vp->inputs[i];
265
266 vp->inputs[src->Index] = max_reg+1;*/
267
268 //vp_dump_inputs(vp, __func__);
269 assert(vp->inputs[src->Index] != -1);
270 return vp->inputs[src->Index];
271 } else {
272 if (src->Index < 0) {
273 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
274 return 0;
275 }
276 return src->Index;
277 }
278 }
279
280 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
281 {
282
283 return MAKE_VSF_SOURCE(t_src_index(vp, src),
284 t_swizzle(GET_SWZ(src->Swizzle, 0)),
285 t_swizzle(GET_SWZ(src->Swizzle, 1)),
286 t_swizzle(GET_SWZ(src->Swizzle, 2)),
287 t_swizzle(GET_SWZ(src->Swizzle, 3)),
288 t_src_class(src->File),
289 src->Negate) | (src->RelAddr << 4);
290 }
291
292 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
293 {
294
295 return MAKE_VSF_SOURCE(t_src_index(vp, src),
296 t_swizzle(GET_SWZ(src->Swizzle, 0)),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_src_class(src->File),
301 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
302 }
303
304 static unsigned long t_opcode(enum prog_opcode opcode)
305 {
306
307 switch(opcode){
308 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
309 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
310 * seems to ignore neg offsets which isn't quite correct...
311 */
312 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
313 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
314 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
315 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
316 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
317 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
318 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
319 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
320 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
321 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
322 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
323 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
324 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
325 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
326 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
327 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
328
329 default:
330 fprintf(stderr, "%s: Should not be called with opcode %d!", __func__, opcode);
331 }
332 exit(-1);
333 return 0;
334 }
335
336 static unsigned long op_operands(enum prog_opcode opcode)
337 {
338 int i;
339
340 /* Can we trust mesas opcodes to be in order ? */
341 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
342 if(op_names[i].opcode == opcode)
343 return op_names[i].ip;
344
345 fprintf(stderr, "op %d not found in op_names\n", opcode);
346 exit(-1);
347 return 0;
348 }
349
/* Non-zero when sources a and b refer to different registers (RelAddr or
 * Index differ) while both are of class PARAM or both of class ATTR.  The
 * translator reacts to this by copying one of the two into a temporary.
 * Arguments are parenthesized so that any lvalue expression can be passed.
 * TODO: Get rid of t_src_class call */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
                        ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
                          t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
                         (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
                          t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
356
357 /* fglrx on rv250 codes up unused sources as follows:
358 unused but necessary sources are same as previous source, zero-ed out.
359 unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
360 i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
361 set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
362
/* Simplified helpers that derive a source word from one already stored in
   the current output instruction (o_inst); the referenced srcN must
   therefore have been set up first.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* the 12 swizzle-select bits starting at the X component */
#define VP_SWZ_FIELD_BITS (0xfff << R200_VPI_IN_X_SHIFT)

/* select ZERO for all four components */
#define VP_SWZ_ALL_ZERO ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
                         | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
                         | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
                         | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* same register as `reg`, but every component reads as zero */
#define VP_ZERO_SRC(reg) ((((reg) & ~VP_SWZ_FIELD_BITS) | VP_SWZ_ALL_ZERO))

/* same register as `reg`, with the low four bits replaced by 9
   (presumably VSF_IN_CLASS_NONE - TODO confirm against the header) */
#define VP_UNUSED_SRC(reg) (((reg) & ~15) | 9)

#define ZERO_SRC_0 VP_ZERO_SRC(o_inst->src0)

#define ZERO_SRC_1 VP_ZERO_SRC(o_inst->src1)

#define ZERO_SRC_2 VP_ZERO_SRC(o_inst->src2)

#define UNUSED_SRC_0 VP_UNUSED_SRC(o_inst->src0)

#define UNUSED_SRC_1 VP_UNUSED_SRC(o_inst->src1)

#define UNUSED_SRC_2 VP_UNUSED_SRC(o_inst->src2)
388
389
390 /**
391 * Generate an R200 vertex program from Mesa's internal representation.
392 *
393 * \return GL_TRUE for success, GL_FALSE for failure.
394 */
395 static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
396 {
397 struct gl_program *mesa_vp = &vp->mesa_program;
398 struct prog_instruction *vpi;
399 int i;
400 VERTEX_SHADER_INSTRUCTION *o_inst;
401 unsigned long operands;
402 int are_srcs_scalar;
403 unsigned long hw_op;
404 int dofogfix = 0;
405 int fog_temp_i = 0;
406 int free_inputs;
407 int array_count = 0;
408 int u_temp_used;
409
410 vp->native = GL_FALSE;
411 vp->translated = GL_TRUE;
412 vp->fogmode = ctx->Fog.Mode;
413
414 if (mesa_vp->arb.NumInstructions == 0)
415 return GL_FALSE;
416
417 #if 0
418 if ((mesa_vp->info.inputs_read &
419 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
420 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
421 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
422 if (R200_DEBUG & RADEON_FALLBACKS) {
423 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
424 mesa_vp->info.inputs_read);
425 }
426 return GL_FALSE;
427 }
428 #endif
429
430 if ((mesa_vp->info.outputs_written &
431 ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
432 (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
433 (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
434 (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
435 if (R200_DEBUG & RADEON_FALLBACKS) {
436 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
437 (unsigned long long) mesa_vp->info.outputs_written);
438 }
439 return GL_FALSE;
440 }
441
442 /* Initial value should be last tmp reg that hw supports.
443 Strangely enough r300 doesnt mind even though these would be out of range.
444 Smart enough to realize that it doesnt need it? */
445 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
446 struct prog_src_register src[3];
447 struct prog_dst_register dst;
448
449 /* FIXME: is changing the prog safe to do here? */
450 if (mesa_vp->arb.IsPositionInvariant &&
451 /* make sure we only do this once */
452 !(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
453 _mesa_insert_mvp_code(ctx, mesa_vp);
454 }
455
456 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
457 base e isn't directly available neither. */
458 if ((mesa_vp->info.outputs_written & (1 << VARYING_SLOT_FOGC)) &&
459 !vp->fogpidx) {
460 struct gl_program_parameter_list *paramList;
461 gl_state_index16 tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
462 paramList = mesa_vp->Parameters;
463 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
464 }
465
466 vp->pos_end = 0;
467 mesa_vp->arb.NumNativeInstructions = 0;
468 if (mesa_vp->Parameters)
469 mesa_vp->arb.NumNativeParameters = mesa_vp->Parameters->NumParameters;
470 else
471 mesa_vp->arb.NumNativeParameters = 0;
472
473 for(i = 0; i < VERT_ATTRIB_MAX; i++)
474 vp->inputs[i] = -1;
475 for(i = 0; i < 15; i++)
476 vp->inputmap_rev[i] = 255;
477 free_inputs = 0x2ffd;
478
479 /* fglrx uses fixed inputs as follows for conventional attribs.
480 generic attribs use non-fixed assignment, fglrx will always use the
481 lowest attrib values available. We'll just do the same.
482 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
483 and 13 in a hw vertex prog.
484 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
485 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
486 Additionally, not more than 12 arrays in total are possible I think.
487 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
488 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
489 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
490 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
491 */
492
493 /* attr 4,5 and 13 are only used with generic attribs.
494 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
495 not possible to use with vertex progs as it is lacking in vert prog specification) */
496 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
497 if (mesa_vp->info.inputs_read & VERT_BIT_POS) {
498 vp->inputs[VERT_ATTRIB_POS] = 0;
499 vp->inputmap_rev[0] = VERT_ATTRIB_POS;
500 free_inputs &= ~(1 << 0);
501 array_count++;
502 }
503 if (mesa_vp->info.inputs_read & VERT_BIT_NORMAL) {
504 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
505 vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
506 array_count++;
507 }
508 if (mesa_vp->info.inputs_read & VERT_BIT_COLOR0) {
509 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
510 vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
511 free_inputs &= ~(1 << 2);
512 array_count++;
513 }
514 if (mesa_vp->info.inputs_read & VERT_BIT_COLOR1) {
515 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
516 vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
517 free_inputs &= ~(1 << 3);
518 array_count++;
519 }
520 if (mesa_vp->info.inputs_read & VERT_BIT_FOG) {
521 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
522 vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
523 array_count++;
524 }
525 /* VERT_ATTRIB_TEX0-5 */
526 for (i = 0; i <= 5; i++) {
527 if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
528 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
529 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
530 free_inputs &= ~(1 << (i + 6));
531 array_count++;
532 }
533 }
534 /* using VERT_ATTRIB_TEX6/7 would be illegal */
535 for (; i < VERT_ATTRIB_TEX_MAX; i++) {
536 if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
537 if (R200_DEBUG & RADEON_FALLBACKS) {
538 fprintf(stderr, "texture attribute %d in vert prog\n", i);
539 }
540 return GL_FALSE;
541 }
542 }
543 /* completely ignore aliasing? */
544 for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
545 int j;
546 /* completely ignore aliasing? */
547 if (mesa_vp->info.inputs_read & VERT_BIT_GENERIC(i)) {
548 array_count++;
549 if (array_count > 12) {
550 if (R200_DEBUG & RADEON_FALLBACKS) {
551 fprintf(stderr, "more than 12 attribs used in vert prog\n");
552 }
553 return GL_FALSE;
554 }
555 for (j = 0; j < 14; j++) {
556 /* will always find one due to limited array_count */
557 if (free_inputs & (1 << j)) {
558 free_inputs &= ~(1 << j);
559 vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
560 if (j == 0) {
561 /* mapped to pos */
562 vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
563 } else if (j < 12) {
564 /* mapped to col/tex */
565 vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
566 } else {
567 /* mapped to pos1 */
568 vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
569 }
570 break;
571 }
572 }
573 }
574 }
575
576 if (!(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
577 if (R200_DEBUG & RADEON_FALLBACKS) {
578 fprintf(stderr, "can't handle vert prog without position output\n");
579 }
580 return GL_FALSE;
581 }
582 if (free_inputs & 1) {
583 if (R200_DEBUG & RADEON_FALLBACKS) {
584 fprintf(stderr, "can't handle vert prog without position input\n");
585 }
586 return GL_FALSE;
587 }
588
589 o_inst = vp->instr;
590 for (vpi = mesa_vp->arb.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
591 operands = op_operands(vpi->Opcode);
592 are_srcs_scalar = operands & SCALAR_FLAG;
593 operands &= OP_MASK;
594
595 for(i = 0; i < operands; i++) {
596 src[i] = vpi->SrcReg[i];
597 /* hack up default attrib values as per spec as swizzling.
598 normal, fog, secondary color. Crazy?
599 May need more if we don't submit vec4 elements? */
600 if (src[i].File == PROGRAM_INPUT) {
601 if (src[i].Index == VERT_ATTRIB_NORMAL) {
602 int j;
603 for (j = 0; j < 4; j++) {
604 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
605 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
606 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
607 }
608 }
609 }
610 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
611 int j;
612 for (j = 0; j < 4; j++) {
613 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
614 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
615 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
616 }
617 }
618 }
619 else if (src[i].Index == VERT_ATTRIB_FOG) {
620 int j;
621 for (j = 0; j < 4; j++) {
622 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
623 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
624 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
625 }
626 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
627 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
628 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
629 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
630 }
631 }
632 }
633 }
634 }
635
636 if(operands == 3){
637 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
638 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
639 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
640 VSF_FLAG_ALL);
641
642 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
643 SWIZZLE_X, SWIZZLE_Y,
644 SWIZZLE_Z, SWIZZLE_W,
645 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
646
647 o_inst->src1 = ZERO_SRC_0;
648 o_inst->src2 = UNUSED_SRC_1;
649 o_inst++;
650
651 src[2].File = PROGRAM_TEMPORARY;
652 src[2].Index = u_temp_i;
653 src[2].RelAddr = 0;
654 u_temp_i--;
655 }
656 }
657
658 if(operands >= 2){
659 if( CMP_SRCS(src[1], src[0]) ){
660 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
661 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
662 VSF_FLAG_ALL);
663
664 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
665 SWIZZLE_X, SWIZZLE_Y,
666 SWIZZLE_Z, SWIZZLE_W,
667 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
668
669 o_inst->src1 = ZERO_SRC_0;
670 o_inst->src2 = UNUSED_SRC_1;
671 o_inst++;
672
673 src[0].File = PROGRAM_TEMPORARY;
674 src[0].Index = u_temp_i;
675 src[0].RelAddr = 0;
676 u_temp_i--;
677 }
678 }
679
680 dst = vpi->DstReg;
681 if (dst.File == PROGRAM_OUTPUT &&
682 dst.Index == VARYING_SLOT_FOGC &&
683 dst.WriteMask & WRITEMASK_X) {
684 fog_temp_i = u_temp_i;
685 dst.File = PROGRAM_TEMPORARY;
686 dst.Index = fog_temp_i;
687 dofogfix = 1;
688 u_temp_i--;
689 }
690
691 /* These ops need special handling. */
692 switch(vpi->Opcode){
693 case OPCODE_POW:
694 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
695 So may need to insert additional instruction */
696 if ((src[0].File == src[1].File) &&
697 (src[0].Index == src[1].Index)) {
698 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
699 t_dst_mask(dst.WriteMask));
700 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
701 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
702 SWIZZLE_ZERO,
703 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
704 SWIZZLE_ZERO,
705 t_src_class(src[0].File),
706 src[0].Negate) | (src[0].RelAddr << 4);
707 o_inst->src1 = UNUSED_SRC_0;
708 o_inst->src2 = UNUSED_SRC_0;
709 }
710 else {
711 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
712 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
713 VSF_FLAG_ALL);
714 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
715 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
716 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
717 t_src_class(src[0].File),
718 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
719 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
720 SWIZZLE_ZERO, SWIZZLE_ZERO,
721 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
722 t_src_class(src[1].File),
723 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
724 o_inst->src2 = UNUSED_SRC_1;
725 o_inst++;
726
727 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
728 t_dst_mask(dst.WriteMask));
729 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
730 VSF_IN_COMPONENT_X,
731 VSF_IN_COMPONENT_Y,
732 VSF_IN_COMPONENT_Z,
733 VSF_IN_COMPONENT_W,
734 VSF_IN_CLASS_TMP,
735 VSF_FLAG_NONE);
736 o_inst->src1 = UNUSED_SRC_0;
737 o_inst->src2 = UNUSED_SRC_0;
738 u_temp_i--;
739 }
740 goto next;
741
742 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
743 case OPCODE_SWZ:
744 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
745 t_dst_mask(dst.WriteMask));
746 o_inst->src0 = t_src(vp, &src[0]);
747 o_inst->src1 = ZERO_SRC_0;
748 o_inst->src2 = UNUSED_SRC_1;
749 goto next;
750
751 case OPCODE_MAD:
752 /* only 2 read ports into temp memory thus may need the macro op MAD_2
753 instead (requiring 2 clocks) if all inputs are in temp memory
754 (and, only if they actually reference 3 distinct temps) */
755 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
756 src[1].File == PROGRAM_TEMPORARY &&
757 src[2].File == PROGRAM_TEMPORARY &&
758 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
759 (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
760 (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
761 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
762
763 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
764 t_dst_mask(dst.WriteMask));
765 o_inst->src0 = t_src(vp, &src[0]);
766 #if 0
767 if ((o_inst - vp->instr) == 31) {
768 /* fix up the broken vertex program of quake4 demo... */
769 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
770 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
771 t_src_class(src[1].File),
772 src[1].Negate) | (src[1].RelAddr << 4);
773 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
774 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
775 t_src_class(src[1].File),
776 src[1].Negate) | (src[1].RelAddr << 4);
777 }
778 else {
779 o_inst->src1 = t_src(vp, &src[1]);
780 o_inst->src2 = t_src(vp, &src[2]);
781 }
782 #else
783 o_inst->src1 = t_src(vp, &src[1]);
784 o_inst->src2 = t_src(vp, &src[2]);
785 #endif
786 goto next;
787
788 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
789 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
790 t_dst_mask(dst.WriteMask));
791
792 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
793 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
794 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
795 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
796 SWIZZLE_ZERO,
797 t_src_class(src[0].File),
798 src[0].Negate) | (src[0].RelAddr << 4);
799
800 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
801 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
802 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
803 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
804 SWIZZLE_ZERO,
805 t_src_class(src[1].File),
806 src[1].Negate) | (src[1].RelAddr << 4);
807
808 o_inst->src2 = UNUSED_SRC_1;
809 goto next;
810
811 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
812 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
813 t_dst_mask(dst.WriteMask));
814
815 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
816 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
817 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
818 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
819 VSF_IN_COMPONENT_ONE,
820 t_src_class(src[0].File),
821 src[0].Negate) | (src[0].RelAddr << 4);
822 o_inst->src1 = t_src(vp, &src[1]);
823 o_inst->src2 = UNUSED_SRC_1;
824 goto next;
825
826 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
827 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
828 t_dst_mask(dst.WriteMask));
829
830 o_inst->src0 = t_src(vp, &src[0]);
831 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
832 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
833 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
834 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
835 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
836 t_src_class(src[1].File),
837 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
838 o_inst->src2 = UNUSED_SRC_1;
839 goto next;
840
841 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
842 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
843 t_dst_mask(dst.WriteMask));
844
845 o_inst->src0=t_src(vp, &src[0]);
846 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
847 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
848 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
849 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
850 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
851 t_src_class(src[0].File),
852 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
853 o_inst->src2 = UNUSED_SRC_1;
854 goto next;
855
856 case OPCODE_FLR:
857 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
858 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
859
860 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
861 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
862 t_dst_mask(dst.WriteMask));
863
864 o_inst->src0 = t_src(vp, &src[0]);
865 o_inst->src1 = UNUSED_SRC_0;
866 o_inst->src2 = UNUSED_SRC_1;
867 o_inst++;
868
869 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
870 t_dst_mask(dst.WriteMask));
871
872 o_inst->src0 = t_src(vp, &src[0]);
873 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
874 VSF_IN_COMPONENT_X,
875 VSF_IN_COMPONENT_Y,
876 VSF_IN_COMPONENT_Z,
877 VSF_IN_COMPONENT_W,
878 VSF_IN_CLASS_TMP,
879 /* Not 100% sure about this */
880 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
881
882 o_inst->src2 = UNUSED_SRC_0;
883 u_temp_i--;
884 goto next;
885
886 case OPCODE_XPD:
887 /* mul r0, r1.yzxw, r2.zxyw
888 mad r0, -r2.yzxw, r1.zxyw, r0
889 */
890 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
891 src[1].File == PROGRAM_TEMPORARY &&
892 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
893 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
894
895 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
896 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
897 t_dst_mask(dst.WriteMask));
898
899 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
900 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
901 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
902 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
903 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
904 t_src_class(src[0].File),
905 src[0].Negate) | (src[0].RelAddr << 4);
906
907 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
908 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
909 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
910 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
911 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
912 t_src_class(src[1].File),
913 src[1].Negate) | (src[1].RelAddr << 4);
914
915 o_inst->src2 = UNUSED_SRC_1;
916 o_inst++;
917 u_temp_i--;
918
919 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
920 t_dst_mask(dst.WriteMask));
921
922 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
923 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
924 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
925 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
926 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
927 t_src_class(src[1].File),
928 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
929
930 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
931 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
932 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
933 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
934 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
935 t_src_class(src[0].File),
936 src[0].Negate) | (src[0].RelAddr << 4);
937
938 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
939 VSF_IN_COMPONENT_X,
940 VSF_IN_COMPONENT_Y,
941 VSF_IN_COMPONENT_Z,
942 VSF_IN_COMPONENT_W,
943 VSF_IN_CLASS_TMP,
944 VSF_FLAG_NONE);
945 goto next;
946
947 case OPCODE_END:
948 assert(0);
949 default:
950 break;
951 }
952
953 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
954 t_dst_mask(dst.WriteMask));
955
956 if(are_srcs_scalar){
957 switch(operands){
958 case 1:
959 o_inst->src0 = t_src_scalar(vp, &src[0]);
960 o_inst->src1 = UNUSED_SRC_0;
961 o_inst->src2 = UNUSED_SRC_1;
962 break;
963
964 case 2:
965 o_inst->src0 = t_src_scalar(vp, &src[0]);
966 o_inst->src1 = t_src_scalar(vp, &src[1]);
967 o_inst->src2 = UNUSED_SRC_1;
968 break;
969
970 case 3:
971 o_inst->src0 = t_src_scalar(vp, &src[0]);
972 o_inst->src1 = t_src_scalar(vp, &src[1]);
973 o_inst->src2 = t_src_scalar(vp, &src[2]);
974 break;
975
976 default:
977 fprintf(stderr, "illegal number of operands %lu\n", operands);
978 exit(-1);
979 break;
980 }
981 } else {
982 switch(operands){
983 case 1:
984 o_inst->src0 = t_src(vp, &src[0]);
985 o_inst->src1 = UNUSED_SRC_0;
986 o_inst->src2 = UNUSED_SRC_1;
987 break;
988
989 case 2:
990 o_inst->src0 = t_src(vp, &src[0]);
991 o_inst->src1 = t_src(vp, &src[1]);
992 o_inst->src2 = UNUSED_SRC_1;
993 break;
994
995 case 3:
996 o_inst->src0 = t_src(vp, &src[0]);
997 o_inst->src1 = t_src(vp, &src[1]);
998 o_inst->src2 = t_src(vp, &src[2]);
999 break;
1000
1001 default:
1002 fprintf(stderr, "illegal number of operands %lu\n", operands);
1003 exit(-1);
1004 break;
1005 }
1006 }
1007 next:
1008
1009 if (dofogfix) {
1010 o_inst++;
1011 if (vp->fogmode == GL_EXP) {
1012 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1013 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1014 VSF_FLAG_X);
1015 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1016 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1017 o_inst->src2 = UNUSED_SRC_1;
1018 o_inst++;
1019 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1020 R200_VSF_OUT_CLASS_RESULT_FOGC,
1021 VSF_FLAG_X);
1022 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1023 o_inst->src1 = UNUSED_SRC_0;
1024 o_inst->src2 = UNUSED_SRC_1;
1025 }
1026 else if (vp->fogmode == GL_EXP2) {
1027 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1028 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1029 VSF_FLAG_X);
1030 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1031 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1032 o_inst->src2 = UNUSED_SRC_1;
1033 o_inst++;
1034 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1035 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1036 VSF_FLAG_X);
1037 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1038 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1039 o_inst->src2 = UNUSED_SRC_1;
1040 o_inst++;
1041 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1042 R200_VSF_OUT_CLASS_RESULT_FOGC,
1043 VSF_FLAG_X);
1044 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1045 o_inst->src1 = UNUSED_SRC_0;
1046 o_inst->src2 = UNUSED_SRC_1;
1047 }
1048 else { /* fogmode == GL_LINEAR */
1049 /* could do that with single op (dot) if using params like
1050 with fixed function pipeline fog */
1051 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1052 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1053 VSF_FLAG_X);
1054 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1055 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1056 o_inst->src2 = UNUSED_SRC_1;
1057 o_inst++;
1058 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1059 R200_VSF_OUT_CLASS_RESULT_FOGC,
1060 VSF_FLAG_X);
1061 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1062 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1063 o_inst->src2 = UNUSED_SRC_1;
1064
1065 }
1066 dofogfix = 0;
1067 }
1068
1069 u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1070 if (mesa_vp->arb.NumNativeTemporaries <
1071 (mesa_vp->arb.NumTemporaries + u_temp_used)) {
1072 mesa_vp->arb.NumNativeTemporaries =
1073 mesa_vp->arb.NumTemporaries + u_temp_used;
1074 }
1075 if ((mesa_vp->arb.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1076 if (R200_DEBUG & RADEON_FALLBACKS) {
1077 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->arb.NumTemporaries, u_temp_used);
1078 }
1079 return GL_FALSE;
1080 }
1081 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1082 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1083 mesa_vp->arb.NumNativeInstructions = 129;
1084 if (R200_DEBUG & RADEON_FALLBACKS) {
1085 fprintf(stderr, "more than 128 native instructions\n");
1086 }
1087 return GL_FALSE;
1088 }
1089 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1090 vp->pos_end = (o_inst - vp->instr);
1091 }
1092 }
1093
1094 vp->native = GL_TRUE;
1095 mesa_vp->arb.NumNativeInstructions = (o_inst - vp->instr);
1096 #if 0
1097 fprintf(stderr, "hw program:\n");
1098 for(i=0; i < vp->program.length; i++)
1099 fprintf(stderr, "%08x\n", vp->instr[i]);
1100 #endif
1101 return GL_TRUE;
1102 }
1103
1104 void r200SetupVertexProg( struct gl_context *ctx ) {
1105 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1106 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1107 GLboolean fallback;
1108 GLint i;
1109
1110 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1111 rmesa->curr_vp_hw = NULL;
1112 r200_translate_vertex_program(ctx, vp);
1113 }
1114 /* could optimize setting up vertex progs away for non-tcl hw */
1115 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
1116 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1117 if (rmesa->radeon.TclFallback) return;
1118
1119 R200_STATECHANGE( rmesa, vap );
1120 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1121 maybe only when using more than 64 inst / 96 param? */
1122 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1123
1124 R200_STATECHANGE( rmesa, pvs );
1125
1126 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1127 ((vp->mesa_program.arb.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1128 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1129 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1130 (vp->mesa_program.arb.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1131
1132 /* maybe user clip planes just work with vertex progs... untested */
1133 if (ctx->Transform.ClipPlanesEnabled) {
1134 R200_STATECHANGE( rmesa, tcl );
1135 if (vp->mesa_program.arb.IsPositionInvariant) {
1136 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1137 }
1138 else {
1139 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1140 }
1141 }
1142
1143 if (vp != rmesa->curr_vp_hw) {
1144 GLuint count = vp->mesa_program.arb.NumNativeInstructions;
1145 drm_radeon_cmd_header_t tmp;
1146
1147 R200_STATECHANGE( rmesa, vpi[0] );
1148 R200_STATECHANGE( rmesa, vpi[1] );
1149
1150 /* FIXME: what about using a memcopy... */
1151 for (i = 0; (i < 64) && i < count; i++) {
1152 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1153 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1154 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1155 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1156 }
1157 /* hack up the cmd_size so not the whole state atom is emitted always.
1158 This may require some more thought, we may emit half progs on lost state, but
1159 hopefully it won't matter?
1160 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1161 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1162 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1163 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1164 tmp.veclinear.count = (count > 64) ? 64 : count;
1165 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1166 if (count > 64) {
1167 for (i = 0; i < (count - 64); i++) {
1168 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1169 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1170 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1171 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1172 }
1173 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1174 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1175 tmp.veclinear.count = count - 64;
1176 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1177 }
1178 rmesa->curr_vp_hw = vp;
1179 }
1180 }
1181
1182
1183 static struct gl_program *
1184 r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id,
1185 bool is_arb_asm)
1186 {
1187 switch(target){
1188 case GL_VERTEX_PROGRAM_ARB: {
1189 struct r200_vertex_program *vp = rzalloc(NULL,
1190 struct r200_vertex_program);
1191 return _mesa_init_gl_program(&vp->mesa_program, target, id, is_arb_asm);
1192 }
1193 case GL_FRAGMENT_PROGRAM_ARB: {
1194 struct gl_program *prog = rzalloc(NULL, struct gl_program);
1195 return _mesa_init_gl_program(prog, target, id, is_arb_asm);
1196 }
1197 default:
1198 _mesa_problem(ctx, "Bad target in r200NewProgram");
1199 return NULL;
1200 }
1201 }
1202
1203
/**
 * DeleteProgram driver hook: forwards the program unchanged to
 * _mesa_delete_program().
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
1209
1210 static GLboolean
1211 r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1212 {
1213 struct r200_vertex_program *vp = (void *)prog;
1214 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1215
1216 switch(target) {
1217 case GL_VERTEX_PROGRAM_ARB:
1218 vp->translated = GL_FALSE;
1219 vp->fogpidx = 0;
1220 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_program));*/
1221 r200_translate_vertex_program(ctx, vp);
1222 rmesa->curr_vp_hw = NULL;
1223 break;
1224 case GL_FRAGMENT_SHADER_ATI:
1225 rmesa->afs_loaded = NULL;
1226 break;
1227 }
1228 /* need this for tcl fallbacks */
1229 (void) _tnl_program_string(ctx, target, prog);
1230
1231 /* XXX check if program is legal, within limits */
1232 return GL_TRUE;
1233 }
1234
1235 static GLboolean
1236 r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1237 {
1238 struct r200_vertex_program *vp = (void *)prog;
1239
1240 switch(target){
1241 case GL_VERTEX_PROGRAM_ARB:
1242 if (!vp->translated) {
1243 r200_translate_vertex_program(ctx, vp);
1244 }
1245 /* does not take parameters etc. into account */
1246 return vp->native;
1247 default:
1248 _mesa_problem(ctx, "Bad target in r200NewProgram");
1249 }
1250 return 0;
1251 }
1252
1253 void r200InitShaderFuncs(struct dd_function_table *functions)
1254 {
1255 functions->NewProgram = r200NewProgram;
1256 functions->DeleteProgram = r200DeleteProgram;
1257 functions->ProgramStringNotify = r200ProgramStringNotify;
1258 functions->IsProgramNative = r200IsProgramNative;
1259 }