mesa: replace VP/FP/ATIfs _Enabled flags with helper functions
[mesa.git] / src / mesa / drivers / dri / r200 / r200_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch>
32 */
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "program/program.h"
37 #include "program/prog_instruction.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_statevars.h"
40 #include "program/programopt.h"
41 #include "tnl/tnl.h"
42
43 #include "r200_context.h"
44 #include "r200_vertprog.h"
45 #include "r200_ioctl.h"
46 #include "r200_tcl.h"
47
48 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
49 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
50 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
51 SWIZZLE_W != VSF_IN_COMPONENT_W || \
52 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
53 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
54 WRITEMASK_X != VSF_FLAG_X || \
55 WRITEMASK_Y != VSF_FLAG_Y || \
56 WRITEMASK_Z != VSF_FLAG_Z || \
57 WRITEMASK_W != VSF_FLAG_W
58 #error Cannot change these!
59 #endif
60
/*
 * Flag bits kept in the upper part of op_names[].ip.  The constants are
 * unsigned: shifting a signed 1 into bit 31 overflows int, and the
 * resulting negative value would sign-extend when stored in the
 * unsigned long 'ip' field.
 */
#define SCALAR_FLAG (1u << 31)
#define FLAG_MASK (1u << 31)
/* The operand count is kept in the low nibble of op_names[].ip. */
#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
/* Build one op_names[] entry: mnemonic string, Mesa opcode, operand info. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65
66 static struct{
67 char *name;
68 int opcode;
69 unsigned long ip; /* number of input operands and flags */
70 }op_names[]={
71 OPN(ABS, 1),
72 OPN(ADD, 2),
73 OPN(ARL, 1|SCALAR_FLAG),
74 OPN(DP3, 2),
75 OPN(DP4, 2),
76 OPN(DPH, 2),
77 OPN(DST, 2),
78 OPN(EX2, 1|SCALAR_FLAG),
79 OPN(EXP, 1|SCALAR_FLAG),
80 OPN(FLR, 1),
81 OPN(FRC, 1),
82 OPN(LG2, 1|SCALAR_FLAG),
83 OPN(LIT, 1),
84 OPN(LOG, 1|SCALAR_FLAG),
85 OPN(MAD, 3),
86 OPN(MAX, 2),
87 OPN(MIN, 2),
88 OPN(MOV, 1),
89 OPN(MUL, 2),
90 OPN(POW, 2|SCALAR_FLAG),
91 OPN(RCP, 1|SCALAR_FLAG),
92 OPN(RSQ, 1|SCALAR_FLAG),
93 OPN(SGE, 2),
94 OPN(SLT, 2),
95 OPN(SUB, 2),
96 OPN(SWZ, 1),
97 OPN(XPD, 2),
98 OPN(END, 0),
99 };
100 #undef OPN
101
102 static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
103 {
104 r200ContextPtr rmesa = R200_CONTEXT( ctx );
105 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
106 int pi;
107 struct gl_program *mesa_vp = &vp->mesa_program;
108 struct gl_program_parameter_list *paramList;
109 drm_radeon_cmd_header_t tmp;
110
111 R200_STATECHANGE( rmesa, vpp[0] );
112 R200_STATECHANGE( rmesa, vpp[1] );
113 assert(mesa_vp->Parameters);
114 _mesa_load_state_parameters(ctx, mesa_vp->Parameters);
115 paramList = mesa_vp->Parameters;
116
117 if(paramList->NumParameters > R200_VSF_MAX_PARAM){
118 fprintf(stderr, "%s:Params exhausted\n", __func__);
119 return GL_FALSE;
120 }
121
122 for(pi = 0; pi < paramList->NumParameters; pi++) {
123 switch(paramList->Parameters[pi].Type) {
124 case PROGRAM_STATE_VAR:
125 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
126 case PROGRAM_CONSTANT:
127 *fcmd++ = paramList->ParameterValues[pi][0].f;
128 *fcmd++ = paramList->ParameterValues[pi][1].f;
129 *fcmd++ = paramList->ParameterValues[pi][2].f;
130 *fcmd++ = paramList->ParameterValues[pi][3].f;
131 break;
132 default:
133 _mesa_problem(NULL, "Bad param type in %s", __func__);
134 break;
135 }
136 if (pi == 95) {
137 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
138 }
139 }
140 /* hack up the cmd_size so not the whole state atom is emitted always. */
141 rmesa->hw.vpp[0].cmd_size =
142 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
143 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
144 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
145 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
146 if (paramList->NumParameters > 96) {
147 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
148 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
149 tmp.veclinear.count = paramList->NumParameters - 96;
150 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
151 }
152 return GL_TRUE;
153 }
154
155 static inline unsigned long t_dst_mask(GLuint mask)
156 {
157 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
158 return mask & VSF_FLAG_ALL;
159 }
160
161 static unsigned long t_dst(struct prog_dst_register *dst)
162 {
163 switch(dst->File) {
164 case PROGRAM_TEMPORARY:
165 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
166 | R200_VSF_OUT_CLASS_TMP);
167 case PROGRAM_OUTPUT:
168 switch (dst->Index) {
169 case VARYING_SLOT_POS:
170 return R200_VSF_OUT_CLASS_RESULT_POS;
171 case VARYING_SLOT_COL0:
172 return R200_VSF_OUT_CLASS_RESULT_COLOR;
173 case VARYING_SLOT_COL1:
174 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
175 | R200_VSF_OUT_CLASS_RESULT_COLOR);
176 case VARYING_SLOT_FOGC:
177 return R200_VSF_OUT_CLASS_RESULT_FOGC;
178 case VARYING_SLOT_TEX0:
179 case VARYING_SLOT_TEX1:
180 case VARYING_SLOT_TEX2:
181 case VARYING_SLOT_TEX3:
182 case VARYING_SLOT_TEX4:
183 case VARYING_SLOT_TEX5:
184 return (((dst->Index - VARYING_SLOT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
185 | R200_VSF_OUT_CLASS_RESULT_TEXC);
186 case VARYING_SLOT_PSIZ:
187 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
188 default:
189 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __func__, dst->Index);
190 exit(0);
191 return 0;
192 }
193 case PROGRAM_ADDRESS:
194 assert (dst->Index == 0);
195 return R200_VSF_OUT_CLASS_ADDR;
196 default:
197 fprintf(stderr, "problem in %s, unknown register type %d\n", __func__, dst->File);
198 exit(0);
199 return 0;
200 }
201 }
202
203 static unsigned long t_src_class(gl_register_file file)
204 {
205
206 switch(file){
207 case PROGRAM_TEMPORARY:
208 return VSF_IN_CLASS_TMP;
209
210 case PROGRAM_INPUT:
211 return VSF_IN_CLASS_ATTR;
212
213 case PROGRAM_CONSTANT:
214 case PROGRAM_STATE_VAR:
215 return VSF_IN_CLASS_PARAM;
216 /*
217 case PROGRAM_OUTPUT:
218 case PROGRAM_ADDRESS:
219 */
220 default:
221 fprintf(stderr, "problem in %s", __func__);
222 exit(0);
223 }
224 }
225
226 static inline unsigned long t_swizzle(GLubyte swizzle)
227 {
228 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
229 return swizzle;
230 }
231
#if 0
/*
 * Debug helper (compiled out): print the Mesa-attribute -> hardware-input
 * mapping of a vertex program to stderr, prefixed by the caller's name.
 * 'caller' is const so that __func__ can be passed directly.
 */
static void vp_dump_inputs(struct r200_vertex_program *vp, const char *caller)
{
   int i;

   if (vp == NULL) {
      fprintf(stderr, "vp null in call to %s from %s\n", __func__, caller);
      return;
   }

   fprintf(stderr, "%s:<", caller);
   for (i = 0; i < VERT_ATTRIB_MAX; i++)
      fprintf(stderr, "%d ", vp->inputs[i]);
   fprintf(stderr, ">\n");
}
#endif
249
250 static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
251 {
252 /*
253 int i;
254 int max_reg = -1;
255 */
256 if(src->File == PROGRAM_INPUT){
257 /* if(vp->inputs[src->Index] != -1)
258 return vp->inputs[src->Index];
259
260 for(i=0; i < VERT_ATTRIB_MAX; i++)
261 if(vp->inputs[i] > max_reg)
262 max_reg = vp->inputs[i];
263
264 vp->inputs[src->Index] = max_reg+1;*/
265
266 //vp_dump_inputs(vp, __func__);
267 assert(vp->inputs[src->Index] != -1);
268 return vp->inputs[src->Index];
269 } else {
270 if (src->Index < 0) {
271 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
272 return 0;
273 }
274 return src->Index;
275 }
276 }
277
278 static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
279 {
280
281 return MAKE_VSF_SOURCE(t_src_index(vp, src),
282 t_swizzle(GET_SWZ(src->Swizzle, 0)),
283 t_swizzle(GET_SWZ(src->Swizzle, 1)),
284 t_swizzle(GET_SWZ(src->Swizzle, 2)),
285 t_swizzle(GET_SWZ(src->Swizzle, 3)),
286 t_src_class(src->File),
287 src->Negate) | (src->RelAddr << 4);
288 }
289
290 static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
291 {
292
293 return MAKE_VSF_SOURCE(t_src_index(vp, src),
294 t_swizzle(GET_SWZ(src->Swizzle, 0)),
295 t_swizzle(GET_SWZ(src->Swizzle, 0)),
296 t_swizzle(GET_SWZ(src->Swizzle, 0)),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_src_class(src->File),
299 src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
300 }
301
302 static unsigned long t_opcode(enum prog_opcode opcode)
303 {
304
305 switch(opcode){
306 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
307 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
308 * seems to ignore neg offsets which isn't quite correct...
309 */
310 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
311 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
312 case OPCODE_DST: return R200_VPI_OUT_OP_DST;
313 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
314 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
315 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
316 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
317 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
318 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
319 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
320 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
321 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
322 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
323 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
324 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
325 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
326
327 default:
328 fprintf(stderr, "%s: Should not be called with opcode %d!", __func__, opcode);
329 }
330 exit(-1);
331 return 0;
332 }
333
334 static unsigned long op_operands(enum prog_opcode opcode)
335 {
336 int i;
337
338 /* Can we trust mesas opcodes to be in order ? */
339 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
340 if(op_names[i].opcode == opcode)
341 return op_names[i].ip;
342
343 fprintf(stderr, "op %d not found in op_names\n", opcode);
344 exit(-1);
345 return 0;
346 }
347
/* TODO: Get rid of t_src_class call */
/*
 * True when two source operands refer to different registers (their
 * index or relative-addressing flag differs) and both are parameters or
 * both are vertex attributes.  The translator uses this to decide
 * whether one of the operands has to be copied to a temporary first.
 *
 * The arguments are parenthesized so any struct-valued expression can be
 * passed.  Each argument is evaluated more than once, so it must not
 * have side effects.
 */
#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
   ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
     t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
    (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
     t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
354
355 /* fglrx on rv250 codes up unused sources as follows:
356 unused but necessary sources are same as previous source, zero-ed out.
357 unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set.
358 i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg
359 set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
360
/* use these simpler definitions. Must obviously not be used with not yet set up regs.
   Those are NOT semantically equivalent to the r300 ones, requires code changes */

/* Selector bits choosing the constant zero for the X, Y, Z and W components. */
#define SRC_ALL_COMPONENTS_ZERO \
   ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
    | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))

/* Copy of an already encoded source with the 12 bits starting at the X
 * selector cleared and every component set to select zero. */
#define SRC_WITH_ZERO_SWIZZLE(reg) \
   ((((reg) & ~(0xfff << R200_VPI_IN_X_SHIFT)) | SRC_ALL_COMPONENTS_ZERO))

/* Copy of an already encoded source with its low four bits replaced by 9
 * (presumably the VSF_IN_CLASS_NONE encoding mentioned in the fglrx note
 * above -- confirm against the register header). */
#define SRC_MARKED_UNUSED(reg) (((reg) & ~15) | 9)

/* All of the following read the instruction currently pointed to by o_inst. */
#define ZERO_SRC_0 SRC_WITH_ZERO_SWIZZLE(o_inst->src0)
#define ZERO_SRC_1 SRC_WITH_ZERO_SWIZZLE(o_inst->src1)
#define ZERO_SRC_2 SRC_WITH_ZERO_SWIZZLE(o_inst->src2)

#define UNUSED_SRC_0 SRC_MARKED_UNUSED(o_inst->src0)
#define UNUSED_SRC_1 SRC_MARKED_UNUSED(o_inst->src1)
#define UNUSED_SRC_2 SRC_MARKED_UNUSED(o_inst->src2)
386
387
388 /**
389 * Generate an R200 vertex program from Mesa's internal representation.
390 *
391 * \return GL_TRUE for success, GL_FALSE for failure.
392 */
393 static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
394 {
395 struct gl_program *mesa_vp = &vp->mesa_program;
396 struct prog_instruction *vpi;
397 int i;
398 VERTEX_SHADER_INSTRUCTION *o_inst;
399 unsigned long operands;
400 int are_srcs_scalar;
401 unsigned long hw_op;
402 int dofogfix = 0;
403 int fog_temp_i = 0;
404 int free_inputs;
405 int array_count = 0;
406 int u_temp_used;
407
408 vp->native = GL_FALSE;
409 vp->translated = GL_TRUE;
410 vp->fogmode = ctx->Fog.Mode;
411
412 if (mesa_vp->arb.NumInstructions == 0)
413 return GL_FALSE;
414
415 #if 0
416 if ((mesa_vp->info.inputs_read &
417 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
418 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
419 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
420 if (R200_DEBUG & RADEON_FALLBACKS) {
421 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
422 mesa_vp->info.inputs_read);
423 }
424 return GL_FALSE;
425 }
426 #endif
427
428 if ((mesa_vp->info.outputs_written &
429 ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
430 (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
431 (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
432 (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
433 if (R200_DEBUG & RADEON_FALLBACKS) {
434 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
435 (unsigned long long) mesa_vp->info.outputs_written);
436 }
437 return GL_FALSE;
438 }
439
440 /* Initial value should be last tmp reg that hw supports.
441 Strangely enough r300 doesnt mind even though these would be out of range.
442 Smart enough to realize that it doesnt need it? */
443 int u_temp_i = R200_VSF_MAX_TEMPS - 1;
444 struct prog_src_register src[3];
445 struct prog_dst_register dst;
446
447 /* FIXME: is changing the prog safe to do here? */
448 if (mesa_vp->arb.IsPositionInvariant &&
449 /* make sure we only do this once */
450 !(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
451 _mesa_insert_mvp_code(ctx, mesa_vp);
452 }
453
454 /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
455 base e isn't directly available neither. */
456 if ((mesa_vp->info.outputs_written & (1 << VARYING_SLOT_FOGC)) &&
457 !vp->fogpidx) {
458 struct gl_program_parameter_list *paramList;
459 gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
460 paramList = mesa_vp->Parameters;
461 vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
462 }
463
464 vp->pos_end = 0;
465 mesa_vp->arb.NumNativeInstructions = 0;
466 if (mesa_vp->Parameters)
467 mesa_vp->arb.NumNativeParameters = mesa_vp->Parameters->NumParameters;
468 else
469 mesa_vp->arb.NumNativeParameters = 0;
470
471 for(i = 0; i < VERT_ATTRIB_MAX; i++)
472 vp->inputs[i] = -1;
473 for(i = 0; i < 15; i++)
474 vp->inputmap_rev[i] = 255;
475 free_inputs = 0x2ffd;
476
477 /* fglrx uses fixed inputs as follows for conventional attribs.
478 generic attribs use non-fixed assignment, fglrx will always use the
479 lowest attrib values available. We'll just do the same.
480 There are 12 generic attribs possible, corresponding to attrib 0, 2-11
481 and 13 in a hw vertex prog.
482 attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
483 (correspond to vertex normal/weight - maybe weight actually could be made vec4).
484 Additionally, not more than 12 arrays in total are possible I think.
485 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
486 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
487 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
488 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
489 */
490
491 /* attr 4,5 and 13 are only used with generic attribs.
492 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
493 not possibe to use with vertex progs as it is lacking in vert prog specification) */
494 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
495 if (mesa_vp->info.inputs_read & VERT_BIT_POS) {
496 vp->inputs[VERT_ATTRIB_POS] = 0;
497 vp->inputmap_rev[0] = VERT_ATTRIB_POS;
498 free_inputs &= ~(1 << 0);
499 array_count++;
500 }
501 if (mesa_vp->info.inputs_read & VERT_BIT_WEIGHT) {
502 vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
503 vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
504 array_count++;
505 }
506 if (mesa_vp->info.inputs_read & VERT_BIT_NORMAL) {
507 vp->inputs[VERT_ATTRIB_NORMAL] = 1;
508 vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
509 array_count++;
510 }
511 if (mesa_vp->info.inputs_read & VERT_BIT_COLOR0) {
512 vp->inputs[VERT_ATTRIB_COLOR0] = 2;
513 vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
514 free_inputs &= ~(1 << 2);
515 array_count++;
516 }
517 if (mesa_vp->info.inputs_read & VERT_BIT_COLOR1) {
518 vp->inputs[VERT_ATTRIB_COLOR1] = 3;
519 vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
520 free_inputs &= ~(1 << 3);
521 array_count++;
522 }
523 if (mesa_vp->info.inputs_read & VERT_BIT_FOG) {
524 vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
525 vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
526 array_count++;
527 }
528 /* VERT_ATTRIB_TEX0-5 */
529 for (i = 0; i <= 5; i++) {
530 if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
531 vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
532 vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
533 free_inputs &= ~(1 << (i + 6));
534 array_count++;
535 }
536 }
537 /* using VERT_ATTRIB_TEX6/7 would be illegal */
538 for (; i < VERT_ATTRIB_TEX_MAX; i++) {
539 if (mesa_vp->info.inputs_read & VERT_BIT_TEX(i)) {
540 if (R200_DEBUG & RADEON_FALLBACKS) {
541 fprintf(stderr, "texture attribute %d in vert prog\n", i);
542 }
543 return GL_FALSE;
544 }
545 }
546 /* completely ignore aliasing? */
547 for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
548 int j;
549 /* completely ignore aliasing? */
550 if (mesa_vp->info.inputs_read & VERT_BIT_GENERIC(i)) {
551 array_count++;
552 if (array_count > 12) {
553 if (R200_DEBUG & RADEON_FALLBACKS) {
554 fprintf(stderr, "more than 12 attribs used in vert prog\n");
555 }
556 return GL_FALSE;
557 }
558 for (j = 0; j < 14; j++) {
559 /* will always find one due to limited array_count */
560 if (free_inputs & (1 << j)) {
561 free_inputs &= ~(1 << j);
562 vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
563 if (j == 0) {
564 /* mapped to pos */
565 vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
566 } else if (j < 12) {
567 /* mapped to col/tex */
568 vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
569 } else {
570 /* mapped to pos1 */
571 vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
572 }
573 break;
574 }
575 }
576 }
577 }
578
579 if (!(mesa_vp->info.outputs_written & (1 << VARYING_SLOT_POS))) {
580 if (R200_DEBUG & RADEON_FALLBACKS) {
581 fprintf(stderr, "can't handle vert prog without position output\n");
582 }
583 return GL_FALSE;
584 }
585 if (free_inputs & 1) {
586 if (R200_DEBUG & RADEON_FALLBACKS) {
587 fprintf(stderr, "can't handle vert prog without position input\n");
588 }
589 return GL_FALSE;
590 }
591
592 o_inst = vp->instr;
593 for (vpi = mesa_vp->arb.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
594 operands = op_operands(vpi->Opcode);
595 are_srcs_scalar = operands & SCALAR_FLAG;
596 operands &= OP_MASK;
597
598 for(i = 0; i < operands; i++) {
599 src[i] = vpi->SrcReg[i];
600 /* hack up default attrib values as per spec as swizzling.
601 normal, fog, secondary color. Crazy?
602 May need more if we don't submit vec4 elements? */
603 if (src[i].File == PROGRAM_INPUT) {
604 if (src[i].Index == VERT_ATTRIB_NORMAL) {
605 int j;
606 for (j = 0; j < 4; j++) {
607 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
608 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
609 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
610 }
611 }
612 }
613 else if (src[i].Index == VERT_ATTRIB_COLOR1) {
614 int j;
615 for (j = 0; j < 4; j++) {
616 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
617 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
618 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
619 }
620 }
621 }
622 else if (src[i].Index == VERT_ATTRIB_FOG) {
623 int j;
624 for (j = 0; j < 4; j++) {
625 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
626 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
627 src[i].Swizzle |= SWIZZLE_ONE << (j*3);
628 }
629 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
630 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
631 src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
632 src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
633 }
634 }
635 }
636 }
637 }
638
639 if(operands == 3){
640 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
641 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
642 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
643 VSF_FLAG_ALL);
644
645 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
646 SWIZZLE_X, SWIZZLE_Y,
647 SWIZZLE_Z, SWIZZLE_W,
648 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
649
650 o_inst->src1 = ZERO_SRC_0;
651 o_inst->src2 = UNUSED_SRC_1;
652 o_inst++;
653
654 src[2].File = PROGRAM_TEMPORARY;
655 src[2].Index = u_temp_i;
656 src[2].RelAddr = 0;
657 u_temp_i--;
658 }
659 }
660
661 if(operands >= 2){
662 if( CMP_SRCS(src[1], src[0]) ){
663 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
664 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
665 VSF_FLAG_ALL);
666
667 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
668 SWIZZLE_X, SWIZZLE_Y,
669 SWIZZLE_Z, SWIZZLE_W,
670 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
671
672 o_inst->src1 = ZERO_SRC_0;
673 o_inst->src2 = UNUSED_SRC_1;
674 o_inst++;
675
676 src[0].File = PROGRAM_TEMPORARY;
677 src[0].Index = u_temp_i;
678 src[0].RelAddr = 0;
679 u_temp_i--;
680 }
681 }
682
683 dst = vpi->DstReg;
684 if (dst.File == PROGRAM_OUTPUT &&
685 dst.Index == VARYING_SLOT_FOGC &&
686 dst.WriteMask & WRITEMASK_X) {
687 fog_temp_i = u_temp_i;
688 dst.File = PROGRAM_TEMPORARY;
689 dst.Index = fog_temp_i;
690 dofogfix = 1;
691 u_temp_i--;
692 }
693
694 /* These ops need special handling. */
695 switch(vpi->Opcode){
696 case OPCODE_POW:
697 /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
698 So may need to insert additional instruction */
699 if ((src[0].File == src[1].File) &&
700 (src[0].Index == src[1].Index)) {
701 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
702 t_dst_mask(dst.WriteMask));
703 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
704 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
705 SWIZZLE_ZERO,
706 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
707 SWIZZLE_ZERO,
708 t_src_class(src[0].File),
709 src[0].Negate) | (src[0].RelAddr << 4);
710 o_inst->src1 = UNUSED_SRC_0;
711 o_inst->src2 = UNUSED_SRC_0;
712 }
713 else {
714 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
715 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
716 VSF_FLAG_ALL);
717 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
718 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
719 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
720 t_src_class(src[0].File),
721 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
722 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
723 SWIZZLE_ZERO, SWIZZLE_ZERO,
724 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
725 t_src_class(src[1].File),
726 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
727 o_inst->src2 = UNUSED_SRC_1;
728 o_inst++;
729
730 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
731 t_dst_mask(dst.WriteMask));
732 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
733 VSF_IN_COMPONENT_X,
734 VSF_IN_COMPONENT_Y,
735 VSF_IN_COMPONENT_Z,
736 VSF_IN_COMPONENT_W,
737 VSF_IN_CLASS_TMP,
738 VSF_FLAG_NONE);
739 o_inst->src1 = UNUSED_SRC_0;
740 o_inst->src2 = UNUSED_SRC_0;
741 u_temp_i--;
742 }
743 goto next;
744
745 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
746 case OPCODE_SWZ:
747 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
748 t_dst_mask(dst.WriteMask));
749 o_inst->src0 = t_src(vp, &src[0]);
750 o_inst->src1 = ZERO_SRC_0;
751 o_inst->src2 = UNUSED_SRC_1;
752 goto next;
753
754 case OPCODE_MAD:
755 /* only 2 read ports into temp memory thus may need the macro op MAD_2
756 instead (requiring 2 clocks) if all inputs are in temp memory
757 (and, only if they actually reference 3 distinct temps) */
758 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
759 src[1].File == PROGRAM_TEMPORARY &&
760 src[2].File == PROGRAM_TEMPORARY &&
761 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
762 (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
763 (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
764 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
765
766 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
767 t_dst_mask(dst.WriteMask));
768 o_inst->src0 = t_src(vp, &src[0]);
769 #if 0
770 if ((o_inst - vp->instr) == 31) {
771 /* fix up the broken vertex program of quake4 demo... */
772 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
773 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
774 t_src_class(src[1].File),
775 src[1].Negate) | (src[1].RelAddr << 4);
776 o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
777 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
778 t_src_class(src[1].File),
779 src[1].Negate) | (src[1].RelAddr << 4);
780 }
781 else {
782 o_inst->src1 = t_src(vp, &src[1]);
783 o_inst->src2 = t_src(vp, &src[2]);
784 }
785 #else
786 o_inst->src1 = t_src(vp, &src[1]);
787 o_inst->src2 = t_src(vp, &src[2]);
788 #endif
789 goto next;
790
791 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
792 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
793 t_dst_mask(dst.WriteMask));
794
795 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
796 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
797 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
798 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
799 SWIZZLE_ZERO,
800 t_src_class(src[0].File),
801 src[0].Negate) | (src[0].RelAddr << 4);
802
803 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
804 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
805 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
806 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
807 SWIZZLE_ZERO,
808 t_src_class(src[1].File),
809 src[1].Negate) | (src[1].RelAddr << 4);
810
811 o_inst->src2 = UNUSED_SRC_1;
812 goto next;
813
814 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
815 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
816 t_dst_mask(dst.WriteMask));
817
818 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
819 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
820 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
821 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
822 VSF_IN_COMPONENT_ONE,
823 t_src_class(src[0].File),
824 src[0].Negate) | (src[0].RelAddr << 4);
825 o_inst->src1 = t_src(vp, &src[1]);
826 o_inst->src2 = UNUSED_SRC_1;
827 goto next;
828
829 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
830 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
831 t_dst_mask(dst.WriteMask));
832
833 o_inst->src0 = t_src(vp, &src[0]);
834 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
835 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
836 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
837 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
838 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
839 t_src_class(src[1].File),
840 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
841 o_inst->src2 = UNUSED_SRC_1;
842 goto next;
843
844 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
845 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
846 t_dst_mask(dst.WriteMask));
847
848 o_inst->src0=t_src(vp, &src[0]);
849 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
850 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
851 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
852 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
853 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
854 t_src_class(src[0].File),
855 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
856 o_inst->src2 = UNUSED_SRC_1;
857 goto next;
858
859 case OPCODE_FLR:
860 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
861 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
862
863 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
864 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
865 t_dst_mask(dst.WriteMask));
866
867 o_inst->src0 = t_src(vp, &src[0]);
868 o_inst->src1 = UNUSED_SRC_0;
869 o_inst->src2 = UNUSED_SRC_1;
870 o_inst++;
871
872 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
873 t_dst_mask(dst.WriteMask));
874
875 o_inst->src0 = t_src(vp, &src[0]);
876 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
877 VSF_IN_COMPONENT_X,
878 VSF_IN_COMPONENT_Y,
879 VSF_IN_COMPONENT_Z,
880 VSF_IN_COMPONENT_W,
881 VSF_IN_CLASS_TMP,
882 /* Not 100% sure about this */
883 (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
884
885 o_inst->src2 = UNUSED_SRC_0;
886 u_temp_i--;
887 goto next;
888
889 case OPCODE_XPD:
890 /* mul r0, r1.yzxw, r2.zxyw
891 mad r0, -r2.yzxw, r1.zxyw, r0
892 */
893 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
894 src[1].File == PROGRAM_TEMPORARY &&
895 (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
896 R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
897
898 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
899 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
900 t_dst_mask(dst.WriteMask));
901
902 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
903 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
904 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
905 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
906 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
907 t_src_class(src[0].File),
908 src[0].Negate) | (src[0].RelAddr << 4);
909
910 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
911 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
912 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
913 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
914 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
915 t_src_class(src[1].File),
916 src[1].Negate) | (src[1].RelAddr << 4);
917
918 o_inst->src2 = UNUSED_SRC_1;
919 o_inst++;
920 u_temp_i--;
921
922 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
923 t_dst_mask(dst.WriteMask));
924
925 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
926 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
927 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
928 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
929 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
930 t_src_class(src[1].File),
931 (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
932
933 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
934 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
935 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
936 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
937 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
938 t_src_class(src[0].File),
939 src[0].Negate) | (src[0].RelAddr << 4);
940
941 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
942 VSF_IN_COMPONENT_X,
943 VSF_IN_COMPONENT_Y,
944 VSF_IN_COMPONENT_Z,
945 VSF_IN_COMPONENT_W,
946 VSF_IN_CLASS_TMP,
947 VSF_FLAG_NONE);
948 goto next;
949
950 case OPCODE_END:
951 assert(0);
952 default:
953 break;
954 }
955
956 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
957 t_dst_mask(dst.WriteMask));
958
959 if(are_srcs_scalar){
960 switch(operands){
961 case 1:
962 o_inst->src0 = t_src_scalar(vp, &src[0]);
963 o_inst->src1 = UNUSED_SRC_0;
964 o_inst->src2 = UNUSED_SRC_1;
965 break;
966
967 case 2:
968 o_inst->src0 = t_src_scalar(vp, &src[0]);
969 o_inst->src1 = t_src_scalar(vp, &src[1]);
970 o_inst->src2 = UNUSED_SRC_1;
971 break;
972
973 case 3:
974 o_inst->src0 = t_src_scalar(vp, &src[0]);
975 o_inst->src1 = t_src_scalar(vp, &src[1]);
976 o_inst->src2 = t_src_scalar(vp, &src[2]);
977 break;
978
979 default:
980 fprintf(stderr, "illegal number of operands %lu\n", operands);
981 exit(-1);
982 break;
983 }
984 } else {
985 switch(operands){
986 case 1:
987 o_inst->src0 = t_src(vp, &src[0]);
988 o_inst->src1 = UNUSED_SRC_0;
989 o_inst->src2 = UNUSED_SRC_1;
990 break;
991
992 case 2:
993 o_inst->src0 = t_src(vp, &src[0]);
994 o_inst->src1 = t_src(vp, &src[1]);
995 o_inst->src2 = UNUSED_SRC_1;
996 break;
997
998 case 3:
999 o_inst->src0 = t_src(vp, &src[0]);
1000 o_inst->src1 = t_src(vp, &src[1]);
1001 o_inst->src2 = t_src(vp, &src[2]);
1002 break;
1003
1004 default:
1005 fprintf(stderr, "illegal number of operands %lu\n", operands);
1006 exit(-1);
1007 break;
1008 }
1009 }
1010 next:
1011
1012 if (dofogfix) {
1013 o_inst++;
1014 if (vp->fogmode == GL_EXP) {
1015 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1016 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1017 VSF_FLAG_X);
1018 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1019 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1020 o_inst->src2 = UNUSED_SRC_1;
1021 o_inst++;
1022 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1023 R200_VSF_OUT_CLASS_RESULT_FOGC,
1024 VSF_FLAG_X);
1025 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1026 o_inst->src1 = UNUSED_SRC_0;
1027 o_inst->src2 = UNUSED_SRC_1;
1028 }
1029 else if (vp->fogmode == GL_EXP2) {
1030 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1031 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1032 VSF_FLAG_X);
1033 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1034 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
1035 o_inst->src2 = UNUSED_SRC_1;
1036 o_inst++;
1037 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1038 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1039 VSF_FLAG_X);
1040 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1041 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1042 o_inst->src2 = UNUSED_SRC_1;
1043 o_inst++;
1044 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
1045 R200_VSF_OUT_CLASS_RESULT_FOGC,
1046 VSF_FLAG_X);
1047 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1048 o_inst->src1 = UNUSED_SRC_0;
1049 o_inst->src2 = UNUSED_SRC_1;
1050 }
1051 else { /* fogmode == GL_LINEAR */
1052 /* could do that with single op (dot) if using params like
1053 with fixed function pipeline fog */
1054 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
1055 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
1056 VSF_FLAG_X);
1057 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
1058 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
1059 o_inst->src2 = UNUSED_SRC_1;
1060 o_inst++;
1061 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
1062 R200_VSF_OUT_CLASS_RESULT_FOGC,
1063 VSF_FLAG_X);
1064 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
1065 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
1066 o_inst->src2 = UNUSED_SRC_1;
1067
1068 }
1069 dofogfix = 0;
1070 }
1071
1072 u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
1073 if (mesa_vp->arb.NumNativeTemporaries <
1074 (mesa_vp->arb.NumTemporaries + u_temp_used)) {
1075 mesa_vp->arb.NumNativeTemporaries =
1076 mesa_vp->arb.NumTemporaries + u_temp_used;
1077 }
1078 if ((mesa_vp->arb.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
1079 if (R200_DEBUG & RADEON_FALLBACKS) {
1080 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->arb.NumTemporaries, u_temp_used);
1081 }
1082 return GL_FALSE;
1083 }
1084 u_temp_i = R200_VSF_MAX_TEMPS - 1;
1085 if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
1086 mesa_vp->arb.NumNativeInstructions = 129;
1087 if (R200_DEBUG & RADEON_FALLBACKS) {
1088 fprintf(stderr, "more than 128 native instructions\n");
1089 }
1090 return GL_FALSE;
1091 }
1092 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
1093 vp->pos_end = (o_inst - vp->instr);
1094 }
1095 }
1096
1097 vp->native = GL_TRUE;
1098 mesa_vp->arb.NumNativeInstructions = (o_inst - vp->instr);
1099 #if 0
1100 fprintf(stderr, "hw program:\n");
1101 for(i=0; i < vp->program.length; i++)
1102 fprintf(stderr, "%08x\n", vp->instr[i]);
1103 #endif
1104 return GL_TRUE;
1105 }
1106
1107 void r200SetupVertexProg( struct gl_context *ctx ) {
1108 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1109 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
1110 GLboolean fallback;
1111 GLint i;
1112
1113 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
1114 rmesa->curr_vp_hw = NULL;
1115 r200_translate_vertex_program(ctx, vp);
1116 }
1117 /* could optimize setting up vertex progs away for non-tcl hw */
1118 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
1119 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
1120 if (rmesa->radeon.TclFallback) return;
1121
1122 R200_STATECHANGE( rmesa, vap );
1123 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
1124 maybe only when using more than 64 inst / 96 param? */
1125 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
1126
1127 R200_STATECHANGE( rmesa, pvs );
1128
1129 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
1130 ((vp->mesa_program.arb.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
1131 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
1132 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
1133 (vp->mesa_program.arb.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
1134
1135 /* maybe user clip planes just work with vertex progs... untested */
1136 if (ctx->Transform.ClipPlanesEnabled) {
1137 R200_STATECHANGE( rmesa, tcl );
1138 if (vp->mesa_program.arb.IsPositionInvariant) {
1139 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
1140 }
1141 else {
1142 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
1143 }
1144 }
1145
1146 if (vp != rmesa->curr_vp_hw) {
1147 GLuint count = vp->mesa_program.arb.NumNativeInstructions;
1148 drm_radeon_cmd_header_t tmp;
1149
1150 R200_STATECHANGE( rmesa, vpi[0] );
1151 R200_STATECHANGE( rmesa, vpi[1] );
1152
1153 /* FIXME: what about using a memcopy... */
1154 for (i = 0; (i < 64) && i < count; i++) {
1155 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
1156 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
1157 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
1158 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
1159 }
1160 /* hack up the cmd_size so not the whole state atom is emitted always.
1161 This may require some more thought, we may emit half progs on lost state, but
1162 hopefully it won't matter?
1163 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
1164 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
1165 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
1166 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
1167 tmp.veclinear.count = (count > 64) ? 64 : count;
1168 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
1169 if (count > 64) {
1170 for (i = 0; i < (count - 64); i++) {
1171 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
1172 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
1173 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
1174 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
1175 }
1176 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
1177 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
1178 tmp.veclinear.count = count - 64;
1179 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
1180 }
1181 rmesa->curr_vp_hw = vp;
1182 }
1183 }
1184
1185
1186 static struct gl_program *
1187 r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id,
1188 bool is_arb_asm)
1189 {
1190 switch(target){
1191 case GL_VERTEX_PROGRAM_ARB: {
1192 struct r200_vertex_program *vp = rzalloc(NULL,
1193 struct r200_vertex_program);
1194 return _mesa_init_gl_program(&vp->mesa_program, target, id, is_arb_asm);
1195 }
1196 case GL_FRAGMENT_PROGRAM_ARB: {
1197 struct gl_program *prog = rzalloc(NULL, struct gl_program);
1198 return _mesa_init_gl_program(prog, target, id, is_arb_asm);
1199 }
1200 default:
1201 _mesa_problem(ctx, "Bad target in r200NewProgram");
1202 return NULL;
1203 }
1204 }
1205
1206
/**
 * Driver hook for destroying a program object.
 *
 * Does no driver-specific work here; the program is handed straight to
 * _mesa_delete_program().
 */
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
   _mesa_delete_program(ctx, prog);
}
1212
1213 static GLboolean
1214 r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1215 {
1216 struct r200_vertex_program *vp = (void *)prog;
1217 r200ContextPtr rmesa = R200_CONTEXT(ctx);
1218
1219 switch(target) {
1220 case GL_VERTEX_PROGRAM_ARB:
1221 vp->translated = GL_FALSE;
1222 vp->fogpidx = 0;
1223 /* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_program));*/
1224 r200_translate_vertex_program(ctx, vp);
1225 rmesa->curr_vp_hw = NULL;
1226 break;
1227 case GL_FRAGMENT_SHADER_ATI:
1228 rmesa->afs_loaded = NULL;
1229 break;
1230 }
1231 /* need this for tcl fallbacks */
1232 (void) _tnl_program_string(ctx, target, prog);
1233
1234 /* XXX check if program is legal, within limits */
1235 return GL_TRUE;
1236 }
1237
1238 static GLboolean
1239 r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
1240 {
1241 struct r200_vertex_program *vp = (void *)prog;
1242
1243 switch(target){
1244 case GL_VERTEX_PROGRAM_ARB:
1245 if (!vp->translated) {
1246 r200_translate_vertex_program(ctx, vp);
1247 }
1248 /* does not take parameters etc. into account */
1249 return vp->native;
1250 default:
1251 _mesa_problem(ctx, "Bad target in r200NewProgram");
1252 }
1253 return 0;
1254 }
1255
1256 void r200InitShaderFuncs(struct dd_function_table *functions)
1257 {
1258 functions->NewProgram = r200NewProgram;
1259 functions->DeleteProgram = r200DeleteProgram;
1260 functions->ProgramStringNotify = r200ProgramStringNotify;
1261 functions->IsProgramNative = r200IsProgramNative;
1262 }