Fix a refcounting mistake with first_swap_fence.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 */
32 #include "glheader.h"
33 #include "macros.h"
34 #include "enums.h"
35 #include "program.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_parameter.h"
38 #include "shader/prog_statevars.h"
39 #include "tnl/tnl.h"
40
41 #include "r300_context.h"
42 #include "r300_program.h"
43
/*
 * Compile-time guard: t_swizzle() and t_dst_mask() in this file pass Mesa's
 * SWIZZLE_* and WRITEMASK_* values through unchanged as VSF_IN_COMPONENT_* and
 * VSF_FLAG_* values, so the build is stopped if any pair of constants differs.
 */
44 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
45 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
46 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
47 SWIZZLE_W != VSF_IN_COMPONENT_W || \
48 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
49 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
50 WRITEMASK_X != VSF_FLAG_X || \
51 WRITEMASK_Y != VSF_FLAG_Y || \
52 WRITEMASK_Z != VSF_FLAG_Z || \
53 WRITEMASK_W != VSF_FLAG_W
54 #error Cannot change these!
55 #endif
56
57 #define SCALAR_FLAG (1<<31)
58 #define FLAG_MASK (1<<31)
59 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
60 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
61
62 static struct{
63 char *name;
64 int opcode;
65 unsigned long ip; /* number of input operands and flags */
66 }op_names[]={
67 OPN(ABS, 1),
68 OPN(ADD, 2),
69 OPN(ARL, 1|SCALAR_FLAG),
70 OPN(DP3, 2),
71 OPN(DP4, 2),
72 OPN(DPH, 2),
73 OPN(DST, 2),
74 OPN(EX2, 1|SCALAR_FLAG),
75 OPN(EXP, 1|SCALAR_FLAG),
76 OPN(FLR, 1),
77 OPN(FRC, 1),
78 OPN(LG2, 1|SCALAR_FLAG),
79 OPN(LIT, 1),
80 OPN(LOG, 1|SCALAR_FLAG),
81 OPN(MAD, 3),
82 OPN(MAX, 2),
83 OPN(MIN, 2),
84 OPN(MOV, 1),
85 OPN(MUL, 2),
86 OPN(POW, 2|SCALAR_FLAG),
87 OPN(RCP, 1|SCALAR_FLAG),
88 OPN(RSQ, 1|SCALAR_FLAG),
89 OPN(SGE, 2),
90 OPN(SLT, 2),
91 OPN(SUB, 2),
92 OPN(SWZ, 1),
93 OPN(XPD, 2),
94 OPN(RCC, 0), //extra
95 OPN(PRINT, 0),
96 OPN(END, 0),
97 };
98 #undef OPN
99
100 int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program_cont *vp, float *dst)
101 {
102 int pi;
103 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
104 float *dst_o=dst;
105 struct gl_program_parameter_list *paramList;
106
107 if (mesa_vp->IsNVProgram) {
108 _mesa_load_tracked_matrices(ctx);
109
110 for (pi=0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
111 *dst++=ctx->VertexProgram.Parameters[pi][0];
112 *dst++=ctx->VertexProgram.Parameters[pi][1];
113 *dst++=ctx->VertexProgram.Parameters[pi][2];
114 *dst++=ctx->VertexProgram.Parameters[pi][3];
115 }
116 return dst - dst_o;
117 }
118
119 assert(mesa_vp->Base.Parameters);
120 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
121
122 if(mesa_vp->Base.Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH){
123 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
124 exit(-1);
125 }
126
127 paramList = mesa_vp->Base.Parameters;
128 for(pi=0; pi < paramList->NumParameters; pi++){
129 switch(paramList->Parameters[pi].Type){
130
131 case PROGRAM_STATE_VAR:
132 case PROGRAM_NAMED_PARAM:
133 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
134 case PROGRAM_CONSTANT:
135 *dst++=paramList->ParameterValues[pi][0];
136 *dst++=paramList->ParameterValues[pi][1];
137 *dst++=paramList->ParameterValues[pi][2];
138 *dst++=paramList->ParameterValues[pi][3];
139 break;
140
141 default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
142 }
143
144 }
145
146 return dst - dst_o;
147 }
148
149 static unsigned long t_dst_mask(GLuint mask)
150 {
151 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
152 return mask & VSF_FLAG_ALL;
153 }
154
155 static unsigned long t_dst_class(enum register_file file)
156 {
157
158 switch(file){
159 case PROGRAM_TEMPORARY:
160 return VSF_OUT_CLASS_TMP;
161 case PROGRAM_OUTPUT:
162 return VSF_OUT_CLASS_RESULT;
163 case PROGRAM_ADDRESS:
164 return VSF_OUT_CLASS_ADDR;
165 /*
166 case PROGRAM_INPUT:
167 case PROGRAM_LOCAL_PARAM:
168 case PROGRAM_ENV_PARAM:
169 case PROGRAM_NAMED_PARAM:
170 case PROGRAM_STATE_VAR:
171 case PROGRAM_WRITE_ONLY:
172 case PROGRAM_ADDRESS:
173 */
174 default:
175 fprintf(stderr, "problem in %s", __FUNCTION__);
176 exit(0);
177 }
178 }
179
180 static unsigned long t_dst_index(struct r300_vertex_program *vp, struct prog_dst_register *dst)
181 {
182 if(dst->File == PROGRAM_OUTPUT)
183 return vp->outputs[dst->Index];
184
185 return dst->Index;
186 }
187
188 static unsigned long t_src_class(enum register_file file)
189 {
190
191 switch(file){
192 case PROGRAM_TEMPORARY:
193 return VSF_IN_CLASS_TMP;
194
195 case PROGRAM_INPUT:
196 return VSF_IN_CLASS_ATTR;
197
198 case PROGRAM_LOCAL_PARAM:
199 case PROGRAM_ENV_PARAM:
200 case PROGRAM_NAMED_PARAM:
201 case PROGRAM_STATE_VAR:
202 return VSF_IN_CLASS_PARAM;
203 /*
204 case PROGRAM_OUTPUT:
205 case PROGRAM_WRITE_ONLY:
206 case PROGRAM_ADDRESS:
207 */
208 default:
209 fprintf(stderr, "problem in %s", __FUNCTION__);
210 exit(0);
211 }
212 }
213
214 static __inline unsigned long t_swizzle(GLubyte swizzle)
215 {
216 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
217 return swizzle;
218 }
219
#if 0
/*
 * Debug helper (compiled out): print the vp->inputs[] attribute map to
 * stderr, prefixed with the name of the calling function.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
	}
	fprintf(stderr, ">\n");
}
#endif
237
238 static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src_register *src)
239 {
240 int i;
241 int max_reg=-1;
242
243 if(src->File == PROGRAM_INPUT){
244 if(vp->inputs[src->Index] != -1)
245 return vp->inputs[src->Index];
246
247 for(i=0; i < VERT_ATTRIB_MAX; i++)
248 if(vp->inputs[i] > max_reg)
249 max_reg=vp->inputs[i];
250
251 vp->inputs[src->Index]=max_reg+1;
252
253 //vp_dump_inputs(vp, __FUNCTION__);
254
255 return vp->inputs[src->Index];
256 }else{
257 if (src->Index < 0) {
258 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
259 return 0;
260 }
261 return src->Index;
262 }
263 }
264
265 static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src)
266 {
267 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
268 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
269 */
270 return MAKE_VSF_SOURCE(t_src_index(vp, src),
271 t_swizzle(GET_SWZ(src->Swizzle, 0)),
272 t_swizzle(GET_SWZ(src->Swizzle, 1)),
273 t_swizzle(GET_SWZ(src->Swizzle, 2)),
274 t_swizzle(GET_SWZ(src->Swizzle, 3)),
275 t_src_class(src->File),
276 src->NegateBase) | (src->RelAddr << 4);
277 }
278
279 static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src)
280 {
281
282 return MAKE_VSF_SOURCE(t_src_index(vp, src),
283 t_swizzle(GET_SWZ(src->Swizzle, 0)),
284 t_swizzle(GET_SWZ(src->Swizzle, 0)),
285 t_swizzle(GET_SWZ(src->Swizzle, 0)),
286 t_swizzle(GET_SWZ(src->Swizzle, 0)),
287 t_src_class(src->File),
288 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
289 }
290
291 static unsigned long t_opcode(enum prog_opcode opcode)
292 {
293
294 switch(opcode){
295 case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
296 case OPCODE_DST: return R300_VPI_OUT_OP_DST;
297 case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
298 case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
299 case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
300 case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
301 case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
302 case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
303 case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
304 case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
305 case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
306 case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
307 case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
308 case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
309 case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
310
311 default:
312 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
313 }
314 exit(-1);
315 return 0;
316 }
317
318 static unsigned long op_operands(enum prog_opcode opcode)
319 {
320 int i;
321
322 /* Can we trust mesas opcodes to be in order ? */
323 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
324 if(op_names[i].opcode == opcode)
325 return op_names[i].ip;
326
327 fprintf(stderr, "op %d not found in op_names\n", opcode);
328 exit(-1);
329 return 0;
330 }
331
332 static GLboolean valid_dst(struct r300_vertex_program *vp, struct prog_dst_register *dst)
333 {
334 if(dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
335 return GL_FALSE;
336 } else if(dst->File == PROGRAM_ADDRESS) {
337 assert(dst->Index == 0);
338 }
339
340 return GL_TRUE;
341 }
342
/*
 * Non-zero when source registers a and b are in conflict: either their
 * RelAddr settings differ, or they have different indices while both are
 * in the parameter class or both are in the attribute class.
 * The translator uses this to decide that one operand has to be copied to
 * a scratch temporary first.
 *
 * Arguments are parenthesized so that any lvalue expression can be passed,
 * and the definition no longer ends in a dangling line continuation.
 */
/* TODO: Get rid of t_src_class call */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
	((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
	  t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
	 (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
	  t_src_class((b).File) == VSF_IN_CLASS_ATTR))))

/*
 * Constant source operands for the translator loop.
 *
 * VP_CONST_SRC(n, comp) encodes src[n] with every channel replaced by the
 * constant swizzle comp and no negation; register index, class and the
 * relative-addressing bit still come from src[n]. The macros rely on the
 * local names vp and src of the function they are used in.
 */
#define VP_CONST_SRC(n, comp) (MAKE_VSF_SOURCE(t_src_index(vp, &src[n]), \
				comp, comp, \
				comp, comp, \
				t_src_class(src[n].File), VSF_FLAG_NONE) | (src[n].RelAddr << 4))

/* All channels read as 0.0 */
#define ZERO_SRC_0 VP_CONST_SRC(0, SWIZZLE_ZERO)
#define ZERO_SRC_1 VP_CONST_SRC(1, SWIZZLE_ZERO)
#define ZERO_SRC_2 VP_CONST_SRC(2, SWIZZLE_ZERO)

/* All channels read as 1.0 */
#define ONE_SRC_0 VP_CONST_SRC(0, SWIZZLE_ONE)
#define ONE_SRC_1 VP_CONST_SRC(1, SWIZZLE_ONE)
#define ONE_SRC_2 VP_CONST_SRC(2, SWIZZLE_ONE)
379
380 /* DP4 version seems to trigger some hw peculiarity */
381 //#define PREFER_DP4
382
/*
 * Release all scratch temporaries used by the translator loop.
 *
 * Scratch temps are handed out downwards from VSF_MAX_FRAGMENT_TEMPS-1 by
 * decrementing u_temp_i. If the counter has dropped below
 * vp->num_temporaries a warning is issued and the program is marked
 * non-native. The counter is then reset to the top.
 * Relies on the local names vp and u_temp_i of the calling function.
 */
#define FREE_TEMPS() \
	do { \
		if (vp->num_temporaries > u_temp_i) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
			vp->native = GL_FALSE; \
		} \
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; \
	} while (0)
391
/*
 * Translate the Mesa instruction list vpi (terminated by OPCODE_END) into
 * hardware vertex shader instructions stored in vp->program.body.i.
 *
 * The input and output register maps in vp are first reset to -1, then
 * output registers are numbered consecutively, in the order HPOS, PSIZ,
 * COL0, COL1, TEX0..TEX7, for every result set in vp->key.OutputsWritten.
 * Input registers are numbered lazily by t_src_index().
 *
 * On return vp->translated is GL_TRUE and vp->program.length is four times
 * the number of emitted instructions. vp->native is cleared when
 * FREE_TEMPS() finds that the scratch temp index dropped below
 * vp->num_temporaries, or (together with program.length = 0) when the
 * emitted instruction count reaches VSF_MAX_FRAGMENT_LENGTH/4.
 *
 * Side effect on the input: an instruction whose destination is an output
 * without an assigned register has its DstReg rewritten in place to point
 * at a scratch temporary.
 */
392 static void r300_translate_vertex_shader(struct r300_vertex_program *vp, struct prog_instruction *vpi)
393 {
394 int i, cur_reg=0;
395 VERTEX_SHADER_INSTRUCTION *o_inst;
396 unsigned long operands;
397 int are_srcs_scalar;
398 unsigned long hw_op;
399 /* Initial value should be last tmp reg that hw supports.
400 Strangely enough r300 doesnt mind even though these would be out of range.
401 Smart enough to realize that it doesnt need it? */
402 int u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
403 struct prog_src_register src[3];
404
405 vp->pos_end=0; /* Not supported yet */
406 vp->program.length=0;
407 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries;*/
408
409 for(i=0; i < VERT_ATTRIB_MAX; i++)
410 vp->inputs[i] = -1;
411
412 for(i=0; i < VERT_RESULT_MAX; i++)
413 vp->outputs[i] = -1;
414
415 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
416
417 /* Assign outputs */
418 if(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS))
419 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
420
421 if(vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ))
422 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
423
424 if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL0))
425 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
426
427 if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL1))
428 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
429
430 #if 0 /* Not supported yet */
431 if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0))
432 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
433
434 if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1))
435 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
436
437 if(vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC))
438 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
439 #endif
440
441 for(i=VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
442 if(vp->key.OutputsWritten & (1 << i))
443 vp->outputs[i] = cur_reg++;
444
445 vp->translated = GL_TRUE;
446 vp->native = GL_TRUE;
447
448 o_inst=vp->program.body.i;
/* One pass per Mesa instruction; o_inst may advance more than once per pass
   because some opcodes and operand conflicts emit extra hw instructions. */
449 for(; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
/* Reset the scratch temp counter; marks the program non-native if the
   previous pass ran it below vp->num_temporaries. */
450 FREE_TEMPS();
451
452 if(!valid_dst(vp, &vpi->DstReg))
453 {
454 /* redirect result to unused temp */
455 vpi->DstReg.File = PROGRAM_TEMPORARY;
456 vpi->DstReg.Index = u_temp_i;
457 }
458
459 operands=op_operands(vpi->Opcode);
460 are_srcs_scalar=operands & SCALAR_FLAG;
461 operands &= OP_MASK;
462
/* Work on local copies of the sources so they can be redirected to temps
   below without touching the Mesa instruction. */
463 for(i=0; i < operands; i++)
464 src[i]=vpi->SrcReg[i];
465
/* Three-operand ops: when CMP_SRCS() reports a conflict involving src[2],
   emit an extra ADD that copies src[2] to a scratch temp and read the temp
   instead. */
466 if(operands == 3){ /* TODO: scalars */
467 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
468 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
469 VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
470
471 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
472 SWIZZLE_X, SWIZZLE_Y,
473 SWIZZLE_Z, SWIZZLE_W,
474 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
475
476 o_inst->src2=ZERO_SRC_2;
477 o_inst->src3=ZERO_SRC_2;
478 o_inst++;
479
480 src[2].File=PROGRAM_TEMPORARY;
481 src[2].Index=u_temp_i;
482 src[2].RelAddr=0;
483 u_temp_i--;
484 }
485
486 }
487
/* Same treatment for a conflict between src[0] and src[1]: src[0] is copied
   to a scratch temp. */
488 if(operands >= 2){
489 if( CMP_SRCS(src[1], src[0]) ){
490 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
491 VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
492
493 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
494 SWIZZLE_X, SWIZZLE_Y,
495 SWIZZLE_Z, SWIZZLE_W,
496 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
497
498 o_inst->src2=ZERO_SRC_0;
499 o_inst->src3=ZERO_SRC_0;
500 o_inst++;
501
502 src[0].File=PROGRAM_TEMPORARY;
503 src[0].Index=u_temp_i;
504 src[0].RelAddr=0;
505 u_temp_i--;
506 }
507 }
508
509 /* These ops need special handling. */
510 switch(vpi->Opcode){
511 case OPCODE_POW:
512 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg),
513 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
514 o_inst->src1=t_src_scalar(vp, &src[0]);
515 o_inst->src2=ZERO_SRC_0;
516 o_inst->src3=t_src_scalar(vp, &src[1]);
517 goto next;
518
519 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
520 case OPCODE_SWZ:
521 #if 1
522 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
523 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
524 o_inst->src1=t_src(vp, &src[0]);
525 o_inst->src2=ZERO_SRC_0;
526 o_inst->src3=ZERO_SRC_0;
527 #else
528 hw_op=(src[0].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
529
530 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
531 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
532 o_inst->src1=t_src(vp, &src[0]);
533 o_inst->src2=ONE_SRC_0;
534 o_inst->src3=ZERO_SRC_0;
535 #endif
536
537 goto next;
538
539 case OPCODE_ADD:
540 #if 1
541 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
542 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
543
544 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
545 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
546 o_inst->src1=ONE_SRC_0;
547 o_inst->src2=t_src(vp, &src[0]);
548 o_inst->src3=t_src(vp, &src[1]);
549 #else
550 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
551 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
552 o_inst->src1=t_src(vp, &src[0]);
553 o_inst->src2=t_src(vp, &src[1]);
554 o_inst->src3=ZERO_SRC_1;
555
556 #endif
557 goto next;
558
559 case OPCODE_MAD:
560 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
561 src[1].File == PROGRAM_TEMPORARY &&
562 src[2].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
563
564 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
565 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
566 o_inst->src1=t_src(vp, &src[0]);
567 o_inst->src2=t_src(vp, &src[1]);
568 o_inst->src3=t_src(vp, &src[2]);
569 goto next;
570
571 case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
572 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
573 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
574
575 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
576 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
577 o_inst->src1=t_src(vp, &src[0]);
578 o_inst->src2=t_src(vp, &src[1]);
579
580 o_inst->src3=ZERO_SRC_1;
581 goto next;
582
583 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
584 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
585 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
586
587 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
588 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
589 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
590 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
591 SWIZZLE_ZERO,
592 t_src_class(src[0].File),
593 src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
594
595 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
596 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
597 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
598 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
599 SWIZZLE_ZERO,
600 t_src_class(src[1].File),
601 src[1].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
602
603 o_inst->src3=ZERO_SRC_1;
604 goto next;
605
606 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
607 #if 1
608 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
609 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
610
611 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
612 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
613 o_inst->src1=t_src(vp, &src[0]);
614 o_inst->src2=ONE_SRC_0;
615 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
616 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
617 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
618 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
619 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
620 t_src_class(src[1].File),
621 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
622 #else
623 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
624 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
625
626 o_inst->src1=t_src(vp, &src[0]);
627 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
628 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
629 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
630 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
631 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
632 t_src_class(src[1].File),
633 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
634 o_inst->src3=0;
635 #endif
636 goto next;
637
638 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
639 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
640 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
641
642 o_inst->src1=t_src(vp, &src[0]);
643 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
644 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
645 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
646 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
647 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
648 t_src_class(src[0].File),
649 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
650 o_inst->src3=0;
651 goto next;
652
653 case OPCODE_FLR:
654 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
655 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
656
657 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
658 t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
659
660 o_inst->src1=t_src(vp, &src[0]);
661 o_inst->src2=ZERO_SRC_0;
662 o_inst->src3=ZERO_SRC_0;
663 o_inst++;
664
665 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
666 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
667
668 o_inst->src1=t_src(vp, &src[0]);
669 o_inst->src2=MAKE_VSF_SOURCE(u_temp_i,
670 VSF_IN_COMPONENT_X,
671 VSF_IN_COMPONENT_Y,
672 VSF_IN_COMPONENT_Z,
673 VSF_IN_COMPONENT_W,
674 VSF_IN_CLASS_TMP,
675 /* Not 100% sure about this */
676 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
677
678 o_inst->src3=ZERO_SRC_0;
679 u_temp_i--;
680 goto next;
681
682 case OPCODE_LG2:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
683 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg),
684 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
685
686 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
687 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
688 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
689 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
690 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
691 t_src_class(src[0].File),
692 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
693 o_inst->src2=ZERO_SRC_0;
694 o_inst->src3=ZERO_SRC_0;
695 goto next;
696
697 case OPCODE_LIT://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
698 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg),
699 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
700 /* NOTE: Users swizzling might not work. */
701 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
702 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
703 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
704 VSF_IN_COMPONENT_ZERO, // z
705 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
706 t_src_class(src[0].File),
707 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
708 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
709 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
710 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
711 VSF_IN_COMPONENT_ZERO, // z
712 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
713 t_src_class(src[0].File),
714 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
715 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
716 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
717 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
718 VSF_IN_COMPONENT_ZERO, // z
719 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
720 t_src_class(src[0].File),
721 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
722 goto next;
723
724 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
725 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
726 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
727
728 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
729 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
730 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
731 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
732 VSF_IN_COMPONENT_ONE,
733 t_src_class(src[0].File),
734 src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
735 o_inst->src2=t_src(vp, &src[1]);
736 o_inst->src3=ZERO_SRC_1;
737 goto next;
738
739 case OPCODE_XPD:
740 /* mul r0, r1.yzxw, r2.zxyw
741 mad r0, -r2.yzxw, r1.zxyw, r0
742 NOTE: might need MAD_2
743 */
744
745 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
746 t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
747
748 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
749 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
750 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
751 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
752 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
753 t_src_class(src[0].File),
754 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
755
756 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
757 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
758 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
759 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
760 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
761 t_src_class(src[1].File),
762 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
763
764 o_inst->src3=ZERO_SRC_1;
765 o_inst++;
766 u_temp_i--;
767
768 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg),
769 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
770
771 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
772 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
773 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
774 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
775 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
776 t_src_class(src[1].File),
777 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
778
779 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
780 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
781 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
782 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
783 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
784 t_src_class(src[0].File),
785 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
786
/* u_temp_i was already decremented above, so u_temp_i+1 is the temp written
   by the first MAD. */
787 o_inst->src3=MAKE_VSF_SOURCE(u_temp_i+1,
788 VSF_IN_COMPONENT_X,
789 VSF_IN_COMPONENT_Y,
790 VSF_IN_COMPONENT_Z,
791 VSF_IN_COMPONENT_W,
792 VSF_IN_CLASS_TMP,
793 VSF_FLAG_NONE);
794
795 goto next;
796
797 case OPCODE_RCC:
798 fprintf(stderr, "Dont know how to handle op %d yet\n", vpi->Opcode);
799 exit(-1);
800 break;
801 case OPCODE_END:
802 break;
803 default:
804 break;
805 }
806
/* Generic path: every opcode not special-cased above is mapped directly by
   t_opcode(), with its sources encoded as scalars or vectors according to
   the SCALAR_FLAG of the opcode. */
807 o_inst->op=MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst_index(vp, &vpi->DstReg),
808 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
809
810 if(are_srcs_scalar){
811 switch(operands){
812 case 1:
813 o_inst->src1=t_src_scalar(vp, &src[0]);
814 o_inst->src2=ZERO_SRC_0;
815 o_inst->src3=ZERO_SRC_0;
816 break;
817
818 case 2:
819 o_inst->src1=t_src_scalar(vp, &src[0]);
820 o_inst->src2=t_src_scalar(vp, &src[1]);
821 o_inst->src3=ZERO_SRC_1;
822 break;
823
824 case 3:
825 o_inst->src1=t_src_scalar(vp, &src[0]);
826 o_inst->src2=t_src_scalar(vp, &src[1]);
827 o_inst->src3=t_src_scalar(vp, &src[2]);
828 break;
829
830 default:
831 fprintf(stderr, "scalars and op RCC not handled yet");
832 exit(-1);
833 break;
834 }
835 }else{
836 switch(operands){
837 case 1:
838 o_inst->src1=t_src(vp, &src[0]);
839 o_inst->src2=ZERO_SRC_0;
840 o_inst->src3=ZERO_SRC_0;
841 break;
842
843 case 2:
844 o_inst->src1=t_src(vp, &src[0]);
845 o_inst->src2=t_src(vp, &src[1]);
846 o_inst->src3=ZERO_SRC_1;
847 break;
848
849 case 3:
850 o_inst->src1=t_src(vp, &src[0]);
851 o_inst->src2=t_src(vp, &src[1]);
852 o_inst->src3=t_src(vp, &src[2]);
853 break;
854
855 default:
856 fprintf(stderr, "scalars and op RCC not handled yet");
857 exit(-1);
858 break;
859 }
860 }
/* Special-cased opcodes jump here once their instruction(s) are emitted. */
861 next: ;
862 }
863
/* NOTE(review): this limit is only checked after every instruction has been
   written through o_inst, so an oversized program has presumably already
   written past the end of vp->program.body.i -- confirm against its size. */
864 /* Will most likely segfault before we get here... fix later. */
865 if(o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH/4) {
866 vp->program.length = 0;
867 vp->native = GL_FALSE;
868 return ;
869 }
870 vp->program.length=(o_inst - vp->program.body.i) * 4;
871 #if 0
872 fprintf(stderr, "hw program:\n");
873 for(i=0; i < vp->program.length; i++)
874 fprintf(stderr, "%08x\n", vp->program.body.d[i]);
875 #endif
876 }
877
878 static void position_invariant(struct gl_program *prog)
879 {
880 struct prog_instruction *vpi;
881 struct gl_program_parameter_list *paramList;
882 int i;
883
884 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
885
886 #ifdef PREFER_DP4
887 tokens[5] = STATE_MATRIX;
888 #else
889 tokens[5] = STATE_MATRIX_TRANSPOSE;
890 #endif
891 paramList = prog->Parameters;
892
893 vpi = _mesa_alloc_instructions (prog->NumInstructions + 4);
894 _mesa_init_instructions (vpi, prog->NumInstructions + 4);
895
896 for (i=0; i < 4; i++) {
897 GLint idx;
898 tokens[3] = tokens[4] = i;
899 idx = _mesa_add_state_reference(paramList, tokens);
900 #ifdef PREFER_DP4
901 vpi[i].Opcode = OPCODE_DP4;
902 vpi[i].StringPos = 0;
903 vpi[i].Data = 0;
904
905 vpi[i].DstReg.File = PROGRAM_OUTPUT;
906 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
907 vpi[i].DstReg.WriteMask = 1 << i;
908 vpi[i].DstReg.CondMask = COND_TR;
909
910 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
911 vpi[i].SrcReg[0].Index = idx;
912 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
913
914 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
915 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
916 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
917 #else
918 if (i == 0)
919 vpi[i].Opcode = OPCODE_MUL;
920 else
921 vpi[i].Opcode = OPCODE_MAD;
922
923 vpi[i].StringPos = 0;
924 vpi[i].Data = 0;
925
926 if (i == 3)
927 vpi[i].DstReg.File = PROGRAM_OUTPUT;
928 else
929 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
930 vpi[i].DstReg.Index = 0;
931 vpi[i].DstReg.WriteMask = 0xf;
932 vpi[i].DstReg.CondMask = COND_TR;
933
934 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
935 vpi[i].SrcReg[0].Index = idx;
936 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
937
938 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
939 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
940 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
941
942 if (i > 0) {
943 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
944 vpi[i].SrcReg[2].Index = 0;
945 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
946 }
947 #endif
948 }
949
950 _mesa_copy_instructions (&vpi[i], prog->Instructions, prog->NumInstructions);
951
952 free(prog->Instructions);
953
954 prog->Instructions = vpi;
955
956 prog->NumInstructions += 4;
957 vpi = &prog->Instructions[prog->NumInstructions-1];
958
959 assert(vpi->Opcode == OPCODE_END);
960 }
961
962 static void insert_wpos(struct r300_vertex_program *vp,
963 struct gl_program *prog,
964 GLuint temp_index)
965 {
966 struct prog_instruction *vpi;
967 struct prog_instruction *vpi_insert;
968 int i = 0;
969
970 vpi = _mesa_alloc_instructions (prog->NumInstructions + 2);
971 _mesa_init_instructions (vpi, prog->NumInstructions + 2);
972 /* all but END */
973 _mesa_copy_instructions (vpi, prog->Instructions, prog->NumInstructions - 1);
974 /* END */
975 _mesa_copy_instructions (&vpi[prog->NumInstructions + 1],
976 &prog->Instructions[prog->NumInstructions - 1],
977 1);
978 vpi_insert = &vpi[prog->NumInstructions - 1];
979
980 vpi_insert[i].Opcode = OPCODE_MOV;
981
982 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
983 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
984 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
985 vpi_insert[i].DstReg.CondMask = COND_TR;
986
987 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
988 vpi_insert[i].SrcReg[0].Index = temp_index;
989 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
990 i++;
991
992 vpi_insert[i].Opcode = OPCODE_MOV;
993
994 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
995 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0+vp->wpos_idx;
996 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
997 vpi_insert[i].DstReg.CondMask = COND_TR;
998
999 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1000 vpi_insert[i].SrcReg[0].Index = temp_index;
1001 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1002 i++;
1003
1004 free(prog->Instructions);
1005
1006 prog->Instructions = vpi;
1007
1008 prog->NumInstructions += i;
1009 vpi = &prog->Instructions[prog->NumInstructions-1];
1010
1011 assert(vpi->Opcode == OPCODE_END);
1012 }
1013
1014 static void pos_as_texcoord(struct r300_vertex_program *vp,
1015 struct gl_program *prog)
1016 {
1017 struct prog_instruction *vpi;
1018 GLuint tempregi = prog->NumTemporaries;
1019 /* should do something else if no temps left... */
1020 prog->NumTemporaries++;
1021
1022 for(vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++){
1023 if( vpi->DstReg.File == PROGRAM_OUTPUT &&
1024 vpi->DstReg.Index == VERT_RESULT_HPOS ){
1025 vpi->DstReg.File = PROGRAM_TEMPORARY;
1026 vpi->DstReg.Index = tempregi;
1027 }
1028 }
1029 insert_wpos(vp, prog, tempregi);
1030 }
1031
1032 static struct r300_vertex_program *build_program(struct r300_vertex_program_key *wanted_key,
1033 struct gl_vertex_program *mesa_vp,
1034 GLint wpos_idx)
1035 {
1036 struct r300_vertex_program *vp;
1037
1038 vp = _mesa_calloc(sizeof(*vp));
1039 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1040
1041 vp->wpos_idx = wpos_idx;
1042
1043 if(mesa_vp->IsPositionInvariant) {
1044 position_invariant(&mesa_vp->Base);
1045 }
1046
1047 if(wpos_idx > -1)
1048 pos_as_texcoord(vp, &mesa_vp->Base);
1049
1050 assert(mesa_vp->Base.NumInstructions);
1051
1052 vp->num_temporaries=mesa_vp->Base.NumTemporaries;
1053
1054 r300_translate_vertex_shader(vp, mesa_vp->Base.Instructions);
1055
1056 return vp;
1057 }
1058
1059 void r300_select_vertex_shader(r300ContextPtr r300)
1060 {
1061 GLcontext *ctx = ctx = r300->radeon.glCtx;
1062 GLuint InputsRead;
1063 struct r300_vertex_program_key wanted_key = { 0 };
1064 GLint i;
1065 struct r300_vertex_program_cont *vpc;
1066 struct r300_vertex_program *vp;
1067 GLint wpos_idx;
1068
1069 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1070 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1071
1072 wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
1073
1074 wpos_idx = -1;
1075 if (InputsRead & FRAG_BIT_WPOS){
1076 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1077 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1078 break;
1079
1080 if(i == ctx->Const.MaxTextureUnits){
1081 fprintf(stderr, "\tno free texcoord found\n");
1082 exit(0);
1083 }
1084
1085 InputsRead |= (FRAG_BIT_TEX0 << i);
1086 wpos_idx = i;
1087 }
1088
1089 if (InputsRead & FRAG_BIT_COL0)
1090 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
1091
1092 if ((InputsRead & FRAG_BIT_COL1) /*||
1093 (InputsRead & FRAG_BIT_FOGC)*/)
1094 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
1095
1096 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1097 if (InputsRead & (FRAG_BIT_TEX0 << i))
1098 wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
1099
1100 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1101 if(vpc->mesa_program.IsPositionInvariant) {
1102 /* we wan't position don't we ? */
1103 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1104 }
1105
1106 for (vp = vpc->progs; vp; vp = vp->next)
1107 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) {
1108 r300->selected_vp = vp;
1109 return ;
1110 }
1111
1112 //_mesa_print_program(&vpc->mesa_program.Base);
1113
1114 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1115 vp->next = vpc->progs;
1116 vpc->progs = vp;
1117 r300->selected_vp = vp;
1118 }