merge of glsl-compiler-1 branch
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 */
32 #include "glheader.h"
33 #include "macros.h"
34 #include "enums.h"
35 #include "program.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_parameter.h"
38 #include "shader/prog_statevars.h"
39 #include "tnl/tnl.h"
40
41 #include "r300_context.h"
42 #include "r300_program.h"
43
/*
 * The translator hands Mesa swizzle selectors and write masks to the
 * hardware encoding unchanged, so both sets of constants must be identical.
 * Refuse to build if any of them ever diverges.
 */
#if !(SWIZZLE_X == VSF_IN_COMPONENT_X && \
      SWIZZLE_Y == VSF_IN_COMPONENT_Y && \
      SWIZZLE_Z == VSF_IN_COMPONENT_Z && \
      SWIZZLE_W == VSF_IN_COMPONENT_W && \
      SWIZZLE_ZERO == VSF_IN_COMPONENT_ZERO && \
      SWIZZLE_ONE == VSF_IN_COMPONENT_ONE && \
      WRITEMASK_X == VSF_FLAG_X && \
      WRITEMASK_Y == VSF_FLAG_Y && \
      WRITEMASK_Z == VSF_FLAG_Z && \
      WRITEMASK_W == VSF_FLAG_W)
#error Cannot change these!
#endif
56
57 #define SCALAR_FLAG (1<<31)
58 #define FLAG_MASK (1<<31)
59 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
60 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
61
62 static struct{
63 char *name;
64 int opcode;
65 unsigned long ip; /* number of input operands and flags */
66 }op_names[]={
67 OPN(ABS, 1),
68 OPN(ADD, 2),
69 OPN(ARL, 1|SCALAR_FLAG),
70 OPN(DP3, 2),
71 OPN(DP4, 2),
72 OPN(DPH, 2),
73 OPN(DST, 2),
74 OPN(EX2, 1|SCALAR_FLAG),
75 OPN(EXP, 1|SCALAR_FLAG),
76 OPN(FLR, 1),
77 OPN(FRC, 1),
78 OPN(LG2, 1|SCALAR_FLAG),
79 OPN(LIT, 1),
80 OPN(LOG, 1|SCALAR_FLAG),
81 OPN(MAD, 3),
82 OPN(MAX, 2),
83 OPN(MIN, 2),
84 OPN(MOV, 1),
85 OPN(MUL, 2),
86 OPN(POW, 2|SCALAR_FLAG),
87 OPN(RCP, 1|SCALAR_FLAG),
88 OPN(RSQ, 1|SCALAR_FLAG),
89 OPN(SGE, 2),
90 OPN(SLT, 2),
91 OPN(SUB, 2),
92 OPN(SWZ, 1),
93 OPN(XPD, 2),
94 OPN(RCC, 0), //extra
95 OPN(PRINT, 0),
96 OPN(END, 0),
97 };
98 #undef OPN
99
100 int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program_cont *vp, float *dst)
101 {
102 int pi;
103 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
104 float *dst_o=dst;
105 struct gl_program_parameter_list *paramList;
106
107 if (mesa_vp->IsNVProgram) {
108 _mesa_load_tracked_matrices(ctx);
109
110 for (pi=0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
111 *dst++=ctx->VertexProgram.Parameters[pi][0];
112 *dst++=ctx->VertexProgram.Parameters[pi][1];
113 *dst++=ctx->VertexProgram.Parameters[pi][2];
114 *dst++=ctx->VertexProgram.Parameters[pi][3];
115 }
116 return dst - dst_o;
117 }
118
119 assert(mesa_vp->Base.Parameters);
120 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
121
122 if(mesa_vp->Base.Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH){
123 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
124 exit(-1);
125 }
126
127 paramList = mesa_vp->Base.Parameters;
128 for(pi=0; pi < paramList->NumParameters; pi++){
129 switch(paramList->Parameters[pi].Type){
130
131 case PROGRAM_STATE_VAR:
132 case PROGRAM_NAMED_PARAM:
133 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
134 case PROGRAM_CONSTANT:
135 *dst++=paramList->ParameterValues[pi][0];
136 *dst++=paramList->ParameterValues[pi][1];
137 *dst++=paramList->ParameterValues[pi][2];
138 *dst++=paramList->ParameterValues[pi][3];
139 break;
140
141 default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
142 }
143
144 }
145
146 return dst - dst_o;
147 }
148
149 static unsigned long t_dst_mask(GLuint mask)
150 {
151 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
152 return mask & VSF_FLAG_ALL;
153 }
154
155 static unsigned long t_dst_class(enum register_file file)
156 {
157
158 switch(file){
159 case PROGRAM_TEMPORARY:
160 return VSF_OUT_CLASS_TMP;
161 case PROGRAM_OUTPUT:
162 return VSF_OUT_CLASS_RESULT;
163 case PROGRAM_ADDRESS:
164 return VSF_OUT_CLASS_ADDR;
165 /*
166 case PROGRAM_INPUT:
167 case PROGRAM_LOCAL_PARAM:
168 case PROGRAM_ENV_PARAM:
169 case PROGRAM_NAMED_PARAM:
170 case PROGRAM_STATE_VAR:
171 case PROGRAM_WRITE_ONLY:
172 case PROGRAM_ADDRESS:
173 */
174 default:
175 fprintf(stderr, "problem in %s", __FUNCTION__);
176 exit(0);
177 }
178 }
179
180 static unsigned long t_dst_index(struct r300_vertex_program *vp, struct prog_dst_register *dst)
181 {
182 if(dst->File == PROGRAM_OUTPUT)
183 return vp->outputs[dst->Index];
184
185 return dst->Index;
186 }
187
188 static unsigned long t_src_class(enum register_file file)
189 {
190
191 switch(file){
192 case PROGRAM_TEMPORARY:
193 return VSF_IN_CLASS_TMP;
194
195 case PROGRAM_INPUT:
196 return VSF_IN_CLASS_ATTR;
197
198 case PROGRAM_LOCAL_PARAM:
199 case PROGRAM_ENV_PARAM:
200 case PROGRAM_NAMED_PARAM:
201 case PROGRAM_STATE_VAR:
202 return VSF_IN_CLASS_PARAM;
203 /*
204 case PROGRAM_OUTPUT:
205 case PROGRAM_WRITE_ONLY:
206 case PROGRAM_ADDRESS:
207 */
208 default:
209 fprintf(stderr, "problem in %s", __FUNCTION__);
210 exit(0);
211 }
212 }
213
214 static __inline unsigned long t_swizzle(GLubyte swizzle)
215 {
216 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
217 return swizzle;
218 }
219
#if 0
/* Debug aid (disabled): print the input remapping table of vp to stderr,
 * prefixed with the name of the calling function. */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr;

	if(!vp){
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	for(attr=0; attr < VERT_ATTRIB_MAX; attr++){
		fprintf(stderr, "%d ", vp->inputs[attr]);
	}
	fprintf(stderr, ">\n");
}
#endif
237
238 static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src_register *src)
239 {
240 int i;
241 int max_reg=-1;
242
243 if(src->File == PROGRAM_INPUT){
244 if(vp->inputs[src->Index] != -1)
245 return vp->inputs[src->Index];
246
247 for(i=0; i < VERT_ATTRIB_MAX; i++)
248 if(vp->inputs[i] > max_reg)
249 max_reg=vp->inputs[i];
250
251 vp->inputs[src->Index]=max_reg+1;
252
253 //vp_dump_inputs(vp, __FUNCTION__);
254
255 return vp->inputs[src->Index];
256 }else{
257 if (src->Index < 0) {
258 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
259 return 0;
260 }
261 return src->Index;
262 }
263 }
264
265 static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src)
266 {
267 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
268 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
269 */
270 return MAKE_VSF_SOURCE(t_src_index(vp, src),
271 t_swizzle(GET_SWZ(src->Swizzle, 0)),
272 t_swizzle(GET_SWZ(src->Swizzle, 1)),
273 t_swizzle(GET_SWZ(src->Swizzle, 2)),
274 t_swizzle(GET_SWZ(src->Swizzle, 3)),
275 t_src_class(src->File),
276 src->NegateBase) | (src->RelAddr << 4);
277 }
278
279 static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src)
280 {
281
282 return MAKE_VSF_SOURCE(t_src_index(vp, src),
283 t_swizzle(GET_SWZ(src->Swizzle, 0)),
284 t_swizzle(GET_SWZ(src->Swizzle, 0)),
285 t_swizzle(GET_SWZ(src->Swizzle, 0)),
286 t_swizzle(GET_SWZ(src->Swizzle, 0)),
287 t_src_class(src->File),
288 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
289 }
290
291 static unsigned long t_opcode(enum prog_opcode opcode)
292 {
293
294 switch(opcode){
295 case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
296 case OPCODE_DST: return R300_VPI_OUT_OP_DST;
297 case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
298 case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
299 case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
300 case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
301 case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
302 case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
303 case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
304 case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
305 case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
306 case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
307 case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
308 case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
309 case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
310
311 default:
312 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
313 }
314 exit(-1);
315 return 0;
316 }
317
318 static unsigned long op_operands(enum prog_opcode opcode)
319 {
320 int i;
321
322 /* Can we trust mesas opcodes to be in order ? */
323 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
324 if(op_names[i].opcode == opcode)
325 return op_names[i].ip;
326
327 fprintf(stderr, "op %d not found in op_names\n", opcode);
328 exit(-1);
329 return 0;
330 }
331
332 static GLboolean valid_dst(struct r300_vertex_program *vp, struct prog_dst_register *dst)
333 {
334 if(dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1){
335 WARN_ONCE("Output %d not used by fragment program\n", dst->Index);
336 return GL_FALSE;
337 }else if(dst->File == PROGRAM_ADDRESS) {
338 assert(dst->Index == 0);
339 }
340
341 return GL_TRUE;
342 }
343
/*
 * Helper macros for r300_translate_vertex_shader().  Apart from CMP_SRCS
 * they rely on the local variables `vp`, `src` and `u_temp_i` of that
 * function.
 */

/* True when two source operands cannot be used in the same instruction as
 * they are: they differ in relative addressing, or they are different
 * registers of the parameter class or of the attribute class.
 * TODO: Get rid of t_src_class call */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR))))

/* Source operand that reads the constant selector `comp` in all four
 * components while still naming register src[n]. */
#define CONST_SRC(n, comp) (MAKE_VSF_SOURCE(t_src_index(vp, &src[n]), \
				    comp, comp, \
				    comp, comp, \
				    t_src_class(src[n].File), VSF_FLAG_NONE) | (src[n].RelAddr << 4))

#define ZERO_SRC_0 CONST_SRC(0, SWIZZLE_ZERO)
#define ZERO_SRC_1 CONST_SRC(1, SWIZZLE_ZERO)
#define ZERO_SRC_2 CONST_SRC(2, SWIZZLE_ZERO)

#define ONE_SRC_0 CONST_SRC(0, SWIZZLE_ONE)
#define ONE_SRC_1 CONST_SRC(1, SWIZZLE_ONE)
#define ONE_SRC_2 CONST_SRC(2, SWIZZLE_ONE)

/* DP4 version seems to trigger some hw peculiarity */
/* #define PREFER_DP4 */

/* Warn (once) and mark the program non-native if the scratch temporaries,
 * which are taken from the top of the register file downwards, have run
 * into the program's own temporaries; then release all scratch temporaries. */
#define FREE_TEMPS() \
	do { \
		if(u_temp_i < vp->num_temporaries) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
			vp->native = GL_FALSE; \
		} \
		u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
	} while (0)
392
393 static void r300_translate_vertex_shader(struct r300_vertex_program *vp, struct prog_instruction *vpi)
394 {
395 int i, cur_reg=0;
396 VERTEX_SHADER_INSTRUCTION *o_inst;
397 unsigned long operands;
398 int are_srcs_scalar;
399 unsigned long hw_op;
400 /* Initial value should be last tmp reg that hw supports.
401 Strangely enough r300 doesnt mind even though these would be out of range.
402 Smart enough to realize that it doesnt need it? */
403 int u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
404 struct prog_src_register src[3];
405
406 vp->pos_end=0; /* Not supported yet */
407 vp->program.length=0;
408 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries;*/
409
410 for(i=0; i < VERT_ATTRIB_MAX; i++)
411 vp->inputs[i] = -1;
412
413 for(i=0; i < VERT_RESULT_MAX; i++)
414 vp->outputs[i] = -1;
415
416 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
417
418 /* Assign outputs */
419 if(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS))
420 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
421
422 if(vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ))
423 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
424
425 if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL0))
426 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
427
428 if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL1))
429 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
430
431 #if 0 /* Not supported yet */
432 if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0))
433 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
434
435 if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1))
436 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
437
438 if(vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC))
439 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
440 #endif
441
442 for(i=VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
443 if(vp->key.OutputsWritten & (1 << i))
444 vp->outputs[i] = cur_reg++;
445
446 vp->translated = GL_TRUE;
447 vp->native = GL_TRUE;
448
449 o_inst=vp->program.body.i;
450 for(; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
451 FREE_TEMPS();
452
453 if(!valid_dst(vp, &vpi->DstReg))
454 {
455 /* redirect result to unused temp */
456 vpi->DstReg.File = PROGRAM_TEMPORARY;
457 vpi->DstReg.Index = u_temp_i;
458 }
459
460 operands=op_operands(vpi->Opcode);
461 are_srcs_scalar=operands & SCALAR_FLAG;
462 operands &= OP_MASK;
463
464 for(i=0; i < operands; i++)
465 src[i]=vpi->SrcReg[i];
466
467 if(operands == 3){ /* TODO: scalars */
468 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
469 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
470 VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
471
472 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
473 SWIZZLE_X, SWIZZLE_Y,
474 SWIZZLE_Z, SWIZZLE_W,
475 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
476
477 o_inst->src2=ZERO_SRC_2;
478 o_inst->src3=ZERO_SRC_2;
479 o_inst++;
480
481 src[2].File=PROGRAM_TEMPORARY;
482 src[2].Index=u_temp_i;
483 src[2].RelAddr=0;
484 u_temp_i--;
485 }
486
487 }
488
489 if(operands >= 2){
490 if( CMP_SRCS(src[1], src[0]) ){
491 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
492 VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
493
494 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
495 SWIZZLE_X, SWIZZLE_Y,
496 SWIZZLE_Z, SWIZZLE_W,
497 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
498
499 o_inst->src2=ZERO_SRC_0;
500 o_inst->src3=ZERO_SRC_0;
501 o_inst++;
502
503 src[0].File=PROGRAM_TEMPORARY;
504 src[0].Index=u_temp_i;
505 src[0].RelAddr=0;
506 u_temp_i--;
507 }
508 }
509
510 /* These ops need special handling. */
511 switch(vpi->Opcode){
512 case OPCODE_POW:
513 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg),
514 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
515 o_inst->src1=t_src_scalar(vp, &src[0]);
516 o_inst->src2=ZERO_SRC_0;
517 o_inst->src3=t_src_scalar(vp, &src[1]);
518 goto next;
519
520 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
521 case OPCODE_SWZ:
522 #if 1
523 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
524 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
525 o_inst->src1=t_src(vp, &src[0]);
526 o_inst->src2=ZERO_SRC_0;
527 o_inst->src3=ZERO_SRC_0;
528 #else
529 hw_op=(src[0].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
530
531 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
532 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
533 o_inst->src1=t_src(vp, &src[0]);
534 o_inst->src2=ONE_SRC_0;
535 o_inst->src3=ZERO_SRC_0;
536 #endif
537
538 goto next;
539
540 case OPCODE_ADD:
541 #if 1
542 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
543 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
544
545 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
546 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
547 o_inst->src1=ONE_SRC_0;
548 o_inst->src2=t_src(vp, &src[0]);
549 o_inst->src3=t_src(vp, &src[1]);
550 #else
551 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
552 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
553 o_inst->src1=t_src(vp, &src[0]);
554 o_inst->src2=t_src(vp, &src[1]);
555 o_inst->src3=ZERO_SRC_1;
556
557 #endif
558 goto next;
559
560 case OPCODE_MAD:
561 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
562 src[1].File == PROGRAM_TEMPORARY &&
563 src[2].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
564
565 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
566 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
567 o_inst->src1=t_src(vp, &src[0]);
568 o_inst->src2=t_src(vp, &src[1]);
569 o_inst->src3=t_src(vp, &src[2]);
570 goto next;
571
572 case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
573 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
574 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
575
576 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
577 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
578 o_inst->src1=t_src(vp, &src[0]);
579 o_inst->src2=t_src(vp, &src[1]);
580
581 o_inst->src3=ZERO_SRC_1;
582 goto next;
583
584 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
585 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
586 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
587
588 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
589 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
590 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
591 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
592 SWIZZLE_ZERO,
593 t_src_class(src[0].File),
594 src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
595
596 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
597 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
598 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
599 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
600 SWIZZLE_ZERO,
601 t_src_class(src[1].File),
602 src[1].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
603
604 o_inst->src3=ZERO_SRC_1;
605 goto next;
606
607 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
608 #if 1
609 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
610 src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
611
612 o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
613 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
614 o_inst->src1=t_src(vp, &src[0]);
615 o_inst->src2=ONE_SRC_0;
616 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
617 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
618 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
619 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
620 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
621 t_src_class(src[1].File),
622 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
623 #else
624 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
625 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
626
627 o_inst->src1=t_src(vp, &src[0]);
628 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
629 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
630 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
631 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
632 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
633 t_src_class(src[1].File),
634 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
635 o_inst->src3=0;
636 #endif
637 goto next;
638
639 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
640 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
641 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
642
643 o_inst->src1=t_src(vp, &src[0]);
644 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
645 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
646 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
647 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
648 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
649 t_src_class(src[0].File),
650 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
651 o_inst->src3=0;
652 goto next;
653
654 case OPCODE_FLR:
655 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
656 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
657
658 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
659 t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
660
661 o_inst->src1=t_src(vp, &src[0]);
662 o_inst->src2=ZERO_SRC_0;
663 o_inst->src3=ZERO_SRC_0;
664 o_inst++;
665
666 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
667 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
668
669 o_inst->src1=t_src(vp, &src[0]);
670 o_inst->src2=MAKE_VSF_SOURCE(u_temp_i,
671 VSF_IN_COMPONENT_X,
672 VSF_IN_COMPONENT_Y,
673 VSF_IN_COMPONENT_Z,
674 VSF_IN_COMPONENT_W,
675 VSF_IN_CLASS_TMP,
676 /* Not 100% sure about this */
677 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
678
679 o_inst->src3=ZERO_SRC_0;
680 u_temp_i--;
681 goto next;
682
683 case OPCODE_LG2:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
684 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg),
685 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
686
687 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
688 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
689 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
690 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
691 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
692 t_src_class(src[0].File),
693 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
694 o_inst->src2=ZERO_SRC_0;
695 o_inst->src3=ZERO_SRC_0;
696 goto next;
697
698 case OPCODE_LIT://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
699 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg),
700 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
701 /* NOTE: Users swizzling might not work. */
702 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
703 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
704 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
705 VSF_IN_COMPONENT_ZERO, // z
706 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
707 t_src_class(src[0].File),
708 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
709 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
710 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
711 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
712 VSF_IN_COMPONENT_ZERO, // z
713 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
714 t_src_class(src[0].File),
715 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
716 o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
717 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
718 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
719 VSF_IN_COMPONENT_ZERO, // z
720 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
721 t_src_class(src[0].File),
722 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
723 goto next;
724
725 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
726 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
727 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
728
729 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
730 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
731 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
732 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
733 VSF_IN_COMPONENT_ONE,
734 t_src_class(src[0].File),
735 src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
736 o_inst->src2=t_src(vp, &src[1]);
737 o_inst->src3=ZERO_SRC_1;
738 goto next;
739
740 case OPCODE_XPD:
741 /* mul r0, r1.yzxw, r2.zxyw
742 mad r0, -r2.yzxw, r1.zxyw, r0
743 NOTE: might need MAD_2
744 */
745
746 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
747 t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
748
749 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
750 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
751 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
752 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
753 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
754 t_src_class(src[0].File),
755 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
756
757 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
758 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
759 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
760 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
761 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
762 t_src_class(src[1].File),
763 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
764
765 o_inst->src3=ZERO_SRC_1;
766 o_inst++;
767 u_temp_i--;
768
769 o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg),
770 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
771
772 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
773 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
774 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
775 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
776 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
777 t_src_class(src[1].File),
778 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
779
780 o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
781 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
782 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
783 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
784 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
785 t_src_class(src[0].File),
786 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
787
788 o_inst->src3=MAKE_VSF_SOURCE(u_temp_i+1,
789 VSF_IN_COMPONENT_X,
790 VSF_IN_COMPONENT_Y,
791 VSF_IN_COMPONENT_Z,
792 VSF_IN_COMPONENT_W,
793 VSF_IN_CLASS_TMP,
794 VSF_FLAG_NONE);
795
796 goto next;
797
798 case OPCODE_RCC:
799 fprintf(stderr, "Dont know how to handle op %d yet\n", vpi->Opcode);
800 exit(-1);
801 break;
802 case OPCODE_END:
803 break;
804 default:
805 break;
806 }
807
808 o_inst->op=MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst_index(vp, &vpi->DstReg),
809 t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
810
811 if(are_srcs_scalar){
812 switch(operands){
813 case 1:
814 o_inst->src1=t_src_scalar(vp, &src[0]);
815 o_inst->src2=ZERO_SRC_0;
816 o_inst->src3=ZERO_SRC_0;
817 break;
818
819 case 2:
820 o_inst->src1=t_src_scalar(vp, &src[0]);
821 o_inst->src2=t_src_scalar(vp, &src[1]);
822 o_inst->src3=ZERO_SRC_1;
823 break;
824
825 case 3:
826 o_inst->src1=t_src_scalar(vp, &src[0]);
827 o_inst->src2=t_src_scalar(vp, &src[1]);
828 o_inst->src3=t_src_scalar(vp, &src[2]);
829 break;
830
831 default:
832 fprintf(stderr, "scalars and op RCC not handled yet");
833 exit(-1);
834 break;
835 }
836 }else{
837 switch(operands){
838 case 1:
839 o_inst->src1=t_src(vp, &src[0]);
840 o_inst->src2=ZERO_SRC_0;
841 o_inst->src3=ZERO_SRC_0;
842 break;
843
844 case 2:
845 o_inst->src1=t_src(vp, &src[0]);
846 o_inst->src2=t_src(vp, &src[1]);
847 o_inst->src3=ZERO_SRC_1;
848 break;
849
850 case 3:
851 o_inst->src1=t_src(vp, &src[0]);
852 o_inst->src2=t_src(vp, &src[1]);
853 o_inst->src3=t_src(vp, &src[2]);
854 break;
855
856 default:
857 fprintf(stderr, "scalars and op RCC not handled yet");
858 exit(-1);
859 break;
860 }
861 }
862 next: ;
863 }
864
865 /* Will most likely segfault before we get here... fix later. */
866 if(o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH/4) {
867 vp->program.length = 0;
868 vp->native = GL_FALSE;
869 return ;
870 }
871 vp->program.length=(o_inst - vp->program.body.i) * 4;
872 #if 0
873 fprintf(stderr, "hw program:\n");
874 for(i=0; i < vp->program.length; i++)
875 fprintf(stderr, "%08x\n", vp->program.body.d[i]);
876 #endif
877 }
878
879 static void position_invariant(struct gl_program *prog)
880 {
881 struct prog_instruction *vpi;
882 struct gl_program_parameter_list *paramList;
883 int i;
884
885 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
886
887 #ifdef PREFER_DP4
888 tokens[5] = STATE_MATRIX;
889 #else
890 tokens[5] = STATE_MATRIX_TRANSPOSE;
891 #endif
892 paramList = prog->Parameters;
893
894 vpi = _mesa_alloc_instructions (prog->NumInstructions + 4);
895 _mesa_init_instructions (vpi, prog->NumInstructions + 4);
896
897 for (i=0; i < 4; i++) {
898 GLint idx;
899 tokens[3] = tokens[4] = i;
900 idx = _mesa_add_state_reference(paramList, tokens);
901 #ifdef PREFER_DP4
902 vpi[i].Opcode = OPCODE_DP4;
903 vpi[i].StringPos = 0;
904 vpi[i].Data = 0;
905
906 vpi[i].DstReg.File = PROGRAM_OUTPUT;
907 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
908 vpi[i].DstReg.WriteMask = 1 << i;
909 vpi[i].DstReg.CondMask = COND_TR;
910
911 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
912 vpi[i].SrcReg[0].Index = idx;
913 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
914
915 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
916 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
917 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
918 #else
919 if (i == 0)
920 vpi[i].Opcode = OPCODE_MUL;
921 else
922 vpi[i].Opcode = OPCODE_MAD;
923
924 vpi[i].StringPos = 0;
925 vpi[i].Data = 0;
926
927 if (i == 3)
928 vpi[i].DstReg.File = PROGRAM_OUTPUT;
929 else
930 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
931 vpi[i].DstReg.Index = 0;
932 vpi[i].DstReg.WriteMask = 0xf;
933 vpi[i].DstReg.CondMask = COND_TR;
934
935 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
936 vpi[i].SrcReg[0].Index = idx;
937 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
938
939 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
940 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
941 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
942
943 if (i > 0) {
944 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
945 vpi[i].SrcReg[2].Index = 0;
946 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
947 }
948 #endif
949 }
950
951 _mesa_copy_instructions (&vpi[i], prog->Instructions, prog->NumInstructions);
952
953 free(prog->Instructions);
954
955 prog->Instructions = vpi;
956
957 prog->NumInstructions += 4;
958 vpi = &prog->Instructions[prog->NumInstructions-1];
959
960 assert(vpi->Opcode == OPCODE_END);
961 }
962
963 static void insert_wpos(struct r300_vertex_program *vp,
964 struct gl_program *prog,
965 GLuint temp_index)
966 {
967 struct prog_instruction *vpi;
968 struct prog_instruction *vpi_insert;
969 int i = 0;
970
971 vpi = _mesa_alloc_instructions (prog->NumInstructions + 2);
972 _mesa_init_instructions (vpi, prog->NumInstructions + 2);
973 /* all but END */
974 _mesa_copy_instructions (vpi, prog->Instructions, prog->NumInstructions - 1);
975 /* END */
976 _mesa_copy_instructions (&vpi[prog->NumInstructions + 1],
977 &prog->Instructions[prog->NumInstructions - 1],
978 1);
979 vpi_insert = &vpi[prog->NumInstructions - 1];
980
981 vpi_insert[i].Opcode = OPCODE_MOV;
982
983 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
984 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
985 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
986 vpi_insert[i].DstReg.CondMask = COND_TR;
987
988 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
989 vpi_insert[i].SrcReg[0].Index = temp_index;
990 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
991 i++;
992
993 vpi_insert[i].Opcode = OPCODE_MOV;
994
995 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
996 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0+vp->wpos_idx;
997 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
998 vpi_insert[i].DstReg.CondMask = COND_TR;
999
1000 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1001 vpi_insert[i].SrcReg[0].Index = temp_index;
1002 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1003 i++;
1004
1005 free(prog->Instructions);
1006
1007 prog->Instructions = vpi;
1008
1009 prog->NumInstructions += i;
1010 vpi = &prog->Instructions[prog->NumInstructions-1];
1011
1012 assert(vpi->Opcode == OPCODE_END);
1013 }
1014
1015 static void pos_as_texcoord(struct r300_vertex_program *vp,
1016 struct gl_program *prog)
1017 {
1018 struct prog_instruction *vpi;
1019 GLuint tempregi = prog->NumTemporaries;
1020 /* should do something else if no temps left... */
1021 prog->NumTemporaries++;
1022
1023 for(vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++){
1024 if( vpi->DstReg.File == PROGRAM_OUTPUT &&
1025 vpi->DstReg.Index == VERT_RESULT_HPOS ){
1026 vpi->DstReg.File = PROGRAM_TEMPORARY;
1027 vpi->DstReg.Index = tempregi;
1028 }
1029 }
1030 insert_wpos(vp, prog, tempregi);
1031 }
1032
1033 static struct r300_vertex_program *build_program(struct r300_vertex_program_key *wanted_key,
1034 struct gl_vertex_program *mesa_vp,
1035 GLint wpos_idx)
1036 {
1037 struct r300_vertex_program *vp;
1038
1039 vp = _mesa_calloc(sizeof(*vp));
1040 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1041
1042 vp->wpos_idx = wpos_idx;
1043
1044 if(mesa_vp->IsPositionInvariant) {
1045 position_invariant(&mesa_vp->Base);
1046 }
1047
1048 if(wpos_idx > -1)
1049 pos_as_texcoord(vp, &mesa_vp->Base);
1050
1051 assert(mesa_vp->Base.NumInstructions);
1052
1053 vp->num_temporaries=mesa_vp->Base.NumTemporaries;
1054
1055 r300_translate_vertex_shader(vp, mesa_vp->Base.Instructions);
1056
1057 return vp;
1058 }
1059
1060 void r300_select_vertex_shader(r300ContextPtr r300)
1061 {
1062 GLcontext *ctx = ctx = r300->radeon.glCtx;
1063 GLuint InputsRead;
1064 struct r300_vertex_program_key wanted_key = { 0 };
1065 GLint i;
1066 struct r300_vertex_program_cont *vpc;
1067 struct r300_vertex_program *vp;
1068 GLint wpos_idx;
1069
1070 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1071 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1072
1073 wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
1074
1075 wpos_idx = -1;
1076 if (InputsRead & FRAG_BIT_WPOS){
1077 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1078 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1079 break;
1080
1081 if(i == ctx->Const.MaxTextureUnits){
1082 fprintf(stderr, "\tno free texcoord found\n");
1083 exit(0);
1084 }
1085
1086 InputsRead |= (FRAG_BIT_TEX0 << i);
1087 wpos_idx = i;
1088 }
1089
1090 if (InputsRead & FRAG_BIT_COL0)
1091 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
1092
1093 if ((InputsRead & FRAG_BIT_COL1) /*||
1094 (InputsRead & FRAG_BIT_FOGC)*/)
1095 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
1096
1097 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1098 if (InputsRead & (FRAG_BIT_TEX0 << i))
1099 wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
1100
1101 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1102 if(vpc->mesa_program.IsPositionInvariant) {
1103 /* we wan't position don't we ? */
1104 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1105 }
1106
1107 for (vp = vpc->progs; vp; vp = vp->next)
1108 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) {
1109 r300->selected_vp = vp;
1110 return ;
1111 }
1112
1113 //_mesa_print_program(&vpc->mesa_program.Base);
1114
1115 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1116 vp->next = vpc->progs;
1117 vpc->progs = vp;
1118 r300->selected_vp = vp;
1119 }