bumpmap sample is correct now
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \author Aapo Tahkola <aet@rasterburn.org>
32 */
33
34 #include "glheader.h"
35 #include "macros.h"
36 #include "enums.h"
37 #include "program.h"
38 #include "shader/prog_instruction.h"
39 #include "shader/prog_parameter.h"
40 #include "shader/prog_statevars.h"
41 #include "tnl/tnl.h"
42
43 #include "r300_context.h"
44
/*
 * Build-time guard: the translation helpers in this file (t_swizzle(),
 * t_dst_mask()) hand Mesa's SWIZZLE_* and WRITEMASK_* values to the
 * hardware encoding unchanged, so compilation is stopped if any of them
 * ever differs from the matching VSF_IN_COMPONENT_* / VSF_FLAG_* value.
 */
45 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
46 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
47 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
48 SWIZZLE_W != VSF_IN_COMPONENT_W || \
49 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
50 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
51 WRITEMASK_X != VSF_FLAG_X || \
52 WRITEMASK_Y != VSF_FLAG_Y || \
53 WRITEMASK_Z != VSF_FLAG_Z || \
54 WRITEMASK_W != VSF_FLAG_W
55 #error Cannot change these!
56 #endif
57
58 #define SCALAR_FLAG (1<<31)
59 #define FLAG_MASK (1<<31)
60 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
61 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
62
63 static struct {
64 char *name;
65 int opcode;
66 unsigned long ip; /* number of input operands and flags */
67 } op_names[] = {
68 /* *INDENT-OFF* */
69 OPN(ABS, 1),
70 OPN(ADD, 2),
71 OPN(ARL, 1 | SCALAR_FLAG),
72 OPN(DP3, 2),
73 OPN(DP4, 2),
74 OPN(DPH, 2),
75 OPN(DST, 2),
76 OPN(EX2, 1 | SCALAR_FLAG),
77 OPN(EXP, 1 | SCALAR_FLAG),
78 OPN(FLR, 1),
79 OPN(FRC, 1),
80 OPN(LG2, 1 | SCALAR_FLAG),
81 OPN(LIT, 1),
82 OPN(LOG, 1 | SCALAR_FLAG),
83 OPN(MAD, 3),
84 OPN(MAX, 2),
85 OPN(MIN, 2),
86 OPN(MOV, 1),
87 OPN(MUL, 2),
88 OPN(POW, 2 | SCALAR_FLAG),
89 OPN(RCP, 1 | SCALAR_FLAG),
90 OPN(RSQ, 1 | SCALAR_FLAG),
91 OPN(SGE, 2),
92 OPN(SLT, 2),
93 OPN(SUB, 2),
94 OPN(SWZ, 1),
95 OPN(XPD, 2),
96 OPN(RCC, 0), //extra
97 OPN(PRINT, 0),
98 OPN(END, 0)
99 /* *INDENT-ON* */
100 };
101
102 #undef OPN
103
104 int r300VertexProgUpdateParams(GLcontext * ctx,
105 struct r300_vertex_program_cont *vp, float *dst)
106 {
107 int pi;
108 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
109 float *dst_o = dst;
110 struct gl_program_parameter_list *paramList;
111
112 if (mesa_vp->IsNVProgram) {
113 _mesa_load_tracked_matrices(ctx);
114
115 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
116 *dst++ = ctx->VertexProgram.Parameters[pi][0];
117 *dst++ = ctx->VertexProgram.Parameters[pi][1];
118 *dst++ = ctx->VertexProgram.Parameters[pi][2];
119 *dst++ = ctx->VertexProgram.Parameters[pi][3];
120 }
121 return dst - dst_o;
122 }
123
124 assert(mesa_vp->Base.Parameters);
125 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
126
127 if (mesa_vp->Base.Parameters->NumParameters * 4 >
128 VSF_MAX_FRAGMENT_LENGTH) {
129 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
130 _mesa_exit(-1);
131 }
132
133 paramList = mesa_vp->Base.Parameters;
134 for (pi = 0; pi < paramList->NumParameters; pi++) {
135 switch (paramList->Parameters[pi].Type) {
136
137 case PROGRAM_STATE_VAR:
138 case PROGRAM_NAMED_PARAM:
139 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
140 case PROGRAM_CONSTANT:
141 *dst++ = paramList->ParameterValues[pi][0];
142 *dst++ = paramList->ParameterValues[pi][1];
143 *dst++ = paramList->ParameterValues[pi][2];
144 *dst++ = paramList->ParameterValues[pi][3];
145 break;
146
147 default:
148 _mesa_problem(NULL, "Bad param type in %s",
149 __FUNCTION__);
150 }
151
152 }
153
154 return dst - dst_o;
155 }
156
157 static unsigned long t_dst_mask(GLuint mask)
158 {
159 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160 return mask & VSF_FLAG_ALL;
161 }
162
163 static unsigned long t_dst_class(enum register_file file)
164 {
165
166 switch (file) {
167 case PROGRAM_TEMPORARY:
168 return VSF_OUT_CLASS_TMP;
169 case PROGRAM_OUTPUT:
170 return VSF_OUT_CLASS_RESULT;
171 case PROGRAM_ADDRESS:
172 return VSF_OUT_CLASS_ADDR;
173 /*
174 case PROGRAM_INPUT:
175 case PROGRAM_LOCAL_PARAM:
176 case PROGRAM_ENV_PARAM:
177 case PROGRAM_NAMED_PARAM:
178 case PROGRAM_STATE_VAR:
179 case PROGRAM_WRITE_ONLY:
180 case PROGRAM_ADDRESS:
181 */
182 default:
183 fprintf(stderr, "problem in %s", __FUNCTION__);
184 _mesa_exit(-1);
185 return -1;
186 }
187 }
188
189 static unsigned long t_dst_index(struct r300_vertex_program *vp,
190 struct prog_dst_register *dst)
191 {
192 if (dst->File == PROGRAM_OUTPUT)
193 return vp->outputs[dst->Index];
194
195 return dst->Index;
196 }
197
198 static unsigned long t_src_class(enum register_file file)
199 {
200
201 switch (file) {
202 case PROGRAM_TEMPORARY:
203 return VSF_IN_CLASS_TMP;
204
205 case PROGRAM_INPUT:
206 return VSF_IN_CLASS_ATTR;
207
208 case PROGRAM_LOCAL_PARAM:
209 case PROGRAM_ENV_PARAM:
210 case PROGRAM_NAMED_PARAM:
211 case PROGRAM_STATE_VAR:
212 return VSF_IN_CLASS_PARAM;
213 /*
214 case PROGRAM_OUTPUT:
215 case PROGRAM_WRITE_ONLY:
216 case PROGRAM_ADDRESS:
217 */
218 default:
219 fprintf(stderr, "problem in %s", __FUNCTION__);
220 _mesa_exit(-1);
221 return -1;
222 }
223 }
224
225 static inline unsigned long t_swizzle(GLubyte swizzle)
226 {
227 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
228 return swizzle;
229 }
230
#if 0
/**
 * Debug helper (compiled out): print every entry of vp->inputs[] to
 * stderr, tagged with the name of the calling function.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr = 0;

	if (vp != NULL) {
		fprintf(stderr, "%s:<", caller);
		while (attr < VERT_ATTRIB_MAX) {
			fprintf(stderr, "%d ", vp->inputs[attr]);
			attr++;
		}
		fprintf(stderr, ">\n");
		return;
	}

	fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
		caller);
}
#endif
249
250 static unsigned long t_src_index(struct r300_vertex_program *vp,
251 struct prog_src_register *src)
252 {
253 int i;
254 int max_reg = -1;
255
256 if (src->File == PROGRAM_INPUT) {
257 if (vp->inputs[src->Index] != -1)
258 return vp->inputs[src->Index];
259
260 for (i = 0; i < VERT_ATTRIB_MAX; i++)
261 if (vp->inputs[i] > max_reg)
262 max_reg = vp->inputs[i];
263
264 vp->inputs[src->Index] = max_reg + 1;
265
266 //vp_dump_inputs(vp, __FUNCTION__);
267
268 return vp->inputs[src->Index];
269 } else {
270 if (src->Index < 0) {
271 fprintf(stderr,
272 "negative offsets for indirect addressing do not work.\n");
273 return 0;
274 }
275 return src->Index;
276 }
277 }
278
279 static unsigned long t_src(struct r300_vertex_program *vp,
280 struct prog_src_register *src)
281 {
282 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
283 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
284 */
285 return MAKE_VSF_SOURCE(t_src_index(vp, src),
286 t_swizzle(GET_SWZ(src->Swizzle, 0)),
287 t_swizzle(GET_SWZ(src->Swizzle, 1)),
288 t_swizzle(GET_SWZ(src->Swizzle, 2)),
289 t_swizzle(GET_SWZ(src->Swizzle, 3)),
290 t_src_class(src->File),
291 src->NegateBase) | (src->RelAddr << 4);
292 }
293
294 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
295 struct prog_src_register *src)
296 {
297
298 return MAKE_VSF_SOURCE(t_src_index(vp, src),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_swizzle(GET_SWZ(src->Swizzle, 0)),
302 t_swizzle(GET_SWZ(src->Swizzle, 0)),
303 t_src_class(src->File),
304 src->
305 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
306 (src->RelAddr << 4);
307 }
308
309 static unsigned long t_opcode(enum prog_opcode opcode)
310 {
311
312 switch (opcode) {
313 /* *INDENT-OFF* */
314 case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
315 case OPCODE_DST: return R300_VPI_OUT_OP_DST;
316 case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
317 case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
318 case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
319 case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
320 case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
321 case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
322 case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
323 case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
324 case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
325 case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
326 case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
327 case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
328 case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
329 /* *INDENT-ON* */
330
331 default:
332 fprintf(stderr, "%s: Should not be called with opcode %d!",
333 __FUNCTION__, opcode);
334 }
335 _mesa_exit(-1);
336 return 0;
337 }
338
339 static unsigned long op_operands(enum prog_opcode opcode)
340 {
341 int i;
342
343 /* Can we trust mesas opcodes to be in order ? */
344 for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++)
345 if (op_names[i].opcode == opcode)
346 return op_names[i].ip;
347
348 fprintf(stderr, "op %d not found in op_names\n", opcode);
349 _mesa_exit(-1);
350 return 0;
351 }
352
353 static GLboolean valid_dst(struct r300_vertex_program *vp,
354 struct prog_dst_register *dst)
355 {
356 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
357 return GL_FALSE;
358 } else if (dst->File == PROGRAM_ADDRESS) {
359 assert(dst->Index == 0);
360 }
361
362 return GL_TRUE;
363 }
364
/* TODO: Get rid of t_src_class call */
/*
 * Non-zero when source registers `a` and `b` must not be read by the same
 * hardware instruction as they stand: either their RelAddr bits differ, or
 * their indices differ while both belong to the parameter class or both
 * belong to the attribute class.
 * Arguments are parenthesized so any lvalue expression can be passed.
 */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || \
	((a).Index != (b).Index && \
	 ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
	   t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
	  (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
	   t_src_class((b).File) == VSF_IN_CLASS_ATTR))))

/*
 * Source operand that reads register src[n] with all four components
 * replaced by the swizzle selector `swz` and no negation; the RelAddr bit
 * of src[n] is preserved.  Expects `vp` and `src` to be in scope at the
 * point of use.
 */
#define R300_VP_CONST_SRC(n, swz) \
	(MAKE_VSF_SOURCE(t_src_index(vp, &src[n]), \
			 swz, swz, \
			 swz, swz, \
			 t_src_class(src[n].File), VSF_FLAG_NONE) | \
	 (src[n].RelAddr << 4))

/* All-zero operands built on src[0], src[1] and src[2]. */
#define ZERO_SRC_0 R300_VP_CONST_SRC(0, SWIZZLE_ZERO)
#define ZERO_SRC_1 R300_VP_CONST_SRC(1, SWIZZLE_ZERO)
#define ZERO_SRC_2 R300_VP_CONST_SRC(2, SWIZZLE_ZERO)

/* All-one operands built on src[0], src[1] and src[2]. */
#define ONE_SRC_0 R300_VP_CONST_SRC(0, SWIZZLE_ONE)
#define ONE_SRC_1 R300_VP_CONST_SRC(1, SWIZZLE_ONE)
#define ONE_SRC_2 R300_VP_CONST_SRC(2, SWIZZLE_ONE)

/* DP4 version seems to trigger some hw peculiarity */
//#define PREFER_DP4

/*
 * Release all scratch temporaries.  Scratch registers are handed out from
 * the top (VSF_MAX_FRAGMENT_TEMPS - 1) downwards; if the counter has dropped
 * below vp->num_temporaries a warning is issued once and the program is
 * marked non-native.  Expects `vp` and `u_temp_i` to be in scope.
 */
#define FREE_TEMPS() \
	do { \
		if(u_temp_i < vp->num_temporaries) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
			vp->native = GL_FALSE; \
		} \
		u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
	} while (0)
413
/**
 * Translate the Mesa instruction list starting at vpi (terminated by
 * OPCODE_END) into hardware vertex shader instructions written to
 * vp->program.body.i.
 *
 * Side effects visible in this function:
 *  - vp->inputs[] and vp->outputs[] are reset to -1; output registers are
 *    then numbered consecutively for HPOS, PSIZ, COL0, COL1 and TEX0..TEX7,
 *    in that order, for each bit set in vp->key.OutputsWritten.  Input
 *    registers are numbered on first use inside t_src_index().
 *  - vp->translated and vp->native are set to GL_TRUE before translation;
 *    vp->native is cleared again by FREE_TEMPS() or by the final length
 *    check.
 *  - vp->program.length is set to four times the number of emitted
 *    instructions, or to 0 when the final length check fails.
 *  - vpi itself is modified: an instruction whose destination fails
 *    valid_dst() has its DstReg redirected to a scratch temporary.
 *
 * \param vp   translation state and destination of the hardware program
 * \param vpi  first Mesa instruction of the program
 */
414 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
415 struct prog_instruction *vpi)
416 {
417 int i, cur_reg = 0;
418 VERTEX_SHADER_INSTRUCTION *o_inst;
419 unsigned long operands;
420 int are_srcs_scalar;
421 unsigned long hw_op;
422 /* Initial value should be last tmp reg that hw supports.
423 Strangely enough r300 doesnt mind even though these would be out of range.
424 Smart enough to realize that it doesnt need it? */
425 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
426 struct prog_src_register src[3];
427
428 vp->pos_end = 0; /* Not supported yet */
429 vp->program.length = 0;
430 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
431
432 for (i = 0; i < VERT_ATTRIB_MAX; i++)
433 vp->inputs[i] = -1;
434
435 for (i = 0; i < VERT_RESULT_MAX; i++)
436 vp->outputs[i] = -1;
437
438 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
439
440 /* Assign outputs */
441 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS))
442 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
443
444 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ))
445 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
446
447 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0))
448 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
449
450 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1))
451 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
452
453 #if 0 /* Not supported yet */
454 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0))
455 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
456
457 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1))
458 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
459
460 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC))
461 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
462 #endif
463
464 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
465 if (vp->key.OutputsWritten & (1 << i))
466 vp->outputs[i] = cur_reg++;
467
468 vp->translated = GL_TRUE;
469 vp->native = GL_TRUE;
470
471 o_inst = vp->program.body.i;
/* One pass per Mesa instruction.  Each pass emits one hardware
 * instruction, plus extra ones for the source copies, FLR and XPD below. */
472 for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) {
/* Start with the full pool of scratch temporaries.  FREE_TEMPS() also
 * marks the program non-native if the scratch use of the previous pass
 * dropped below vp->num_temporaries. */
473 FREE_TEMPS();
474
475 if (!valid_dst(vp, &vpi->DstReg)) {
476 /* redirect result to unused temp */
477 vpi->DstReg.File = PROGRAM_TEMPORARY;
478 vpi->DstReg.Index = u_temp_i;
479 }
480
/* op_operands() packs the operand count and SCALAR_FLAG into one value. */
481 operands = op_operands(vpi->Opcode);
482 are_srcs_scalar = operands & SCALAR_FLAG;
483 operands &= OP_MASK;
484
/* Work on local copies so sources can be redirected to scratch temps. */
485 for (i = 0; i < operands; i++)
486 src[i] = vpi->SrcReg[i];
487
/* If CMP_SRCS() flags src[2] against either other operand, copy src[2]
 * into a scratch temporary first (ADD of src[2] and zero) and let the
 * real instruction read that temporary instead. */
488 if (operands == 3) { /* TODO: scalars */
489 if (CMP_SRCS(src[1], src[2])
490 || CMP_SRCS(src[0], src[2])) {
491 o_inst->op =
492 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
493 VSF_FLAG_ALL,
494 VSF_OUT_CLASS_TMP);
495
496 o_inst->src[0] =
497 MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
498 SWIZZLE_X, SWIZZLE_Y,
499 SWIZZLE_Z, SWIZZLE_W,
500 t_src_class(src[2].File),
501 VSF_FLAG_NONE) | (src[2].
502 RelAddr <<
503 4);
504
505 o_inst->src[1] = ZERO_SRC_2;
506 o_inst->src[2] = ZERO_SRC_2;
507 o_inst++;
508
509 src[2].File = PROGRAM_TEMPORARY;
510 src[2].Index = u_temp_i;
511 src[2].RelAddr = 0;
512 u_temp_i--;
513 }
514
515 }
516
/* Same treatment for src[0] when CMP_SRCS() flags it against src[1]. */
517 if (operands >= 2) {
518 if (CMP_SRCS(src[1], src[0])) {
519 o_inst->op =
520 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
521 VSF_FLAG_ALL,
522 VSF_OUT_CLASS_TMP);
523
524 o_inst->src[0] =
525 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
526 SWIZZLE_X, SWIZZLE_Y,
527 SWIZZLE_Z, SWIZZLE_W,
528 t_src_class(src[0].File),
529 VSF_FLAG_NONE) | (src[0].
530 RelAddr <<
531 4);
532
533 o_inst->src[1] = ZERO_SRC_0;
534 o_inst->src[2] = ZERO_SRC_0;
535 o_inst++;
536
537 src[0].File = PROGRAM_TEMPORARY;
538 src[0].Index = u_temp_i;
539 src[0].RelAddr = 0;
540 u_temp_i--;
541 }
542 }
543
544 /* These ops need special handling. */
545 switch (vpi->Opcode) {
/* POW: the two scalar operands go into source slots 0 and 2; slot 1 is
 * zero-filled. */
546 case OPCODE_POW:
547 o_inst->op =
548 MAKE_VSF_OP(R300_VPI_OUT_OP_POW,
549 t_dst_index(vp, &vpi->DstReg),
550 t_dst_mask(vpi->DstReg.WriteMask),
551 t_dst_class(vpi->DstReg.File));
552 o_inst->src[0] = t_src_scalar(vp, &src[0]);
553 o_inst->src[1] = ZERO_SRC_0;
554 o_inst->src[2] = t_src_scalar(vp, &src[1]);
555 goto next;
556
557 case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
558 case OPCODE_SWZ:
559 #if 1
560 o_inst->op =
561 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
562 t_dst_index(vp, &vpi->DstReg),
563 t_dst_mask(vpi->DstReg.WriteMask),
564 t_dst_class(vpi->DstReg.File));
565 o_inst->src[0] = t_src(vp, &src[0]);
566 o_inst->src[1] = ZERO_SRC_0;
567 o_inst->src[2] = ZERO_SRC_0;
568 #else
569 hw_op =
570 (src[0].File ==
571 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
572 R300_VPI_OUT_OP_MAD;
573
574 o_inst->op =
575 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
576 t_dst_mask(vpi->DstReg.WriteMask),
577 t_dst_class(vpi->DstReg.File));
578 o_inst->src[0] = t_src(vp, &src[0]);
579 o_inst->src[1] = ONE_SRC_0;
580 o_inst->src[2] = ZERO_SRC_0;
581 #endif
582
583 goto next;
584
/* ADD is emitted as a MAD: 1 * src[0] + src[1].  MAD_2 is chosen when
 * every real source is a temporary. */
585 case OPCODE_ADD:
586 #if 1
587 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
588 src[1].File ==
589 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
590 R300_VPI_OUT_OP_MAD;
591
592 o_inst->op =
593 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
594 t_dst_mask(vpi->DstReg.WriteMask),
595 t_dst_class(vpi->DstReg.File));
596 o_inst->src[0] = ONE_SRC_0;
597 o_inst->src[1] = t_src(vp, &src[0]);
598 o_inst->src[2] = t_src(vp, &src[1]);
599 #else
600 o_inst->op =
601 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
602 t_dst_index(vp, &vpi->DstReg),
603 t_dst_mask(vpi->DstReg.WriteMask),
604 t_dst_class(vpi->DstReg.File));
605 o_inst->src[0] = t_src(vp, &src[0]);
606 o_inst->src[1] = t_src(vp, &src[1]);
607 o_inst->src[2] = ZERO_SRC_1;
608
609 #endif
610 goto next;
611
612 case OPCODE_MAD:
613 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
614 src[1].File == PROGRAM_TEMPORARY &&
615 src[2].File ==
616 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
617 R300_VPI_OUT_OP_MAD;
618
619 o_inst->op =
620 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
621 t_dst_mask(vpi->DstReg.WriteMask),
622 t_dst_class(vpi->DstReg.File));
623 o_inst->src[0] = t_src(vp, &src[0]);
624 o_inst->src[1] = t_src(vp, &src[1]);
625 o_inst->src[2] = t_src(vp, &src[2]);
626 goto next;
627
628 case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
629 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
630 src[1].File ==
631 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
632 R300_VPI_OUT_OP_MAD;
633
634 o_inst->op =
635 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
636 t_dst_mask(vpi->DstReg.WriteMask),
637 t_dst_class(vpi->DstReg.File));
638 o_inst->src[0] = t_src(vp, &src[0]);
639 o_inst->src[1] = t_src(vp, &src[1]);
640
641 o_inst->src[2] = ZERO_SRC_1;
642 goto next;
643
644 case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
645 o_inst->op =
646 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
647 t_dst_index(vp, &vpi->DstReg),
648 t_dst_mask(vpi->DstReg.WriteMask),
649 t_dst_class(vpi->DstReg.File));
650
651 o_inst->src[0] =
652 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
653 t_swizzle(GET_SWZ
654 (src[0].Swizzle, 0)),
655 t_swizzle(GET_SWZ
656 (src[0].Swizzle, 1)),
657 t_swizzle(GET_SWZ
658 (src[0].Swizzle, 2)),
659 SWIZZLE_ZERO,
660 t_src_class(src[0].File),
661 src[0].
662 NegateBase ? VSF_FLAG_XYZ :
663 VSF_FLAG_NONE) | (src[0].
664 RelAddr << 4);
665
666 o_inst->src[1] =
667 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
668 t_swizzle(GET_SWZ
669 (src[1].Swizzle, 0)),
670 t_swizzle(GET_SWZ
671 (src[1].Swizzle, 1)),
672 t_swizzle(GET_SWZ
673 (src[1].Swizzle, 2)),
674 SWIZZLE_ZERO,
675 t_src_class(src[1].File),
676 src[1].
677 NegateBase ? VSF_FLAG_XYZ :
678 VSF_FLAG_NONE) | (src[1].
679 RelAddr << 4);
680
681 o_inst->src[2] = ZERO_SRC_1;
682 goto next;
683
/* SUB is emitted as a MAD: src[0] * 1 + src[1], with the negate flags of
 * src[1] inverted. */
684 case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
685 #if 1
686 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
687 src[1].File ==
688 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
689 R300_VPI_OUT_OP_MAD;
690
691 o_inst->op =
692 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
693 t_dst_mask(vpi->DstReg.WriteMask),
694 t_dst_class(vpi->DstReg.File));
695 o_inst->src[0] = t_src(vp, &src[0]);
696 o_inst->src[1] = ONE_SRC_0;
697 o_inst->src[2] =
698 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
699 t_swizzle(GET_SWZ
700 (src[1].Swizzle, 0)),
701 t_swizzle(GET_SWZ
702 (src[1].Swizzle, 1)),
703 t_swizzle(GET_SWZ
704 (src[1].Swizzle, 2)),
705 t_swizzle(GET_SWZ
706 (src[1].Swizzle, 3)),
707 t_src_class(src[1].File),
708 (!src[1].
709 NegateBase) ? VSF_FLAG_ALL :
710 VSF_FLAG_NONE) | (src[1].
711 RelAddr << 4);
712 #else
713 o_inst->op =
714 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
715 t_dst_index(vp, &vpi->DstReg),
716 t_dst_mask(vpi->DstReg.WriteMask),
717 t_dst_class(vpi->DstReg.File));
718
719 o_inst->src[0] = t_src(vp, &src[0]);
720 o_inst->src[1] =
721 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
722 t_swizzle(GET_SWZ
723 (src[1].Swizzle, 0)),
724 t_swizzle(GET_SWZ
725 (src[1].Swizzle, 1)),
726 t_swizzle(GET_SWZ
727 (src[1].Swizzle, 2)),
728 t_swizzle(GET_SWZ
729 (src[1].Swizzle, 3)),
730 t_src_class(src[1].File),
731 (!src[1].
732 NegateBase) ? VSF_FLAG_ALL :
733 VSF_FLAG_NONE) | (src[1].
734 RelAddr << 4);
735 o_inst->src[2] = 0;
736 #endif
737 goto next;
738
/* ABS is emitted as MAX of the source and the same source with its
 * negate flags inverted. */
739 case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
740 o_inst->op =
741 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX,
742 t_dst_index(vp, &vpi->DstReg),
743 t_dst_mask(vpi->DstReg.WriteMask),
744 t_dst_class(vpi->DstReg.File));
745
746 o_inst->src[0] = t_src(vp, &src[0]);
747 o_inst->src[1] =
748 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
749 t_swizzle(GET_SWZ
750 (src[0].Swizzle, 0)),
751 t_swizzle(GET_SWZ
752 (src[0].Swizzle, 1)),
753 t_swizzle(GET_SWZ
754 (src[0].Swizzle, 2)),
755 t_swizzle(GET_SWZ
756 (src[0].Swizzle, 3)),
757 t_src_class(src[0].File),
758 (!src[0].
759 NegateBase) ? VSF_FLAG_ALL :
760 VSF_FLAG_NONE) | (src[0].
761 RelAddr << 4);
762 o_inst->src[2] = 0;
763 goto next;
764
/* FLR takes two hardware instructions: FRC into a scratch temporary,
 * then ADD of the source and that temporary.
 * NOTE(review): the temporary is only negated when src[0].NegateBase is
 * clear; with NegateBase set the fraction is added instead of subtracted.
 * Confirm whether the negate should be unconditional (see the
 * commented-out VSF_FLAG_ALL below). */
765 case OPCODE_FLR:
766 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
767 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
768
769 o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
770 t_dst_mask(vpi->DstReg.
771 WriteMask),
772 VSF_OUT_CLASS_TMP);
773
774 o_inst->src[0] = t_src(vp, &src[0]);
775 o_inst->src[1] = ZERO_SRC_0;
776 o_inst->src[2] = ZERO_SRC_0;
777 o_inst++;
778
779 o_inst->op =
780 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
781 t_dst_index(vp, &vpi->DstReg),
782 t_dst_mask(vpi->DstReg.WriteMask),
783 t_dst_class(vpi->DstReg.File));
784
785 o_inst->src[0] = t_src(vp, &src[0]);
786 o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i,
787 VSF_IN_COMPONENT_X,
788 VSF_IN_COMPONENT_Y,
789 VSF_IN_COMPONENT_Z,
790 VSF_IN_COMPONENT_W,
791 VSF_IN_CLASS_TMP,
792 /* Not 100% sure about this */
793 (!src[0].
794 NegateBase) ?
795 VSF_FLAG_ALL :
796 VSF_FLAG_NONE
797 /*VSF_FLAG_ALL */ );
798
799 o_inst->src[2] = ZERO_SRC_0;
800 u_temp_i--;
801 goto next;
802
803 case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
804 o_inst->op =
805 MAKE_VSF_OP(R300_VPI_OUT_OP_LG2,
806 t_dst_index(vp, &vpi->DstReg),
807 t_dst_mask(vpi->DstReg.WriteMask),
808 t_dst_class(vpi->DstReg.File));
809
810 o_inst->src[0] =
811 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
812 t_swizzle(GET_SWZ
813 (src[0].Swizzle, 0)),
814 t_swizzle(GET_SWZ
815 (src[0].Swizzle, 0)),
816 t_swizzle(GET_SWZ
817 (src[0].Swizzle, 0)),
818 t_swizzle(GET_SWZ
819 (src[0].Swizzle, 0)),
820 t_src_class(src[0].File),
821 src[0].
822 NegateBase ? VSF_FLAG_ALL :
823 VSF_FLAG_NONE) | (src[0].
824 RelAddr << 4);
825 o_inst->src[1] = ZERO_SRC_0;
826 o_inst->src[2] = ZERO_SRC_0;
827 goto next;
828
829 case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
830 o_inst->op =
831 MAKE_VSF_OP(R300_VPI_OUT_OP_LIT,
832 t_dst_index(vp, &vpi->DstReg),
833 t_dst_mask(vpi->DstReg.WriteMask),
834 t_dst_class(vpi->DstReg.File));
835 /* NOTE: Users swizzling might not work. */
836 o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
837 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
838 VSF_IN_COMPONENT_ZERO, // z
839 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
840 t_src_class(src[0].
841 File),
842 src[0].
843 NegateBase ?
844 VSF_FLAG_ALL :
845 VSF_FLAG_NONE) |
846 (src[0].RelAddr << 4);
847 o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
848 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
849 VSF_IN_COMPONENT_ZERO, // z
850 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
851 t_src_class(src[0].
852 File),
853 src[0].
854 NegateBase ?
855 VSF_FLAG_ALL :
856 VSF_FLAG_NONE) |
857 (src[0].RelAddr << 4);
858 o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
859 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
860 VSF_IN_COMPONENT_ZERO, // z
861 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
862 t_src_class(src[0].
863 File),
864 src[0].
865 NegateBase ?
866 VSF_FLAG_ALL :
867 VSF_FLAG_NONE) |
868 (src[0].RelAddr << 4);
869 goto next;
870
871 case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
872 o_inst->op =
873 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
874 t_dst_index(vp, &vpi->DstReg),
875 t_dst_mask(vpi->DstReg.WriteMask),
876 t_dst_class(vpi->DstReg.File));
877
878 o_inst->src[0] =
879 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
880 t_swizzle(GET_SWZ
881 (src[0].Swizzle, 0)),
882 t_swizzle(GET_SWZ
883 (src[0].Swizzle, 1)),
884 t_swizzle(GET_SWZ
885 (src[0].Swizzle, 2)),
886 VSF_IN_COMPONENT_ONE,
887 t_src_class(src[0].File),
888 src[0].
889 NegateBase ? VSF_FLAG_XYZ :
890 VSF_FLAG_NONE) | (src[0].
891 RelAddr << 4);
892 o_inst->src[1] = t_src(vp, &src[1]);
893 o_inst->src[2] = ZERO_SRC_1;
894 goto next;
895
/* XPD takes two hardware instructions.  The first writes its product to
 * scratch temporary u_temp_i; after the decrement below the second reads
 * that temporary back as u_temp_i + 1. */
896 case OPCODE_XPD:
897 /* mul r0, r1.yzxw, r2.zxyw
898 mad r0, -r2.yzxw, r1.zxyw, r0
899 NOTE: might need MAD_2
900 */
901
902 o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
903 t_dst_mask(vpi->DstReg.
904 WriteMask),
905 VSF_OUT_CLASS_TMP);
906
907 o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
908 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
909 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
910 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
911 t_src_class(src[0].
912 File),
913 src[0].
914 NegateBase ?
915 VSF_FLAG_ALL :
916 VSF_FLAG_NONE) |
917 (src[0].RelAddr << 4);
918
919 o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
920 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
921 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
922 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
923 t_src_class(src[1].
924 File),
925 src[1].
926 NegateBase ?
927 VSF_FLAG_ALL :
928 VSF_FLAG_NONE) |
929 (src[1].RelAddr << 4);
930
931 o_inst->src[2] = ZERO_SRC_1;
932 o_inst++;
933 u_temp_i--;
934
935 o_inst->op =
936 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD,
937 t_dst_index(vp, &vpi->DstReg),
938 t_dst_mask(vpi->DstReg.WriteMask),
939 t_dst_class(vpi->DstReg.File));
940
941 o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
942 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
943 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
944 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
945 t_src_class(src[1].
946 File),
947 (!src[1].
948 NegateBase) ?
949 VSF_FLAG_ALL :
950 VSF_FLAG_NONE) |
951 (src[1].RelAddr << 4);
952
953 o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
954 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
955 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
956 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
957 t_src_class(src[0].
958 File),
959 src[0].
960 NegateBase ?
961 VSF_FLAG_ALL :
962 VSF_FLAG_NONE) |
963 (src[0].RelAddr << 4);
964
965 o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1,
966 VSF_IN_COMPONENT_X,
967 VSF_IN_COMPONENT_Y,
968 VSF_IN_COMPONENT_Z,
969 VSF_IN_COMPONENT_W,
970 VSF_IN_CLASS_TMP,
971 VSF_FLAG_NONE);
972
973 goto next;
974
975 case OPCODE_RCC:
976 fprintf(stderr, "Dont know how to handle op %d yet\n",
977 vpi->Opcode);
978 _mesa_exit(-1);
979 break;
980 case OPCODE_END:
981 break;
982 default:
983 break;
984 }
985
/* Generic path: every opcode that did not "goto next" above maps to one
 * hardware opcode through t_opcode().  Only the packing of the sources
 * depends on the operand count and on the scalar flag. */
986 o_inst->op =
987 MAKE_VSF_OP(t_opcode(vpi->Opcode),
988 t_dst_index(vp, &vpi->DstReg),
989 t_dst_mask(vpi->DstReg.WriteMask),
990 t_dst_class(vpi->DstReg.File));
991
992 if (are_srcs_scalar) {
993 switch (operands) {
994 case 1:
995 o_inst->src[0] = t_src_scalar(vp, &src[0]);
996 o_inst->src[1] = ZERO_SRC_0;
997 o_inst->src[2] = ZERO_SRC_0;
998 break;
999
1000 case 2:
1001 o_inst->src[0] = t_src_scalar(vp, &src[0]);
1002 o_inst->src[1] = t_src_scalar(vp, &src[1]);
1003 o_inst->src[2] = ZERO_SRC_1;
1004 break;
1005
1006 case 3:
1007 o_inst->src[0] = t_src_scalar(vp, &src[0]);
1008 o_inst->src[1] = t_src_scalar(vp, &src[1]);
1009 o_inst->src[2] = t_src_scalar(vp, &src[2]);
1010 break;
1011
1012 default:
1013 fprintf(stderr,
1014 "scalars and op RCC not handled yet");
1015 _mesa_exit(-1);
1016 break;
1017 }
1018 } else {
1019 switch (operands) {
1020 case 1:
1021 o_inst->src[0] = t_src(vp, &src[0]);
1022 o_inst->src[1] = ZERO_SRC_0;
1023 o_inst->src[2] = ZERO_SRC_0;
1024 break;
1025
1026 case 2:
1027 o_inst->src[0] = t_src(vp, &src[0]);
1028 o_inst->src[1] = t_src(vp, &src[1]);
1029 o_inst->src[2] = ZERO_SRC_1;
1030 break;
1031
1032 case 3:
1033 o_inst->src[0] = t_src(vp, &src[0]);
1034 o_inst->src[1] = t_src(vp, &src[1]);
1035 o_inst->src[2] = t_src(vp, &src[2]);
1036 break;
1037
1038 default:
1039 fprintf(stderr,
1040 "scalars and op RCC not handled yet");
1041 _mesa_exit(-1);
1042 break;
1043 }
1044 }
/* Common exit of the loop body; the for statement advances vpi and o_inst. */
1045 next:;
1046 }
1047
/* NOTE(review): this check runs only after every instruction has already
 * been written through o_inst, so it cannot prevent a write past the end
 * of vp->program.body.i; it only rejects the result afterwards. */
1048 /* Will most likely segfault before we get here... fix later. */
1049 if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) {
1050 vp->program.length = 0;
1051 vp->native = GL_FALSE;
1052 return;
1053 }
1054 vp->program.length = (o_inst - vp->program.body.i) * 4;
1055 #if 0
1056 fprintf(stderr, "hw program:\n");
1057 for (i = 0; i < vp->program.length; i++)
1058 fprintf(stderr, "%08x\n", vp->program.body.d[i]);
1059 #endif
1060 }
1061
1062 static void position_invariant(struct gl_program *prog)
1063 {
1064 struct prog_instruction *vpi;
1065 struct gl_program_parameter_list *paramList;
1066 int i;
1067
1068 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
1069
1070 /* tokens[4] = matrix modifier */
1071 #ifdef PREFER_DP4
1072 tokens[4] = 0; /* not transposed or inverted */
1073 #else
1074 tokens[4] = STATE_MATRIX_TRANSPOSE;
1075 #endif
1076 paramList = prog->Parameters;
1077
1078 vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
1079 _mesa_init_instructions(vpi, prog->NumInstructions + 4);
1080
1081 for (i = 0; i < 4; i++) {
1082 GLint idx;
1083 tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
1084 idx = _mesa_add_state_reference(paramList, tokens);
1085 #ifdef PREFER_DP4
1086 vpi[i].Opcode = OPCODE_DP4;
1087 vpi[i].StringPos = 0;
1088 vpi[i].Data = 0;
1089
1090 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1091 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
1092 vpi[i].DstReg.WriteMask = 1 << i;
1093 vpi[i].DstReg.CondMask = COND_TR;
1094
1095 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1096 vpi[i].SrcReg[0].Index = idx;
1097 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1098
1099 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1100 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1101 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
1102 #else
1103 if (i == 0)
1104 vpi[i].Opcode = OPCODE_MUL;
1105 else
1106 vpi[i].Opcode = OPCODE_MAD;
1107
1108 vpi[i].StringPos = 0;
1109 vpi[i].Data = 0;
1110
1111 if (i == 3)
1112 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1113 else
1114 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
1115 vpi[i].DstReg.Index = 0;
1116 vpi[i].DstReg.WriteMask = 0xf;
1117 vpi[i].DstReg.CondMask = COND_TR;
1118
1119 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1120 vpi[i].SrcReg[0].Index = idx;
1121 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1122
1123 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1124 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1125 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
1126
1127 if (i > 0) {
1128 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
1129 vpi[i].SrcReg[2].Index = 0;
1130 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
1131 }
1132 #endif
1133 }
1134
1135 _mesa_copy_instructions(&vpi[i], prog->Instructions,
1136 prog->NumInstructions);
1137
1138 free(prog->Instructions);
1139
1140 prog->Instructions = vpi;
1141
1142 prog->NumInstructions += 4;
1143 vpi = &prog->Instructions[prog->NumInstructions - 1];
1144
1145 assert(vpi->Opcode == OPCODE_END);
1146 }
1147
1148 static void insert_wpos(struct r300_vertex_program *vp,
1149 struct gl_program *prog, GLuint temp_index)
1150 {
1151 struct prog_instruction *vpi;
1152 struct prog_instruction *vpi_insert;
1153 int i = 0;
1154
1155 vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
1156 _mesa_init_instructions(vpi, prog->NumInstructions + 2);
1157 /* all but END */
1158 _mesa_copy_instructions(vpi, prog->Instructions,
1159 prog->NumInstructions - 1);
1160 /* END */
1161 _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
1162 &prog->Instructions[prog->NumInstructions - 1],
1163 1);
1164 vpi_insert = &vpi[prog->NumInstructions - 1];
1165
1166 vpi_insert[i].Opcode = OPCODE_MOV;
1167
1168 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1169 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
1170 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1171 vpi_insert[i].DstReg.CondMask = COND_TR;
1172
1173 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1174 vpi_insert[i].SrcReg[0].Index = temp_index;
1175 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1176 i++;
1177
1178 vpi_insert[i].Opcode = OPCODE_MOV;
1179
1180 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1181 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1182 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1183 vpi_insert[i].DstReg.CondMask = COND_TR;
1184
1185 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1186 vpi_insert[i].SrcReg[0].Index = temp_index;
1187 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1188 i++;
1189
1190 free(prog->Instructions);
1191
1192 prog->Instructions = vpi;
1193
1194 prog->NumInstructions += i;
1195 vpi = &prog->Instructions[prog->NumInstructions - 1];
1196
1197 assert(vpi->Opcode == OPCODE_END);
1198 }
1199
1200 static void pos_as_texcoord(struct r300_vertex_program *vp,
1201 struct gl_program *prog)
1202 {
1203 struct prog_instruction *vpi;
1204 GLuint tempregi = prog->NumTemporaries;
1205 /* should do something else if no temps left... */
1206 prog->NumTemporaries++;
1207
1208 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1209 if (vpi->DstReg.File == PROGRAM_OUTPUT &&
1210 vpi->DstReg.Index == VERT_RESULT_HPOS) {
1211 vpi->DstReg.File = PROGRAM_TEMPORARY;
1212 vpi->DstReg.Index = tempregi;
1213 }
1214 }
1215 insert_wpos(vp, prog, tempregi);
1216 }
1217
1218 static struct r300_vertex_program *build_program(struct r300_vertex_program_key
1219 *wanted_key, struct gl_vertex_program
1220 *mesa_vp, GLint wpos_idx)
1221 {
1222 struct r300_vertex_program *vp;
1223
1224 vp = _mesa_calloc(sizeof(*vp));
1225 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1226
1227 vp->wpos_idx = wpos_idx;
1228
1229 if (mesa_vp->IsPositionInvariant) {
1230 position_invariant(&mesa_vp->Base);
1231 }
1232
1233 if (wpos_idx > -1)
1234 pos_as_texcoord(vp, &mesa_vp->Base);
1235
1236 assert(mesa_vp->Base.NumInstructions);
1237
1238 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1239
1240 r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
1241
1242 return vp;
1243 }
1244
1245 void r300SelectVertexShader(r300ContextPtr r300)
1246 {
1247 GLcontext *ctx = ctx = r300->radeon.glCtx;
1248 GLuint InputsRead;
1249 struct r300_vertex_program_key wanted_key = { 0 };
1250 GLint i;
1251 struct r300_vertex_program_cont *vpc;
1252 struct r300_vertex_program *vp;
1253 GLint wpos_idx;
1254
1255 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1256 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1257
1258 wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
1259
1260 wpos_idx = -1;
1261 if (InputsRead & FRAG_BIT_WPOS) {
1262 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1263 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1264 break;
1265
1266 if (i == ctx->Const.MaxTextureUnits) {
1267 fprintf(stderr, "\tno free texcoord found\n");
1268 _mesa_exit(-1);
1269 }
1270
1271 InputsRead |= (FRAG_BIT_TEX0 << i);
1272 wpos_idx = i;
1273 }
1274
1275 if (InputsRead & FRAG_BIT_COL0)
1276 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
1277
1278 if ((InputsRead & FRAG_BIT_COL1) /*||
1279 (InputsRead & FRAG_BIT_FOGC) */ )
1280 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
1281
1282 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1283 if (InputsRead & (FRAG_BIT_TEX0 << i))
1284 wanted_key.OutputsWritten |=
1285 1 << (VERT_RESULT_TEX0 + i);
1286
1287 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1288 if (vpc->mesa_program.IsPositionInvariant) {
1289 /* we wan't position don't we ? */
1290 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1291 }
1292
1293 for (vp = vpc->progs; vp; vp = vp->next)
1294 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) ==
1295 0) {
1296 r300->selected_vp = vp;
1297 return;
1298 }
1299 //_mesa_print_program(&vpc->mesa_program.Base);
1300
1301 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1302 vp->next = vpc->progs;
1303 vpc->progs = vp;
1304 r300->selected_vp = vp;
1305 }