r300: Indented r300_vertprog.[ch].
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 */
32 #include "glheader.h"
33 #include "macros.h"
34 #include "enums.h"
35 #include "program.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_parameter.h"
38 #include "shader/prog_statevars.h"
39 #include "tnl/tnl.h"
40
41 #include "r300_context.h"
42 #include "r300_program.h"
43
/*
 * Compile-time guard.  The helpers t_swizzle() and t_dst_mask() in this
 * file hand Mesa's SWIZZLE_* and WRITEMASK_* values to the hardware
 * encoding without any translation, so the Mesa constants and the
 * VSF_IN_COMPONENT_* / VSF_FLAG_* constants must stay numerically equal.
 * The build is stopped if any pair ever diverges.
 */
44 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
45 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
46 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
47 SWIZZLE_W != VSF_IN_COMPONENT_W || \
48 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
49 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
50 WRITEMASK_X != VSF_FLAG_X || \
51 WRITEMASK_Y != VSF_FLAG_Y || \
52 WRITEMASK_Z != VSF_FLAG_Z || \
53 WRITEMASK_W != VSF_FLAG_W
54 #error Cannot change these!
55 #endif
56
/*
 * Encoding of the `ip` field of the opcode table:
 *   - OP_MASK selects the low bits that hold the source operand count;
 *   - SCALAR_FLAG (bit 31) marks opcodes whose sources are read as scalars.
 *
 * The flag constants are unsigned: `1 << 31` on a signed int shifts into
 * the sign bit (undefined behaviour) and sign-extends when widened to
 * unsigned long.
 */
#define SCALAR_FLAG (1U << 31)
#define FLAG_MASK (1U << 31)
#define OP_MASK (0xf)		/* we are unlikely to have more than 15 */
/* Build one opcode-table entry: { "NAME", OPCODE_NAME, ip }. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
61
/*
 * Table of the Mesa opcodes this translator knows about.
 *
 * Each entry is built with OPN() and holds the opcode's name as a string,
 * its OPCODE_* value, and `ip`: the number of source operands in the low
 * bits (OP_MASK), optionally OR'ed with SCALAR_FLAG.  op_operands()
 * searches this table linearly by opcode value.
 */
62 static struct {
63 char *name;
64 int opcode;
65 unsigned long ip; /* number of input operands and flags */
66 } op_names[] = {
67 /* *INDENT-OFF* */
68 OPN(ABS, 1),
69 OPN(ADD, 2),
70 OPN(ARL, 1 | SCALAR_FLAG),
71 OPN(DP3, 2),
72 OPN(DP4, 2),
73 OPN(DPH, 2),
74 OPN(DST, 2),
75 OPN(EX2, 1 | SCALAR_FLAG),
76 OPN(EXP, 1 | SCALAR_FLAG),
77 OPN(FLR, 1),
78 OPN(FRC, 1),
79 OPN(LG2, 1 | SCALAR_FLAG),
80 OPN(LIT, 1),
81 OPN(LOG, 1 | SCALAR_FLAG),
82 OPN(MAD, 3),
83 OPN(MAX, 2),
84 OPN(MIN, 2),
85 OPN(MOV, 1),
86 OPN(MUL, 2),
87 OPN(POW, 2 | SCALAR_FLAG),
88 OPN(RCP, 1 | SCALAR_FLAG),
89 OPN(RSQ, 1 | SCALAR_FLAG),
90 OPN(SGE, 2),
91 OPN(SLT, 2),
92 OPN(SUB, 2),
93 OPN(SWZ, 1),
94 OPN(XPD, 2),
95 OPN(RCC, 0), //extra
96 OPN(PRINT, 0),
97 OPN(END, 0)
98 /* *INDENT-ON* */
99 };
100
/* OPN is only needed to build the table above. */
101 #undef OPN
102
103 int r300VertexProgUpdateParams(GLcontext * ctx,
104 struct r300_vertex_program_cont *vp, float *dst)
105 {
106 int pi;
107 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
108 float *dst_o = dst;
109 struct gl_program_parameter_list *paramList;
110
111 if (mesa_vp->IsNVProgram) {
112 _mesa_load_tracked_matrices(ctx);
113
114 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
115 *dst++ = ctx->VertexProgram.Parameters[pi][0];
116 *dst++ = ctx->VertexProgram.Parameters[pi][1];
117 *dst++ = ctx->VertexProgram.Parameters[pi][2];
118 *dst++ = ctx->VertexProgram.Parameters[pi][3];
119 }
120 return dst - dst_o;
121 }
122
123 assert(mesa_vp->Base.Parameters);
124 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
125
126 if (mesa_vp->Base.Parameters->NumParameters * 4 >
127 VSF_MAX_FRAGMENT_LENGTH) {
128 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
129 _mesa_exit(-1);
130 }
131
132 paramList = mesa_vp->Base.Parameters;
133 for (pi = 0; pi < paramList->NumParameters; pi++) {
134 switch (paramList->Parameters[pi].Type) {
135
136 case PROGRAM_STATE_VAR:
137 case PROGRAM_NAMED_PARAM:
138 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
139 case PROGRAM_CONSTANT:
140 *dst++ = paramList->ParameterValues[pi][0];
141 *dst++ = paramList->ParameterValues[pi][1];
142 *dst++ = paramList->ParameterValues[pi][2];
143 *dst++ = paramList->ParameterValues[pi][3];
144 break;
145
146 default:
147 _mesa_problem(NULL, "Bad param type in %s",
148 __FUNCTION__);
149 }
150
151 }
152
153 return dst - dst_o;
154 }
155
156 static unsigned long t_dst_mask(GLuint mask)
157 {
158 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
159 return mask & VSF_FLAG_ALL;
160 }
161
162 static unsigned long t_dst_class(enum register_file file)
163 {
164
165 switch (file) {
166 case PROGRAM_TEMPORARY:
167 return VSF_OUT_CLASS_TMP;
168 case PROGRAM_OUTPUT:
169 return VSF_OUT_CLASS_RESULT;
170 case PROGRAM_ADDRESS:
171 return VSF_OUT_CLASS_ADDR;
172 /*
173 case PROGRAM_INPUT:
174 case PROGRAM_LOCAL_PARAM:
175 case PROGRAM_ENV_PARAM:
176 case PROGRAM_NAMED_PARAM:
177 case PROGRAM_STATE_VAR:
178 case PROGRAM_WRITE_ONLY:
179 case PROGRAM_ADDRESS:
180 */
181 default:
182 fprintf(stderr, "problem in %s", __FUNCTION__);
183 _mesa_exit(-1);
184 return -1;
185 }
186 }
187
188 static unsigned long t_dst_index(struct r300_vertex_program *vp,
189 struct prog_dst_register *dst)
190 {
191 if (dst->File == PROGRAM_OUTPUT)
192 return vp->outputs[dst->Index];
193
194 return dst->Index;
195 }
196
197 static unsigned long t_src_class(enum register_file file)
198 {
199
200 switch (file) {
201 case PROGRAM_TEMPORARY:
202 return VSF_IN_CLASS_TMP;
203
204 case PROGRAM_INPUT:
205 return VSF_IN_CLASS_ATTR;
206
207 case PROGRAM_LOCAL_PARAM:
208 case PROGRAM_ENV_PARAM:
209 case PROGRAM_NAMED_PARAM:
210 case PROGRAM_STATE_VAR:
211 return VSF_IN_CLASS_PARAM;
212 /*
213 case PROGRAM_OUTPUT:
214 case PROGRAM_WRITE_ONLY:
215 case PROGRAM_ADDRESS:
216 */
217 default:
218 fprintf(stderr, "problem in %s", __FUNCTION__);
219 _mesa_exit(-1);
220 return -1;
221 }
222 }
223
224 static __inline unsigned long t_swizzle(GLubyte swizzle)
225 {
226 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
227 return swizzle;
228 }
229
#if 0
/*
 * Debug helper (compiled out): print the contents of vp->inputs[] to
 * stderr, tagged with the name of the calling function.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr = 0;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	while (attr < VERT_ATTRIB_MAX) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
		attr++;
	}
	fprintf(stderr, ">\n");
}
#endif
248
249 static unsigned long t_src_index(struct r300_vertex_program *vp,
250 struct prog_src_register *src)
251 {
252 int i;
253 int max_reg = -1;
254
255 if (src->File == PROGRAM_INPUT) {
256 if (vp->inputs[src->Index] != -1)
257 return vp->inputs[src->Index];
258
259 for (i = 0; i < VERT_ATTRIB_MAX; i++)
260 if (vp->inputs[i] > max_reg)
261 max_reg = vp->inputs[i];
262
263 vp->inputs[src->Index] = max_reg + 1;
264
265 //vp_dump_inputs(vp, __FUNCTION__);
266
267 return vp->inputs[src->Index];
268 } else {
269 if (src->Index < 0) {
270 fprintf(stderr,
271 "negative offsets for indirect addressing do not work.\n");
272 return 0;
273 }
274 return src->Index;
275 }
276 }
277
278 static unsigned long t_src(struct r300_vertex_program *vp,
279 struct prog_src_register *src)
280 {
281 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
282 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
283 */
284 return MAKE_VSF_SOURCE(t_src_index(vp, src),
285 t_swizzle(GET_SWZ(src->Swizzle, 0)),
286 t_swizzle(GET_SWZ(src->Swizzle, 1)),
287 t_swizzle(GET_SWZ(src->Swizzle, 2)),
288 t_swizzle(GET_SWZ(src->Swizzle, 3)),
289 t_src_class(src->File),
290 src->NegateBase) | (src->RelAddr << 4);
291 }
292
293 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
294 struct prog_src_register *src)
295 {
296
297 return MAKE_VSF_SOURCE(t_src_index(vp, src),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_swizzle(GET_SWZ(src->Swizzle, 0)),
302 t_src_class(src->File),
303 src->
304 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
305 (src->RelAddr << 4);
306 }
307
308 static unsigned long t_opcode(enum prog_opcode opcode)
309 {
310
311 switch (opcode) {
312 /* *INDENT-OFF* */
313 case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
314 case OPCODE_DST: return R300_VPI_OUT_OP_DST;
315 case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
316 case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
317 case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
318 case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
319 case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
320 case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
321 case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
322 case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
323 case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
324 case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
325 case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
326 case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
327 case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
328 /* *INDENT-ON* */
329
330 default:
331 fprintf(stderr, "%s: Should not be called with opcode %d!",
332 __FUNCTION__, opcode);
333 }
334 _mesa_exit(-1);
335 return 0;
336 }
337
338 static unsigned long op_operands(enum prog_opcode opcode)
339 {
340 int i;
341
342 /* Can we trust mesas opcodes to be in order ? */
343 for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++)
344 if (op_names[i].opcode == opcode)
345 return op_names[i].ip;
346
347 fprintf(stderr, "op %d not found in op_names\n", opcode);
348 _mesa_exit(-1);
349 return 0;
350 }
351
352 static GLboolean valid_dst(struct r300_vertex_program *vp,
353 struct prog_dst_register *dst)
354 {
355 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
356 return GL_FALSE;
357 } else if (dst->File == PROGRAM_ADDRESS) {
358 assert(dst->Index == 0);
359 }
360
361 return GL_TRUE;
362 }
363
/*
 * CMP_SRCS(a, b): true when two source registers differ in RelAddr, or
 * have different indices while both are PARAM-class or both ATTR-class.
 * The translator uses it to decide when one of the operands has to be
 * copied into a scratch temporary first.
 *
 * Arguments are parenthesised so that any lvalue expression can be
 * passed safely.
 */
/* TODO: Get rid of t_src_class call */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
	    t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
	   (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
	    t_src_class((b).File) == VSF_IN_CLASS_ATTR))))

/*
 * Hardware source word that reads the register of src[n] but selects the
 * same constant swizzle `swz` for all four components, without negation.
 * Expects `vp` and `src` to be in scope at the point of use.
 */
#define R300_VP_CONST_SRC(n, swz) \
	(MAKE_VSF_SOURCE(t_src_index(vp, &src[n]), \
			 (swz), (swz), \
			 (swz), (swz), \
			 t_src_class(src[n].File), VSF_FLAG_NONE) | \
	 (src[n].RelAddr << 4))

/* All-zero operand built on the register of src[0], src[1] or src[2]. */
#define ZERO_SRC_0 R300_VP_CONST_SRC(0, SWIZZLE_ZERO)
#define ZERO_SRC_1 R300_VP_CONST_SRC(1, SWIZZLE_ZERO)
#define ZERO_SRC_2 R300_VP_CONST_SRC(2, SWIZZLE_ZERO)

/* All-one operand built on the register of src[0], src[1] or src[2]. */
#define ONE_SRC_0 R300_VP_CONST_SRC(0, SWIZZLE_ONE)
#define ONE_SRC_1 R300_VP_CONST_SRC(1, SWIZZLE_ONE)
#define ONE_SRC_2 R300_VP_CONST_SRC(2, SWIZZLE_ONE)

/* DP4 version seems to trigger some hw peculiarity */
//#define PREFER_DP4

/*
 * Reset the scratch-temporary cursor `u_temp_i` to the last hardware
 * temp.  Before resetting, if the cursor has dropped below
 * vp->num_temporaries, warn once and mark the program as non-native.
 * Expects `vp` and `u_temp_i` to be in scope at the point of use.
 */
#define FREE_TEMPS() \
	do { \
		if (u_temp_i < vp->num_temporaries) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
			vp->native = GL_FALSE; \
		} \
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; \
	} while (0)
412
/*
 * Translate a Mesa vertex program into hardware instructions.
 *
 * vp   receives the result: the instructions are written to
 *      vp->program.body.i and vp->program.length is set to the number of
 *      emitted instructions times 4.  vp->inputs[] and vp->outputs[] are
 *      reset to -1; output slots are then assigned consecutively for
 *      HPOS, PSIZ, COL0, COL1 and TEX0..TEX7 according to
 *      vp->key.OutputsWritten (BFC0/BFC1/FOGC are compiled out).
 *      vp->translated is set to GL_TRUE.
 * vpi  Mesa instruction list, walked until OPCODE_END.  It is modified in
 *      place when a destination is redirected to a scratch temporary.
 *
 * For every Mesa instruction the function first copies operands flagged
 * by CMP_SRCS into scratch temporaries, then emits either an
 * opcode-specific sequence (the big switch) or the generic encoding
 * based on t_opcode().  Scratch temporaries are taken downwards from
 * VSF_MAX_FRAGMENT_TEMPS - 1 through u_temp_i.
 *
 * If the emitted instruction count reaches VSF_MAX_FRAGMENT_LENGTH / 4
 * the program length is set to 0 and vp->native to GL_FALSE.  Note that
 * this check only happens after all instructions have been written.
 */
413 static void r300_translate_vertex_shader(struct r300_vertex_program *vp,
414 struct prog_instruction *vpi)
415 {
416 int i, cur_reg = 0;
417 VERTEX_SHADER_INSTRUCTION *o_inst;
418 unsigned long operands;
419 int are_srcs_scalar;
420 unsigned long hw_op;
421 /* Initial value should be last tmp reg that hw supports.
422 Strangely enough r300 doesnt mind even though these would be out of range.
423 Smart enough to realize that it doesnt need it? */
424 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
425 struct prog_src_register src[3];
426
427 vp->pos_end = 0; /* Not supported yet */
428 vp->program.length = 0;
429 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
430
431 for (i = 0; i < VERT_ATTRIB_MAX; i++)
432 vp->inputs[i] = -1;
433
434 for (i = 0; i < VERT_RESULT_MAX; i++)
435 vp->outputs[i] = -1;
436
437 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
438
439 /* Assign outputs */
440 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS))
441 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
442
443 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ))
444 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
445
446 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0))
447 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
448
449 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1))
450 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
451
452 #if 0 /* Not supported yet */
453 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0))
454 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
455
456 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1))
457 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
458
459 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC))
460 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
461 #endif
462
463 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
464 if (vp->key.OutputsWritten & (1 << i))
465 vp->outputs[i] = cur_reg++;
466
467 vp->translated = GL_TRUE;
468 vp->native = GL_TRUE;
469
470 o_inst = vp->program.body.i;
471 for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) {
/* Start each Mesa instruction with the full set of scratch temps;
 * FREE_TEMPS() also clears vp->native if the previous instruction's
 * scratch usage dipped below vp->num_temporaries. */
472 FREE_TEMPS();
473
474 if (!valid_dst(vp, &vpi->DstReg)) {
475 /* redirect result to unused temp */
476 vpi->DstReg.File = PROGRAM_TEMPORARY;
477 vpi->DstReg.Index = u_temp_i;
478 }
479
/* Split the op_names entry into the scalar flag and the operand count. */
480 operands = op_operands(vpi->Opcode);
481 are_srcs_scalar = operands & SCALAR_FLAG;
482 operands &= OP_MASK;
483
/* Work on local copies so operands can be redirected to scratch temps
 * without touching vpi->SrcReg[]. */
484 for (i = 0; i < operands; i++)
485 src[i] = vpi->SrcReg[i];
486
/* Three-operand ops: when CMP_SRCS flags src[2] against src[1] or
 * src[0], emit an extra ADD that copies src[2] (plus zero) into a scratch
 * temp and read that temp instead.  Presumably this works around a
 * hardware limit on reading two different PARAM/ATTR registers in one
 * instruction -- the reason is not stated in this file. */
487 if (operands == 3) { /* TODO: scalars */
488 if (CMP_SRCS(src[1], src[2])
489 || CMP_SRCS(src[0], src[2])) {
490 o_inst->op =
491 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
492 VSF_FLAG_ALL,
493 VSF_OUT_CLASS_TMP);
494
495 o_inst->src[0] =
496 MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
497 SWIZZLE_X, SWIZZLE_Y,
498 SWIZZLE_Z, SWIZZLE_W,
499 t_src_class(src[2].File),
500 VSF_FLAG_NONE) | (src[2].
501 RelAddr <<
502 4);
503
504 o_inst->src[1] = ZERO_SRC_2;
505 o_inst->src[2] = ZERO_SRC_2;
506 o_inst++;
507
508 src[2].File = PROGRAM_TEMPORARY;
509 src[2].Index = u_temp_i;
510 src[2].RelAddr = 0;
511 u_temp_i--;
512 }
513
514 }
515
/* Same treatment for src[0] when CMP_SRCS flags it against src[1]. */
516 if (operands >= 2) {
517 if (CMP_SRCS(src[1], src[0])) {
518 o_inst->op =
519 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
520 VSF_FLAG_ALL,
521 VSF_OUT_CLASS_TMP);
522
523 o_inst->src[0] =
524 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
525 SWIZZLE_X, SWIZZLE_Y,
526 SWIZZLE_Z, SWIZZLE_W,
527 t_src_class(src[0].File),
528 VSF_FLAG_NONE) | (src[0].
529 RelAddr <<
530 4);
531
532 o_inst->src[1] = ZERO_SRC_0;
533 o_inst->src[2] = ZERO_SRC_0;
534 o_inst++;
535
536 src[0].File = PROGRAM_TEMPORARY;
537 src[0].Index = u_temp_i;
538 src[0].RelAddr = 0;
539 u_temp_i--;
540 }
541 }
542
543 /* These ops need special handling. */
544 switch (vpi->Opcode) {
545 case OPCODE_POW:
/* The second Mesa operand goes into the third hardware source slot;
 * the second slot is filled with zero. */
546 o_inst->op =
547 MAKE_VSF_OP(R300_VPI_OUT_OP_POW,
548 t_dst_index(vp, &vpi->DstReg),
549 t_dst_mask(vpi->DstReg.WriteMask),
550 t_dst_class(vpi->DstReg.File));
551 o_inst->src[0] = t_src_scalar(vp, &src[0]);
552 o_inst->src[1] = ZERO_SRC_0;
553 o_inst->src[2] = t_src_scalar(vp, &src[1]);
554 goto next;
555
556 case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
557 case OPCODE_SWZ:
558 #if 1
559 o_inst->op =
560 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
561 t_dst_index(vp, &vpi->DstReg),
562 t_dst_mask(vpi->DstReg.WriteMask),
563 t_dst_class(vpi->DstReg.File));
564 o_inst->src[0] = t_src(vp, &src[0]);
565 o_inst->src[1] = ZERO_SRC_0;
566 o_inst->src[2] = ZERO_SRC_0;
567 #else
568 hw_op =
569 (src[0].File ==
570 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
571 R300_VPI_OUT_OP_MAD;
572
573 o_inst->op =
574 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
575 t_dst_mask(vpi->DstReg.WriteMask),
576 t_dst_class(vpi->DstReg.File));
577 o_inst->src[0] = t_src(vp, &src[0]);
578 o_inst->src[1] = ONE_SRC_0;
579 o_inst->src[2] = ZERO_SRC_0;
580 #endif
581
582 goto next;
583
584 case OPCODE_ADD:
/* Emitted as a MAD: 1 * src[0] + src[1].  MAD_2 is selected when both
 * operands are temporaries. */
585 #if 1
586 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
587 src[1].File ==
588 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
589 R300_VPI_OUT_OP_MAD;
590
591 o_inst->op =
592 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
593 t_dst_mask(vpi->DstReg.WriteMask),
594 t_dst_class(vpi->DstReg.File));
595 o_inst->src[0] = ONE_SRC_0;
596 o_inst->src[1] = t_src(vp, &src[0]);
597 o_inst->src[2] = t_src(vp, &src[1]);
598 #else
599 o_inst->op =
600 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
601 t_dst_index(vp, &vpi->DstReg),
602 t_dst_mask(vpi->DstReg.WriteMask),
603 t_dst_class(vpi->DstReg.File));
604 o_inst->src[0] = t_src(vp, &src[0]);
605 o_inst->src[1] = t_src(vp, &src[1]);
606 o_inst->src[2] = ZERO_SRC_1;
607
608 #endif
609 goto next;
610
611 case OPCODE_MAD:
612 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
613 src[1].File == PROGRAM_TEMPORARY &&
614 src[2].File ==
615 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
616 R300_VPI_OUT_OP_MAD;
617
618 o_inst->op =
619 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
620 t_dst_mask(vpi->DstReg.WriteMask),
621 t_dst_class(vpi->DstReg.File));
622 o_inst->src[0] = t_src(vp, &src[0]);
623 o_inst->src[1] = t_src(vp, &src[1]);
624 o_inst->src[2] = t_src(vp, &src[2]);
625 goto next;
626
627 case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
628 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
629 src[1].File ==
630 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
631 R300_VPI_OUT_OP_MAD;
632
633 o_inst->op =
634 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
635 t_dst_mask(vpi->DstReg.WriteMask),
636 t_dst_class(vpi->DstReg.File));
637 o_inst->src[0] = t_src(vp, &src[0]);
638 o_inst->src[1] = t_src(vp, &src[1]);
639
640 o_inst->src[2] = ZERO_SRC_1;
641 goto next;
642
643 case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
644 o_inst->op =
645 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
646 t_dst_index(vp, &vpi->DstReg),
647 t_dst_mask(vpi->DstReg.WriteMask),
648 t_dst_class(vpi->DstReg.File));
649
650 o_inst->src[0] =
651 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
652 t_swizzle(GET_SWZ
653 (src[0].Swizzle, 0)),
654 t_swizzle(GET_SWZ
655 (src[0].Swizzle, 1)),
656 t_swizzle(GET_SWZ
657 (src[0].Swizzle, 2)),
658 SWIZZLE_ZERO,
659 t_src_class(src[0].File),
660 src[0].
661 NegateBase ? VSF_FLAG_XYZ :
662 VSF_FLAG_NONE) | (src[0].
663 RelAddr << 4);
664
665 o_inst->src[1] =
666 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
667 t_swizzle(GET_SWZ
668 (src[1].Swizzle, 0)),
669 t_swizzle(GET_SWZ
670 (src[1].Swizzle, 1)),
671 t_swizzle(GET_SWZ
672 (src[1].Swizzle, 2)),
673 SWIZZLE_ZERO,
674 t_src_class(src[1].File),
675 src[1].
676 NegateBase ? VSF_FLAG_XYZ :
677 VSF_FLAG_NONE) | (src[1].
678 RelAddr << 4);
679
680 o_inst->src[2] = ZERO_SRC_1;
681 goto next;
682
683 case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
684 #if 1
685 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
686 src[1].File ==
687 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
688 R300_VPI_OUT_OP_MAD;
689
690 o_inst->op =
691 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
692 t_dst_mask(vpi->DstReg.WriteMask),
693 t_dst_class(vpi->DstReg.File));
694 o_inst->src[0] = t_src(vp, &src[0]);
695 o_inst->src[1] = ONE_SRC_0;
696 o_inst->src[2] =
697 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
698 t_swizzle(GET_SWZ
699 (src[1].Swizzle, 0)),
700 t_swizzle(GET_SWZ
701 (src[1].Swizzle, 1)),
702 t_swizzle(GET_SWZ
703 (src[1].Swizzle, 2)),
704 t_swizzle(GET_SWZ
705 (src[1].Swizzle, 3)),
706 t_src_class(src[1].File),
707 (!src[1].
708 NegateBase) ? VSF_FLAG_ALL :
709 VSF_FLAG_NONE) | (src[1].
710 RelAddr << 4);
711 #else
712 o_inst->op =
713 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
714 t_dst_index(vp, &vpi->DstReg),
715 t_dst_mask(vpi->DstReg.WriteMask),
716 t_dst_class(vpi->DstReg.File));
717
718 o_inst->src[0] = t_src(vp, &src[0]);
719 o_inst->src[1] =
720 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
721 t_swizzle(GET_SWZ
722 (src[1].Swizzle, 0)),
723 t_swizzle(GET_SWZ
724 (src[1].Swizzle, 1)),
725 t_swizzle(GET_SWZ
726 (src[1].Swizzle, 2)),
727 t_swizzle(GET_SWZ
728 (src[1].Swizzle, 3)),
729 t_src_class(src[1].File),
730 (!src[1].
731 NegateBase) ? VSF_FLAG_ALL :
732 VSF_FLAG_NONE) | (src[1].
733 RelAddr << 4);
734 o_inst->src[2] = 0;
735 #endif
736 goto next;
737
738 case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
739 o_inst->op =
740 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX,
741 t_dst_index(vp, &vpi->DstReg),
742 t_dst_mask(vpi->DstReg.WriteMask),
743 t_dst_class(vpi->DstReg.File));
744
745 o_inst->src[0] = t_src(vp, &src[0]);
746 o_inst->src[1] =
747 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
748 t_swizzle(GET_SWZ
749 (src[0].Swizzle, 0)),
750 t_swizzle(GET_SWZ
751 (src[0].Swizzle, 1)),
752 t_swizzle(GET_SWZ
753 (src[0].Swizzle, 2)),
754 t_swizzle(GET_SWZ
755 (src[0].Swizzle, 3)),
756 t_src_class(src[0].File),
757 (!src[0].
758 NegateBase) ? VSF_FLAG_ALL :
759 VSF_FLAG_NONE) | (src[0].
760 RelAddr << 4);
761 o_inst->src[2] = 0;
762 goto next;
763
764 case OPCODE_FLR:
765 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
766 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
767
768 o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
769 t_dst_mask(vpi->DstReg.
770 WriteMask),
771 VSF_OUT_CLASS_TMP);
772
773 o_inst->src[0] = t_src(vp, &src[0]);
774 o_inst->src[1] = ZERO_SRC_0;
775 o_inst->src[2] = ZERO_SRC_0;
776 o_inst++;
777
778 o_inst->op =
779 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
780 t_dst_index(vp, &vpi->DstReg),
781 t_dst_mask(vpi->DstReg.WriteMask),
782 t_dst_class(vpi->DstReg.File));
783
784 o_inst->src[0] = t_src(vp, &src[0]);
785 o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i,
786 VSF_IN_COMPONENT_X,
787 VSF_IN_COMPONENT_Y,
788 VSF_IN_COMPONENT_Z,
789 VSF_IN_COMPONENT_W,
790 VSF_IN_CLASS_TMP,
791 /* Not 100% sure about this */
792 (!src[0].
793 NegateBase) ?
794 VSF_FLAG_ALL :
795 VSF_FLAG_NONE
796 /*VSF_FLAG_ALL */ );
797
798 o_inst->src[2] = ZERO_SRC_0;
799 u_temp_i--;
800 goto next;
801
802 case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
803 o_inst->op =
804 MAKE_VSF_OP(R300_VPI_OUT_OP_LG2,
805 t_dst_index(vp, &vpi->DstReg),
806 t_dst_mask(vpi->DstReg.WriteMask),
807 t_dst_class(vpi->DstReg.File));
808
809 o_inst->src[0] =
810 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
811 t_swizzle(GET_SWZ
812 (src[0].Swizzle, 0)),
813 t_swizzle(GET_SWZ
814 (src[0].Swizzle, 0)),
815 t_swizzle(GET_SWZ
816 (src[0].Swizzle, 0)),
817 t_swizzle(GET_SWZ
818 (src[0].Swizzle, 0)),
819 t_src_class(src[0].File),
820 src[0].
821 NegateBase ? VSF_FLAG_ALL :
822 VSF_FLAG_NONE) | (src[0].
823 RelAddr << 4);
824 o_inst->src[1] = ZERO_SRC_0;
825 o_inst->src[2] = ZERO_SRC_0;
826 goto next;
827
828 case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
829 o_inst->op =
830 MAKE_VSF_OP(R300_VPI_OUT_OP_LIT,
831 t_dst_index(vp, &vpi->DstReg),
832 t_dst_mask(vpi->DstReg.WriteMask),
833 t_dst_class(vpi->DstReg.File));
834 /* NOTE: Users swizzling might not work. */
835 o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
836 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
837 VSF_IN_COMPONENT_ZERO, // z
838 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
839 t_src_class(src[0].
840 File),
841 src[0].
842 NegateBase ?
843 VSF_FLAG_ALL :
844 VSF_FLAG_NONE) |
845 (src[0].RelAddr << 4);
846 o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
847 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
848 VSF_IN_COMPONENT_ZERO, // z
849 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
850 t_src_class(src[0].
851 File),
852 src[0].
853 NegateBase ?
854 VSF_FLAG_ALL :
855 VSF_FLAG_NONE) |
856 (src[0].RelAddr << 4);
857 o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
858 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
859 VSF_IN_COMPONENT_ZERO, // z
860 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
861 t_src_class(src[0].
862 File),
863 src[0].
864 NegateBase ?
865 VSF_FLAG_ALL :
866 VSF_FLAG_NONE) |
867 (src[0].RelAddr << 4);
868 goto next;
869
870 case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
871 o_inst->op =
872 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
873 t_dst_index(vp, &vpi->DstReg),
874 t_dst_mask(vpi->DstReg.WriteMask),
875 t_dst_class(vpi->DstReg.File));
876
877 o_inst->src[0] =
878 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
879 t_swizzle(GET_SWZ
880 (src[0].Swizzle, 0)),
881 t_swizzle(GET_SWZ
882 (src[0].Swizzle, 1)),
883 t_swizzle(GET_SWZ
884 (src[0].Swizzle, 2)),
885 VSF_IN_COMPONENT_ONE,
886 t_src_class(src[0].File),
887 src[0].
888 NegateBase ? VSF_FLAG_XYZ :
889 VSF_FLAG_NONE) | (src[0].
890 RelAddr << 4);
891 o_inst->src[1] = t_src(vp, &src[1]);
892 o_inst->src[2] = ZERO_SRC_1;
893 goto next;
894
895 case OPCODE_XPD:
896 /* mul r0, r1.yzxw, r2.zxyw
897 mad r0, -r2.yzxw, r1.zxyw, r0
898 NOTE: might need MAD_2
899 */
900
901 o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
902 t_dst_mask(vpi->DstReg.
903 WriteMask),
904 VSF_OUT_CLASS_TMP);
905
906 o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
907 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
908 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
909 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
910 t_src_class(src[0].
911 File),
912 src[0].
913 NegateBase ?
914 VSF_FLAG_ALL :
915 VSF_FLAG_NONE) |
916 (src[0].RelAddr << 4);
917
918 o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
919 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
920 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
921 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
922 t_src_class(src[1].
923 File),
924 src[1].
925 NegateBase ?
926 VSF_FLAG_ALL :
927 VSF_FLAG_NONE) |
928 (src[1].RelAddr << 4);
929
930 o_inst->src[2] = ZERO_SRC_1;
931 o_inst++;
/* The scratch temp written above is read back below as u_temp_i + 1. */
932 u_temp_i--;
933
934 o_inst->op =
935 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD,
936 t_dst_index(vp, &vpi->DstReg),
937 t_dst_mask(vpi->DstReg.WriteMask),
938 t_dst_class(vpi->DstReg.File));
939
940 o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
941 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
942 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
943 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
944 t_src_class(src[1].
945 File),
946 (!src[1].
947 NegateBase) ?
948 VSF_FLAG_ALL :
949 VSF_FLAG_NONE) |
950 (src[1].RelAddr << 4);
951
952 o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
953 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
954 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
955 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
956 t_src_class(src[0].
957 File),
958 src[0].
959 NegateBase ?
960 VSF_FLAG_ALL :
961 VSF_FLAG_NONE) |
962 (src[0].RelAddr << 4);
963
964 o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1,
965 VSF_IN_COMPONENT_X,
966 VSF_IN_COMPONENT_Y,
967 VSF_IN_COMPONENT_Z,
968 VSF_IN_COMPONENT_W,
969 VSF_IN_CLASS_TMP,
970 VSF_FLAG_NONE);
971
972 goto next;
973
974 case OPCODE_RCC:
975 fprintf(stderr, "Dont know how to handle op %d yet\n",
976 vpi->Opcode);
977 _mesa_exit(-1);
978 break;
979 case OPCODE_END:
980 break;
981 default:
982 break;
983 }
984
/* Generic path: every opcode not handled by the switch above is mapped
 * one-to-one by t_opcode(), and its sources are encoded according to the
 * operand count and the scalar flag. */
985 o_inst->op =
986 MAKE_VSF_OP(t_opcode(vpi->Opcode),
987 t_dst_index(vp, &vpi->DstReg),
988 t_dst_mask(vpi->DstReg.WriteMask),
989 t_dst_class(vpi->DstReg.File));
990
991 if (are_srcs_scalar) {
992 switch (operands) {
993 case 1:
994 o_inst->src[0] = t_src_scalar(vp, &src[0]);
995 o_inst->src[1] = ZERO_SRC_0;
996 o_inst->src[2] = ZERO_SRC_0;
997 break;
998
999 case 2:
1000 o_inst->src[0] = t_src_scalar(vp, &src[0]);
1001 o_inst->src[1] = t_src_scalar(vp, &src[1]);
1002 o_inst->src[2] = ZERO_SRC_1;
1003 break;
1004
1005 case 3:
1006 o_inst->src[0] = t_src_scalar(vp, &src[0]);
1007 o_inst->src[1] = t_src_scalar(vp, &src[1]);
1008 o_inst->src[2] = t_src_scalar(vp, &src[2]);
1009 break;
1010
1011 default:
1012 fprintf(stderr,
1013 "scalars and op RCC not handled yet");
1014 _mesa_exit(-1);
1015 break;
1016 }
1017 } else {
1018 switch (operands) {
1019 case 1:
1020 o_inst->src[0] = t_src(vp, &src[0]);
1021 o_inst->src[1] = ZERO_SRC_0;
1022 o_inst->src[2] = ZERO_SRC_0;
1023 break;
1024
1025 case 2:
1026 o_inst->src[0] = t_src(vp, &src[0]);
1027 o_inst->src[1] = t_src(vp, &src[1]);
1028 o_inst->src[2] = ZERO_SRC_1;
1029 break;
1030
1031 case 3:
1032 o_inst->src[0] = t_src(vp, &src[0]);
1033 o_inst->src[1] = t_src(vp, &src[1]);
1034 o_inst->src[2] = t_src(vp, &src[2]);
1035 break;
1036
1037 default:
1038 fprintf(stderr,
1039 "scalars and op RCC not handled yet");
1040 _mesa_exit(-1);
1041 break;
1042 }
1043 }
/* Common exit of one iteration; the loop header then advances both vpi
 * and o_inst. */
1044 next:;
1045 }
1046
1047 /* Will most likely segfault before we get here... fix later. */
1048 if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) {
1049 vp->program.length = 0;
1050 vp->native = GL_FALSE;
1051 return;
1052 }
1053 vp->program.length = (o_inst - vp->program.body.i) * 4;
1054 #if 0
1055 fprintf(stderr, "hw program:\n");
1056 for (i = 0; i < vp->program.length; i++)
1057 fprintf(stderr, "%08x\n", vp->program.body.d[i]);
1058 #endif
1059 }
1060
1061 static void position_invariant(struct gl_program *prog)
1062 {
1063 struct prog_instruction *vpi;
1064 struct gl_program_parameter_list *paramList;
1065 int i;
1066
1067 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
1068
1069 /* tokens[4] = matrix modifier */
1070 #ifdef PREFER_DP4
1071 tokens[4] = 0; /* not transposed or inverted */
1072 #else
1073 tokens[4] = STATE_MATRIX_TRANSPOSE;
1074 #endif
1075 paramList = prog->Parameters;
1076
1077 vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
1078 _mesa_init_instructions(vpi, prog->NumInstructions + 4);
1079
1080 for (i = 0; i < 4; i++) {
1081 GLint idx;
1082 tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
1083 idx = _mesa_add_state_reference(paramList, tokens);
1084 #ifdef PREFER_DP4
1085 vpi[i].Opcode = OPCODE_DP4;
1086 vpi[i].StringPos = 0;
1087 vpi[i].Data = 0;
1088
1089 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1090 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
1091 vpi[i].DstReg.WriteMask = 1 << i;
1092 vpi[i].DstReg.CondMask = COND_TR;
1093
1094 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1095 vpi[i].SrcReg[0].Index = idx;
1096 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1097
1098 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1099 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1100 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
1101 #else
1102 if (i == 0)
1103 vpi[i].Opcode = OPCODE_MUL;
1104 else
1105 vpi[i].Opcode = OPCODE_MAD;
1106
1107 vpi[i].StringPos = 0;
1108 vpi[i].Data = 0;
1109
1110 if (i == 3)
1111 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1112 else
1113 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
1114 vpi[i].DstReg.Index = 0;
1115 vpi[i].DstReg.WriteMask = 0xf;
1116 vpi[i].DstReg.CondMask = COND_TR;
1117
1118 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1119 vpi[i].SrcReg[0].Index = idx;
1120 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1121
1122 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1123 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1124 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
1125
1126 if (i > 0) {
1127 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
1128 vpi[i].SrcReg[2].Index = 0;
1129 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
1130 }
1131 #endif
1132 }
1133
1134 _mesa_copy_instructions(&vpi[i], prog->Instructions,
1135 prog->NumInstructions);
1136
1137 free(prog->Instructions);
1138
1139 prog->Instructions = vpi;
1140
1141 prog->NumInstructions += 4;
1142 vpi = &prog->Instructions[prog->NumInstructions - 1];
1143
1144 assert(vpi->Opcode == OPCODE_END);
1145 }
1146
1147 static void insert_wpos(struct r300_vertex_program *vp,
1148 struct gl_program *prog, GLuint temp_index)
1149 {
1150 struct prog_instruction *vpi;
1151 struct prog_instruction *vpi_insert;
1152 int i = 0;
1153
1154 vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
1155 _mesa_init_instructions(vpi, prog->NumInstructions + 2);
1156 /* all but END */
1157 _mesa_copy_instructions(vpi, prog->Instructions,
1158 prog->NumInstructions - 1);
1159 /* END */
1160 _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
1161 &prog->Instructions[prog->NumInstructions - 1],
1162 1);
1163 vpi_insert = &vpi[prog->NumInstructions - 1];
1164
1165 vpi_insert[i].Opcode = OPCODE_MOV;
1166
1167 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1168 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
1169 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1170 vpi_insert[i].DstReg.CondMask = COND_TR;
1171
1172 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1173 vpi_insert[i].SrcReg[0].Index = temp_index;
1174 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1175 i++;
1176
1177 vpi_insert[i].Opcode = OPCODE_MOV;
1178
1179 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1180 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1181 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1182 vpi_insert[i].DstReg.CondMask = COND_TR;
1183
1184 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1185 vpi_insert[i].SrcReg[0].Index = temp_index;
1186 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1187 i++;
1188
1189 free(prog->Instructions);
1190
1191 prog->Instructions = vpi;
1192
1193 prog->NumInstructions += i;
1194 vpi = &prog->Instructions[prog->NumInstructions - 1];
1195
1196 assert(vpi->Opcode == OPCODE_END);
1197 }
1198
1199 static void pos_as_texcoord(struct r300_vertex_program *vp,
1200 struct gl_program *prog)
1201 {
1202 struct prog_instruction *vpi;
1203 GLuint tempregi = prog->NumTemporaries;
1204 /* should do something else if no temps left... */
1205 prog->NumTemporaries++;
1206
1207 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1208 if (vpi->DstReg.File == PROGRAM_OUTPUT &&
1209 vpi->DstReg.Index == VERT_RESULT_HPOS) {
1210 vpi->DstReg.File = PROGRAM_TEMPORARY;
1211 vpi->DstReg.Index = tempregi;
1212 }
1213 }
1214 insert_wpos(vp, prog, tempregi);
1215 }
1216
1217 static struct r300_vertex_program *build_program(struct r300_vertex_program_key
1218 *wanted_key,
1219 struct gl_vertex_program
1220 *mesa_vp, GLint wpos_idx)
1221 {
1222 struct r300_vertex_program *vp;
1223
1224 vp = _mesa_calloc(sizeof(*vp));
1225 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1226
1227 vp->wpos_idx = wpos_idx;
1228
1229 if (mesa_vp->IsPositionInvariant) {
1230 position_invariant(&mesa_vp->Base);
1231 }
1232
1233 if (wpos_idx > -1)
1234 pos_as_texcoord(vp, &mesa_vp->Base);
1235
1236 assert(mesa_vp->Base.NumInstructions);
1237
1238 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1239
1240 r300_translate_vertex_shader(vp, mesa_vp->Base.Instructions);
1241
1242 return vp;
1243 }
1244
1245 void r300_select_vertex_shader(r300ContextPtr r300)
1246 {
1247 GLcontext *ctx = ctx = r300->radeon.glCtx;
1248 GLuint InputsRead;
1249 struct r300_vertex_program_key wanted_key = { 0 };
1250 GLint i;
1251 struct r300_vertex_program_cont *vpc;
1252 struct r300_vertex_program *vp;
1253 GLint wpos_idx;
1254
1255 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1256 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1257
1258 wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
1259
1260 wpos_idx = -1;
1261 if (InputsRead & FRAG_BIT_WPOS) {
1262 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1263 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1264 break;
1265
1266 if (i == ctx->Const.MaxTextureUnits) {
1267 fprintf(stderr, "\tno free texcoord found\n");
1268 _mesa_exit(-1);
1269 }
1270
1271 InputsRead |= (FRAG_BIT_TEX0 << i);
1272 wpos_idx = i;
1273 }
1274
1275 if (InputsRead & FRAG_BIT_COL0)
1276 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
1277
1278 if ((InputsRead & FRAG_BIT_COL1) /*||
1279 (InputsRead & FRAG_BIT_FOGC) */ )
1280 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
1281
1282 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1283 if (InputsRead & (FRAG_BIT_TEX0 << i))
1284 wanted_key.OutputsWritten |=
1285 1 << (VERT_RESULT_TEX0 + i);
1286
1287 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1288 if (vpc->mesa_program.IsPositionInvariant) {
1289 /* we wan't position don't we ? */
1290 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1291 }
1292
1293 for (vp = vpc->progs; vp; vp = vp->next)
1294 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) ==
1295 0) {
1296 r300->selected_vp = vp;
1297 return;
1298 }
1299 //_mesa_print_program(&vpc->mesa_program.Base);
1300
1301 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1302 vp->next = vpc->progs;
1303 vpc->progs = vp;
1304 r300->selected_vp = vp;
1305 }