90f18ad225761eaffb2e8e46a14bebf5fe43738f
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Aapo Tahkola <aet@rasterburn.org>
31 */
32 #include "glheader.h"
33 #include "macros.h"
34 #include "enums.h"
35 #include "program.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_parameter.h"
38 #include "shader/prog_statevars.h"
39 #include "tnl/tnl.h"
40
41 #include "r300_context.h"
42
/*
 * Compile-time guards.  t_swizzle() and t_dst_mask() below hand Mesa's
 * swizzle selectors and write-mask bits to the hardware encoding without
 * any remapping, so both sets of constants have to be numerically equal.
 */
#if (SWIZZLE_X != VSF_IN_COMPONENT_X) || \
    (SWIZZLE_Y != VSF_IN_COMPONENT_Y) || \
    (SWIZZLE_Z != VSF_IN_COMPONENT_Z) || \
    (SWIZZLE_W != VSF_IN_COMPONENT_W) || \
    (SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO) || \
    (SWIZZLE_ONE != VSF_IN_COMPONENT_ONE)
#error Cannot change these!
#elif (WRITEMASK_X != VSF_FLAG_X) || \
      (WRITEMASK_Y != VSF_FLAG_Y) || \
      (WRITEMASK_Z != VSF_FLAG_Z) || \
      (WRITEMASK_W != VSF_FLAG_W)
#error Cannot change these!
#endif
55
56 #define SCALAR_FLAG (1<<31)
57 #define FLAG_MASK (1<<31)
58 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
59 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
60
61 static struct {
62 char *name;
63 int opcode;
64 unsigned long ip; /* number of input operands and flags */
65 } op_names[] = {
66 /* *INDENT-OFF* */
67 OPN(ABS, 1),
68 OPN(ADD, 2),
69 OPN(ARL, 1 | SCALAR_FLAG),
70 OPN(DP3, 2),
71 OPN(DP4, 2),
72 OPN(DPH, 2),
73 OPN(DST, 2),
74 OPN(EX2, 1 | SCALAR_FLAG),
75 OPN(EXP, 1 | SCALAR_FLAG),
76 OPN(FLR, 1),
77 OPN(FRC, 1),
78 OPN(LG2, 1 | SCALAR_FLAG),
79 OPN(LIT, 1),
80 OPN(LOG, 1 | SCALAR_FLAG),
81 OPN(MAD, 3),
82 OPN(MAX, 2),
83 OPN(MIN, 2),
84 OPN(MOV, 1),
85 OPN(MUL, 2),
86 OPN(POW, 2 | SCALAR_FLAG),
87 OPN(RCP, 1 | SCALAR_FLAG),
88 OPN(RSQ, 1 | SCALAR_FLAG),
89 OPN(SGE, 2),
90 OPN(SLT, 2),
91 OPN(SUB, 2),
92 OPN(SWZ, 1),
93 OPN(XPD, 2),
94 OPN(RCC, 0), //extra
95 OPN(PRINT, 0),
96 OPN(END, 0)
97 /* *INDENT-ON* */
98 };
99
100 #undef OPN
101
102 int r300VertexProgUpdateParams(GLcontext * ctx,
103 struct r300_vertex_program_cont *vp, float *dst)
104 {
105 int pi;
106 struct gl_vertex_program *mesa_vp = &vp->mesa_program;
107 float *dst_o = dst;
108 struct gl_program_parameter_list *paramList;
109
110 if (mesa_vp->IsNVProgram) {
111 _mesa_load_tracked_matrices(ctx);
112
113 for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
114 *dst++ = ctx->VertexProgram.Parameters[pi][0];
115 *dst++ = ctx->VertexProgram.Parameters[pi][1];
116 *dst++ = ctx->VertexProgram.Parameters[pi][2];
117 *dst++ = ctx->VertexProgram.Parameters[pi][3];
118 }
119 return dst - dst_o;
120 }
121
122 assert(mesa_vp->Base.Parameters);
123 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
124
125 if (mesa_vp->Base.Parameters->NumParameters * 4 >
126 VSF_MAX_FRAGMENT_LENGTH) {
127 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
128 _mesa_exit(-1);
129 }
130
131 paramList = mesa_vp->Base.Parameters;
132 for (pi = 0; pi < paramList->NumParameters; pi++) {
133 switch (paramList->Parameters[pi].Type) {
134
135 case PROGRAM_STATE_VAR:
136 case PROGRAM_NAMED_PARAM:
137 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
138 case PROGRAM_CONSTANT:
139 *dst++ = paramList->ParameterValues[pi][0];
140 *dst++ = paramList->ParameterValues[pi][1];
141 *dst++ = paramList->ParameterValues[pi][2];
142 *dst++ = paramList->ParameterValues[pi][3];
143 break;
144
145 default:
146 _mesa_problem(NULL, "Bad param type in %s",
147 __FUNCTION__);
148 }
149
150 }
151
152 return dst - dst_o;
153 }
154
155 static unsigned long t_dst_mask(GLuint mask)
156 {
157 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
158 return mask & VSF_FLAG_ALL;
159 }
160
161 static unsigned long t_dst_class(enum register_file file)
162 {
163
164 switch (file) {
165 case PROGRAM_TEMPORARY:
166 return VSF_OUT_CLASS_TMP;
167 case PROGRAM_OUTPUT:
168 return VSF_OUT_CLASS_RESULT;
169 case PROGRAM_ADDRESS:
170 return VSF_OUT_CLASS_ADDR;
171 /*
172 case PROGRAM_INPUT:
173 case PROGRAM_LOCAL_PARAM:
174 case PROGRAM_ENV_PARAM:
175 case PROGRAM_NAMED_PARAM:
176 case PROGRAM_STATE_VAR:
177 case PROGRAM_WRITE_ONLY:
178 case PROGRAM_ADDRESS:
179 */
180 default:
181 fprintf(stderr, "problem in %s", __FUNCTION__);
182 _mesa_exit(-1);
183 return -1;
184 }
185 }
186
187 static unsigned long t_dst_index(struct r300_vertex_program *vp,
188 struct prog_dst_register *dst)
189 {
190 if (dst->File == PROGRAM_OUTPUT)
191 return vp->outputs[dst->Index];
192
193 return dst->Index;
194 }
195
196 static unsigned long t_src_class(enum register_file file)
197 {
198
199 switch (file) {
200 case PROGRAM_TEMPORARY:
201 return VSF_IN_CLASS_TMP;
202
203 case PROGRAM_INPUT:
204 return VSF_IN_CLASS_ATTR;
205
206 case PROGRAM_LOCAL_PARAM:
207 case PROGRAM_ENV_PARAM:
208 case PROGRAM_NAMED_PARAM:
209 case PROGRAM_STATE_VAR:
210 return VSF_IN_CLASS_PARAM;
211 /*
212 case PROGRAM_OUTPUT:
213 case PROGRAM_WRITE_ONLY:
214 case PROGRAM_ADDRESS:
215 */
216 default:
217 fprintf(stderr, "problem in %s", __FUNCTION__);
218 _mesa_exit(-1);
219 return -1;
220 }
221 }
222
223 static __inline unsigned long t_swizzle(GLubyte swizzle)
224 {
225 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
226 return swizzle;
227 }
228
#if 0
/*
 * Debug helper (compiled out): print the vp->inputs[] attribute-to-register
 * table on stderr, prefixed with the name of the calling function.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int attr = 0;

	if (!vp) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	while (attr < VERT_ATTRIB_MAX) {
		fprintf(stderr, "%d ", vp->inputs[attr]);
		attr++;
	}
	fprintf(stderr, ">\n");
}
#endif
247
248 static unsigned long t_src_index(struct r300_vertex_program *vp,
249 struct prog_src_register *src)
250 {
251 int i;
252 int max_reg = -1;
253
254 if (src->File == PROGRAM_INPUT) {
255 if (vp->inputs[src->Index] != -1)
256 return vp->inputs[src->Index];
257
258 for (i = 0; i < VERT_ATTRIB_MAX; i++)
259 if (vp->inputs[i] > max_reg)
260 max_reg = vp->inputs[i];
261
262 vp->inputs[src->Index] = max_reg + 1;
263
264 //vp_dump_inputs(vp, __FUNCTION__);
265
266 return vp->inputs[src->Index];
267 } else {
268 if (src->Index < 0) {
269 fprintf(stderr,
270 "negative offsets for indirect addressing do not work.\n");
271 return 0;
272 }
273 return src->Index;
274 }
275 }
276
277 static unsigned long t_src(struct r300_vertex_program *vp,
278 struct prog_src_register *src)
279 {
280 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
281 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
282 */
283 return MAKE_VSF_SOURCE(t_src_index(vp, src),
284 t_swizzle(GET_SWZ(src->Swizzle, 0)),
285 t_swizzle(GET_SWZ(src->Swizzle, 1)),
286 t_swizzle(GET_SWZ(src->Swizzle, 2)),
287 t_swizzle(GET_SWZ(src->Swizzle, 3)),
288 t_src_class(src->File),
289 src->NegateBase) | (src->RelAddr << 4);
290 }
291
292 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
293 struct prog_src_register *src)
294 {
295
296 return MAKE_VSF_SOURCE(t_src_index(vp, src),
297 t_swizzle(GET_SWZ(src->Swizzle, 0)),
298 t_swizzle(GET_SWZ(src->Swizzle, 0)),
299 t_swizzle(GET_SWZ(src->Swizzle, 0)),
300 t_swizzle(GET_SWZ(src->Swizzle, 0)),
301 t_src_class(src->File),
302 src->
303 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
304 (src->RelAddr << 4);
305 }
306
307 static unsigned long t_opcode(enum prog_opcode opcode)
308 {
309
310 switch (opcode) {
311 /* *INDENT-OFF* */
312 case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
313 case OPCODE_DST: return R300_VPI_OUT_OP_DST;
314 case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
315 case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
316 case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
317 case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
318 case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
319 case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
320 case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
321 case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
322 case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
323 case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
324 case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
325 case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
326 case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
327 /* *INDENT-ON* */
328
329 default:
330 fprintf(stderr, "%s: Should not be called with opcode %d!",
331 __FUNCTION__, opcode);
332 }
333 _mesa_exit(-1);
334 return 0;
335 }
336
337 static unsigned long op_operands(enum prog_opcode opcode)
338 {
339 int i;
340
341 /* Can we trust mesas opcodes to be in order ? */
342 for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++)
343 if (op_names[i].opcode == opcode)
344 return op_names[i].ip;
345
346 fprintf(stderr, "op %d not found in op_names\n", opcode);
347 _mesa_exit(-1);
348 return 0;
349 }
350
351 static GLboolean valid_dst(struct r300_vertex_program *vp,
352 struct prog_dst_register *dst)
353 {
354 if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
355 return GL_FALSE;
356 } else if (dst->File == PROGRAM_ADDRESS) {
357 assert(dst->Index == 0);
358 }
359
360 return GL_TRUE;
361 }
362
/*
 * True when two source registers cannot be read by the same hardware
 * instruction as written: either their relative-addressing flags differ,
 * or they are two different registers that both live in the parameter
 * class or both live in the attribute class.
 *
 * Arguments are fully parenthesized so any lvalue expression can be
 * passed.  Note that each argument is evaluated several times.
 */
/* TODO: Get rid of t_src_class call */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
	    t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
	   (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
	    t_src_class((b).File) == VSF_IN_CLASS_ATTR))))

/*
 * Source word that reads register src[n] but selects the constant
 * component `comp` (SWIZZLE_ZERO or SWIZZLE_ONE) on all four channels.
 * Relies on `vp` and `src` being in scope at the point of use.
 */
#define SPLAT_SRC(n, comp) \
	(MAKE_VSF_SOURCE(t_src_index(vp, &src[n]), \
			 comp, comp, \
			 comp, comp, \
			 t_src_class(src[n].File), VSF_FLAG_NONE) | \
	 (src[n].RelAddr << 4))

#define ZERO_SRC_0 SPLAT_SRC(0, SWIZZLE_ZERO)
#define ZERO_SRC_1 SPLAT_SRC(1, SWIZZLE_ZERO)
#define ZERO_SRC_2 SPLAT_SRC(2, SWIZZLE_ZERO)

#define ONE_SRC_0 SPLAT_SRC(0, SWIZZLE_ONE)
#define ONE_SRC_1 SPLAT_SRC(1, SWIZZLE_ONE)
#define ONE_SRC_2 SPLAT_SRC(2, SWIZZLE_ONE)

/* DP4 version seems to trigger some hw peculiarity */
//#define PREFER_DP4

/*
 * Reset the scratch-temporary allocator at the start of an instruction.
 * Scratch registers are handed out downwards from the top of the
 * temporary file; if the previous instruction's scratch index dropped
 * below the program's own temporary count, the program is flagged as
 * non-native.  Relies on `vp` and `u_temp_i` being in scope.
 */
#define FREE_TEMPS() \
	do { \
		if(u_temp_i < vp->num_temporaries) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
			vp->native = GL_FALSE; \
		} \
		u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
	} while (0)
411
412 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
413 struct prog_instruction *vpi)
414 {
415 int i, cur_reg = 0;
416 VERTEX_SHADER_INSTRUCTION *o_inst;
417 unsigned long operands;
418 int are_srcs_scalar;
419 unsigned long hw_op;
420 /* Initial value should be last tmp reg that hw supports.
421 Strangely enough r300 doesnt mind even though these would be out of range.
422 Smart enough to realize that it doesnt need it? */
423 int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
424 struct prog_src_register src[3];
425
426 vp->pos_end = 0; /* Not supported yet */
427 vp->program.length = 0;
428 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
429
430 for (i = 0; i < VERT_ATTRIB_MAX; i++)
431 vp->inputs[i] = -1;
432
433 for (i = 0; i < VERT_RESULT_MAX; i++)
434 vp->outputs[i] = -1;
435
436 assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
437
438 /* Assign outputs */
439 if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS))
440 vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
441
442 if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ))
443 vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
444
445 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0))
446 vp->outputs[VERT_RESULT_COL0] = cur_reg++;
447
448 if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1))
449 vp->outputs[VERT_RESULT_COL1] = cur_reg++;
450
451 #if 0 /* Not supported yet */
452 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0))
453 vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
454
455 if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1))
456 vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
457
458 if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC))
459 vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
460 #endif
461
462 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
463 if (vp->key.OutputsWritten & (1 << i))
464 vp->outputs[i] = cur_reg++;
465
466 vp->translated = GL_TRUE;
467 vp->native = GL_TRUE;
468
469 o_inst = vp->program.body.i;
470 for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) {
471 FREE_TEMPS();
472
473 if (!valid_dst(vp, &vpi->DstReg)) {
474 /* redirect result to unused temp */
475 vpi->DstReg.File = PROGRAM_TEMPORARY;
476 vpi->DstReg.Index = u_temp_i;
477 }
478
479 operands = op_operands(vpi->Opcode);
480 are_srcs_scalar = operands & SCALAR_FLAG;
481 operands &= OP_MASK;
482
483 for (i = 0; i < operands; i++)
484 src[i] = vpi->SrcReg[i];
485
486 if (operands == 3) { /* TODO: scalars */
487 if (CMP_SRCS(src[1], src[2])
488 || CMP_SRCS(src[0], src[2])) {
489 o_inst->op =
490 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
491 VSF_FLAG_ALL,
492 VSF_OUT_CLASS_TMP);
493
494 o_inst->src[0] =
495 MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
496 SWIZZLE_X, SWIZZLE_Y,
497 SWIZZLE_Z, SWIZZLE_W,
498 t_src_class(src[2].File),
499 VSF_FLAG_NONE) | (src[2].
500 RelAddr <<
501 4);
502
503 o_inst->src[1] = ZERO_SRC_2;
504 o_inst->src[2] = ZERO_SRC_2;
505 o_inst++;
506
507 src[2].File = PROGRAM_TEMPORARY;
508 src[2].Index = u_temp_i;
509 src[2].RelAddr = 0;
510 u_temp_i--;
511 }
512
513 }
514
515 if (operands >= 2) {
516 if (CMP_SRCS(src[1], src[0])) {
517 o_inst->op =
518 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
519 VSF_FLAG_ALL,
520 VSF_OUT_CLASS_TMP);
521
522 o_inst->src[0] =
523 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
524 SWIZZLE_X, SWIZZLE_Y,
525 SWIZZLE_Z, SWIZZLE_W,
526 t_src_class(src[0].File),
527 VSF_FLAG_NONE) | (src[0].
528 RelAddr <<
529 4);
530
531 o_inst->src[1] = ZERO_SRC_0;
532 o_inst->src[2] = ZERO_SRC_0;
533 o_inst++;
534
535 src[0].File = PROGRAM_TEMPORARY;
536 src[0].Index = u_temp_i;
537 src[0].RelAddr = 0;
538 u_temp_i--;
539 }
540 }
541
542 /* These ops need special handling. */
543 switch (vpi->Opcode) {
544 case OPCODE_POW:
545 o_inst->op =
546 MAKE_VSF_OP(R300_VPI_OUT_OP_POW,
547 t_dst_index(vp, &vpi->DstReg),
548 t_dst_mask(vpi->DstReg.WriteMask),
549 t_dst_class(vpi->DstReg.File));
550 o_inst->src[0] = t_src_scalar(vp, &src[0]);
551 o_inst->src[1] = ZERO_SRC_0;
552 o_inst->src[2] = t_src_scalar(vp, &src[1]);
553 goto next;
554
555 case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
556 case OPCODE_SWZ:
557 #if 1
558 o_inst->op =
559 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
560 t_dst_index(vp, &vpi->DstReg),
561 t_dst_mask(vpi->DstReg.WriteMask),
562 t_dst_class(vpi->DstReg.File));
563 o_inst->src[0] = t_src(vp, &src[0]);
564 o_inst->src[1] = ZERO_SRC_0;
565 o_inst->src[2] = ZERO_SRC_0;
566 #else
567 hw_op =
568 (src[0].File ==
569 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
570 R300_VPI_OUT_OP_MAD;
571
572 o_inst->op =
573 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
574 t_dst_mask(vpi->DstReg.WriteMask),
575 t_dst_class(vpi->DstReg.File));
576 o_inst->src[0] = t_src(vp, &src[0]);
577 o_inst->src[1] = ONE_SRC_0;
578 o_inst->src[2] = ZERO_SRC_0;
579 #endif
580
581 goto next;
582
583 case OPCODE_ADD:
584 #if 1
585 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
586 src[1].File ==
587 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
588 R300_VPI_OUT_OP_MAD;
589
590 o_inst->op =
591 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
592 t_dst_mask(vpi->DstReg.WriteMask),
593 t_dst_class(vpi->DstReg.File));
594 o_inst->src[0] = ONE_SRC_0;
595 o_inst->src[1] = t_src(vp, &src[0]);
596 o_inst->src[2] = t_src(vp, &src[1]);
597 #else
598 o_inst->op =
599 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
600 t_dst_index(vp, &vpi->DstReg),
601 t_dst_mask(vpi->DstReg.WriteMask),
602 t_dst_class(vpi->DstReg.File));
603 o_inst->src[0] = t_src(vp, &src[0]);
604 o_inst->src[1] = t_src(vp, &src[1]);
605 o_inst->src[2] = ZERO_SRC_1;
606
607 #endif
608 goto next;
609
610 case OPCODE_MAD:
611 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
612 src[1].File == PROGRAM_TEMPORARY &&
613 src[2].File ==
614 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
615 R300_VPI_OUT_OP_MAD;
616
617 o_inst->op =
618 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
619 t_dst_mask(vpi->DstReg.WriteMask),
620 t_dst_class(vpi->DstReg.File));
621 o_inst->src[0] = t_src(vp, &src[0]);
622 o_inst->src[1] = t_src(vp, &src[1]);
623 o_inst->src[2] = t_src(vp, &src[2]);
624 goto next;
625
626 case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
627 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
628 src[1].File ==
629 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
630 R300_VPI_OUT_OP_MAD;
631
632 o_inst->op =
633 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
634 t_dst_mask(vpi->DstReg.WriteMask),
635 t_dst_class(vpi->DstReg.File));
636 o_inst->src[0] = t_src(vp, &src[0]);
637 o_inst->src[1] = t_src(vp, &src[1]);
638
639 o_inst->src[2] = ZERO_SRC_1;
640 goto next;
641
642 case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
643 o_inst->op =
644 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
645 t_dst_index(vp, &vpi->DstReg),
646 t_dst_mask(vpi->DstReg.WriteMask),
647 t_dst_class(vpi->DstReg.File));
648
649 o_inst->src[0] =
650 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
651 t_swizzle(GET_SWZ
652 (src[0].Swizzle, 0)),
653 t_swizzle(GET_SWZ
654 (src[0].Swizzle, 1)),
655 t_swizzle(GET_SWZ
656 (src[0].Swizzle, 2)),
657 SWIZZLE_ZERO,
658 t_src_class(src[0].File),
659 src[0].
660 NegateBase ? VSF_FLAG_XYZ :
661 VSF_FLAG_NONE) | (src[0].
662 RelAddr << 4);
663
664 o_inst->src[1] =
665 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
666 t_swizzle(GET_SWZ
667 (src[1].Swizzle, 0)),
668 t_swizzle(GET_SWZ
669 (src[1].Swizzle, 1)),
670 t_swizzle(GET_SWZ
671 (src[1].Swizzle, 2)),
672 SWIZZLE_ZERO,
673 t_src_class(src[1].File),
674 src[1].
675 NegateBase ? VSF_FLAG_XYZ :
676 VSF_FLAG_NONE) | (src[1].
677 RelAddr << 4);
678
679 o_inst->src[2] = ZERO_SRC_1;
680 goto next;
681
682 case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
683 #if 1
684 hw_op = (src[0].File == PROGRAM_TEMPORARY &&
685 src[1].File ==
686 PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
687 R300_VPI_OUT_OP_MAD;
688
689 o_inst->op =
690 MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
691 t_dst_mask(vpi->DstReg.WriteMask),
692 t_dst_class(vpi->DstReg.File));
693 o_inst->src[0] = t_src(vp, &src[0]);
694 o_inst->src[1] = ONE_SRC_0;
695 o_inst->src[2] =
696 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
697 t_swizzle(GET_SWZ
698 (src[1].Swizzle, 0)),
699 t_swizzle(GET_SWZ
700 (src[1].Swizzle, 1)),
701 t_swizzle(GET_SWZ
702 (src[1].Swizzle, 2)),
703 t_swizzle(GET_SWZ
704 (src[1].Swizzle, 3)),
705 t_src_class(src[1].File),
706 (!src[1].
707 NegateBase) ? VSF_FLAG_ALL :
708 VSF_FLAG_NONE) | (src[1].
709 RelAddr << 4);
710 #else
711 o_inst->op =
712 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
713 t_dst_index(vp, &vpi->DstReg),
714 t_dst_mask(vpi->DstReg.WriteMask),
715 t_dst_class(vpi->DstReg.File));
716
717 o_inst->src[0] = t_src(vp, &src[0]);
718 o_inst->src[1] =
719 MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
720 t_swizzle(GET_SWZ
721 (src[1].Swizzle, 0)),
722 t_swizzle(GET_SWZ
723 (src[1].Swizzle, 1)),
724 t_swizzle(GET_SWZ
725 (src[1].Swizzle, 2)),
726 t_swizzle(GET_SWZ
727 (src[1].Swizzle, 3)),
728 t_src_class(src[1].File),
729 (!src[1].
730 NegateBase) ? VSF_FLAG_ALL :
731 VSF_FLAG_NONE) | (src[1].
732 RelAddr << 4);
733 o_inst->src[2] = 0;
734 #endif
735 goto next;
736
737 case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
738 o_inst->op =
739 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX,
740 t_dst_index(vp, &vpi->DstReg),
741 t_dst_mask(vpi->DstReg.WriteMask),
742 t_dst_class(vpi->DstReg.File));
743
744 o_inst->src[0] = t_src(vp, &src[0]);
745 o_inst->src[1] =
746 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
747 t_swizzle(GET_SWZ
748 (src[0].Swizzle, 0)),
749 t_swizzle(GET_SWZ
750 (src[0].Swizzle, 1)),
751 t_swizzle(GET_SWZ
752 (src[0].Swizzle, 2)),
753 t_swizzle(GET_SWZ
754 (src[0].Swizzle, 3)),
755 t_src_class(src[0].File),
756 (!src[0].
757 NegateBase) ? VSF_FLAG_ALL :
758 VSF_FLAG_NONE) | (src[0].
759 RelAddr << 4);
760 o_inst->src[2] = 0;
761 goto next;
762
763 case OPCODE_FLR:
764 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
765 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
766
767 o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
768 t_dst_mask(vpi->DstReg.
769 WriteMask),
770 VSF_OUT_CLASS_TMP);
771
772 o_inst->src[0] = t_src(vp, &src[0]);
773 o_inst->src[1] = ZERO_SRC_0;
774 o_inst->src[2] = ZERO_SRC_0;
775 o_inst++;
776
777 o_inst->op =
778 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
779 t_dst_index(vp, &vpi->DstReg),
780 t_dst_mask(vpi->DstReg.WriteMask),
781 t_dst_class(vpi->DstReg.File));
782
783 o_inst->src[0] = t_src(vp, &src[0]);
784 o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i,
785 VSF_IN_COMPONENT_X,
786 VSF_IN_COMPONENT_Y,
787 VSF_IN_COMPONENT_Z,
788 VSF_IN_COMPONENT_W,
789 VSF_IN_CLASS_TMP,
790 /* Not 100% sure about this */
791 (!src[0].
792 NegateBase) ?
793 VSF_FLAG_ALL :
794 VSF_FLAG_NONE
795 /*VSF_FLAG_ALL */ );
796
797 o_inst->src[2] = ZERO_SRC_0;
798 u_temp_i--;
799 goto next;
800
801 case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
802 o_inst->op =
803 MAKE_VSF_OP(R300_VPI_OUT_OP_LG2,
804 t_dst_index(vp, &vpi->DstReg),
805 t_dst_mask(vpi->DstReg.WriteMask),
806 t_dst_class(vpi->DstReg.File));
807
808 o_inst->src[0] =
809 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
810 t_swizzle(GET_SWZ
811 (src[0].Swizzle, 0)),
812 t_swizzle(GET_SWZ
813 (src[0].Swizzle, 0)),
814 t_swizzle(GET_SWZ
815 (src[0].Swizzle, 0)),
816 t_swizzle(GET_SWZ
817 (src[0].Swizzle, 0)),
818 t_src_class(src[0].File),
819 src[0].
820 NegateBase ? VSF_FLAG_ALL :
821 VSF_FLAG_NONE) | (src[0].
822 RelAddr << 4);
823 o_inst->src[1] = ZERO_SRC_0;
824 o_inst->src[2] = ZERO_SRC_0;
825 goto next;
826
827 case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
828 o_inst->op =
829 MAKE_VSF_OP(R300_VPI_OUT_OP_LIT,
830 t_dst_index(vp, &vpi->DstReg),
831 t_dst_mask(vpi->DstReg.WriteMask),
832 t_dst_class(vpi->DstReg.File));
833 /* NOTE: Users swizzling might not work. */
834 o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
835 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
836 VSF_IN_COMPONENT_ZERO, // z
837 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
838 t_src_class(src[0].
839 File),
840 src[0].
841 NegateBase ?
842 VSF_FLAG_ALL :
843 VSF_FLAG_NONE) |
844 (src[0].RelAddr << 4);
845 o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
846 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
847 VSF_IN_COMPONENT_ZERO, // z
848 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
849 t_src_class(src[0].
850 File),
851 src[0].
852 NegateBase ?
853 VSF_FLAG_ALL :
854 VSF_FLAG_NONE) |
855 (src[0].RelAddr << 4);
856 o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
857 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
858 VSF_IN_COMPONENT_ZERO, // z
859 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
860 t_src_class(src[0].
861 File),
862 src[0].
863 NegateBase ?
864 VSF_FLAG_ALL :
865 VSF_FLAG_NONE) |
866 (src[0].RelAddr << 4);
867 goto next;
868
869 case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
870 o_inst->op =
871 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT,
872 t_dst_index(vp, &vpi->DstReg),
873 t_dst_mask(vpi->DstReg.WriteMask),
874 t_dst_class(vpi->DstReg.File));
875
876 o_inst->src[0] =
877 MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
878 t_swizzle(GET_SWZ
879 (src[0].Swizzle, 0)),
880 t_swizzle(GET_SWZ
881 (src[0].Swizzle, 1)),
882 t_swizzle(GET_SWZ
883 (src[0].Swizzle, 2)),
884 VSF_IN_COMPONENT_ONE,
885 t_src_class(src[0].File),
886 src[0].
887 NegateBase ? VSF_FLAG_XYZ :
888 VSF_FLAG_NONE) | (src[0].
889 RelAddr << 4);
890 o_inst->src[1] = t_src(vp, &src[1]);
891 o_inst->src[2] = ZERO_SRC_1;
892 goto next;
893
894 case OPCODE_XPD:
895 /* mul r0, r1.yzxw, r2.zxyw
896 mad r0, -r2.yzxw, r1.zxyw, r0
897 NOTE: might need MAD_2
898 */
899
900 o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
901 t_dst_mask(vpi->DstReg.
902 WriteMask),
903 VSF_OUT_CLASS_TMP);
904
905 o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
906 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
907 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
908 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
909 t_src_class(src[0].
910 File),
911 src[0].
912 NegateBase ?
913 VSF_FLAG_ALL :
914 VSF_FLAG_NONE) |
915 (src[0].RelAddr << 4);
916
917 o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
918 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
919 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
920 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
921 t_src_class(src[1].
922 File),
923 src[1].
924 NegateBase ?
925 VSF_FLAG_ALL :
926 VSF_FLAG_NONE) |
927 (src[1].RelAddr << 4);
928
929 o_inst->src[2] = ZERO_SRC_1;
930 o_inst++;
931 u_temp_i--;
932
933 o_inst->op =
934 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD,
935 t_dst_index(vp, &vpi->DstReg),
936 t_dst_mask(vpi->DstReg.WriteMask),
937 t_dst_class(vpi->DstReg.File));
938
939 o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
940 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
941 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
942 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
943 t_src_class(src[1].
944 File),
945 (!src[1].
946 NegateBase) ?
947 VSF_FLAG_ALL :
948 VSF_FLAG_NONE) |
949 (src[1].RelAddr << 4);
950
951 o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
952 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
953 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
954 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
955 t_src_class(src[0].
956 File),
957 src[0].
958 NegateBase ?
959 VSF_FLAG_ALL :
960 VSF_FLAG_NONE) |
961 (src[0].RelAddr << 4);
962
963 o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1,
964 VSF_IN_COMPONENT_X,
965 VSF_IN_COMPONENT_Y,
966 VSF_IN_COMPONENT_Z,
967 VSF_IN_COMPONENT_W,
968 VSF_IN_CLASS_TMP,
969 VSF_FLAG_NONE);
970
971 goto next;
972
973 case OPCODE_RCC:
974 fprintf(stderr, "Dont know how to handle op %d yet\n",
975 vpi->Opcode);
976 _mesa_exit(-1);
977 break;
978 case OPCODE_END:
979 break;
980 default:
981 break;
982 }
983
984 o_inst->op =
985 MAKE_VSF_OP(t_opcode(vpi->Opcode),
986 t_dst_index(vp, &vpi->DstReg),
987 t_dst_mask(vpi->DstReg.WriteMask),
988 t_dst_class(vpi->DstReg.File));
989
990 if (are_srcs_scalar) {
991 switch (operands) {
992 case 1:
993 o_inst->src[0] = t_src_scalar(vp, &src[0]);
994 o_inst->src[1] = ZERO_SRC_0;
995 o_inst->src[2] = ZERO_SRC_0;
996 break;
997
998 case 2:
999 o_inst->src[0] = t_src_scalar(vp, &src[0]);
1000 o_inst->src[1] = t_src_scalar(vp, &src[1]);
1001 o_inst->src[2] = ZERO_SRC_1;
1002 break;
1003
1004 case 3:
1005 o_inst->src[0] = t_src_scalar(vp, &src[0]);
1006 o_inst->src[1] = t_src_scalar(vp, &src[1]);
1007 o_inst->src[2] = t_src_scalar(vp, &src[2]);
1008 break;
1009
1010 default:
1011 fprintf(stderr,
1012 "scalars and op RCC not handled yet");
1013 _mesa_exit(-1);
1014 break;
1015 }
1016 } else {
1017 switch (operands) {
1018 case 1:
1019 o_inst->src[0] = t_src(vp, &src[0]);
1020 o_inst->src[1] = ZERO_SRC_0;
1021 o_inst->src[2] = ZERO_SRC_0;
1022 break;
1023
1024 case 2:
1025 o_inst->src[0] = t_src(vp, &src[0]);
1026 o_inst->src[1] = t_src(vp, &src[1]);
1027 o_inst->src[2] = ZERO_SRC_1;
1028 break;
1029
1030 case 3:
1031 o_inst->src[0] = t_src(vp, &src[0]);
1032 o_inst->src[1] = t_src(vp, &src[1]);
1033 o_inst->src[2] = t_src(vp, &src[2]);
1034 break;
1035
1036 default:
1037 fprintf(stderr,
1038 "scalars and op RCC not handled yet");
1039 _mesa_exit(-1);
1040 break;
1041 }
1042 }
1043 next:;
1044 }
1045
1046 /* Will most likely segfault before we get here... fix later. */
1047 if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) {
1048 vp->program.length = 0;
1049 vp->native = GL_FALSE;
1050 return;
1051 }
1052 vp->program.length = (o_inst - vp->program.body.i) * 4;
1053 #if 0
1054 fprintf(stderr, "hw program:\n");
1055 for (i = 0; i < vp->program.length; i++)
1056 fprintf(stderr, "%08x\n", vp->program.body.d[i]);
1057 #endif
1058 }
1059
1060 static void position_invariant(struct gl_program *prog)
1061 {
1062 struct prog_instruction *vpi;
1063 struct gl_program_parameter_list *paramList;
1064 int i;
1065
1066 gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
1067
1068 /* tokens[4] = matrix modifier */
1069 #ifdef PREFER_DP4
1070 tokens[4] = 0; /* not transposed or inverted */
1071 #else
1072 tokens[4] = STATE_MATRIX_TRANSPOSE;
1073 #endif
1074 paramList = prog->Parameters;
1075
1076 vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
1077 _mesa_init_instructions(vpi, prog->NumInstructions + 4);
1078
1079 for (i = 0; i < 4; i++) {
1080 GLint idx;
1081 tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
1082 idx = _mesa_add_state_reference(paramList, tokens);
1083 #ifdef PREFER_DP4
1084 vpi[i].Opcode = OPCODE_DP4;
1085 vpi[i].StringPos = 0;
1086 vpi[i].Data = 0;
1087
1088 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1089 vpi[i].DstReg.Index = VERT_RESULT_HPOS;
1090 vpi[i].DstReg.WriteMask = 1 << i;
1091 vpi[i].DstReg.CondMask = COND_TR;
1092
1093 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1094 vpi[i].SrcReg[0].Index = idx;
1095 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1096
1097 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1098 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1099 vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
1100 #else
1101 if (i == 0)
1102 vpi[i].Opcode = OPCODE_MUL;
1103 else
1104 vpi[i].Opcode = OPCODE_MAD;
1105
1106 vpi[i].StringPos = 0;
1107 vpi[i].Data = 0;
1108
1109 if (i == 3)
1110 vpi[i].DstReg.File = PROGRAM_OUTPUT;
1111 else
1112 vpi[i].DstReg.File = PROGRAM_TEMPORARY;
1113 vpi[i].DstReg.Index = 0;
1114 vpi[i].DstReg.WriteMask = 0xf;
1115 vpi[i].DstReg.CondMask = COND_TR;
1116
1117 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
1118 vpi[i].SrcReg[0].Index = idx;
1119 vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1120
1121 vpi[i].SrcReg[1].File = PROGRAM_INPUT;
1122 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
1123 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
1124
1125 if (i > 0) {
1126 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
1127 vpi[i].SrcReg[2].Index = 0;
1128 vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
1129 }
1130 #endif
1131 }
1132
1133 _mesa_copy_instructions(&vpi[i], prog->Instructions,
1134 prog->NumInstructions);
1135
1136 free(prog->Instructions);
1137
1138 prog->Instructions = vpi;
1139
1140 prog->NumInstructions += 4;
1141 vpi = &prog->Instructions[prog->NumInstructions - 1];
1142
1143 assert(vpi->Opcode == OPCODE_END);
1144 }
1145
1146 static void insert_wpos(struct r300_vertex_program *vp,
1147 struct gl_program *prog, GLuint temp_index)
1148 {
1149 struct prog_instruction *vpi;
1150 struct prog_instruction *vpi_insert;
1151 int i = 0;
1152
1153 vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
1154 _mesa_init_instructions(vpi, prog->NumInstructions + 2);
1155 /* all but END */
1156 _mesa_copy_instructions(vpi, prog->Instructions,
1157 prog->NumInstructions - 1);
1158 /* END */
1159 _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
1160 &prog->Instructions[prog->NumInstructions - 1],
1161 1);
1162 vpi_insert = &vpi[prog->NumInstructions - 1];
1163
1164 vpi_insert[i].Opcode = OPCODE_MOV;
1165
1166 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1167 vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
1168 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1169 vpi_insert[i].DstReg.CondMask = COND_TR;
1170
1171 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1172 vpi_insert[i].SrcReg[0].Index = temp_index;
1173 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1174 i++;
1175
1176 vpi_insert[i].Opcode = OPCODE_MOV;
1177
1178 vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
1179 vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
1180 vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
1181 vpi_insert[i].DstReg.CondMask = COND_TR;
1182
1183 vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
1184 vpi_insert[i].SrcReg[0].Index = temp_index;
1185 vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
1186 i++;
1187
1188 free(prog->Instructions);
1189
1190 prog->Instructions = vpi;
1191
1192 prog->NumInstructions += i;
1193 vpi = &prog->Instructions[prog->NumInstructions - 1];
1194
1195 assert(vpi->Opcode == OPCODE_END);
1196 }
1197
1198 static void pos_as_texcoord(struct r300_vertex_program *vp,
1199 struct gl_program *prog)
1200 {
1201 struct prog_instruction *vpi;
1202 GLuint tempregi = prog->NumTemporaries;
1203 /* should do something else if no temps left... */
1204 prog->NumTemporaries++;
1205
1206 for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
1207 if (vpi->DstReg.File == PROGRAM_OUTPUT &&
1208 vpi->DstReg.Index == VERT_RESULT_HPOS) {
1209 vpi->DstReg.File = PROGRAM_TEMPORARY;
1210 vpi->DstReg.Index = tempregi;
1211 }
1212 }
1213 insert_wpos(vp, prog, tempregi);
1214 }
1215
1216 static struct r300_vertex_program *build_program(struct r300_vertex_program_key
1217 *wanted_key, struct gl_vertex_program
1218 *mesa_vp, GLint wpos_idx)
1219 {
1220 struct r300_vertex_program *vp;
1221
1222 vp = _mesa_calloc(sizeof(*vp));
1223 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
1224
1225 vp->wpos_idx = wpos_idx;
1226
1227 if (mesa_vp->IsPositionInvariant) {
1228 position_invariant(&mesa_vp->Base);
1229 }
1230
1231 if (wpos_idx > -1)
1232 pos_as_texcoord(vp, &mesa_vp->Base);
1233
1234 assert(mesa_vp->Base.NumInstructions);
1235
1236 vp->num_temporaries = mesa_vp->Base.NumTemporaries;
1237
1238 r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
1239
1240 return vp;
1241 }
1242
1243 void r300SelectVertexShader(r300ContextPtr r300)
1244 {
1245 GLcontext *ctx = ctx = r300->radeon.glCtx;
1246 GLuint InputsRead;
1247 struct r300_vertex_program_key wanted_key = { 0 };
1248 GLint i;
1249 struct r300_vertex_program_cont *vpc;
1250 struct r300_vertex_program *vp;
1251 GLint wpos_idx;
1252
1253 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
1254 InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
1255
1256 wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
1257
1258 wpos_idx = -1;
1259 if (InputsRead & FRAG_BIT_WPOS) {
1260 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1261 if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
1262 break;
1263
1264 if (i == ctx->Const.MaxTextureUnits) {
1265 fprintf(stderr, "\tno free texcoord found\n");
1266 _mesa_exit(-1);
1267 }
1268
1269 InputsRead |= (FRAG_BIT_TEX0 << i);
1270 wpos_idx = i;
1271 }
1272
1273 if (InputsRead & FRAG_BIT_COL0)
1274 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
1275
1276 if ((InputsRead & FRAG_BIT_COL1) /*||
1277 (InputsRead & FRAG_BIT_FOGC) */ )
1278 wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
1279
1280 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
1281 if (InputsRead & (FRAG_BIT_TEX0 << i))
1282 wanted_key.OutputsWritten |=
1283 1 << (VERT_RESULT_TEX0 + i);
1284
1285 wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
1286 if (vpc->mesa_program.IsPositionInvariant) {
1287 /* we wan't position don't we ? */
1288 wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
1289 }
1290
1291 for (vp = vpc->progs; vp; vp = vp->next)
1292 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) ==
1293 0) {
1294 r300->selected_vp = vp;
1295 return;
1296 }
1297 //_mesa_print_program(&vpc->mesa_program.Base);
1298
1299 vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
1300 vp->next = vpc->progs;
1301 vpc->progs = vp;
1302 r300->selected_vp = vp;
1303 }