1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * \author Aapo Tahkola <aet@rasterburn.org>
33 * \author Oliver McFadden <z3ro.geek@gmail.com>
35 * For a description of the vertex program instruction set see r300_reg.h.
42 #include "shader/prog_instruction.h"
43 #include "shader/prog_parameter.h"
44 #include "shader/prog_statevars.h"
47 #include "r300_context.h"
49 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
50 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
51 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
52 SWIZZLE_W != VSF_IN_COMPONENT_W || \
53 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
54 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
55 WRITEMASK_X != VSF_FLAG_X || \
56 WRITEMASK_Y != VSF_FLAG_Y || \
57 WRITEMASK_Z != VSF_FLAG_Z || \
58 WRITEMASK_W != VSF_FLAG_W
59 #error Cannot change these!
62 /* TODO: Get rid of t_src_class call */
63 #define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
64 ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
65 t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
66 (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
67 t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \
69 #define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
70 SWIZZLE_ZERO, SWIZZLE_ZERO, \
71 SWIZZLE_ZERO, SWIZZLE_ZERO, \
72 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
74 #define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
75 SWIZZLE_ZERO, SWIZZLE_ZERO, \
76 SWIZZLE_ZERO, SWIZZLE_ZERO, \
77 t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
79 #define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
80 SWIZZLE_ZERO, SWIZZLE_ZERO, \
81 SWIZZLE_ZERO, SWIZZLE_ZERO, \
82 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
84 #define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
85 SWIZZLE_ONE, SWIZZLE_ONE, \
86 SWIZZLE_ONE, SWIZZLE_ONE, \
87 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
89 #define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
90 SWIZZLE_ONE, SWIZZLE_ONE, \
91 SWIZZLE_ONE, SWIZZLE_ONE, \
92 t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
94 #define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
95 SWIZZLE_ONE, SWIZZLE_ONE, \
96 SWIZZLE_ONE, SWIZZLE_ONE, \
97 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
99 /* DP4 version seems to trigger some hw peculiarity */
102 #define FREE_TEMPS() \
104 if(u_temp_i < vp->num_temporaries) { \
105 WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
106 vp->native = GL_FALSE; \
108 u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
111 int r300VertexProgUpdateParams(GLcontext
* ctx
,
112 struct r300_vertex_program_cont
*vp
,
116 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
118 struct gl_program_parameter_list
*paramList
;
120 if (mesa_vp
->IsNVProgram
) {
121 _mesa_load_tracked_matrices(ctx
);
123 for (pi
= 0; pi
< MAX_NV_VERTEX_PROGRAM_PARAMS
; pi
++) {
124 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][0];
125 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][1];
126 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][2];
127 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][3];
132 assert(mesa_vp
->Base
.Parameters
);
133 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
135 if (mesa_vp
->Base
.Parameters
->NumParameters
* 4 >
136 VSF_MAX_FRAGMENT_LENGTH
) {
137 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
141 paramList
= mesa_vp
->Base
.Parameters
;
142 for (pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
143 switch (paramList
->Parameters
[pi
].Type
) {
145 case PROGRAM_STATE_VAR
:
146 case PROGRAM_NAMED_PARAM
:
147 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
148 case PROGRAM_CONSTANT
:
149 *dst
++ = paramList
->ParameterValues
[pi
][0];
150 *dst
++ = paramList
->ParameterValues
[pi
][1];
151 *dst
++ = paramList
->ParameterValues
[pi
][2];
152 *dst
++ = paramList
->ParameterValues
[pi
][3];
156 _mesa_problem(NULL
, "Bad param type in %s",
/* Translate a Mesa destination write mask into hardware write-mask bits.
 * Only the bits covered by VSF_FLAG_ALL are kept; no remapping is done. */
165 static unsigned long t_dst_mask(GLuint mask
)
167 /* WRITEMASK_* is equivalent to VSF_FLAG_* (enforced by the #error guard near the top of this file) */
168 return mask
& VSF_FLAG_ALL
;
171 static unsigned long t_dst_class(enum register_file file
)
175 case PROGRAM_TEMPORARY
:
176 return VSF_OUT_CLASS_TMP
;
178 return VSF_OUT_CLASS_RESULT
;
179 case PROGRAM_ADDRESS
:
180 return VSF_OUT_CLASS_ADDR
;
183 case PROGRAM_LOCAL_PARAM:
184 case PROGRAM_ENV_PARAM:
185 case PROGRAM_NAMED_PARAM:
186 case PROGRAM_STATE_VAR:
187 case PROGRAM_WRITE_ONLY:
188 case PROGRAM_ADDRESS:
191 fprintf(stderr
, "problem in %s", __FUNCTION__
);
197 static unsigned long t_dst_index(struct r300_vertex_program
*vp
,
198 struct prog_dst_register
*dst
)
200 if (dst
->File
== PROGRAM_OUTPUT
)
201 return vp
->outputs
[dst
->Index
];
206 static unsigned long t_src_class(enum register_file file
)
210 case PROGRAM_TEMPORARY
:
211 return VSF_IN_CLASS_TMP
;
214 return VSF_IN_CLASS_ATTR
;
216 case PROGRAM_LOCAL_PARAM
:
217 case PROGRAM_ENV_PARAM
:
218 case PROGRAM_NAMED_PARAM
:
219 case PROGRAM_STATE_VAR
:
220 return VSF_IN_CLASS_PARAM
;
223 case PROGRAM_WRITE_ONLY:
224 case PROGRAM_ADDRESS:
227 fprintf(stderr
, "problem in %s", __FUNCTION__
);
233 static inline unsigned long t_swizzle(GLubyte swizzle
)
235 /* This is in fact a no-op, as the Mesa SWIZZLE_* values are all identical to VSF_IN_COMPONENT_* (see the #error guard near the top of this file). */
240 static void vp_dump_inputs(struct r300_vertex_program
*vp
, char *caller
)
245 fprintf(stderr
, "vp null in call to %s from %s\n",
246 __FUNCTION__
, caller
);
250 fprintf(stderr
, "%s:<", caller
);
251 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
252 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
253 fprintf(stderr
, ">\n");
258 static unsigned long t_src_index(struct r300_vertex_program
*vp
,
259 struct prog_src_register
*src
)
264 if (src
->File
== PROGRAM_INPUT
) {
265 if (vp
->inputs
[src
->Index
] != -1)
266 return vp
->inputs
[src
->Index
];
268 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
269 if (vp
->inputs
[i
] > max_reg
)
270 max_reg
= vp
->inputs
[i
];
272 vp
->inputs
[src
->Index
] = max_reg
+ 1;
274 //vp_dump_inputs(vp, __FUNCTION__);
276 return vp
->inputs
[src
->Index
];
278 if (src
->Index
< 0) {
280 "negative offsets for indirect addressing do not work.\n");
287 /* these two functions should probably be merged... */
289 static unsigned long t_src(struct r300_vertex_program
*vp
,
290 struct prog_src_register
*src
)
292 /* src->NegateBase uses the NEGATE_ flags from prog_instruction.h,
293 * which equal our VSF_FLAG_ values, so it's safe to just pass it here.
295 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
296 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
297 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
298 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
299 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
300 t_src_class(src
->File
),
301 src
->NegateBase
) | (src
->RelAddr
<< 4);
304 static unsigned long t_src_scalar(struct r300_vertex_program
*vp
,
305 struct prog_src_register
*src
)
307 /* src->NegateBase uses the NEGATE_ flags from prog_instruction.h,
308 * which equal our VSF_FLAG_ values, so it's safe to just pass it here.
310 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
311 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
312 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
313 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
314 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
315 t_src_class(src
->File
),
317 NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
321 static GLboolean
valid_dst(struct r300_vertex_program
*vp
,
322 struct prog_dst_register
*dst
)
324 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
326 } else if (dst
->File
== PROGRAM_ADDRESS
) {
327 assert(dst
->Index
== 0);
334 * Instruction Inputs Output Description
335 * ----------- ------ ------ --------------------------------
336 * ABS v v absolute value
338 * ARL s a address register load
339 * DP3 v,v ssss 3-component dot product
340 * DP4 v,v ssss 4-component dot product
341 * DPH v,v ssss homogeneous dot product
342 * DST v,v v distance vector
343 * EX2 s ssss exponential base 2
344 * EXP s v exponential base 2 (approximate)
347 * LG2 s ssss logarithm base 2
348 * LIT v v compute light coefficients
349 * LOG s v logarithm base 2 (approximate)
350 * MAD v,v,v v multiply and add
355 * POW s,s ssss exponentiate
356 * RCP s ssss reciprocal
357 * RSQ s ssss reciprocal square root
358 * SGE v,v v set on greater than or equal
359 * SLT v,v v set on less than
361 * SWZ v v extended swizzle
362 * XPD v,v v cross product
364 * Table X.5: Summary of vertex program instructions. "v" indicates a
365 * floating-point vector input or output, "s" indicates a floating-point
366 * scalar input, "ssss" indicates a scalar output replicated across a
367 * 4-component result vector, and "a" indicates a single address register
371 static GLuint
*t_opcode_abs(struct r300_vertex_program
*vp
,
372 struct prog_instruction
*vpi
, GLuint
* inst
,
373 struct prog_src_register src
[3])
375 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
378 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX
, t_dst_index(vp
, &vpi
->DstReg
),
379 t_dst_mask(vpi
->DstReg
.WriteMask
),
380 t_dst_class(vpi
->DstReg
.File
));
382 inst
[1] = t_src(vp
, &src
[0]);
384 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
385 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
386 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
387 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
388 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
389 t_src_class(src
[0].File
),
391 NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
392 (src
[0].RelAddr
<< 4);
398 static GLuint
*t_opcode_add(struct r300_vertex_program
*vp
,
399 struct prog_instruction
*vpi
, GLuint
* inst
,
400 struct prog_src_register src
[3])
405 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
407 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
411 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
412 t_dst_mask(vpi
->DstReg
.WriteMask
),
413 t_dst_class(vpi
->DstReg
.File
));
415 inst
[2] = t_src(vp
, &src
[0]);
416 inst
[3] = t_src(vp
, &src
[1]);
419 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, t_dst_index(vp
, &vpi
->DstReg
),
420 t_dst_mask(vpi
->DstReg
.WriteMask
),
421 t_dst_class(vpi
->DstReg
.File
));
422 inst
[1] = t_src(vp
, &src
[0]);
423 inst
[2] = t_src(vp
, &src
[1]);
424 inst
[3] = ZERO_SRC_1
;
431 static GLuint
*t_opcode_arl(struct r300_vertex_program
*vp
,
432 struct prog_instruction
*vpi
, GLuint
* inst
,
433 struct prog_src_register src
[3])
436 MAKE_VSF_OP(R300_VPI_OUT_OP_ARL
, t_dst_index(vp
, &vpi
->DstReg
),
437 t_dst_mask(vpi
->DstReg
.WriteMask
),
438 t_dst_class(vpi
->DstReg
.File
));
440 inst
[1] = t_src(vp
, &src
[0]);
441 inst
[2] = ZERO_SRC_0
;
442 inst
[3] = ZERO_SRC_0
;
447 static GLuint
*t_opcode_dp3(struct r300_vertex_program
*vp
,
448 struct prog_instruction
*vpi
, GLuint
* inst
,
449 struct prog_src_register src
[3])
451 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
454 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT
, t_dst_index(vp
, &vpi
->DstReg
),
455 t_dst_mask(vpi
->DstReg
.WriteMask
),
456 t_dst_class(vpi
->DstReg
.File
));
459 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
460 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
461 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
462 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
463 SWIZZLE_ZERO
, t_src_class(src
[0].File
),
465 NegateBase
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
466 (src
[0].RelAddr
<< 4);
469 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
470 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
471 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
472 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
473 SWIZZLE_ZERO
, t_src_class(src
[1].File
),
475 NegateBase
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
476 (src
[1].RelAddr
<< 4);
478 inst
[3] = ZERO_SRC_1
;
483 static GLuint
*t_opcode_dp4(struct r300_vertex_program
*vp
,
484 struct prog_instruction
*vpi
, GLuint
* inst
,
485 struct prog_src_register src
[3])
488 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT
, t_dst_index(vp
, &vpi
->DstReg
),
489 t_dst_mask(vpi
->DstReg
.WriteMask
),
490 t_dst_class(vpi
->DstReg
.File
));
492 inst
[1] = t_src(vp
, &src
[0]);
493 inst
[2] = t_src(vp
, &src
[1]);
494 inst
[3] = ZERO_SRC_1
;
499 static GLuint
*t_opcode_dph(struct r300_vertex_program
*vp
,
500 struct prog_instruction
*vpi
, GLuint
* inst
,
501 struct prog_src_register src
[3])
503 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
505 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT
, t_dst_index(vp
, &vpi
->DstReg
),
506 t_dst_mask(vpi
->DstReg
.WriteMask
),
507 t_dst_class(vpi
->DstReg
.File
));
510 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
511 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
512 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
513 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
514 VSF_IN_COMPONENT_ONE
, t_src_class(src
[0].File
),
516 NegateBase
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
517 (src
[0].RelAddr
<< 4);
518 inst
[2] = t_src(vp
, &src
[1]);
519 inst
[3] = ZERO_SRC_1
;
524 static GLuint
*t_opcode_dst(struct r300_vertex_program
*vp
,
525 struct prog_instruction
*vpi
, GLuint
* inst
,
526 struct prog_src_register src
[3])
529 MAKE_VSF_OP(R300_VPI_OUT_OP_DST
, t_dst_index(vp
, &vpi
->DstReg
),
530 t_dst_mask(vpi
->DstReg
.WriteMask
),
531 t_dst_class(vpi
->DstReg
.File
));
533 inst
[1] = t_src(vp
, &src
[0]);
534 inst
[2] = t_src(vp
, &src
[1]);
535 inst
[3] = ZERO_SRC_1
;
540 static GLuint
*t_opcode_ex2(struct r300_vertex_program
*vp
,
541 struct prog_instruction
*vpi
, GLuint
* inst
,
542 struct prog_src_register src
[3])
545 MAKE_VSF_OP(R300_VPI_OUT_OP_EX2
, t_dst_index(vp
, &vpi
->DstReg
),
546 t_dst_mask(vpi
->DstReg
.WriteMask
),
547 t_dst_class(vpi
->DstReg
.File
));
549 inst
[1] = t_src_scalar(vp
, &src
[0]);
550 inst
[2] = ZERO_SRC_0
;
551 inst
[3] = ZERO_SRC_0
;
556 static GLuint
*t_opcode_exp(struct r300_vertex_program
*vp
,
557 struct prog_instruction
*vpi
, GLuint
* inst
,
558 struct prog_src_register src
[3])
561 MAKE_VSF_OP(R300_VPI_OUT_OP_EXP
, t_dst_index(vp
, &vpi
->DstReg
),
562 t_dst_mask(vpi
->DstReg
.WriteMask
),
563 t_dst_class(vpi
->DstReg
.File
));
565 inst
[1] = t_src_scalar(vp
, &src
[0]);
566 inst
[2] = ZERO_SRC_0
;
567 inst
[3] = ZERO_SRC_0
;
572 static GLuint
*t_opcode_flr(struct r300_vertex_program
*vp
,
573 struct prog_instruction
*vpi
, GLuint
* inst
,
574 struct prog_src_register src
[3], int *u_temp_i
)
576 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
577 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
580 MAKE_VSF_OP(R300_VPI_OUT_OP_FRC
, *u_temp_i
,
581 t_dst_mask(vpi
->DstReg
.WriteMask
),
584 inst
[1] = t_src(vp
, &src
[0]);
585 inst
[2] = ZERO_SRC_0
;
586 inst
[3] = ZERO_SRC_0
;
590 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, t_dst_index(vp
, &vpi
->DstReg
),
591 t_dst_mask(vpi
->DstReg
.WriteMask
),
592 t_dst_class(vpi
->DstReg
.File
));
594 inst
[1] = t_src(vp
, &src
[0]);
596 MAKE_VSF_SOURCE(*u_temp_i
, VSF_IN_COMPONENT_X
,
597 VSF_IN_COMPONENT_Y
, VSF_IN_COMPONENT_Z
,
598 VSF_IN_COMPONENT_W
, VSF_IN_CLASS_TMP
,
599 /* Not 100% sure about this */
601 NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
604 inst
[3] = ZERO_SRC_0
;
610 static GLuint
*t_opcode_frc(struct r300_vertex_program
*vp
,
611 struct prog_instruction
*vpi
, GLuint
* inst
,
612 struct prog_src_register src
[3])
615 MAKE_VSF_OP(R300_VPI_OUT_OP_FRC
, t_dst_index(vp
, &vpi
->DstReg
),
616 t_dst_mask(vpi
->DstReg
.WriteMask
),
617 t_dst_class(vpi
->DstReg
.File
));
619 inst
[1] = t_src(vp
, &src
[0]);
620 inst
[2] = ZERO_SRC_0
;
621 inst
[3] = ZERO_SRC_0
;
626 static GLuint
*t_opcode_lg2(struct r300_vertex_program
*vp
,
627 struct prog_instruction
*vpi
, GLuint
* inst
,
628 struct prog_src_register src
[3])
630 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
633 MAKE_VSF_OP(R300_VPI_OUT_OP_LG2
, t_dst_index(vp
, &vpi
->DstReg
),
634 t_dst_mask(vpi
->DstReg
.WriteMask
),
635 t_dst_class(vpi
->DstReg
.File
));
638 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
639 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
640 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
641 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
642 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
643 t_src_class(src
[0].File
),
645 NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
646 (src
[0].RelAddr
<< 4);
647 inst
[2] = ZERO_SRC_0
;
648 inst
[3] = ZERO_SRC_0
;
653 static GLuint
*t_opcode_lit(struct r300_vertex_program
*vp
,
654 struct prog_instruction
*vpi
, GLuint
* inst
,
655 struct prog_src_register src
[3])
657 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
660 MAKE_VSF_OP(R300_VPI_OUT_OP_LIT
, t_dst_index(vp
, &vpi
->DstReg
),
661 t_dst_mask(vpi
->DstReg
.WriteMask
),
662 t_dst_class(vpi
->DstReg
.File
));
663 /* NOTE: The user's swizzling might not work. */
664 inst
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
665 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
666 VSF_IN_COMPONENT_ZERO
, // z
667 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
668 t_src_class(src
[0].File
),
670 NegateBase
? VSF_FLAG_ALL
:
671 VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
672 inst
[2] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
673 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
674 VSF_IN_COMPONENT_ZERO
, // z
675 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
676 t_src_class(src
[0].File
),
678 NegateBase
? VSF_FLAG_ALL
:
679 VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
680 inst
[3] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
681 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
682 VSF_IN_COMPONENT_ZERO
, // z
683 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
684 t_src_class(src
[0].File
),
686 NegateBase
? VSF_FLAG_ALL
:
687 VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
692 static GLuint
*t_opcode_log(struct r300_vertex_program
*vp
,
693 struct prog_instruction
*vpi
, GLuint
* inst
,
694 struct prog_src_register src
[3])
697 MAKE_VSF_OP(R300_VPI_OUT_OP_LOG
, t_dst_index(vp
, &vpi
->DstReg
),
698 t_dst_mask(vpi
->DstReg
.WriteMask
),
699 t_dst_class(vpi
->DstReg
.File
));
701 inst
[1] = t_src_scalar(vp
, &src
[0]);
702 inst
[2] = ZERO_SRC_0
;
703 inst
[3] = ZERO_SRC_0
;
708 static GLuint
*t_opcode_mad(struct r300_vertex_program
*vp
,
709 struct prog_instruction
*vpi
, GLuint
* inst
,
710 struct prog_src_register src
[3])
714 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
715 && src
[1].File
== PROGRAM_TEMPORARY
717 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
721 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
722 t_dst_mask(vpi
->DstReg
.WriteMask
),
723 t_dst_class(vpi
->DstReg
.File
));
724 inst
[1] = t_src(vp
, &src
[0]);
725 inst
[2] = t_src(vp
, &src
[1]);
726 inst
[3] = t_src(vp
, &src
[2]);
731 static GLuint
*t_opcode_max(struct r300_vertex_program
*vp
,
732 struct prog_instruction
*vpi
, GLuint
* inst
,
733 struct prog_src_register src
[3])
736 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX
, t_dst_index(vp
, &vpi
->DstReg
),
737 t_dst_mask(vpi
->DstReg
.WriteMask
),
738 t_dst_class(vpi
->DstReg
.File
));
740 inst
[1] = t_src(vp
, &src
[0]);
741 inst
[2] = t_src(vp
, &src
[1]);
742 inst
[3] = ZERO_SRC_1
;
747 static GLuint
*t_opcode_min(struct r300_vertex_program
*vp
,
748 struct prog_instruction
*vpi
, GLuint
* inst
,
749 struct prog_src_register src
[3])
752 MAKE_VSF_OP(R300_VPI_OUT_OP_MIN
, t_dst_index(vp
, &vpi
->DstReg
),
753 t_dst_mask(vpi
->DstReg
.WriteMask
),
754 t_dst_class(vpi
->DstReg
.File
));
756 inst
[1] = t_src(vp
, &src
[0]);
757 inst
[2] = t_src(vp
, &src
[1]);
758 inst
[3] = ZERO_SRC_1
;
763 static GLuint
*t_opcode_mov(struct r300_vertex_program
*vp
,
764 struct prog_instruction
*vpi
, GLuint
* inst
,
765 struct prog_src_register src
[3])
767 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
771 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, t_dst_index(vp
, &vpi
->DstReg
),
772 t_dst_mask(vpi
->DstReg
.WriteMask
),
773 t_dst_class(vpi
->DstReg
.File
));
774 inst
[1] = t_src(vp
, &src
[0]);
775 inst
[2] = ZERO_SRC_0
;
776 inst
[3] = ZERO_SRC_0
;
780 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
784 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
785 t_dst_mask(vpi
->DstReg
.WriteMask
),
786 t_dst_class(vpi
->DstReg
.File
));
787 inst
[1] = t_src(vp
, &src
[0]);
789 inst
[3] = ZERO_SRC_0
;
795 static GLuint
*t_opcode_mul(struct r300_vertex_program
*vp
,
796 struct prog_instruction
*vpi
, GLuint
* inst
,
797 struct prog_src_register src
[3])
801 // HW mul can take third arg but appears to have some other limitations.
803 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
805 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
809 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
810 t_dst_mask(vpi
->DstReg
.WriteMask
),
811 t_dst_class(vpi
->DstReg
.File
));
812 inst
[1] = t_src(vp
, &src
[0]);
813 inst
[2] = t_src(vp
, &src
[1]);
815 inst
[3] = ZERO_SRC_1
;
820 static GLuint
*t_opcode_pow(struct r300_vertex_program
*vp
,
821 struct prog_instruction
*vpi
, GLuint
* inst
,
822 struct prog_src_register src
[3])
825 MAKE_VSF_OP(R300_VPI_OUT_OP_POW
, t_dst_index(vp
, &vpi
->DstReg
),
826 t_dst_mask(vpi
->DstReg
.WriteMask
),
827 t_dst_class(vpi
->DstReg
.File
));
828 inst
[1] = t_src_scalar(vp
, &src
[0]);
829 inst
[2] = ZERO_SRC_0
;
830 inst
[3] = t_src_scalar(vp
, &src
[1]);
835 static GLuint
*t_opcode_rcp(struct r300_vertex_program
*vp
,
836 struct prog_instruction
*vpi
, GLuint
* inst
,
837 struct prog_src_register src
[3])
840 MAKE_VSF_OP(R300_VPI_OUT_OP_RCP
, t_dst_index(vp
, &vpi
->DstReg
),
841 t_dst_mask(vpi
->DstReg
.WriteMask
),
842 t_dst_class(vpi
->DstReg
.File
));
844 inst
[1] = t_src_scalar(vp
, &src
[0]);
845 inst
[2] = ZERO_SRC_0
;
846 inst
[3] = ZERO_SRC_0
;
851 static GLuint
*t_opcode_rsq(struct r300_vertex_program
*vp
,
852 struct prog_instruction
*vpi
, GLuint
* inst
,
853 struct prog_src_register src
[3])
856 MAKE_VSF_OP(R300_VPI_OUT_OP_RSQ
, t_dst_index(vp
, &vpi
->DstReg
),
857 t_dst_mask(vpi
->DstReg
.WriteMask
),
858 t_dst_class(vpi
->DstReg
.File
));
860 inst
[1] = t_src_scalar(vp
, &src
[0]);
861 inst
[2] = ZERO_SRC_0
;
862 inst
[3] = ZERO_SRC_0
;
867 static GLuint
*t_opcode_sge(struct r300_vertex_program
*vp
,
868 struct prog_instruction
*vpi
, GLuint
* inst
,
869 struct prog_src_register src
[3])
872 MAKE_VSF_OP(R300_VPI_OUT_OP_SGE
, t_dst_index(vp
, &vpi
->DstReg
),
873 t_dst_mask(vpi
->DstReg
.WriteMask
),
874 t_dst_class(vpi
->DstReg
.File
));
876 inst
[1] = t_src(vp
, &src
[0]);
877 inst
[2] = t_src(vp
, &src
[1]);
878 inst
[3] = ZERO_SRC_1
;
883 static GLuint
*t_opcode_slt(struct r300_vertex_program
*vp
,
884 struct prog_instruction
*vpi
, GLuint
* inst
,
885 struct prog_src_register src
[3])
888 MAKE_VSF_OP(R300_VPI_OUT_OP_SLT
, t_dst_index(vp
, &vpi
->DstReg
),
889 t_dst_mask(vpi
->DstReg
.WriteMask
),
890 t_dst_class(vpi
->DstReg
.File
));
892 inst
[1] = t_src(vp
, &src
[0]);
893 inst
[2] = t_src(vp
, &src
[1]);
894 inst
[3] = ZERO_SRC_1
;
899 static GLuint
*t_opcode_sub(struct r300_vertex_program
*vp
,
900 struct prog_instruction
*vpi
, GLuint
* inst
,
901 struct prog_src_register src
[3])
905 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
908 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
910 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
914 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
915 t_dst_mask(vpi
->DstReg
.WriteMask
),
916 t_dst_class(vpi
->DstReg
.File
));
917 inst
[1] = t_src(vp
, &src
[0]);
920 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
921 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
922 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
923 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
924 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
925 t_src_class(src
[1].File
),
927 NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
928 (src
[1].RelAddr
<< 4);
931 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, t_dst_index(vp
, &vpi
->DstReg
),
932 t_dst_mask(vpi
->DstReg
.WriteMask
),
933 t_dst_class(vpi
->DstReg
.File
));
935 inst
[1] = t_src(vp
, &src
[0]);
937 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
938 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
939 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
940 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
941 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
942 t_src_class(src
[1].File
),
944 NegateBase
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
945 (src
[1].RelAddr
<< 4);
952 static GLuint
*t_opcode_swz(struct r300_vertex_program
*vp
,
953 struct prog_instruction
*vpi
, GLuint
* inst
,
954 struct prog_src_register src
[3])
956 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
960 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, t_dst_index(vp
, &vpi
->DstReg
),
961 t_dst_mask(vpi
->DstReg
.WriteMask
),
962 t_dst_class(vpi
->DstReg
.File
));
963 inst
[1] = t_src(vp
, &src
[0]);
964 inst
[2] = ZERO_SRC_0
;
965 inst
[3] = ZERO_SRC_0
;
969 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
973 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
974 t_dst_mask(vpi
->DstReg
.WriteMask
),
975 t_dst_class(vpi
->DstReg
.File
));
976 inst
[1] = t_src(vp
, &src
[0]);
978 inst
[3] = ZERO_SRC_0
;
984 static GLuint
*t_opcode_xpd(struct r300_vertex_program
*vp
,
985 struct prog_instruction
*vpi
, GLuint
* inst
,
986 struct prog_src_register src
[3], int *u_temp_i
)
988 /* mul r0, r1.yzxw, r2.zxyw
989 mad r0, -r2.yzxw, r1.zxyw, r0
990 NOTE: might need MAD_2
994 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD
, *u_temp_i
,
995 t_dst_mask(vpi
->DstReg
.WriteMask
),
998 inst
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
999 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
1000 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
1001 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
1002 t_src_class(src
[0].File
),
1004 NegateBase
? VSF_FLAG_ALL
:
1005 VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
1007 inst
[2] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
1008 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
1009 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
1010 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
1011 t_src_class(src
[1].File
),
1013 NegateBase
? VSF_FLAG_ALL
:
1014 VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
1016 inst
[3] = ZERO_SRC_1
;
1021 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD
, t_dst_index(vp
, &vpi
->DstReg
),
1022 t_dst_mask(vpi
->DstReg
.WriteMask
),
1023 t_dst_class(vpi
->DstReg
.File
));
1025 inst
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
1026 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
1027 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
1028 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
1029 t_src_class(src
[1].File
),
1031 NegateBase
) ? VSF_FLAG_ALL
:
1032 VSF_FLAG_NONE
) | (src
[1].RelAddr
<< 4);
1034 inst
[2] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
1035 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
1036 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
1037 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
1038 t_src_class(src
[0].File
),
1040 NegateBase
? VSF_FLAG_ALL
:
1041 VSF_FLAG_NONE
) | (src
[0].RelAddr
<< 4);
1044 MAKE_VSF_SOURCE(*u_temp_i
+ 1, VSF_IN_COMPONENT_X
,
1045 VSF_IN_COMPONENT_Y
, VSF_IN_COMPONENT_Z
,
1046 VSF_IN_COMPONENT_W
, VSF_IN_CLASS_TMP
,
1052 static void t_inputs_outputs(struct r300_vertex_program
*vp
)
1057 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
1060 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
1061 vp
->outputs
[i
] = -1;
1063 assert(vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
1065 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
1066 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
1069 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
1070 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
1073 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
1074 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
1077 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
1078 vp
->outputs
[VERT_RESULT_COL1
] =
1079 vp
->outputs
[VERT_RESULT_COL0
] + 1;
1080 cur_reg
= vp
->outputs
[VERT_RESULT_COL1
] + 1;
1083 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
1084 vp
->outputs
[VERT_RESULT_BFC0
] =
1085 vp
->outputs
[VERT_RESULT_COL0
] + 2;
1086 cur_reg
= vp
->outputs
[VERT_RESULT_BFC0
] + 2;
1089 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
1090 vp
->outputs
[VERT_RESULT_BFC1
] =
1091 vp
->outputs
[VERT_RESULT_COL0
] + 3;
1092 cur_reg
= vp
->outputs
[VERT_RESULT_BFC1
] + 1;
1095 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
1096 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
1100 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
1101 if (vp
->key
.OutputsWritten
& (1 << i
)) {
1102 vp
->outputs
[i
] = cur_reg
++;
1107 static void r300TranslateVertexShader(struct r300_vertex_program
*vp
,
1108 struct prog_instruction
*vpi
)
1112 unsigned long num_operands
;
1113 /* Initial value should be the last tmp reg that the hw supports.
1114 Strangely enough, r300 doesn't mind even though these would be out of range.
1115 Smart enough to realize that it doesn't need it? */
1116 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
1117 struct prog_src_register src
[3];
1119 vp
->pos_end
= 0; /* Not supported yet */
1120 vp
->program
.length
= 0;
1121 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
1122 vp
->translated
= GL_TRUE
;
1123 vp
->native
= GL_TRUE
;
1125 t_inputs_outputs(vp
);
1127 for (inst
= vp
->program
.body
.i
; vpi
->Opcode
!= OPCODE_END
;
1132 if (!valid_dst(vp
, &vpi
->DstReg
)) {
1133 /* redirect result to unused temp */
1134 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1135 vpi
->DstReg
.Index
= u_temp_i
;
1138 num_operands
= _mesa_num_inst_src_regs(vpi
->Opcode
);
1140 /* copy the sources (src) from mesa into a local variable... is this needed? */
1141 for (i
= 0; i
< num_operands
; i
++) {
1142 src
[i
] = vpi
->SrcReg
[i
];
1145 if (num_operands
== 3) { /* TODO: scalars */
1146 if (CMP_SRCS(src
[1], src
[2])
1147 || CMP_SRCS(src
[0], src
[2])) {
1149 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
1150 u_temp_i
, VSF_FLAG_ALL
,
1154 MAKE_VSF_SOURCE(t_src_index
1156 SWIZZLE_X
, SWIZZLE_Y
,
1157 SWIZZLE_Z
, SWIZZLE_W
,
1161 (src
[2].RelAddr
<< 4);
1163 inst
[2] = ZERO_SRC_2
;
1164 inst
[3] = ZERO_SRC_2
;
1167 src
[2].File
= PROGRAM_TEMPORARY
;
1168 src
[2].Index
= u_temp_i
;
1174 if (num_operands
>= 2) {
1175 if (CMP_SRCS(src
[1], src
[0])) {
1177 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
1178 u_temp_i
, VSF_FLAG_ALL
,
1182 MAKE_VSF_SOURCE(t_src_index
1184 SWIZZLE_X
, SWIZZLE_Y
,
1185 SWIZZLE_Z
, SWIZZLE_W
,
1189 (src
[0].RelAddr
<< 4);
1191 inst
[2] = ZERO_SRC_0
;
1192 inst
[3] = ZERO_SRC_0
;
1195 src
[0].File
= PROGRAM_TEMPORARY
;
1196 src
[0].Index
= u_temp_i
;
1202 switch (vpi
->Opcode
) {
1204 inst
= t_opcode_abs(vp
, vpi
, inst
, src
);
1207 inst
= t_opcode_add(vp
, vpi
, inst
, src
);
1210 inst
= t_opcode_arl(vp
, vpi
, inst
, src
);
1213 inst
= t_opcode_dp3(vp
, vpi
, inst
, src
);
1216 inst
= t_opcode_dp4(vp
, vpi
, inst
, src
);
1219 inst
= t_opcode_dph(vp
, vpi
, inst
, src
);
1222 inst
= t_opcode_dst(vp
, vpi
, inst
, src
);
1225 inst
= t_opcode_ex2(vp
, vpi
, inst
, src
);
1228 inst
= t_opcode_exp(vp
, vpi
, inst
, src
);
1232 t_opcode_flr(vp
, vpi
, inst
, src
, /* FIXME */
1236 inst
= t_opcode_frc(vp
, vpi
, inst
, src
);
1239 inst
= t_opcode_lg2(vp
, vpi
, inst
, src
);
1242 inst
= t_opcode_lit(vp
, vpi
, inst
, src
);
1245 inst
= t_opcode_log(vp
, vpi
, inst
, src
);
1248 inst
= t_opcode_mad(vp
, vpi
, inst
, src
);
1251 inst
= t_opcode_max(vp
, vpi
, inst
, src
);
1254 inst
= t_opcode_min(vp
, vpi
, inst
, src
);
1257 inst
= t_opcode_mov(vp
, vpi
, inst
, src
);
1260 inst
= t_opcode_mul(vp
, vpi
, inst
, src
);
1263 inst
= t_opcode_pow(vp
, vpi
, inst
, src
);
1266 inst
= t_opcode_rcp(vp
, vpi
, inst
, src
);
1269 inst
= t_opcode_rsq(vp
, vpi
, inst
, src
);
1272 inst
= t_opcode_sge(vp
, vpi
, inst
, src
);
1275 inst
= t_opcode_slt(vp
, vpi
, inst
, src
);
1278 inst
= t_opcode_sub(vp
, vpi
, inst
, src
);
1281 inst
= t_opcode_swz(vp
, vpi
, inst
, src
);
1285 t_opcode_xpd(vp
, vpi
, inst
, src
, /* FIXME */
1294 vp
->program
.length
= (inst
- vp
->program
.body
.i
);
1295 if (vp
->program
.length
>= VSF_MAX_FRAGMENT_LENGTH
) {
1296 vp
->program
.length
= 0;
1297 vp
->native
= GL_FALSE
;
1300 fprintf(stderr
, "hw program:\n");
1301 for (i
= 0; i
< vp
->program
.length
; i
++)
1302 fprintf(stderr
, "%08x\n", vp
->program
.body
.d
[i
]);
1306 static void position_invariant(struct gl_program
*prog
)
1308 struct prog_instruction
*vpi
;
1309 struct gl_program_parameter_list
*paramList
;
1312 gl_state_index tokens
[STATE_LENGTH
] =
1313 { STATE_MVP_MATRIX
, 0, 0, 0, 0 };
1315 /* tokens[4] = matrix modifier */
1317 tokens
[4] = 0; /* not transposed or inverted */
1319 tokens
[4] = STATE_MATRIX_TRANSPOSE
;
1321 paramList
= prog
->Parameters
;
1323 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 4);
1324 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 4);
1326 for (i
= 0; i
< 4; i
++) {
1328 tokens
[2] = tokens
[3] = i
; /* matrix row[i]..row[i] */
1329 idx
= _mesa_add_state_reference(paramList
, tokens
);
1331 vpi
[i
].Opcode
= OPCODE_DP4
;
1332 vpi
[i
].StringPos
= 0;
1335 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1336 vpi
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1337 vpi
[i
].DstReg
.WriteMask
= 1 << i
;
1338 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1340 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1341 vpi
[i
].SrcReg
[0].Index
= idx
;
1342 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1344 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1345 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1346 vpi
[i
].SrcReg
[1].Swizzle
= SWIZZLE_XYZW
;
1349 vpi
[i
].Opcode
= OPCODE_MUL
;
1351 vpi
[i
].Opcode
= OPCODE_MAD
;
1353 vpi
[i
].StringPos
= 0;
1357 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1359 vpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
1360 vpi
[i
].DstReg
.Index
= 0;
1361 vpi
[i
].DstReg
.WriteMask
= 0xf;
1362 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1364 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1365 vpi
[i
].SrcReg
[0].Index
= idx
;
1366 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1368 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1369 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1370 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(i
, i
, i
, i
);
1373 vpi
[i
].SrcReg
[2].File
= PROGRAM_TEMPORARY
;
1374 vpi
[i
].SrcReg
[2].Index
= 0;
1375 vpi
[i
].SrcReg
[2].Swizzle
= SWIZZLE_XYZW
;
1380 _mesa_copy_instructions(&vpi
[i
], prog
->Instructions
,
1381 prog
->NumInstructions
);
1383 free(prog
->Instructions
);
1385 prog
->Instructions
= vpi
;
1387 prog
->NumInstructions
+= 4;
1388 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1390 assert(vpi
->Opcode
== OPCODE_END
);
1393 static void insert_wpos(struct r300_vertex_program
*vp
,
1394 struct gl_program
*prog
, GLuint temp_index
)
1396 struct prog_instruction
*vpi
;
1397 struct prog_instruction
*vpi_insert
;
1400 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 2);
1401 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 2);
1403 _mesa_copy_instructions(vpi
, prog
->Instructions
,
1404 prog
->NumInstructions
- 1);
1406 _mesa_copy_instructions(&vpi
[prog
->NumInstructions
+ 1],
1407 &prog
->Instructions
[prog
->NumInstructions
-
1409 vpi_insert
= &vpi
[prog
->NumInstructions
- 1];
1411 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1413 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1414 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1415 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1416 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1418 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1419 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1420 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1423 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1425 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1426 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_TEX0
+ vp
->wpos_idx
;
1427 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1428 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1430 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1431 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1432 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1435 free(prog
->Instructions
);
1437 prog
->Instructions
= vpi
;
1439 prog
->NumInstructions
+= i
;
1440 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1442 assert(vpi
->Opcode
== OPCODE_END
);
1445 static void pos_as_texcoord(struct r300_vertex_program
*vp
,
1446 struct gl_program
*prog
)
1448 struct prog_instruction
*vpi
;
1449 GLuint tempregi
= prog
->NumTemporaries
;
1450 /* should do something else if no temps left... */
1451 prog
->NumTemporaries
++;
1453 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
1454 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
1455 && vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
1456 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1457 vpi
->DstReg
.Index
= tempregi
;
1460 insert_wpos(vp
, prog
, tempregi
);
1463 static struct r300_vertex_program
*build_program(struct r300_vertex_program_key
1464 *wanted_key
, struct gl_vertex_program
1465 *mesa_vp
, GLint wpos_idx
)
1467 struct r300_vertex_program
*vp
;
1469 vp
= _mesa_calloc(sizeof(*vp
));
1470 _mesa_memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
1471 vp
->wpos_idx
= wpos_idx
;
1473 if (mesa_vp
->IsPositionInvariant
) {
1474 position_invariant(&mesa_vp
->Base
);
1477 if (wpos_idx
> -1) {
1478 pos_as_texcoord(vp
, &mesa_vp
->Base
);
1481 assert(mesa_vp
->Base
.NumInstructions
);
1482 vp
->num_temporaries
= mesa_vp
->Base
.NumTemporaries
;
1483 r300TranslateVertexShader(vp
, mesa_vp
->Base
.Instructions
);
1488 void r300SelectVertexShader(r300ContextPtr r300
)
1490 GLcontext
*ctx
= ctx
= r300
->radeon
.glCtx
;
1492 struct r300_vertex_program_key wanted_key
= { 0 };
1494 struct r300_vertex_program_cont
*vpc
;
1495 struct r300_vertex_program
*vp
;
1499 (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
1500 InputsRead
= ctx
->FragmentProgram
._Current
->Base
.InputsRead
;
1503 if (InputsRead
& FRAG_BIT_WPOS
) {
1504 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1505 if (!(InputsRead
& (FRAG_BIT_TEX0
<< i
)))
1508 if (i
== ctx
->Const
.MaxTextureUnits
) {
1509 fprintf(stderr
, "\tno free texcoord found\n");
1513 InputsRead
|= (FRAG_BIT_TEX0
<< i
);
1516 wanted_key
.InputsRead
= vpc
->mesa_program
.Base
.InputsRead
;
1517 wanted_key
.OutputsWritten
= vpc
->mesa_program
.Base
.OutputsWritten
;
1519 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_HPOS
;
1521 if (InputsRead
& FRAG_BIT_COL0
) {
1522 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_COL0
;
1525 if ((InputsRead
& FRAG_BIT_COL1
)) {
1526 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_COL1
;
1529 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++) {
1530 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1531 wanted_key
.OutputsWritten
|=
1532 1 << (VERT_RESULT_TEX0
+ i
);
1536 if (vpc
->mesa_program
.IsPositionInvariant
) {
1537 /* we wan't position don't we ? */
1538 wanted_key
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
1539 wanted_key
.OutputsWritten
|= (1 << VERT_RESULT_HPOS
);
1542 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
)
1543 if (_mesa_memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
))
1545 r300
->selected_vp
= vp
;
1548 //_mesa_print_program(&vpc->mesa_program.Base);
1550 vp
= build_program(&wanted_key
, &vpc
->mesa_program
, wpos_idx
);
1551 vp
->next
= vpc
->progs
;
1553 r300
->selected_vp
= vp
;