1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
29 /* Radeon R5xx Acceleration, Revision 1.2 */
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/programopt.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_parameter.h"
38 #include "shader/prog_print.h"
39 #include "shader/prog_statevars.h"
42 #include "r300_context.h"
43 #include "r300_state.h"
/* TODO: Get rid of t_src_class call */
/*
 * Non-zero when sources a and b differ in RelAddr, or when they have
 * different indices while both are constants or both are inputs.
 * r300TranslateVertexShader() uses this to decide when one of the sources
 * must first be copied into a temporary.
 *
 * Both arguments are expanded several times; pass side-effect-free
 * expressions. Each use is parenthesised so that expressions such as
 * *ptr or arr[i] are accepted.
 */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || \
	((a).Index != (b).Index && \
	 ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	   t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	  (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	   t_src_class((b).File) == PVS_SRC_REG_INPUT))))
/*
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 *
 * x selects the entry of the local `src` array to reuse, y is the swizzle
 * applied to all four components. The macro relies on `vp` and `src` being
 * in scope at the point of use.
 * NOTE(review): the four swizzle operands were missing from the listing this
 * was restored from; they are reconstructed as t_swizzle(y) - confirm.
 * NOTE(review): identifiers starting with a double underscore are reserved;
 * renaming would have to touch every user of the macro.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_src_class(src[x].File), \
			 VSF_FLAG_NONE) | (src[x].RelAddr << 4))
/*
 * Release the driver-internal temporaries: flag an error on `vp` if the
 * program's own temporaries plus the ones handed out since the last reset
 * exceed VSF_MAX_FRAGMENT_TEMPS, then reset `u_temp_i` to the last register.
 * Relies on `vp` and `u_temp_i` being in scope. Wrapped in do/while(0) so it
 * behaves as a single statement.
 */
#define FREE_TEMPS() \
	do { \
		int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
		if ((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
			vp->error = GL_TRUE; \
		} \
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; \
	} while (0)
75 int r300VertexProgUpdateParams(GLcontext
* ctx
,
76 struct r300_vertex_program_cont
*vp
, float *dst
)
79 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
81 struct gl_program_parameter_list
*paramList
;
83 if (mesa_vp
->IsNVProgram
) {
84 _mesa_load_tracked_matrices(ctx
);
86 for (pi
= 0; pi
< MAX_NV_VERTEX_PROGRAM_PARAMS
; pi
++) {
87 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][0];
88 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][1];
89 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][2];
90 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][3];
95 assert(mesa_vp
->Base
.Parameters
);
96 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
98 if (mesa_vp
->Base
.Parameters
->NumParameters
* 4 >
99 VSF_MAX_FRAGMENT_LENGTH
) {
100 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
104 paramList
= mesa_vp
->Base
.Parameters
;
105 for (pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
106 switch (paramList
->Parameters
[pi
].Type
) {
107 case PROGRAM_STATE_VAR
:
108 case PROGRAM_NAMED_PARAM
:
109 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
110 case PROGRAM_CONSTANT
:
111 *dst
++ = paramList
->ParameterValues
[pi
][0];
112 *dst
++ = paramList
->ParameterValues
[pi
][1];
113 *dst
++ = paramList
->ParameterValues
[pi
][2];
114 *dst
++ = paramList
->ParameterValues
[pi
][3];
117 _mesa_problem(NULL
, "Bad param type in %s",
126 static unsigned long t_dst_mask(GLuint mask
)
128 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
129 return mask
& VSF_FLAG_ALL
;
132 static unsigned long t_dst_class(gl_register_file file
)
136 case PROGRAM_TEMPORARY
:
137 return PVS_DST_REG_TEMPORARY
;
139 return PVS_DST_REG_OUT
;
140 case PROGRAM_ADDRESS
:
141 return PVS_DST_REG_A0
;
144 case PROGRAM_LOCAL_PARAM:
145 case PROGRAM_ENV_PARAM:
146 case PROGRAM_NAMED_PARAM:
147 case PROGRAM_STATE_VAR:
148 case PROGRAM_WRITE_ONLY:
149 case PROGRAM_ADDRESS:
152 fprintf(stderr
, "problem in %s", __FUNCTION__
);
158 static unsigned long t_dst_index(struct r300_vertex_program
*vp
,
159 struct prog_dst_register
*dst
)
161 if (dst
->File
== PROGRAM_OUTPUT
)
162 return vp
->outputs
[dst
->Index
];
167 static unsigned long t_src_class(gl_register_file file
)
170 case PROGRAM_TEMPORARY
:
171 return PVS_SRC_REG_TEMPORARY
;
173 return PVS_SRC_REG_INPUT
;
174 case PROGRAM_LOCAL_PARAM
:
175 case PROGRAM_ENV_PARAM
:
176 case PROGRAM_NAMED_PARAM
:
177 case PROGRAM_CONSTANT
:
178 case PROGRAM_STATE_VAR
:
179 return PVS_SRC_REG_CONSTANT
;
182 case PROGRAM_WRITE_ONLY:
183 case PROGRAM_ADDRESS:
186 fprintf(stderr
, "problem in %s", __FUNCTION__
);
192 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
194 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
#if 0
/**
 * Debug helper: print the vp->inputs[] table to stderr, prefixed with the
 * caller's name.
 * NOTE(review): kept compiled out; the listing this was restored from has a
 * dropped line directly before and after the function, presumed to be the
 * #if 0 / #endif pair - confirm against upstream.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int i;

	if (vp == NULL) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	for (i = 0; i < VERT_ATTRIB_MAX; i++)
		fprintf(stderr, "%d ", vp->inputs[i]);
	fprintf(stderr, ">\n");
}
#endif
217 static unsigned long t_src_index(struct r300_vertex_program
*vp
,
218 struct prog_src_register
*src
)
220 if (src
->File
== PROGRAM_INPUT
) {
221 assert(vp
->inputs
[src
->Index
] != -1);
222 return vp
->inputs
[src
->Index
];
224 if (src
->Index
< 0) {
226 "negative offsets for indirect addressing do not work.\n");
233 /* these two functions should probably be merged... */
235 static unsigned long t_src(struct r300_vertex_program
*vp
,
236 struct prog_src_register
*src
)
238 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
239 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
241 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
242 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
243 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
244 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
245 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
246 t_src_class(src
->File
),
247 src
->Negate
) | (src
->RelAddr
<< 4);
250 static unsigned long t_src_scalar(struct r300_vertex_program
*vp
,
251 struct prog_src_register
*src
)
253 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
254 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
256 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
257 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
258 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
259 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
260 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
261 t_src_class(src
->File
),
262 src
->Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
266 static GLboolean
valid_dst(struct r300_vertex_program
*vp
,
267 struct prog_dst_register
*dst
)
269 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
271 } else if (dst
->File
== PROGRAM_ADDRESS
) {
272 assert(dst
->Index
== 0);
278 static GLuint
*r300TranslateOpcodeABS(struct r300_vertex_program
*vp
,
279 struct prog_instruction
*vpi
,
281 struct prog_src_register src
[3])
283 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
285 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
288 t_dst_index(vp
, &vpi
->DstReg
),
289 t_dst_mask(vpi
->DstReg
.WriteMask
),
290 t_dst_class(vpi
->DstReg
.File
));
291 inst
[1] = t_src(vp
, &src
[0]);
292 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
293 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
294 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
295 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
296 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
297 t_src_class(src
[0].File
),
299 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
300 (src
[0].RelAddr
<< 4);
306 static GLuint
*r300TranslateOpcodeADD(struct r300_vertex_program
*vp
,
307 struct prog_instruction
*vpi
,
309 struct prog_src_register src
[3])
311 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
314 t_dst_index(vp
, &vpi
->DstReg
),
315 t_dst_mask(vpi
->DstReg
.WriteMask
),
316 t_dst_class(vpi
->DstReg
.File
));
317 inst
[1] = t_src(vp
, &src
[0]);
318 inst
[2] = t_src(vp
, &src
[1]);
319 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
324 static GLuint
*r300TranslateOpcodeARL(struct r300_vertex_program
*vp
,
325 struct prog_instruction
*vpi
,
327 struct prog_src_register src
[3])
329 inst
[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX
,
332 t_dst_index(vp
, &vpi
->DstReg
),
333 t_dst_mask(vpi
->DstReg
.WriteMask
),
334 t_dst_class(vpi
->DstReg
.File
));
335 inst
[1] = t_src(vp
, &src
[0]);
336 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
337 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
342 static GLuint
*r300TranslateOpcodeDP3(struct r300_vertex_program
*vp
,
343 struct prog_instruction
*vpi
,
345 struct prog_src_register src
[3])
347 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
349 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
352 t_dst_index(vp
, &vpi
->DstReg
),
353 t_dst_mask(vpi
->DstReg
.WriteMask
),
354 t_dst_class(vpi
->DstReg
.File
));
355 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
356 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
357 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
358 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
360 t_src_class(src
[0].File
),
361 src
[0].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
362 (src
[0].RelAddr
<< 4);
364 PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
365 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
366 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
367 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), SWIZZLE_ZERO
,
368 t_src_class(src
[1].File
),
369 src
[1].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
370 (src
[1].RelAddr
<< 4);
371 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
376 static GLuint
*r300TranslateOpcodeDP4(struct r300_vertex_program
*vp
,
377 struct prog_instruction
*vpi
,
379 struct prog_src_register src
[3])
381 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
384 t_dst_index(vp
, &vpi
->DstReg
),
385 t_dst_mask(vpi
->DstReg
.WriteMask
),
386 t_dst_class(vpi
->DstReg
.File
));
387 inst
[1] = t_src(vp
, &src
[0]);
388 inst
[2] = t_src(vp
, &src
[1]);
389 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
394 static GLuint
*r300TranslateOpcodeDPH(struct r300_vertex_program
*vp
,
395 struct prog_instruction
*vpi
,
397 struct prog_src_register src
[3])
399 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
400 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
403 t_dst_index(vp
, &vpi
->DstReg
),
404 t_dst_mask(vpi
->DstReg
.WriteMask
),
405 t_dst_class(vpi
->DstReg
.File
));
406 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
407 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
408 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
409 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
410 PVS_SRC_SELECT_FORCE_1
,
411 t_src_class(src
[0].File
),
412 src
[0].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
413 (src
[0].RelAddr
<< 4);
414 inst
[2] = t_src(vp
, &src
[1]);
415 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
420 static GLuint
*r300TranslateOpcodeDST(struct r300_vertex_program
*vp
,
421 struct prog_instruction
*vpi
,
423 struct prog_src_register src
[3])
425 inst
[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR
,
428 t_dst_index(vp
, &vpi
->DstReg
),
429 t_dst_mask(vpi
->DstReg
.WriteMask
),
430 t_dst_class(vpi
->DstReg
.File
));
431 inst
[1] = t_src(vp
, &src
[0]);
432 inst
[2] = t_src(vp
, &src
[1]);
433 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
438 static GLuint
*r300TranslateOpcodeEX2(struct r300_vertex_program
*vp
,
439 struct prog_instruction
*vpi
,
441 struct prog_src_register src
[3])
443 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX
,
446 t_dst_index(vp
, &vpi
->DstReg
),
447 t_dst_mask(vpi
->DstReg
.WriteMask
),
448 t_dst_class(vpi
->DstReg
.File
));
449 inst
[1] = t_src_scalar(vp
, &src
[0]);
450 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
451 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
456 static GLuint
*r300TranslateOpcodeEXP(struct r300_vertex_program
*vp
,
457 struct prog_instruction
*vpi
,
459 struct prog_src_register src
[3])
461 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX
,
464 t_dst_index(vp
, &vpi
->DstReg
),
465 t_dst_mask(vpi
->DstReg
.WriteMask
),
466 t_dst_class(vpi
->DstReg
.File
));
467 inst
[1] = t_src_scalar(vp
, &src
[0]);
468 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
469 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
474 static GLuint
*r300TranslateOpcodeFLR(struct r300_vertex_program
*vp
,
475 struct prog_instruction
*vpi
,
477 struct prog_src_register src
[3],
480 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
481 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
483 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
487 t_dst_mask(vpi
->DstReg
.WriteMask
),
488 PVS_DST_REG_TEMPORARY
);
489 inst
[1] = t_src(vp
, &src
[0]);
490 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
491 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
494 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
497 t_dst_index(vp
, &vpi
->DstReg
),
498 t_dst_mask(vpi
->DstReg
.WriteMask
),
499 t_dst_class(vpi
->DstReg
.File
));
500 inst
[1] = t_src(vp
, &src
[0]);
501 inst
[2] = PVS_SRC_OPERAND(*u_temp_i
,
505 PVS_SRC_SELECT_W
, PVS_SRC_REG_TEMPORARY
,
506 /* Not 100% sure about this */
508 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
510 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
516 static GLuint
*r300TranslateOpcodeFRC(struct r300_vertex_program
*vp
,
517 struct prog_instruction
*vpi
,
519 struct prog_src_register src
[3])
521 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
524 t_dst_index(vp
, &vpi
->DstReg
),
525 t_dst_mask(vpi
->DstReg
.WriteMask
),
526 t_dst_class(vpi
->DstReg
.File
));
527 inst
[1] = t_src(vp
, &src
[0]);
528 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
529 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
534 static GLuint
*r300TranslateOpcodeLG2(struct r300_vertex_program
*vp
,
535 struct prog_instruction
*vpi
,
537 struct prog_src_register src
[3])
539 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
541 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX
,
544 t_dst_index(vp
, &vpi
->DstReg
),
545 t_dst_mask(vpi
->DstReg
.WriteMask
),
546 t_dst_class(vpi
->DstReg
.File
));
547 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
548 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
549 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
550 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
551 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
552 t_src_class(src
[0].File
),
553 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
554 (src
[0].RelAddr
<< 4);
555 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
556 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
561 static GLuint
*r300TranslateOpcodeLIT(struct r300_vertex_program
*vp
,
562 struct prog_instruction
*vpi
,
564 struct prog_src_register src
[3])
566 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
568 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
571 t_dst_index(vp
, &vpi
->DstReg
),
572 t_dst_mask(vpi
->DstReg
.WriteMask
),
573 t_dst_class(vpi
->DstReg
.File
));
574 /* NOTE: Users swizzling might not work. */
575 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
576 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
577 PVS_SRC_SELECT_FORCE_0
, // Z
578 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
579 t_src_class(src
[0].File
),
580 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
581 (src
[0].RelAddr
<< 4);
582 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
583 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
584 PVS_SRC_SELECT_FORCE_0
, // Z
585 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
586 t_src_class(src
[0].File
),
587 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
588 (src
[0].RelAddr
<< 4);
589 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
590 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
591 PVS_SRC_SELECT_FORCE_0
, // Z
592 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
593 t_src_class(src
[0].File
),
594 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
595 (src
[0].RelAddr
<< 4);
600 static GLuint
*r300TranslateOpcodeLOG(struct r300_vertex_program
*vp
,
601 struct prog_instruction
*vpi
,
603 struct prog_src_register src
[3])
605 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX
,
608 t_dst_index(vp
, &vpi
->DstReg
),
609 t_dst_mask(vpi
->DstReg
.WriteMask
),
610 t_dst_class(vpi
->DstReg
.File
));
611 inst
[1] = t_src_scalar(vp
, &src
[0]);
612 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
613 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
618 static GLuint
*r300TranslateOpcodeMAD(struct r300_vertex_program
*vp
,
619 struct prog_instruction
*vpi
,
621 struct prog_src_register src
[3])
623 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
626 t_dst_index(vp
, &vpi
->DstReg
),
627 t_dst_mask(vpi
->DstReg
.WriteMask
),
628 t_dst_class(vpi
->DstReg
.File
));
629 inst
[1] = t_src(vp
, &src
[0]);
630 inst
[2] = t_src(vp
, &src
[1]);
631 inst
[3] = t_src(vp
, &src
[2]);
636 static GLuint
*r300TranslateOpcodeMAX(struct r300_vertex_program
*vp
,
637 struct prog_instruction
*vpi
,
639 struct prog_src_register src
[3])
641 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
644 t_dst_index(vp
, &vpi
->DstReg
),
645 t_dst_mask(vpi
->DstReg
.WriteMask
),
646 t_dst_class(vpi
->DstReg
.File
));
647 inst
[1] = t_src(vp
, &src
[0]);
648 inst
[2] = t_src(vp
, &src
[1]);
649 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
654 static GLuint
*r300TranslateOpcodeMIN(struct r300_vertex_program
*vp
,
655 struct prog_instruction
*vpi
,
657 struct prog_src_register src
[3])
659 inst
[0] = PVS_OP_DST_OPERAND(VE_MINIMUM
,
662 t_dst_index(vp
, &vpi
->DstReg
),
663 t_dst_mask(vpi
->DstReg
.WriteMask
),
664 t_dst_class(vpi
->DstReg
.File
));
665 inst
[1] = t_src(vp
, &src
[0]);
666 inst
[2] = t_src(vp
, &src
[1]);
667 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
672 static GLuint
*r300TranslateOpcodeMOV(struct r300_vertex_program
*vp
,
673 struct prog_instruction
*vpi
,
675 struct prog_src_register src
[3])
677 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
679 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
682 t_dst_index(vp
, &vpi
->DstReg
),
683 t_dst_mask(vpi
->DstReg
.WriteMask
),
684 t_dst_class(vpi
->DstReg
.File
));
685 inst
[1] = t_src(vp
, &src
[0]);
686 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
687 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
692 static GLuint
*r300TranslateOpcodeMUL(struct r300_vertex_program
*vp
,
693 struct prog_instruction
*vpi
,
695 struct prog_src_register src
[3])
697 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY
,
700 t_dst_index(vp
, &vpi
->DstReg
),
701 t_dst_mask(vpi
->DstReg
.WriteMask
),
702 t_dst_class(vpi
->DstReg
.File
));
703 inst
[1] = t_src(vp
, &src
[0]);
704 inst
[2] = t_src(vp
, &src
[1]);
705 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
710 static GLuint
*r300TranslateOpcodePOW(struct r300_vertex_program
*vp
,
711 struct prog_instruction
*vpi
,
713 struct prog_src_register src
[3])
715 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
718 t_dst_index(vp
, &vpi
->DstReg
),
719 t_dst_mask(vpi
->DstReg
.WriteMask
),
720 t_dst_class(vpi
->DstReg
.File
));
721 inst
[1] = t_src_scalar(vp
, &src
[0]);
722 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
723 inst
[3] = t_src_scalar(vp
, &src
[1]);
728 static GLuint
*r300TranslateOpcodeRCP(struct r300_vertex_program
*vp
,
729 struct prog_instruction
*vpi
,
731 struct prog_src_register src
[3])
733 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX
,
736 t_dst_index(vp
, &vpi
->DstReg
),
737 t_dst_mask(vpi
->DstReg
.WriteMask
),
738 t_dst_class(vpi
->DstReg
.File
));
739 inst
[1] = t_src_scalar(vp
, &src
[0]);
740 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
741 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
746 static GLuint
*r300TranslateOpcodeRSQ(struct r300_vertex_program
*vp
,
747 struct prog_instruction
*vpi
,
749 struct prog_src_register src
[3])
751 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX
,
754 t_dst_index(vp
, &vpi
->DstReg
),
755 t_dst_mask(vpi
->DstReg
.WriteMask
),
756 t_dst_class(vpi
->DstReg
.File
));
757 inst
[1] = t_src_scalar(vp
, &src
[0]);
758 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
759 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
764 static GLuint
*r300TranslateOpcodeSGE(struct r300_vertex_program
*vp
,
765 struct prog_instruction
*vpi
,
767 struct prog_src_register src
[3])
769 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL
,
772 t_dst_index(vp
, &vpi
->DstReg
),
773 t_dst_mask(vpi
->DstReg
.WriteMask
),
774 t_dst_class(vpi
->DstReg
.File
));
775 inst
[1] = t_src(vp
, &src
[0]);
776 inst
[2] = t_src(vp
, &src
[1]);
777 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
782 static GLuint
*r300TranslateOpcodeSLT(struct r300_vertex_program
*vp
,
783 struct prog_instruction
*vpi
,
785 struct prog_src_register src
[3])
787 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN
,
790 t_dst_index(vp
, &vpi
->DstReg
),
791 t_dst_mask(vpi
->DstReg
.WriteMask
),
792 t_dst_class(vpi
->DstReg
.File
));
793 inst
[1] = t_src(vp
, &src
[0]);
794 inst
[2] = t_src(vp
, &src
[1]);
795 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
800 static GLuint
*r300TranslateOpcodeSUB(struct r300_vertex_program
*vp
,
801 struct prog_instruction
*vpi
,
803 struct prog_src_register src
[3])
805 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
808 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
811 t_dst_index(vp
, &vpi
->DstReg
),
812 t_dst_mask(vpi
->DstReg
.WriteMask
),
813 t_dst_class(vpi
->DstReg
.File
));
814 inst
[1] = t_src(vp
, &src
[0]);
815 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
816 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
817 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
818 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
819 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
820 t_src_class(src
[1].File
),
822 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
823 (src
[1].RelAddr
<< 4);
827 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
830 t_dst_index(vp
, &vpi
->DstReg
),
831 t_dst_mask(vpi
->DstReg
.WriteMask
),
832 t_dst_class(vpi
->DstReg
.File
));
833 inst
[1] = t_src(vp
, &src
[0]);
834 inst
[2] = __CONST(0, SWIZZLE_ONE
);
835 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
836 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
837 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
838 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
839 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
840 t_src_class(src
[1].File
),
842 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
843 (src
[1].RelAddr
<< 4);
849 static GLuint
*r300TranslateOpcodeSWZ(struct r300_vertex_program
*vp
,
850 struct prog_instruction
*vpi
,
852 struct prog_src_register src
[3])
854 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
856 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
859 t_dst_index(vp
, &vpi
->DstReg
),
860 t_dst_mask(vpi
->DstReg
.WriteMask
),
861 t_dst_class(vpi
->DstReg
.File
));
862 inst
[1] = t_src(vp
, &src
[0]);
863 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
864 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
869 static GLuint
*r300TranslateOpcodeXPD(struct r300_vertex_program
*vp
,
870 struct prog_instruction
*vpi
,
872 struct prog_src_register src
[3],
875 /* mul r0, r1.yzxw, r2.zxyw
876 mad r0, -r2.yzxw, r1.zxyw, r0
879 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
883 t_dst_mask(vpi
->DstReg
.WriteMask
),
884 PVS_DST_REG_TEMPORARY
);
885 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
886 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
887 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
888 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
889 t_src_class(src
[0].File
),
890 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
891 (src
[0].RelAddr
<< 4);
892 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
893 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
894 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
895 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
896 t_src_class(src
[1].File
),
897 src
[1].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
898 (src
[1].RelAddr
<< 4);
899 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
902 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
905 t_dst_index(vp
, &vpi
->DstReg
),
906 t_dst_mask(vpi
->DstReg
.WriteMask
),
907 t_dst_class(vpi
->DstReg
.File
));
908 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
909 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
910 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
911 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
912 t_src_class(src
[1].File
),
914 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
915 (src
[1].RelAddr
<< 4);
916 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
917 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
918 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
919 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
920 t_src_class(src
[0].File
),
921 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
922 (src
[0].RelAddr
<< 4);
924 PVS_SRC_OPERAND(*u_temp_i
, PVS_SRC_SELECT_X
, PVS_SRC_SELECT_Y
,
925 PVS_SRC_SELECT_Z
, PVS_SRC_SELECT_W
,
926 PVS_SRC_REG_TEMPORARY
, VSF_FLAG_NONE
);
933 static void t_inputs_outputs(struct r300_vertex_program
*vp
)
939 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
940 if (vp
->key
.InputsRead
& (1 << i
))
941 vp
->inputs
[i
] = ++cur_reg
;
947 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
950 assert(vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
952 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
953 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
956 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
957 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
960 /* If we're writing back facing colors we need to send
961 * four colors to make front/back face colors selection work.
962 * If the vertex program doesn't write all 4 colors, lets
963 * pretend it does by skipping output index reg so the colors
964 * get written into appropriate output vectors.
966 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
967 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
968 } else if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
969 vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
973 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
974 vp
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
975 } else if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
976 vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
980 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
981 vp
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
982 } else if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
986 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
987 vp
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
988 } else if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
992 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
993 if (vp
->key
.OutputsWritten
& (1 << i
)) {
994 vp
->outputs
[i
] = cur_reg
++;
998 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
999 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
1003 static void r300TranslateVertexShader(struct r300_vertex_program
*vp
,
1004 struct prog_instruction
*vpi
)
1008 unsigned long num_operands
;
1009 /* Initial value should be last tmp reg that hw supports.
1010 Strangely enough r300 doesnt mind even though these would be out of range.
1011 Smart enough to realize that it doesnt need it? */
1012 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
1013 struct prog_src_register src
[3];
1015 vp
->pos_end
= 0; /* Not supported yet */
1016 vp
->hw_code
.length
= 0;
1017 vp
->translated
= GL_TRUE
;
1018 vp
->error
= GL_FALSE
;
1020 t_inputs_outputs(vp
);
1022 for (inst
= vp
->hw_code
.body
.d
; vpi
->Opcode
!= OPCODE_END
;
1027 if (!valid_dst(vp
, &vpi
->DstReg
)) {
1028 /* redirect result to unused temp */
1029 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1030 vpi
->DstReg
.Index
= u_temp_i
;
1033 num_operands
= _mesa_num_inst_src_regs(vpi
->Opcode
);
1035 /* copy the sources (src) from mesa into a local variable... is this needed? */
1036 for (i
= 0; i
< num_operands
; i
++) {
1037 src
[i
] = vpi
->SrcReg
[i
];
1040 if (num_operands
== 3) { /* TODO: scalars */
1041 if (CMP_SRCS(src
[1], src
[2])
1042 || CMP_SRCS(src
[0], src
[2])) {
1043 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
1048 PVS_DST_REG_TEMPORARY
);
1050 PVS_SRC_OPERAND(t_src_index(vp
, &src
[2]),
1055 t_src_class(src
[2].File
),
1056 VSF_FLAG_NONE
) | (src
[2].
1059 inst
[2] = __CONST(2, SWIZZLE_ZERO
);
1060 inst
[3] = __CONST(2, SWIZZLE_ZERO
);
1063 src
[2].File
= PROGRAM_TEMPORARY
;
1064 src
[2].Index
= u_temp_i
;
1070 if (num_operands
>= 2) {
1071 if (CMP_SRCS(src
[1], src
[0])) {
1072 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
1077 PVS_DST_REG_TEMPORARY
);
1079 PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
1084 t_src_class(src
[0].File
),
1085 VSF_FLAG_NONE
) | (src
[0].
1088 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
1089 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
1092 src
[0].File
= PROGRAM_TEMPORARY
;
1093 src
[0].Index
= u_temp_i
;
1099 switch (vpi
->Opcode
) {
1101 inst
= r300TranslateOpcodeABS(vp
, vpi
, inst
, src
);
1104 inst
= r300TranslateOpcodeADD(vp
, vpi
, inst
, src
);
1107 inst
= r300TranslateOpcodeARL(vp
, vpi
, inst
, src
);
1110 inst
= r300TranslateOpcodeDP3(vp
, vpi
, inst
, src
);
1113 inst
= r300TranslateOpcodeDP4(vp
, vpi
, inst
, src
);
1116 inst
= r300TranslateOpcodeDPH(vp
, vpi
, inst
, src
);
1119 inst
= r300TranslateOpcodeDST(vp
, vpi
, inst
, src
);
1122 inst
= r300TranslateOpcodeEX2(vp
, vpi
, inst
, src
);
1125 inst
= r300TranslateOpcodeEXP(vp
, vpi
, inst
, src
);
1128 inst
= r300TranslateOpcodeFLR(vp
, vpi
, inst
, src
, /* FIXME */
1132 inst
= r300TranslateOpcodeFRC(vp
, vpi
, inst
, src
);
1135 inst
= r300TranslateOpcodeLG2(vp
, vpi
, inst
, src
);
1138 inst
= r300TranslateOpcodeLIT(vp
, vpi
, inst
, src
);
1141 inst
= r300TranslateOpcodeLOG(vp
, vpi
, inst
, src
);
1144 inst
= r300TranslateOpcodeMAD(vp
, vpi
, inst
, src
);
1147 inst
= r300TranslateOpcodeMAX(vp
, vpi
, inst
, src
);
1150 inst
= r300TranslateOpcodeMIN(vp
, vpi
, inst
, src
);
1153 inst
= r300TranslateOpcodeMOV(vp
, vpi
, inst
, src
);
1156 inst
= r300TranslateOpcodeMUL(vp
, vpi
, inst
, src
);
1159 inst
= r300TranslateOpcodePOW(vp
, vpi
, inst
, src
);
1162 inst
= r300TranslateOpcodeRCP(vp
, vpi
, inst
, src
);
1165 inst
= r300TranslateOpcodeRSQ(vp
, vpi
, inst
, src
);
1168 inst
= r300TranslateOpcodeSGE(vp
, vpi
, inst
, src
);
1171 inst
= r300TranslateOpcodeSLT(vp
, vpi
, inst
, src
);
1174 inst
= r300TranslateOpcodeSUB(vp
, vpi
, inst
, src
);
1177 inst
= r300TranslateOpcodeSWZ(vp
, vpi
, inst
, src
);
1180 inst
= r300TranslateOpcodeXPD(vp
, vpi
, inst
, src
, /* FIXME */
1184 vp
->error
= GL_TRUE
;
1189 vp
->hw_code
.length
= (inst
- vp
->hw_code
.body
.d
);
1190 if (vp
->hw_code
.length
>= VSF_MAX_FRAGMENT_LENGTH
) {
1191 vp
->error
= GL_TRUE
;
1195 static void insert_wpos(struct r300_vertex_program
*vp
, struct gl_program
*prog
,
1198 struct prog_instruction
*vpi
;
1200 _mesa_insert_instructions(prog
, prog
->NumInstructions
- 1, 2);
1202 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 3];
1204 vpi
->Opcode
= OPCODE_MOV
;
1206 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
1207 vpi
->DstReg
.Index
= VERT_RESULT_HPOS
;
1208 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1209 vpi
->DstReg
.CondMask
= COND_TR
;
1211 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1212 vpi
->SrcReg
[0].Index
= temp_index
;
1213 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1217 vpi
->Opcode
= OPCODE_MOV
;
1219 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
1220 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ vp
->wpos_idx
;
1221 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1222 vpi
->DstReg
.CondMask
= COND_TR
;
1224 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1225 vpi
->SrcReg
[0].Index
= temp_index
;
1226 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1230 vpi
->Opcode
= OPCODE_END
;
1233 static void pos_as_texcoord(struct r300_vertex_program
*vp
,
1234 struct gl_program
*prog
)
1236 struct prog_instruction
*vpi
;
1237 GLuint tempregi
= prog
->NumTemporaries
;
1239 prog
->NumTemporaries
++;
1241 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
1242 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
1243 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1244 vpi
->DstReg
.Index
= tempregi
;
1248 insert_wpos(vp
, prog
, tempregi
);
/**
 * Create a driver vertex program for the given key from a Mesa vertex program.
 *
 * Visible steps, in order:
 *  - allocate the r300_vertex_program with _mesa_calloc() and copy
 *    *wanted_key into vp->key, then record wpos_idx in vp->wpos_idx;
 *  - when mesa_vp->IsPositionInvariant is set, call _mesa_insert_mvp_code();
 *  - when wpos_idx is greater than -1, call pos_as_texcoord() on the base
 *    program;
 *  - with DEBUG_VERTS set in RADEON_DEBUG, print the rewritten program;
 *  - for every bit i set in vp->key.OutputsAdded, fill an inserted
 *    instruction with a MOV that writes output i (full XYZW mask) from
 *    PROGRAM_CONSTANT index 0; the slots are inserted at
 *    NumInstructions - 1 with a length of `count`;
 *  - assert the program is non-empty, copy NumTemporaries into
 *    vp->num_temporaries and run r300TranslateVertexShader().
 *
 * NOTE(review): the result of _mesa_calloc() is used without a NULL check.
 * NOTE(review): the declarations of wpos_idx, i and count, the statement that
 * computes count, and the function's return statement are not visible in
 * this view of the file -- confirm against the full source.
 */
1251 static struct r300_vertex_program
*build_program(GLcontext
*ctx
,
1252 struct r300_vertex_program_key
*wanted_key
,
1253 struct gl_vertex_program
*mesa_vp
,
1256 struct r300_vertex_program
*vp
;
/* Zero-filled allocation; the key is stored by value inside the program. */
1258 vp
= _mesa_calloc(sizeof(*vp
));
1259 _mesa_memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
1260 vp
->wpos_idx
= wpos_idx
;
/* Position-invariant programs get the MVP transform code inserted. */
1262 if (mesa_vp
->IsPositionInvariant
) {
1263 _mesa_insert_mvp_code(ctx
, mesa_vp
);
/* A wpos_idx above -1 requests the position-to-texcoord rewrite. */
1266 if (wpos_idx
> -1) {
1267 pos_as_texcoord(vp
, &mesa_vp
->Base
);
1270 if (RADEON_DEBUG
& DEBUG_VERTS
) {
1271 fprintf(stderr
, "Vertex program after native rewrite:\n");
1272 _mesa_print_program(&mesa_vp
->Base
);
1276 /* Some outputs may be artificially added, to match the inputs of the fragment program.
1277 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
1278 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
1282 for (i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
1283 if (vp
->key
.OutputsAdded
& (1 << i
)) {
1289 struct prog_instruction
*inst
;
1291 _mesa_insert_instructions(&mesa_vp
->Base
, mesa_vp
->Base
.NumInstructions
- 1, count
);
1292 inst
= &mesa_vp
->Base
.Instructions
[mesa_vp
->Base
.NumInstructions
- 1 - count
];
1294 for (i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
1295 if (vp
->key
.OutputsAdded
& (1 << i
)) {
1296 inst
->Opcode
= OPCODE_MOV
;
1298 inst
->DstReg
.File
= PROGRAM_OUTPUT
;
1299 inst
->DstReg
.Index
= i
;
1300 inst
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1301 inst
->DstReg
.CondMask
= COND_TR
;
1303 inst
->SrcReg
[0].File
= PROGRAM_CONSTANT
;
1304 inst
->SrcReg
[0].Index
= 0;
1305 inst
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1313 assert(mesa_vp
->Base
.NumInstructions
);
1314 vp
->num_temporaries
= mesa_vp
->Base
.NumTemporaries
;
1315 r300TranslateVertexShader(vp
, mesa_vp
->Base
.Instructions
);
1320 static void add_outputs(struct r300_vertex_program_key
*key
, GLint vert
)
1322 if (key
->OutputsWritten
& (1 << vert
))
1325 key
->OutputsWritten
|= 1 << vert
;
1326 key
->OutputsAdded
|= 1 << vert
;
/**
 * Choose the driver vertex program for the current vertex/fragment program
 * pair and store it in r300->selected_vp.
 *
 * A key is seeded from the current vertex program's InputsRead and
 * OutputsWritten and then extended according to the InputsRead mask of the
 * current fragment program (WPOS, COL0, COL1, FOGC and each texcoord).  The
 * container's progs list is walked and each entry's key is compared with the
 * wanted key using _mesa_memcmp(); otherwise build_program() is called and
 * the new program's next pointer is set to the current list head.
 *
 * NOTE(review): several lines of this function (declarations of InputsRead,
 * i and wpos_idx, the action taken after the "no free texcoord" message, the
 * test applied to the _mesa_memcmp() result, and any update of vpc->progs)
 * are not visible in this view -- confirm against the full source.
 */
1329 void r300SelectVertexShader(r300ContextPtr r300
)
/* NOTE(review): "ctx = ctx = ..." assigns ctx twice; the extra "ctx =" looks like a typo. */
1331 GLcontext
*ctx
= ctx
= r300
->radeon
.glCtx
;
1333 struct r300_vertex_program_key wanted_key
= { 0 };
1335 struct r300_vertex_program_cont
*vpc
;
1336 struct r300_vertex_program
*vp
;
/* The current vertex program is treated as an r300_vertex_program_cont. */
1339 vpc
= (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
1340 wanted_key
.InputsRead
= vpc
->mesa_program
.Base
.InputsRead
;
1341 wanted_key
.OutputsWritten
= vpc
->mesa_program
.Base
.OutputsWritten
;
/* From here on InputsRead is the fragment program's input mask. */
1342 InputsRead
= ctx
->FragmentProgram
._Current
->Base
.InputsRead
;
/* Fragment program reads WPOS: scan for a texcoord the fragment program does not read. */
1345 if (InputsRead
& FRAG_BIT_WPOS
) {
1346 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1347 if (!(InputsRead
& (FRAG_BIT_TEX0
<< i
)))
/* i reaching MaxTextureUnits means every texcoord is already read. */
1350 if (i
== ctx
->Const
.MaxTextureUnits
) {
1351 fprintf(stderr
, "\tno free texcoord found\n");
/* The chosen texcoord becomes an output of the vertex program. */
1355 wanted_key
.OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ i
);
/* Position-invariant programs read POS and write HPOS. */
1359 if (vpc
->mesa_program
.IsPositionInvariant
) {
1360 wanted_key
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
1361 wanted_key
.OutputsWritten
|= (1 << VERT_RESULT_HPOS
);
1363 add_outputs(&wanted_key
, VERT_RESULT_HPOS
);
/* Add every vertex result that the fragment program reads. */
1366 if (InputsRead
& FRAG_BIT_COL0
) {
1367 add_outputs(&wanted_key
, VERT_RESULT_COL0
);
1370 if (InputsRead
& FRAG_BIT_COL1
) {
1371 add_outputs(&wanted_key
, VERT_RESULT_COL1
);
1374 if (InputsRead
& FRAG_BIT_FOGC
) {
1375 add_outputs(&wanted_key
, VERT_RESULT_FOGC
);
1378 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++) {
1379 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1380 add_outputs(&wanted_key
, VERT_RESULT_TEX0
+ i
);
/* Look through the programs already built for this container. */
1384 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
)
1385 if (_mesa_memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
))
1387 r300
->selected_vp
= vp
;
1391 if (RADEON_DEBUG
& DEBUG_VERTS
) {
1392 fprintf(stderr
, "Initial vertex program:\n");
1393 _mesa_print_program(&vpc
->mesa_program
.Base
);
/* Build a program for the wanted key and chain it in front of the list. */
1397 vp
= build_program(ctx
, &wanted_key
, &vpc
->mesa_program
, wpos_idx
);
1398 vp
->next
= vpc
->progs
;
1400 r300
->selected_vp
= vp
;
/*
 * Raise the vpu.count field of a command header if needed.
 *
 * ptr is reinterpreted as a drm_r300_cmd_header_t pointer, new_count is
 * divided by four, and the header's vpu.count is replaced only when the
 * new value is larger.  The new value is asserted to be below 256.
 */
#define bump_vpu_count(ptr, new_count) do { \
		drm_r300_cmd_header_t *vpu_hdr_ = (drm_r300_cmd_header_t *)(ptr); \
		int vpu_quads_ = (new_count) / 4; \
		assert(vpu_quads_ < 256); \
		if (vpu_hdr_->vpu.count < vpu_quads_) \
			vpu_hdr_->vpu.count = vpu_quads_; \
	} while (0)
/**
 * Copy translated vertex shader words into one of the hardware state atoms.
 *
 * The target atom is chosen by the switch on bits 8..11 of \p dest, and the
 * low byte of \p dest is a slot offset that is multiplied by four.  The three
 * visible arms write to the vpi, vpp and vps atoms respectively; each marks
 * its atom as changed, copies code->length words from code->body.d and
 * raises the atom's VPU count.  The final arm reports an unknown
 * destination on stderr.
 *
 * \param r300  context whose hw.vpi / hw.vpp / hw.vps command arrays are written
 * \param dest  destination selector (bits 8..11) plus slot offset (bits 0..7)
 * \param code  words to upload; length must be positive and a multiple of 4
 *
 * NOTE(review): the case labels, the break statements and the declaration of
 * i are not visible in this view -- confirm against the full source.
 */
1410 static void r300EmitVertexProgram(r300ContextPtr r300
, int dest
, struct r300_vertex_shader_hw_code
*code
)
/* The code length must be a non-zero multiple of four words. */
1414 assert((code
->length
> 0) && (code
->length
% 4 == 0));
1416 switch ((dest
>> 8) & 0xf) {
/* Arm writing the vpi atom, starting at R300_VPI_INSTR_0. */
1418 R300_STATECHANGE(r300
, vpi
);
1419 for (i
= 0; i
< code
->length
; i
++)
1420 r300
->hw
.vpi
.cmd
[R300_VPI_INSTR_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1421 bump_vpu_count(r300
->hw
.vpi
.cmd
, code
->length
+ 4 * (dest
& 0xff));
/* Arm writing the vpp atom, starting at R300_VPP_PARAM_0. */
1424 R300_STATECHANGE(r300
, vpp
);
1425 for (i
= 0; i
< code
->length
; i
++)
1426 r300
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1427 bump_vpu_count(r300
->hw
.vpp
.cmd
, code
->length
+ 4 * (dest
& 0xff));
/* Arm writing the vps atom, starting at command index 1. */
1430 R300_STATECHANGE(r300
, vps
);
1431 for (i
= 0; i
< code
->length
; i
++)
1432 r300
->hw
.vps
.cmd
[1 + i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1433 bump_vpu_count(r300
->hw
.vps
.cmd
, code
->length
+ 4 * (dest
& 0xff));
/* Unrecognised destination selector: report it. */
1436 fprintf(stderr
, "%s:%s don't know how to handle dest %04x\n", __FILE__
, __FUNCTION__
, dest
);
/**
 * Load the selected vertex program and its parameters into hardware state.
 *
 * Visible steps, in order:
 *  - zero the vpu.count field in the vpp, vpi and vps command headers;
 *  - mark vpp changed, let r300VertexProgUpdateParams() fill the parameter
 *    area starting at R300_VPP_PARAM_0 and raise the vpp count from the
 *    value it returns;
 *  - emit prog->hw_code at R300_PVS_CODE_START;
 *  - derive inst_count as (hw_code.length / 4) - 1;
 *  - call r300VapCntl() with the bit counts of the key's InputsRead and
 *    OutputsWritten masks and the number of temporaries;
 *  - mark pvs changed and write R300_PVS_CNTL_1, _2 and _3.
 *
 * \param rmesa  context providing selected_vp and the hw state atoms
 *
 * NOTE(review): the declaration of inst_count is not visible in this view,
 * and other statements may be missing too -- confirm against the full source.
 */
1441 void r300SetupVertexProgram(r300ContextPtr rmesa
)
1443 GLcontext
*ctx
= rmesa
->radeon
.glCtx
;
1444 struct r300_vertex_program
*prog
= rmesa
->selected_vp
;
1446 int param_count
= 0;
1448 /* Reset state, in case we don't use something */
1449 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpp
.cmd
)->vpu
.count
= 0;
1450 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpi
.cmd
)->vpu
.count
= 0;
1451 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vps
.cmd
)->vpu
.count
= 0;
/* Refresh the program parameters; the return value feeds the vpp count. */
1453 R300_STATECHANGE(rmesa
, vpp
);
1454 param_count
= r300VertexProgUpdateParams(ctx
,
1455 (struct r300_vertex_program_cont
*)
1456 ctx
->VertexProgram
._Current
,
1457 (float *)&rmesa
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
]);
1458 bump_vpu_count(rmesa
->hw
.vpp
.cmd
, param_count
);
/* Upload the translated code; inst_count is one less than length / 4. */
1461 r300EmitVertexProgram(rmesa
, R300_PVS_CODE_START
, &(prog
->hw_code
));
1462 inst_count
= (prog
->hw_code
.length
/ 4) - 1;
/* r300VapCntl receives the input count, output count and temporary count. */
1464 r300VapCntl(rmesa
, _mesa_bitcount(prog
->key
.InputsRead
),
1465 _mesa_bitcount(prog
->key
.OutputsWritten
), prog
->num_temporaries
);
/* CNTL_1: first instruction 0; inst_count is used for the XYZW-valid and last-instruction fields. */
1467 R300_STATECHANGE(rmesa
, pvs
);
1468 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_1
] = (0 << R300_PVS_FIRST_INST_SHIFT
) | (inst_count
<< R300_PVS_XYZW_VALID_INST_SHIFT
) |
1469 (inst_count
<< R300_PVS_LAST_INST_SHIFT
);
/* CNTL_2: constant base offset 0, max constant address taken from param_count. */
1471 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_2
] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT
) | (param_count
<< R300_PVS_MAX_CONST_ADDR_SHIFT
);
/* CNTL_3: last vertex-source instruction field, also inst_count. */
1472 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_3
] = (inst_count
<< R300_PVS_LAST_VTX_SRC_INST_SHIFT
);