1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
29 /* Radeon R5xx Acceleration, Revision 1.2 */
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/prog_instruction.h"
36 #include "shader/prog_parameter.h"
37 #include "shader/prog_statevars.h"
40 #include "r300_context.h"
41 #include "r300_state.h"
43 /* TODO: Get rid of t_src_class call */
/*
 * Evaluates to non-zero when source registers a and b conflict:
 *   - their RelAddr flags differ, or
 *   - their Index values differ while both map to the constant class, or
 *     both map to the input class (as reported by t_src_class()).
 * The translator copies one of the two sources into a scratch temporary
 * when this is true.
 *
 * Arguments are parenthesized so any struct-valued expression (e.g. *ptr)
 * may be passed, and the definition no longer ends in a line-continuation
 * backslash that would splice the following source line into the macro.
 * Each argument is evaluated more than once; pass side-effect-free
 * expressions only.
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
	    t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
	   (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
	    t_src_class((b).File) == PVS_SRC_REG_INPUT))))
/**
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 *
 * x - index into the local src[] array; the operand reuses that source's
 *     hardware index, register class and RelAddr bit.
 * y - swizzle selector replicated to all four channels (callers in this
 *     file pass SWIZZLE_ZERO or SWIZZLE_ONE).
 *
 * Must be expanded in a scope where `vp` and `src` are defined.
 *
 * NOTE(review): the four swizzle arguments were missing from the damaged
 * listing this was restored from. They are rebuilt from `y` because
 * PVS_SRC_OPERAND takes seven arguments at every other call site and `y`
 * was otherwise unused - confirm against the pristine file.
 */
#define __CONST(x, y) \
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_swizzle(y), \
			 t_src_class(src[x].File), \
			 VSF_FLAG_NONE) | (src[x].RelAddr << 4))
/*
 * Scratch-temporary bookkeeping, expanded where `vp` and `u_temp_i` are
 * in scope.
 *
 * Computes how many scratch temporaries have been handed out (scratch
 * registers are allocated downwards from VSF_MAX_FRAGMENT_TEMPS - 1),
 * flags vp->error and warns once if program temporaries plus scratch
 * temporaries exceed VSF_MAX_FRAGMENT_TEMPS, then resets u_temp_i so all
 * scratch temporaries are free again.
 *
 * Wrapped in do { } while (0) so that `FREE_TEMPS();` behaves as a single
 * statement, including inside an unbraced if/else.
 */
#define FREE_TEMPS() \
	do { \
		int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
		if ((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
			vp->error = GL_TRUE; \
		} \
		u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; \
	} while (0)
73 int r300VertexProgUpdateParams(GLcontext
* ctx
,
74 struct r300_vertex_program_cont
*vp
, float *dst
)
77 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
79 struct gl_program_parameter_list
*paramList
;
81 if (mesa_vp
->IsNVProgram
) {
82 _mesa_load_tracked_matrices(ctx
);
84 for (pi
= 0; pi
< MAX_NV_VERTEX_PROGRAM_PARAMS
; pi
++) {
85 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][0];
86 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][1];
87 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][2];
88 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][3];
93 assert(mesa_vp
->Base
.Parameters
);
94 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
96 if (mesa_vp
->Base
.Parameters
->NumParameters
* 4 >
97 VSF_MAX_FRAGMENT_LENGTH
) {
98 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
102 paramList
= mesa_vp
->Base
.Parameters
;
103 for (pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
104 switch (paramList
->Parameters
[pi
].Type
) {
105 case PROGRAM_STATE_VAR
:
106 case PROGRAM_NAMED_PARAM
:
107 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
108 case PROGRAM_CONSTANT
:
109 *dst
++ = paramList
->ParameterValues
[pi
][0];
110 *dst
++ = paramList
->ParameterValues
[pi
][1];
111 *dst
++ = paramList
->ParameterValues
[pi
][2];
112 *dst
++ = paramList
->ParameterValues
[pi
][3];
115 _mesa_problem(NULL
, "Bad param type in %s",
124 static unsigned long t_dst_mask(GLuint mask
)
126 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
127 return mask
& VSF_FLAG_ALL
;
/*
 * Map a Mesa register file to the hardware destination register class.
 *
 * Mappings visible here:
 *   PROGRAM_TEMPORARY -> PVS_DST_REG_TEMPORARY
 *   PROGRAM_ADDRESS   -> PVS_DST_REG_A0
 * A third case returns PVS_DST_REG_OUT; its case label is missing from
 * this listing (presumably PROGRAM_OUTPUT - confirm against the pristine
 * file).  Any other register file prints "problem in <function>" to
 * stderr.
 *
 * NOTE(review): this listing is damaged.  The switch header, the braces
 * and whatever follows the fprintf() on the error path are not visible.
 * The unsplit "case PROGRAM_*:" lines after the last return look like the
 * remains of a commented-out list: PROGRAM_ADDRESS appears twice, which
 * would not compile as live code.
 */
130 static unsigned long t_dst_class(gl_register_file file
)
134 case PROGRAM_TEMPORARY
:
135 return PVS_DST_REG_TEMPORARY
;
137 return PVS_DST_REG_OUT
;
138 case PROGRAM_ADDRESS
:
139 return PVS_DST_REG_A0
;
142 case PROGRAM_LOCAL_PARAM:
143 case PROGRAM_ENV_PARAM:
144 case PROGRAM_NAMED_PARAM:
145 case PROGRAM_STATE_VAR:
146 case PROGRAM_WRITE_ONLY:
147 case PROGRAM_ADDRESS:
150 fprintf(stderr
, "problem in %s", __FUNCTION__
);
156 static unsigned long t_dst_index(struct r300_vertex_program
*vp
,
157 struct prog_dst_register
*dst
)
159 if (dst
->File
== PROGRAM_OUTPUT
)
160 return vp
->outputs
[dst
->Index
];
/*
 * Map a Mesa register file to the hardware source register class.
 *
 * Mappings visible here:
 *   PROGRAM_TEMPORARY                       -> PVS_SRC_REG_TEMPORARY
 *   PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM,
 *   PROGRAM_NAMED_PARAM, PROGRAM_CONSTANT,
 *   PROGRAM_STATE_VAR                       -> PVS_SRC_REG_CONSTANT
 * A further case returns PVS_SRC_REG_INPUT; its case label is missing
 * from this listing (presumably PROGRAM_INPUT - confirm against the
 * pristine file).  Any other register file prints
 * "problem in <function>" to stderr.
 *
 * NOTE(review): this listing is damaged.  The switch header, the braces
 * and whatever follows the fprintf() on the error path are not visible.
 * The unsplit "case PROGRAM_WRITE_ONLY:" / "case PROGRAM_ADDRESS:" lines
 * look like the remains of a commented-out list - confirm.
 */
165 static unsigned long t_src_class(gl_register_file file
)
168 case PROGRAM_TEMPORARY
:
169 return PVS_SRC_REG_TEMPORARY
;
171 return PVS_SRC_REG_INPUT
;
172 case PROGRAM_LOCAL_PARAM
:
173 case PROGRAM_ENV_PARAM
:
174 case PROGRAM_NAMED_PARAM
:
175 case PROGRAM_CONSTANT
:
176 case PROGRAM_STATE_VAR
:
177 return PVS_SRC_REG_CONSTANT
;
180 case PROGRAM_WRITE_ONLY:
181 case PROGRAM_ADDRESS:
184 fprintf(stderr
, "problem in %s", __FUNCTION__
);
190 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
192 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
197 static void vp_dump_inputs(struct r300_vertex_program
*vp
, char *caller
)
202 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
,
207 fprintf(stderr
, "%s:<", caller
);
208 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
209 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
210 fprintf(stderr
, ">\n");
/*
 * Return the hardware register index for a source register.
 *
 * For PROGRAM_INPUT sources the Mesa attribute index is remapped through
 * vp->inputs[]; the assert checks that the attribute was actually
 * assigned a register (entry != -1).
 *
 * For the remaining register files a negative Index is diagnosed with
 * the "negative offsets for indirect addressing do not work." message.
 *
 * NOTE(review): this listing is damaged.  The braces, the call that
 * prints the message, and the return statements of the non-input path
 * are not visible, so the value returned for non-input sources
 * (presumably src->Index) cannot be confirmed from here.
 */
215 static unsigned long t_src_index(struct r300_vertex_program
*vp
,
216 struct prog_src_register
*src
)
218 if (src
->File
== PROGRAM_INPUT
) {
219 assert(vp
->inputs
[src
->Index
] != -1);
220 return vp
->inputs
[src
->Index
];
222 if (src
->Index
< 0) {
224 "negative offsets for indirect addressing do not work.\n");
231 /* these two functions should probably be merged... */
233 static unsigned long t_src(struct r300_vertex_program
*vp
,
234 struct prog_src_register
*src
)
236 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
237 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
239 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
240 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
241 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
242 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
243 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
244 t_src_class(src
->File
),
245 src
->Negate
) | (src
->RelAddr
<< 4);
248 static unsigned long t_src_scalar(struct r300_vertex_program
*vp
,
249 struct prog_src_register
*src
)
251 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
252 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
254 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
255 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
256 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
257 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
258 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
259 t_src_class(src
->File
),
260 src
->Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
264 static GLboolean
valid_dst(struct r300_vertex_program
*vp
,
265 struct prog_dst_register
*dst
)
267 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
269 } else if (dst
->File
== PROGRAM_ADDRESS
) {
270 assert(dst
->Index
== 0);
276 static GLuint
*r300TranslateOpcodeABS(struct r300_vertex_program
*vp
,
277 struct prog_instruction
*vpi
,
279 struct prog_src_register src
[3])
281 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
283 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
286 t_dst_index(vp
, &vpi
->DstReg
),
287 t_dst_mask(vpi
->DstReg
.WriteMask
),
288 t_dst_class(vpi
->DstReg
.File
));
289 inst
[1] = t_src(vp
, &src
[0]);
290 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
291 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
292 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
293 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
294 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
295 t_src_class(src
[0].File
),
297 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
298 (src
[0].RelAddr
<< 4);
304 static GLuint
*r300TranslateOpcodeADD(struct r300_vertex_program
*vp
,
305 struct prog_instruction
*vpi
,
307 struct prog_src_register src
[3])
309 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
312 t_dst_index(vp
, &vpi
->DstReg
),
313 t_dst_mask(vpi
->DstReg
.WriteMask
),
314 t_dst_class(vpi
->DstReg
.File
));
315 inst
[1] = t_src(vp
, &src
[0]);
316 inst
[2] = t_src(vp
, &src
[1]);
317 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
322 static GLuint
*r300TranslateOpcodeARL(struct r300_vertex_program
*vp
,
323 struct prog_instruction
*vpi
,
325 struct prog_src_register src
[3])
327 inst
[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX
,
330 t_dst_index(vp
, &vpi
->DstReg
),
331 t_dst_mask(vpi
->DstReg
.WriteMask
),
332 t_dst_class(vpi
->DstReg
.File
));
333 inst
[1] = t_src(vp
, &src
[0]);
334 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
335 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
340 static GLuint
*r300TranslateOpcodeDP3(struct r300_vertex_program
*vp
,
341 struct prog_instruction
*vpi
,
343 struct prog_src_register src
[3])
345 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
347 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
350 t_dst_index(vp
, &vpi
->DstReg
),
351 t_dst_mask(vpi
->DstReg
.WriteMask
),
352 t_dst_class(vpi
->DstReg
.File
));
353 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
354 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
355 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
356 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
358 t_src_class(src
[0].File
),
359 src
[0].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
360 (src
[0].RelAddr
<< 4);
362 PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
363 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
364 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
365 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), SWIZZLE_ZERO
,
366 t_src_class(src
[1].File
),
367 src
[1].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
368 (src
[1].RelAddr
<< 4);
369 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
374 static GLuint
*r300TranslateOpcodeDP4(struct r300_vertex_program
*vp
,
375 struct prog_instruction
*vpi
,
377 struct prog_src_register src
[3])
379 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
382 t_dst_index(vp
, &vpi
->DstReg
),
383 t_dst_mask(vpi
->DstReg
.WriteMask
),
384 t_dst_class(vpi
->DstReg
.File
));
385 inst
[1] = t_src(vp
, &src
[0]);
386 inst
[2] = t_src(vp
, &src
[1]);
387 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
392 static GLuint
*r300TranslateOpcodeDPH(struct r300_vertex_program
*vp
,
393 struct prog_instruction
*vpi
,
395 struct prog_src_register src
[3])
397 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
398 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
401 t_dst_index(vp
, &vpi
->DstReg
),
402 t_dst_mask(vpi
->DstReg
.WriteMask
),
403 t_dst_class(vpi
->DstReg
.File
));
404 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
405 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
406 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
407 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
408 PVS_SRC_SELECT_FORCE_1
,
409 t_src_class(src
[0].File
),
410 src
[0].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
411 (src
[0].RelAddr
<< 4);
412 inst
[2] = t_src(vp
, &src
[1]);
413 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
418 static GLuint
*r300TranslateOpcodeDST(struct r300_vertex_program
*vp
,
419 struct prog_instruction
*vpi
,
421 struct prog_src_register src
[3])
423 inst
[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR
,
426 t_dst_index(vp
, &vpi
->DstReg
),
427 t_dst_mask(vpi
->DstReg
.WriteMask
),
428 t_dst_class(vpi
->DstReg
.File
));
429 inst
[1] = t_src(vp
, &src
[0]);
430 inst
[2] = t_src(vp
, &src
[1]);
431 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
436 static GLuint
*r300TranslateOpcodeEX2(struct r300_vertex_program
*vp
,
437 struct prog_instruction
*vpi
,
439 struct prog_src_register src
[3])
441 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX
,
444 t_dst_index(vp
, &vpi
->DstReg
),
445 t_dst_mask(vpi
->DstReg
.WriteMask
),
446 t_dst_class(vpi
->DstReg
.File
));
447 inst
[1] = t_src_scalar(vp
, &src
[0]);
448 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
449 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
454 static GLuint
*r300TranslateOpcodeEXP(struct r300_vertex_program
*vp
,
455 struct prog_instruction
*vpi
,
457 struct prog_src_register src
[3])
459 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX
,
462 t_dst_index(vp
, &vpi
->DstReg
),
463 t_dst_mask(vpi
->DstReg
.WriteMask
),
464 t_dst_class(vpi
->DstReg
.File
));
465 inst
[1] = t_src_scalar(vp
, &src
[0]);
466 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
467 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
472 static GLuint
*r300TranslateOpcodeFLR(struct r300_vertex_program
*vp
,
473 struct prog_instruction
*vpi
,
475 struct prog_src_register src
[3],
478 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
479 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
481 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
485 t_dst_mask(vpi
->DstReg
.WriteMask
),
486 PVS_DST_REG_TEMPORARY
);
487 inst
[1] = t_src(vp
, &src
[0]);
488 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
489 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
492 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
495 t_dst_index(vp
, &vpi
->DstReg
),
496 t_dst_mask(vpi
->DstReg
.WriteMask
),
497 t_dst_class(vpi
->DstReg
.File
));
498 inst
[1] = t_src(vp
, &src
[0]);
499 inst
[2] = PVS_SRC_OPERAND(*u_temp_i
,
503 PVS_SRC_SELECT_W
, PVS_SRC_REG_TEMPORARY
,
504 /* Not 100% sure about this */
506 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
508 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
514 static GLuint
*r300TranslateOpcodeFRC(struct r300_vertex_program
*vp
,
515 struct prog_instruction
*vpi
,
517 struct prog_src_register src
[3])
519 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
522 t_dst_index(vp
, &vpi
->DstReg
),
523 t_dst_mask(vpi
->DstReg
.WriteMask
),
524 t_dst_class(vpi
->DstReg
.File
));
525 inst
[1] = t_src(vp
, &src
[0]);
526 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
527 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
532 static GLuint
*r300TranslateOpcodeLG2(struct r300_vertex_program
*vp
,
533 struct prog_instruction
*vpi
,
535 struct prog_src_register src
[3])
537 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
539 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX
,
542 t_dst_index(vp
, &vpi
->DstReg
),
543 t_dst_mask(vpi
->DstReg
.WriteMask
),
544 t_dst_class(vpi
->DstReg
.File
));
545 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
546 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
547 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
548 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
549 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
550 t_src_class(src
[0].File
),
551 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
552 (src
[0].RelAddr
<< 4);
553 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
554 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
559 static GLuint
*r300TranslateOpcodeLIT(struct r300_vertex_program
*vp
,
560 struct prog_instruction
*vpi
,
562 struct prog_src_register src
[3])
564 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
566 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
569 t_dst_index(vp
, &vpi
->DstReg
),
570 t_dst_mask(vpi
->DstReg
.WriteMask
),
571 t_dst_class(vpi
->DstReg
.File
));
572 /* NOTE: Users swizzling might not work. */
573 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
574 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
575 PVS_SRC_SELECT_FORCE_0
, // Z
576 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
577 t_src_class(src
[0].File
),
578 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
579 (src
[0].RelAddr
<< 4);
580 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
581 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
582 PVS_SRC_SELECT_FORCE_0
, // Z
583 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
584 t_src_class(src
[0].File
),
585 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
586 (src
[0].RelAddr
<< 4);
587 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
588 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
589 PVS_SRC_SELECT_FORCE_0
, // Z
590 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
591 t_src_class(src
[0].File
),
592 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
593 (src
[0].RelAddr
<< 4);
598 static GLuint
*r300TranslateOpcodeLOG(struct r300_vertex_program
*vp
,
599 struct prog_instruction
*vpi
,
601 struct prog_src_register src
[3])
603 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX
,
606 t_dst_index(vp
, &vpi
->DstReg
),
607 t_dst_mask(vpi
->DstReg
.WriteMask
),
608 t_dst_class(vpi
->DstReg
.File
));
609 inst
[1] = t_src_scalar(vp
, &src
[0]);
610 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
611 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
616 static GLuint
*r300TranslateOpcodeMAD(struct r300_vertex_program
*vp
,
617 struct prog_instruction
*vpi
,
619 struct prog_src_register src
[3])
621 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
624 t_dst_index(vp
, &vpi
->DstReg
),
625 t_dst_mask(vpi
->DstReg
.WriteMask
),
626 t_dst_class(vpi
->DstReg
.File
));
627 inst
[1] = t_src(vp
, &src
[0]);
628 inst
[2] = t_src(vp
, &src
[1]);
629 inst
[3] = t_src(vp
, &src
[2]);
634 static GLuint
*r300TranslateOpcodeMAX(struct r300_vertex_program
*vp
,
635 struct prog_instruction
*vpi
,
637 struct prog_src_register src
[3])
639 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
642 t_dst_index(vp
, &vpi
->DstReg
),
643 t_dst_mask(vpi
->DstReg
.WriteMask
),
644 t_dst_class(vpi
->DstReg
.File
));
645 inst
[1] = t_src(vp
, &src
[0]);
646 inst
[2] = t_src(vp
, &src
[1]);
647 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
652 static GLuint
*r300TranslateOpcodeMIN(struct r300_vertex_program
*vp
,
653 struct prog_instruction
*vpi
,
655 struct prog_src_register src
[3])
657 inst
[0] = PVS_OP_DST_OPERAND(VE_MINIMUM
,
660 t_dst_index(vp
, &vpi
->DstReg
),
661 t_dst_mask(vpi
->DstReg
.WriteMask
),
662 t_dst_class(vpi
->DstReg
.File
));
663 inst
[1] = t_src(vp
, &src
[0]);
664 inst
[2] = t_src(vp
, &src
[1]);
665 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
670 static GLuint
*r300TranslateOpcodeMOV(struct r300_vertex_program
*vp
,
671 struct prog_instruction
*vpi
,
673 struct prog_src_register src
[3])
675 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
677 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
680 t_dst_index(vp
, &vpi
->DstReg
),
681 t_dst_mask(vpi
->DstReg
.WriteMask
),
682 t_dst_class(vpi
->DstReg
.File
));
683 inst
[1] = t_src(vp
, &src
[0]);
684 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
685 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
690 static GLuint
*r300TranslateOpcodeMUL(struct r300_vertex_program
*vp
,
691 struct prog_instruction
*vpi
,
693 struct prog_src_register src
[3])
695 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY
,
698 t_dst_index(vp
, &vpi
->DstReg
),
699 t_dst_mask(vpi
->DstReg
.WriteMask
),
700 t_dst_class(vpi
->DstReg
.File
));
701 inst
[1] = t_src(vp
, &src
[0]);
702 inst
[2] = t_src(vp
, &src
[1]);
703 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
708 static GLuint
*r300TranslateOpcodePOW(struct r300_vertex_program
*vp
,
709 struct prog_instruction
*vpi
,
711 struct prog_src_register src
[3])
713 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
716 t_dst_index(vp
, &vpi
->DstReg
),
717 t_dst_mask(vpi
->DstReg
.WriteMask
),
718 t_dst_class(vpi
->DstReg
.File
));
719 inst
[1] = t_src_scalar(vp
, &src
[0]);
720 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
721 inst
[3] = t_src_scalar(vp
, &src
[1]);
726 static GLuint
*r300TranslateOpcodeRCP(struct r300_vertex_program
*vp
,
727 struct prog_instruction
*vpi
,
729 struct prog_src_register src
[3])
731 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX
,
734 t_dst_index(vp
, &vpi
->DstReg
),
735 t_dst_mask(vpi
->DstReg
.WriteMask
),
736 t_dst_class(vpi
->DstReg
.File
));
737 inst
[1] = t_src_scalar(vp
, &src
[0]);
738 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
739 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
744 static GLuint
*r300TranslateOpcodeRSQ(struct r300_vertex_program
*vp
,
745 struct prog_instruction
*vpi
,
747 struct prog_src_register src
[3])
749 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX
,
752 t_dst_index(vp
, &vpi
->DstReg
),
753 t_dst_mask(vpi
->DstReg
.WriteMask
),
754 t_dst_class(vpi
->DstReg
.File
));
755 inst
[1] = t_src_scalar(vp
, &src
[0]);
756 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
757 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
762 static GLuint
*r300TranslateOpcodeSGE(struct r300_vertex_program
*vp
,
763 struct prog_instruction
*vpi
,
765 struct prog_src_register src
[3])
767 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL
,
770 t_dst_index(vp
, &vpi
->DstReg
),
771 t_dst_mask(vpi
->DstReg
.WriteMask
),
772 t_dst_class(vpi
->DstReg
.File
));
773 inst
[1] = t_src(vp
, &src
[0]);
774 inst
[2] = t_src(vp
, &src
[1]);
775 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
780 static GLuint
*r300TranslateOpcodeSLT(struct r300_vertex_program
*vp
,
781 struct prog_instruction
*vpi
,
783 struct prog_src_register src
[3])
785 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN
,
788 t_dst_index(vp
, &vpi
->DstReg
),
789 t_dst_mask(vpi
->DstReg
.WriteMask
),
790 t_dst_class(vpi
->DstReg
.File
));
791 inst
[1] = t_src(vp
, &src
[0]);
792 inst
[2] = t_src(vp
, &src
[1]);
793 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
798 static GLuint
*r300TranslateOpcodeSUB(struct r300_vertex_program
*vp
,
799 struct prog_instruction
*vpi
,
801 struct prog_src_register src
[3])
803 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
806 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
809 t_dst_index(vp
, &vpi
->DstReg
),
810 t_dst_mask(vpi
->DstReg
.WriteMask
),
811 t_dst_class(vpi
->DstReg
.File
));
812 inst
[1] = t_src(vp
, &src
[0]);
813 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
814 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
815 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
816 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
817 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
818 t_src_class(src
[1].File
),
820 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
821 (src
[1].RelAddr
<< 4);
825 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
828 t_dst_index(vp
, &vpi
->DstReg
),
829 t_dst_mask(vpi
->DstReg
.WriteMask
),
830 t_dst_class(vpi
->DstReg
.File
));
831 inst
[1] = t_src(vp
, &src
[0]);
832 inst
[2] = __CONST(0, SWIZZLE_ONE
);
833 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
834 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
835 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
836 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
837 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
838 t_src_class(src
[1].File
),
840 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
841 (src
[1].RelAddr
<< 4);
847 static GLuint
*r300TranslateOpcodeSWZ(struct r300_vertex_program
*vp
,
848 struct prog_instruction
*vpi
,
850 struct prog_src_register src
[3])
852 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
854 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
857 t_dst_index(vp
, &vpi
->DstReg
),
858 t_dst_mask(vpi
->DstReg
.WriteMask
),
859 t_dst_class(vpi
->DstReg
.File
));
860 inst
[1] = t_src(vp
, &src
[0]);
861 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
862 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
867 static GLuint
*r300TranslateOpcodeXPD(struct r300_vertex_program
*vp
,
868 struct prog_instruction
*vpi
,
870 struct prog_src_register src
[3],
873 /* mul r0, r1.yzxw, r2.zxyw
874 mad r0, -r2.yzxw, r1.zxyw, r0
877 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
881 t_dst_mask(vpi
->DstReg
.WriteMask
),
882 PVS_DST_REG_TEMPORARY
);
883 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
884 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
885 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
886 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
887 t_src_class(src
[0].File
),
888 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
889 (src
[0].RelAddr
<< 4);
890 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
891 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
892 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
893 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
894 t_src_class(src
[1].File
),
895 src
[1].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
896 (src
[1].RelAddr
<< 4);
897 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
900 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
903 t_dst_index(vp
, &vpi
->DstReg
),
904 t_dst_mask(vpi
->DstReg
.WriteMask
),
905 t_dst_class(vpi
->DstReg
.File
));
906 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
907 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
908 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
909 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
910 t_src_class(src
[1].File
),
912 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
913 (src
[1].RelAddr
<< 4);
914 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
915 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
916 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
917 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
918 t_src_class(src
[0].File
),
919 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
920 (src
[0].RelAddr
<< 4);
922 PVS_SRC_OPERAND(*u_temp_i
, PVS_SRC_SELECT_X
, PVS_SRC_SELECT_Y
,
923 PVS_SRC_SELECT_Z
, PVS_SRC_SELECT_W
,
924 PVS_SRC_REG_TEMPORARY
, VSF_FLAG_NONE
);
931 static void t_inputs_outputs(struct r300_vertex_program
*vp
)
937 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
938 if (vp
->key
.InputsRead
& (1 << i
))
939 vp
->inputs
[i
] = ++cur_reg
;
945 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
948 assert(vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
950 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
951 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
954 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
955 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
958 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
959 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
962 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
963 vp
->outputs
[VERT_RESULT_COL1
] =
964 vp
->outputs
[VERT_RESULT_COL0
] + 1;
965 cur_reg
= vp
->outputs
[VERT_RESULT_COL1
] + 1;
968 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
969 vp
->outputs
[VERT_RESULT_BFC0
] =
970 vp
->outputs
[VERT_RESULT_COL0
] + 2;
971 cur_reg
= vp
->outputs
[VERT_RESULT_BFC0
] + 2;
974 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
975 vp
->outputs
[VERT_RESULT_BFC1
] =
976 vp
->outputs
[VERT_RESULT_COL0
] + 3;
977 cur_reg
= vp
->outputs
[VERT_RESULT_BFC1
] + 1;
980 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
981 if (vp
->key
.OutputsWritten
& (1 << i
)) {
982 vp
->outputs
[i
] = cur_reg
++;
986 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
987 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
991 static void r300TranslateVertexShader(struct r300_vertex_program
*vp
,
992 struct prog_instruction
*vpi
)
996 unsigned long num_operands
;
997 /* Initial value should be last tmp reg that hw supports.
998 Strangely enough r300 doesnt mind even though these would be out of range.
999 Smart enough to realize that it doesnt need it? */
1000 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
1001 struct prog_src_register src
[3];
1003 vp
->pos_end
= 0; /* Not supported yet */
1004 vp
->hw_code
.length
= 0;
1005 vp
->translated
= GL_TRUE
;
1006 vp
->error
= GL_FALSE
;
1008 t_inputs_outputs(vp
);
1010 for (inst
= vp
->hw_code
.body
.d
; vpi
->Opcode
!= OPCODE_END
;
1015 if (!valid_dst(vp
, &vpi
->DstReg
)) {
1016 /* redirect result to unused temp */
1017 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1018 vpi
->DstReg
.Index
= u_temp_i
;
1021 num_operands
= _mesa_num_inst_src_regs(vpi
->Opcode
);
1023 /* copy the sources (src) from mesa into a local variable... is this needed? */
1024 for (i
= 0; i
< num_operands
; i
++) {
1025 src
[i
] = vpi
->SrcReg
[i
];
1028 if (num_operands
== 3) { /* TODO: scalars */
1029 if (CMP_SRCS(src
[1], src
[2])
1030 || CMP_SRCS(src
[0], src
[2])) {
1031 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
1036 PVS_DST_REG_TEMPORARY
);
1038 PVS_SRC_OPERAND(t_src_index(vp
, &src
[2]),
1043 t_src_class(src
[2].File
),
1044 VSF_FLAG_NONE
) | (src
[2].
1047 inst
[2] = __CONST(2, SWIZZLE_ZERO
);
1048 inst
[3] = __CONST(2, SWIZZLE_ZERO
);
1051 src
[2].File
= PROGRAM_TEMPORARY
;
1052 src
[2].Index
= u_temp_i
;
1058 if (num_operands
>= 2) {
1059 if (CMP_SRCS(src
[1], src
[0])) {
1060 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
1065 PVS_DST_REG_TEMPORARY
);
1067 PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
1072 t_src_class(src
[0].File
),
1073 VSF_FLAG_NONE
) | (src
[0].
1076 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
1077 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
1080 src
[0].File
= PROGRAM_TEMPORARY
;
1081 src
[0].Index
= u_temp_i
;
1087 switch (vpi
->Opcode
) {
1089 inst
= r300TranslateOpcodeABS(vp
, vpi
, inst
, src
);
1092 inst
= r300TranslateOpcodeADD(vp
, vpi
, inst
, src
);
1095 inst
= r300TranslateOpcodeARL(vp
, vpi
, inst
, src
);
1098 inst
= r300TranslateOpcodeDP3(vp
, vpi
, inst
, src
);
1101 inst
= r300TranslateOpcodeDP4(vp
, vpi
, inst
, src
);
1104 inst
= r300TranslateOpcodeDPH(vp
, vpi
, inst
, src
);
1107 inst
= r300TranslateOpcodeDST(vp
, vpi
, inst
, src
);
1110 inst
= r300TranslateOpcodeEX2(vp
, vpi
, inst
, src
);
1113 inst
= r300TranslateOpcodeEXP(vp
, vpi
, inst
, src
);
1116 inst
= r300TranslateOpcodeFLR(vp
, vpi
, inst
, src
, /* FIXME */
1120 inst
= r300TranslateOpcodeFRC(vp
, vpi
, inst
, src
);
1123 inst
= r300TranslateOpcodeLG2(vp
, vpi
, inst
, src
);
1126 inst
= r300TranslateOpcodeLIT(vp
, vpi
, inst
, src
);
1129 inst
= r300TranslateOpcodeLOG(vp
, vpi
, inst
, src
);
1132 inst
= r300TranslateOpcodeMAD(vp
, vpi
, inst
, src
);
1135 inst
= r300TranslateOpcodeMAX(vp
, vpi
, inst
, src
);
1138 inst
= r300TranslateOpcodeMIN(vp
, vpi
, inst
, src
);
1141 inst
= r300TranslateOpcodeMOV(vp
, vpi
, inst
, src
);
1144 inst
= r300TranslateOpcodeMUL(vp
, vpi
, inst
, src
);
1147 inst
= r300TranslateOpcodePOW(vp
, vpi
, inst
, src
);
1150 inst
= r300TranslateOpcodeRCP(vp
, vpi
, inst
, src
);
1153 inst
= r300TranslateOpcodeRSQ(vp
, vpi
, inst
, src
);
1156 inst
= r300TranslateOpcodeSGE(vp
, vpi
, inst
, src
);
1159 inst
= r300TranslateOpcodeSLT(vp
, vpi
, inst
, src
);
1162 inst
= r300TranslateOpcodeSUB(vp
, vpi
, inst
, src
);
1165 inst
= r300TranslateOpcodeSWZ(vp
, vpi
, inst
, src
);
1168 inst
= r300TranslateOpcodeXPD(vp
, vpi
, inst
, src
, /* FIXME */
1172 vp
->error
= GL_TRUE
;
1177 /* Some outputs may be artificially added, to match the inputs
1178 of the fragment program. Blank the outputs here. */
1179 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
1180 if (vp
->key
.OutputsAdded
& (1 << i
)) {
1181 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
1187 inst
[1] = __CONST(0, SWIZZLE_ZERO
);
1188 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
1189 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
1194 vp
->hw_code
.length
= (inst
- vp
->hw_code
.body
.d
);
1195 if (vp
->hw_code
.length
>= VSF_MAX_FRAGMENT_LENGTH
) {
1196 vp
->error
= GL_TRUE
;
1200 /* DP4 version seems to trigger some hw peculiarity */
1201 //#define PREFER_DP4
1203 static void position_invariant(struct gl_program
*prog
)
1205 struct prog_instruction
*vpi
;
1206 struct gl_program_parameter_list
*paramList
;
1209 gl_state_index tokens
[STATE_LENGTH
] = { STATE_MVP_MATRIX
, 0, 0, 0, 0 };
1211 /* tokens[4] = matrix modifier */
1213 tokens
[4] = 0; /* not transposed or inverted */
1215 tokens
[4] = STATE_MATRIX_TRANSPOSE
;
1217 paramList
= prog
->Parameters
;
1219 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 4);
1220 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 4);
1222 for (i
= 0; i
< 4; i
++) {
1224 tokens
[2] = tokens
[3] = i
; /* matrix row[i]..row[i] */
1225 idx
= _mesa_add_state_reference(paramList
, tokens
);
1227 vpi
[i
].Opcode
= OPCODE_DP4
;
1228 vpi
[i
].StringPos
= 0;
1231 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1232 vpi
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1233 vpi
[i
].DstReg
.WriteMask
= 1 << i
;
1234 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1236 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1237 vpi
[i
].SrcReg
[0].Index
= idx
;
1238 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1240 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1241 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1242 vpi
[i
].SrcReg
[1].Swizzle
= SWIZZLE_XYZW
;
1245 vpi
[i
].Opcode
= OPCODE_MUL
;
1247 vpi
[i
].Opcode
= OPCODE_MAD
;
1252 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1254 vpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
1255 vpi
[i
].DstReg
.Index
= 0;
1256 vpi
[i
].DstReg
.WriteMask
= 0xf;
1257 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1259 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1260 vpi
[i
].SrcReg
[0].Index
= idx
;
1261 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1263 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1264 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1265 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(i
, i
, i
, i
);
1268 vpi
[i
].SrcReg
[2].File
= PROGRAM_TEMPORARY
;
1269 vpi
[i
].SrcReg
[2].Index
= 0;
1270 vpi
[i
].SrcReg
[2].Swizzle
= SWIZZLE_XYZW
;
1275 _mesa_copy_instructions(&vpi
[i
], prog
->Instructions
,
1276 prog
->NumInstructions
);
1278 free(prog
->Instructions
);
1280 prog
->Instructions
= vpi
;
1282 prog
->NumInstructions
+= 4;
1283 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1285 assert(vpi
->Opcode
== OPCODE_END
);
1288 static void insert_wpos(struct r300_vertex_program
*vp
, struct gl_program
*prog
,
1291 struct prog_instruction
*vpi
;
1292 struct prog_instruction
*vpi_insert
;
1295 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 2);
1296 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 2);
1298 _mesa_copy_instructions(vpi
, prog
->Instructions
,
1299 prog
->NumInstructions
- 1);
1301 _mesa_copy_instructions(&vpi
[prog
->NumInstructions
+ 1],
1302 &prog
->Instructions
[prog
->NumInstructions
- 1],
1304 vpi_insert
= &vpi
[prog
->NumInstructions
- 1];
1306 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1308 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1309 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1310 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1311 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1313 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1314 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1315 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1318 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1320 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1321 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_TEX0
+ vp
->wpos_idx
;
1322 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1323 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1325 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1326 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1327 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1330 free(prog
->Instructions
);
1332 prog
->Instructions
= vpi
;
1334 prog
->NumInstructions
+= i
;
1335 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1337 assert(vpi
->Opcode
== OPCODE_END
);
1340 static void pos_as_texcoord(struct r300_vertex_program
*vp
,
1341 struct gl_program
*prog
)
1343 struct prog_instruction
*vpi
;
1344 GLuint tempregi
= prog
->NumTemporaries
;
1345 /* should do something else if no temps left... */
1346 prog
->NumTemporaries
++;
1348 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
1349 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
1350 && vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
1351 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1352 vpi
->DstReg
.Index
= tempregi
;
1355 insert_wpos(vp
, prog
, tempregi
);
1358 static struct r300_vertex_program
*build_program(struct r300_vertex_program_key
1359 *wanted_key
, struct gl_vertex_program
1360 *mesa_vp
, GLint wpos_idx
)
1362 struct r300_vertex_program
*vp
;
1364 vp
= _mesa_calloc(sizeof(*vp
));
1365 _mesa_memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
1366 vp
->wpos_idx
= wpos_idx
;
1368 if (mesa_vp
->IsPositionInvariant
) {
1369 position_invariant(&mesa_vp
->Base
);
1372 if (wpos_idx
> -1) {
1373 pos_as_texcoord(vp
, &mesa_vp
->Base
);
1376 assert(mesa_vp
->Base
.NumInstructions
);
1377 vp
->num_temporaries
= mesa_vp
->Base
.NumTemporaries
;
1378 r300TranslateVertexShader(vp
, mesa_vp
->Base
.Instructions
);
1383 static void add_outputs(struct r300_vertex_program_key
*key
, GLint vert
)
1385 if (key
->OutputsWritten
& (1 << vert
))
1388 key
->OutputsWritten
|= 1 << vert
;
1389 key
->OutputsAdded
|= 1 << vert
;
1392 void r300SelectVertexShader(r300ContextPtr r300
)
1394 GLcontext
*ctx
= ctx
= r300
->radeon
.glCtx
;
1396 struct r300_vertex_program_key wanted_key
= { 0 };
1398 struct r300_vertex_program_cont
*vpc
;
1399 struct r300_vertex_program
*vp
;
1402 vpc
= (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
1403 wanted_key
.InputsRead
= vpc
->mesa_program
.Base
.InputsRead
;
1404 wanted_key
.OutputsWritten
= vpc
->mesa_program
.Base
.OutputsWritten
;
1405 InputsRead
= ctx
->FragmentProgram
._Current
->Base
.InputsRead
;
1408 if (InputsRead
& FRAG_BIT_WPOS
) {
1409 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1410 if (!(InputsRead
& (FRAG_BIT_TEX0
<< i
)))
1413 if (i
== ctx
->Const
.MaxTextureUnits
) {
1414 fprintf(stderr
, "\tno free texcoord found\n");
1418 wanted_key
.OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ i
);
1422 add_outputs(&wanted_key
, VERT_RESULT_HPOS
);
1424 if (InputsRead
& FRAG_BIT_COL0
) {
1425 add_outputs(&wanted_key
, VERT_RESULT_COL0
);
1428 if (InputsRead
& FRAG_BIT_COL1
) {
1429 add_outputs(&wanted_key
, VERT_RESULT_COL1
);
1432 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++) {
1433 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1434 add_outputs(&wanted_key
, VERT_RESULT_TEX0
+ i
);
1438 if (vpc
->mesa_program
.IsPositionInvariant
) {
1439 /* we wan't position don't we ? */
1440 wanted_key
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
1443 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
)
1444 if (_mesa_memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
))
1446 r300
->selected_vp
= vp
;
1449 //_mesa_print_program(&vpc->mesa_program.Base);
1451 vp
= build_program(&wanted_key
, &vpc
->mesa_program
, wpos_idx
);
1452 vp
->next
= vpc
->progs
;
1454 r300
->selected_vp
= vp
;
/*
 * Raise the vpu.count field of the command header at `ptr` to
 * new_count / 4 if it is currently smaller.  The new value must be
 * below 256 (asserted).  Wrapped in do { } while (0) so the macro
 * behaves as a single statement.
 */
#define bump_vpu_count(ptr, new_count)   do { \
	drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
	int _nc=(new_count)/4; \
	assert(_nc < 256); \
	if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
} while(0)
1464 static void r300EmitVertexProgram(r300ContextPtr r300
, int dest
, struct r300_vertex_shader_hw_code
*code
)
1468 assert((code
->length
> 0) && (code
->length
% 4 == 0));
1470 switch ((dest
>> 8) & 0xf) {
1472 R300_STATECHANGE(r300
, vpi
);
1473 for (i
= 0; i
< code
->length
; i
++)
1474 r300
->hw
.vpi
.cmd
[R300_VPI_INSTR_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1475 bump_vpu_count(r300
->hw
.vpi
.cmd
, code
->length
+ 4 * (dest
& 0xff));
1478 R300_STATECHANGE(r300
, vpp
);
1479 for (i
= 0; i
< code
->length
; i
++)
1480 r300
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1481 bump_vpu_count(r300
->hw
.vpp
.cmd
, code
->length
+ 4 * (dest
& 0xff));
1484 R300_STATECHANGE(r300
, vps
);
1485 for (i
= 0; i
< code
->length
; i
++)
1486 r300
->hw
.vps
.cmd
[1 + i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1487 bump_vpu_count(r300
->hw
.vps
.cmd
, code
->length
+ 4 * (dest
& 0xff));
1490 fprintf(stderr
, "%s:%s don't know how to handle dest %04x\n", __FILE__
, __FUNCTION__
, dest
);
1495 void r300SetupSwtclVertexProgram(r300ContextPtr rmesa
)
1497 struct r300_vertex_shader_hw_code
*hw_code
;
1502 int param_count
= 0;
1503 int program_end
= 0;
1505 /* Reset state, in case we don't use something */
1506 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpp
.cmd
)->vpu
.count
= 0;
1507 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpi
.cmd
)->vpu
.count
= 0;
1508 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vps
.cmd
)->vpu
.count
= 0;
1510 hw_code
= _mesa_malloc(sizeof(struct r300_vertex_shader_hw_code
));
1512 for (i
= VERT_ATTRIB_POS
; i
< VERT_ATTRIB_MAX
; i
++) {
1513 if (rmesa
->swtcl
.sw_tcl_inputs
[i
] != -1) {
1514 hw_code
->body
.d
[program_end
+ 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY
, GL_FALSE
, GL_FALSE
, o_reg
++, VSF_FLAG_ALL
, PVS_DST_REG_OUT
);
1515 hw_code
->body
.d
[program_end
+ 1] = PVS_SRC_OPERAND(rmesa
->swtcl
.sw_tcl_inputs
[i
], PVS_SRC_SELECT_X
,
1516 PVS_SRC_SELECT_Y
, PVS_SRC_SELECT_Z
, PVS_SRC_SELECT_W
, PVS_SRC_REG_INPUT
, VSF_FLAG_NONE
);
1517 hw_code
->body
.d
[program_end
+ 2] = PVS_SRC_OPERAND(rmesa
->swtcl
.sw_tcl_inputs
[i
], PVS_SRC_SELECT_FORCE_1
, PVS_SRC_SELECT_FORCE_1
,
1518 PVS_SRC_SELECT_FORCE_1
, PVS_SRC_SELECT_FORCE_1
, PVS_SRC_REG_INPUT
, VSF_FLAG_NONE
);
1519 hw_code
->body
.d
[program_end
+ 3] = PVS_SRC_OPERAND(rmesa
->swtcl
.sw_tcl_inputs
[i
], PVS_SRC_SELECT_FORCE_1
, PVS_SRC_SELECT_FORCE_1
,
1520 PVS_SRC_SELECT_FORCE_1
, PVS_SRC_SELECT_FORCE_1
, PVS_SRC_REG_INPUT
, VSF_FLAG_NONE
);
1526 hw_code
->length
= program_end
;
1528 r300EmitVertexProgram(rmesa
, R300_PVS_CODE_START
, hw_code
);
1529 inst_count
= (hw_code
->length
/ 4) - 1;
1531 r300VapCntl(rmesa
, i_reg
, o_reg
, 0);
1533 R300_STATECHANGE(rmesa
, pvs
);
1534 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_1
] = (0 << R300_PVS_FIRST_INST_SHIFT
) | (inst_count
<< R300_PVS_XYZW_VALID_INST_SHIFT
) |
1535 (inst_count
<< R300_PVS_LAST_INST_SHIFT
);
1537 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_2
] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT
) | (param_count
<< R300_PVS_MAX_CONST_ADDR_SHIFT
);
1538 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_3
] = (inst_count
<< R300_PVS_LAST_VTX_SRC_INST_SHIFT
);
1540 _mesa_free(hw_code
);
1543 void r300SetupVertexProgram(r300ContextPtr rmesa
)
1545 GLcontext
*ctx
= rmesa
->radeon
.glCtx
;
1546 struct r300_vertex_program
*prog
= rmesa
->selected_vp
;
1548 int param_count
= 0;
1550 /* Reset state, in case we don't use something */
1551 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpp
.cmd
)->vpu
.count
= 0;
1552 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpi
.cmd
)->vpu
.count
= 0;
1553 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vps
.cmd
)->vpu
.count
= 0;
1555 R300_STATECHANGE(rmesa
, vpp
);
1556 param_count
= r300VertexProgUpdateParams(ctx
,
1557 (struct r300_vertex_program_cont
*)
1558 ctx
->VertexProgram
._Current
,
1559 (float *)&rmesa
->hw
.vpp
.
1560 cmd
[R300_VPP_PARAM_0
]);
1561 bump_vpu_count(rmesa
->hw
.vpp
.cmd
, param_count
);
1564 r300EmitVertexProgram(rmesa
, R300_PVS_CODE_START
, &(prog
->hw_code
));
1565 inst_count
= (prog
->hw_code
.length
/ 4) - 1;
1567 r300VapCntl(rmesa
, _mesa_bitcount(prog
->key
.InputsRead
),
1568 _mesa_bitcount(prog
->key
.OutputsWritten
), prog
->num_temporaries
);
1570 R300_STATECHANGE(rmesa
, pvs
);
1571 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_1
] = (0 << R300_PVS_FIRST_INST_SHIFT
) | (inst_count
<< R300_PVS_XYZW_VALID_INST_SHIFT
) |
1572 (inst_count
<< R300_PVS_LAST_INST_SHIFT
);
1574 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_2
] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT
) | (param_count
<< R300_PVS_MAX_CONST_ADDR_SHIFT
);
1575 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_3
] = (inst_count
<< R300_PVS_LAST_VTX_SRC_INST_SHIFT
);