/**************************************************************************

Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
/* Radeon R5xx Acceleration, Revision 1.2 */
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/prog_instruction.h"
36 #include "shader/prog_parameter.h"
37 #include "shader/prog_print.h"
38 #include "shader/prog_statevars.h"
41 #include "r300_context.h"
42 #include "r300_state.h"
/*
 * Non-zero when two source registers conflict: they differ in RelAddr, or
 * they have different indices while both map to the constant class or both
 * map to the input class.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
		       ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
			 t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
			(t_src_class((a).File) == PVS_SRC_REG_INPUT && \
			 t_src_class((b).File) == PVS_SRC_REG_INPUT))))
/*
 * Take an already-setup and valid source then swizzle it appropriately to
 * obtain a constant ZERO or ONE source.
 *
 * x selects the entry of the local `src` array, y is the swizzle selector
 * replicated to all four components.  Relies on `vp` and `src` being in
 * scope at the expansion site.
 */
#define __CONST(x, y)	\
	(PVS_SRC_OPERAND(t_src_index(vp, &src[x]),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_swizzle(y),	\
			   t_src_class(src[x].File), \
			   VSF_FLAG_NONE) | (src[x].RelAddr << 4))
/*
 * Release all scratch temporaries: flag an error if the scratch registers
 * handed out so far (counted down from the top) plus the program's own
 * temporaries exceed VSF_MAX_FRAGMENT_TEMPS, then reset the scratch counter.
 * Relies on `vp` and `u_temp_i` being in scope at the expansion site.
 */
#define FREE_TEMPS() \
	do { \
		int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
		if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
			WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
			vp->error = GL_TRUE; \
		} \
		u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
	} while (0)
74 int r300VertexProgUpdateParams(GLcontext
* ctx
,
75 struct r300_vertex_program_cont
*vp
, float *dst
)
78 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
80 struct gl_program_parameter_list
*paramList
;
82 if (mesa_vp
->IsNVProgram
) {
83 _mesa_load_tracked_matrices(ctx
);
85 for (pi
= 0; pi
< MAX_NV_VERTEX_PROGRAM_PARAMS
; pi
++) {
86 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][0];
87 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][1];
88 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][2];
89 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][3];
94 assert(mesa_vp
->Base
.Parameters
);
95 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
97 if (mesa_vp
->Base
.Parameters
->NumParameters
* 4 >
98 VSF_MAX_FRAGMENT_LENGTH
) {
99 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
103 paramList
= mesa_vp
->Base
.Parameters
;
104 for (pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
105 switch (paramList
->Parameters
[pi
].Type
) {
106 case PROGRAM_STATE_VAR
:
107 case PROGRAM_NAMED_PARAM
:
108 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
109 case PROGRAM_CONSTANT
:
110 *dst
++ = paramList
->ParameterValues
[pi
][0];
111 *dst
++ = paramList
->ParameterValues
[pi
][1];
112 *dst
++ = paramList
->ParameterValues
[pi
][2];
113 *dst
++ = paramList
->ParameterValues
[pi
][3];
116 _mesa_problem(NULL
, "Bad param type in %s",
125 static unsigned long t_dst_mask(GLuint mask
)
127 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
128 return mask
& VSF_FLAG_ALL
;
131 static unsigned long t_dst_class(gl_register_file file
)
135 case PROGRAM_TEMPORARY
:
136 return PVS_DST_REG_TEMPORARY
;
138 return PVS_DST_REG_OUT
;
139 case PROGRAM_ADDRESS
:
140 return PVS_DST_REG_A0
;
143 case PROGRAM_LOCAL_PARAM:
144 case PROGRAM_ENV_PARAM:
145 case PROGRAM_NAMED_PARAM:
146 case PROGRAM_STATE_VAR:
147 case PROGRAM_WRITE_ONLY:
148 case PROGRAM_ADDRESS:
151 fprintf(stderr
, "problem in %s", __FUNCTION__
);
157 static unsigned long t_dst_index(struct r300_vertex_program
*vp
,
158 struct prog_dst_register
*dst
)
160 if (dst
->File
== PROGRAM_OUTPUT
)
161 return vp
->outputs
[dst
->Index
];
166 static unsigned long t_src_class(gl_register_file file
)
169 case PROGRAM_TEMPORARY
:
170 return PVS_SRC_REG_TEMPORARY
;
172 return PVS_SRC_REG_INPUT
;
173 case PROGRAM_LOCAL_PARAM
:
174 case PROGRAM_ENV_PARAM
:
175 case PROGRAM_NAMED_PARAM
:
176 case PROGRAM_CONSTANT
:
177 case PROGRAM_STATE_VAR
:
178 return PVS_SRC_REG_CONSTANT
;
181 case PROGRAM_WRITE_ONLY:
182 case PROGRAM_ADDRESS:
185 fprintf(stderr
, "problem in %s", __FUNCTION__
);
191 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
193 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
#if 0
/*
 * Debug helper: print the input remapping table of vp to stderr, prefixed
 * with the caller's name.
 * NOTE(review): the surrounding #if 0 / #endif was restored from the gaps in
 * the listing - confirm this helper is meant to stay compiled out.
 */
static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
{
	int i;

	if (vp == NULL) {
		fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
			caller);
		return;
	}

	fprintf(stderr, "%s:<", caller);
	for (i = 0; i < VERT_ATTRIB_MAX; i++)
		fprintf(stderr, "%d ", vp->inputs[i]);
	fprintf(stderr, ">\n");

}
#endif
216 static unsigned long t_src_index(struct r300_vertex_program
*vp
,
217 struct prog_src_register
*src
)
219 if (src
->File
== PROGRAM_INPUT
) {
220 assert(vp
->inputs
[src
->Index
] != -1);
221 return vp
->inputs
[src
->Index
];
223 if (src
->Index
< 0) {
225 "negative offsets for indirect addressing do not work.\n");
232 /* these two functions should probably be merged... */
234 static unsigned long t_src(struct r300_vertex_program
*vp
,
235 struct prog_src_register
*src
)
237 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
238 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
240 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
241 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
242 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
243 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
244 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
245 t_src_class(src
->File
),
246 src
->Negate
) | (src
->RelAddr
<< 4);
249 static unsigned long t_src_scalar(struct r300_vertex_program
*vp
,
250 struct prog_src_register
*src
)
252 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
253 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
255 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
256 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
257 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
258 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
259 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
260 t_src_class(src
->File
),
261 src
->Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
265 static GLboolean
valid_dst(struct r300_vertex_program
*vp
,
266 struct prog_dst_register
*dst
)
268 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
270 } else if (dst
->File
== PROGRAM_ADDRESS
) {
271 assert(dst
->Index
== 0);
277 static GLuint
*r300TranslateOpcodeABS(struct r300_vertex_program
*vp
,
278 struct prog_instruction
*vpi
,
280 struct prog_src_register src
[3])
282 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
284 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
287 t_dst_index(vp
, &vpi
->DstReg
),
288 t_dst_mask(vpi
->DstReg
.WriteMask
),
289 t_dst_class(vpi
->DstReg
.File
));
290 inst
[1] = t_src(vp
, &src
[0]);
291 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
292 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
293 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
294 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
295 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
296 t_src_class(src
[0].File
),
298 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
299 (src
[0].RelAddr
<< 4);
305 static GLuint
*r300TranslateOpcodeADD(struct r300_vertex_program
*vp
,
306 struct prog_instruction
*vpi
,
308 struct prog_src_register src
[3])
310 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
313 t_dst_index(vp
, &vpi
->DstReg
),
314 t_dst_mask(vpi
->DstReg
.WriteMask
),
315 t_dst_class(vpi
->DstReg
.File
));
316 inst
[1] = t_src(vp
, &src
[0]);
317 inst
[2] = t_src(vp
, &src
[1]);
318 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
323 static GLuint
*r300TranslateOpcodeARL(struct r300_vertex_program
*vp
,
324 struct prog_instruction
*vpi
,
326 struct prog_src_register src
[3])
328 inst
[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX
,
331 t_dst_index(vp
, &vpi
->DstReg
),
332 t_dst_mask(vpi
->DstReg
.WriteMask
),
333 t_dst_class(vpi
->DstReg
.File
));
334 inst
[1] = t_src(vp
, &src
[0]);
335 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
336 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
341 static GLuint
*r300TranslateOpcodeDP3(struct r300_vertex_program
*vp
,
342 struct prog_instruction
*vpi
,
344 struct prog_src_register src
[3])
346 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
348 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
351 t_dst_index(vp
, &vpi
->DstReg
),
352 t_dst_mask(vpi
->DstReg
.WriteMask
),
353 t_dst_class(vpi
->DstReg
.File
));
354 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
355 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
356 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
357 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
359 t_src_class(src
[0].File
),
360 src
[0].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
361 (src
[0].RelAddr
<< 4);
363 PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
364 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
365 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
366 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), SWIZZLE_ZERO
,
367 t_src_class(src
[1].File
),
368 src
[1].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
369 (src
[1].RelAddr
<< 4);
370 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
375 static GLuint
*r300TranslateOpcodeDP4(struct r300_vertex_program
*vp
,
376 struct prog_instruction
*vpi
,
378 struct prog_src_register src
[3])
380 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
383 t_dst_index(vp
, &vpi
->DstReg
),
384 t_dst_mask(vpi
->DstReg
.WriteMask
),
385 t_dst_class(vpi
->DstReg
.File
));
386 inst
[1] = t_src(vp
, &src
[0]);
387 inst
[2] = t_src(vp
, &src
[1]);
388 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
393 static GLuint
*r300TranslateOpcodeDPH(struct r300_vertex_program
*vp
,
394 struct prog_instruction
*vpi
,
396 struct prog_src_register src
[3])
398 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
399 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
402 t_dst_index(vp
, &vpi
->DstReg
),
403 t_dst_mask(vpi
->DstReg
.WriteMask
),
404 t_dst_class(vpi
->DstReg
.File
));
405 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
406 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
407 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
408 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
409 PVS_SRC_SELECT_FORCE_1
,
410 t_src_class(src
[0].File
),
411 src
[0].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
412 (src
[0].RelAddr
<< 4);
413 inst
[2] = t_src(vp
, &src
[1]);
414 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
419 static GLuint
*r300TranslateOpcodeDST(struct r300_vertex_program
*vp
,
420 struct prog_instruction
*vpi
,
422 struct prog_src_register src
[3])
424 inst
[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR
,
427 t_dst_index(vp
, &vpi
->DstReg
),
428 t_dst_mask(vpi
->DstReg
.WriteMask
),
429 t_dst_class(vpi
->DstReg
.File
));
430 inst
[1] = t_src(vp
, &src
[0]);
431 inst
[2] = t_src(vp
, &src
[1]);
432 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
437 static GLuint
*r300TranslateOpcodeEX2(struct r300_vertex_program
*vp
,
438 struct prog_instruction
*vpi
,
440 struct prog_src_register src
[3])
442 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX
,
445 t_dst_index(vp
, &vpi
->DstReg
),
446 t_dst_mask(vpi
->DstReg
.WriteMask
),
447 t_dst_class(vpi
->DstReg
.File
));
448 inst
[1] = t_src_scalar(vp
, &src
[0]);
449 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
450 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
455 static GLuint
*r300TranslateOpcodeEXP(struct r300_vertex_program
*vp
,
456 struct prog_instruction
*vpi
,
458 struct prog_src_register src
[3])
460 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX
,
463 t_dst_index(vp
, &vpi
->DstReg
),
464 t_dst_mask(vpi
->DstReg
.WriteMask
),
465 t_dst_class(vpi
->DstReg
.File
));
466 inst
[1] = t_src_scalar(vp
, &src
[0]);
467 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
468 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
473 static GLuint
*r300TranslateOpcodeFLR(struct r300_vertex_program
*vp
,
474 struct prog_instruction
*vpi
,
476 struct prog_src_register src
[3],
479 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
480 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
482 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
486 t_dst_mask(vpi
->DstReg
.WriteMask
),
487 PVS_DST_REG_TEMPORARY
);
488 inst
[1] = t_src(vp
, &src
[0]);
489 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
490 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
493 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
496 t_dst_index(vp
, &vpi
->DstReg
),
497 t_dst_mask(vpi
->DstReg
.WriteMask
),
498 t_dst_class(vpi
->DstReg
.File
));
499 inst
[1] = t_src(vp
, &src
[0]);
500 inst
[2] = PVS_SRC_OPERAND(*u_temp_i
,
504 PVS_SRC_SELECT_W
, PVS_SRC_REG_TEMPORARY
,
505 /* Not 100% sure about this */
507 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
509 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
515 static GLuint
*r300TranslateOpcodeFRC(struct r300_vertex_program
*vp
,
516 struct prog_instruction
*vpi
,
518 struct prog_src_register src
[3])
520 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
523 t_dst_index(vp
, &vpi
->DstReg
),
524 t_dst_mask(vpi
->DstReg
.WriteMask
),
525 t_dst_class(vpi
->DstReg
.File
));
526 inst
[1] = t_src(vp
, &src
[0]);
527 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
528 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
533 static GLuint
*r300TranslateOpcodeLG2(struct r300_vertex_program
*vp
,
534 struct prog_instruction
*vpi
,
536 struct prog_src_register src
[3])
538 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
540 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX
,
543 t_dst_index(vp
, &vpi
->DstReg
),
544 t_dst_mask(vpi
->DstReg
.WriteMask
),
545 t_dst_class(vpi
->DstReg
.File
));
546 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
547 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
548 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
549 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
550 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
551 t_src_class(src
[0].File
),
552 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
553 (src
[0].RelAddr
<< 4);
554 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
555 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
560 static GLuint
*r300TranslateOpcodeLIT(struct r300_vertex_program
*vp
,
561 struct prog_instruction
*vpi
,
563 struct prog_src_register src
[3])
565 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
567 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
570 t_dst_index(vp
, &vpi
->DstReg
),
571 t_dst_mask(vpi
->DstReg
.WriteMask
),
572 t_dst_class(vpi
->DstReg
.File
));
573 /* NOTE: Users swizzling might not work. */
574 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
575 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
576 PVS_SRC_SELECT_FORCE_0
, // Z
577 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
578 t_src_class(src
[0].File
),
579 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
580 (src
[0].RelAddr
<< 4);
581 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
582 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
583 PVS_SRC_SELECT_FORCE_0
, // Z
584 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
585 t_src_class(src
[0].File
),
586 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
587 (src
[0].RelAddr
<< 4);
588 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
589 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
590 PVS_SRC_SELECT_FORCE_0
, // Z
591 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
592 t_src_class(src
[0].File
),
593 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
594 (src
[0].RelAddr
<< 4);
599 static GLuint
*r300TranslateOpcodeLOG(struct r300_vertex_program
*vp
,
600 struct prog_instruction
*vpi
,
602 struct prog_src_register src
[3])
604 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX
,
607 t_dst_index(vp
, &vpi
->DstReg
),
608 t_dst_mask(vpi
->DstReg
.WriteMask
),
609 t_dst_class(vpi
->DstReg
.File
));
610 inst
[1] = t_src_scalar(vp
, &src
[0]);
611 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
612 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
617 static GLuint
*r300TranslateOpcodeMAD(struct r300_vertex_program
*vp
,
618 struct prog_instruction
*vpi
,
620 struct prog_src_register src
[3])
622 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
625 t_dst_index(vp
, &vpi
->DstReg
),
626 t_dst_mask(vpi
->DstReg
.WriteMask
),
627 t_dst_class(vpi
->DstReg
.File
));
628 inst
[1] = t_src(vp
, &src
[0]);
629 inst
[2] = t_src(vp
, &src
[1]);
630 inst
[3] = t_src(vp
, &src
[2]);
635 static GLuint
*r300TranslateOpcodeMAX(struct r300_vertex_program
*vp
,
636 struct prog_instruction
*vpi
,
638 struct prog_src_register src
[3])
640 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
643 t_dst_index(vp
, &vpi
->DstReg
),
644 t_dst_mask(vpi
->DstReg
.WriteMask
),
645 t_dst_class(vpi
->DstReg
.File
));
646 inst
[1] = t_src(vp
, &src
[0]);
647 inst
[2] = t_src(vp
, &src
[1]);
648 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
653 static GLuint
*r300TranslateOpcodeMIN(struct r300_vertex_program
*vp
,
654 struct prog_instruction
*vpi
,
656 struct prog_src_register src
[3])
658 inst
[0] = PVS_OP_DST_OPERAND(VE_MINIMUM
,
661 t_dst_index(vp
, &vpi
->DstReg
),
662 t_dst_mask(vpi
->DstReg
.WriteMask
),
663 t_dst_class(vpi
->DstReg
.File
));
664 inst
[1] = t_src(vp
, &src
[0]);
665 inst
[2] = t_src(vp
, &src
[1]);
666 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
671 static GLuint
*r300TranslateOpcodeMOV(struct r300_vertex_program
*vp
,
672 struct prog_instruction
*vpi
,
674 struct prog_src_register src
[3])
676 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
678 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
681 t_dst_index(vp
, &vpi
->DstReg
),
682 t_dst_mask(vpi
->DstReg
.WriteMask
),
683 t_dst_class(vpi
->DstReg
.File
));
684 inst
[1] = t_src(vp
, &src
[0]);
685 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
686 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
691 static GLuint
*r300TranslateOpcodeMUL(struct r300_vertex_program
*vp
,
692 struct prog_instruction
*vpi
,
694 struct prog_src_register src
[3])
696 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY
,
699 t_dst_index(vp
, &vpi
->DstReg
),
700 t_dst_mask(vpi
->DstReg
.WriteMask
),
701 t_dst_class(vpi
->DstReg
.File
));
702 inst
[1] = t_src(vp
, &src
[0]);
703 inst
[2] = t_src(vp
, &src
[1]);
704 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
709 static GLuint
*r300TranslateOpcodePOW(struct r300_vertex_program
*vp
,
710 struct prog_instruction
*vpi
,
712 struct prog_src_register src
[3])
714 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
717 t_dst_index(vp
, &vpi
->DstReg
),
718 t_dst_mask(vpi
->DstReg
.WriteMask
),
719 t_dst_class(vpi
->DstReg
.File
));
720 inst
[1] = t_src_scalar(vp
, &src
[0]);
721 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
722 inst
[3] = t_src_scalar(vp
, &src
[1]);
727 static GLuint
*r300TranslateOpcodeRCP(struct r300_vertex_program
*vp
,
728 struct prog_instruction
*vpi
,
730 struct prog_src_register src
[3])
732 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX
,
735 t_dst_index(vp
, &vpi
->DstReg
),
736 t_dst_mask(vpi
->DstReg
.WriteMask
),
737 t_dst_class(vpi
->DstReg
.File
));
738 inst
[1] = t_src_scalar(vp
, &src
[0]);
739 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
740 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
745 static GLuint
*r300TranslateOpcodeRSQ(struct r300_vertex_program
*vp
,
746 struct prog_instruction
*vpi
,
748 struct prog_src_register src
[3])
750 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX
,
753 t_dst_index(vp
, &vpi
->DstReg
),
754 t_dst_mask(vpi
->DstReg
.WriteMask
),
755 t_dst_class(vpi
->DstReg
.File
));
756 inst
[1] = t_src_scalar(vp
, &src
[0]);
757 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
758 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
763 static GLuint
*r300TranslateOpcodeSGE(struct r300_vertex_program
*vp
,
764 struct prog_instruction
*vpi
,
766 struct prog_src_register src
[3])
768 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL
,
771 t_dst_index(vp
, &vpi
->DstReg
),
772 t_dst_mask(vpi
->DstReg
.WriteMask
),
773 t_dst_class(vpi
->DstReg
.File
));
774 inst
[1] = t_src(vp
, &src
[0]);
775 inst
[2] = t_src(vp
, &src
[1]);
776 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
781 static GLuint
*r300TranslateOpcodeSLT(struct r300_vertex_program
*vp
,
782 struct prog_instruction
*vpi
,
784 struct prog_src_register src
[3])
786 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN
,
789 t_dst_index(vp
, &vpi
->DstReg
),
790 t_dst_mask(vpi
->DstReg
.WriteMask
),
791 t_dst_class(vpi
->DstReg
.File
));
792 inst
[1] = t_src(vp
, &src
[0]);
793 inst
[2] = t_src(vp
, &src
[1]);
794 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
799 static GLuint
*r300TranslateOpcodeSUB(struct r300_vertex_program
*vp
,
800 struct prog_instruction
*vpi
,
802 struct prog_src_register src
[3])
804 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
807 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
810 t_dst_index(vp
, &vpi
->DstReg
),
811 t_dst_mask(vpi
->DstReg
.WriteMask
),
812 t_dst_class(vpi
->DstReg
.File
));
813 inst
[1] = t_src(vp
, &src
[0]);
814 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
815 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
816 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
817 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
818 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
819 t_src_class(src
[1].File
),
821 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
822 (src
[1].RelAddr
<< 4);
826 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
829 t_dst_index(vp
, &vpi
->DstReg
),
830 t_dst_mask(vpi
->DstReg
.WriteMask
),
831 t_dst_class(vpi
->DstReg
.File
));
832 inst
[1] = t_src(vp
, &src
[0]);
833 inst
[2] = __CONST(0, SWIZZLE_ONE
);
834 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
835 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
836 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
837 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
838 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
839 t_src_class(src
[1].File
),
841 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
842 (src
[1].RelAddr
<< 4);
848 static GLuint
*r300TranslateOpcodeSWZ(struct r300_vertex_program
*vp
,
849 struct prog_instruction
*vpi
,
851 struct prog_src_register src
[3])
853 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
855 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
858 t_dst_index(vp
, &vpi
->DstReg
),
859 t_dst_mask(vpi
->DstReg
.WriteMask
),
860 t_dst_class(vpi
->DstReg
.File
));
861 inst
[1] = t_src(vp
, &src
[0]);
862 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
863 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
868 static GLuint
*r300TranslateOpcodeXPD(struct r300_vertex_program
*vp
,
869 struct prog_instruction
*vpi
,
871 struct prog_src_register src
[3],
874 /* mul r0, r1.yzxw, r2.zxyw
875 mad r0, -r2.yzxw, r1.zxyw, r0
878 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
882 t_dst_mask(vpi
->DstReg
.WriteMask
),
883 PVS_DST_REG_TEMPORARY
);
884 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
885 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
886 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
887 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
888 t_src_class(src
[0].File
),
889 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
890 (src
[0].RelAddr
<< 4);
891 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
892 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
893 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
894 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
895 t_src_class(src
[1].File
),
896 src
[1].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
897 (src
[1].RelAddr
<< 4);
898 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
901 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
904 t_dst_index(vp
, &vpi
->DstReg
),
905 t_dst_mask(vpi
->DstReg
.WriteMask
),
906 t_dst_class(vpi
->DstReg
.File
));
907 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
908 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
909 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
910 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
911 t_src_class(src
[1].File
),
913 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
914 (src
[1].RelAddr
<< 4);
915 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
916 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
917 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
918 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
919 t_src_class(src
[0].File
),
920 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
921 (src
[0].RelAddr
<< 4);
923 PVS_SRC_OPERAND(*u_temp_i
, PVS_SRC_SELECT_X
, PVS_SRC_SELECT_Y
,
924 PVS_SRC_SELECT_Z
, PVS_SRC_SELECT_W
,
925 PVS_SRC_REG_TEMPORARY
, VSF_FLAG_NONE
);
932 static void t_inputs_outputs(struct r300_vertex_program
*vp
)
938 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
939 if (vp
->key
.InputsRead
& (1 << i
))
940 vp
->inputs
[i
] = ++cur_reg
;
946 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
949 assert(vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
951 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
952 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
955 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
956 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
959 /* If we're writing back facing colors we need to send
960 * four colors to make front/back face colors selection work.
961 * If the vertex program doesn't write all 4 colors, lets
962 * pretend it does by skipping output index reg so the colors
963 * get written into appropriate output vectors.
965 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
966 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
967 } else if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
968 vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
972 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
973 vp
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
974 } else if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
975 vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
979 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
980 vp
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
981 } else if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
985 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
986 vp
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
987 } else if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
991 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
992 if (vp
->key
.OutputsWritten
& (1 << i
)) {
993 vp
->outputs
[i
] = cur_reg
++;
997 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
998 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
1002 static void r300TranslateVertexShader(struct r300_vertex_program
*vp
,
1003 struct prog_instruction
*vpi
)
1007 unsigned long num_operands
;
1008 /* Initial value should be last tmp reg that hw supports.
1009 Strangely enough r300 doesnt mind even though these would be out of range.
1010 Smart enough to realize that it doesnt need it? */
1011 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
1012 struct prog_src_register src
[3];
1014 vp
->pos_end
= 0; /* Not supported yet */
1015 vp
->hw_code
.length
= 0;
1016 vp
->translated
= GL_TRUE
;
1017 vp
->error
= GL_FALSE
;
1019 t_inputs_outputs(vp
);
1021 for (inst
= vp
->hw_code
.body
.d
; vpi
->Opcode
!= OPCODE_END
;
1026 if (!valid_dst(vp
, &vpi
->DstReg
)) {
1027 /* redirect result to unused temp */
1028 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1029 vpi
->DstReg
.Index
= u_temp_i
;
1032 num_operands
= _mesa_num_inst_src_regs(vpi
->Opcode
);
1034 /* copy the sources (src) from mesa into a local variable... is this needed? */
1035 for (i
= 0; i
< num_operands
; i
++) {
1036 src
[i
] = vpi
->SrcReg
[i
];
1039 if (num_operands
== 3) { /* TODO: scalars */
1040 if (CMP_SRCS(src
[1], src
[2])
1041 || CMP_SRCS(src
[0], src
[2])) {
1042 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
1047 PVS_DST_REG_TEMPORARY
);
1049 PVS_SRC_OPERAND(t_src_index(vp
, &src
[2]),
1054 t_src_class(src
[2].File
),
1055 VSF_FLAG_NONE
) | (src
[2].
1058 inst
[2] = __CONST(2, SWIZZLE_ZERO
);
1059 inst
[3] = __CONST(2, SWIZZLE_ZERO
);
1062 src
[2].File
= PROGRAM_TEMPORARY
;
1063 src
[2].Index
= u_temp_i
;
1069 if (num_operands
>= 2) {
1070 if (CMP_SRCS(src
[1], src
[0])) {
1071 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
1076 PVS_DST_REG_TEMPORARY
);
1078 PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
1083 t_src_class(src
[0].File
),
1084 VSF_FLAG_NONE
) | (src
[0].
1087 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
1088 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
1091 src
[0].File
= PROGRAM_TEMPORARY
;
1092 src
[0].Index
= u_temp_i
;
1098 switch (vpi
->Opcode
) {
1100 inst
= r300TranslateOpcodeABS(vp
, vpi
, inst
, src
);
1103 inst
= r300TranslateOpcodeADD(vp
, vpi
, inst
, src
);
1106 inst
= r300TranslateOpcodeARL(vp
, vpi
, inst
, src
);
1109 inst
= r300TranslateOpcodeDP3(vp
, vpi
, inst
, src
);
1112 inst
= r300TranslateOpcodeDP4(vp
, vpi
, inst
, src
);
1115 inst
= r300TranslateOpcodeDPH(vp
, vpi
, inst
, src
);
1118 inst
= r300TranslateOpcodeDST(vp
, vpi
, inst
, src
);
1121 inst
= r300TranslateOpcodeEX2(vp
, vpi
, inst
, src
);
1124 inst
= r300TranslateOpcodeEXP(vp
, vpi
, inst
, src
);
1127 inst
= r300TranslateOpcodeFLR(vp
, vpi
, inst
, src
, /* FIXME */
1131 inst
= r300TranslateOpcodeFRC(vp
, vpi
, inst
, src
);
1134 inst
= r300TranslateOpcodeLG2(vp
, vpi
, inst
, src
);
1137 inst
= r300TranslateOpcodeLIT(vp
, vpi
, inst
, src
);
1140 inst
= r300TranslateOpcodeLOG(vp
, vpi
, inst
, src
);
1143 inst
= r300TranslateOpcodeMAD(vp
, vpi
, inst
, src
);
1146 inst
= r300TranslateOpcodeMAX(vp
, vpi
, inst
, src
);
1149 inst
= r300TranslateOpcodeMIN(vp
, vpi
, inst
, src
);
1152 inst
= r300TranslateOpcodeMOV(vp
, vpi
, inst
, src
);
1155 inst
= r300TranslateOpcodeMUL(vp
, vpi
, inst
, src
);
1158 inst
= r300TranslateOpcodePOW(vp
, vpi
, inst
, src
);
1161 inst
= r300TranslateOpcodeRCP(vp
, vpi
, inst
, src
);
1164 inst
= r300TranslateOpcodeRSQ(vp
, vpi
, inst
, src
);
1167 inst
= r300TranslateOpcodeSGE(vp
, vpi
, inst
, src
);
1170 inst
= r300TranslateOpcodeSLT(vp
, vpi
, inst
, src
);
1173 inst
= r300TranslateOpcodeSUB(vp
, vpi
, inst
, src
);
1176 inst
= r300TranslateOpcodeSWZ(vp
, vpi
, inst
, src
);
1179 inst
= r300TranslateOpcodeXPD(vp
, vpi
, inst
, src
, /* FIXME */
1183 vp
->error
= GL_TRUE
;
1188 vp
->hw_code
.length
= (inst
- vp
->hw_code
.body
.d
);
1189 if (vp
->hw_code
.length
>= VSF_MAX_FRAGMENT_LENGTH
) {
1190 vp
->error
= GL_TRUE
;
1194 /* DP4 version seems to trigger some hw peculiarity */
1195 //#define PREFER_DP4
1197 static void position_invariant(struct gl_program
*prog
)
1199 struct prog_instruction
*vpi
;
1200 struct gl_program_parameter_list
*paramList
;
1203 gl_state_index tokens
[STATE_LENGTH
] = { STATE_MVP_MATRIX
, 0, 0, 0, 0 };
1205 /* tokens[4] = matrix modifier */
1207 tokens
[4] = 0; /* not transposed or inverted */
1209 tokens
[4] = STATE_MATRIX_TRANSPOSE
;
1211 paramList
= prog
->Parameters
;
1213 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 4);
1214 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 4);
1216 for (i
= 0; i
< 4; i
++) {
1218 tokens
[2] = tokens
[3] = i
; /* matrix row[i]..row[i] */
1219 idx
= _mesa_add_state_reference(paramList
, tokens
);
1221 vpi
[i
].Opcode
= OPCODE_DP4
;
1222 vpi
[i
].StringPos
= 0;
1225 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1226 vpi
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1227 vpi
[i
].DstReg
.WriteMask
= 1 << i
;
1228 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1230 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1231 vpi
[i
].SrcReg
[0].Index
= idx
;
1232 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1234 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1235 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1236 vpi
[i
].SrcReg
[1].Swizzle
= SWIZZLE_XYZW
;
1239 vpi
[i
].Opcode
= OPCODE_MUL
;
1241 vpi
[i
].Opcode
= OPCODE_MAD
;
1246 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1248 vpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
1249 vpi
[i
].DstReg
.Index
= 0;
1250 vpi
[i
].DstReg
.WriteMask
= 0xf;
1251 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1253 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1254 vpi
[i
].SrcReg
[0].Index
= idx
;
1255 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1257 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1258 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1259 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(i
, i
, i
, i
);
1262 vpi
[i
].SrcReg
[2].File
= PROGRAM_TEMPORARY
;
1263 vpi
[i
].SrcReg
[2].Index
= 0;
1264 vpi
[i
].SrcReg
[2].Swizzle
= SWIZZLE_XYZW
;
1269 _mesa_copy_instructions(&vpi
[i
], prog
->Instructions
,
1270 prog
->NumInstructions
);
1272 free(prog
->Instructions
);
1274 prog
->Instructions
= vpi
;
1276 prog
->NumInstructions
+= 4;
1277 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1279 assert(vpi
->Opcode
== OPCODE_END
);
1282 static void insert_wpos(struct r300_vertex_program
*vp
, struct gl_program
*prog
,
1285 struct prog_instruction
*vpi
;
1286 struct prog_instruction
*vpi_insert
;
1289 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 2);
1290 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 2);
1292 _mesa_copy_instructions(vpi
, prog
->Instructions
,
1293 prog
->NumInstructions
- 1);
1295 _mesa_copy_instructions(&vpi
[prog
->NumInstructions
+ 1],
1296 &prog
->Instructions
[prog
->NumInstructions
- 1],
1298 vpi_insert
= &vpi
[prog
->NumInstructions
- 1];
1300 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1302 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1303 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1304 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1305 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1307 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1308 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1309 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1312 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1314 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1315 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_TEX0
+ vp
->wpos_idx
;
1316 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1317 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1319 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1320 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1321 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1324 free(prog
->Instructions
);
1326 prog
->Instructions
= vpi
;
1328 prog
->NumInstructions
+= i
;
1329 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1331 assert(vpi
->Opcode
== OPCODE_END
);
1334 static void pos_as_texcoord(struct r300_vertex_program
*vp
,
1335 struct gl_program
*prog
)
1337 struct prog_instruction
*vpi
;
1338 GLuint tempregi
= prog
->NumTemporaries
;
1339 /* should do something else if no temps left... */
1340 prog
->NumTemporaries
++;
1342 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
1343 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
1344 && vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
1345 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1346 vpi
->DstReg
.Index
= tempregi
;
1349 insert_wpos(vp
, prog
, tempregi
);
1352 static struct r300_vertex_program
*build_program(struct r300_vertex_program_key
1353 *wanted_key
, struct gl_vertex_program
1354 *mesa_vp
, GLint wpos_idx
)
1356 struct r300_vertex_program
*vp
;
1358 vp
= _mesa_calloc(sizeof(*vp
));
1359 _mesa_memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
1360 vp
->wpos_idx
= wpos_idx
;
1362 if (mesa_vp
->IsPositionInvariant
) {
1363 position_invariant(&mesa_vp
->Base
);
1366 if (wpos_idx
> -1) {
1367 pos_as_texcoord(vp
, &mesa_vp
->Base
);
1370 if (RADEON_DEBUG
& DEBUG_VERTS
) {
1371 fprintf(stderr
, "Vertex program after native rewrite:\n");
1372 _mesa_print_program(&mesa_vp
->Base
);
1376 /* Some outputs may be artificially added, to match the inputs of the fragment program.
1377 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
1378 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
1382 for (i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
1383 if (vp
->key
.OutputsAdded
& (1 << i
)) {
1389 struct prog_instruction
*inst
;
1391 _mesa_insert_instructions(&mesa_vp
->Base
, mesa_vp
->Base
.NumInstructions
- 1, count
);
1392 inst
= &mesa_vp
->Base
.Instructions
[mesa_vp
->Base
.NumInstructions
- 1 - count
];
1394 for (i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
1395 if (vp
->key
.OutputsAdded
& (1 << i
)) {
1396 inst
->Opcode
= OPCODE_MOV
;
1398 inst
->DstReg
.File
= PROGRAM_OUTPUT
;
1399 inst
->DstReg
.Index
= i
;
1400 inst
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1401 inst
->DstReg
.CondMask
= COND_TR
;
1403 inst
->SrcReg
[0].File
= PROGRAM_CONSTANT
;
1404 inst
->SrcReg
[0].Index
= 0;
1405 inst
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1413 assert(mesa_vp
->Base
.NumInstructions
);
1414 vp
->num_temporaries
= mesa_vp
->Base
.NumTemporaries
;
1415 r300TranslateVertexShader(vp
, mesa_vp
->Base
.Instructions
);
1420 static void add_outputs(struct r300_vertex_program_key
*key
, GLint vert
)
1422 if (key
->OutputsWritten
& (1 << vert
))
1425 key
->OutputsWritten
|= 1 << vert
;
1426 key
->OutputsAdded
|= 1 << vert
;
1429 void r300SelectVertexShader(r300ContextPtr r300
)
1431 GLcontext
*ctx
= ctx
= r300
->radeon
.glCtx
;
1433 struct r300_vertex_program_key wanted_key
= { 0 };
1435 struct r300_vertex_program_cont
*vpc
;
1436 struct r300_vertex_program
*vp
;
1439 vpc
= (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
1440 wanted_key
.InputsRead
= vpc
->mesa_program
.Base
.InputsRead
;
1441 wanted_key
.OutputsWritten
= vpc
->mesa_program
.Base
.OutputsWritten
;
1442 InputsRead
= ctx
->FragmentProgram
._Current
->Base
.InputsRead
;
1445 if (InputsRead
& FRAG_BIT_WPOS
) {
1446 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1447 if (!(InputsRead
& (FRAG_BIT_TEX0
<< i
)))
1450 if (i
== ctx
->Const
.MaxTextureUnits
) {
1451 fprintf(stderr
, "\tno free texcoord found\n");
1455 wanted_key
.OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ i
);
1459 if (vpc
->mesa_program
.IsPositionInvariant
) {
1460 wanted_key
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
1461 wanted_key
.OutputsWritten
|= (1 << VERT_RESULT_HPOS
);
1463 add_outputs(&wanted_key
, VERT_RESULT_HPOS
);
1466 if (InputsRead
& FRAG_BIT_COL0
) {
1467 add_outputs(&wanted_key
, VERT_RESULT_COL0
);
1470 if (InputsRead
& FRAG_BIT_COL1
) {
1471 add_outputs(&wanted_key
, VERT_RESULT_COL1
);
1474 if (InputsRead
& FRAG_BIT_FOGC
) {
1475 add_outputs(&wanted_key
, VERT_RESULT_FOGC
);
1478 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++) {
1479 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
1480 add_outputs(&wanted_key
, VERT_RESULT_TEX0
+ i
);
1484 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
)
1485 if (_mesa_memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
))
1487 r300
->selected_vp
= vp
;
1491 if (RADEON_DEBUG
& DEBUG_VERTS
) {
1492 fprintf(stderr
, "Initial vertex program:\n");
1493 _mesa_print_program(&vpc
->mesa_program
.Base
);
1497 vp
= build_program(&wanted_key
, &vpc
->mesa_program
, wpos_idx
);
1498 vp
->next
= vpc
->progs
;
1500 r300
->selected_vp
= vp
;
/*
 * Raise the vpu.count field of the command header at 'ptr' to
 * new_count / 4 if that is larger than the current value; the count is
 * never lowered.  Asserts that new_count / 4 is below 256.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement; the closing 'while (0)' was missing, which left
 * the trailing line continuation running into the following code.
 */
#define bump_vpu_count(ptr, new_count)   do { \
	drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
	int _nc=(new_count)/4; \
	assert(_nc < 256); \
	if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
	} while(0)
1510 static void r300EmitVertexProgram(r300ContextPtr r300
, int dest
, struct r300_vertex_shader_hw_code
*code
)
1514 assert((code
->length
> 0) && (code
->length
% 4 == 0));
1516 switch ((dest
>> 8) & 0xf) {
1518 R300_STATECHANGE(r300
, vpi
);
1519 for (i
= 0; i
< code
->length
; i
++)
1520 r300
->hw
.vpi
.cmd
[R300_VPI_INSTR_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1521 bump_vpu_count(r300
->hw
.vpi
.cmd
, code
->length
+ 4 * (dest
& 0xff));
1524 R300_STATECHANGE(r300
, vpp
);
1525 for (i
= 0; i
< code
->length
; i
++)
1526 r300
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1527 bump_vpu_count(r300
->hw
.vpp
.cmd
, code
->length
+ 4 * (dest
& 0xff));
1530 R300_STATECHANGE(r300
, vps
);
1531 for (i
= 0; i
< code
->length
; i
++)
1532 r300
->hw
.vps
.cmd
[1 + i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1533 bump_vpu_count(r300
->hw
.vps
.cmd
, code
->length
+ 4 * (dest
& 0xff));
1536 fprintf(stderr
, "%s:%s don't know how to handle dest %04x\n", __FILE__
, __FUNCTION__
, dest
);
1541 void r300SetupVertexProgram(r300ContextPtr rmesa
)
1543 GLcontext
*ctx
= rmesa
->radeon
.glCtx
;
1544 struct r300_vertex_program
*prog
= rmesa
->selected_vp
;
1546 int param_count
= 0;
1548 /* Reset state, in case we don't use something */
1549 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpp
.cmd
)->vpu
.count
= 0;
1550 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpi
.cmd
)->vpu
.count
= 0;
1551 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vps
.cmd
)->vpu
.count
= 0;
1553 R300_STATECHANGE(rmesa
, vpp
);
1554 param_count
= r300VertexProgUpdateParams(ctx
,
1555 (struct r300_vertex_program_cont
*)
1556 ctx
->VertexProgram
._Current
,
1557 (float *)&rmesa
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
]);
1558 bump_vpu_count(rmesa
->hw
.vpp
.cmd
, param_count
);
1561 r300EmitVertexProgram(rmesa
, R300_PVS_CODE_START
, &(prog
->hw_code
));
1562 inst_count
= (prog
->hw_code
.length
/ 4) - 1;
1564 r300VapCntl(rmesa
, _mesa_bitcount(prog
->key
.InputsRead
),
1565 _mesa_bitcount(prog
->key
.OutputsWritten
), prog
->num_temporaries
);
1567 R300_STATECHANGE(rmesa
, pvs
);
1568 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_1
] = (0 << R300_PVS_FIRST_INST_SHIFT
) | (inst_count
<< R300_PVS_XYZW_VALID_INST_SHIFT
) |
1569 (inst_count
<< R300_PVS_LAST_INST_SHIFT
);
1571 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_2
] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT
) | (param_count
<< R300_PVS_MAX_CONST_ADDR_SHIFT
);
1572 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_3
] = (inst_count
<< R300_PVS_LAST_VTX_SRC_INST_SHIFT
);