1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
29 /* Radeon R5xx Acceleration, Revision 1.2 */
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/programopt.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_optimize.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_print.h"
40 #include "shader/prog_statevars.h"
43 #include "compiler/radeon_nqssadce.h"
44 #include "r300_context.h"
45 #include "r300_state.h"
47 /* TODO: Get rid of t_src_class call */
/*
 * CMP_SRCS(a, b): non-zero when two source registers differ in RelAddr, or
 * when their Index values differ while both map (through t_src_class) to the
 * constant class or both map to the input class.
 */
48 #define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
49 ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
50 t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
51 (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
52 t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
55 * Take an already-setup and valid source then swizzle it appropriately to
56 * obtain a constant ZERO or ONE source.
/*
 * __CONST(x, y): builds a PVS_SRC_OPERAND from src[x] (index from
 * t_src_index, class from t_src_class, flags VSF_FLAG_NONE) and ORs in
 * src[x].RelAddr << 4.  Expects `vp` and `src` to be in scope where it is
 * expanded.
 * NOTE(review): the swizzle arguments that consume `y` are not visible in
 * this listing - confirm against the full file.
 */
58 #define __CONST(x, y) \
59 (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \
64 t_src_class(src[x].File), \
65 VSF_FLAG_NONE) | (src[x].RelAddr << 4))
/*
 * FREE_TEMPS(): derives the number of scratch temporaries in use from
 * u_temp_i, warns once and sets vp->error when program temporaries plus
 * scratch temporaries exceed VSF_MAX_FRAGMENT_TEMPS, and resets u_temp_i to
 * VSF_MAX_FRAGMENT_TEMPS - 1.  Expects `vp` and `u_temp_i` in scope.
 */
67 #define FREE_TEMPS() \
69 int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
70 if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
71 WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
72 vp->error = GL_TRUE; \
74 u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
/*
 * Copies the vertex program's parameters into dst, four floats per
 * parameter.
 *
 * ctx: GL context; source of the NV program parameters and state.
 * vp:  vertex program whose parameters are uploaded.
 * dst: output array, advanced by four floats for every parameter written.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * confirm what value callers receive (presumably the number of floats
 * written).
 */
77 static int r300VertexProgUpdateParams(GLcontext
* ctx
, struct gl_vertex_program
*vp
, float *dst
)
81 struct gl_program_parameter_list
*paramList
;
/* NV programs: refresh tracked matrices, then copy every program parameter
 * slot straight from the context. */
83 if (vp
->IsNVProgram
) {
84 _mesa_load_tracked_matrices(ctx
);
86 for (pi
= 0; pi
< MAX_NV_VERTEX_PROGRAM_PARAMS
; pi
++) {
87 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][0];
88 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][1];
89 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][2];
90 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][3];
/* Other programs: the parameter list must exist; state-derived values are
 * refreshed before being copied. */
95 if (!vp
->Base
.Parameters
)
98 _mesa_load_state_parameters(ctx
, vp
->Base
.Parameters
);
/* Four floats per parameter must fit in VSF_MAX_FRAGMENT_LENGTH. */
100 if (vp
->Base
.Parameters
->NumParameters
* 4 >
101 VSF_MAX_FRAGMENT_LENGTH
) {
102 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
106 paramList
= vp
->Base
.Parameters
;
107 for (pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
108 switch (paramList
->Parameters
[pi
].Type
) {
/* State variables, named parameters and constants share one copy path. */
109 case PROGRAM_STATE_VAR
:
110 case PROGRAM_NAMED_PARAM
:
111 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
112 case PROGRAM_CONSTANT
:
113 *dst
++ = paramList
->ParameterValues
[pi
][0];
114 *dst
++ = paramList
->ParameterValues
[pi
][1];
115 *dst
++ = paramList
->ParameterValues
[pi
][2];
116 *dst
++ = paramList
->ParameterValues
[pi
][3];
119 _mesa_problem(NULL
, "Bad param type in %s",
/*
 * Converts a Mesa write mask into the hardware write mask by keeping only
 * the bits covered by VSF_FLAG_ALL.
 */
128 static unsigned long t_dst_mask(GLuint mask
)
130 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
131 return mask
& VSF_FLAG_ALL
;
/*
 * Maps a Mesa register file to a PVS destination register class:
 * PROGRAM_TEMPORARY gives PVS_DST_REG_TEMPORARY and PROGRAM_ADDRESS gives
 * PVS_DST_REG_A0.  PVS_DST_REG_OUT is returned for a case whose label is not
 * visible here (presumably PROGRAM_OUTPUT - confirm).  A diagnostic is
 * printed to stderr on the remaining path.
 */
134 static unsigned long t_dst_class(gl_register_file file
)
138 case PROGRAM_TEMPORARY
:
139 return PVS_DST_REG_TEMPORARY
;
141 return PVS_DST_REG_OUT
;
142 case PROGRAM_ADDRESS
:
143 return PVS_DST_REG_A0
;
146 case PROGRAM_LOCAL_PARAM:
147 case PROGRAM_ENV_PARAM:
148 case PROGRAM_NAMED_PARAM:
149 case PROGRAM_STATE_VAR:
150 case PROGRAM_WRITE_ONLY:
151 case PROGRAM_ADDRESS:
154 fprintf(stderr
, "problem in %s", __FUNCTION__
);
/*
 * Returns the hardware register index for a destination.  Output registers
 * are translated through vp->outputs[].
 * NOTE(review): the return for non-output destinations is not visible in
 * this listing.
 */
160 static unsigned long t_dst_index(struct r300_vertex_program
*vp
,
161 struct prog_dst_register
*dst
)
163 if (dst
->File
== PROGRAM_OUTPUT
)
164 return vp
->outputs
[dst
->Index
];
/*
 * Maps a Mesa register file to a PVS source register class:
 * PROGRAM_TEMPORARY gives PVS_SRC_REG_TEMPORARY; local, env and named
 * parameters, constants and state variables all give PVS_SRC_REG_CONSTANT.
 * PVS_SRC_REG_INPUT is returned for a case whose label is not visible here
 * (presumably PROGRAM_INPUT - confirm).  A diagnostic is printed to stderr
 * on the remaining path.
 */
169 static unsigned long t_src_class(gl_register_file file
)
172 case PROGRAM_TEMPORARY
:
173 return PVS_SRC_REG_TEMPORARY
;
175 return PVS_SRC_REG_INPUT
;
176 case PROGRAM_LOCAL_PARAM
:
177 case PROGRAM_ENV_PARAM
:
178 case PROGRAM_NAMED_PARAM
:
179 case PROGRAM_CONSTANT
:
180 case PROGRAM_STATE_VAR
:
181 return PVS_SRC_REG_CONSTANT
;
184 case PROGRAM_WRITE_ONLY:
185 case PROGRAM_ADDRESS:
188 fprintf(stderr
, "problem in %s", __FUNCTION__
);
/*
 * Translates one Mesa swizzle selector into the hardware selector.  Per the
 * note below the two encodings are identical, so no conversion is needed.
 */
194 static INLINE
unsigned long t_swizzle(GLubyte swizzle
)
196 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
/*
 * Debug helper: prints every entry of vp->inputs[] (VERT_ATTRIB_MAX of them)
 * to stderr on one line, prefixed by `caller`.  A separate message is
 * printed for a null vp.
 */
201 static void vp_dump_inputs(struct r300_vertex_program
*vp
, char *caller
)
206 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
,
211 fprintf(stderr
, "%s:<", caller
);
212 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
213 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
214 fprintf(stderr
, ">\n");
/*
 * Returns the hardware register index for a source.  Input registers are
 * translated through vp->inputs[], and the slot is asserted to be assigned
 * (not -1).  A negative Index produces a warning that negative offsets for
 * indirect addressing do not work.
 * NOTE(review): the return for non-input sources is not visible in this
 * listing.
 */
219 static unsigned long t_src_index(struct r300_vertex_program
*vp
,
220 struct prog_src_register
*src
)
222 if (src
->File
== PROGRAM_INPUT
) {
223 assert(vp
->inputs
[src
->Index
] != -1);
224 return vp
->inputs
[src
->Index
];
226 if (src
->Index
< 0) {
228 "negative offsets for indirect addressing do not work.\n");
235 /* these two functions should probably be merged... */
/*
 * Encodes a full vector source operand: hardware index, the four swizzle
 * selectors (components 0..3) passed through t_swizzle, the register class
 * and the negate flags, ORed with RelAddr shifted left by 4.
 */
237 static unsigned long t_src(struct r300_vertex_program
*vp
,
238 struct prog_src_register
*src
)
240 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
241 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
243 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
244 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
245 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
246 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
247 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
248 t_src_class(src
->File
),
249 src
->Negate
) | (src
->RelAddr
<< 4);
/*
 * Encodes a scalar source operand: swizzle component 0 of the source is
 * replicated into all four selectors, and any non-zero Negate becomes
 * VSF_FLAG_ALL (otherwise VSF_FLAG_NONE).
 * NOTE(review): the tail of the return expression is not visible in this
 * listing.
 */
252 static unsigned long t_src_scalar(struct r300_vertex_program
*vp
,
253 struct prog_src_register
*src
)
255 /* src->Negate uses the NEGATE_ flags from program_instruction.h,
256 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
258 return PVS_SRC_OPERAND(t_src_index(vp
, src
),
259 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
260 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
261 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
262 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
263 t_src_class(src
->File
),
264 src
->Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
/*
 * Checks a destination register.  Two cases are singled out: an output
 * register with no assigned slot (vp->outputs[Index] == -1), and an address
 * register, whose Index is asserted to be 0.
 * NOTE(review): the return statements are not visible in this listing.  The
 * caller treats a false result as "redirect the result to an unused temp".
 */
268 static GLboolean
valid_dst(struct r300_vertex_program
*vp
,
269 struct prog_dst_register
*dst
)
271 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
/* The address register is a single register, so only index 0 is accepted. */
273 } else if (dst
->File
== PROGRAM_ADDRESS
) {
274 assert(dst
->Index
== 0);
/*
 * ABS: encoded as VE_MAXIMUM of src[0] and a second copy of src[0] whose
 * negate flags are selected between VSF_FLAG_ALL and VSF_FLAG_NONE; per the
 * note below the second copy is the negated source.
 * NOTE(review): part of the negate expression is not visible in this listing.
 */
280 static GLuint
*r300TranslateOpcodeABS(struct r300_vertex_program
*vp
,
281 struct prog_instruction
*vpi
,
283 struct prog_src_register src
[3])
285 //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
287 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
290 t_dst_index(vp
, &vpi
->DstReg
),
291 t_dst_mask(vpi
->DstReg
.WriteMask
),
292 t_dst_class(vpi
->DstReg
.File
));
293 inst
[1] = t_src(vp
, &src
[0]);
294 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
295 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
296 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
297 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
298 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)),
299 t_src_class(src
[0].File
),
301 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
302 (src
[0].RelAddr
<< 4);
/*
 * ADD: encoded as VE_ADD with src[0] and src[1] as operands; the third
 * operand slot is filled with __CONST(1, SWIZZLE_ZERO).
 */
308 static GLuint
*r300TranslateOpcodeADD(struct r300_vertex_program
*vp
,
309 struct prog_instruction
*vpi
,
311 struct prog_src_register src
[3])
313 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
316 t_dst_index(vp
, &vpi
->DstReg
),
317 t_dst_mask(vpi
->DstReg
.WriteMask
),
318 t_dst_class(vpi
->DstReg
.File
));
319 inst
[1] = t_src(vp
, &src
[0]);
320 inst
[2] = t_src(vp
, &src
[1]);
321 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * ARL: encoded as VE_FLT2FIX_DX applied to src[0]; the two remaining operand
 * slots are filled with __CONST(0, SWIZZLE_ZERO).
 */
326 static GLuint
*r300TranslateOpcodeARL(struct r300_vertex_program
*vp
,
327 struct prog_instruction
*vpi
,
329 struct prog_src_register src
[3])
331 inst
[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX
,
334 t_dst_index(vp
, &vpi
->DstReg
),
335 t_dst_mask(vpi
->DstReg
.WriteMask
),
336 t_dst_class(vpi
->DstReg
.File
));
337 inst
[1] = t_src(vp
, &src
[0]);
338 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
339 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * DP3: encoded as VE_DOT_PRODUCT.  Both operands use the first three swizzle
 * components of their source, with negation limited to VSF_FLAG_XYZ; the
 * src[1] operand has SWIZZLE_ZERO as its fourth selector.
 * NOTE(review): the fourth selector of the src[0] operand is not visible in
 * this listing; the note below indicates it is ZERO as well.
 */
344 static GLuint
*r300TranslateOpcodeDP3(struct r300_vertex_program
*vp
,
345 struct prog_instruction
*vpi
,
347 struct prog_src_register src
[3])
349 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
351 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
354 t_dst_index(vp
, &vpi
->DstReg
),
355 t_dst_mask(vpi
->DstReg
.WriteMask
),
356 t_dst_class(vpi
->DstReg
.File
));
357 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
358 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
359 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
360 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
362 t_src_class(src
[0].File
),
363 src
[0].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
364 (src
[0].RelAddr
<< 4);
366 PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
367 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
368 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
369 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), SWIZZLE_ZERO
,
370 t_src_class(src
[1].File
),
371 src
[1].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
372 (src
[1].RelAddr
<< 4);
373 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * DP4: encoded as VE_DOT_PRODUCT over the full src[0] and src[1] operands;
 * the third operand slot is filled with __CONST(1, SWIZZLE_ZERO).
 */
378 static GLuint
*r300TranslateOpcodeDP4(struct r300_vertex_program
*vp
,
379 struct prog_instruction
*vpi
,
381 struct prog_src_register src
[3])
383 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
386 t_dst_index(vp
, &vpi
->DstReg
),
387 t_dst_mask(vpi
->DstReg
.WriteMask
),
388 t_dst_class(vpi
->DstReg
.File
));
389 inst
[1] = t_src(vp
, &src
[0]);
390 inst
[2] = t_src(vp
, &src
[1]);
391 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * DPH: encoded as VE_DOT_PRODUCT where the first operand takes the first
 * three swizzle components of src[0] and forces the fourth to one
 * (PVS_SRC_SELECT_FORCE_1), with negation limited to VSF_FLAG_XYZ; the
 * second operand is the full src[1].
 */
396 static GLuint
*r300TranslateOpcodeDPH(struct r300_vertex_program
*vp
,
397 struct prog_instruction
*vpi
,
399 struct prog_src_register src
[3])
401 //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
402 inst
[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT
,
405 t_dst_index(vp
, &vpi
->DstReg
),
406 t_dst_mask(vpi
->DstReg
.WriteMask
),
407 t_dst_class(vpi
->DstReg
.File
));
408 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
409 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
410 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)),
411 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)),
412 PVS_SRC_SELECT_FORCE_1
,
413 t_src_class(src
[0].File
),
414 src
[0].Negate
? VSF_FLAG_XYZ
: VSF_FLAG_NONE
) |
415 (src
[0].RelAddr
<< 4);
416 inst
[2] = t_src(vp
, &src
[1]);
417 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * DST: encoded as VE_DISTANCE_VECTOR with src[0] and src[1] as operands; the
 * third operand slot is filled with __CONST(1, SWIZZLE_ZERO).
 */
422 static GLuint
*r300TranslateOpcodeDST(struct r300_vertex_program
*vp
,
423 struct prog_instruction
*vpi
,
425 struct prog_src_register src
[3])
427 inst
[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR
,
430 t_dst_index(vp
, &vpi
->DstReg
),
431 t_dst_mask(vpi
->DstReg
.WriteMask
),
432 t_dst_class(vpi
->DstReg
.File
));
433 inst
[1] = t_src(vp
, &src
[0]);
434 inst
[2] = t_src(vp
, &src
[1]);
435 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * EX2: encoded as ME_EXP_BASE2_FULL_DX on the scalar form of src[0]; the two
 * remaining operand slots are filled with __CONST(0, SWIZZLE_ZERO).
 */
440 static GLuint
*r300TranslateOpcodeEX2(struct r300_vertex_program
*vp
,
441 struct prog_instruction
*vpi
,
443 struct prog_src_register src
[3])
445 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX
,
448 t_dst_index(vp
, &vpi
->DstReg
),
449 t_dst_mask(vpi
->DstReg
.WriteMask
),
450 t_dst_class(vpi
->DstReg
.File
));
451 inst
[1] = t_src_scalar(vp
, &src
[0]);
452 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
453 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * EXP: encoded as ME_EXP_BASE2_DX on the scalar form of src[0]; the two
 * remaining operand slots are filled with __CONST(0, SWIZZLE_ZERO).
 */
458 static GLuint
*r300TranslateOpcodeEXP(struct r300_vertex_program
*vp
,
459 struct prog_instruction
*vpi
,
461 struct prog_src_register src
[3])
463 inst
[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX
,
466 t_dst_index(vp
, &vpi
->DstReg
),
467 t_dst_mask(vpi
->DstReg
.WriteMask
),
468 t_dst_class(vpi
->DstReg
.File
));
469 inst
[1] = t_src_scalar(vp
, &src
[0]);
470 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
471 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * FLR: encoded as two hardware instructions.  The first is VE_FRACTION of
 * src[0] written to a temporary-class destination; the second is VE_ADD of
 * src[0] and the scratch temporary *u_temp_i, which per the note below is
 * negated (src minus its fractional part).
 * NOTE(review): the scratch-temp parameter declaration, the temp index used
 * by the first instruction and the pointer advance between the two
 * instructions are not visible in this listing.
 */
476 static GLuint
*r300TranslateOpcodeFLR(struct r300_vertex_program
*vp
,
477 struct prog_instruction
*vpi
,
479 struct prog_src_register src
[3],
482 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
483 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
485 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
489 t_dst_mask(vpi
->DstReg
.WriteMask
),
490 PVS_DST_REG_TEMPORARY
);
491 inst
[1] = t_src(vp
, &src
[0]);
492 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
493 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
496 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
499 t_dst_index(vp
, &vpi
->DstReg
),
500 t_dst_mask(vpi
->DstReg
.WriteMask
),
501 t_dst_class(vpi
->DstReg
.File
));
502 inst
[1] = t_src(vp
, &src
[0]);
503 inst
[2] = PVS_SRC_OPERAND(*u_temp_i
,
507 PVS_SRC_SELECT_W
, PVS_SRC_REG_TEMPORARY
,
508 /* Not 100% sure about this */
510 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
512 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * FRC: encoded as VE_FRACTION of src[0]; the two remaining operand slots are
 * filled with __CONST(0, SWIZZLE_ZERO).
 */
518 static GLuint
*r300TranslateOpcodeFRC(struct r300_vertex_program
*vp
,
519 struct prog_instruction
*vpi
,
521 struct prog_src_register src
[3])
523 inst
[0] = PVS_OP_DST_OPERAND(VE_FRACTION
,
526 t_dst_index(vp
, &vpi
->DstReg
),
527 t_dst_mask(vpi
->DstReg
.WriteMask
),
528 t_dst_class(vpi
->DstReg
.File
));
529 inst
[1] = t_src(vp
, &src
[0]);
530 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
531 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * LG2: encoded as ME_LOG_BASE2_FULL_DX.  The operand replicates swizzle
 * component 0 of src[0] into all four selectors, and any negate becomes
 * VSF_FLAG_ALL; the two remaining operand slots are zero constants.
 */
536 static GLuint
*r300TranslateOpcodeLG2(struct r300_vertex_program
*vp
,
537 struct prog_instruction
*vpi
,
539 struct prog_src_register src
[3])
541 // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
543 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX
,
546 t_dst_index(vp
, &vpi
->DstReg
),
547 t_dst_mask(vpi
->DstReg
.WriteMask
),
548 t_dst_class(vpi
->DstReg
.File
));
549 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
550 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
551 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
552 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
553 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)),
554 t_src_class(src
[0].File
),
555 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
556 (src
[0].RelAddr
<< 4);
557 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
558 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * LIT: encoded as ME_LIGHT_COEFF_DX.  All three operands are rearrangements
 * of src[0] with the third selector forced to zero
 * (PVS_SRC_SELECT_FORCE_0): components (0, 3, -, 1), (1, 3, -, 0) and
 * (1, 0, -, 3) respectively.  Any negate on src[0] becomes VSF_FLAG_ALL.
 */
563 static GLuint
*r300TranslateOpcodeLIT(struct r300_vertex_program
*vp
,
564 struct prog_instruction
*vpi
,
566 struct prog_src_register src
[3])
568 //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
570 inst
[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX
,
573 t_dst_index(vp
, &vpi
->DstReg
),
574 t_dst_mask(vpi
->DstReg
.WriteMask
),
575 t_dst_class(vpi
->DstReg
.File
));
576 /* NOTE: Users swizzling might not work. */
577 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
578 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
579 PVS_SRC_SELECT_FORCE_0
, // Z
580 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
581 t_src_class(src
[0].File
),
582 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
583 (src
[0].RelAddr
<< 4);
584 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
585 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
586 PVS_SRC_SELECT_FORCE_0
, // Z
587 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
588 t_src_class(src
[0].File
),
589 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
590 (src
[0].RelAddr
<< 4);
591 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
592 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
593 PVS_SRC_SELECT_FORCE_0
, // Z
594 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
595 t_src_class(src
[0].File
),
596 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
597 (src
[0].RelAddr
<< 4);
/*
 * LOG: encoded as ME_LOG_BASE2_DX on the scalar form of src[0]; the two
 * remaining operand slots are filled with __CONST(0, SWIZZLE_ZERO).
 */
602 static GLuint
*r300TranslateOpcodeLOG(struct r300_vertex_program
*vp
,
603 struct prog_instruction
*vpi
,
605 struct prog_src_register src
[3])
607 inst
[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX
,
610 t_dst_index(vp
, &vpi
->DstReg
),
611 t_dst_mask(vpi
->DstReg
.WriteMask
),
612 t_dst_class(vpi
->DstReg
.File
));
613 inst
[1] = t_src_scalar(vp
, &src
[0]);
614 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
615 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * MAD: encoded as PVS_MACRO_OP_2CLK_MADD using all three sources, src[0],
 * src[1] and src[2], in operand order.
 */
620 static GLuint
*r300TranslateOpcodeMAD(struct r300_vertex_program
*vp
,
621 struct prog_instruction
*vpi
,
623 struct prog_src_register src
[3])
625 inst
[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD
,
628 t_dst_index(vp
, &vpi
->DstReg
),
629 t_dst_mask(vpi
->DstReg
.WriteMask
),
630 t_dst_class(vpi
->DstReg
.File
));
631 inst
[1] = t_src(vp
, &src
[0]);
632 inst
[2] = t_src(vp
, &src
[1]);
633 inst
[3] = t_src(vp
, &src
[2]);
/*
 * MAX: encoded as VE_MAXIMUM with src[0] and src[1] as operands; the third
 * operand slot is filled with __CONST(1, SWIZZLE_ZERO).
 */
638 static GLuint
*r300TranslateOpcodeMAX(struct r300_vertex_program
*vp
,
639 struct prog_instruction
*vpi
,
641 struct prog_src_register src
[3])
643 inst
[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM
,
646 t_dst_index(vp
, &vpi
->DstReg
),
647 t_dst_mask(vpi
->DstReg
.WriteMask
),
648 t_dst_class(vpi
->DstReg
.File
));
649 inst
[1] = t_src(vp
, &src
[0]);
650 inst
[2] = t_src(vp
, &src
[1]);
651 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * MIN: encoded as VE_MINIMUM with src[0] and src[1] as operands; the third
 * operand slot is filled with __CONST(1, SWIZZLE_ZERO).
 */
656 static GLuint
*r300TranslateOpcodeMIN(struct r300_vertex_program
*vp
,
657 struct prog_instruction
*vpi
,
659 struct prog_src_register src
[3])
661 inst
[0] = PVS_OP_DST_OPERAND(VE_MINIMUM
,
664 t_dst_index(vp
, &vpi
->DstReg
),
665 t_dst_mask(vpi
->DstReg
.WriteMask
),
666 t_dst_class(vpi
->DstReg
.File
));
667 inst
[1] = t_src(vp
, &src
[0]);
668 inst
[2] = t_src(vp
, &src
[1]);
669 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * MOV: encoded as VE_ADD of src[0] and a zero constant (see the note below);
 * both remaining operand slots are __CONST(0, SWIZZLE_ZERO).
 */
674 static GLuint
*r300TranslateOpcodeMOV(struct r300_vertex_program
*vp
,
675 struct prog_instruction
*vpi
,
677 struct prog_src_register src
[3])
679 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
681 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
684 t_dst_index(vp
, &vpi
->DstReg
),
685 t_dst_mask(vpi
->DstReg
.WriteMask
),
686 t_dst_class(vpi
->DstReg
.File
));
687 inst
[1] = t_src(vp
, &src
[0]);
688 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
689 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * MUL: encoded as VE_MULTIPLY with src[0] and src[1] as operands; the third
 * operand slot is filled with __CONST(1, SWIZZLE_ZERO).
 */
694 static GLuint
*r300TranslateOpcodeMUL(struct r300_vertex_program
*vp
,
695 struct prog_instruction
*vpi
,
697 struct prog_src_register src
[3])
699 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY
,
702 t_dst_index(vp
, &vpi
->DstReg
),
703 t_dst_mask(vpi
->DstReg
.WriteMask
),
704 t_dst_class(vpi
->DstReg
.File
));
705 inst
[1] = t_src(vp
, &src
[0]);
706 inst
[2] = t_src(vp
, &src
[1]);
707 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * POW: encoded as ME_POWER_FUNC_FF.  The scalar form of src[0] goes in the
 * first operand slot, a zero constant in the second, and the scalar form of
 * src[1] in the third.
 */
712 static GLuint
*r300TranslateOpcodePOW(struct r300_vertex_program
*vp
,
713 struct prog_instruction
*vpi
,
715 struct prog_src_register src
[3])
717 inst
[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF
,
720 t_dst_index(vp
, &vpi
->DstReg
),
721 t_dst_mask(vpi
->DstReg
.WriteMask
),
722 t_dst_class(vpi
->DstReg
.File
));
723 inst
[1] = t_src_scalar(vp
, &src
[0]);
724 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
725 inst
[3] = t_src_scalar(vp
, &src
[1]);
/*
 * RCP: encoded as ME_RECIP_DX on the scalar form of src[0]; the two
 * remaining operand slots are filled with __CONST(0, SWIZZLE_ZERO).
 */
730 static GLuint
*r300TranslateOpcodeRCP(struct r300_vertex_program
*vp
,
731 struct prog_instruction
*vpi
,
733 struct prog_src_register src
[3])
735 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX
,
738 t_dst_index(vp
, &vpi
->DstReg
),
739 t_dst_mask(vpi
->DstReg
.WriteMask
),
740 t_dst_class(vpi
->DstReg
.File
));
741 inst
[1] = t_src_scalar(vp
, &src
[0]);
742 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
743 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * RSQ: encoded as ME_RECIP_SQRT_DX on the scalar form of src[0]; the two
 * remaining operand slots are filled with __CONST(0, SWIZZLE_ZERO).
 */
748 static GLuint
*r300TranslateOpcodeRSQ(struct r300_vertex_program
*vp
,
749 struct prog_instruction
*vpi
,
751 struct prog_src_register src
[3])
753 inst
[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX
,
756 t_dst_index(vp
, &vpi
->DstReg
),
757 t_dst_mask(vpi
->DstReg
.WriteMask
),
758 t_dst_class(vpi
->DstReg
.File
));
759 inst
[1] = t_src_scalar(vp
, &src
[0]);
760 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
761 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * SGE: encoded as VE_SET_GREATER_THAN_EQUAL with src[0] and src[1] as
 * operands; the third operand slot is filled with __CONST(1, SWIZZLE_ZERO).
 */
766 static GLuint
*r300TranslateOpcodeSGE(struct r300_vertex_program
*vp
,
767 struct prog_instruction
*vpi
,
769 struct prog_src_register src
[3])
771 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL
,
774 t_dst_index(vp
, &vpi
->DstReg
),
775 t_dst_mask(vpi
->DstReg
.WriteMask
),
776 t_dst_class(vpi
->DstReg
.File
));
777 inst
[1] = t_src(vp
, &src
[0]);
778 inst
[2] = t_src(vp
, &src
[1]);
779 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * SLT: encoded as VE_SET_LESS_THAN with src[0] and src[1] as operands; the
 * third operand slot is filled with __CONST(1, SWIZZLE_ZERO).
 */
784 static GLuint
*r300TranslateOpcodeSLT(struct r300_vertex_program
*vp
,
785 struct prog_instruction
*vpi
,
787 struct prog_src_register src
[3])
789 inst
[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN
,
792 t_dst_index(vp
, &vpi
->DstReg
),
793 t_dst_mask(vpi
->DstReg
.WriteMask
),
794 t_dst_class(vpi
->DstReg
.File
));
795 inst
[1] = t_src(vp
, &src
[0]);
796 inst
[2] = t_src(vp
, &src
[1]);
797 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
/*
 * SUB: two encodings appear below.  The first is VE_ADD of src[0] and a copy
 * of src[1] whose negate flags are selected between VSF_FLAG_ALL and
 * VSF_FLAG_NONE (per the note, the negated operand).  The second is
 * VE_MULTIPLY_ADD computing src[0] * ONE plus the same negate-selected copy
 * of src[1].
 * NOTE(review): whatever selects between the two encodings (presumably a
 * preprocessor conditional) and part of the negate expression are not
 * visible in this listing.
 */
802 static GLuint
*r300TranslateOpcodeSUB(struct r300_vertex_program
*vp
,
803 struct prog_instruction
*vpi
,
805 struct prog_src_register src
[3])
807 //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
810 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
813 t_dst_index(vp
, &vpi
->DstReg
),
814 t_dst_mask(vpi
->DstReg
.WriteMask
),
815 t_dst_class(vpi
->DstReg
.File
));
816 inst
[1] = t_src(vp
, &src
[0]);
817 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
818 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
819 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
820 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
821 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
822 t_src_class(src
[1].File
),
824 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
825 (src
[1].RelAddr
<< 4);
829 PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
832 t_dst_index(vp
, &vpi
->DstReg
),
833 t_dst_mask(vpi
->DstReg
.WriteMask
),
834 t_dst_class(vpi
->DstReg
.File
));
835 inst
[1] = t_src(vp
, &src
[0]);
836 inst
[2] = __CONST(0, SWIZZLE_ONE
);
837 inst
[3] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]),
838 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)),
839 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)),
840 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)),
841 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)),
842 t_src_class(src
[1].File
),
844 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
845 (src
[1].RelAddr
<< 4);
/*
 * SWZ: encoded as VE_ADD of src[0] and a zero constant (see the note below);
 * both remaining operand slots are __CONST(0, SWIZZLE_ZERO).  The swizzle
 * itself is carried by the t_src encoding of src[0].
 */
851 static GLuint
*r300TranslateOpcodeSWZ(struct r300_vertex_program
*vp
,
852 struct prog_instruction
*vpi
,
854 struct prog_src_register src
[3])
856 //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
858 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
861 t_dst_index(vp
, &vpi
->DstReg
),
862 t_dst_mask(vpi
->DstReg
.WriteMask
),
863 t_dst_class(vpi
->DstReg
.File
));
864 inst
[1] = t_src(vp
, &src
[0]);
865 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
866 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
/*
 * XPD (cross product): encoded as two VE_MULTIPLY_ADD instructions.
 * The first writes a temporary-class destination with src[0] (components
 * 1, 2, 0, 3) times src[1] (components 2, 0, 1, 3) plus a zero constant.
 * The second writes the real destination with src[1] (components 1, 2, 0, 3,
 * negate-selected; per the note, negated) times src[0] (components
 * 2, 0, 1, 3) plus the scratch temporary *u_temp_i.
 * NOTE(review): the scratch-temp parameter declaration, the temp index used
 * by the first instruction, the pointer advance between the instructions
 * and part of the negate expression are not visible in this listing.
 */
871 static GLuint
*r300TranslateOpcodeXPD(struct r300_vertex_program
*vp
,
872 struct prog_instruction
*vpi
,
874 struct prog_src_register src
[3],
877 /* mul r0, r1.yzxw, r2.zxyw
878 mad r0, -r2.yzxw, r1.zxyw, r0
881 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
885 t_dst_mask(vpi
->DstReg
.WriteMask
),
886 PVS_DST_REG_TEMPORARY
);
887 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
888 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
889 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
890 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
891 t_src_class(src
[0].File
),
892 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
893 (src
[0].RelAddr
<< 4);
894 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
895 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
896 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
897 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
898 t_src_class(src
[1].File
),
899 src
[1].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
900 (src
[1].RelAddr
<< 4);
901 inst
[3] = __CONST(1, SWIZZLE_ZERO
);
904 inst
[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD
,
907 t_dst_index(vp
, &vpi
->DstReg
),
908 t_dst_mask(vpi
->DstReg
.WriteMask
),
909 t_dst_class(vpi
->DstReg
.File
));
910 inst
[1] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // Y
911 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // Z
912 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // X
913 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // W
914 t_src_class(src
[1].File
),
916 Negate
) ? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
917 (src
[1].RelAddr
<< 4);
918 inst
[2] = PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // Z
919 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // X
920 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // Y
921 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // W
922 t_src_class(src
[0].File
),
923 src
[0].Negate
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
924 (src
[0].RelAddr
<< 4);
926 PVS_SRC_OPERAND(*u_temp_i
, PVS_SRC_SELECT_X
, PVS_SRC_SELECT_Y
,
927 PVS_SRC_SELECT_Z
, PVS_SRC_SELECT_W
,
928 PVS_SRC_REG_TEMPORARY
, VSF_FLAG_NONE
);
/*
 * Assigns hardware register slots for the program's inputs and outputs.
 *
 * Every vertex attribute whose bit is set in InputsRead gets a slot in
 * vp->inputs[] from the running counter cur_reg.  Outputs whose bit is set
 * in OutputsWritten get consecutive slots in vp->outputs[] in this fixed
 * order: HPOS, PSIZ, COL0, COL1, BFC0, BFC1, TEX0..TEX7, FOGC.  HPOS is
 * asserted to be written.
 *
 * NOTE(review): the bodies of the else-if branches for the colour outputs
 * are not visible in this listing; per the in-code comment they skip an
 * output index so that all four colour slots line up when back-face colours
 * are written.  The initial value of cur_reg and the loop over
 * VERT_RESULT_MAX (presumably initialising vp->outputs[]) are also not
 * fully visible.
 */
935 static void t_inputs_outputs(struct r300_vertex_program
*vp
)
939 GLuint OutputsWritten
, InputsRead
;
941 OutputsWritten
= vp
->Base
->Base
.OutputsWritten
;
942 InputsRead
= vp
->Base
->Base
.InputsRead
;
945 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
946 if (InputsRead
& (1 << i
))
947 vp
->inputs
[i
] = ++cur_reg
;
953 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
956 assert(OutputsWritten
& (1 << VERT_RESULT_HPOS
));
958 if (OutputsWritten
& (1 << VERT_RESULT_HPOS
)) {
959 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
962 if (OutputsWritten
& (1 << VERT_RESULT_PSIZ
)) {
963 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
966 /* If we're writing back facing colors we need to send
967 * four colors to make front/back face colors selection work.
968 * If the vertex program doesn't write all 4 colors, lets
969 * pretend it does by skipping output index reg so the colors
970 * get written into appropriate output vectors.
972 if (OutputsWritten
& (1 << VERT_RESULT_COL0
)) {
973 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
974 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
975 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
979 if (OutputsWritten
& (1 << VERT_RESULT_COL1
)) {
980 vp
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
981 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
) ||
982 OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
986 if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
987 vp
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
988 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
992 if (OutputsWritten
& (1 << VERT_RESULT_BFC1
)) {
993 vp
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
994 } else if (OutputsWritten
& (1 << VERT_RESULT_BFC0
)) {
998 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++) {
999 if (OutputsWritten
& (1 << i
)) {
1000 vp
->outputs
[i
] = cur_reg
++;
1004 if (OutputsWritten
& (1 << VERT_RESULT_FOGC
)) {
1005 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
/*
 * Translates the Mesa instruction stream of vp into hardware code stored in
 * vp->hw_code.body.d.
 *
 * Steps visible below:
 *  - reset pos_end, the code length and the error flag, mark the program as
 *    translated, and assign register slots with t_inputs_outputs();
 *  - for every instruction up to OPCODE_END: redirect a destination rejected
 *    by valid_dst() to the scratch temporary u_temp_i, copy the source
 *    registers into the local src[] array, emit VE_ADD copies into a
 *    temporary when CMP_SRCS() reports that sources cannot be used together,
 *    then dispatch to the matching r300TranslateOpcode*() helper;
 *  - record the resulting code length and set vp->error when it reaches
 *    VSF_MAX_FRAGMENT_LENGTH.
 *
 * vp->error is also set on a path in the opcode switch (presumably the
 * default case for unsupported opcodes - confirm).
 * NOTE(review): the case labels, loop increment and scratch-temp bookkeeping
 * are not visible in this listing.
 */
1009 void r300TranslateVertexShader(struct r300_vertex_program
*vp
)
1011 struct prog_instruction
*vpi
= vp
->Base
->Base
.Instructions
;
1014 unsigned long num_operands
;
1015 /* Initial value should be last tmp reg that hw supports.
1016 Strangely enough r300 doesnt mind even though these would be out of range.
1017 Smart enough to realize that it doesnt need it? */
1018 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
1019 struct prog_src_register src
[3];
1021 vp
->pos_end
= 0; /* Not supported yet */
1022 vp
->hw_code
.length
= 0;
1023 vp
->translated
= GL_TRUE
;
1024 vp
->error
= GL_FALSE
;
1026 t_inputs_outputs(vp
);
1028 for (inst
= vp
->hw_code
.body
.d
; vpi
->Opcode
!= OPCODE_END
;
1033 if (!valid_dst(vp
, &vpi
->DstReg
)) {
1034 /* redirect result to unused temp */
1035 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1036 vpi
->DstReg
.Index
= u_temp_i
;
1039 num_operands
= _mesa_num_inst_src_regs(vpi
->Opcode
);
1041 /* copy the sources (src) from mesa into a local variable... is this needed? */
1042 for (i
= 0; i
< num_operands
; i
++) {
1043 src
[i
] = vpi
->SrcReg
[i
];
/* Three operands: when src[2] conflicts with src[1] or src[0], copy it into
 * the scratch temporary with a VE_ADD and read it from there instead. */
1046 if (num_operands
== 3) { /* TODO: scalars */
1047 if (CMP_SRCS(src
[1], src
[2])
1048 || CMP_SRCS(src
[0], src
[2])) {
1049 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
1054 PVS_DST_REG_TEMPORARY
);
1056 PVS_SRC_OPERAND(t_src_index(vp
, &src
[2]),
1061 t_src_class(src
[2].File
),
1062 VSF_FLAG_NONE
) | (src
[2].
1065 inst
[2] = __CONST(2, SWIZZLE_ZERO
);
1066 inst
[3] = __CONST(2, SWIZZLE_ZERO
);
1069 src
[2].File
= PROGRAM_TEMPORARY
;
1070 src
[2].Index
= u_temp_i
;
/* Two or more operands: when src[1] conflicts with src[0], copy src[0] into
 * the scratch temporary the same way. */
1076 if (num_operands
>= 2) {
1077 if (CMP_SRCS(src
[1], src
[0])) {
1078 inst
[0] = PVS_OP_DST_OPERAND(VE_ADD
,
1083 PVS_DST_REG_TEMPORARY
);
1085 PVS_SRC_OPERAND(t_src_index(vp
, &src
[0]),
1090 t_src_class(src
[0].File
),
1091 VSF_FLAG_NONE
) | (src
[0].
1094 inst
[2] = __CONST(0, SWIZZLE_ZERO
);
1095 inst
[3] = __CONST(0, SWIZZLE_ZERO
);
1098 src
[0].File
= PROGRAM_TEMPORARY
;
1099 src
[0].Index
= u_temp_i
;
/* Dispatch on the Mesa opcode; each helper returns the updated write pointer. */
1105 switch (vpi
->Opcode
) {
1107 inst
= r300TranslateOpcodeABS(vp
, vpi
, inst
, src
);
1110 inst
= r300TranslateOpcodeADD(vp
, vpi
, inst
, src
);
1113 inst
= r300TranslateOpcodeARL(vp
, vpi
, inst
, src
);
1116 inst
= r300TranslateOpcodeDP3(vp
, vpi
, inst
, src
);
1119 inst
= r300TranslateOpcodeDP4(vp
, vpi
, inst
, src
);
1122 inst
= r300TranslateOpcodeDPH(vp
, vpi
, inst
, src
);
1125 inst
= r300TranslateOpcodeDST(vp
, vpi
, inst
, src
);
1128 inst
= r300TranslateOpcodeEX2(vp
, vpi
, inst
, src
);
1131 inst
= r300TranslateOpcodeEXP(vp
, vpi
, inst
, src
);
1134 inst
= r300TranslateOpcodeFLR(vp
, vpi
, inst
, src
, /* FIXME */
1138 inst
= r300TranslateOpcodeFRC(vp
, vpi
, inst
, src
);
1141 inst
= r300TranslateOpcodeLG2(vp
, vpi
, inst
, src
);
1144 inst
= r300TranslateOpcodeLIT(vp
, vpi
, inst
, src
);
1147 inst
= r300TranslateOpcodeLOG(vp
, vpi
, inst
, src
);
1150 inst
= r300TranslateOpcodeMAD(vp
, vpi
, inst
, src
);
1153 inst
= r300TranslateOpcodeMAX(vp
, vpi
, inst
, src
);
1156 inst
= r300TranslateOpcodeMIN(vp
, vpi
, inst
, src
);
1159 inst
= r300TranslateOpcodeMOV(vp
, vpi
, inst
, src
);
1162 inst
= r300TranslateOpcodeMUL(vp
, vpi
, inst
, src
);
1165 inst
= r300TranslateOpcodePOW(vp
, vpi
, inst
, src
);
1168 inst
= r300TranslateOpcodeRCP(vp
, vpi
, inst
, src
);
1171 inst
= r300TranslateOpcodeRSQ(vp
, vpi
, inst
, src
);
1174 inst
= r300TranslateOpcodeSGE(vp
, vpi
, inst
, src
);
1177 inst
= r300TranslateOpcodeSLT(vp
, vpi
, inst
, src
);
1180 inst
= r300TranslateOpcodeSUB(vp
, vpi
, inst
, src
);
1183 inst
= r300TranslateOpcodeSWZ(vp
, vpi
, inst
, src
);
1186 inst
= r300TranslateOpcodeXPD(vp
, vpi
, inst
, src
, /* FIXME */
1190 vp
->error
= GL_TRUE
;
/* Code length is the distance from the start of the buffer to the write
 * pointer; reaching VSF_MAX_FRAGMENT_LENGTH counts as an error. */
1195 vp
->hw_code
.length
= (inst
- vp
->hw_code
.body
.d
);
1196 if (vp
->hw_code
.length
>= VSF_MAX_FRAGMENT_LENGTH
) {
1197 vp
->error
= GL_TRUE
;
/*
 * Inserts two instructions just before the program's last instruction and
 * fills them in: a MOV from temporary temp_index to VERT_RESULT_HPOS and a
 * MOV from the same temporary to texcoord output VERT_RESULT_TEX0 + tex_id,
 * both with a full XYZW write mask.  An OPCODE_END is written afterwards.
 *
 * prog:       program to modify.
 * temp_index: temporary register that holds the position.
 * tex_id:     texcoord output that receives the copy.
 *
 * NOTE(review): the pointer advances between the three instruction writes
 * are not visible in this listing.
 */
1201 static void insert_wpos(struct gl_program
*prog
, GLuint temp_index
, int tex_id
)
1203 struct prog_instruction
*vpi
;
1205 _mesa_insert_instructions(prog
, prog
->NumInstructions
- 1, 2);
/* NumInstructions has grown by two, so "- 3" addresses the first new slot. */
1207 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 3];
1209 vpi
->Opcode
= OPCODE_MOV
;
1211 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
1212 vpi
->DstReg
.Index
= VERT_RESULT_HPOS
;
1213 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1214 vpi
->DstReg
.CondMask
= COND_TR
;
1216 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1217 vpi
->SrcReg
[0].Index
= temp_index
;
1218 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1222 vpi
->Opcode
= OPCODE_MOV
;
1224 vpi
->DstReg
.File
= PROGRAM_OUTPUT
;
1225 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
1226 vpi
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1227 vpi
->DstReg
.CondMask
= COND_TR
;
1229 vpi
->SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1230 vpi
->SrcReg
[0].Index
= temp_index
;
1231 vpi
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1235 vpi
->Opcode
= OPCODE_END
;
/**
 * Route the vertex position through a temporary so that it can also be
 * emitted as a texture coordinate.
 *
 * A new temporary index is taken from prog->NumTemporaries (which is then
 * incremented). Every instruction before OPCODE_END whose destination is
 * PROGRAM_OUTPUT / VERT_RESULT_HPOS is redirected to that temporary.
 * insert_wpos() is then called with the temporary and tex_id, and the
 * VERT_RESULT_TEX0 + tex_id bit is set in prog->OutputsWritten.
 *
 * \param prog    program to rewrite in place
 * \param tex_id  texcoord slot, relative to VERT_RESULT_TEX0
 */
1238 static void pos_as_texcoord(struct gl_program
*prog
, int tex_id
)
1240 struct prog_instruction
*vpi
;
/* The current temporary count doubles as the index of the new register. */
1241 GLuint tempregi
= prog
->NumTemporaries
;
1243 prog
->NumTemporaries
++;
/* Scan up to, but not including, the END instruction. */
1245 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
/* Redirect writes of the position output into the temporary. */
1246 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
1247 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1248 vpi
->DstReg
.Index
= tempregi
;
1252 insert_wpos(prog
, tempregi
, tex_id
);
/* Record that the texcoord output is now written by this program. */
1254 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
1258 * The fogcoord attribute is special in that only the first component
1259 * is relevant, and the remaining components are always fixed (when read
1260 * from by the fragment program) to yield an X001 pattern.
1262 * We need to enforce this either in the vertex program or in the fragment
1263 * program, and this code chooses not to enforce it in the vertex program.
1264 * This is slightly cheaper, as long as the fragment program does not use
1267 * And it seems that usually, weird swizzles are not used, so...
1269 * See also the counterpart rewriting for fragment programs.
/*
 * Mechanics: each instruction before OPCODE_END that writes
 * PROGRAM_OUTPUT / VERT_RESULT_FOGC is retargeted to
 * VERT_RESULT_TEX0 + tex_id with its write mask set to WRITEMASK_X.
 * Afterwards the FOGC bit is cleared and the texcoord bit is set in
 * prog->OutputsWritten.
 *
 * NOTE(review): the statement advancing vpi inside the while loop is not
 * visible in this view.
 */
1271 static void fog_as_texcoord(struct gl_program
*prog
, int tex_id
)
1273 struct prog_instruction
*vpi
;
1275 vpi
= prog
->Instructions
;
1276 while (vpi
->Opcode
!= OPCODE_END
) {
1277 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&& vpi
->DstReg
.Index
== VERT_RESULT_FOGC
) {
/* Retarget the fog write to the texcoord slot, X component only. */
1278 vpi
->DstReg
.Index
= VERT_RESULT_TEX0
+ tex_id
;
1279 vpi
->DstReg
.WriteMask
= WRITEMASK_X
;
/* Swap the FOGC bit for the texcoord bit in the written-outputs mask. */
1285 prog
->OutputsWritten
&= ~(1 << VERT_RESULT_FOGC
);
1286 prog
->OutputsWritten
|= 1 << (VERT_RESULT_TEX0
+ tex_id
);
/**
 * Rewrite the instruction at index pos as a MAX of its first source and
 * the negation of that source.
 *
 * The opcode becomes OPCODE_MAX, source 1 is copied from source 0, and the
 * negate bits of source 1 are flipped on all four components.
 *
 * \param prog  program containing the instruction
 * \param pos   index of the instruction in prog->Instructions
 * \return value is added by translateInsts() to its loop index; the
 *         return statement itself is not visible in this view.
 */
1289 static int translateABS(struct gl_program
*prog
, int pos
)
1291 struct prog_instruction
*inst
;
1293 inst
= &prog
->Instructions
[pos
];
1295 inst
->Opcode
= OPCODE_MAX
;
/* Second operand is the same register as the first ... */
1296 inst
->SrcReg
[1] = inst
->SrcReg
[0];
/* ... with its sign inverted relative to the first operand. */
1297 inst
->SrcReg
[1].Negate
^= NEGATE_XYZW
;
/**
 * Rewrite the instruction at index pos as a DP4.
 *
 * The opcode becomes OPCODE_DP4 and the swizzle of source 0 is replaced by
 * combine_swizzles4(old swizzle, X, Y, Z, ZERO). Presumably this makes the
 * fourth component of source 0 read as zero so the fourth product drops
 * out - confirm against combine_swizzles4.
 *
 * \param prog  program containing the instruction
 * \param pos   index of the instruction in prog->Instructions
 * \return value is added by translateInsts() to its loop index; the
 *         return statement itself is not visible in this view.
 */
1302 static int translateDP3(struct gl_program
*prog
, int pos
)
1304 struct prog_instruction
*inst
;
1306 inst
= &prog
->Instructions
[pos
];
1308 inst
->Opcode
= OPCODE_DP4
;
1309 inst
->SrcReg
[0].Swizzle
= combine_swizzles4(inst
->SrcReg
[0].Swizzle
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ZERO
);
/**
 * Rewrite the instruction at index pos as a DP4.
 *
 * The opcode becomes OPCODE_DP4 and the swizzle of source 0 is replaced by
 * combine_swizzles4(old swizzle, X, Y, Z, ONE). Presumably this makes the
 * fourth component of source 0 read as one - confirm against
 * combine_swizzles4.
 *
 * \param prog  program containing the instruction
 * \param pos   index of the instruction in prog->Instructions
 * \return value is added by translateInsts() to its loop index; the
 *         return statement itself is not visible in this view.
 */
1314 static int translateDPH(struct gl_program
*prog
, int pos
)
1316 struct prog_instruction
*inst
;
1318 inst
= &prog
->Instructions
[pos
];
1320 inst
->Opcode
= OPCODE_DP4
;
1321 inst
->SrcReg
[0].Swizzle
= combine_swizzles4(inst
->SrcReg
[0].Swizzle
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ONE
);
/**
 * Expand the instruction at index pos into an FRC followed by an ADD,
 * using a freshly numbered temporary.
 *
 * Visible steps: a temporary index is taken from prog->NumTemporaries
 * (post-incremented), one instruction slot is requested at pos + 1, the
 * instruction at pos becomes FRC writing that temporary, and an ADD is set
 * up whose source 0 is the preceding instruction's source 0 and whose
 * source 1 is the temporary, fully negated. The ADD therefore computes the
 * FRC operand minus the FRC result.
 *
 * \param prog  program to modify in place
 * \param pos   index of the instruction in prog->Instructions
 * \return value is added by translateInsts() to its loop index; the
 *         return statement itself is not visible in this view.
 *
 * NOTE(review): the declaration of tmp_idx, the uses of dst and the
 * statement advancing inst to the second instruction are not visible in
 * this view - confirm against the full file.
 */
1326 static int translateFLR(struct gl_program
*prog
, int pos
)
1328 struct prog_instruction
*inst
;
1329 struct prog_dst_register dst
;
1332 tmp_idx
= prog
->NumTemporaries
++;
/* Request room for the extra instruction directly after pos. */
1334 _mesa_insert_instructions(prog
, pos
+ 1, 1);
/* Take the pointer only after the insertion call. */
1336 inst
= &prog
->Instructions
[pos
];
/* First instruction: FRC into the temporary. */
1339 inst
->Opcode
= OPCODE_FRC
;
1340 inst
->DstReg
.File
= PROGRAM_TEMPORARY
;
1341 inst
->DstReg
.Index
= tmp_idx
;
/* Second instruction: ADD of the original operand and the negated
 * temporary. */
1344 inst
->Opcode
= OPCODE_ADD
;
1346 inst
->SrcReg
[0] = (inst
-1)->SrcReg
[0];
1347 inst
->SrcReg
[1].File
= PROGRAM_TEMPORARY
;
1348 inst
->SrcReg
[1].Index
= tmp_idx
;
1349 inst
->SrcReg
[1].Negate
= NEGATE_XYZW
;
/**
 * Rewrite the instruction at index pos as an ADD with the sign of its
 * second source flipped on all four components.
 *
 * \param prog  program containing the instruction
 * \param pos   index of the instruction in prog->Instructions
 * \return value is added by translateInsts() to its loop index; the
 *         return statement itself is not visible in this view.
 */
1354 static int translateSUB(struct gl_program
*prog
, int pos
)
1356 struct prog_instruction
*inst
;
1358 inst
= &prog
->Instructions
[pos
];
1360 inst
->Opcode
= OPCODE_ADD
;
/* XOR keeps any negation that was already present on the operand. */
1361 inst
->SrcReg
[1].Negate
^= NEGATE_XYZW
;
/**
 * Rewrite the instruction at index pos by changing its opcode to
 * OPCODE_MOV; no other field is touched in the visible code.
 *
 * \param prog  program containing the instruction
 * \param pos   index of the instruction in prog->Instructions
 * \return value is added by translateInsts() to its loop index; the
 *         return statement itself is not visible in this view.
 */
1366 static int translateSWZ(struct gl_program
*prog
, int pos
)
1368 prog
->Instructions
[pos
].Opcode
= OPCODE_MOV
;
/**
 * Expand the instruction at index pos into a MUL followed by a MAD, using
 * a freshly numbered temporary.
 *
 * Visible steps: a temporary index is taken from prog->NumTemporaries
 * (post-incremented) and one instruction slot is requested at pos + 1.
 * The MUL writes the temporary with source swizzles (Y,Z,X,W) and
 * (Z,X,Y,W). The MAD uses source swizzles (Z,X,Y,W) and (Y,Z,X,W), flips
 * the negate bits of source 1, and reads the temporary as source 2.
 *
 * \param prog  program to modify in place
 * \param pos   index of the instruction in prog->Instructions
 * \return value is added by translateInsts() to its loop index; the
 *         return statement itself is not visible in this view.
 *
 * NOTE(review): the declaration of tmp_idx and the statements that set up
 * and advance to the second instruction are not visible in this view -
 * confirm against the full file.
 */
1373 static int translateXPD(struct gl_program
*prog
, int pos
)
1375 struct prog_instruction
*inst
;
1378 tmp_idx
= prog
->NumTemporaries
++;
/* Request room for the extra instruction directly after pos. */
1380 _mesa_insert_instructions(prog
, pos
+ 1, 1);
1382 inst
= &prog
->Instructions
[pos
];
/* MUL: temporary = src0 swizzled (Y,Z,X,W) * src1 swizzled (Z,X,Y,W). */
1386 inst
->Opcode
= OPCODE_MUL
;
1387 inst
->DstReg
.File
= PROGRAM_TEMPORARY
;
1388 inst
->DstReg
.Index
= tmp_idx
;
1389 inst
->SrcReg
[0].Swizzle
= combine_swizzles4(inst
->SrcReg
[0].Swizzle
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
);
1390 inst
->SrcReg
[1].Swizzle
= combine_swizzles4(inst
->SrcReg
[1].Swizzle
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
);
/* MAD: src0 swizzled (Z,X,Y,W) * sign-flipped src1 swizzled (Y,Z,X,W)
 * + temporary. */
1393 inst
->Opcode
= OPCODE_MAD
;
1394 inst
->SrcReg
[0].Swizzle
= combine_swizzles4(inst
->SrcReg
[0].Swizzle
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_W
);
1395 inst
->SrcReg
[1].Swizzle
= combine_swizzles4(inst
->SrcReg
[1].Swizzle
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_X
, SWIZZLE_W
);
1396 inst
->SrcReg
[1].Negate
^= NEGATE_XYZW
;
1397 inst
->SrcReg
[2].File
= PROGRAM_TEMPORARY
;
1398 inst
->SrcReg
[2].Index
= tmp_idx
;
/**
 * Walk every instruction of prog and dispatch on its opcode to one of the
 * translate* helpers (ABS, DP3, DPH, FLR, SUB, SWZ, XPD).
 *
 * Each helper's return value is added to the loop index i. The index is
 * re-read into inst at the top of every iteration, so the pointer is
 * refreshed after any helper that calls _mesa_insert_instructions().
 *
 * \param prog  program to rewrite in place
 *
 * NOTE(review): the case labels and the declaration of i are not visible
 * in this view; the opcode-to-helper mapping is inferred from the helper
 * names only.
 */
1403 static void translateInsts(struct gl_program
*prog
)
1405 struct prog_instruction
*inst
;
/* NumInstructions is re-evaluated on every iteration. */
1408 for (i
= 0; i
< prog
->NumInstructions
; ++i
) {
1409 inst
= &prog
->Instructions
[i
];
1411 switch (inst
->Opcode
) {
1413 i
+= translateABS(prog
, i
);
1416 i
+= translateDP3(prog
, i
);
1419 i
+= translateDPH(prog
, i
);
1422 i
+= translateFLR(prog
, i
);
1425 i
+= translateSUB(prog
, i
);
1428 i
+= translateSWZ(prog
, i
);
1431 i
+= translateXPD(prog
, i
);
/*
 * ADD_OUTPUT(fp_attr, vp_result): if bit fp_attr is set in FpReads and bit
 * vp_result is not set in prog->OutputsWritten, set bit vp_result in
 * OutputsAdded. The expansion refers to FpReads, prog and OutputsAdded by
 * name, so those must exist at the point of use.
 * NOTE(review): part of the macro body is not visible in this view.
 */
1439 #define ADD_OUTPUT(fp_attr, vp_result) \
1441 if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \
1442 OutputsAdded |= 1 << (vp_result); \
/**
 * Make the vertex program write every output that the selected fragment
 * program reads but the vertex program does not yet write.
 *
 * FpReads is taken from r300->selected_fp->Base->InputsRead. ADD_OUTPUT is
 * applied to COL0, COL1 and to texcoords TEX0 + i for i in 0..6. For each
 * bit collected in OutputsAdded, a MOV from PROGRAM_CONSTANT index 0 to
 * that output is filled in, in slots requested just before the last
 * instruction. Finally OutputsAdded is merged into prog->OutputsWritten.
 *
 * \param ctx   GL context, used to reach the r300 context
 * \param prog  vertex program to extend in place
 *
 * NOTE(review): the texcoord loop stops at i < 7, while nqssadceInit()
 * checks 8 texcoord bits - confirm whether the difference is intentional.
 * NOTE(review): the declarations and updates of i and count, and the
 * statement advancing inst between MOVs, are not visible in this view.
 */
1447 static void addArtificialOutputs(GLcontext
*ctx
, struct gl_program
*prog
)
1449 r300ContextPtr r300
= R300_CONTEXT(ctx
);
1450 GLuint OutputsAdded
, FpReads
;
/* Inputs consumed by the currently selected fragment program. */
1455 FpReads
= r300
->selected_fp
->Base
->InputsRead
;
1457 ADD_OUTPUT(FRAG_ATTRIB_COL0
, VERT_RESULT_COL0
);
1458 ADD_OUTPUT(FRAG_ATTRIB_COL1
, VERT_RESULT_COL1
);
1460 for (i
= 0; i
< 7; ++i
) {
1461 ADD_OUTPUT(FRAG_ATTRIB_TEX0
+ i
, VERT_RESULT_TEX0
+ i
);
1464 /* Some outputs may be artificially added, to match the inputs of the fragment program.
1465 * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
1466 * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
1469 struct prog_instruction
*inst
;
1471 _mesa_insert_instructions(prog
, prog
->NumInstructions
- 1, count
);
1472 inst
= &prog
->Instructions
[prog
->NumInstructions
- 1 - count
];
1474 for (i
= 0; i
< VERT_RESULT_MAX
; ++i
) {
1475 if (OutputsAdded
& (1 << i
)) {
1476 inst
->Opcode
= OPCODE_MOV
;
1478 inst
->DstReg
.File
= PROGRAM_OUTPUT
;
1479 inst
->DstReg
.Index
= i
;
1480 inst
->DstReg
.WriteMask
= WRITEMASK_XYZW
;
1481 inst
->DstReg
.CondMask
= COND_TR
;
1483 inst
->SrcReg
[0].File
= PROGRAM_CONSTANT
;
1484 inst
->SrcReg
[0].Index
= 0;
1485 inst
->SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1491 prog
->OutputsWritten
|= OutputsAdded
;
/**
 * Init callback for the NQSSADCE pass (installed as .Init in
 * build_program()): records which vertex outputs are sourced, and with
 * which component mask.
 *
 * Based on the selected fragment program's InputsRead:
 *  - FRAG_BIT_COL0 marks VERT_RESULT_COL0 and VERT_RESULT_BFC0 as XYZW,
 *  - FRAG_BIT_COL1 marks VERT_RESULT_COL1 and VERT_RESULT_BFC1 as XYZW,
 *  - FRAG_BIT_TEX(i), i in 0..7, marks VERT_RESULT_TEX0 + i as XYZW.
 * VERT_RESULT_HPOS is always marked XYZW. VERT_RESULT_PSIZ is marked X
 * only when the program's OutputsWritten has the PSIZ bit.
 *
 * \param s  pass state; s->Ctx, s->Program and s->Outputs are used
 *
 * NOTE(review): the declarations of fp_reads and i are not visible in this
 * view.
 */
1497 static void nqssadceInit(struct nqssadce_state
* s
)
1499 r300ContextPtr r300
= R300_CONTEXT(s
->Ctx
);
1502 fp_reads
= r300
->selected_fp
->Base
->InputsRead
;
/* Front colour read by the fragment program: keep front and back-face
 * colour outputs. */
1504 if (fp_reads
& FRAG_BIT_COL0
) {
1505 s
->Outputs
[VERT_RESULT_COL0
].Sourced
= WRITEMASK_XYZW
;
1506 s
->Outputs
[VERT_RESULT_BFC0
].Sourced
= WRITEMASK_XYZW
;
1509 if (fp_reads
& FRAG_BIT_COL1
) {
1510 s
->Outputs
[VERT_RESULT_COL1
].Sourced
= WRITEMASK_XYZW
;
1511 s
->Outputs
[VERT_RESULT_BFC1
].Sourced
= WRITEMASK_XYZW
;
1517 for (i
= 0; i
< 8; ++i
) {
1518 if (fp_reads
& FRAG_BIT_TEX(i
)) {
1519 s
->Outputs
[VERT_RESULT_TEX0
+ i
].Sourced
= WRITEMASK_XYZW
;
/* Position is marked unconditionally. */
1524 s
->Outputs
[VERT_RESULT_HPOS
].Sourced
= WRITEMASK_XYZW
;
/* Point size: only the X component, and only if the program writes it. */
1525 if (s
->Program
->OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
1526 s
->Outputs
[VERT_RESULT_PSIZ
].Sourced
= WRITEMASK_X
;
/**
 * Swizzle predicate installed as .IsNativeSwizzle in the NQSSADCE
 * descriptor built by build_program().
 *
 * \param opcode  opcode of the instruction being examined
 * \param reg     source register being examined
 *
 * NOTE(review): the function body is not visible in this view, so the
 * returned value is not documented here.
 */
1529 static GLboolean
swizzleIsNative(GLuint opcode
, struct prog_src_register reg
)
/**
 * Build a driver vertex program for the given key from a Mesa vertex
 * program.
 *
 * Visible steps, in order:
 *  1. allocate the r300_vertex_program with _mesa_calloc, clone mesa_vp
 *     into vp->Base and copy wanted_key into vp->key;
 *  2. if the clone is position invariant, call _mesa_insert_mvp_code();
 *  3. if the selected fragment program's wpos_attr / fog_attr differ from
 *     FRAG_ATTRIB_MAX, call pos_as_texcoord() / fog_as_texcoord() with the
 *     attribute expressed relative to FRAG_ATTRIB_TEX0;
 *  4. addArtificialOutputs() and translateInsts();
 *  5. radeonNqssaDce() with nqssadceInit / swizzleIsNative, followed by
 *     _mesa_optimize_program();
 *  6. scan all instructions up to OPCODE_END for the highest
 *     PROGRAM_TEMPORARY index used as source or destination and store
 *     that value plus one in vp->num_temporaries.
 * With DEBUG_VERTS set in RADEON_DEBUG the program is printed to stderr at
 * three points.
 *
 * \param ctx         GL context
 * \param wanted_key  key copied into the new program
 * \param mesa_vp     Mesa vertex program that is cloned, not modified
 *
 * NOTE(review): the _mesa_calloc result is dereferenced with no NULL check
 * visible in this view.
 * NOTE(review): the declarations and initial values of max, tmp and i, the
 * advance of inst in the scan loop, any step between the scan and the end
 * of the function, and the return statement are not visible in this view.
 */
1537 static struct r300_vertex_program
*build_program(GLcontext
*ctx
,
1538 struct r300_vertex_program_key
*wanted_key
,
1539 const struct gl_vertex_program
*mesa_vp
)
1541 r300ContextPtr r300
= R300_CONTEXT(ctx
);
1542 struct r300_vertex_program
*vp
;
1543 struct gl_program
*prog
;
1545 vp
= _mesa_calloc(sizeof(*vp
));
/* Work on a private clone of the Mesa program. */
1546 vp
->Base
= (struct gl_vertex_program
*) _mesa_clone_program(ctx
, &mesa_vp
->Base
);
1547 _mesa_memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
1549 prog
= &vp
->Base
->Base
;
1551 if (RADEON_DEBUG
& DEBUG_VERTS
) {
1552 fprintf(stderr
, "Initial vertex program:\n");
1553 _mesa_print_program(prog
);
1557 if (vp
->Base
->IsPositionInvariant
) {
1558 _mesa_insert_mvp_code(ctx
, vp
->Base
);
/* FRAG_ATTRIB_MAX is the value compared against to skip this rewrite. */
1561 if (r300
->selected_fp
->code
.wpos_attr
!= FRAG_ATTRIB_MAX
) {
1562 pos_as_texcoord(&vp
->Base
->Base
, r300
->selected_fp
->code
.wpos_attr
- FRAG_ATTRIB_TEX0
);
1565 if (r300
->selected_fp
->code
.fog_attr
!= FRAG_ATTRIB_MAX
) {
1566 fog_as_texcoord(&vp
->Base
->Base
, r300
->selected_fp
->code
.fog_attr
- FRAG_ATTRIB_TEX0
);
1569 addArtificialOutputs(ctx
, prog
);
1571 translateInsts(prog
);
1573 if (RADEON_DEBUG
& DEBUG_VERTS
) {
1574 fprintf(stderr
, "Vertex program after native rewrite:\n");
1575 _mesa_print_program(prog
);
/* Descriptor for the NQSSADCE pass; no BuildSwizzle callback is given. */
1580 struct radeon_nqssadce_descr nqssadce
= {
1581 .Init
= &nqssadceInit
,
1582 .IsNativeSwizzle
= &swizzleIsNative
,
1583 .BuildSwizzle
= NULL
1585 radeonNqssaDce(ctx
, prog
, &nqssadce
);
1587 /* We need this step for reusing temporary registers */
1588 _mesa_optimize_program(ctx
, prog
);
1590 if (RADEON_DEBUG
& DEBUG_VERTS
) {
1591 fprintf(stderr
, "Vertex program after NQSSADCE:\n");
1592 _mesa_print_program(prog
);
1597 assert(prog
->NumInstructions
);
1599 struct prog_instruction
*inst
;
1602 inst
= prog
->Instructions
;
1604 while (inst
->Opcode
!= OPCODE_END
) {
/* Only as many source slots as the opcode actually uses are inspected. */
1605 tmp
= _mesa_num_inst_src_regs(inst
->Opcode
);
1606 for (i
= 0; i
< tmp
; ++i
) {
1607 if (inst
->SrcReg
[i
].File
== PROGRAM_TEMPORARY
) {
1608 if ((int) inst
->SrcReg
[i
].Index
> max
) {
1609 max
= inst
->SrcReg
[i
].Index
;
1614 if (_mesa_num_inst_dst_regs(inst
->Opcode
)) {
1615 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
1616 if ((int) inst
->DstReg
.Index
> max
) {
1617 max
= inst
->DstReg
.Index
;
1624 /* We actually want highest index of used temporary register,
1625 * not the number of temporaries used.
1626 * These values aren't always the same.
1628 vp
->num_temporaries
= max
+ 1;
/**
 * Select the driver vertex program matching the currently selected
 * fragment program, building it if necessary.
 *
 * A key is zero-initialised and filled from the selected fragment program
 * (InputsRead, fog_attr, wpos_attr). The list vpc->progs of the current
 * vertex program container is searched by comparing keys with
 * _mesa_memcmp. If the search does not return, build_program() is called
 * and the result's next pointer is set to the current list head.
 *
 * \param ctx  GL context
 * \return the chosen program, which is also stored in r300->selected_vp
 *
 * NOTE(review): the remainder of the _mesa_memcmp condition (its
 * parentheses do not balance in the visible lines) and the statement that
 * stores the new program as list head are not visible in this view.
 */
1634 struct r300_vertex_program
* r300SelectVertexShader(GLcontext
*ctx
)
1636 r300ContextPtr r300
= R300_CONTEXT(ctx
);
/* Zero-initialised before the fields are filled, since the key is
 * compared bytewise below. */
1637 struct r300_vertex_program_key wanted_key
= { 0 };
1638 struct r300_vertex_program_cont
*vpc
;
1639 struct r300_vertex_program
*vp
;
1641 vpc
= (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
1642 wanted_key
.FpReads
= r300
->selected_fp
->Base
->InputsRead
;
1643 wanted_key
.FogAttr
= r300
->selected_fp
->code
.fog_attr
;
1644 wanted_key
.WPosAttr
= r300
->selected_fp
->code
.wpos_attr
;
/* Look for an already built variant with the same key. */
1646 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
) {
1647 if (_mesa_memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
))
1649 return r300
->selected_vp
= vp
;
/* No early return happened: build a new variant for this key. */
1653 vp
= build_program(ctx
, &wanted_key
, &vpc
->mesa_program
);
1654 vp
->next
= vpc
->progs
;
1657 return r300
->selected_vp
= vp
;
/*
 * bump_vpu_count(ptr, new_count): treat ptr as a drm_r300_cmd_header_t,
 * compute new_count / 4, assert that the quotient is below 256, and raise
 * the header's vpu.count to that quotient if it is currently smaller.
 * The count is never lowered by this macro.
 * NOTE(review): the closing part of the do-block is not visible in this
 * view.
 */
1660 #define bump_vpu_count(ptr, new_count) do { \
1661 drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
1662 int _nc=(new_count)/4; \
1663 assert(_nc < 256); \
1664 if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
/**
 * Copy a block of vertex-shader hardware words into one of the driver's
 * command buffers (hw.vpi, hw.vpp or hw.vps), selected by dest.
 *
 * The switch value is bits 8..11 of dest. In each of the three visible
 * branches the state atom is flagged with R300_STATECHANGE, code->length
 * words from code->body.d are stored starting 4 * (dest & 0xff) entries
 * past the buffer's base index, and bump_vpu_count() is called with
 * code->length + 4 * (dest & 0xff). A further branch prints an error to
 * stderr.
 *
 * \param r300  driver context
 * \param dest  destination selector; low 8 bits are an offset in units of
 *              four entries, bits 8..11 select the buffer
 * \param code  words to copy; length must be positive and a multiple of 4
 *              (asserted)
 *
 * NOTE(review): the case labels and the declaration of i are not visible
 * in this view, so the selector value for each buffer is not documented.
 */
1667 static void r300EmitVertexProgram(r300ContextPtr r300
, int dest
, struct r300_vertex_shader_hw_code
*code
)
1671 assert((code
->length
> 0) && (code
->length
% 4 == 0));
1673 switch ((dest
>> 8) & 0xf) {
/* Copy into hw.vpi, starting at R300_VPI_INSTR_0. */
1675 R300_STATECHANGE(r300
, vpi
);
1676 for (i
= 0; i
< code
->length
; i
++)
1677 r300
->hw
.vpi
.cmd
[R300_VPI_INSTR_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1678 bump_vpu_count(r300
->hw
.vpi
.cmd
, code
->length
+ 4 * (dest
& 0xff));
/* Copy into hw.vpp, starting at R300_VPP_PARAM_0. */
1681 R300_STATECHANGE(r300
, vpp
);
1682 for (i
= 0; i
< code
->length
; i
++)
1683 r300
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
+ i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1684 bump_vpu_count(r300
->hw
.vpp
.cmd
, code
->length
+ 4 * (dest
& 0xff));
/* Copy into hw.vps, starting at index 1. */
1687 R300_STATECHANGE(r300
, vps
);
1688 for (i
= 0; i
< code
->length
; i
++)
1689 r300
->hw
.vps
.cmd
[1 + i
+ 4 * (dest
& 0xff)] = (code
->body
.d
[i
]);
1690 bump_vpu_count(r300
->hw
.vps
.cmd
, code
->length
+ 4 * (dest
& 0xff));
/* Unhandled selector: report it on stderr. */
1693 fprintf(stderr
, "%s:%s don't know how to handle dest %04x\n", __FILE__
, __FUNCTION__
, dest
);
/**
 * Program the hardware state for the currently selected vertex program.
 *
 * Visible steps: the vpu.count fields of the vpp, vpi and vps command
 * headers are zeroed; parameters are written into hw.vpp by
 * r300VertexProgUpdateParams() and the vpp count is bumped to match; the
 * hardware code is emitted with r300EmitVertexProgram() at
 * R300_PVS_CODE_START; r300VapCntl() is called with the number of inputs
 * read, outputs written and temporaries; and the three PVS_CNTL registers
 * are filled from inst_count and param_count.
 *
 * \param rmesa  driver context; rmesa->selected_vp must already be set
 *
 * NOTE(review): the declaration of inst_count is not visible in this view,
 * and it cannot be confirmed from here that the function ends after the
 * last visible statement.
 */
1698 void r300SetupVertexProgram(r300ContextPtr rmesa
)
1700 GLcontext
*ctx
= rmesa
->radeon
.glCtx
;
1701 struct r300_vertex_program
*prog
= rmesa
->selected_vp
;
1703 int param_count
= 0;
1705 /* Reset state, in case we don't use something */
1706 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpp
.cmd
)->vpu
.count
= 0;
1707 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vpi
.cmd
)->vpu
.count
= 0;
1708 ((drm_r300_cmd_header_t
*) rmesa
->hw
.vps
.cmd
)->vpu
.count
= 0;
/* Upload program parameters; the returned value sizes the vpp packet. */
1710 R300_STATECHANGE(rmesa
, vpp
);
1711 param_count
= r300VertexProgUpdateParams(ctx
, prog
->Base
, (float *)&rmesa
->hw
.vpp
.cmd
[R300_VPP_PARAM_0
]);
1712 bump_vpu_count(rmesa
->hw
.vpp
.cmd
, param_count
);
1715 r300EmitVertexProgram(rmesa
, R300_PVS_CODE_START
, &(prog
->hw_code
));
/* hw_code.length / 4, minus one, is the value used for the instruction
 * fields below. */
1716 inst_count
= (prog
->hw_code
.length
/ 4) - 1;
1718 r300VapCntl(rmesa
, _mesa_bitcount(prog
->Base
->Base
.InputsRead
),
1719 _mesa_bitcount(prog
->Base
->Base
.OutputsWritten
), prog
->num_temporaries
);
1721 R300_STATECHANGE(rmesa
, pvs
);
1722 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_1
] = (0 << R300_PVS_FIRST_INST_SHIFT
) | (inst_count
<< R300_PVS_XYZW_VALID_INST_SHIFT
) |
1723 (inst_count
<< R300_PVS_LAST_INST_SHIFT
);
1725 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_2
] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT
) | (param_count
<< R300_PVS_MAX_CONST_ADDR_SHIFT
);
1726 rmesa
->hw
.pvs
.cmd
[R300_PVS_CNTL_3
] = (inst_count
<< R300_PVS_LAST_VTX_SRC_INST_SHIFT
);