1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Aapo Tahkola <aet@rasterburn.org>
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_parameter.h"
38 #include "shader/prog_statevars.h"
41 #include "r300_context.h"
42 #include "r300_program.h"
44 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
45 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
46 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
47 SWIZZLE_W != VSF_IN_COMPONENT_W || \
48 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
49 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
50 WRITEMASK_X != VSF_FLAG_X || \
51 WRITEMASK_Y != VSF_FLAG_Y || \
52 WRITEMASK_Z != VSF_FLAG_Z || \
53 WRITEMASK_W != VSF_FLAG_W
54 #error Cannot change these!
/*
 * Encoding of the `ip` word stored with each op_names entry ("number of
 * input operands and flags"):
 *
 *   SCALAR_FLAG - OR-ed into the operand count for opcodes whose sources
 *                 are scalar (see the OPN(..., n | SCALAR_FLAG) entries).
 *   FLAG_MASK   - mask covering the flag bit(s); same bit as SCALAR_FLAG.
 *   OP_MASK     - mask that extracts the operand count.
 *
 * The flag constants are unsigned on purpose: `1 << 31` shifts into the sign
 * bit of a signed int, which is undefined behaviour, and the resulting
 * negative value would sign-extend when stored in an unsigned long.
 */
#define SCALAR_FLAG (1U << 31)
#define FLAG_MASK (1U << 31)
#define OP_MASK (0xf)		/* we are unlikely to have more than 15 */

/*
 * Build one op_names initializer: the opcode's name as a string, its
 * OPCODE_<name> value, and the operand-count/flags word.
 */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
65 unsigned long ip
; /* number of input operands and flags */
70 OPN(ARL
, 1 | SCALAR_FLAG
),
75 OPN(EX2
, 1 | SCALAR_FLAG
),
76 OPN(EXP
, 1 | SCALAR_FLAG
),
79 OPN(LG2
, 1 | SCALAR_FLAG
),
81 OPN(LOG
, 1 | SCALAR_FLAG
),
87 OPN(POW
, 2 | SCALAR_FLAG
),
88 OPN(RCP
, 1 | SCALAR_FLAG
),
89 OPN(RSQ
, 1 | SCALAR_FLAG
),
103 int r300VertexProgUpdateParams(GLcontext
* ctx
,
104 struct r300_vertex_program_cont
*vp
, float *dst
)
107 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
109 struct gl_program_parameter_list
*paramList
;
111 if (mesa_vp
->IsNVProgram
) {
112 _mesa_load_tracked_matrices(ctx
);
114 for (pi
= 0; pi
< MAX_NV_VERTEX_PROGRAM_PARAMS
; pi
++) {
115 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][0];
116 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][1];
117 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][2];
118 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][3];
123 assert(mesa_vp
->Base
.Parameters
);
124 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
126 if (mesa_vp
->Base
.Parameters
->NumParameters
* 4 >
127 VSF_MAX_FRAGMENT_LENGTH
) {
128 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
132 paramList
= mesa_vp
->Base
.Parameters
;
133 for (pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
134 switch (paramList
->Parameters
[pi
].Type
) {
136 case PROGRAM_STATE_VAR
:
137 case PROGRAM_NAMED_PARAM
:
138 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
139 case PROGRAM_CONSTANT
:
140 *dst
++ = paramList
->ParameterValues
[pi
][0];
141 *dst
++ = paramList
->ParameterValues
[pi
][1];
142 *dst
++ = paramList
->ParameterValues
[pi
][2];
143 *dst
++ = paramList
->ParameterValues
[pi
][3];
147 _mesa_problem(NULL
, "Bad param type in %s",
156 static unsigned long t_dst_mask(GLuint mask
)
158 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
159 return mask
& VSF_FLAG_ALL
;
162 static unsigned long t_dst_class(enum register_file file
)
166 case PROGRAM_TEMPORARY
:
167 return VSF_OUT_CLASS_TMP
;
169 return VSF_OUT_CLASS_RESULT
;
170 case PROGRAM_ADDRESS
:
171 return VSF_OUT_CLASS_ADDR
;
174 case PROGRAM_LOCAL_PARAM:
175 case PROGRAM_ENV_PARAM:
176 case PROGRAM_NAMED_PARAM:
177 case PROGRAM_STATE_VAR:
178 case PROGRAM_WRITE_ONLY:
179 case PROGRAM_ADDRESS:
182 fprintf(stderr
, "problem in %s", __FUNCTION__
);
188 static unsigned long t_dst_index(struct r300_vertex_program
*vp
,
189 struct prog_dst_register
*dst
)
191 if (dst
->File
== PROGRAM_OUTPUT
)
192 return vp
->outputs
[dst
->Index
];
197 static unsigned long t_src_class(enum register_file file
)
201 case PROGRAM_TEMPORARY
:
202 return VSF_IN_CLASS_TMP
;
205 return VSF_IN_CLASS_ATTR
;
207 case PROGRAM_LOCAL_PARAM
:
208 case PROGRAM_ENV_PARAM
:
209 case PROGRAM_NAMED_PARAM
:
210 case PROGRAM_STATE_VAR
:
211 return VSF_IN_CLASS_PARAM
;
214 case PROGRAM_WRITE_ONLY:
215 case PROGRAM_ADDRESS:
218 fprintf(stderr
, "problem in %s", __FUNCTION__
);
224 static __inline
unsigned long t_swizzle(GLubyte swizzle
)
226 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
231 static void vp_dump_inputs(struct r300_vertex_program
*vp
, char *caller
)
236 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
,
241 fprintf(stderr
, "%s:<", caller
);
242 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
243 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
244 fprintf(stderr
, ">\n");
249 static unsigned long t_src_index(struct r300_vertex_program
*vp
,
250 struct prog_src_register
*src
)
255 if (src
->File
== PROGRAM_INPUT
) {
256 if (vp
->inputs
[src
->Index
] != -1)
257 return vp
->inputs
[src
->Index
];
259 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
260 if (vp
->inputs
[i
] > max_reg
)
261 max_reg
= vp
->inputs
[i
];
263 vp
->inputs
[src
->Index
] = max_reg
+ 1;
265 //vp_dump_inputs(vp, __FUNCTION__);
267 return vp
->inputs
[src
->Index
];
269 if (src
->Index
< 0) {
271 "negative offsets for indirect addressing do not work.\n");
278 static unsigned long t_src(struct r300_vertex_program
*vp
,
279 struct prog_src_register
*src
)
281 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
282 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
284 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
285 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
286 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
287 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
288 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
289 t_src_class(src
->File
),
290 src
->NegateBase
) | (src
->RelAddr
<< 4);
293 static unsigned long t_src_scalar(struct r300_vertex_program
*vp
,
294 struct prog_src_register
*src
)
297 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
298 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
299 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
300 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
301 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
302 t_src_class(src
->File
),
304 NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
308 static unsigned long t_opcode(enum prog_opcode opcode
)
313 case OPCODE_ARL
: return R300_VPI_OUT_OP_ARL
;
314 case OPCODE_DST
: return R300_VPI_OUT_OP_DST
;
315 case OPCODE_EX2
: return R300_VPI_OUT_OP_EX2
;
316 case OPCODE_EXP
: return R300_VPI_OUT_OP_EXP
;
317 case OPCODE_FRC
: return R300_VPI_OUT_OP_FRC
;
318 case OPCODE_LG2
: return R300_VPI_OUT_OP_LG2
;
319 case OPCODE_LOG
: return R300_VPI_OUT_OP_LOG
;
320 case OPCODE_MAX
: return R300_VPI_OUT_OP_MAX
;
321 case OPCODE_MIN
: return R300_VPI_OUT_OP_MIN
;
322 case OPCODE_MUL
: return R300_VPI_OUT_OP_MUL
;
323 case OPCODE_RCP
: return R300_VPI_OUT_OP_RCP
;
324 case OPCODE_RSQ
: return R300_VPI_OUT_OP_RSQ
;
325 case OPCODE_SGE
: return R300_VPI_OUT_OP_SGE
;
326 case OPCODE_SLT
: return R300_VPI_OUT_OP_SLT
;
327 case OPCODE_DP4
: return R300_VPI_OUT_OP_DOT
;
331 fprintf(stderr
, "%s: Should not be called with opcode %d!",
332 __FUNCTION__
, opcode
);
338 static unsigned long op_operands(enum prog_opcode opcode
)
342 /* Can we trust Mesa's opcodes to be in order? */
343 for (i
= 0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
344 if (op_names
[i
].opcode
== opcode
)
345 return op_names
[i
].ip
;
347 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
352 static GLboolean
valid_dst(struct r300_vertex_program
*vp
,
353 struct prog_dst_register
*dst
)
355 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
357 } else if (dst
->File
== PROGRAM_ADDRESS
) {
358 assert(dst
->Index
== 0);
/* TODO: Get rid of t_src_class call */
/*
 * CMP_SRCS(a, b) - nonzero when source registers a and b conflict:
 *   - their RelAddr fields differ, or
 *   - their Index fields differ and both belong to the PARAM class, or
 *     both belong to the ATTR class (as reported by t_src_class()).
 *
 * The three-operand path of the translator uses a nonzero result to copy
 * one of the operands into a temporary before emitting the instruction.
 *
 * a and b are struct prog_src_register lvalues or expressions.  Each
 * argument is evaluated several times, so it must be free of side effects.
 */
#define CMP_SRCS(a, b) \
	(((a).RelAddr != (b).RelAddr) || \
	 ((a).Index != (b).Index && \
	  ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
	    t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
	   (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
	    t_src_class((b).File) == VSF_IN_CLASS_ATTR))))
/*
 * CONST_SWZ_SRC(n, swz) - hardware source word that refers to the same
 * register as src[n] (index via t_src_index(), class via t_src_class(),
 * RelAddr bit preserved) but selects the constant swizzle `swz` for all four
 * components and applies no negation.
 *
 * Must be expanded where `vp` (struct r300_vertex_program *) and `src`
 * (array of struct prog_src_register) are in scope.
 */
#define CONST_SWZ_SRC(n, swz) \
	(MAKE_VSF_SOURCE(t_src_index(vp, &src[n]), \
			 swz, swz, \
			 swz, swz, \
			 t_src_class(src[n].File), VSF_FLAG_NONE) | \
	 (src[n].RelAddr << 4))

/* All-zero operand taken from the register named by src[0..2]. */
#define ZERO_SRC_0 CONST_SWZ_SRC(0, SWIZZLE_ZERO)
#define ZERO_SRC_1 CONST_SWZ_SRC(1, SWIZZLE_ZERO)
#define ZERO_SRC_2 CONST_SWZ_SRC(2, SWIZZLE_ZERO)

/* All-one operand taken from the register named by src[0..2]. */
#define ONE_SRC_0 CONST_SWZ_SRC(0, SWIZZLE_ONE)
#define ONE_SRC_1 CONST_SWZ_SRC(1, SWIZZLE_ONE)
#define ONE_SRC_2 CONST_SWZ_SRC(2, SWIZZLE_ONE)
401 /* DP4 version seems to trigger some hw peculiarity */
404 #define FREE_TEMPS() \
406 if(u_temp_i < vp->num_temporaries) { \
407 WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
408 vp->native = GL_FALSE; \
410 u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
413 static void r300_translate_vertex_shader(struct r300_vertex_program
*vp
,
414 struct prog_instruction
*vpi
)
417 VERTEX_SHADER_INSTRUCTION
*o_inst
;
418 unsigned long operands
;
421 /* Initial value should be the last tmp reg that the hw supports.
422 Strangely enough, r300 doesn't mind even though these would be out of range.
423 Smart enough to realize that it doesn't need it? */
424 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
425 struct prog_src_register src
[3];
427 vp
->pos_end
= 0; /* Not supported yet */
428 vp
->program
.length
= 0;
429 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
431 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
434 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
437 assert(vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
440 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
))
441 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
443 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
444 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
446 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL0
))
447 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
449 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL1
))
450 vp
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
452 #if 0 /* Not supported yet */
453 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
))
454 vp
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
456 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
))
457 vp
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
459 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_FOGC
))
460 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
463 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++)
464 if (vp
->key
.OutputsWritten
& (1 << i
))
465 vp
->outputs
[i
] = cur_reg
++;
467 vp
->translated
= GL_TRUE
;
468 vp
->native
= GL_TRUE
;
470 o_inst
= vp
->program
.body
.i
;
471 for (; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++) {
474 if (!valid_dst(vp
, &vpi
->DstReg
)) {
475 /* redirect result to unused temp */
476 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
477 vpi
->DstReg
.Index
= u_temp_i
;
480 operands
= op_operands(vpi
->Opcode
);
481 are_srcs_scalar
= operands
& SCALAR_FLAG
;
484 for (i
= 0; i
< operands
; i
++)
485 src
[i
] = vpi
->SrcReg
[i
];
487 if (operands
== 3) { /* TODO: scalars */
488 if (CMP_SRCS(src
[1], src
[2])
489 || CMP_SRCS(src
[0], src
[2])) {
491 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, u_temp_i
,
496 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
497 SWIZZLE_X
, SWIZZLE_Y
,
498 SWIZZLE_Z
, SWIZZLE_W
,
499 t_src_class(src
[2].File
),
500 VSF_FLAG_NONE
) | (src
[2].
504 o_inst
->src
[1] = ZERO_SRC_2
;
505 o_inst
->src
[2] = ZERO_SRC_2
;
508 src
[2].File
= PROGRAM_TEMPORARY
;
509 src
[2].Index
= u_temp_i
;
517 if (CMP_SRCS(src
[1], src
[0])) {
519 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, u_temp_i
,
524 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
525 SWIZZLE_X
, SWIZZLE_Y
,
526 SWIZZLE_Z
, SWIZZLE_W
,
527 t_src_class(src
[0].File
),
528 VSF_FLAG_NONE
) | (src
[0].
532 o_inst
->src
[1] = ZERO_SRC_0
;
533 o_inst
->src
[2] = ZERO_SRC_0
;
536 src
[0].File
= PROGRAM_TEMPORARY
;
537 src
[0].Index
= u_temp_i
;
543 /* These ops need special handling. */
544 switch (vpi
->Opcode
) {
547 MAKE_VSF_OP(R300_VPI_OUT_OP_POW
,
548 t_dst_index(vp
, &vpi
->DstReg
),
549 t_dst_mask(vpi
->DstReg
.WriteMask
),
550 t_dst_class(vpi
->DstReg
.File
));
551 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
552 o_inst
->src
[1] = ZERO_SRC_0
;
553 o_inst
->src
[2] = t_src_scalar(vp
, &src
[1]);
556 case OPCODE_MOV
: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
560 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
561 t_dst_index(vp
, &vpi
->DstReg
),
562 t_dst_mask(vpi
->DstReg
.WriteMask
),
563 t_dst_class(vpi
->DstReg
.File
));
564 o_inst
->src
[0] = t_src(vp
, &src
[0]);
565 o_inst
->src
[1] = ZERO_SRC_0
;
566 o_inst
->src
[2] = ZERO_SRC_0
;
570 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
574 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
575 t_dst_mask(vpi
->DstReg
.WriteMask
),
576 t_dst_class(vpi
->DstReg
.File
));
577 o_inst
->src
[0] = t_src(vp
, &src
[0]);
578 o_inst
->src
[1] = ONE_SRC_0
;
579 o_inst
->src
[2] = ZERO_SRC_0
;
586 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
588 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
592 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
593 t_dst_mask(vpi
->DstReg
.WriteMask
),
594 t_dst_class(vpi
->DstReg
.File
));
595 o_inst
->src
[0] = ONE_SRC_0
;
596 o_inst
->src
[1] = t_src(vp
, &src
[0]);
597 o_inst
->src
[2] = t_src(vp
, &src
[1]);
600 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
601 t_dst_index(vp
, &vpi
->DstReg
),
602 t_dst_mask(vpi
->DstReg
.WriteMask
),
603 t_dst_class(vpi
->DstReg
.File
));
604 o_inst
->src
[0] = t_src(vp
, &src
[0]);
605 o_inst
->src
[1] = t_src(vp
, &src
[1]);
606 o_inst
->src
[2] = ZERO_SRC_1
;
612 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
613 src
[1].File
== PROGRAM_TEMPORARY
&&
615 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
619 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
620 t_dst_mask(vpi
->DstReg
.WriteMask
),
621 t_dst_class(vpi
->DstReg
.File
));
622 o_inst
->src
[0] = t_src(vp
, &src
[0]);
623 o_inst
->src
[1] = t_src(vp
, &src
[1]);
624 o_inst
->src
[2] = t_src(vp
, &src
[2]);
627 case OPCODE_MUL
: /* HW mul can take third arg but appears to have some other limitations. */
628 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
630 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
634 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
635 t_dst_mask(vpi
->DstReg
.WriteMask
),
636 t_dst_class(vpi
->DstReg
.File
));
637 o_inst
->src
[0] = t_src(vp
, &src
[0]);
638 o_inst
->src
[1] = t_src(vp
, &src
[1]);
640 o_inst
->src
[2] = ZERO_SRC_1
;
643 case OPCODE_DP3
: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
645 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT
,
646 t_dst_index(vp
, &vpi
->DstReg
),
647 t_dst_mask(vpi
->DstReg
.WriteMask
),
648 t_dst_class(vpi
->DstReg
.File
));
651 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
653 (src
[0].Swizzle
, 0)),
655 (src
[0].Swizzle
, 1)),
657 (src
[0].Swizzle
, 2)),
659 t_src_class(src
[0].File
),
661 NegateBase
? VSF_FLAG_XYZ
:
662 VSF_FLAG_NONE
) | (src
[0].
666 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
668 (src
[1].Swizzle
, 0)),
670 (src
[1].Swizzle
, 1)),
672 (src
[1].Swizzle
, 2)),
674 t_src_class(src
[1].File
),
676 NegateBase
? VSF_FLAG_XYZ
:
677 VSF_FLAG_NONE
) | (src
[1].
680 o_inst
->src
[2] = ZERO_SRC_1
;
683 case OPCODE_SUB
: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
685 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
687 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
691 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
692 t_dst_mask(vpi
->DstReg
.WriteMask
),
693 t_dst_class(vpi
->DstReg
.File
));
694 o_inst
->src
[0] = t_src(vp
, &src
[0]);
695 o_inst
->src
[1] = ONE_SRC_0
;
697 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
699 (src
[1].Swizzle
, 0)),
701 (src
[1].Swizzle
, 1)),
703 (src
[1].Swizzle
, 2)),
705 (src
[1].Swizzle
, 3)),
706 t_src_class(src
[1].File
),
708 NegateBase
) ? VSF_FLAG_ALL
:
709 VSF_FLAG_NONE
) | (src
[1].
713 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
714 t_dst_index(vp
, &vpi
->DstReg
),
715 t_dst_mask(vpi
->DstReg
.WriteMask
),
716 t_dst_class(vpi
->DstReg
.File
));
718 o_inst
->src
[0] = t_src(vp
, &src
[0]);
720 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
722 (src
[1].Swizzle
, 0)),
724 (src
[1].Swizzle
, 1)),
726 (src
[1].Swizzle
, 2)),
728 (src
[1].Swizzle
, 3)),
729 t_src_class(src
[1].File
),
731 NegateBase
) ? VSF_FLAG_ALL
:
732 VSF_FLAG_NONE
) | (src
[1].
738 case OPCODE_ABS
: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
740 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX
,
741 t_dst_index(vp
, &vpi
->DstReg
),
742 t_dst_mask(vpi
->DstReg
.WriteMask
),
743 t_dst_class(vpi
->DstReg
.File
));
745 o_inst
->src
[0] = t_src(vp
, &src
[0]);
747 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
749 (src
[0].Swizzle
, 0)),
751 (src
[0].Swizzle
, 1)),
753 (src
[0].Swizzle
, 2)),
755 (src
[0].Swizzle
, 3)),
756 t_src_class(src
[0].File
),
758 NegateBase
) ? VSF_FLAG_ALL
:
759 VSF_FLAG_NONE
) | (src
[0].
765 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
766 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
768 o_inst
->op
= MAKE_VSF_OP(R300_VPI_OUT_OP_FRC
, u_temp_i
,
769 t_dst_mask(vpi
->DstReg
.
773 o_inst
->src
[0] = t_src(vp
, &src
[0]);
774 o_inst
->src
[1] = ZERO_SRC_0
;
775 o_inst
->src
[2] = ZERO_SRC_0
;
779 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
780 t_dst_index(vp
, &vpi
->DstReg
),
781 t_dst_mask(vpi
->DstReg
.WriteMask
),
782 t_dst_class(vpi
->DstReg
.File
));
784 o_inst
->src
[0] = t_src(vp
, &src
[0]);
785 o_inst
->src
[1] = MAKE_VSF_SOURCE(u_temp_i
,
791 /* Not 100% sure about this */
798 o_inst
->src
[2] = ZERO_SRC_0
;
802 case OPCODE_LG2
: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
804 MAKE_VSF_OP(R300_VPI_OUT_OP_LG2
,
805 t_dst_index(vp
, &vpi
->DstReg
),
806 t_dst_mask(vpi
->DstReg
.WriteMask
),
807 t_dst_class(vpi
->DstReg
.File
));
810 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
812 (src
[0].Swizzle
, 0)),
814 (src
[0].Swizzle
, 0)),
816 (src
[0].Swizzle
, 0)),
818 (src
[0].Swizzle
, 0)),
819 t_src_class(src
[0].File
),
821 NegateBase
? VSF_FLAG_ALL
:
822 VSF_FLAG_NONE
) | (src
[0].
824 o_inst
->src
[1] = ZERO_SRC_0
;
825 o_inst
->src
[2] = ZERO_SRC_0
;
828 case OPCODE_LIT
: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
830 MAKE_VSF_OP(R300_VPI_OUT_OP_LIT
,
831 t_dst_index(vp
, &vpi
->DstReg
),
832 t_dst_mask(vpi
->DstReg
.WriteMask
),
833 t_dst_class(vpi
->DstReg
.File
));
834 /* NOTE: Users swizzling might not work. */
835 o_inst
->src
[0] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
836 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
837 VSF_IN_COMPONENT_ZERO
, // z
838 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
845 (src
[0].RelAddr
<< 4);
846 o_inst
->src
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
847 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
848 VSF_IN_COMPONENT_ZERO
, // z
849 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
856 (src
[0].RelAddr
<< 4);
857 o_inst
->src
[2] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
858 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
859 VSF_IN_COMPONENT_ZERO
, // z
860 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
867 (src
[0].RelAddr
<< 4);
870 case OPCODE_DPH
: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
872 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT
,
873 t_dst_index(vp
, &vpi
->DstReg
),
874 t_dst_mask(vpi
->DstReg
.WriteMask
),
875 t_dst_class(vpi
->DstReg
.File
));
878 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
880 (src
[0].Swizzle
, 0)),
882 (src
[0].Swizzle
, 1)),
884 (src
[0].Swizzle
, 2)),
885 VSF_IN_COMPONENT_ONE
,
886 t_src_class(src
[0].File
),
888 NegateBase
? VSF_FLAG_XYZ
:
889 VSF_FLAG_NONE
) | (src
[0].
891 o_inst
->src
[1] = t_src(vp
, &src
[1]);
892 o_inst
->src
[2] = ZERO_SRC_1
;
896 /* mul r0, r1.yzxw, r2.zxyw
897 mad r0, -r2.yzxw, r1.zxyw, r0
898 NOTE: might need MAD_2
901 o_inst
->op
= MAKE_VSF_OP(R300_VPI_OUT_OP_MAD
, u_temp_i
,
902 t_dst_mask(vpi
->DstReg
.
906 o_inst
->src
[0] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
907 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
908 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
909 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
916 (src
[0].RelAddr
<< 4);
918 o_inst
->src
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
919 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
920 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
921 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
928 (src
[1].RelAddr
<< 4);
930 o_inst
->src
[2] = ZERO_SRC_1
;
935 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD
,
936 t_dst_index(vp
, &vpi
->DstReg
),
937 t_dst_mask(vpi
->DstReg
.WriteMask
),
938 t_dst_class(vpi
->DstReg
.File
));
940 o_inst
->src
[0] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
941 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
942 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
943 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
950 (src
[1].RelAddr
<< 4);
952 o_inst
->src
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
953 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
954 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
955 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
962 (src
[0].RelAddr
<< 4);
964 o_inst
->src
[2] = MAKE_VSF_SOURCE(u_temp_i
+ 1,
975 fprintf(stderr
, "Dont know how to handle op %d yet\n",
986 MAKE_VSF_OP(t_opcode(vpi
->Opcode
),
987 t_dst_index(vp
, &vpi
->DstReg
),
988 t_dst_mask(vpi
->DstReg
.WriteMask
),
989 t_dst_class(vpi
->DstReg
.File
));
991 if (are_srcs_scalar
) {
994 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
995 o_inst
->src
[1] = ZERO_SRC_0
;
996 o_inst
->src
[2] = ZERO_SRC_0
;
1000 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
1001 o_inst
->src
[1] = t_src_scalar(vp
, &src
[1]);
1002 o_inst
->src
[2] = ZERO_SRC_1
;
1006 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
1007 o_inst
->src
[1] = t_src_scalar(vp
, &src
[1]);
1008 o_inst
->src
[2] = t_src_scalar(vp
, &src
[2]);
1013 "scalars and op RCC not handled yet");
1020 o_inst
->src
[0] = t_src(vp
, &src
[0]);
1021 o_inst
->src
[1] = ZERO_SRC_0
;
1022 o_inst
->src
[2] = ZERO_SRC_0
;
1026 o_inst
->src
[0] = t_src(vp
, &src
[0]);
1027 o_inst
->src
[1] = t_src(vp
, &src
[1]);
1028 o_inst
->src
[2] = ZERO_SRC_1
;
1032 o_inst
->src
[0] = t_src(vp
, &src
[0]);
1033 o_inst
->src
[1] = t_src(vp
, &src
[1]);
1034 o_inst
->src
[2] = t_src(vp
, &src
[2]);
1039 "scalars and op RCC not handled yet");
1047 /* Will most likely segfault before we get here... fix later. */
1048 if (o_inst
- vp
->program
.body
.i
>= VSF_MAX_FRAGMENT_LENGTH
/ 4) {
1049 vp
->program
.length
= 0;
1050 vp
->native
= GL_FALSE
;
1053 vp
->program
.length
= (o_inst
- vp
->program
.body
.i
) * 4;
1055 fprintf(stderr
, "hw program:\n");
1056 for (i
= 0; i
< vp
->program
.length
; i
++)
1057 fprintf(stderr
, "%08x\n", vp
->program
.body
.d
[i
]);
1061 static void position_invariant(struct gl_program
*prog
)
1063 struct prog_instruction
*vpi
;
1064 struct gl_program_parameter_list
*paramList
;
1067 gl_state_index tokens
[STATE_LENGTH
] = { STATE_MVP_MATRIX
, 0, 0, 0, 0 };
1069 /* tokens[4] = matrix modifier */
1071 tokens
[4] = 0; /* not transposed or inverted */
1073 tokens
[4] = STATE_MATRIX_TRANSPOSE
;
1075 paramList
= prog
->Parameters
;
1077 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 4);
1078 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 4);
1080 for (i
= 0; i
< 4; i
++) {
1082 tokens
[2] = tokens
[3] = i
; /* matrix row[i]..row[i] */
1083 idx
= _mesa_add_state_reference(paramList
, tokens
);
1085 vpi
[i
].Opcode
= OPCODE_DP4
;
1086 vpi
[i
].StringPos
= 0;
1089 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1090 vpi
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1091 vpi
[i
].DstReg
.WriteMask
= 1 << i
;
1092 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1094 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1095 vpi
[i
].SrcReg
[0].Index
= idx
;
1096 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1098 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1099 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1100 vpi
[i
].SrcReg
[1].Swizzle
= SWIZZLE_XYZW
;
1103 vpi
[i
].Opcode
= OPCODE_MUL
;
1105 vpi
[i
].Opcode
= OPCODE_MAD
;
1107 vpi
[i
].StringPos
= 0;
1111 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1113 vpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
1114 vpi
[i
].DstReg
.Index
= 0;
1115 vpi
[i
].DstReg
.WriteMask
= 0xf;
1116 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1118 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1119 vpi
[i
].SrcReg
[0].Index
= idx
;
1120 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1122 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1123 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1124 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(i
, i
, i
, i
);
1127 vpi
[i
].SrcReg
[2].File
= PROGRAM_TEMPORARY
;
1128 vpi
[i
].SrcReg
[2].Index
= 0;
1129 vpi
[i
].SrcReg
[2].Swizzle
= SWIZZLE_XYZW
;
1134 _mesa_copy_instructions(&vpi
[i
], prog
->Instructions
,
1135 prog
->NumInstructions
);
1137 free(prog
->Instructions
);
1139 prog
->Instructions
= vpi
;
1141 prog
->NumInstructions
+= 4;
1142 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1144 assert(vpi
->Opcode
== OPCODE_END
);
1147 static void insert_wpos(struct r300_vertex_program
*vp
,
1148 struct gl_program
*prog
, GLuint temp_index
)
1150 struct prog_instruction
*vpi
;
1151 struct prog_instruction
*vpi_insert
;
1154 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 2);
1155 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 2);
1157 _mesa_copy_instructions(vpi
, prog
->Instructions
,
1158 prog
->NumInstructions
- 1);
1160 _mesa_copy_instructions(&vpi
[prog
->NumInstructions
+ 1],
1161 &prog
->Instructions
[prog
->NumInstructions
- 1],
1163 vpi_insert
= &vpi
[prog
->NumInstructions
- 1];
1165 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1167 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1168 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1169 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1170 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1172 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1173 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1174 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1177 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1179 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1180 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_TEX0
+ vp
->wpos_idx
;
1181 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1182 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1184 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1185 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1186 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1189 free(prog
->Instructions
);
1191 prog
->Instructions
= vpi
;
1193 prog
->NumInstructions
+= i
;
1194 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1196 assert(vpi
->Opcode
== OPCODE_END
);
1199 static void pos_as_texcoord(struct r300_vertex_program
*vp
,
1200 struct gl_program
*prog
)
1202 struct prog_instruction
*vpi
;
1203 GLuint tempregi
= prog
->NumTemporaries
;
1204 /* should do something else if no temps left... */
1205 prog
->NumTemporaries
++;
1207 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
1208 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&&
1209 vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
1210 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1211 vpi
->DstReg
.Index
= tempregi
;
1214 insert_wpos(vp
, prog
, tempregi
);
1217 static struct r300_vertex_program
*build_program(struct r300_vertex_program_key
1218 *wanted_key
, struct gl_vertex_program
1219 *mesa_vp
, GLint wpos_idx
)
1221 struct r300_vertex_program
*vp
;
1223 vp
= _mesa_calloc(sizeof(*vp
));
1224 _mesa_memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
1226 vp
->wpos_idx
= wpos_idx
;
1228 if (mesa_vp
->IsPositionInvariant
) {
1229 position_invariant(&mesa_vp
->Base
);
1233 pos_as_texcoord(vp
, &mesa_vp
->Base
);
1235 assert(mesa_vp
->Base
.NumInstructions
);
1237 vp
->num_temporaries
= mesa_vp
->Base
.NumTemporaries
;
1239 r300_translate_vertex_shader(vp
, mesa_vp
->Base
.Instructions
);
1244 void r300_select_vertex_shader(r300ContextPtr r300
)
1246 GLcontext
*ctx
= ctx
= r300
->radeon
.glCtx
;
1248 struct r300_vertex_program_key wanted_key
= { 0 };
1250 struct r300_vertex_program_cont
*vpc
;
1251 struct r300_vertex_program
*vp
;
1254 vpc
= (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
1255 InputsRead
= ctx
->FragmentProgram
._Current
->Base
.InputsRead
;
1257 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_HPOS
;
1260 if (InputsRead
& FRAG_BIT_WPOS
) {
1261 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1262 if (!(InputsRead
& (FRAG_BIT_TEX0
<< i
)))
1265 if (i
== ctx
->Const
.MaxTextureUnits
) {
1266 fprintf(stderr
, "\tno free texcoord found\n");
1270 InputsRead
|= (FRAG_BIT_TEX0
<< i
);
1274 if (InputsRead
& FRAG_BIT_COL0
)
1275 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_COL0
;
1277 if ((InputsRead
& FRAG_BIT_COL1
) /*||
1278 (InputsRead & FRAG_BIT_FOGC) */ )
1279 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_COL1
;
1281 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1282 if (InputsRead
& (FRAG_BIT_TEX0
<< i
))
1283 wanted_key
.OutputsWritten
|=
1284 1 << (VERT_RESULT_TEX0
+ i
);
1286 wanted_key
.InputsRead
= vpc
->mesa_program
.Base
.InputsRead
;
1287 if (vpc
->mesa_program
.IsPositionInvariant
) {
1288 /* we want position, don't we? */
1289 wanted_key
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
1292 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
)
1293 if (_mesa_memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
)) ==
1295 r300
->selected_vp
= vp
;
1298 //_mesa_print_program(&vpc->mesa_program.Base);
1300 vp
= build_program(&wanted_key
, &vpc
->mesa_program
, wpos_idx
);
1301 vp
->next
= vpc
->progs
;
1303 r300
->selected_vp
= vp
;