1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Aapo Tahkola <aet@rasterburn.org>
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_parameter.h"
38 #include "shader/prog_statevars.h"
41 #include "r300_context.h"
43 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
44 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
45 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
46 SWIZZLE_W != VSF_IN_COMPONENT_W || \
47 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
48 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
49 WRITEMASK_X != VSF_FLAG_X || \
50 WRITEMASK_Y != VSF_FLAG_Y || \
51 WRITEMASK_Z != VSF_FLAG_Z || \
52 WRITEMASK_W != VSF_FLAG_W
53 #error Cannot change these!
56 #define SCALAR_FLAG (1<<31)
57 #define FLAG_MASK (1<<31)
58 #define OP_MASK (0xf) /* we are unlikely to have more than 15 */
59 #define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
64 unsigned long ip
; /* number of input operands and flags */
69 OPN(ARL
, 1 | SCALAR_FLAG
),
74 OPN(EX2
, 1 | SCALAR_FLAG
),
75 OPN(EXP
, 1 | SCALAR_FLAG
),
78 OPN(LG2
, 1 | SCALAR_FLAG
),
80 OPN(LOG
, 1 | SCALAR_FLAG
),
86 OPN(POW
, 2 | SCALAR_FLAG
),
87 OPN(RCP
, 1 | SCALAR_FLAG
),
88 OPN(RSQ
, 1 | SCALAR_FLAG
),
102 int r300VertexProgUpdateParams(GLcontext
* ctx
,
103 struct r300_vertex_program_cont
*vp
, float *dst
)
106 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
108 struct gl_program_parameter_list
*paramList
;
110 if (mesa_vp
->IsNVProgram
) {
111 _mesa_load_tracked_matrices(ctx
);
113 for (pi
= 0; pi
< MAX_NV_VERTEX_PROGRAM_PARAMS
; pi
++) {
114 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][0];
115 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][1];
116 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][2];
117 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][3];
122 assert(mesa_vp
->Base
.Parameters
);
123 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
125 if (mesa_vp
->Base
.Parameters
->NumParameters
* 4 >
126 VSF_MAX_FRAGMENT_LENGTH
) {
127 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
131 paramList
= mesa_vp
->Base
.Parameters
;
132 for (pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
133 switch (paramList
->Parameters
[pi
].Type
) {
135 case PROGRAM_STATE_VAR
:
136 case PROGRAM_NAMED_PARAM
:
137 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
138 case PROGRAM_CONSTANT
:
139 *dst
++ = paramList
->ParameterValues
[pi
][0];
140 *dst
++ = paramList
->ParameterValues
[pi
][1];
141 *dst
++ = paramList
->ParameterValues
[pi
][2];
142 *dst
++ = paramList
->ParameterValues
[pi
][3];
146 _mesa_problem(NULL
, "Bad param type in %s",
155 static unsigned long t_dst_mask(GLuint mask
)
157 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
158 return mask
& VSF_FLAG_ALL
;
161 static unsigned long t_dst_class(enum register_file file
)
165 case PROGRAM_TEMPORARY
:
166 return VSF_OUT_CLASS_TMP
;
168 return VSF_OUT_CLASS_RESULT
;
169 case PROGRAM_ADDRESS
:
170 return VSF_OUT_CLASS_ADDR
;
173 case PROGRAM_LOCAL_PARAM:
174 case PROGRAM_ENV_PARAM:
175 case PROGRAM_NAMED_PARAM:
176 case PROGRAM_STATE_VAR:
177 case PROGRAM_WRITE_ONLY:
178 case PROGRAM_ADDRESS:
181 fprintf(stderr
, "problem in %s", __FUNCTION__
);
187 static unsigned long t_dst_index(struct r300_vertex_program
*vp
,
188 struct prog_dst_register
*dst
)
190 if (dst
->File
== PROGRAM_OUTPUT
)
191 return vp
->outputs
[dst
->Index
];
196 static unsigned long t_src_class(enum register_file file
)
200 case PROGRAM_TEMPORARY
:
201 return VSF_IN_CLASS_TMP
;
204 return VSF_IN_CLASS_ATTR
;
206 case PROGRAM_LOCAL_PARAM
:
207 case PROGRAM_ENV_PARAM
:
208 case PROGRAM_NAMED_PARAM
:
209 case PROGRAM_STATE_VAR
:
210 return VSF_IN_CLASS_PARAM
;
213 case PROGRAM_WRITE_ONLY:
214 case PROGRAM_ADDRESS:
217 fprintf(stderr
, "problem in %s", __FUNCTION__
);
223 static __inline
unsigned long t_swizzle(GLubyte swizzle
)
225 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
230 static void vp_dump_inputs(struct r300_vertex_program
*vp
, char *caller
)
235 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
,
240 fprintf(stderr
, "%s:<", caller
);
241 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
242 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
243 fprintf(stderr
, ">\n");
248 static unsigned long t_src_index(struct r300_vertex_program
*vp
,
249 struct prog_src_register
*src
)
254 if (src
->File
== PROGRAM_INPUT
) {
255 if (vp
->inputs
[src
->Index
] != -1)
256 return vp
->inputs
[src
->Index
];
258 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
259 if (vp
->inputs
[i
] > max_reg
)
260 max_reg
= vp
->inputs
[i
];
262 vp
->inputs
[src
->Index
] = max_reg
+ 1;
264 //vp_dump_inputs(vp, __FUNCTION__);
266 return vp
->inputs
[src
->Index
];
268 if (src
->Index
< 0) {
270 "negative offsets for indirect addressing do not work.\n");
277 static unsigned long t_src(struct r300_vertex_program
*vp
,
278 struct prog_src_register
*src
)
280 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
281 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
283 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
284 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
285 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
286 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
287 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
288 t_src_class(src
->File
),
289 src
->NegateBase
) | (src
->RelAddr
<< 4);
292 static unsigned long t_src_scalar(struct r300_vertex_program
*vp
,
293 struct prog_src_register
*src
)
296 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
297 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
298 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
299 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
300 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
301 t_src_class(src
->File
),
303 NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
307 static unsigned long t_opcode(enum prog_opcode opcode
)
312 case OPCODE_ARL
: return R300_VPI_OUT_OP_ARL
;
313 case OPCODE_DST
: return R300_VPI_OUT_OP_DST
;
314 case OPCODE_EX2
: return R300_VPI_OUT_OP_EX2
;
315 case OPCODE_EXP
: return R300_VPI_OUT_OP_EXP
;
316 case OPCODE_FRC
: return R300_VPI_OUT_OP_FRC
;
317 case OPCODE_LG2
: return R300_VPI_OUT_OP_LG2
;
318 case OPCODE_LOG
: return R300_VPI_OUT_OP_LOG
;
319 case OPCODE_MAX
: return R300_VPI_OUT_OP_MAX
;
320 case OPCODE_MIN
: return R300_VPI_OUT_OP_MIN
;
321 case OPCODE_MUL
: return R300_VPI_OUT_OP_MUL
;
322 case OPCODE_RCP
: return R300_VPI_OUT_OP_RCP
;
323 case OPCODE_RSQ
: return R300_VPI_OUT_OP_RSQ
;
324 case OPCODE_SGE
: return R300_VPI_OUT_OP_SGE
;
325 case OPCODE_SLT
: return R300_VPI_OUT_OP_SLT
;
326 case OPCODE_DP4
: return R300_VPI_OUT_OP_DOT
;
330 fprintf(stderr
, "%s: Should not be called with opcode %d!",
331 __FUNCTION__
, opcode
);
337 static unsigned long op_operands(enum prog_opcode opcode
)
341 /* Can we trust Mesa's opcodes to be in order? */
342 for (i
= 0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
343 if (op_names
[i
].opcode
== opcode
)
344 return op_names
[i
].ip
;
346 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
351 static GLboolean
valid_dst(struct r300_vertex_program
*vp
,
352 struct prog_dst_register
*dst
)
354 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
356 } else if (dst
->File
== PROGRAM_ADDRESS
) {
357 assert(dst
->Index
== 0);
/*
 * CMP_SRCS(a, b) compares two prog_src_register values and evaluates to
 * non-zero when either
 *   - their RelAddr flags differ, or
 *   - their Index fields differ while both map (via t_src_class()) to
 *     VSF_IN_CLASS_PARAM, or both map to VSF_IN_CLASS_ATTR.
 *
 * The translator uses it on three-operand instructions to decide whether one
 * of the operands has to be copied into a temporary first.
 *
 * Both arguments are evaluated several times, so pass plain lvalues only.
 * Arguments are parenthesized so that any lvalue expression is safe to pass.
 *
 * TODO: Get rid of t_src_class call
 */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
	((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
	  t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
	 (t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
	  t_src_class((b).File) == VSF_IN_CLASS_ATTR))))
/*
 * Constant-valued source operands.
 *
 * R300_CONST_SRC(n, comp) builds a source operand word that names the same
 * register index, register class and relative-addressing bit as src[n], but
 * selects the constant component `comp` for all four channels and applies no
 * negation (VSF_FLAG_NONE).  ZERO_SRC_n / ONE_SRC_n are the SWIZZLE_ZERO and
 * SWIZZLE_ONE instances for operand n.
 *
 * These macros expect `vp` (struct r300_vertex_program *) and `src`
 * (array of struct prog_src_register) to be in scope where they are expanded,
 * and each expansion calls t_src_index(vp, &src[n]) exactly once.
 */
#define R300_CONST_SRC(n, comp) (MAKE_VSF_SOURCE(t_src_index(vp, &src[n]), \
				   comp, comp, \
				   comp, comp, \
				   t_src_class(src[n].File), VSF_FLAG_NONE) | (src[n].RelAddr << 4))

#define ZERO_SRC_0 R300_CONST_SRC(0, SWIZZLE_ZERO)
#define ZERO_SRC_1 R300_CONST_SRC(1, SWIZZLE_ZERO)
#define ZERO_SRC_2 R300_CONST_SRC(2, SWIZZLE_ZERO)

#define ONE_SRC_0 R300_CONST_SRC(0, SWIZZLE_ONE)
#define ONE_SRC_1 R300_CONST_SRC(1, SWIZZLE_ONE)
#define ONE_SRC_2 R300_CONST_SRC(2, SWIZZLE_ONE)
400 /* DP4 version seems to trigger some hw peculiarity */
403 #define FREE_TEMPS() \
405 if(u_temp_i < vp->num_temporaries) { \
406 WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
407 vp->native = GL_FALSE; \
409 u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
412 static void r300TranslateVertexShader(struct r300_vertex_program
*vp
,
413 struct prog_instruction
*vpi
)
416 VERTEX_SHADER_INSTRUCTION
*o_inst
;
417 unsigned long operands
;
420 /* Initial value should be the last tmp reg that the hw supports.
421 Strangely enough, r300 doesn't mind even though these would be out of range.
422 Is it smart enough to realize that it doesn't need it? */
423 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
424 struct prog_src_register src
[3];
426 vp
->pos_end
= 0; /* Not supported yet */
427 vp
->program
.length
= 0;
428 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
430 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
433 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
436 assert(vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
439 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
))
440 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
442 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
443 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
445 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL0
))
446 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
448 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL1
))
449 vp
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
451 #if 0 /* Not supported yet */
452 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
))
453 vp
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
455 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
))
456 vp
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
458 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_FOGC
))
459 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
462 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++)
463 if (vp
->key
.OutputsWritten
& (1 << i
))
464 vp
->outputs
[i
] = cur_reg
++;
466 vp
->translated
= GL_TRUE
;
467 vp
->native
= GL_TRUE
;
469 o_inst
= vp
->program
.body
.i
;
470 for (; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++) {
473 if (!valid_dst(vp
, &vpi
->DstReg
)) {
474 /* redirect result to unused temp */
475 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
476 vpi
->DstReg
.Index
= u_temp_i
;
479 operands
= op_operands(vpi
->Opcode
);
480 are_srcs_scalar
= operands
& SCALAR_FLAG
;
483 for (i
= 0; i
< operands
; i
++)
484 src
[i
] = vpi
->SrcReg
[i
];
486 if (operands
== 3) { /* TODO: scalars */
487 if (CMP_SRCS(src
[1], src
[2])
488 || CMP_SRCS(src
[0], src
[2])) {
490 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, u_temp_i
,
495 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
496 SWIZZLE_X
, SWIZZLE_Y
,
497 SWIZZLE_Z
, SWIZZLE_W
,
498 t_src_class(src
[2].File
),
499 VSF_FLAG_NONE
) | (src
[2].
503 o_inst
->src
[1] = ZERO_SRC_2
;
504 o_inst
->src
[2] = ZERO_SRC_2
;
507 src
[2].File
= PROGRAM_TEMPORARY
;
508 src
[2].Index
= u_temp_i
;
516 if (CMP_SRCS(src
[1], src
[0])) {
518 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, u_temp_i
,
523 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
524 SWIZZLE_X
, SWIZZLE_Y
,
525 SWIZZLE_Z
, SWIZZLE_W
,
526 t_src_class(src
[0].File
),
527 VSF_FLAG_NONE
) | (src
[0].
531 o_inst
->src
[1] = ZERO_SRC_0
;
532 o_inst
->src
[2] = ZERO_SRC_0
;
535 src
[0].File
= PROGRAM_TEMPORARY
;
536 src
[0].Index
= u_temp_i
;
542 /* These ops need special handling. */
543 switch (vpi
->Opcode
) {
546 MAKE_VSF_OP(R300_VPI_OUT_OP_POW
,
547 t_dst_index(vp
, &vpi
->DstReg
),
548 t_dst_mask(vpi
->DstReg
.WriteMask
),
549 t_dst_class(vpi
->DstReg
.File
));
550 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
551 o_inst
->src
[1] = ZERO_SRC_0
;
552 o_inst
->src
[2] = t_src_scalar(vp
, &src
[1]);
555 case OPCODE_MOV
: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
559 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
560 t_dst_index(vp
, &vpi
->DstReg
),
561 t_dst_mask(vpi
->DstReg
.WriteMask
),
562 t_dst_class(vpi
->DstReg
.File
));
563 o_inst
->src
[0] = t_src(vp
, &src
[0]);
564 o_inst
->src
[1] = ZERO_SRC_0
;
565 o_inst
->src
[2] = ZERO_SRC_0
;
569 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
573 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
574 t_dst_mask(vpi
->DstReg
.WriteMask
),
575 t_dst_class(vpi
->DstReg
.File
));
576 o_inst
->src
[0] = t_src(vp
, &src
[0]);
577 o_inst
->src
[1] = ONE_SRC_0
;
578 o_inst
->src
[2] = ZERO_SRC_0
;
585 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
587 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
591 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
592 t_dst_mask(vpi
->DstReg
.WriteMask
),
593 t_dst_class(vpi
->DstReg
.File
));
594 o_inst
->src
[0] = ONE_SRC_0
;
595 o_inst
->src
[1] = t_src(vp
, &src
[0]);
596 o_inst
->src
[2] = t_src(vp
, &src
[1]);
599 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
600 t_dst_index(vp
, &vpi
->DstReg
),
601 t_dst_mask(vpi
->DstReg
.WriteMask
),
602 t_dst_class(vpi
->DstReg
.File
));
603 o_inst
->src
[0] = t_src(vp
, &src
[0]);
604 o_inst
->src
[1] = t_src(vp
, &src
[1]);
605 o_inst
->src
[2] = ZERO_SRC_1
;
611 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
612 src
[1].File
== PROGRAM_TEMPORARY
&&
614 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
618 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
619 t_dst_mask(vpi
->DstReg
.WriteMask
),
620 t_dst_class(vpi
->DstReg
.File
));
621 o_inst
->src
[0] = t_src(vp
, &src
[0]);
622 o_inst
->src
[1] = t_src(vp
, &src
[1]);
623 o_inst
->src
[2] = t_src(vp
, &src
[2]);
626 case OPCODE_MUL
: /* HW mul can take third arg but appears to have some other limitations. */
627 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
629 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
633 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
634 t_dst_mask(vpi
->DstReg
.WriteMask
),
635 t_dst_class(vpi
->DstReg
.File
));
636 o_inst
->src
[0] = t_src(vp
, &src
[0]);
637 o_inst
->src
[1] = t_src(vp
, &src
[1]);
639 o_inst
->src
[2] = ZERO_SRC_1
;
642 case OPCODE_DP3
: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
644 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT
,
645 t_dst_index(vp
, &vpi
->DstReg
),
646 t_dst_mask(vpi
->DstReg
.WriteMask
),
647 t_dst_class(vpi
->DstReg
.File
));
650 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
652 (src
[0].Swizzle
, 0)),
654 (src
[0].Swizzle
, 1)),
656 (src
[0].Swizzle
, 2)),
658 t_src_class(src
[0].File
),
660 NegateBase
? VSF_FLAG_XYZ
:
661 VSF_FLAG_NONE
) | (src
[0].
665 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
667 (src
[1].Swizzle
, 0)),
669 (src
[1].Swizzle
, 1)),
671 (src
[1].Swizzle
, 2)),
673 t_src_class(src
[1].File
),
675 NegateBase
? VSF_FLAG_XYZ
:
676 VSF_FLAG_NONE
) | (src
[1].
679 o_inst
->src
[2] = ZERO_SRC_1
;
682 case OPCODE_SUB
: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
684 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
686 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
690 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
691 t_dst_mask(vpi
->DstReg
.WriteMask
),
692 t_dst_class(vpi
->DstReg
.File
));
693 o_inst
->src
[0] = t_src(vp
, &src
[0]);
694 o_inst
->src
[1] = ONE_SRC_0
;
696 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
698 (src
[1].Swizzle
, 0)),
700 (src
[1].Swizzle
, 1)),
702 (src
[1].Swizzle
, 2)),
704 (src
[1].Swizzle
, 3)),
705 t_src_class(src
[1].File
),
707 NegateBase
) ? VSF_FLAG_ALL
:
708 VSF_FLAG_NONE
) | (src
[1].
712 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
713 t_dst_index(vp
, &vpi
->DstReg
),
714 t_dst_mask(vpi
->DstReg
.WriteMask
),
715 t_dst_class(vpi
->DstReg
.File
));
717 o_inst
->src
[0] = t_src(vp
, &src
[0]);
719 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
721 (src
[1].Swizzle
, 0)),
723 (src
[1].Swizzle
, 1)),
725 (src
[1].Swizzle
, 2)),
727 (src
[1].Swizzle
, 3)),
728 t_src_class(src
[1].File
),
730 NegateBase
) ? VSF_FLAG_ALL
:
731 VSF_FLAG_NONE
) | (src
[1].
737 case OPCODE_ABS
: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
739 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX
,
740 t_dst_index(vp
, &vpi
->DstReg
),
741 t_dst_mask(vpi
->DstReg
.WriteMask
),
742 t_dst_class(vpi
->DstReg
.File
));
744 o_inst
->src
[0] = t_src(vp
, &src
[0]);
746 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
748 (src
[0].Swizzle
, 0)),
750 (src
[0].Swizzle
, 1)),
752 (src
[0].Swizzle
, 2)),
754 (src
[0].Swizzle
, 3)),
755 t_src_class(src
[0].File
),
757 NegateBase
) ? VSF_FLAG_ALL
:
758 VSF_FLAG_NONE
) | (src
[0].
764 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
765 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
767 o_inst
->op
= MAKE_VSF_OP(R300_VPI_OUT_OP_FRC
, u_temp_i
,
768 t_dst_mask(vpi
->DstReg
.
772 o_inst
->src
[0] = t_src(vp
, &src
[0]);
773 o_inst
->src
[1] = ZERO_SRC_0
;
774 o_inst
->src
[2] = ZERO_SRC_0
;
778 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
779 t_dst_index(vp
, &vpi
->DstReg
),
780 t_dst_mask(vpi
->DstReg
.WriteMask
),
781 t_dst_class(vpi
->DstReg
.File
));
783 o_inst
->src
[0] = t_src(vp
, &src
[0]);
784 o_inst
->src
[1] = MAKE_VSF_SOURCE(u_temp_i
,
790 /* Not 100% sure about this */
797 o_inst
->src
[2] = ZERO_SRC_0
;
801 case OPCODE_LG2
: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
803 MAKE_VSF_OP(R300_VPI_OUT_OP_LG2
,
804 t_dst_index(vp
, &vpi
->DstReg
),
805 t_dst_mask(vpi
->DstReg
.WriteMask
),
806 t_dst_class(vpi
->DstReg
.File
));
809 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
811 (src
[0].Swizzle
, 0)),
813 (src
[0].Swizzle
, 0)),
815 (src
[0].Swizzle
, 0)),
817 (src
[0].Swizzle
, 0)),
818 t_src_class(src
[0].File
),
820 NegateBase
? VSF_FLAG_ALL
:
821 VSF_FLAG_NONE
) | (src
[0].
823 o_inst
->src
[1] = ZERO_SRC_0
;
824 o_inst
->src
[2] = ZERO_SRC_0
;
827 case OPCODE_LIT
: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
829 MAKE_VSF_OP(R300_VPI_OUT_OP_LIT
,
830 t_dst_index(vp
, &vpi
->DstReg
),
831 t_dst_mask(vpi
->DstReg
.WriteMask
),
832 t_dst_class(vpi
->DstReg
.File
));
833 /* NOTE: Users swizzling might not work. */
834 o_inst
->src
[0] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
835 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
836 VSF_IN_COMPONENT_ZERO
, // z
837 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
844 (src
[0].RelAddr
<< 4);
845 o_inst
->src
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
846 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
847 VSF_IN_COMPONENT_ZERO
, // z
848 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
855 (src
[0].RelAddr
<< 4);
856 o_inst
->src
[2] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
857 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
858 VSF_IN_COMPONENT_ZERO
, // z
859 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
866 (src
[0].RelAddr
<< 4);
869 case OPCODE_DPH
: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
871 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT
,
872 t_dst_index(vp
, &vpi
->DstReg
),
873 t_dst_mask(vpi
->DstReg
.WriteMask
),
874 t_dst_class(vpi
->DstReg
.File
));
877 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
879 (src
[0].Swizzle
, 0)),
881 (src
[0].Swizzle
, 1)),
883 (src
[0].Swizzle
, 2)),
884 VSF_IN_COMPONENT_ONE
,
885 t_src_class(src
[0].File
),
887 NegateBase
? VSF_FLAG_XYZ
:
888 VSF_FLAG_NONE
) | (src
[0].
890 o_inst
->src
[1] = t_src(vp
, &src
[1]);
891 o_inst
->src
[2] = ZERO_SRC_1
;
895 /* mul r0, r1.yzxw, r2.zxyw
896 mad r0, -r2.yzxw, r1.zxyw, r0
897 NOTE: might need MAD_2
900 o_inst
->op
= MAKE_VSF_OP(R300_VPI_OUT_OP_MAD
, u_temp_i
,
901 t_dst_mask(vpi
->DstReg
.
905 o_inst
->src
[0] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
906 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
907 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
908 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
915 (src
[0].RelAddr
<< 4);
917 o_inst
->src
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
918 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
919 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
920 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
927 (src
[1].RelAddr
<< 4);
929 o_inst
->src
[2] = ZERO_SRC_1
;
934 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD
,
935 t_dst_index(vp
, &vpi
->DstReg
),
936 t_dst_mask(vpi
->DstReg
.WriteMask
),
937 t_dst_class(vpi
->DstReg
.File
));
939 o_inst
->src
[0] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
940 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
941 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
942 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
949 (src
[1].RelAddr
<< 4);
951 o_inst
->src
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
952 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
953 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
954 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
961 (src
[0].RelAddr
<< 4);
963 o_inst
->src
[2] = MAKE_VSF_SOURCE(u_temp_i
+ 1,
974 fprintf(stderr
, "Dont know how to handle op %d yet\n",
985 MAKE_VSF_OP(t_opcode(vpi
->Opcode
),
986 t_dst_index(vp
, &vpi
->DstReg
),
987 t_dst_mask(vpi
->DstReg
.WriteMask
),
988 t_dst_class(vpi
->DstReg
.File
));
990 if (are_srcs_scalar
) {
993 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
994 o_inst
->src
[1] = ZERO_SRC_0
;
995 o_inst
->src
[2] = ZERO_SRC_0
;
999 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
1000 o_inst
->src
[1] = t_src_scalar(vp
, &src
[1]);
1001 o_inst
->src
[2] = ZERO_SRC_1
;
1005 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
1006 o_inst
->src
[1] = t_src_scalar(vp
, &src
[1]);
1007 o_inst
->src
[2] = t_src_scalar(vp
, &src
[2]);
1012 "scalars and op RCC not handled yet");
1019 o_inst
->src
[0] = t_src(vp
, &src
[0]);
1020 o_inst
->src
[1] = ZERO_SRC_0
;
1021 o_inst
->src
[2] = ZERO_SRC_0
;
1025 o_inst
->src
[0] = t_src(vp
, &src
[0]);
1026 o_inst
->src
[1] = t_src(vp
, &src
[1]);
1027 o_inst
->src
[2] = ZERO_SRC_1
;
1031 o_inst
->src
[0] = t_src(vp
, &src
[0]);
1032 o_inst
->src
[1] = t_src(vp
, &src
[1]);
1033 o_inst
->src
[2] = t_src(vp
, &src
[2]);
1038 "scalars and op RCC not handled yet");
1046 /* Will most likely segfault before we get here... fix later. */
1047 if (o_inst
- vp
->program
.body
.i
>= VSF_MAX_FRAGMENT_LENGTH
/ 4) {
1048 vp
->program
.length
= 0;
1049 vp
->native
= GL_FALSE
;
1052 vp
->program
.length
= (o_inst
- vp
->program
.body
.i
) * 4;
1054 fprintf(stderr
, "hw program:\n");
1055 for (i
= 0; i
< vp
->program
.length
; i
++)
1056 fprintf(stderr
, "%08x\n", vp
->program
.body
.d
[i
]);
1060 static void position_invariant(struct gl_program
*prog
)
1062 struct prog_instruction
*vpi
;
1063 struct gl_program_parameter_list
*paramList
;
1066 gl_state_index tokens
[STATE_LENGTH
] = { STATE_MVP_MATRIX
, 0, 0, 0, 0 };
1068 /* tokens[4] = matrix modifier */
1070 tokens
[4] = 0; /* not transposed or inverted */
1072 tokens
[4] = STATE_MATRIX_TRANSPOSE
;
1074 paramList
= prog
->Parameters
;
1076 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 4);
1077 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 4);
1079 for (i
= 0; i
< 4; i
++) {
1081 tokens
[2] = tokens
[3] = i
; /* matrix row[i]..row[i] */
1082 idx
= _mesa_add_state_reference(paramList
, tokens
);
1084 vpi
[i
].Opcode
= OPCODE_DP4
;
1085 vpi
[i
].StringPos
= 0;
1088 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1089 vpi
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1090 vpi
[i
].DstReg
.WriteMask
= 1 << i
;
1091 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1093 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1094 vpi
[i
].SrcReg
[0].Index
= idx
;
1095 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1097 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1098 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1099 vpi
[i
].SrcReg
[1].Swizzle
= SWIZZLE_XYZW
;
1102 vpi
[i
].Opcode
= OPCODE_MUL
;
1104 vpi
[i
].Opcode
= OPCODE_MAD
;
1106 vpi
[i
].StringPos
= 0;
1110 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1112 vpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
1113 vpi
[i
].DstReg
.Index
= 0;
1114 vpi
[i
].DstReg
.WriteMask
= 0xf;
1115 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1117 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1118 vpi
[i
].SrcReg
[0].Index
= idx
;
1119 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1121 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1122 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1123 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(i
, i
, i
, i
);
1126 vpi
[i
].SrcReg
[2].File
= PROGRAM_TEMPORARY
;
1127 vpi
[i
].SrcReg
[2].Index
= 0;
1128 vpi
[i
].SrcReg
[2].Swizzle
= SWIZZLE_XYZW
;
1133 _mesa_copy_instructions(&vpi
[i
], prog
->Instructions
,
1134 prog
->NumInstructions
);
1136 free(prog
->Instructions
);
1138 prog
->Instructions
= vpi
;
1140 prog
->NumInstructions
+= 4;
1141 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1143 assert(vpi
->Opcode
== OPCODE_END
);
1146 static void insert_wpos(struct r300_vertex_program
*vp
,
1147 struct gl_program
*prog
, GLuint temp_index
)
1149 struct prog_instruction
*vpi
;
1150 struct prog_instruction
*vpi_insert
;
1153 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 2);
1154 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 2);
1156 _mesa_copy_instructions(vpi
, prog
->Instructions
,
1157 prog
->NumInstructions
- 1);
1159 _mesa_copy_instructions(&vpi
[prog
->NumInstructions
+ 1],
1160 &prog
->Instructions
[prog
->NumInstructions
- 1],
1162 vpi_insert
= &vpi
[prog
->NumInstructions
- 1];
1164 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1166 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1167 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1168 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1169 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1171 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1172 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1173 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1176 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1178 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1179 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_TEX0
+ vp
->wpos_idx
;
1180 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1181 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1183 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1184 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1185 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1188 free(prog
->Instructions
);
1190 prog
->Instructions
= vpi
;
1192 prog
->NumInstructions
+= i
;
1193 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1195 assert(vpi
->Opcode
== OPCODE_END
);
1198 static void pos_as_texcoord(struct r300_vertex_program
*vp
,
1199 struct gl_program
*prog
)
1201 struct prog_instruction
*vpi
;
1202 GLuint tempregi
= prog
->NumTemporaries
;
1203 /* should do something else if no temps left... */
1204 prog
->NumTemporaries
++;
1206 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
1207 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&&
1208 vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
1209 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1210 vpi
->DstReg
.Index
= tempregi
;
1213 insert_wpos(vp
, prog
, tempregi
);
1216 static struct r300_vertex_program
*build_program(struct r300_vertex_program_key
1217 *wanted_key
, struct gl_vertex_program
1218 *mesa_vp
, GLint wpos_idx
)
1220 struct r300_vertex_program
*vp
;
1222 vp
= _mesa_calloc(sizeof(*vp
));
1223 _mesa_memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
1225 vp
->wpos_idx
= wpos_idx
;
1227 if (mesa_vp
->IsPositionInvariant
) {
1228 position_invariant(&mesa_vp
->Base
);
1232 pos_as_texcoord(vp
, &mesa_vp
->Base
);
1234 assert(mesa_vp
->Base
.NumInstructions
);
1236 vp
->num_temporaries
= mesa_vp
->Base
.NumTemporaries
;
1238 r300TranslateVertexShader(vp
, mesa_vp
->Base
.Instructions
);
1243 void r300SelectVertexShader(r300ContextPtr r300
)
1245 GLcontext
*ctx
= ctx
= r300
->radeon
.glCtx
;
1247 struct r300_vertex_program_key wanted_key
= { 0 };
1249 struct r300_vertex_program_cont
*vpc
;
1250 struct r300_vertex_program
*vp
;
1253 vpc
= (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
1254 InputsRead
= ctx
->FragmentProgram
._Current
->Base
.InputsRead
;
1256 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_HPOS
;
1259 if (InputsRead
& FRAG_BIT_WPOS
) {
1260 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1261 if (!(InputsRead
& (FRAG_BIT_TEX0
<< i
)))
1264 if (i
== ctx
->Const
.MaxTextureUnits
) {
1265 fprintf(stderr
, "\tno free texcoord found\n");
1269 InputsRead
|= (FRAG_BIT_TEX0
<< i
);
1273 if (InputsRead
& FRAG_BIT_COL0
)
1274 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_COL0
;
1276 if ((InputsRead
& FRAG_BIT_COL1
) /*||
1277 (InputsRead & FRAG_BIT_FOGC) */ )
1278 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_COL1
;
1280 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1281 if (InputsRead
& (FRAG_BIT_TEX0
<< i
))
1282 wanted_key
.OutputsWritten
|=
1283 1 << (VERT_RESULT_TEX0
+ i
);
1285 wanted_key
.InputsRead
= vpc
->mesa_program
.Base
.InputsRead
;
1286 if (vpc
->mesa_program
.IsPositionInvariant
) {
1287 /* we want position, don't we? */
1288 wanted_key
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
1291 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
)
1292 if (_mesa_memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
)) ==
1294 r300
->selected_vp
= vp
;
1297 //_mesa_print_program(&vpc->mesa_program.Base);
1299 vp
= build_program(&wanted_key
, &vpc
->mesa_program
, wpos_idx
);
1300 vp
->next
= vpc
->progs
;
1302 r300
->selected_vp
= vp
;