1 /**************************************************************************
3 Copyright (C) 2005 Aapo Tahkola.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * \author Aapo Tahkola <aet@rasterburn.org>
38 #include "shader/prog_instruction.h"
39 #include "shader/prog_parameter.h"
40 #include "shader/prog_statevars.h"
43 #include "r300_context.h"
45 #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
46 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
47 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
48 SWIZZLE_W != VSF_IN_COMPONENT_W || \
49 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
50 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
51 WRITEMASK_X != VSF_FLAG_X || \
52 WRITEMASK_Y != VSF_FLAG_Y || \
53 WRITEMASK_Z != VSF_FLAG_Z || \
54 WRITEMASK_W != VSF_FLAG_W
55 #error Cannot change these!
/*
 * Encoding of the per-opcode `ip` word (operand count plus flags).
 *
 * The flag bit is built from an unsigned long constant: `1 << 31` on a
 * signed int shifts into the sign bit, which is undefined behaviour, and the
 * resulting negative value would sign-extend when stored in an
 * `unsigned long` on LP64 targets.
 */
#define SCALAR_FLAG (1UL << 31)	/* opcode takes scalar source operands */
#define FLAG_MASK (1UL << 31)	/* all flag bits; currently only SCALAR_FLAG */
#define OP_MASK	(0xf)		/* we are unlikely to have more than 15 */
/* Builds one table initializer: { "NAME", OPCODE_NAME, ip }. */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
66 unsigned long ip
; /* number of input operands and flags */
71 OPN(ARL
, 1 | SCALAR_FLAG
),
76 OPN(EX2
, 1 | SCALAR_FLAG
),
77 OPN(EXP
, 1 | SCALAR_FLAG
),
80 OPN(LG2
, 1 | SCALAR_FLAG
),
82 OPN(LOG
, 1 | SCALAR_FLAG
),
88 OPN(POW
, 2 | SCALAR_FLAG
),
89 OPN(RCP
, 1 | SCALAR_FLAG
),
90 OPN(RSQ
, 1 | SCALAR_FLAG
),
104 int r300VertexProgUpdateParams(GLcontext
* ctx
,
105 struct r300_vertex_program_cont
*vp
, float *dst
)
108 struct gl_vertex_program
*mesa_vp
= &vp
->mesa_program
;
110 struct gl_program_parameter_list
*paramList
;
112 if (mesa_vp
->IsNVProgram
) {
113 _mesa_load_tracked_matrices(ctx
);
115 for (pi
= 0; pi
< MAX_NV_VERTEX_PROGRAM_PARAMS
; pi
++) {
116 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][0];
117 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][1];
118 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][2];
119 *dst
++ = ctx
->VertexProgram
.Parameters
[pi
][3];
124 assert(mesa_vp
->Base
.Parameters
);
125 _mesa_load_state_parameters(ctx
, mesa_vp
->Base
.Parameters
);
127 if (mesa_vp
->Base
.Parameters
->NumParameters
* 4 >
128 VSF_MAX_FRAGMENT_LENGTH
) {
129 fprintf(stderr
, "%s:Params exhausted\n", __FUNCTION__
);
133 paramList
= mesa_vp
->Base
.Parameters
;
134 for (pi
= 0; pi
< paramList
->NumParameters
; pi
++) {
135 switch (paramList
->Parameters
[pi
].Type
) {
137 case PROGRAM_STATE_VAR
:
138 case PROGRAM_NAMED_PARAM
:
139 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
140 case PROGRAM_CONSTANT
:
141 *dst
++ = paramList
->ParameterValues
[pi
][0];
142 *dst
++ = paramList
->ParameterValues
[pi
][1];
143 *dst
++ = paramList
->ParameterValues
[pi
][2];
144 *dst
++ = paramList
->ParameterValues
[pi
][3];
148 _mesa_problem(NULL
, "Bad param type in %s",
157 static unsigned long t_dst_mask(GLuint mask
)
159 /* WRITEMASK_* is equivalent to VSF_FLAG_* */
160 return mask
& VSF_FLAG_ALL
;
163 static unsigned long t_dst_class(enum register_file file
)
167 case PROGRAM_TEMPORARY
:
168 return VSF_OUT_CLASS_TMP
;
170 return VSF_OUT_CLASS_RESULT
;
171 case PROGRAM_ADDRESS
:
172 return VSF_OUT_CLASS_ADDR
;
175 case PROGRAM_LOCAL_PARAM:
176 case PROGRAM_ENV_PARAM:
177 case PROGRAM_NAMED_PARAM:
178 case PROGRAM_STATE_VAR:
179 case PROGRAM_WRITE_ONLY:
180 case PROGRAM_ADDRESS:
183 fprintf(stderr
, "problem in %s", __FUNCTION__
);
189 static unsigned long t_dst_index(struct r300_vertex_program
*vp
,
190 struct prog_dst_register
*dst
)
192 if (dst
->File
== PROGRAM_OUTPUT
)
193 return vp
->outputs
[dst
->Index
];
198 static unsigned long t_src_class(enum register_file file
)
202 case PROGRAM_TEMPORARY
:
203 return VSF_IN_CLASS_TMP
;
206 return VSF_IN_CLASS_ATTR
;
208 case PROGRAM_LOCAL_PARAM
:
209 case PROGRAM_ENV_PARAM
:
210 case PROGRAM_NAMED_PARAM
:
211 case PROGRAM_STATE_VAR
:
212 return VSF_IN_CLASS_PARAM
;
215 case PROGRAM_WRITE_ONLY:
216 case PROGRAM_ADDRESS:
219 fprintf(stderr
, "problem in %s", __FUNCTION__
);
225 static inline unsigned long t_swizzle(GLubyte swizzle
)
227 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
232 static void vp_dump_inputs(struct r300_vertex_program
*vp
, char *caller
)
237 fprintf(stderr
, "vp null in call to %s from %s\n", __FUNCTION__
,
242 fprintf(stderr
, "%s:<", caller
);
243 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
244 fprintf(stderr
, "%d ", vp
->inputs
[i
]);
245 fprintf(stderr
, ">\n");
250 static unsigned long t_src_index(struct r300_vertex_program
*vp
,
251 struct prog_src_register
*src
)
256 if (src
->File
== PROGRAM_INPUT
) {
257 if (vp
->inputs
[src
->Index
] != -1)
258 return vp
->inputs
[src
->Index
];
260 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
261 if (vp
->inputs
[i
] > max_reg
)
262 max_reg
= vp
->inputs
[i
];
264 vp
->inputs
[src
->Index
] = max_reg
+ 1;
266 //vp_dump_inputs(vp, __FUNCTION__);
268 return vp
->inputs
[src
->Index
];
270 if (src
->Index
< 0) {
272 "negative offsets for indirect addressing do not work.\n");
279 static unsigned long t_src(struct r300_vertex_program
*vp
,
280 struct prog_src_register
*src
)
282 /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
283 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
285 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
286 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
287 t_swizzle(GET_SWZ(src
->Swizzle
, 1)),
288 t_swizzle(GET_SWZ(src
->Swizzle
, 2)),
289 t_swizzle(GET_SWZ(src
->Swizzle
, 3)),
290 t_src_class(src
->File
),
291 src
->NegateBase
) | (src
->RelAddr
<< 4);
294 static unsigned long t_src_scalar(struct r300_vertex_program
*vp
,
295 struct prog_src_register
*src
)
298 return MAKE_VSF_SOURCE(t_src_index(vp
, src
),
299 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
300 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
301 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
302 t_swizzle(GET_SWZ(src
->Swizzle
, 0)),
303 t_src_class(src
->File
),
305 NegateBase
? VSF_FLAG_ALL
: VSF_FLAG_NONE
) |
309 static unsigned long t_opcode(enum prog_opcode opcode
)
314 case OPCODE_ARL
: return R300_VPI_OUT_OP_ARL
;
315 case OPCODE_DST
: return R300_VPI_OUT_OP_DST
;
316 case OPCODE_EX2
: return R300_VPI_OUT_OP_EX2
;
317 case OPCODE_EXP
: return R300_VPI_OUT_OP_EXP
;
318 case OPCODE_FRC
: return R300_VPI_OUT_OP_FRC
;
319 case OPCODE_LG2
: return R300_VPI_OUT_OP_LG2
;
320 case OPCODE_LOG
: return R300_VPI_OUT_OP_LOG
;
321 case OPCODE_MAX
: return R300_VPI_OUT_OP_MAX
;
322 case OPCODE_MIN
: return R300_VPI_OUT_OP_MIN
;
323 case OPCODE_MUL
: return R300_VPI_OUT_OP_MUL
;
324 case OPCODE_RCP
: return R300_VPI_OUT_OP_RCP
;
325 case OPCODE_RSQ
: return R300_VPI_OUT_OP_RSQ
;
326 case OPCODE_SGE
: return R300_VPI_OUT_OP_SGE
;
327 case OPCODE_SLT
: return R300_VPI_OUT_OP_SLT
;
328 case OPCODE_DP4
: return R300_VPI_OUT_OP_DOT
;
332 fprintf(stderr
, "%s: Should not be called with opcode %d!",
333 __FUNCTION__
, opcode
);
339 static unsigned long op_operands(enum prog_opcode opcode
)
343 /* Can we trust mesas opcodes to be in order ? */
344 for (i
= 0; i
< sizeof(op_names
) / sizeof(*op_names
); i
++)
345 if (op_names
[i
].opcode
== opcode
)
346 return op_names
[i
].ip
;
348 fprintf(stderr
, "op %d not found in op_names\n", opcode
);
353 static GLboolean
valid_dst(struct r300_vertex_program
*vp
,
354 struct prog_dst_register
*dst
)
356 if (dst
->File
== PROGRAM_OUTPUT
&& vp
->outputs
[dst
->Index
] == -1) {
358 } else if (dst
->File
== PROGRAM_ADDRESS
) {
359 assert(dst
->Index
== 0);
/* TODO: Get rid of t_src_class call */
/*
 * Non-zero when source registers `a` and `b` (struct prog_src_register
 * lvalues) either differ in their RelAddr flag, or have different indices
 * while both are of class PARAM or both are of class ATTR.
 *
 * The translator uses this on three-operand instructions to decide whether
 * one source must first be copied into a temporary.
 * NOTE(review): presumably the hardware cannot read two different
 * constant/attribute registers in a single instruction -- confirm.
 *
 * Each argument is evaluated several times; pass side-effect-free lvalues.
 * Arguments are parenthesized so that expressions such as `*p` or `s[i]`
 * bind correctly, and the definition ends without a line continuation so the
 * following source line is not spliced into the macro body.
 */
#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
			 t_src_class((b).File) == VSF_IN_CLASS_ATTR))))
/*
 * Source words whose four component selectors are all SWIZZLE_ZERO
 * (ZERO_SRC_n) or all SWIZZLE_ONE (ONE_SRC_n).  The register index, class
 * and RelAddr flag are still taken from src[n], with no negation applied.
 *
 * These expand in place and therefore need `vp` and the local `src[]` array
 * of the translating function in scope.
 */
#define R300_VP_CONST_SRC(n, swz) \
	(MAKE_VSF_SOURCE(t_src_index(vp, &src[n]), \
			 swz, swz, \
			 swz, swz, \
			 t_src_class(src[n].File), \
			 VSF_FLAG_NONE) | (src[n].RelAddr << 4))

#define ZERO_SRC_0 R300_VP_CONST_SRC(0, SWIZZLE_ZERO)
#define ZERO_SRC_1 R300_VP_CONST_SRC(1, SWIZZLE_ZERO)
#define ZERO_SRC_2 R300_VP_CONST_SRC(2, SWIZZLE_ZERO)

#define ONE_SRC_0 R300_VP_CONST_SRC(0, SWIZZLE_ONE)
#define ONE_SRC_1 R300_VP_CONST_SRC(1, SWIZZLE_ONE)
#define ONE_SRC_2 R300_VP_CONST_SRC(2, SWIZZLE_ONE)
402 /* DP4 version seems to trigger some hw peculiarity */
405 #define FREE_TEMPS() \
407 if(u_temp_i < vp->num_temporaries) { \
408 WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
409 vp->native = GL_FALSE; \
411 u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
414 static void r300TranslateVertexShader(struct r300_vertex_program
*vp
,
415 struct prog_instruction
*vpi
)
418 VERTEX_SHADER_INSTRUCTION
*o_inst
;
419 unsigned long operands
;
422 /* Initial value should be last tmp reg that hw supports.
423 Strangely enough r300 doesnt mind even though these would be out of range.
424 Smart enough to realize that it doesnt need it? */
425 int u_temp_i
= VSF_MAX_FRAGMENT_TEMPS
- 1;
426 struct prog_src_register src
[3];
428 vp
->pos_end
= 0; /* Not supported yet */
429 vp
->program
.length
= 0;
430 /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
432 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++)
435 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
438 assert(vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
));
441 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_HPOS
))
442 vp
->outputs
[VERT_RESULT_HPOS
] = cur_reg
++;
444 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_PSIZ
))
445 vp
->outputs
[VERT_RESULT_PSIZ
] = cur_reg
++;
447 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL0
))
448 vp
->outputs
[VERT_RESULT_COL0
] = cur_reg
++;
450 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_COL1
))
451 vp
->outputs
[VERT_RESULT_COL1
] = cur_reg
++;
453 #if 0 /* Not supported yet */
454 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC0
))
455 vp
->outputs
[VERT_RESULT_BFC0
] = cur_reg
++;
457 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_BFC1
))
458 vp
->outputs
[VERT_RESULT_BFC1
] = cur_reg
++;
460 if (vp
->key
.OutputsWritten
& (1 << VERT_RESULT_FOGC
))
461 vp
->outputs
[VERT_RESULT_FOGC
] = cur_reg
++;
464 for (i
= VERT_RESULT_TEX0
; i
<= VERT_RESULT_TEX7
; i
++)
465 if (vp
->key
.OutputsWritten
& (1 << i
))
466 vp
->outputs
[i
] = cur_reg
++;
468 vp
->translated
= GL_TRUE
;
469 vp
->native
= GL_TRUE
;
471 o_inst
= vp
->program
.body
.i
;
472 for (; vpi
->Opcode
!= OPCODE_END
; vpi
++, o_inst
++) {
475 if (!valid_dst(vp
, &vpi
->DstReg
)) {
476 /* redirect result to unused temp */
477 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
478 vpi
->DstReg
.Index
= u_temp_i
;
481 operands
= op_operands(vpi
->Opcode
);
482 are_srcs_scalar
= operands
& SCALAR_FLAG
;
485 for (i
= 0; i
< operands
; i
++)
486 src
[i
] = vpi
->SrcReg
[i
];
488 if (operands
== 3) { /* TODO: scalars */
489 if (CMP_SRCS(src
[1], src
[2])
490 || CMP_SRCS(src
[0], src
[2])) {
492 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, u_temp_i
,
497 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[2]),
498 SWIZZLE_X
, SWIZZLE_Y
,
499 SWIZZLE_Z
, SWIZZLE_W
,
500 t_src_class(src
[2].File
),
501 VSF_FLAG_NONE
) | (src
[2].
505 o_inst
->src
[1] = ZERO_SRC_2
;
506 o_inst
->src
[2] = ZERO_SRC_2
;
509 src
[2].File
= PROGRAM_TEMPORARY
;
510 src
[2].Index
= u_temp_i
;
518 if (CMP_SRCS(src
[1], src
[0])) {
520 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
, u_temp_i
,
525 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
526 SWIZZLE_X
, SWIZZLE_Y
,
527 SWIZZLE_Z
, SWIZZLE_W
,
528 t_src_class(src
[0].File
),
529 VSF_FLAG_NONE
) | (src
[0].
533 o_inst
->src
[1] = ZERO_SRC_0
;
534 o_inst
->src
[2] = ZERO_SRC_0
;
537 src
[0].File
= PROGRAM_TEMPORARY
;
538 src
[0].Index
= u_temp_i
;
544 /* These ops need special handling. */
545 switch (vpi
->Opcode
) {
548 MAKE_VSF_OP(R300_VPI_OUT_OP_POW
,
549 t_dst_index(vp
, &vpi
->DstReg
),
550 t_dst_mask(vpi
->DstReg
.WriteMask
),
551 t_dst_class(vpi
->DstReg
.File
));
552 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
553 o_inst
->src
[1] = ZERO_SRC_0
;
554 o_inst
->src
[2] = t_src_scalar(vp
, &src
[1]);
557 case OPCODE_MOV
: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
561 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
562 t_dst_index(vp
, &vpi
->DstReg
),
563 t_dst_mask(vpi
->DstReg
.WriteMask
),
564 t_dst_class(vpi
->DstReg
.File
));
565 o_inst
->src
[0] = t_src(vp
, &src
[0]);
566 o_inst
->src
[1] = ZERO_SRC_0
;
567 o_inst
->src
[2] = ZERO_SRC_0
;
571 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
575 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
576 t_dst_mask(vpi
->DstReg
.WriteMask
),
577 t_dst_class(vpi
->DstReg
.File
));
578 o_inst
->src
[0] = t_src(vp
, &src
[0]);
579 o_inst
->src
[1] = ONE_SRC_0
;
580 o_inst
->src
[2] = ZERO_SRC_0
;
587 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
589 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
593 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
594 t_dst_mask(vpi
->DstReg
.WriteMask
),
595 t_dst_class(vpi
->DstReg
.File
));
596 o_inst
->src
[0] = ONE_SRC_0
;
597 o_inst
->src
[1] = t_src(vp
, &src
[0]);
598 o_inst
->src
[2] = t_src(vp
, &src
[1]);
601 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
602 t_dst_index(vp
, &vpi
->DstReg
),
603 t_dst_mask(vpi
->DstReg
.WriteMask
),
604 t_dst_class(vpi
->DstReg
.File
));
605 o_inst
->src
[0] = t_src(vp
, &src
[0]);
606 o_inst
->src
[1] = t_src(vp
, &src
[1]);
607 o_inst
->src
[2] = ZERO_SRC_1
;
613 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
614 src
[1].File
== PROGRAM_TEMPORARY
&&
616 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
620 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
621 t_dst_mask(vpi
->DstReg
.WriteMask
),
622 t_dst_class(vpi
->DstReg
.File
));
623 o_inst
->src
[0] = t_src(vp
, &src
[0]);
624 o_inst
->src
[1] = t_src(vp
, &src
[1]);
625 o_inst
->src
[2] = t_src(vp
, &src
[2]);
628 case OPCODE_MUL
: /* HW mul can take third arg but appears to have some other limitations. */
629 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
631 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
635 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
636 t_dst_mask(vpi
->DstReg
.WriteMask
),
637 t_dst_class(vpi
->DstReg
.File
));
638 o_inst
->src
[0] = t_src(vp
, &src
[0]);
639 o_inst
->src
[1] = t_src(vp
, &src
[1]);
641 o_inst
->src
[2] = ZERO_SRC_1
;
644 case OPCODE_DP3
: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
646 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT
,
647 t_dst_index(vp
, &vpi
->DstReg
),
648 t_dst_mask(vpi
->DstReg
.WriteMask
),
649 t_dst_class(vpi
->DstReg
.File
));
652 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
654 (src
[0].Swizzle
, 0)),
656 (src
[0].Swizzle
, 1)),
658 (src
[0].Swizzle
, 2)),
660 t_src_class(src
[0].File
),
662 NegateBase
? VSF_FLAG_XYZ
:
663 VSF_FLAG_NONE
) | (src
[0].
667 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
669 (src
[1].Swizzle
, 0)),
671 (src
[1].Swizzle
, 1)),
673 (src
[1].Swizzle
, 2)),
675 t_src_class(src
[1].File
),
677 NegateBase
? VSF_FLAG_XYZ
:
678 VSF_FLAG_NONE
) | (src
[1].
681 o_inst
->src
[2] = ZERO_SRC_1
;
684 case OPCODE_SUB
: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
686 hw_op
= (src
[0].File
== PROGRAM_TEMPORARY
&&
688 PROGRAM_TEMPORARY
) ? R300_VPI_OUT_OP_MAD_2
:
692 MAKE_VSF_OP(hw_op
, t_dst_index(vp
, &vpi
->DstReg
),
693 t_dst_mask(vpi
->DstReg
.WriteMask
),
694 t_dst_class(vpi
->DstReg
.File
));
695 o_inst
->src
[0] = t_src(vp
, &src
[0]);
696 o_inst
->src
[1] = ONE_SRC_0
;
698 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
700 (src
[1].Swizzle
, 0)),
702 (src
[1].Swizzle
, 1)),
704 (src
[1].Swizzle
, 2)),
706 (src
[1].Swizzle
, 3)),
707 t_src_class(src
[1].File
),
709 NegateBase
) ? VSF_FLAG_ALL
:
710 VSF_FLAG_NONE
) | (src
[1].
714 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
715 t_dst_index(vp
, &vpi
->DstReg
),
716 t_dst_mask(vpi
->DstReg
.WriteMask
),
717 t_dst_class(vpi
->DstReg
.File
));
719 o_inst
->src
[0] = t_src(vp
, &src
[0]);
721 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]),
723 (src
[1].Swizzle
, 0)),
725 (src
[1].Swizzle
, 1)),
727 (src
[1].Swizzle
, 2)),
729 (src
[1].Swizzle
, 3)),
730 t_src_class(src
[1].File
),
732 NegateBase
) ? VSF_FLAG_ALL
:
733 VSF_FLAG_NONE
) | (src
[1].
739 case OPCODE_ABS
: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
741 MAKE_VSF_OP(R300_VPI_OUT_OP_MAX
,
742 t_dst_index(vp
, &vpi
->DstReg
),
743 t_dst_mask(vpi
->DstReg
.WriteMask
),
744 t_dst_class(vpi
->DstReg
.File
));
746 o_inst
->src
[0] = t_src(vp
, &src
[0]);
748 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
750 (src
[0].Swizzle
, 0)),
752 (src
[0].Swizzle
, 1)),
754 (src
[0].Swizzle
, 2)),
756 (src
[0].Swizzle
, 3)),
757 t_src_class(src
[0].File
),
759 NegateBase
) ? VSF_FLAG_ALL
:
760 VSF_FLAG_NONE
) | (src
[0].
766 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
767 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
769 o_inst
->op
= MAKE_VSF_OP(R300_VPI_OUT_OP_FRC
, u_temp_i
,
770 t_dst_mask(vpi
->DstReg
.
774 o_inst
->src
[0] = t_src(vp
, &src
[0]);
775 o_inst
->src
[1] = ZERO_SRC_0
;
776 o_inst
->src
[2] = ZERO_SRC_0
;
780 MAKE_VSF_OP(R300_VPI_OUT_OP_ADD
,
781 t_dst_index(vp
, &vpi
->DstReg
),
782 t_dst_mask(vpi
->DstReg
.WriteMask
),
783 t_dst_class(vpi
->DstReg
.File
));
785 o_inst
->src
[0] = t_src(vp
, &src
[0]);
786 o_inst
->src
[1] = MAKE_VSF_SOURCE(u_temp_i
,
792 /* Not 100% sure about this */
799 o_inst
->src
[2] = ZERO_SRC_0
;
803 case OPCODE_LG2
: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
805 MAKE_VSF_OP(R300_VPI_OUT_OP_LG2
,
806 t_dst_index(vp
, &vpi
->DstReg
),
807 t_dst_mask(vpi
->DstReg
.WriteMask
),
808 t_dst_class(vpi
->DstReg
.File
));
811 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
813 (src
[0].Swizzle
, 0)),
815 (src
[0].Swizzle
, 0)),
817 (src
[0].Swizzle
, 0)),
819 (src
[0].Swizzle
, 0)),
820 t_src_class(src
[0].File
),
822 NegateBase
? VSF_FLAG_ALL
:
823 VSF_FLAG_NONE
) | (src
[0].
825 o_inst
->src
[1] = ZERO_SRC_0
;
826 o_inst
->src
[2] = ZERO_SRC_0
;
829 case OPCODE_LIT
: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
831 MAKE_VSF_OP(R300_VPI_OUT_OP_LIT
,
832 t_dst_index(vp
, &vpi
->DstReg
),
833 t_dst_mask(vpi
->DstReg
.WriteMask
),
834 t_dst_class(vpi
->DstReg
.File
));
835 /* NOTE: Users swizzling might not work. */
836 o_inst
->src
[0] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
837 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
838 VSF_IN_COMPONENT_ZERO
, // z
839 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
846 (src
[0].RelAddr
<< 4);
847 o_inst
->src
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
848 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
849 VSF_IN_COMPONENT_ZERO
, // z
850 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
857 (src
[0].RelAddr
<< 4);
858 o_inst
->src
[2] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
859 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
860 VSF_IN_COMPONENT_ZERO
, // z
861 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
868 (src
[0].RelAddr
<< 4);
871 case OPCODE_DPH
: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
873 MAKE_VSF_OP(R300_VPI_OUT_OP_DOT
,
874 t_dst_index(vp
, &vpi
->DstReg
),
875 t_dst_mask(vpi
->DstReg
.WriteMask
),
876 t_dst_class(vpi
->DstReg
.File
));
879 MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]),
881 (src
[0].Swizzle
, 0)),
883 (src
[0].Swizzle
, 1)),
885 (src
[0].Swizzle
, 2)),
886 VSF_IN_COMPONENT_ONE
,
887 t_src_class(src
[0].File
),
889 NegateBase
? VSF_FLAG_XYZ
:
890 VSF_FLAG_NONE
) | (src
[0].
892 o_inst
->src
[1] = t_src(vp
, &src
[1]);
893 o_inst
->src
[2] = ZERO_SRC_1
;
897 /* mul r0, r1.yzxw, r2.zxyw
898 mad r0, -r2.yzxw, r1.zxyw, r0
899 NOTE: might need MAD_2
902 o_inst
->op
= MAKE_VSF_OP(R300_VPI_OUT_OP_MAD
, u_temp_i
,
903 t_dst_mask(vpi
->DstReg
.
907 o_inst
->src
[0] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
908 t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
909 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
910 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
917 (src
[0].RelAddr
<< 4);
919 o_inst
->src
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
920 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
921 t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
922 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
929 (src
[1].RelAddr
<< 4);
931 o_inst
->src
[2] = ZERO_SRC_1
;
936 MAKE_VSF_OP(R300_VPI_OUT_OP_MAD
,
937 t_dst_index(vp
, &vpi
->DstReg
),
938 t_dst_mask(vpi
->DstReg
.WriteMask
),
939 t_dst_class(vpi
->DstReg
.File
));
941 o_inst
->src
[0] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[1]), t_swizzle(GET_SWZ(src
[1].Swizzle
, 1)), // y
942 t_swizzle(GET_SWZ(src
[1].Swizzle
, 2)), // z
943 t_swizzle(GET_SWZ(src
[1].Swizzle
, 0)), // x
944 t_swizzle(GET_SWZ(src
[1].Swizzle
, 3)), // w
951 (src
[1].RelAddr
<< 4);
953 o_inst
->src
[1] = MAKE_VSF_SOURCE(t_src_index(vp
, &src
[0]), t_swizzle(GET_SWZ(src
[0].Swizzle
, 2)), // z
954 t_swizzle(GET_SWZ(src
[0].Swizzle
, 0)), // x
955 t_swizzle(GET_SWZ(src
[0].Swizzle
, 1)), // y
956 t_swizzle(GET_SWZ(src
[0].Swizzle
, 3)), // w
963 (src
[0].RelAddr
<< 4);
965 o_inst
->src
[2] = MAKE_VSF_SOURCE(u_temp_i
+ 1,
976 fprintf(stderr
, "Dont know how to handle op %d yet\n",
987 MAKE_VSF_OP(t_opcode(vpi
->Opcode
),
988 t_dst_index(vp
, &vpi
->DstReg
),
989 t_dst_mask(vpi
->DstReg
.WriteMask
),
990 t_dst_class(vpi
->DstReg
.File
));
992 if (are_srcs_scalar
) {
995 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
996 o_inst
->src
[1] = ZERO_SRC_0
;
997 o_inst
->src
[2] = ZERO_SRC_0
;
1001 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
1002 o_inst
->src
[1] = t_src_scalar(vp
, &src
[1]);
1003 o_inst
->src
[2] = ZERO_SRC_1
;
1007 o_inst
->src
[0] = t_src_scalar(vp
, &src
[0]);
1008 o_inst
->src
[1] = t_src_scalar(vp
, &src
[1]);
1009 o_inst
->src
[2] = t_src_scalar(vp
, &src
[2]);
1014 "scalars and op RCC not handled yet");
1021 o_inst
->src
[0] = t_src(vp
, &src
[0]);
1022 o_inst
->src
[1] = ZERO_SRC_0
;
1023 o_inst
->src
[2] = ZERO_SRC_0
;
1027 o_inst
->src
[0] = t_src(vp
, &src
[0]);
1028 o_inst
->src
[1] = t_src(vp
, &src
[1]);
1029 o_inst
->src
[2] = ZERO_SRC_1
;
1033 o_inst
->src
[0] = t_src(vp
, &src
[0]);
1034 o_inst
->src
[1] = t_src(vp
, &src
[1]);
1035 o_inst
->src
[2] = t_src(vp
, &src
[2]);
1040 "scalars and op RCC not handled yet");
1048 /* Will most likely segfault before we get here... fix later. */
1049 if (o_inst
- vp
->program
.body
.i
>= VSF_MAX_FRAGMENT_LENGTH
/ 4) {
1050 vp
->program
.length
= 0;
1051 vp
->native
= GL_FALSE
;
1054 vp
->program
.length
= (o_inst
- vp
->program
.body
.i
) * 4;
1056 fprintf(stderr
, "hw program:\n");
1057 for (i
= 0; i
< vp
->program
.length
; i
++)
1058 fprintf(stderr
, "%08x\n", vp
->program
.body
.d
[i
]);
1062 static void position_invariant(struct gl_program
*prog
)
1064 struct prog_instruction
*vpi
;
1065 struct gl_program_parameter_list
*paramList
;
1068 gl_state_index tokens
[STATE_LENGTH
] = { STATE_MVP_MATRIX
, 0, 0, 0, 0 };
1070 /* tokens[4] = matrix modifier */
1072 tokens
[4] = 0; /* not transposed or inverted */
1074 tokens
[4] = STATE_MATRIX_TRANSPOSE
;
1076 paramList
= prog
->Parameters
;
1078 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 4);
1079 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 4);
1081 for (i
= 0; i
< 4; i
++) {
1083 tokens
[2] = tokens
[3] = i
; /* matrix row[i]..row[i] */
1084 idx
= _mesa_add_state_reference(paramList
, tokens
);
1086 vpi
[i
].Opcode
= OPCODE_DP4
;
1087 vpi
[i
].StringPos
= 0;
1090 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1091 vpi
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1092 vpi
[i
].DstReg
.WriteMask
= 1 << i
;
1093 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1095 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1096 vpi
[i
].SrcReg
[0].Index
= idx
;
1097 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1099 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1100 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1101 vpi
[i
].SrcReg
[1].Swizzle
= SWIZZLE_XYZW
;
1104 vpi
[i
].Opcode
= OPCODE_MUL
;
1106 vpi
[i
].Opcode
= OPCODE_MAD
;
1108 vpi
[i
].StringPos
= 0;
1112 vpi
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1114 vpi
[i
].DstReg
.File
= PROGRAM_TEMPORARY
;
1115 vpi
[i
].DstReg
.Index
= 0;
1116 vpi
[i
].DstReg
.WriteMask
= 0xf;
1117 vpi
[i
].DstReg
.CondMask
= COND_TR
;
1119 vpi
[i
].SrcReg
[0].File
= PROGRAM_STATE_VAR
;
1120 vpi
[i
].SrcReg
[0].Index
= idx
;
1121 vpi
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1123 vpi
[i
].SrcReg
[1].File
= PROGRAM_INPUT
;
1124 vpi
[i
].SrcReg
[1].Index
= VERT_ATTRIB_POS
;
1125 vpi
[i
].SrcReg
[1].Swizzle
= MAKE_SWIZZLE4(i
, i
, i
, i
);
1128 vpi
[i
].SrcReg
[2].File
= PROGRAM_TEMPORARY
;
1129 vpi
[i
].SrcReg
[2].Index
= 0;
1130 vpi
[i
].SrcReg
[2].Swizzle
= SWIZZLE_XYZW
;
1135 _mesa_copy_instructions(&vpi
[i
], prog
->Instructions
,
1136 prog
->NumInstructions
);
1138 free(prog
->Instructions
);
1140 prog
->Instructions
= vpi
;
1142 prog
->NumInstructions
+= 4;
1143 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1145 assert(vpi
->Opcode
== OPCODE_END
);
1148 static void insert_wpos(struct r300_vertex_program
*vp
,
1149 struct gl_program
*prog
, GLuint temp_index
)
1151 struct prog_instruction
*vpi
;
1152 struct prog_instruction
*vpi_insert
;
1155 vpi
= _mesa_alloc_instructions(prog
->NumInstructions
+ 2);
1156 _mesa_init_instructions(vpi
, prog
->NumInstructions
+ 2);
1158 _mesa_copy_instructions(vpi
, prog
->Instructions
,
1159 prog
->NumInstructions
- 1);
1161 _mesa_copy_instructions(&vpi
[prog
->NumInstructions
+ 1],
1162 &prog
->Instructions
[prog
->NumInstructions
- 1],
1164 vpi_insert
= &vpi
[prog
->NumInstructions
- 1];
1166 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1168 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1169 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_HPOS
;
1170 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1171 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1173 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1174 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1175 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1178 vpi_insert
[i
].Opcode
= OPCODE_MOV
;
1180 vpi_insert
[i
].DstReg
.File
= PROGRAM_OUTPUT
;
1181 vpi_insert
[i
].DstReg
.Index
= VERT_RESULT_TEX0
+ vp
->wpos_idx
;
1182 vpi_insert
[i
].DstReg
.WriteMask
= WRITEMASK_XYZW
;
1183 vpi_insert
[i
].DstReg
.CondMask
= COND_TR
;
1185 vpi_insert
[i
].SrcReg
[0].File
= PROGRAM_TEMPORARY
;
1186 vpi_insert
[i
].SrcReg
[0].Index
= temp_index
;
1187 vpi_insert
[i
].SrcReg
[0].Swizzle
= SWIZZLE_XYZW
;
1190 free(prog
->Instructions
);
1192 prog
->Instructions
= vpi
;
1194 prog
->NumInstructions
+= i
;
1195 vpi
= &prog
->Instructions
[prog
->NumInstructions
- 1];
1197 assert(vpi
->Opcode
== OPCODE_END
);
1200 static void pos_as_texcoord(struct r300_vertex_program
*vp
,
1201 struct gl_program
*prog
)
1203 struct prog_instruction
*vpi
;
1204 GLuint tempregi
= prog
->NumTemporaries
;
1205 /* should do something else if no temps left... */
1206 prog
->NumTemporaries
++;
1208 for (vpi
= prog
->Instructions
; vpi
->Opcode
!= OPCODE_END
; vpi
++) {
1209 if (vpi
->DstReg
.File
== PROGRAM_OUTPUT
&&
1210 vpi
->DstReg
.Index
== VERT_RESULT_HPOS
) {
1211 vpi
->DstReg
.File
= PROGRAM_TEMPORARY
;
1212 vpi
->DstReg
.Index
= tempregi
;
1215 insert_wpos(vp
, prog
, tempregi
);
1218 static struct r300_vertex_program
*build_program(struct r300_vertex_program_key
1219 *wanted_key
, struct gl_vertex_program
1220 *mesa_vp
, GLint wpos_idx
)
1222 struct r300_vertex_program
*vp
;
1224 vp
= _mesa_calloc(sizeof(*vp
));
1225 _mesa_memcpy(&vp
->key
, wanted_key
, sizeof(vp
->key
));
1227 vp
->wpos_idx
= wpos_idx
;
1229 if (mesa_vp
->IsPositionInvariant
) {
1230 position_invariant(&mesa_vp
->Base
);
1234 pos_as_texcoord(vp
, &mesa_vp
->Base
);
1236 assert(mesa_vp
->Base
.NumInstructions
);
1238 vp
->num_temporaries
= mesa_vp
->Base
.NumTemporaries
;
1240 r300TranslateVertexShader(vp
, mesa_vp
->Base
.Instructions
);
1245 void r300SelectVertexShader(r300ContextPtr r300
)
1247 GLcontext
*ctx
= ctx
= r300
->radeon
.glCtx
;
1249 struct r300_vertex_program_key wanted_key
= { 0 };
1251 struct r300_vertex_program_cont
*vpc
;
1252 struct r300_vertex_program
*vp
;
1255 vpc
= (struct r300_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
1256 InputsRead
= ctx
->FragmentProgram
._Current
->Base
.InputsRead
;
1258 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_HPOS
;
1261 if (InputsRead
& FRAG_BIT_WPOS
) {
1262 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1263 if (!(InputsRead
& (FRAG_BIT_TEX0
<< i
)))
1266 if (i
== ctx
->Const
.MaxTextureUnits
) {
1267 fprintf(stderr
, "\tno free texcoord found\n");
1271 InputsRead
|= (FRAG_BIT_TEX0
<< i
);
1275 if (InputsRead
& FRAG_BIT_COL0
)
1276 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_COL0
;
1278 if ((InputsRead
& FRAG_BIT_COL1
) /*||
1279 (InputsRead & FRAG_BIT_FOGC) */ )
1280 wanted_key
.OutputsWritten
|= 1 << VERT_RESULT_COL1
;
1282 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
1283 if (InputsRead
& (FRAG_BIT_TEX0
<< i
))
1284 wanted_key
.OutputsWritten
|=
1285 1 << (VERT_RESULT_TEX0
+ i
);
1287 wanted_key
.InputsRead
= vpc
->mesa_program
.Base
.InputsRead
;
1288 if (vpc
->mesa_program
.IsPositionInvariant
) {
1289 /* we wan't position don't we ? */
1290 wanted_key
.InputsRead
|= (1 << VERT_ATTRIB_POS
);
1293 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
)
1294 if (_mesa_memcmp(&vp
->key
, &wanted_key
, sizeof(wanted_key
)) ==
1296 r300
->selected_vp
= vp
;
1299 //_mesa_print_program(&vpc->mesa_program.Base);
1301 vp
= build_program(&wanted_key
, &vpc
->mesa_program
, wpos_idx
);
1302 vp
->next
= vpc
->progs
;
1304 r300
->selected_vp
= vp
;