X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fr300_vertprog.c;h=aa98a049aa4aad5d64ab00c5997263a3200c6f25;hb=955f51270bb60ad77dba049799587dc7c0fb4dda;hp=b9f6d28357ee4e7da371c39ae7410487b5c67eb6;hpb=5dcbdc09f354d96cef93a28215f7776a5e84dd1f;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index b9f6d28357e..aa98a049aa4 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -1,6 +1,7 @@ /************************************************************************** -Copyright (C) 2005 Aapo Tahkola. +Copyright (C) 2005 Aapo Tahkola +Copyright (C) 2008 Oliver McFadden All Rights Reserved. @@ -25,1419 +26,377 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ -/** - * \file - * - * \author Aapo Tahkola - * - * \author Oliver McFadden - * - * \todo A VE_MULTIPLY_ADD or VE_MULTIPLYX2_ADD opcode with all 3 source - * operands using unique PVS_REG_TEMPORARY vector addresses requires special - * handling, which is currently not implemented! - * - * For a description of the vertex program instruction set see r300_reg.h. - */ +/* Radeon R5xx Acceleration, Revision 1.2 */ #include "main/glheader.h" #include "main/macros.h" #include "main/enums.h" #include "shader/program.h" +#include "shader/programopt.h" #include "shader/prog_instruction.h" +#include "shader/prog_optimize.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "tnl/tnl.h" +#include "compiler/radeon_compiler.h" +#include "radeon_mesa_to_rc.h" #include "r300_context.h" +#include "r300_fragprog_common.h" +#include "r300_state.h" -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ - t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ - (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ - t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ - -#define ZERO_SRC_0 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) - -#define ZERO_SRC_1 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) - -#define ZERO_SRC_2 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[2]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) - -#define ONE_SRC_0 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) - -#define ONE_SRC_1 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) - -#define ONE_SRC_2 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[2]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) - -#define FREE_TEMPS() \ - do { \ - int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ - if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->native = GL_FALSE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) - -int r300VertexProgUpdateParams(GLcontext * ctx, - struct r300_vertex_program_cont *vp, - float *dst) +/** + * Write parameter array for the given vertex program into dst. + * Return the total number of components written. + */ +static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst) { - int pi; - struct gl_vertex_program *mesa_vp = &vp->mesa_program; - float *dst_o = dst; - struct gl_program_parameter_list *paramList; + int i; - if (mesa_vp->IsNVProgram) { + if (vp->Base->IsNVProgram) { _mesa_load_tracked_matrices(ctx); - - for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { - *dst++ = ctx->VertexProgram.Parameters[pi][0]; - *dst++ = ctx->VertexProgram.Parameters[pi][1]; - *dst++ = ctx->VertexProgram.Parameters[pi][2]; - *dst++ = ctx->VertexProgram.Parameters[pi][3]; + } else { + if (vp->Base->Base.Parameters) { + _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters); } - return dst - dst_o; } - assert(mesa_vp->Base.Parameters); - _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); + for(i = 0; i < vp->code.constants.Count; ++i) { + const float * src = 0; + const struct rc_constant * constant = &vp->code.constants.Constants[i]; - if (mesa_vp->Base.Parameters->NumParameters * 4 > - VSF_MAX_FRAGMENT_LENGTH) { - fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); - _mesa_exit(-1); - } - - paramList = mesa_vp->Base.Parameters; - for (pi = 0; pi < paramList->NumParameters; pi++) { - switch (paramList->Parameters[pi].Type) { - - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); - case PROGRAM_CONSTANT: - *dst++ = paramList->ParameterValues[pi][0]; - *dst++ = paramList->ParameterValues[pi][1]; - *dst++ = paramList->ParameterValues[pi][2]; - *dst++ = paramList->ParameterValues[pi][3]; + switch(constant->Type) { + case RC_CONSTANT_EXTERNAL: + if (vp->Base->IsNVProgram) { + src = ctx->VertexProgram.Parameters[constant->u.External]; + } else { + src = vp->Base->Base.Parameters->ParameterValues[constant->u.External]; + } break; - default: - _mesa_problem(NULL, "Bad param type in %s", - __FUNCTION__); + case RC_CONSTANT_IMMEDIATE: + src = constant->u.Immediate; + break; } + dst[4*i] = src[0]; + dst[4*i + 1] = src[1]; + dst[4*i + 2] = src[2]; + dst[4*i + 3] = src[3]; } - return dst - dst_o; -} - -static unsigned long t_dst_mask(GLuint mask) -{ - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & VSF_FLAG_ALL; -} - -static unsigned long t_dst_class(enum register_file file) -{ - - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: - return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: - return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} - -static unsigned long t_dst_index(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT) - return vp->outputs[dst->Index]; - - return dst->Index; -} - -static unsigned long t_src_class(enum register_file file) -{ - - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_SRC_REG_TEMPORARY; - - case PROGRAM_INPUT: - return PVS_SRC_REG_INPUT; - - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - return PVS_SRC_REG_CONSTANT; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} - -static inline unsigned long t_swizzle(GLubyte swizzle) -{ -/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; -} - -#if 0 -static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) -{ - int i; - - if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", - __FUNCTION__, caller); - return; - } - - fprintf(stderr, "%s:<", caller); - for (i = 0; i < VERT_ATTRIB_MAX; i++) - fprintf(stderr, "%d ", vp->inputs[i]); - fprintf(stderr, ">\n"); - + return 4 * vp->code.constants.Count; } -#endif -static unsigned long t_src_index(struct r300_vertex_program *vp, - struct prog_src_register *src) +static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads) { + GLbitfield outputs = 0; int i; - int max_reg = -1; - if (src->File == PROGRAM_INPUT) { - if (vp->inputs[src->Index] != -1) - return vp->inputs[src->Index]; - - for (i = 0; i < VERT_ATTRIB_MAX; i++) - if (vp->inputs[i] > max_reg) - max_reg = vp->inputs[i]; - - vp->inputs[src->Index] = max_reg + 1; +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if (fpreads & (1 << (fp_attr))) \ + outputs |= (1 << (vp_result)); \ + } while (0) - //vp_dump_inputs(vp, __FUNCTION__); + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); - return vp->inputs[src->Index]; - } else { - if (src->Index < 0) { - fprintf(stderr, - "negative offsets for indirect addressing do not work.\n"); - return 0; - } - return src->Index; + for (i = 0; i <= 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); } -} -/* these two functions should probably be merged... */ +#undef ADD_OUTPUT -static unsigned long t_src(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SOURCE_OPCODE(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); -} + if ((fpreads & (1 << FRAG_ATTRIB_COL0)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0))) + outputs |= 1 << VERT_RESULT_BFC0; + if ((fpreads & (1 << FRAG_ATTRIB_COL1)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1))) + outputs |= 1 << VERT_RESULT_BFC1; -static unsigned long t_src_scalar(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SOURCE_OPCODE(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_src_class(src->File), - src-> - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src->RelAddr << 4); -} + outputs |= 1 << VERT_RESULT_HPOS; + if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + outputs |= 1 << VERT_RESULT_PSIZ; -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); - } - - return GL_TRUE; + return outputs; } -/* - * Instruction Inputs Output Description - * ----------- ------ ------ -------------------------------- - * ABS v v absolute value - * ADD v,v v add - * ARL s a address register load - * DP3 v,v ssss 3-component dot product - * DP4 v,v ssss 4-component dot product - * DPH v,v ssss homogeneous dot product - * DST v,v v distance vector - * EX2 s ssss exponential base 2 - * EXP s v exponential base 2 (approximate) - * FLR v v floor - * FRC v v fraction - * LG2 s ssss logarithm base 2 - * LIT v v compute light coefficients - * LOG s v logarithm base 2 (approximate) - * MAD v,v,v v multiply and add - * MAX v,v v maximum - * MIN v,v v minimum - * MOV v v move - * MUL v,v v multiply - * POW s,s ssss exponentiate - * RCP s ssss reciprocal - * RSQ s ssss reciprocal square root - * SGE v,v v set on greater than or equal - * SLT v,v v set on less than - * SUB v,v v subtract - * SWZ v v extended swizzle - * XPD v,v v cross product - * - * Table X.5: Summary of vertex program instructions. "v" indicates a - * floating-point vector input or output, "s" indicates a floating-point - * scalar input, "ssss" indicates a scalar output replicated across a - * 4-component result vector, and "a" indicates a single address register - * component. - */ - -static GLuint *t_opcode_abs(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - - inst[0] = - PVS_VECTOR_OPCODE(VE_MAXIMUM, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = - PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = 0; - - return inst; -} -static GLuint *t_opcode_add(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_arl(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_FLT2FIX_DX, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_dp3(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - - inst[0] = - PVS_VECTOR_OPCODE(VE_DOT_PRODUCT, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = - PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - SWIZZLE_ZERO, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = - PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - SWIZZLE_ZERO, t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_dp4(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_DOT_PRODUCT, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_dph(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - inst[0] = - PVS_VECTOR_OPCODE(VE_DOT_PRODUCT, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = - PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - PVS_SRC_SELECT_FORCE_1, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = t_src(vp, &src[1]); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_dst(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_DISTANCE_VECTOR, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_ex2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_MATH_OPCODE(ME_EXP_BASE2_FULL_DX, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_exp(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_MATH_OPCODE(ME_EXP_BASE2_DX, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_flr(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3], int *u_temp_i) -{ - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - inst[0] = - PVS_VECTOR_OPCODE(VE_FRACTION, *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = t_src(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - inst += 4; - - inst[0] = - PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = - PVS_SOURCE_OPCODE(*u_temp_i, PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, - /* Not 100% sure about this */ - (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - inst[3] = ZERO_SRC_0; - (*u_temp_i)--; - - return inst; -} - -static GLuint *t_opcode_frc(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_FRACTION, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_lg2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - - inst[0] = - PVS_MATH_OPCODE(ME_LOG_BASE2_FULL_DX, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = - PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_lit(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - - inst[0] = - PVS_MATH_OPCODE(ME_LIGHT_COEFF_DX, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - inst[1] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - PVS_SRC_SELECT_FORCE_0, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0].RelAddr << 4); - inst[2] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - PVS_SRC_SELECT_FORCE_0, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0].RelAddr << 4); - inst[3] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - PVS_SRC_SELECT_FORCE_0, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0].RelAddr << 4); - - return inst; -} - -static GLuint *t_opcode_log(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_MATH_OPCODE(ME_LOG_BASE2_DX, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_mad(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_MULTIPLY_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = t_src(vp, &src[2]); - - return inst; -} - -static GLuint *t_opcode_max(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_MAXIMUM, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_min(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_MINIMUM, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_mov(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = - PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_mul(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_MULTIPLY, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_pow(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_MATH_OPCODE(ME_POWER_FUNC_FF, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = t_src_scalar(vp, &src[1]); - - return inst; -} - -static GLuint *t_opcode_rcp(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_MATH_OPCODE(ME_RECIP_DX, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_rsq(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_MATH_OPCODE(ME_RECIP_SQRT_DX, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_sge(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_SET_GREATER_THAN_EQUAL, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_slt(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = - PVS_VECTOR_OPCODE(VE_SET_LESS_THAN, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = ZERO_SRC_1; - - return inst; -} - -static GLuint *t_opcode_sub(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - - inst[0] = - PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = - PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = 0; - - return inst; -} - -static GLuint *t_opcode_swz(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = - PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - - return inst; -} - -static GLuint *t_opcode_xpd(struct r300_vertex_program *vp, - struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3], int *u_temp_i) -{ - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - */ - - inst[0] = - PVS_VECTOR_OPCODE(VE_MULTIPLY_ADD, *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0].RelAddr << 4); - inst[2] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1].RelAddr << 4); - inst[3] = ZERO_SRC_1; - inst += 4; - - inst[0] = - PVS_VECTOR_OPCODE(VE_MULTIPLY_ADD, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1].RelAddr << 4); - inst[2] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0].RelAddr << 4); - inst[3] = - PVS_SOURCE_OPCODE(*u_temp_i, PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, - VSF_FLAG_NONE); - - (*u_temp_i)--; - - return inst; -} - -static void t_inputs_outputs(struct r300_vertex_program *vp) +static void t_inputs_outputs(struct r300_vertex_program_compiler * c) { int i; - int cur_reg = 0; + int cur_reg; + GLuint OutputsWritten, InputsRead; - for (i = 0; i < VERT_ATTRIB_MAX; i++) - vp->inputs[i] = -1; + OutputsWritten = c->Base.Program.OutputsWritten; + InputsRead = c->Base.Program.InputsRead; + cur_reg = -1; + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) + c->code->inputs[i] = ++cur_reg; + else + c->code->inputs[i] = -1; + } + + cur_reg = 0; for (i = 0; i < VERT_RESULT_MAX; i++) - vp->outputs[i] = -1; + c->code->outputs[i] = -1; - assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); + assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); - if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) { - vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { + c->code->outputs[VERT_RESULT_HPOS] = cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { - vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { - vp->outputs[VERT_RESULT_COL0] = cur_reg++; + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + if (OutputsWritten & (1 << VERT_RESULT_COL0)) { + c->code->outputs[VERT_RESULT_COL0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = - vp->outputs[VERT_RESULT_COL0] + 1; - cur_reg = vp->outputs[VERT_RESULT_COL1] + 1; + if (OutputsWritten & (1 << VERT_RESULT_COL1)) { + c->code->outputs[VERT_RESULT_COL1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = - vp->outputs[VERT_RESULT_COL0] + 2; - cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2; + if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + c->code->outputs[VERT_RESULT_BFC0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = - vp->outputs[VERT_RESULT_COL0] + 3; - cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; + if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + c->code->outputs[VERT_RESULT_BFC1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + cur_reg++; } -#if 0 - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; - } -#endif for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { - if (vp->key.OutputsWritten & (1 << i)) { - vp->outputs[i] = cur_reg++; + if (OutputsWritten & (1 << i)) { + c->code->outputs[i] = cur_reg++; } } -} - -static void r300TranslateVertexShader(struct r300_vertex_program *vp, - struct prog_instruction *vpi) -{ - int i; - GLuint *inst; - unsigned long num_operands; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - - vp->pos_end = 0; /* Not supported yet */ - vp->program.length = 0; - /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ - vp->translated = GL_TRUE; - vp->native = GL_TRUE; - - t_inputs_outputs(vp); - - for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END; - vpi++, inst += 4) { - - FREE_TEMPS(); - - if (!valid_dst(vp, &vpi->DstReg)) { - /* redirect result to unused temp */ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = u_temp_i; - } - num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - - /* copy the sources (src) from mesa into a local variable... is this needed? */ - for (i = 0; i < num_operands; i++) { - src[i] = vpi->SrcReg[i]; - } - - if (num_operands == 3) { /* TODO: scalars */ - if (CMP_SRCS(src[1], src[2]) - || CMP_SRCS(src[0], src[2])) { - inst[0] = - PVS_VECTOR_OPCODE(VE_ADD, - u_temp_i, VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SOURCE_OPCODE(t_src_index - (vp, &src[2]), - SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[2]. - File), - VSF_FLAG_NONE) | - (src[2].RelAddr << 4); - inst[2] = ZERO_SRC_2; - inst[3] = ZERO_SRC_2; - inst += 4; - - src[2].File = PROGRAM_TEMPORARY; - src[2].Index = u_temp_i; - src[2].RelAddr = 0; - u_temp_i--; - } - } - - if (num_operands >= 2) { - if (CMP_SRCS(src[1], src[0])) { - inst[0] = - PVS_VECTOR_OPCODE(VE_ADD, - u_temp_i, VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SOURCE_OPCODE(t_src_index - (vp, &src[0]), - SWIZZLE_X, SWIZZLE_Y, - SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[0]. - File), - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - inst += 4; - - src[0].File = PROGRAM_TEMPORARY; - src[0].Index = u_temp_i; - src[0].RelAddr = 0; - u_temp_i--; - } - } - - switch (vpi->Opcode) { - case OPCODE_ABS: - inst = t_opcode_abs(vp, vpi, inst, src); - break; - case OPCODE_ADD: - inst = t_opcode_add(vp, vpi, inst, src); - break; - case OPCODE_ARL: - inst = t_opcode_arl(vp, vpi, inst, src); - break; - case OPCODE_DP3: - inst = t_opcode_dp3(vp, vpi, inst, src); - break; - case OPCODE_DP4: - inst = t_opcode_dp4(vp, vpi, inst, src); - break; - case OPCODE_DPH: - inst = t_opcode_dph(vp, vpi, inst, src); - break; - case OPCODE_DST: - inst = t_opcode_dst(vp, vpi, inst, src); - break; - case OPCODE_EX2: - inst = t_opcode_ex2(vp, vpi, inst, src); - break; - case OPCODE_EXP: - inst = t_opcode_exp(vp, vpi, inst, src); - break; - case OPCODE_FLR: - inst = - t_opcode_flr(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - case OPCODE_FRC: - inst = t_opcode_frc(vp, vpi, inst, src); - break; - case OPCODE_LG2: - inst = t_opcode_lg2(vp, vpi, inst, src); - break; - case OPCODE_LIT: - inst = t_opcode_lit(vp, vpi, inst, src); - break; - case OPCODE_LOG: - inst = t_opcode_log(vp, vpi, inst, src); - break; - case OPCODE_MAD: - inst = t_opcode_mad(vp, vpi, inst, src); - break; - case OPCODE_MAX: - inst = t_opcode_max(vp, vpi, inst, src); - break; - case OPCODE_MIN: - inst = t_opcode_min(vp, vpi, inst, src); - break; - case OPCODE_MOV: - inst = t_opcode_mov(vp, vpi, inst, src); - break; - case OPCODE_MUL: - inst = t_opcode_mul(vp, vpi, inst, src); - break; - case OPCODE_POW: - inst = t_opcode_pow(vp, vpi, inst, src); - break; - case OPCODE_RCP: - inst = t_opcode_rcp(vp, vpi, inst, src); - break; - case OPCODE_RSQ: - inst = t_opcode_rsq(vp, vpi, inst, src); - break; - case OPCODE_SGE: - inst = t_opcode_sge(vp, vpi, inst, src); - break; - case OPCODE_SLT: - inst = t_opcode_slt(vp, vpi, inst, src); - break; - case OPCODE_SUB: - inst = t_opcode_sub(vp, vpi, inst, src); - break; - case OPCODE_SWZ: - inst = t_opcode_swz(vp, vpi, inst, src); - break; - case OPCODE_XPD: - inst = - t_opcode_xpd(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - default: - assert(0); - break; - } + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { + c->code->outputs[VERT_RESULT_FOGC] = cur_reg++; } +} - /* Some outputs may be artificially added, to match the inputs - of the fragment program. Blank the outputs here. */ - for (i = 0; i < VERT_RESULT_MAX; i++) { - if (vp->key.OutputsAdded & (1 << i)) { - inst[0] = PVS_VECTOR_OPCODE(VE_ADD, vp->outputs[i], - VSF_FLAG_ALL, - PVS_DST_REG_OUT); - inst[1] = ZERO_SRC_0; - inst[2] = ZERO_SRC_0; - inst[3] = ZERO_SRC_0; - inst += 4; - } +/** + * The NV_vertex_program spec mandates that all registers be + * initialized to zero. We do this here unconditionally. + * + * \note We rely on dead-code elimination in the compiler. + */ +static void initialize_NV_registers(struct radeon_compiler * compiler) +{ + unsigned int reg; + struct rc_instruction * inst; + + for(reg = 0; reg < 12; ++reg) { + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = reg; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; } - vp->program.length = (inst - vp->program.body.i); - if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->program.length = 0; - vp->native = GL_FALSE; - } -#if 0 - fprintf(stderr, "hw program:\n"); - for (i = 0; i < vp->program.length; i++) - fprintf(stderr, "%08x\n", vp->program.body.d[i]); -#endif + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->U.I.Opcode = RC_OPCODE_ARL; + inst->U.I.DstReg.File = RC_FILE_ADDRESS; + inst->U.I.DstReg.Index = 0; + inst->U.I.DstReg.WriteMask = WRITEMASK_X; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; } -/* DP4 version seems to trigger some hw peculiarity */ -//#define PREFER_DP4 - -static void position_invariant(struct gl_program *prog) +static struct r300_vertex_program *build_program(GLcontext *ctx, + struct r300_vertex_program_key *wanted_key, + const struct gl_vertex_program *mesa_vp) { - struct prog_instruction *vpi; - struct gl_program_parameter_list *paramList; - int i; - - gl_state_index tokens[STATE_LENGTH] = - { STATE_MVP_MATRIX, 0, 0, 0, 0 }; - - /* tokens[4] = matrix modifier */ -#ifdef PREFER_DP4 - tokens[4] = 0; /* not transposed or inverted */ -#else - tokens[4] = STATE_MATRIX_TRANSPOSE; -#endif - paramList = prog->Parameters; - - vpi = _mesa_alloc_instructions(prog->NumInstructions + 4); - _mesa_init_instructions(vpi, prog->NumInstructions + 4); - - for (i = 0; i < 4; i++) { - GLint idx; - tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */ - idx = _mesa_add_state_reference(paramList, tokens); -#ifdef PREFER_DP4 - vpi[i].Opcode = OPCODE_DP4; - vpi[i].StringPos = 0; - vpi[i].Data = 0; - - vpi[i].DstReg.File = PROGRAM_OUTPUT; - vpi[i].DstReg.Index = VERT_RESULT_HPOS; - vpi[i].DstReg.WriteMask = 1 << i; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; -#else - if (i == 0) - vpi[i].Opcode = OPCODE_MUL; - else - vpi[i].Opcode = OPCODE_MAD; - - vpi[i].StringPos = 0; - vpi[i].Data = 0; - - if (i == 3) - vpi[i].DstReg.File = PROGRAM_OUTPUT; - else - vpi[i].DstReg.File = PROGRAM_TEMPORARY; - vpi[i].DstReg.Index = 0; - vpi[i].DstReg.WriteMask = 0xf; - vpi[i].DstReg.CondMask = COND_TR; - - vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; - vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - vpi[i].SrcReg[1].File = PROGRAM_INPUT; - vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); - - if (i > 0) { - vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; - vpi[i].SrcReg[2].Index = 0; - vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; - } -#endif - } - - _mesa_copy_instructions(&vpi[i], prog->Instructions, - prog->NumInstructions); - - free(prog->Instructions); + struct r300_vertex_program *vp; + struct r300_vertex_program_compiler compiler; - prog->Instructions = vpi; + vp = _mesa_calloc(sizeof(*vp)); + vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); + _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - prog->NumInstructions += 4; - vpi = &prog->Instructions[prog->NumInstructions - 1]; + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE; - assert(vpi->Opcode == OPCODE_END); -} + compiler.code = &vp->code; + compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads); + compiler.SetHwInputOutput = &t_inputs_outputs; -static void insert_wpos(struct r300_vertex_program *vp, - struct gl_program *prog, GLuint temp_index) -{ - struct prog_instruction *vpi; - struct prog_instruction *vpi_insert; - int i = 0; + if (compiler.Base.Debug) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(&vp->Base->Base); + fflush(stderr); + } - vpi = _mesa_alloc_instructions(prog->NumInstructions + 2); - _mesa_init_instructions(vpi, prog->NumInstructions + 2); - /* all but END */ - _mesa_copy_instructions(vpi, prog->Instructions, - prog->NumInstructions - 1); - /* END */ - _mesa_copy_instructions(&vpi[prog->NumInstructions + 1], - &prog->Instructions[prog->NumInstructions - - 1], 1); - vpi_insert = &vpi[prog->NumInstructions - 1]; + if (mesa_vp->IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, vp->Base); + } - vpi_insert[i].Opcode = OPCODE_MOV; + radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base); - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; + if (mesa_vp->IsNVProgram) + initialize_NV_registers(&compiler.Base); - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; + rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); - vpi_insert[i].Opcode = OPCODE_MOV; + if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) { + rc_copy_output(&compiler.Base, + VERT_RESULT_HPOS, + vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); + } - vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; - vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; - vpi_insert[i].DstReg.CondMask = COND_TR; + if (vp->key.FogAttr != FRAG_ATTRIB_MAX) { + rc_move_output(&compiler.Base, + VERT_RESULT_FOGC, + vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); + } - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - i++; + r3xx_compile_vertex_program(&compiler); - free(prog->Instructions); + if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) { + rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n"); + } - prog->Instructions = vpi; + vp->error = compiler.Base.Error; - prog->NumInstructions += i; - vpi = &prog->Instructions[prog->NumInstructions - 1]; + vp->Base->Base.InputsRead = vp->code.InputsRead; + vp->Base->Base.OutputsWritten = vp->code.OutputsWritten; - assert(vpi->Opcode == OPCODE_END); -} + rc_destroy(&compiler.Base); -static void pos_as_texcoord(struct r300_vertex_program *vp, - struct gl_program *prog) -{ - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - /* should do something else if no temps left... */ - prog->NumTemporaries++; - - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT - && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - } - } - insert_wpos(vp, prog, tempregi); + return vp; } -static struct r300_vertex_program *build_program(struct r300_vertex_program_key - *wanted_key, struct gl_vertex_program - *mesa_vp, GLint wpos_idx) +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) { + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_program_key wanted_key = { 0 }; + struct r300_vertex_program_cont *vpc; struct r300_vertex_program *vp; - vp = _mesa_calloc(sizeof(*vp)); - _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - vp->wpos_idx = wpos_idx; - - if (mesa_vp->IsPositionInvariant) { - position_invariant(&mesa_vp->Base); + vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + + if (!r300->selected_fp) { + /* This can happen when GetProgramiv is called to check + * whether the program runs natively. + * + * To be honest, this is not a very good solution, + * but solving the problem of reporting good values + * for those queries is tough anyway considering that + * we recompile vertex programs based on the precise + * fragment program that is in use. + */ + r300SelectAndTranslateFragmentShader(ctx); } - if (wpos_idx > -1) { - pos_as_texcoord(vp, &mesa_vp->Base); + wanted_key.FpReads = r300->selected_fp->InputsRead; + wanted_key.FogAttr = r300->selected_fp->fog_attr; + wanted_key.WPosAttr = r300->selected_fp->wpos_attr; + + for (vp = vpc->progs; vp; vp = vp->next) { + if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) + == 0) { + return r300->selected_vp = vp; + } } - assert(mesa_vp->Base.NumInstructions); - vp->num_temporaries = mesa_vp->Base.NumTemporaries; - r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); + vp = build_program(ctx, &wanted_key, &vpc->mesa_program); + vp->next = vpc->progs; + vpc->progs = vp; - return vp; + return r300->selected_vp = vp; } -static void add_outputs(struct r300_vertex_program_key *key, GLint vert) -{ - if (key->OutputsWritten & (1 << vert)) - return; - - key->OutputsWritten |= 1 << vert; - key->OutputsAdded |= 1 << vert; -} +#define bump_vpu_count(ptr, new_count) do { \ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \ + int _nc=(new_count)/4; \ + if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ + } while(0) -void r300SelectVertexShader(r300ContextPtr r300) +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) { - GLcontext *ctx = ctx = r300->radeon.glCtx; - GLuint InputsRead; - struct r300_vertex_program_key wanted_key = { 0 }; - GLint i; - struct r300_vertex_program_cont *vpc; - struct r300_vertex_program *vp; - GLint wpos_idx; - - vpc = - (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + int i; - wpos_idx = -1; - if (InputsRead & FRAG_BIT_WPOS) { - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (!(InputsRead & (FRAG_BIT_TEX0 << i))) - break; + assert((code->length > 0) && (code->length % 4 == 0)); - if (i == ctx->Const.MaxTextureUnits) { - fprintf(stderr, "\tno free texcoord found\n"); - _mesa_exit(-1); - } + R300_STATECHANGE( r300, vap_flush ); - InputsRead |= (FRAG_BIT_TEX0 << i); - wpos_idx = i; + switch ((dest >> 8) & 0xf) { + case 0: + R300_STATECHANGE(r300, vpi); + for (i = 0; i < code->length; i++) + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff)); + break; + case 2: + R300_STATECHANGE(r300, vpp); + for (i = 0; i < code->length; i++) + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff)); + break; + case 4: + R300_STATECHANGE(r300, vps); + for (i = 0; i < code->length; i++) + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff)); + break; + default: + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); + exit(-1); } - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; - wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; +} - add_outputs(&wanted_key, VERT_RESULT_HPOS); +void r300SetupVertexProgram(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_vertex_program *prog = rmesa->selected_vp; + int inst_count = 0; + int param_count = 0; - if (InputsRead & FRAG_BIT_COL0) { - add_outputs(&wanted_key, VERT_RESULT_COL0); - } + /* Reset state, in case we don't use something */ + ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; - if (InputsRead & FRAG_BIT_COL1) { - add_outputs(&wanted_key, VERT_RESULT_COL1); - } + R300_STATECHANGE(rmesa, vap_flush); + R300_STATECHANGE(rmesa, vpp); + param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + bump_vpu_count(rmesa->hw.vpp.cmd, param_count); + param_count /= 4; - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - add_outputs(&wanted_key, VERT_RESULT_TEX0 + i); - } - } + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); + inst_count = (prog->code.length / 4) - 1; - if (vpc->mesa_program.IsPositionInvariant) { - /* we wan't position don't we ? */ - wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); - } + r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead), + _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries); - for (vp = vpc->progs; vp; vp = vp->next) - if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) - == 0) { - r300->selected_vp = vp; - return; - } - //_mesa_print_program(&vpc->mesa_program.Base); + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); - vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); - vp->next = vpc->progs; - vpc->progs = vp; - r300->selected_vp = vp; + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); }