X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fr300_vertprog.c;h=dd0f27f9cba12f76d9b408873f7001be4ac0ddb7;hb=8398535331dae39ff5f52e94a69abdbf7507a343;hp=95cedd9d919f4f2239892eab0182f6cbbf6ba784;hpb=11cd795940723e79f99e7887a2e2dd8410352572;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 95cedd9d919..dd0f27f9cba 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -40,927 +40,124 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "tnl/tnl.h" +#include "compiler/radeon_compiler.h" #include "compiler/radeon_nqssadce.h" #include "r300_context.h" #include "r300_state.h" -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ - t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ - (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ - t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ - -/* - * Take an already-setup and valid source then swizzle it appropriately to - * obtain a constant ZERO or ONE source. +/** + * Write parameter array for the given vertex program into dst. + * Return the total number of components written. */ -#define __CONST(x, y) \ - (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_src_class(src[x].File), \ - VSF_FLAG_NONE) | (src[x].RelAddr << 4)) - -#define FREE_TEMPS() \ - do { \ - int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ - if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->error = GL_TRUE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) - -static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) +static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst) { - int pi; - float *dst_o = dst; - struct gl_program_parameter_list *paramList; + int i; - if (vp->IsNVProgram) { + if (vp->Base->IsNVProgram) { _mesa_load_tracked_matrices(ctx); - - for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { - *dst++ = ctx->VertexProgram.Parameters[pi][0]; - *dst++ = ctx->VertexProgram.Parameters[pi][1]; - *dst++ = ctx->VertexProgram.Parameters[pi][2]; - *dst++ = ctx->VertexProgram.Parameters[pi][3]; + } else { + if (vp->Base->Base.Parameters) { + _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters); } - return dst - dst_o; } - if (!vp->Base.Parameters) - return 0; - - _mesa_load_state_parameters(ctx, vp->Base.Parameters); - - if (vp->Base.Parameters->NumParameters * 4 > - VSF_MAX_FRAGMENT_LENGTH) { + if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) { + /* Should have checked this earlier... */ fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); _mesa_exit(-1); } - paramList = vp->Base.Parameters; - for (pi = 0; pi < paramList->NumParameters; pi++) { - switch (paramList->Parameters[pi].Type) { - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); - case PROGRAM_CONSTANT: - *dst++ = paramList->ParameterValues[pi][0]; - *dst++ = paramList->ParameterValues[pi][1]; - *dst++ = paramList->ParameterValues[pi][2]; - *dst++ = paramList->ParameterValues[pi][3]; - break; - default: - _mesa_problem(NULL, "Bad param type in %s", - __FUNCTION__); - } - - } - - return dst - dst_o; -} - -static unsigned long t_dst_mask(GLuint mask) -{ - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & VSF_FLAG_ALL; -} - -static unsigned long t_dst_class(gl_register_file file) -{ + for(i = 0; i < vp->code.constants.Count; ++i) { + const float * src = 0; + const struct rc_constant * constant = &vp->code.constants.Constants[i]; - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: - return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: - return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} - -static unsigned long t_dst_index(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT) - return vp->outputs[dst->Index]; + switch(constant->Type) { + case RC_CONSTANT_EXTERNAL: + if (vp->Base->IsNVProgram) { + src = ctx->VertexProgram.Parameters[constant->u.External]; + } else { + src = vp->Base->Base.Parameters->ParameterValues[constant->u.External]; + } + break; - return dst->Index; -} + case RC_CONSTANT_IMMEDIATE: + src = constant->u.Immediate; + break; + } -static unsigned long t_src_class(gl_register_file file) -{ - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: - return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: - return PVS_SRC_REG_CONSTANT; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; + dst[4*i] = src[0]; + dst[4*i + 1] = src[1]; + dst[4*i + 2] = src[2]; + dst[4*i + 3] = src[3]; } -} -static INLINE unsigned long t_swizzle(GLubyte swizzle) -{ -/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; + return 4 * vp->code.constants.Count; } -#if 0 -static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) +static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads) { + GLbitfield outputs = 0; int i; - if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, - caller); - return; - } - - fprintf(stderr, "%s:<", caller); - for (i = 0; i < VERT_ATTRIB_MAX; i++) - fprintf(stderr, "%d ", vp->inputs[i]); - fprintf(stderr, ">\n"); - -} -#endif - -static unsigned long t_src_index(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - if (src->File == PROGRAM_INPUT) { - assert(vp->inputs[src->Index] != -1); - return vp->inputs[src->Index]; - } else { - if (src->Index < 0) { - fprintf(stderr, - "negative offsets for indirect addressing do not work.\n"); - return 0; - } - return src->Index; - } -} - -/* these two functions should probably be merged... */ - -static unsigned long t_src(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->Negate) | (src->RelAddr << 4); -} +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if (fpreads & (1 << (fp_attr))) \ + outputs |= (1 << (vp_result)); \ + } while (0) -static unsigned long t_src_scalar(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_src_class(src->File), - src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src->RelAddr << 4); -} + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); + for (i = 0; i <= 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); } - return GL_TRUE; -} - -static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = 0; - - return inst; -} - -static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = - PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - PVS_SRC_SELECT_FORCE_1, - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(*u_temp_i, - PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, - /* Not 100% sure about this */ - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - inst[3] = __CONST(0, SWIZZLE_ZERO); - (*u_temp_i)--; - - return inst; -} - -static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - - inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - return inst; -} - -static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = t_src(vp, &src[2]); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = t_src_scalar(vp, &src[1]); - - return inst; -} - -static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} +#undef ADD_OUTPUT -static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} + if ((fpreads & (1 << FRAG_ATTRIB_COL0)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0))) + outputs |= 1 << VERT_RESULT_BFC0; + if ((fpreads & (1 << FRAG_ATTRIB_COL1)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1))) + outputs |= 1 << VERT_RESULT_BFC1; -static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} + outputs |= 1 << VERT_RESULT_HPOS; + if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + outputs |= 1 << VERT_RESULT_PSIZ; -static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - -#if 0 - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = 0; -#else - inst[0] = - PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ONE); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); -#endif - - return inst; + return outputs; } -static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} -static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - */ - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = - PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE); - - (*u_temp_i)--; - - return inst; -} - -static void t_inputs_outputs(struct r300_vertex_program *vp, struct gl_program * glvp) +static void t_inputs_outputs(struct r300_vertex_program_compiler * c) { int i; int cur_reg; GLuint OutputsWritten, InputsRead; - OutputsWritten = glvp->OutputsWritten; - InputsRead = glvp->InputsRead; + OutputsWritten = c->Base.Program.OutputsWritten; + InputsRead = c->Base.Program.InputsRead; cur_reg = -1; for (i = 0; i < VERT_ATTRIB_MAX; i++) { if (InputsRead & (1 << i)) - vp->inputs[i] = ++cur_reg; + c->code->inputs[i] = ++cur_reg; else - vp->inputs[i] = -1; + c->code->inputs[i] = -1; } cur_reg = 0; for (i = 0; i < VERT_RESULT_MAX; i++) - vp->outputs[i] = -1; + c->code->outputs[i] = -1; assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { - vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + c->code->outputs[VERT_RESULT_HPOS] = cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { - vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++; } /* If we're writing back facing colors we need to send @@ -970,668 +167,94 @@ static void t_inputs_outputs(struct r300_vertex_program *vp, struct gl_program * * get written into appropriate output vectors. */ if (OutputsWritten & (1 << VERT_RESULT_COL0)) { - vp->outputs[VERT_RESULT_COL0] = cur_reg++; + c->code->outputs[VERT_RESULT_COL0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = cur_reg++; + c->code->outputs[VERT_RESULT_COL1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { if (OutputsWritten & (1 << i)) { - vp->outputs[i] = cur_reg++; + c->code->outputs[i] = cur_reg++; } } if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; - } -} - -static void translate_vertex_program(struct r300_vertex_program *vp, struct gl_program * glvp) -{ - struct prog_instruction *vpi = glvp->Instructions; - int i; - GLuint *inst; - unsigned long num_operands; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - - vp->pos_end = 0; /* Not supported yet */ - vp->hw_code.length = 0; - vp->error = GL_FALSE; - - t_inputs_outputs(vp, glvp); - - for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END; - vpi++, inst += 4) { - - FREE_TEMPS(); - - if (!valid_dst(vp, &vpi->DstReg)) { - /* redirect result to unused temp */ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = u_temp_i; - } - - num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - - /* copy the sources (src) from mesa into a local variable... is this needed? */ - for (i = 0; i < num_operands; i++) { - src[i] = vpi->SrcReg[i]; - } - - if (num_operands == 3) { /* TODO: scalars */ - if (CMP_SRCS(src[1], src[2]) - || CMP_SRCS(src[0], src[2])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[2]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[2].File), - VSF_FLAG_NONE) | (src[2]. - RelAddr << - 4); - inst[2] = __CONST(2, SWIZZLE_ZERO); - inst[3] = __CONST(2, SWIZZLE_ZERO); - inst += 4; - - src[2].File = PROGRAM_TEMPORARY; - src[2].Index = u_temp_i; - src[2].RelAddr = 0; - u_temp_i--; - } - } - - if (num_operands >= 2) { - if (CMP_SRCS(src[1], src[0])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[0].File), - VSF_FLAG_NONE) | (src[0]. - RelAddr << - 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - src[0].File = PROGRAM_TEMPORARY; - src[0].Index = u_temp_i; - src[0].RelAddr = 0; - u_temp_i--; - } - } - - switch (vpi->Opcode) { - case OPCODE_ABS: - inst = r300TranslateOpcodeABS(vp, vpi, inst, src); - break; - case OPCODE_ADD: - inst = r300TranslateOpcodeADD(vp, vpi, inst, src); - break; - case OPCODE_ARL: - inst = r300TranslateOpcodeARL(vp, vpi, inst, src); - break; - case OPCODE_DP3: - inst = r300TranslateOpcodeDP3(vp, vpi, inst, src); - break; - case OPCODE_DP4: - inst = r300TranslateOpcodeDP4(vp, vpi, inst, src); - break; - case OPCODE_DPH: - inst = r300TranslateOpcodeDPH(vp, vpi, inst, src); - break; - case OPCODE_DST: - inst = r300TranslateOpcodeDST(vp, vpi, inst, src); - break; - case OPCODE_EX2: - inst = r300TranslateOpcodeEX2(vp, vpi, inst, src); - break; - case OPCODE_EXP: - inst = r300TranslateOpcodeEXP(vp, vpi, inst, src); - break; - case OPCODE_FLR: - inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - case OPCODE_FRC: - inst = r300TranslateOpcodeFRC(vp, vpi, inst, src); - break; - case OPCODE_LG2: - inst = r300TranslateOpcodeLG2(vp, vpi, inst, src); - break; - case OPCODE_LIT: - inst = r300TranslateOpcodeLIT(vp, vpi, inst, src); - break; - case OPCODE_LOG: - inst = r300TranslateOpcodeLOG(vp, vpi, inst, src); - break; - case OPCODE_MAD: - inst = r300TranslateOpcodeMAD(vp, vpi, inst, src); - break; - case OPCODE_MAX: - inst = r300TranslateOpcodeMAX(vp, vpi, inst, src); - break; - case OPCODE_MIN: - inst = r300TranslateOpcodeMIN(vp, vpi, inst, src); - break; - case OPCODE_MOV: - inst = r300TranslateOpcodeMOV(vp, vpi, inst, src); - break; - case OPCODE_MUL: - inst = r300TranslateOpcodeMUL(vp, vpi, inst, src); - break; - case OPCODE_POW: - inst = r300TranslateOpcodePOW(vp, vpi, inst, src); - break; - case OPCODE_RCP: - inst = r300TranslateOpcodeRCP(vp, vpi, inst, src); - break; - case OPCODE_RSQ: - inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src); - break; - case OPCODE_SGE: - inst = r300TranslateOpcodeSGE(vp, vpi, inst, src); - break; - case OPCODE_SLT: - inst = r300TranslateOpcodeSLT(vp, vpi, inst, src); - break; - case OPCODE_SUB: - inst = r300TranslateOpcodeSUB(vp, vpi, inst, src); - break; - case OPCODE_SWZ: - inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src); - break; - case OPCODE_XPD: - inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - default: - vp->error = GL_TRUE; - break; - } - } - - vp->hw_code.length = (inst - vp->hw_code.body.d); - if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->error = GL_TRUE; + c->code->outputs[VERT_RESULT_FOGC] = cur_reg++; } } -static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) -{ - struct prog_instruction *vpi; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); - - vpi = &prog->Instructions[prog->NumInstructions - 3]; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_HPOS; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_END; -} - -static void pos_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - - prog->NumTemporaries++; - - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - } - } - - insert_wpos(prog, tempregi, tex_id); - - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -/** - * The fogcoord attribute is special in that only the first component - * is relevant, and the remaining components are always fixed (when read - * from by the fragment program) to yield an X001 pattern. - * - * We need to enforce this either in the vertex program or in the fragment - * program, and this code chooses not to enforce it in the vertex program. - * This is slightly cheaper, as long as the fragment program does not use - * weird swizzles. - * - * And it seems that usually, weird swizzles are not used, so... - * - * See also the counterpart rewriting for fragment programs. - */ -static void fog_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - - vpi = prog->Instructions; - while (vpi->Opcode != OPCODE_END) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_X; - } - - ++vpi; - } - - prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -static int translateABS(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_MAX; - inst->SrcReg[1] = inst->SrcReg[0]; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateDP3(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - return 0; -} - -static int translateDPH(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - - return 0; -} - -static int translateFLR(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - struct prog_dst_register dst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - dst = inst->DstReg; - - inst->Opcode = OPCODE_FRC; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - ++inst; - - inst->Opcode = OPCODE_ADD; - inst->DstReg = dst; - inst->SrcReg[0] = (inst-1)->SrcReg[0]; - inst->SrcReg[1].File = PROGRAM_TEMPORARY; - inst->SrcReg[1].Index = tmp_idx; - inst->SrcReg[1].Negate = NEGATE_XYZW; - - return 1; -} - -static int translateSUB(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_ADD; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateSWZ(struct gl_program *prog, int pos) -{ - prog->Instructions[pos].Opcode = OPCODE_MOV; - - return 0; -} - -static int translateXPD(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - - *(inst+1) = *inst; - - inst->Opcode = OPCODE_MUL; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - ++inst; - - inst->Opcode = OPCODE_MAD; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - inst->SrcReg[2].File = PROGRAM_TEMPORARY; - inst->SrcReg[2].Index = tmp_idx; - - return 1; -} - -static void translateInsts(struct gl_program *prog) -{ - struct prog_instruction *inst; - int i; - - for (i = 0; i < prog->NumInstructions; ++i) { - inst = &prog->Instructions[i]; - - switch (inst->Opcode) { - case OPCODE_ABS: - i += translateABS(prog, i); - break; - case OPCODE_DP3: - i += translateDP3(prog, i); - break; - case OPCODE_DPH: - i += translateDPH(prog, i); - break; - case OPCODE_FLR: - i += translateFLR(prog, i); - break; - case OPCODE_SUB: - i += translateSUB(prog, i); - break; - case OPCODE_SWZ: - i += translateSWZ(prog, i); - break; - case OPCODE_XPD: - i += translateXPD(prog, i); - break; - default: - break; - } - } -} - -#define ADD_OUTPUT(fp_attr, vp_result) \ - do { \ - if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \ - OutputsAdded |= 1 << (vp_result); \ - count++; \ - } \ - } while (0) - -static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - GLuint OutputsAdded, FpReads; - int i, count; - - OutputsAdded = 0; - count = 0; - FpReads = r300->selected_fp->InputsRead; - - ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); - ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); - - for (i = 0; i < 7; ++i) { - ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); - } - - /* Some outputs may be artificially added, to match the inputs of the fragment program. - * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by - * vertex program are undefined, so just use MOV [vertex_result], CONST[0] - */ - if (count > 0) { - struct prog_instruction *inst; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); - inst = &prog->Instructions[prog->NumInstructions - 1 - count]; - - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (OutputsAdded & (1 << i)) { - inst->Opcode = OPCODE_MOV; - - inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = i; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; - - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = 0; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++inst; - } - } - - prog->OutputsWritten |= OutputsAdded; - } -} - -#undef ADD_OUTPUT - -static void nqssadceInit(struct nqssadce_state* s) -{ - r300ContextPtr r300 = (r300ContextPtr)(s->UserData); - GLuint fp_reads; - - fp_reads = r300->selected_fp->InputsRead; - { - if (fp_reads & FRAG_BIT_COL0) { - s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; - } - - if (fp_reads & FRAG_BIT_COL1) { - s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; - } - } - - { - int i; - for (i = 0; i < 8; ++i) { - if (fp_reads & FRAG_BIT_TEX(i)) { - s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; - } - } - } - - s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; - if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) - s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; -} - -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) -{ - (void) opcode; - (void) reg; - - return GL_TRUE; -} static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program_key *wanted_key, const struct gl_vertex_program *mesa_vp) { - r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program *vp; - struct gl_vertex_program * glvp = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); - struct gl_program *prog; + struct r300_vertex_program_compiler compiler; vp = _mesa_calloc(sizeof(*vp)); - vp->Base = glvp; + vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - prog = &glvp->Base; + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE; - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Initial vertex program:\n"); - _mesa_print_program(prog); - fflush(stdout); - } + compiler.code = &vp->code; + compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads); + compiler.SetHwInputOutput = &t_inputs_outputs; - if (glvp->IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, glvp); - } - - if (r300->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) { - pos_as_texcoord(&glvp->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0); + if (compiler.Base.Debug) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(&vp->Base->Base); + fflush(stderr); } - if (r300->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) { - fog_as_texcoord(&glvp->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0); + if (mesa_vp->IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, vp->Base); } - addArtificialOutputs(ctx, prog); + rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base); - translateInsts(prog); + rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(prog); - fflush(stdout); + if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) { + rc_copy_output(&compiler.Base, + VERT_RESULT_HPOS, + vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); } - { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadceInit, - .IsNativeSwizzle = &swizzleIsNative, - .BuildSwizzle = NULL - }; - radeonNqssaDce(prog, &nqssadce, r300); - - /* We need this step for reusing temporary registers */ - _mesa_optimize_program(ctx, prog); - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after NQSSADCE:\n"); - _mesa_print_program(prog); - fflush(stdout); - } + if (vp->key.FogAttr != FRAG_ATTRIB_MAX) { + rc_move_output(&compiler.Base, + VERT_RESULT_FOGC, + vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); } - assert(prog->NumInstructions); - { - struct prog_instruction *inst; - int max, i, tmp; - - inst = prog->Instructions; - max = -1; - while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - if ((int) inst->SrcReg[i].Index > max) { - max = inst->SrcReg[i].Index; - } - } - } - - if (_mesa_num_inst_dst_regs(inst->Opcode)) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - if ((int) inst->DstReg.Index > max) { - max = inst->DstReg.Index; - } - } - } - ++inst; - } + r3xx_compile_vertex_program(&compiler); + vp->error = compiler.Base.Error; - /* We actually want highest index of used temporary register, - * not the number of temporaries used. - * These values aren't always the same. - */ - vp->num_temporaries = max + 1; - } + vp->Base->Base.InputsRead = vp->code.InputsRead; + vp->Base->Base.OutputsWritten = vp->code.OutputsWritten; - translate_vertex_program(vp, &glvp->Base); - - vp->InputsRead = glvp->Base.InputsRead; - vp->OutputsWritten = glvp->Base.OutputsWritten; + rc_destroy(&compiler.Base); return vp; } @@ -1645,8 +268,8 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; wanted_key.FpReads = r300->selected_fp->InputsRead; - wanted_key.FogAttr = r300->selected_fp->code.fog_attr; - wanted_key.WPosAttr = r300->selected_fp->code.wpos_attr; + wanted_key.FogAttr = r300->selected_fp->fog_attr; + wanted_key.WPosAttr = r300->selected_fp->wpos_attr; for (vp = vpc->progs; vp; vp = vp->next) { if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) @@ -1669,12 +292,14 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ } while(0) -static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code) +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) { int i; assert((code->length > 0) && (code->length % 4 == 0)); + R300_STATECHANGE( r300, vap_flush ); + switch ((dest >> 8) & 0xf) { case 0: R300_STATECHANGE(r300, vpi); @@ -1712,16 +337,17 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + R300_STATECHANGE(rmesa, vap_flush); R300_STATECHANGE(rmesa, vpp); - param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); - inst_count = (prog->hw_code.length / 4) - 1; + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); + inst_count = (prog->code.length / 4) - 1; - r300VapCntl(rmesa, _mesa_bitcount(prog->InputsRead), - _mesa_bitcount(prog->OutputsWritten), prog->num_temporaries); + r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead), + _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries); R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |