X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fcompiler%2Fr3xx_vertprog.c;h=0efd2c91e6aa99589c4e5c426bd784bb72f6adc7;hb=d1b4351e603522be11061522cb6b685da9ef1fee;hp=fc9c8f805ae0091751065bd4cc52d2356e385d10;hpb=73fc09a7bf5c63b595251dc10997891c72ecb119;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index fc9c8f805ae..0efd2c91e6a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -22,14 +22,14 @@ #include "radeon_compiler.h" +#include + #include "../r300_reg.h" #include "radeon_nqssadce.h" #include "radeon_program.h" #include "radeon_program_alu.h" -#include "shader/prog_print.h" - /* * Take an already-setup and valid source then swizzle it appropriately to @@ -42,103 +42,82 @@ t_swizzle(y), \ t_swizzle(y), \ t_src_class(vpi->SrcReg[x].File), \ - NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) -static unsigned long t_dst_mask(GLuint mask) +static unsigned long t_dst_mask(unsigned int mask) { - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & WRITEMASK_XYZW; + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; } -static unsigned long t_dst_class(gl_register_file file) +static unsigned long t_dst_class(rc_register_file file) { - switch (file) { - case PROGRAM_TEMPORARY: + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: + case RC_FILE_OUTPUT: return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: + case RC_FILE_ADDRESS: return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; } } static unsigned long t_dst_index(struct r300_vertex_program_code *vp, - struct prog_dst_register *dst) + struct rc_dst_register *dst) { - if (dst->File == PROGRAM_OUTPUT) + if (dst->File == RC_FILE_OUTPUT) return vp->outputs[dst->Index]; return dst->Index; } -static unsigned long t_src_class(gl_register_file file) +static unsigned long t_src_class(rc_register_file file) { switch (file) { - case PROGRAM_TEMPORARY: + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: + case RC_FILE_INPUT: return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: + case RC_FILE_CONSTANT: return PVS_SRC_REG_CONSTANT; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; } } -static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b) +static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) { unsigned long aclass = t_src_class(a.File); unsigned long bclass = t_src_class(b.File); if (aclass != bclass) - return GL_FALSE; + return 0; if (aclass == PVS_SRC_REG_TEMPORARY) - return GL_FALSE; + return 0; if (a.RelAddr || b.RelAddr) - return GL_TRUE; + return 1; if (a.Index != b.Index) - return GL_TRUE; + return 1; - return GL_FALSE; + return 0; } -static INLINE unsigned long t_swizzle(GLubyte swizzle) +static inline unsigned long t_swizzle(unsigned int swizzle) { - /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ return swizzle; } static unsigned long t_src_index(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - if (src->File == PROGRAM_INPUT) { + if (src->File == RC_FILE_INPUT) { assert(vp->inputs[src->Index] != -1); return vp->inputs[src->Index]; } else { @@ -154,9 +133,9 @@ static unsigned long t_src_index(struct r300_vertex_program_code *vp, /* these two functions should probably be merged... */ static unsigned long t_src(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - /* src->Negate uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -169,9 +148,9 @@ static unsigned long t_src(struct r300_vertex_program_code *vp, } static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - /* src->Negate uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -180,79 +159,79 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src->Negate ? NEGATE_XYZW : NEGATE_NONE) | + src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (src->RelAddr << 4); } -static GLboolean valid_dst(struct r300_vertex_program_code *vp, - struct prog_dst_register *dst) +static int valid_dst(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) { - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; + } else if (dst->File == RC_FILE_ADDRESS) { assert(dst->Index == 0); } - return GL_TRUE; + return 1; } static void ei_vector1(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); } static void ei_vector2(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = t_src(vp, &vpi->SrcReg[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); } static void ei_math1(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); } static void ei_lit(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -262,51 +241,91 @@ static void ei_lit(struct r300_vertex_program_code *vp, PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); } static void ei_mad(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. + * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. animations with matrix blending instead of quaternion blending). + * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Only use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && + vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && + vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + 0, + 1, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = t_src(vp, &vpi->SrcReg[1]); inst[3] = t_src(vp, &vpi->SrcReg[2]); } static void ei_pow(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } @@ -321,8 +340,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi compiler->SetHwInputOutput(compiler); for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { - struct prog_instruction *vpi = &rci->I; - GLuint *inst = compiler->code->body.d + compiler->code->length; + struct rc_sub_instruction *vpi = &rci->I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; /* Skip instructions writing to non-existing destination */ if (!valid_dst(compiler->code, &vpi->DstReg)) @@ -334,26 +353,26 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } switch (vpi->Opcode) { - case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; - case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; - case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; - case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; - case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; - case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; - case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; - case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; - case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; - case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; - case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; - case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; - case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; - case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; - case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; - case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; - case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; - case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; - case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; - case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; default: rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode); return; @@ -367,36 +386,35 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } struct temporary_allocation { - GLuint Allocated:1; - GLuint HwTemp:15; + unsigned int Allocated:1; + unsigned int HwTemp:15; struct rc_instruction * LastRead; }; static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *inst; - GLuint num_orig_temps = 0; - GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + unsigned int num_orig_temps = 0; + char hwtemps[VSF_MAX_FRAGMENT_TEMPS]; struct temporary_allocation * ta; - GLuint i, j; + unsigned int i, j; compiler->code->num_temporaries = 0; memset(hwtemps, 0, sizeof(hwtemps)); /* Pass 1: Count original temporaries and allocate structures */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) { if (inst->I.SrcReg[i].Index >= num_orig_temps) num_orig_temps = inst->I.SrcReg[i].Index + 1; } } - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + if (opcode->HasDstReg) { + if (inst->I.DstReg.File == RC_FILE_TEMPORARY) { if (inst->I.DstReg.Index >= num_orig_temps) num_orig_temps = inst->I.DstReg.Index + 1; } @@ -409,32 +427,31 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) ta[inst->I.SrcReg[i].Index].LastRead = inst; } } /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { - GLuint orig = inst->I.SrcReg[i].Index; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->I.SrcReg[i].Index; inst->I.SrcReg[i].Index = ta[orig].HwTemp; if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = GL_FALSE; + hwtemps[ta[orig].HwTemp] = 0; } } - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { - GLuint orig = inst->I.DstReg.Index; + if (opcode->HasDstReg) { + if (inst->I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->I.DstReg.Index; if (!ta[orig].Allocated) { for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { @@ -444,9 +461,9 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c if (j >= VSF_MAX_FRAGMENT_TEMPS) { fprintf(stderr, "Out of hw temporaries\n"); } else { - ta[orig].Allocated = GL_TRUE; + ta[orig].Allocated = 1; ta[orig].HwTemp = j; - hwtemps[j] = GL_TRUE; + hwtemps[j] = 1; if (j >= compiler->code->num_temporaries) compiler->code->num_temporaries = j + 1; @@ -464,45 +481,45 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c * Vertex engine cannot read two inputs or two constants at the same time. * Introduce intermediate MOVs to temporary registers to account for this. */ -static GLboolean transform_source_conflicts( +static int transform_source_conflicts( struct radeon_compiler *c, struct rc_instruction* inst, void* unused) { - GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - if (num_operands == 3) { + if (opcode->NumSrcRegs == 3) { if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.Opcode = RC_OPCODE_MOV; + inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->I.DstReg.Index = tmpreg; inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; reset_srcreg(&inst->I.SrcReg[2]); - inst->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[2].File = RC_FILE_TEMPORARY; inst->I.SrcReg[2].Index = tmpreg; } } - if (num_operands >= 2) { + if (opcode->NumSrcRegs >= 2) { if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.Opcode = RC_OPCODE_MOV; + inst_mov->I.DstReg.File = RC_FILE_TEMPORARY; inst_mov->I.DstReg.Index = tmpreg; inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; reset_srcreg(&inst->I.SrcReg[1]); - inst->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[1].File = RC_FILE_TEMPORARY; inst->I.SrcReg[1].Index = tmpreg; } } - return GL_TRUE; + return 1; } static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) @@ -513,15 +530,15 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) if ((compiler->RequiredOutputs & (1 << i)) && !(compiler->Base.Program.OutputsWritten & (1 << i))) { struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; + inst->I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.File = RC_FILE_OUTPUT; inst->I.DstReg.Index = i; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->I.DstReg.WriteMask = RC_MASK_XYZW; - inst->I.SrcReg[0].File = PROGRAM_CONSTANT; + inst->I.SrcReg[0].File = RC_FILE_CONSTANT; inst->I.SrcReg[0].Index = 0; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; compiler->Base.Program.OutputsWritten |= 1 << i; } @@ -533,18 +550,18 @@ static void nqssadceInit(struct nqssadce_state* s) struct r300_vertex_program_compiler * compiler = s->UserData; int i; - for(i = 0; i < VERT_RESULT_MAX; ++i) { + for(i = 0; i < 32; ++i) { if (compiler->RequiredOutputs & (1 << i)) - s->Outputs[i].Sourced = WRITEMASK_XYZW; + s->Outputs[i].Sourced = RC_MASK_XYZW; } } -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +static int swizzleIsNative(rc_opcode opcode, struct rc_src_register reg) { (void) opcode; (void) reg; - return GL_TRUE; + return 1; } @@ -563,7 +580,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after native rewrite:\n"); rc_print_program(&compiler->Base.Program); - fflush(stdout); + fflush(stderr); } { @@ -580,7 +597,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after source conflict resolve:\n"); rc_print_program(&compiler->Base.Program); - fflush(stdout); + fflush(stderr); } { @@ -597,7 +614,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after NQSSADCE:\n"); rc_print_program(&compiler->Base.Program); - fflush(stdout); + fflush(stderr); } } @@ -609,7 +626,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; if (compiler->Base.Debug) { - printf("Final vertex program code:\n"); + fprintf(stderr, "Final vertex program code:\n"); r300_vertex_program_dump(compiler->code); } }