#include "radeon_compiler.h"
+#include <stdio.h>
+
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
-#include "radeon_program.h"
+#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
-
-#include "shader/prog_print.h"
-
+#include "radeon_swizzle.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
/*
* Take an already-setup and valid source then swizzle it appropriately to
t_swizzle(y), \
t_swizzle(y), \
t_src_class(vpi->SrcReg[x].File), \
- NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+ RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
-static unsigned long t_dst_mask(GLuint mask)
+static unsigned long t_dst_mask(unsigned int mask)
{
- /* WRITEMASK_* is equivalent to VSF_FLAG_* */
- return mask & WRITEMASK_XYZW;
+ /* RC_MASK_* is equivalent to VSF_FLAG_* */
+ return mask & RC_MASK_XYZW;
}
-static unsigned long t_dst_class(gl_register_file file)
+static unsigned long t_dst_class(rc_register_file file)
{
-
switch (file) {
- case PROGRAM_TEMPORARY:
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_TEMPORARY:
return PVS_DST_REG_TEMPORARY;
- case PROGRAM_OUTPUT:
+ case RC_FILE_OUTPUT:
return PVS_DST_REG_OUT;
- case PROGRAM_ADDRESS:
+ case RC_FILE_ADDRESS:
return PVS_DST_REG_A0;
- /*
- case PROGRAM_INPUT:
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_STATE_VAR:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
}
}
static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
- struct prog_dst_register *dst)
+ struct rc_dst_register *dst)
{
- if (dst->File == PROGRAM_OUTPUT)
+ if (dst->File == RC_FILE_OUTPUT)
return vp->outputs[dst->Index];
return dst->Index;
}
-static unsigned long t_src_class(gl_register_file file)
+static unsigned long t_src_class(rc_register_file file)
{
switch (file) {
- case PROGRAM_TEMPORARY:
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_NONE:
+ case RC_FILE_TEMPORARY:
return PVS_SRC_REG_TEMPORARY;
- case PROGRAM_INPUT:
+ case RC_FILE_INPUT:
return PVS_SRC_REG_INPUT;
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_CONSTANT:
- case PROGRAM_STATE_VAR:
+ case RC_FILE_CONSTANT:
return PVS_SRC_REG_CONSTANT;
- /*
- case PROGRAM_OUTPUT:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
}
}
-static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
{
unsigned long aclass = t_src_class(a.File);
unsigned long bclass = t_src_class(b.File);
if (aclass != bclass)
- return GL_FALSE;
+ return 0;
if (aclass == PVS_SRC_REG_TEMPORARY)
- return GL_FALSE;
+ return 0;
if (a.RelAddr || b.RelAddr)
- return GL_TRUE;
+ return 1;
if (a.Index != b.Index)
- return GL_TRUE;
+ return 1;
- return GL_FALSE;
+ return 0;
}
-static INLINE unsigned long t_swizzle(GLubyte swizzle)
+static inline unsigned long t_swizzle(unsigned int swizzle)
{
- /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
return swizzle;
}
static unsigned long t_src_index(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- if (src->File == PROGRAM_INPUT) {
+ if (src->File == RC_FILE_INPUT) {
assert(vp->inputs[src->Index] != -1);
return vp->inputs[src->Index];
} else {
/* these two functions should probably be merged... */
static unsigned long t_src(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
return PVS_SRC_OPERAND(t_src_index(vp, src),
t_swizzle(GET_SWZ(src->Swizzle, 2)),
t_swizzle(GET_SWZ(src->Swizzle, 3)),
t_src_class(src->File),
- src->Negate) | (src->RelAddr << 4);
+ src->Negate) |
+ (src->RelAddr << 4) | (src->Abs << 3);
}
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
return PVS_SRC_OPERAND(t_src_index(vp, src),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_src_class(src->File),
- src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
- (src->RelAddr << 4);
+ src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+ (src->RelAddr << 4) | (src->Abs << 3);
}
-static GLboolean valid_dst(struct r300_vertex_program_code *vp,
- struct prog_dst_register *dst)
+static int valid_dst(struct r300_vertex_program_code *vp,
+ struct rc_dst_register *dst)
{
- if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
- return GL_FALSE;
- } else if (dst->File == PROGRAM_ADDRESS) {
+ if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return 0;
+ } else if (dst->File == RC_FILE_ADDRESS) {
assert(dst->Index == 0);
}
- return GL_TRUE;
+ return 1;
}
static void ei_vector1(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
static void ei_vector2(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = t_src(vp, &vpi->SrcReg[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
+ inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
}
static void ei_math1(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
static void ei_lit(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
}
static void ei_mad(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
- inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
- GL_FALSE,
- GL_TRUE,
- t_dst_index(vp, &vpi->DstReg),
- t_dst_mask(vpi->DstReg.WriteMask),
- t_dst_class(vpi->DstReg.File));
+ /* Remarks about hardware limitations of MAD
+ * (please preserve this comment, as this information is _NOT_
+ * in the documentation provided by AMD).
+ *
+ * As described in the documentation, MAD with three unique temporary
+ * source registers requires the use of the macro version.
+ *
+ * However (and this is not mentioned in the documentation), apparently
+ * the macro version is _NOT_ a full superset of the normal version.
+ * In particular, the macro version does not always work when relative
+ * addressing is used in the source operands.
+ *
+ * This limitation caused incorrect rendering in Sauerbraten's OpenGL
+ * assembly shader path when using medium quality animations
+ * (i.e. animations with matrix blending instead of quaternion blending).
+ *
+ * Unfortunately, I (nha) have been unable to extract a Piglit regression
+ * test for this issue - for some reason, it is possible to have vertex
+ * programs whose prefix is *exactly* the same as the prefix of the
+ * offending program in Sauerbraten up to the offending instruction
+ * without causing any trouble.
+ *
+ * Bottom line: Only use the macro version only when really necessary;
+ * according to AMD docs, this should improve performance by one clock
+ * as a nice side bonus.
+ */
+ if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
+ vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
+ vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
+ inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+ 0,
+ 1,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ } else {
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ 0,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ }
inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = t_src(vp, &vpi->SrcReg[1]);
inst[3] = t_src(vp, &vpi->SrcReg[2]);
}
static void ei_pow(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
-static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
-{
- int i;
- int cur_reg;
- GLuint OutputsWritten, InputsRead;
-
- OutputsWritten = c->Base.Program.OutputsWritten;
- InputsRead = c->Base.Program.InputsRead;
-
- cur_reg = -1;
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (InputsRead & (1 << i))
- c->code->inputs[i] = ++cur_reg;
- else
- c->code->inputs[i] = -1;
- }
-
- cur_reg = 0;
- for (i = 0; i < VERT_RESULT_MAX; i++)
- c->code->outputs[i] = -1;
-
- assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
-
- if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
- c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
- c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
- }
-
- /* If we're writing back facing colors we need to send
- * four colors to make front/back face colors selection work.
- * If the vertex program doesn't write all 4 colors, lets
- * pretend it does by skipping output index reg so the colors
- * get written into appropriate output vectors.
- */
- if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
- c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
- c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
- c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- cur_reg++;
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
- c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
- } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
- cur_reg++;
- }
-
- for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
- if (OutputsWritten & (1 << i)) {
- c->code->outputs[i] = cur_reg++;
- }
- }
-
- if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
- c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
- }
-}
static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
- t_inputs_outputs(compiler);
+ compiler->SetHwInputOutput(compiler);
for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
- struct prog_instruction *vpi = &rci->I;
- GLuint *inst = compiler->code->body.d + compiler->code->length;
+ struct rc_sub_instruction *vpi = &rci->U.I;
+ unsigned int *inst = compiler->code->body.d + compiler->code->length;
/* Skip instructions writing to non-existing destination */
if (!valid_dst(compiler->code, &vpi->DstReg))
continue;
- if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS ||
+ (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) {
rc_error(&compiler->Base, "Vertex program has too many instructions\n");
return;
}
+ assert(compiler->Base.is_r500 ||
+ (vpi->Opcode != RC_OPCODE_SEQ &&
+ vpi->Opcode != RC_OPCODE_SNE));
+
switch (vpi->Opcode) {
- case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
- case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
- case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
- case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
- case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
- case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
- case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
- case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
- case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
- case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
- case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
- case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
- case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
- case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
- case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
- case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
- case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
- case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
- case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
- case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
+ case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+ case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+ case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+ case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+ case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+ case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+ case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+ case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+ case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
+ case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
default:
- rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
+ rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
return;
}
}
struct temporary_allocation {
- GLuint Allocated:1;
- GLuint HwTemp:15;
+ unsigned int Allocated:1;
+ unsigned int HwTemp:15;
struct rc_instruction * LastRead;
};
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *inst;
- GLuint num_orig_temps = 0;
- GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ unsigned int num_orig_temps = 0;
+ char hwtemps[R300_VS_MAX_TEMPS];
struct temporary_allocation * ta;
- GLuint i, j;
+ unsigned int i, j;
compiler->code->num_temporaries = 0;
memset(hwtemps, 0, sizeof(hwtemps));
/* Pass 1: Count original temporaries and allocate structures */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
- if (inst->I.SrcReg[i].Index >= num_orig_temps)
- num_orig_temps = inst->I.SrcReg[i].Index + 1;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
}
}
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
- if (inst->I.DstReg.Index >= num_orig_temps)
- num_orig_temps = inst->I.DstReg.Index + 1;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.DstReg.Index + 1;
}
}
}
/* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
- ta[inst->I.SrcReg[i].Index].LastRead = inst;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
+ ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
}
}
/* Pass 3: Register allocation */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->I.SrcReg[i].Index;
- inst->I.SrcReg[i].Index = ta[orig].HwTemp;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.SrcReg[i].Index;
+ inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
if (ta[orig].Allocated && inst == ta[orig].LastRead)
- hwtemps[ta[orig].HwTemp] = GL_FALSE;
+ hwtemps[ta[orig].HwTemp] = 0;
}
}
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->I.DstReg.Index;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.DstReg.Index;
if (!ta[orig].Allocated) {
- for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+ for(j = 0; j < R300_VS_MAX_TEMPS; ++j) {
if (!hwtemps[j])
break;
}
- if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+ if (j >= R300_VS_MAX_TEMPS) {
fprintf(stderr, "Out of hw temporaries\n");
} else {
- ta[orig].Allocated = GL_TRUE;
+ ta[orig].Allocated = 1;
ta[orig].HwTemp = j;
- hwtemps[j] = GL_TRUE;
+ hwtemps[j] = 1;
if (j >= compiler->code->num_temporaries)
compiler->code->num_temporaries = j + 1;
}
}
- inst->I.DstReg.Index = ta[orig].HwTemp;
+ inst->U.I.DstReg.Index = ta[orig].HwTemp;
}
}
}
}
+/**
+ * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
+ * and the Saturate opcode modifier. Only Absolute is currently transformed.
+ */
+static int transform_nonnative_modifiers(
+ struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ void* unused)
+{
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ /* Transform ABS(a) to MAX(a, -a). */
+ for (i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].Abs) {
+ struct rc_instruction *new_inst;
+ unsigned temp;
+
+ inst->U.I.SrcReg[i].Abs = 0;
+
+ temp = rc_find_free_temporary(c);
+
+ new_inst = rc_insert_new_instruction(c, inst->Prev);
+ new_inst->U.I.Opcode = RC_OPCODE_MAX;
+ new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ new_inst->U.I.DstReg.Index = temp;
+ new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
+ new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
+ new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+
+ memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = temp;
+ inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
+ }
+ }
+ return 1;
+}
/**
* Vertex engine cannot read two inputs or two constants at the same time.
* Introduce intermediate MOVs to temporary registers to account for this.
*/
-static GLboolean transform_source_conflicts(
+static int transform_source_conflicts(
struct radeon_compiler *c,
struct rc_instruction* inst,
void* unused)
{
- GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (num_operands == 3) {
- if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
- || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
+ if (opcode->NumSrcRegs == 3) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+ || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = tmpreg;
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
-
- reset_srcreg(&inst->I.SrcReg[2]);
- inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[2].Index = tmpreg;
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+
+ reset_srcreg(&inst->U.I.SrcReg[2]);
+ inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[2].Index = tmpreg;
}
}
- if (num_operands >= 2) {
- if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
+ if (opcode->NumSrcRegs >= 2) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = tmpreg;
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
-
- reset_srcreg(&inst->I.SrcReg[1]);
- inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[1].Index = tmpreg;
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+
+ reset_srcreg(&inst->U.I.SrcReg[1]);
+ inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[1].Index = tmpreg;
}
}
- return GL_TRUE;
+ return 1;
}
static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
if ((compiler->RequiredOutputs & (1 << i)) &&
!(compiler->Base.Program.OutputsWritten & (1 << i))) {
struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
- inst->I.Opcode = OPCODE_MOV;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_OUTPUT;
- inst->I.DstReg.Index = i;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = i;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
- inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
- inst->I.SrcReg[0].Index = 0;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+ inst->U.I.SrcReg[0].Index = 0;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
compiler->Base.Program.OutputsWritten |= 1 << i;
}
}
}
-static void nqssadceInit(struct nqssadce_state* s)
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_vertex_program_compiler * compiler = s->UserData;
+ struct r300_vertex_program_compiler * c = userdata;
int i;
- for(i = 0; i < VERT_RESULT_MAX; ++i) {
- if (compiler->RequiredOutputs & (1 << i))
- s->Outputs[i].Sourced = WRITEMASK_XYZW;
+ for(i = 0; i < 32; ++i) {
+ if (c->RequiredOutputs & (1 << i))
+ callback(data, i, RC_MASK_XYZW);
}
}
-static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
(void) opcode;
(void) reg;
- return GL_TRUE;
+ return 1;
}
+static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
+{
+ if (c->Base.Debug) {
+ fprintf(stderr, "Vertex Program: %s\n", where);
+ rc_print_program(&c->Base.Program);
+ }
+}
+
+
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+ .IsNative = &swizzle_is_native,
+ .Split = 0 /* should never be called */
+};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
- rc_mesa_to_rc_program(&compiler->Base, compiler->program);
- compiler->program = 0;
+ struct emulate_loop_state loop_state;
+
+ compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
- rc_move_output(&compiler->Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
+ addArtificialOutputs(compiler);
- if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
- rc_copy_output(&compiler->Base,
- VERT_RESULT_HPOS,
- compiler->state.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
+ debug_program_log(compiler, "before compilation");
+
+ /* XXX Ideally this should be done only for r3xx, but since
+ * we don't have branching support for r5xx, we use the emulation
+ * on all chipsets. */
+ rc_transform_unroll_loops(&compiler->Base, &loop_state);
+
+ debug_program_log(compiler, "after transform loops");
+
+ if (compiler->Base.is_r500){
+ rc_emulate_loops(&loop_state, R500_VS_MAX_ALU);
+ } else {
+ rc_emulate_loops(&loop_state, R300_VS_MAX_ALU);
}
+ debug_program_log(compiler, "after emulate loops");
- if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
- rc_move_output(&compiler->Base,
- VERT_RESULT_FOGC,
- compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
- }
+ rc_emulate_branches(&compiler->Base);
- addArtificialOutputs(compiler);
+ debug_program_log(compiler, "after emulate branches");
- {
+ if (compiler->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
+ { &r300_transform_trig_scale_vertex, 0 }
};
- radeonLocalTransform(&compiler->Base, 1, transformations);
- }
+ radeonLocalTransform(&compiler->Base, 2, transformations);
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after native rewrite:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stdout);
+ debug_program_log(compiler, "after native rewrite");
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_vertex_alu, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(&compiler->Base, 2, transformations);
+
+ debug_program_log(compiler, "after native rewrite");
+
+ /* Note: This pass has to be done seperately from ALU rewrite,
+ * because it needs to check every instruction.
+ */
+ struct radeon_program_transformation transformations2[] = {
+ { &transform_nonnative_modifiers, 0 },
+ };
+ radeonLocalTransform(&compiler->Base, 1, transformations2);
+
+ debug_program_log(compiler, "after emulate modifiers");
}
{
radeonLocalTransform(&compiler->Base, 1, transformations);
}
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after source conflict resolve:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stdout);
- }
+ debug_program_log(compiler, "after source conflict resolve");
- {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadceInit,
- .IsNativeSwizzle = &swizzleIsNative,
- .BuildSwizzle = NULL
- };
- radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
+ rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
- /* We need this step for reusing temporary registers */
- allocate_temporary_registers(compiler);
+ debug_program_log(compiler, "after deadcode");
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after NQSSADCE:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stdout);
- }
- }
+ rc_dataflow_swizzles(&compiler->Base);
+
+ allocate_temporary_registers(compiler);
+
+ debug_program_log(compiler, "after dataflow");
translate_vertex_program(compiler);
compiler->code->InputsRead = compiler->Base.Program.InputsRead;
compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Final vertex program code:\n");
+ r300_vertex_program_dump(compiler->code);
+ }
}