* Aapo Tahkola <aet@rasterburn.org>
* Roland Scheidegger <rscheidegger_lists@hispeed.ch>
*/
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "program.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "shader/programopt.h"
+#include "tnl/tnl.h"
#include "r200_context.h"
#include "r200_vertprog.h"
#include "r200_ioctl.h"
#include "r200_tcl.h"
-#include "program_instruction.h"
-#include "programopt.h"
-#include "tnl/tnl.h"
#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
return GL_TRUE;
}
-static __inline unsigned long t_dst_mask(GLuint mask)
+static INLINE unsigned long t_dst_mask(GLuint mask)
{
/* WRITEMASK_* is equivalent to VSF_FLAG_* */
return mask & VSF_FLAG_ALL;
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
case PROGRAM_STATE_VAR:
return VSF_IN_CLASS_PARAM;
/*
}
}
-static __inline unsigned long t_swizzle(GLubyte swizzle)
+static INLINE unsigned long t_swizzle(GLubyte swizzle)
{
/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
return swizzle;
int dofogfix = 0;
int fog_temp_i = 0;
int free_inputs;
- int free_inputs_conv;
int array_count = 0;
+ int u_temp_used;
vp->native = GL_FALSE;
vp->translated = GL_TRUE;
/* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
base e isn't directly available neither. */
- if (mesa_vp->Base.OutputsWritten & VERT_RESULT_FOGC && !vp->fogpidx) {
+ if ((mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_FOGC)) && !vp->fogpidx) {
struct gl_program_parameter_list *paramList;
- GLint tokens[6] = { STATE_FOG_PARAMS, 0, 0, 0, 0, 0 };
+ gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
paramList = mesa_vp->Base.Parameters;
vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
}
for(i = 0; i < VERT_ATTRIB_MAX; i++)
vp->inputs[i] = -1;
+ for(i = 0; i < 15; i++)
+ vp->inputmap_rev[i] = 255;
free_inputs = 0x2ffd;
/* fglrx uses fixed inputs as follows for conventional attribs.
/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
vp->inputs[VERT_ATTRIB_POS] = 0;
+ vp->inputmap_rev[0] = VERT_ATTRIB_POS;
free_inputs &= ~(1 << 0);
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
+ vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
vp->inputs[VERT_ATTRIB_NORMAL] = 1;
+ vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
vp->inputs[VERT_ATTRIB_COLOR0] = 2;
+ vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
free_inputs &= ~(1 << 2);
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
vp->inputs[VERT_ATTRIB_COLOR1] = 3;
+ vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
free_inputs &= ~(1 << 3);
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
+ vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
+ array_count++;
}
for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) {
if (mesa_vp->Base.InputsRead & (1 << i)) {
vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6;
+ vp->inputmap_rev[8 + i - VERT_ATTRIB_TEX0] = i;
free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6));
array_count++;
}
}
- free_inputs_conv = free_inputs;
/* using VERT_ATTRIB_TEX6/7 would be illegal */
/* completely ignore aliasing? */
for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
if (free_inputs & (1 << j)) {
free_inputs &= ~(1 << j);
vp->inputs[i] = j;
- vp->rev_inputs[j] = i;
+ if (j == 0) vp->inputmap_rev[j] = i; /* mapped to pos */
+ else if (j < 12) vp->inputmap_rev[j + 2] = i; /* mapped to col/tex */
+ else vp->inputmap_rev[j + 1] = i; /* mapped to pos1 */
break;
}
}
}
}
- vp->gen_inputs_mapped = free_inputs ^ free_inputs_conv;
if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
if (R200_DEBUG & DEBUG_FALLBACKS) {
goto next;
case OPCODE_MAD:
+ /* only 2 read ports into temp memory thus may need the macro op MAD_2
+ instead (requiring 2 clocks) if all inputs are in temp memory
+ (and, only if they actually reference 3 distinct temps) */
hw_op=(src[0].File == PROGRAM_TEMPORARY &&
src[1].File == PROGRAM_TEMPORARY &&
- src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
+ src[2].File == PROGRAM_TEMPORARY &&
+ (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
+ (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
+ (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
+ R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
t_dst_mask(dst.WriteMask));
case OPCODE_XPD:
/* mul r0, r1.yzxw, r2.zxyw
mad r0, -r2.yzxw, r1.zxyw, r0
- NOTE: might need MAD_2
*/
+ hw_op=(src[0].File == PROGRAM_TEMPORARY &&
+ src[1].File == PROGRAM_TEMPORARY &&
+ (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
+ R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
o_inst++;
u_temp_i--;
- o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst),
+ o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
t_dst_mask(dst.WriteMask));
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
dofogfix = 0;
}
+ u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
if (mesa_vp->Base.NumNativeTemporaries <
- (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) {
+ (mesa_vp->Base.NumTemporaries + u_temp_used)) {
mesa_vp->Base.NumNativeTemporaries =
- mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i);
+ mesa_vp->Base.NumTemporaries + u_temp_used;
}
- if (u_temp_i < mesa_vp->Base.NumTemporaries) {
+ if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
if (R200_DEBUG & DEBUG_FALLBACKS) {
- fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i);
+ fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
}
return GL_FALSE;
}
}
/* could optimize setting up vertex progs away for non-tcl hw */
fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
- rmesa->r200Screen->drmSupportsVertexProgram);
+ rmesa->radeon.radeonScreen->drmSupportsVertexProgram);
TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
- if (rmesa->TclFallback) return;
+ if (rmesa->radeon.TclFallback) return;
R200_STATECHANGE( rmesa, vap );
/* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?