From 84445273ed554ea6fa65c894bbe098eb3f3d1230 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 23 Jul 2009 18:40:41 +0200 Subject: [PATCH] r300: Move vertex program compilation to compiler MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is just the first step of refactoring. The separation is not yet clean enough with this commit. Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/Makefile | 1 + .../drivers/dri/r300/compiler/r3xx_vertprog.c | 1533 ++++++++++++++++ .../drivers/dri/r300/compiler/radeon_code.h | 25 + .../dri/r300/compiler/radeon_compiler.h | 10 + src/mesa/drivers/dri/r300/r300_context.h | 31 +- src/mesa/drivers/dri/r300/r300_draw.c | 4 +- src/mesa/drivers/dri/r300/r300_ioctl.c | 9 +- src/mesa/drivers/dri/r300/r300_reg.h | 18 + src/mesa/drivers/dri/r300/r300_state.c | 4 +- src/mesa/drivers/dri/r300/r300_vertprog.c | 1549 +---------------- src/mesa/drivers/dri/r300/r300_vertprog.h | 28 - src/mesa/shader/prog_instruction.h | 9 +- 12 files changed, 1625 insertions(+), 1596 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index c0fd85c1810..4e2ff50c69d 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -17,6 +17,7 @@ C_SOURCES = \ r300_fragprog_emit.c \ r500_fragprog.c \ r500_fragprog_emit.c \ + r3xx_vertprog.c \ \ memory_pool.c diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c new file mode 100644 index 00000000000..b074c98ee9d --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,1533 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), 
+ * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "../r300_reg.h" + +#include "radeon_nqssadce.h" + +#include "shader/prog_optimize.h" +#include "shader/prog_print.h" + + +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ + t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ + (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ + t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. 
+ */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(src[x].File), \ + NEGATE_NONE) | (src[x].RelAddr << 4)) + + + + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & WRITEMASK_XYZW; +} + +static unsigned long t_dst_class(gl_register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case PROGRAM_OUTPUT: + return PVS_DST_REG_OUT; + case PROGRAM_ADDRESS: + return PVS_DST_REG_A0; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(gl_register_file file) +{ + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case PROGRAM_INPUT: + return PVS_SRC_REG_INPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + return PVS_SRC_REG_CONSTANT; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static INLINE unsigned long t_swizzle(GLubyte swizzle) +{ + /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + if (src->File == PROGRAM_INPUT) { + assert(vp->inputs[src->Index] != -1); + return 
vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src->RelAddr << 4); +} + +static GLboolean valid_dst(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0]. + Negate) ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[3] = 0; + + return inst; +} + +static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = + PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + PVS_SRC_SELECT_FORCE_1, + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZ : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + *u_temp_i, + 
t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(*u_temp_i, + PVS_SRC_SELECT_X, + PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, + PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, + /* Not 100% sure about this */ + (!src[0]. + Negate) ? NEGATE_XYZW : NEGATE_NONE); + inst[3] = __CONST(0, SWIZZLE_ZERO); + (*u_temp_i)--; + + return inst; +} + +static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + + return inst; +} + +static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = t_src(vp, &src[2]); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMOV(struct 
r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &src[1]); + + return inst; +} + +static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program_code *vp, + struct 
prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + +#if 0 + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + 
t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[3] = 0; +#else + inst[0] = + PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ONE); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); +#endif + + return inst; +} + +static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + */ + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + (!src[1]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[3] = + PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, + PVS_SRC_REG_TEMPORARY, NEGATE_NONE); + + (*u_temp_i)--; + + return inst; +} + +static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp) +{ + int i; + int cur_reg; + GLuint OutputsWritten, InputsRead; + + OutputsWritten = glvp->OutputsWritten; + InputsRead = glvp->InputsRead; + + cur_reg = -1; + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) + vp->inputs[i] = ++cur_reg; + else + vp->inputs[i] = -1; + } + + cur_reg = 0; + for (i = 0; i < VERT_RESULT_MAX; i++) + vp->outputs[i] = -1; + + assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); + + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { + vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } + + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. 
+ */ + if (OutputsWritten & (1 << VERT_RESULT_COL0)) { + vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) { + vp->outputs[VERT_RESULT_COL1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + cur_reg++; + } + + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (OutputsWritten & (1 << i)) { + vp->outputs[i] = cur_reg++; + } + } + + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { + vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } +} + +static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler) +{ + struct prog_instruction *vpi = compiler->program->Instructions; + int i; + GLuint *inst; + unsigned long num_operands; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? 
*/ + int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; + struct prog_src_register src[3]; + struct r300_vertex_program_code * vp = compiler->code; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + + t_inputs_outputs(compiler->code, compiler->program); + + for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END; + vpi++, inst += 4) { + + { + int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; + if((compiler->code->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", compiler->code->num_temporaries, u_temp_used); + return GL_FALSE; + } + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; + } + + if (!valid_dst(compiler->code, &vpi->DstReg)) { + /* redirect result to unused temp */ + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = u_temp_i; + } + + num_operands = _mesa_num_inst_src_regs(vpi->Opcode); + + /* copy the sources (src) from mesa into a local variable... is this needed? */ + for (i = 0; i < num_operands; i++) { + src[i] = vpi->SrcReg[i]; + } + + if (num_operands == 3) { /* TODO: scalars */ + if (CMP_SRCS(src[1], src[2]) + || CMP_SRCS(src[0], src[2])) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + WRITEMASK_XYZW, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(compiler->code, &src[2]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + t_src_class(src[2].File), + NEGATE_NONE) | (src[2]. 
+ RelAddr << + 4); + inst[2] = __CONST(2, SWIZZLE_ZERO); + inst[3] = __CONST(2, SWIZZLE_ZERO); + inst += 4; + + src[2].File = PROGRAM_TEMPORARY; + src[2].Index = u_temp_i; + src[2].RelAddr = 0; + u_temp_i--; + } + } + + if (num_operands >= 2) { + if (CMP_SRCS(src[1], src[0])) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + WRITEMASK_XYZW, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(compiler->code, &src[0]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + t_src_class(src[0].File), + NEGATE_NONE) | (src[0]. + RelAddr << + 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + + src[0].File = PROGRAM_TEMPORARY; + src[0].Index = u_temp_i; + src[0].RelAddr = 0; + u_temp_i--; + } + } + + switch (vpi->Opcode) { + case OPCODE_ABS: + inst = r300TranslateOpcodeABS(compiler->code, vpi, inst, src); + break; + case OPCODE_ADD: + inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src); + break; + case OPCODE_ARL: + inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src); + break; + case OPCODE_DP3: + inst = r300TranslateOpcodeDP3(compiler->code, vpi, inst, src); + break; + case OPCODE_DP4: + inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src); + break; + case OPCODE_DPH: + inst = r300TranslateOpcodeDPH(compiler->code, vpi, inst, src); + break; + case OPCODE_DST: + inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src); + break; + case OPCODE_EX2: + inst = r300TranslateOpcodeEX2(compiler->code, vpi, inst, src); + break; + case OPCODE_EXP: + inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src); + break; + case OPCODE_FLR: + inst = r300TranslateOpcodeFLR(compiler->code, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + case OPCODE_FRC: + inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src); + break; + case OPCODE_LG2: + inst = r300TranslateOpcodeLG2(compiler->code, vpi, inst, src); + break; + case OPCODE_LIT: + inst = 
r300TranslateOpcodeLIT(compiler->code, vpi, inst, src); + break; + case OPCODE_LOG: + inst = r300TranslateOpcodeLOG(compiler->code, vpi, inst, src); + break; + case OPCODE_MAD: + inst = r300TranslateOpcodeMAD(compiler->code, vpi, inst, src); + break; + case OPCODE_MAX: + inst = r300TranslateOpcodeMAX(compiler->code, vpi, inst, src); + break; + case OPCODE_MIN: + inst = r300TranslateOpcodeMIN(compiler->code, vpi, inst, src); + break; + case OPCODE_MOV: + inst = r300TranslateOpcodeMOV(compiler->code, vpi, inst, src); + break; + case OPCODE_MUL: + inst = r300TranslateOpcodeMUL(compiler->code, vpi, inst, src); + break; + case OPCODE_POW: + inst = r300TranslateOpcodePOW(compiler->code, vpi, inst, src); + break; + case OPCODE_RCP: + inst = r300TranslateOpcodeRCP(compiler->code, vpi, inst, src); + break; + case OPCODE_RSQ: + inst = r300TranslateOpcodeRSQ(compiler->code, vpi, inst, src); + break; + case OPCODE_SGE: + inst = r300TranslateOpcodeSGE(compiler->code, vpi, inst, src); + break; + case OPCODE_SLT: + inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src); + break; + case OPCODE_SUB: + inst = r300TranslateOpcodeSUB(compiler->code, vpi, inst, src); + break; + case OPCODE_SWZ: + inst = r300TranslateOpcodeSWZ(compiler->code, vpi, inst, src); + break; + case OPCODE_XPD: + inst = r300TranslateOpcodeXPD(compiler->code, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + default: + return GL_FALSE; + } + } + + compiler->code->length = (inst - compiler->code->body.d); + if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + return GL_FALSE; + } + + return GL_TRUE; +} + +static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) +{ + struct prog_instruction *vpi; + + _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); + + vpi = &prog->Instructions[prog->NumInstructions - 3]; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_HPOS; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + 
vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_END; +} + +static void pos_as_texcoord(struct gl_program *prog, int tex_id) +{ + struct prog_instruction *vpi; + GLuint tempregi = prog->NumTemporaries; + + prog->NumTemporaries++; + + for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = tempregi; + } + } + + insert_wpos(prog, tempregi, tex_id); + + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} + +/** + * The fogcoord attribute is special in that only the first component + * is relevant, and the remaining components are always fixed (when read + * from by the fragment program) to yield an X001 pattern. + * + * We need to enforce this either in the vertex program or in the fragment + * program, and this code chooses not to enforce it in the vertex program. + * This is slightly cheaper, as long as the fragment program does not use + * weird swizzles. + * + * And it seems that usually, weird swizzles are not used, so... + * + * See also the counterpart rewriting for fragment programs. 
+ */ +static void fog_as_texcoord(struct gl_program *prog, int tex_id) +{ + struct prog_instruction *vpi; + + vpi = prog->Instructions; + while (vpi->Opcode != OPCODE_END) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_X; + } + + ++vpi; + } + + prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} + +static int translateABS(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_MAX; + inst->SrcReg[1] = inst->SrcReg[0]; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; +} + +static int translateDP3(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + return 0; +} + +static int translateDPH(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + + return 0; +} + +static int translateFLR(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + struct prog_dst_register dst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + dst = inst->DstReg; + + inst->Opcode = OPCODE_FRC; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + ++inst; + + inst->Opcode = OPCODE_ADD; + inst->DstReg = dst; + inst->SrcReg[0] = (inst-1)->SrcReg[0]; + inst->SrcReg[1].File = PROGRAM_TEMPORARY; + inst->SrcReg[1].Index = tmp_idx; + inst->SrcReg[1].Negate = NEGATE_XYZW; + + return 1; +} + +static int 
translateSUB(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_ADD; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; +} + +static int translateSWZ(struct gl_program *prog, int pos) +{ + prog->Instructions[pos].Opcode = OPCODE_MOV; + + return 0; +} + +static int translateXPD(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + + *(inst+1) = *inst; + + inst->Opcode = OPCODE_MUL; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + ++inst; + + inst->Opcode = OPCODE_MAD; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + inst->SrcReg[2].File = PROGRAM_TEMPORARY; + inst->SrcReg[2].Index = tmp_idx; + + return 1; +} + +static void translateInsts(struct gl_program *prog) +{ + struct prog_instruction *inst; + int i; + + for (i = 0; i < prog->NumInstructions; ++i) { + inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ABS: + i += translateABS(prog, i); + break; + case OPCODE_DP3: + i += translateDP3(prog, i); + break; + case OPCODE_DPH: + i += translateDPH(prog, i); + break; + case OPCODE_FLR: + i += translateFLR(prog, i); + break; + case OPCODE_SUB: + i += translateSUB(prog, i); + break; + case OPCODE_SWZ: + i += translateSWZ(prog, i); + break; + case OPCODE_XPD: + i += translateXPD(prog, i); + break; + default: + break; + } + } +} + 
+#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \ + OutputsAdded |= 1 << (vp_result); \ + count++; \ + } \ + } while (0) +/* Insert one MOV from CONST[0] before END for every COL0/COL1/TEX0..TEX7 output that state.FpReads requires but the vertex program does not write, and record the additions in OutputsWritten. */ +static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) +{ + GLuint OutputsAdded, FpReads; + int i, count; + + OutputsAdded = 0; + count = 0; + FpReads = compiler->state.FpReads; + + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); + + for (i = 0; i < 8; ++i) { /* TEX0..TEX7, the same range nqssadceInit marks as sourced */ + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); + } + + /* Some outputs may be artificially added, to match the inputs of the fragment program. + * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by + * vertex program are undefined, so just use MOV [vertex_result], CONST[0] + */ + if (count > 0) { + struct prog_instruction *inst; + + _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count); + inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count]; + + for (i = 0; i < VERT_RESULT_MAX; ++i) { + if (OutputsAdded & (1 << i)) { + inst->Opcode = OPCODE_MOV; + + inst->DstReg.File = PROGRAM_OUTPUT; + inst->DstReg.Index = i; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->DstReg.CondMask = COND_TR; + + inst->SrcReg[0].File = PROGRAM_CONSTANT; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++inst; + } + } + + compiler->program->OutputsWritten |= OutputsAdded; + } +} + +#undef ADD_OUTPUT + +static void nqssadceInit(struct nqssadce_state* s) +{ + struct r300_vertex_program_compiler * compiler = s->UserData; + GLuint fp_reads; + + fp_reads = compiler->state.FpReads; + { + if (fp_reads & FRAG_BIT_COL0) { + s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; + } + + if (fp_reads & FRAG_BIT_COL1) { + 
s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; + } + } + + { + int i; + for (i = 0; i < 8; ++i) { + if (fp_reads & FRAG_BIT_TEX(i)) { + s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; + } + } + } + + s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; + if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) + s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; +} + +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +{ + (void) opcode; + (void) reg; + + return GL_TRUE; +} + + + +GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx) +{ + GLboolean success; + + if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) { + pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0); + } + + if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) { + fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0); + } + + addArtificialOutputs(compiler); + + translateInsts(compiler->program); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + _mesa_print_program(compiler->program); + fflush(stdout); + } + + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(compiler->program, &nqssadce, compiler); + + /* We need this step for reusing temporary registers */ + _mesa_optimize_program(ctx, compiler->program); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + _mesa_print_program(compiler->program); + fflush(stdout); + } + } + + assert(compiler->program->NumInstructions); + { + struct prog_instruction *inst; + int max, i, tmp; + + inst = compiler->program->Instructions; + max = -1; + while (inst->Opcode != OPCODE_END) { + tmp = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < tmp; ++i) { + if 
(inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + if ((int) inst->SrcReg[i].Index > max) { + max = inst->SrcReg[i].Index; + } + } + } + + if (_mesa_num_inst_dst_regs(inst->Opcode)) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if ((int) inst->DstReg.Index > max) { + max = inst->DstReg.Index; + } + } + } + ++inst; + } + + /* We actually want highest index of used temporary register, + * not the number of temporaries used. + * These values aren't always the same. + */ + compiler->code->num_temporaries = max + 1; + } + + success = translate_vertex_program(compiler); + + compiler->code->InputsRead = compiler->program->InputsRead; + compiler->code->OutputsWritten = compiler->program->OutputsWritten; + + return success; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 7d8bf483e79..e89e7bc17b2 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -147,4 +147,29 @@ struct rX00_fragment_program_code { }; +#define VSF_MAX_FRAGMENT_LENGTH (255*4) +#define VSF_MAX_FRAGMENT_TEMPS (14) + +struct r300_vertex_program_external_state { + GLuint FpReads; + GLuint FogAttr; + GLuint WPosAttr; +}; + +struct r300_vertex_program_code { + int length; + union { + GLuint d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VERT_ATTRIB_MAX]; + int outputs[VERT_RESULT_MAX]; + + GLbitfield InputsRead; + GLbitfield OutputsWritten; +}; + #endif /* RADEON_CODE_H */ \ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index a5f70173b7d..f8e4b3c681a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -66,4 +66,14 @@ struct r300_fragment_program_compiler { GLboolean 
r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + struct r300_vertex_program_external_state state; + struct gl_program *program; +}; + +GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c, GLcontext * ctx); + #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index d14d992366b..5c575441d7b 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -390,46 +390,19 @@ struct r300_hw_state { /* Vertex shader state */ -/* Perhaps more if we store programs in vmem? */ -/* drm_r300_cmd_header_t->vpu->count is unsigned char */ -#define VSF_MAX_FRAGMENT_LENGTH (255*4) - -/* Can be tested with colormat currently. */ -#define VSF_MAX_FRAGMENT_TEMPS (14) - #define COLOR_IS_RGBA #define TAG(x) r300##x #include "tnl_dd/t_dd_vertex.h" #undef TAG -struct r300_vertex_program_key { - GLuint FpReads; - GLuint FogAttr; - GLuint WPosAttr; -}; - struct r300_vertex_program { struct gl_vertex_program *Base; struct r300_vertex_program *next; - struct r300_vertex_program_key key; - GLbitfield InputsRead; - GLbitfield OutputsWritten; - - struct r300_vertex_shader_hw_code { - int length; - union { - GLuint d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; - } body; - } hw_code; + struct r300_vertex_program_external_state key; + struct r300_vertex_program_code code; GLboolean error; - - int pos_end; - int num_temporaries; /* Number of temp vars used by program */ - int inputs[VERT_ATTRIB_MAX]; - int outputs[VERT_RESULT_MAX]; }; struct r300_vertex_program_cont { diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index e2e92fde482..fcfd3099332 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -341,7 +341,7 @@ 
static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar { int i, tmp; - tmp = r300->selected_vp->InputsRead; + tmp = r300->selected_vp->code.InputsRead; i = 0; vbuf->num_attribs = 0; while (tmp) { @@ -437,7 +437,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, if (r300->fallback) return GL_FALSE; - r300SetupVAP(ctx, r300->selected_vp->InputsRead, r300->selected_vp->OutputsWritten); + r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten); r300UpdateShaderStates(r300); diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 2fa626bab24..5bded642ef8 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -567,12 +567,12 @@ static void r300EmitClearState(GLcontext * ctx) 0, 0xf, PVS_DST_REG_OUT); vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[4] = 0x0; vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, @@ -580,13 +580,12 @@ static void r300EmitClearState(GLcontext * ctx) vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, - - VSF_FLAG_NONE); + NEGATE_NONE); vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[8] = 0x0; r300->vap_flush_needed = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 357c600af97..dd32e6c730a 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ 
b/src/mesa/drivers/dri/r300/r300_reg.h @@ -2667,6 +2667,24 @@ enum { PVS_SRC_ADDR_MODE_1_SHIFT = 32, }; +/* Pack opcode, math/macro instruction flags, register index, write mask and register class into one PVS destination-operand dword. Every argument is parenthesized so that arbitrary expressions can be passed. */ +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + ((((opcode) & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | (((math_inst) & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | (((macro_inst) & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | (((reg_index) & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | (((reg_writemask) & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | (((reg_class) & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) +/* Pack register index, the four swizzle selects, negate mask and register class into one PVS source-operand dword. Every argument is parenthesized so that ternaries such as "neg ? a : b" are masked as a whole. */ +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + ((((in_reg_index) & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | (((comp_x) & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | (((comp_y) & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | (((comp_z) & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | (((comp_w) & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | (((negate) & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | (((reg_class) & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + /*\}*/ /* BEGIN: Packet 3 commands */ diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index ad57b7e2f11..e3e8a6fb3df 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1458,7 +1458,7 @@ static void r300SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); @@ -1552,7 +1552,7 @@ static void r500SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->OutputsWritten; + 
OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 95cedd9d919..ec4ba9ca7da 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -40,39 +40,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "tnl/tnl.h" +#include "compiler/radeon_compiler.h" #include "compiler/radeon_nqssadce.h" #include "r300_context.h" #include "r300_state.h" -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ - t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ - (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ - t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ - -/* - * Take an already-setup and valid source then swizzle it appropriately to - * obtain a constant ZERO or ONE source. 
- */ -#define __CONST(x, y) \ - (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_src_class(src[x].File), \ - VSF_FLAG_NONE) | (src[x].RelAddr << 4)) - -#define FREE_TEMPS() \ - do { \ - int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ - if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->error = GL_TRUE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) { @@ -125,1513 +97,38 @@ static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program return dst - dst_o; } -static unsigned long t_dst_mask(GLuint mask) -{ - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & VSF_FLAG_ALL; -} - -static unsigned long t_dst_class(gl_register_file file) -{ - - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: - return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: - return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} - -static unsigned long t_dst_index(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT) - return vp->outputs[dst->Index]; - - return dst->Index; -} - -static unsigned long t_src_class(gl_register_file file) -{ - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: - return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: - return PVS_SRC_REG_CONSTANT; 
- /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} - -static INLINE unsigned long t_swizzle(GLubyte swizzle) -{ -/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; -} - -#if 0 -static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) -{ - int i; - - if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, - caller); - return; - } - - fprintf(stderr, "%s:<", caller); - for (i = 0; i < VERT_ATTRIB_MAX; i++) - fprintf(stderr, "%d ", vp->inputs[i]); - fprintf(stderr, ">\n"); - -} -#endif - -static unsigned long t_src_index(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - if (src->File == PROGRAM_INPUT) { - assert(vp->inputs[src->Index] != -1); - return vp->inputs[src->Index]; - } else { - if (src->Index < 0) { - fprintf(stderr, - "negative offsets for indirect addressing do not work.\n"); - return 0; - } - return src->Index; - } -} - -/* these two functions should probably be merged... */ - -static unsigned long t_src(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->Negate) | (src->RelAddr << 4); -} - -static unsigned long t_src_scalar(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. 
- */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_src_class(src->File), - src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src->RelAddr << 4); -} - -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); - } - - return GL_TRUE; -} - -static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - Negate) ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = 0; - - return inst; -} - -static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = - PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1].Negate ? 
VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - PVS_SRC_SELECT_FORCE_1, - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - 
PVS_DST_REG_TEMPORARY); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(*u_temp_i, - PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, - /* Not 100% sure about this */ - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - inst[3] = __CONST(0, SWIZZLE_ZERO); - (*u_temp_i)--; - - return inst; -} - -static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - - inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - return inst; -} - -static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = t_src(vp, &src[2]); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp, - struct 
prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = t_src_scalar(vp, &src[1]); - - return inst; -} - -static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register 
src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - -#if 0 - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = 0; -#else - inst[0] = - PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ONE); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); -#endif - - return inst; -} - -static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - */ - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = - PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE); - - (*u_temp_i)--; - - return inst; -} - -static void t_inputs_outputs(struct r300_vertex_program *vp, struct gl_program * glvp) -{ - int i; - int cur_reg; - GLuint OutputsWritten, InputsRead; - - OutputsWritten = glvp->OutputsWritten; - InputsRead = glvp->InputsRead; - - cur_reg = -1; - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) - vp->inputs[i] = ++cur_reg; - else - vp->inputs[i] = -1; - } - - cur_reg = 0; - for (i = 0; i < VERT_RESULT_MAX; i++) - vp->outputs[i] = -1; - - assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); - - if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { - vp->outputs[VERT_RESULT_HPOS] = cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { - vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; - } - - /* If we're writing back facing colors we need to send - * four colors to make front/back face colors selection work. - * If the vertex program doesn't write all 4 colors, lets - * pretend it does by skipping output index reg so the colors - * get written into appropriate output vectors. 
- */ - if (OutputsWritten & (1 << VERT_RESULT_COL0)) { - vp->outputs[VERT_RESULT_COL0] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || - OutputsWritten & (1 << VERT_RESULT_BFC1)) { - cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || - OutputsWritten & (1 << VERT_RESULT_BFC1)) { - cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - cur_reg++; - } - - for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { - if (OutputsWritten & (1 << i)) { - vp->outputs[i] = cur_reg++; - } - } - - if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; - } -} - -static void translate_vertex_program(struct r300_vertex_program *vp, struct gl_program * glvp) -{ - struct prog_instruction *vpi = glvp->Instructions; - int i; - GLuint *inst; - unsigned long num_operands; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? 
*/ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - - vp->pos_end = 0; /* Not supported yet */ - vp->hw_code.length = 0; - vp->error = GL_FALSE; - - t_inputs_outputs(vp, glvp); - - for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END; - vpi++, inst += 4) { - - FREE_TEMPS(); - - if (!valid_dst(vp, &vpi->DstReg)) { - /* redirect result to unused temp */ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = u_temp_i; - } - - num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - - /* copy the sources (src) from mesa into a local variable... is this needed? */ - for (i = 0; i < num_operands; i++) { - src[i] = vpi->SrcReg[i]; - } - - if (num_operands == 3) { /* TODO: scalars */ - if (CMP_SRCS(src[1], src[2]) - || CMP_SRCS(src[0], src[2])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[2]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[2].File), - VSF_FLAG_NONE) | (src[2]. - RelAddr << - 4); - inst[2] = __CONST(2, SWIZZLE_ZERO); - inst[3] = __CONST(2, SWIZZLE_ZERO); - inst += 4; - - src[2].File = PROGRAM_TEMPORARY; - src[2].Index = u_temp_i; - src[2].RelAddr = 0; - u_temp_i--; - } - } - - if (num_operands >= 2) { - if (CMP_SRCS(src[1], src[0])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[0].File), - VSF_FLAG_NONE) | (src[0]. 
- RelAddr << - 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - src[0].File = PROGRAM_TEMPORARY; - src[0].Index = u_temp_i; - src[0].RelAddr = 0; - u_temp_i--; - } - } - - switch (vpi->Opcode) { - case OPCODE_ABS: - inst = r300TranslateOpcodeABS(vp, vpi, inst, src); - break; - case OPCODE_ADD: - inst = r300TranslateOpcodeADD(vp, vpi, inst, src); - break; - case OPCODE_ARL: - inst = r300TranslateOpcodeARL(vp, vpi, inst, src); - break; - case OPCODE_DP3: - inst = r300TranslateOpcodeDP3(vp, vpi, inst, src); - break; - case OPCODE_DP4: - inst = r300TranslateOpcodeDP4(vp, vpi, inst, src); - break; - case OPCODE_DPH: - inst = r300TranslateOpcodeDPH(vp, vpi, inst, src); - break; - case OPCODE_DST: - inst = r300TranslateOpcodeDST(vp, vpi, inst, src); - break; - case OPCODE_EX2: - inst = r300TranslateOpcodeEX2(vp, vpi, inst, src); - break; - case OPCODE_EXP: - inst = r300TranslateOpcodeEXP(vp, vpi, inst, src); - break; - case OPCODE_FLR: - inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - case OPCODE_FRC: - inst = r300TranslateOpcodeFRC(vp, vpi, inst, src); - break; - case OPCODE_LG2: - inst = r300TranslateOpcodeLG2(vp, vpi, inst, src); - break; - case OPCODE_LIT: - inst = r300TranslateOpcodeLIT(vp, vpi, inst, src); - break; - case OPCODE_LOG: - inst = r300TranslateOpcodeLOG(vp, vpi, inst, src); - break; - case OPCODE_MAD: - inst = r300TranslateOpcodeMAD(vp, vpi, inst, src); - break; - case OPCODE_MAX: - inst = r300TranslateOpcodeMAX(vp, vpi, inst, src); - break; - case OPCODE_MIN: - inst = r300TranslateOpcodeMIN(vp, vpi, inst, src); - break; - case OPCODE_MOV: - inst = r300TranslateOpcodeMOV(vp, vpi, inst, src); - break; - case OPCODE_MUL: - inst = r300TranslateOpcodeMUL(vp, vpi, inst, src); - break; - case OPCODE_POW: - inst = r300TranslateOpcodePOW(vp, vpi, inst, src); - break; - case OPCODE_RCP: - inst = r300TranslateOpcodeRCP(vp, vpi, inst, src); - break; - case OPCODE_RSQ: - inst = 
r300TranslateOpcodeRSQ(vp, vpi, inst, src); - break; - case OPCODE_SGE: - inst = r300TranslateOpcodeSGE(vp, vpi, inst, src); - break; - case OPCODE_SLT: - inst = r300TranslateOpcodeSLT(vp, vpi, inst, src); - break; - case OPCODE_SUB: - inst = r300TranslateOpcodeSUB(vp, vpi, inst, src); - break; - case OPCODE_SWZ: - inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src); - break; - case OPCODE_XPD: - inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - default: - vp->error = GL_TRUE; - break; - } - } - - vp->hw_code.length = (inst - vp->hw_code.body.d); - if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->error = GL_TRUE; - } -} - -static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) -{ - struct prog_instruction *vpi; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); - - vpi = &prog->Instructions[prog->NumInstructions - 3]; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_HPOS; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_END; -} - -static void pos_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - - prog->NumTemporaries++; - - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - 
} - } - - insert_wpos(prog, tempregi, tex_id); - - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -/** - * The fogcoord attribute is special in that only the first component - * is relevant, and the remaining components are always fixed (when read - * from by the fragment program) to yield an X001 pattern. - * - * We need to enforce this either in the vertex program or in the fragment - * program, and this code chooses not to enforce it in the vertex program. - * This is slightly cheaper, as long as the fragment program does not use - * weird swizzles. - * - * And it seems that usually, weird swizzles are not used, so... - * - * See also the counterpart rewriting for fragment programs. - */ -static void fog_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - - vpi = prog->Instructions; - while (vpi->Opcode != OPCODE_END) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_X; - } - - ++vpi; - } - - prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -static int translateABS(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_MAX; - inst->SrcReg[1] = inst->SrcReg[0]; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateDP3(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - return 0; -} - -static int translateDPH(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, 
SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - - return 0; -} - -static int translateFLR(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - struct prog_dst_register dst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - dst = inst->DstReg; - - inst->Opcode = OPCODE_FRC; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - ++inst; - - inst->Opcode = OPCODE_ADD; - inst->DstReg = dst; - inst->SrcReg[0] = (inst-1)->SrcReg[0]; - inst->SrcReg[1].File = PROGRAM_TEMPORARY; - inst->SrcReg[1].Index = tmp_idx; - inst->SrcReg[1].Negate = NEGATE_XYZW; - - return 1; -} - -static int translateSUB(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_ADD; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateSWZ(struct gl_program *prog, int pos) -{ - prog->Instructions[pos].Opcode = OPCODE_MOV; - - return 0; -} - -static int translateXPD(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - - *(inst+1) = *inst; - - inst->Opcode = OPCODE_MUL; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - ++inst; - - inst->Opcode = OPCODE_MAD; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - inst->SrcReg[2].File = 
PROGRAM_TEMPORARY; - inst->SrcReg[2].Index = tmp_idx; - - return 1; -} - -static void translateInsts(struct gl_program *prog) -{ - struct prog_instruction *inst; - int i; - - for (i = 0; i < prog->NumInstructions; ++i) { - inst = &prog->Instructions[i]; - - switch (inst->Opcode) { - case OPCODE_ABS: - i += translateABS(prog, i); - break; - case OPCODE_DP3: - i += translateDP3(prog, i); - break; - case OPCODE_DPH: - i += translateDPH(prog, i); - break; - case OPCODE_FLR: - i += translateFLR(prog, i); - break; - case OPCODE_SUB: - i += translateSUB(prog, i); - break; - case OPCODE_SWZ: - i += translateSWZ(prog, i); - break; - case OPCODE_XPD: - i += translateXPD(prog, i); - break; - default: - break; - } - } -} - -#define ADD_OUTPUT(fp_attr, vp_result) \ - do { \ - if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \ - OutputsAdded |= 1 << (vp_result); \ - count++; \ - } \ - } while (0) - -static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - GLuint OutputsAdded, FpReads; - int i, count; - - OutputsAdded = 0; - count = 0; - FpReads = r300->selected_fp->InputsRead; - - ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); - ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); - - for (i = 0; i < 7; ++i) { - ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); - } - - /* Some outputs may be artificially added, to match the inputs of the fragment program. 
- * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by - * vertex program are undefined, so just use MOV [vertex_result], CONST[0] - */ - if (count > 0) { - struct prog_instruction *inst; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); - inst = &prog->Instructions[prog->NumInstructions - 1 - count]; - - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (OutputsAdded & (1 << i)) { - inst->Opcode = OPCODE_MOV; - - inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = i; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; - - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = 0; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++inst; - } - } - - prog->OutputsWritten |= OutputsAdded; - } -} - -#undef ADD_OUTPUT - -static void nqssadceInit(struct nqssadce_state* s) -{ - r300ContextPtr r300 = (r300ContextPtr)(s->UserData); - GLuint fp_reads; - - fp_reads = r300->selected_fp->InputsRead; - { - if (fp_reads & FRAG_BIT_COL0) { - s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; - } - - if (fp_reads & FRAG_BIT_COL1) { - s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; - } - } - - { - int i; - for (i = 0; i < 8; ++i) { - if (fp_reads & FRAG_BIT_TEX(i)) { - s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; - } - } - } - - s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; - if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) - s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; -} - -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) -{ - (void) opcode; - (void) reg; - - return GL_TRUE; -} - static struct r300_vertex_program *build_program(GLcontext *ctx, - struct r300_vertex_program_key *wanted_key, + struct r300_vertex_program_external_state *wanted_key, const struct gl_vertex_program *mesa_vp) { - 
r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program *vp; - struct gl_vertex_program * glvp = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); - struct gl_program *prog; + struct r300_vertex_program_compiler compiler; vp = _mesa_calloc(sizeof(*vp)); - vp->Base = glvp; + vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - prog = &glvp->Base; - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Initial vertex program:\n"); - _mesa_print_program(prog); - fflush(stdout); - } - - if (glvp->IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, glvp); - } - - if (r300->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) { - pos_as_texcoord(&glvp->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0); - } - - if (r300->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) { - fog_as_texcoord(&glvp->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0); - } - - addArtificialOutputs(ctx, prog); + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & DEBUG_VERTS) ? 
GL_TRUE : GL_FALSE; - translateInsts(prog); + compiler.code = &vp->code; + compiler.state = vp->key; + compiler.program = vp->Base; - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(prog); + if (compiler.Base.Debug) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(compiler.program); fflush(stdout); } - { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadceInit, - .IsNativeSwizzle = &swizzleIsNative, - .BuildSwizzle = NULL - }; - radeonNqssaDce(prog, &nqssadce, r300); - - /* We need this step for reusing temporary registers */ - _mesa_optimize_program(ctx, prog); - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after NQSSADCE:\n"); - _mesa_print_program(prog); - fflush(stdout); - } - } - - assert(prog->NumInstructions); - { - struct prog_instruction *inst; - int max, i, tmp; - - inst = prog->Instructions; - max = -1; - while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - if ((int) inst->SrcReg[i].Index > max) { - max = inst->SrcReg[i].Index; - } - } - } - - if (_mesa_num_inst_dst_regs(inst->Opcode)) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - if ((int) inst->DstReg.Index > max) { - max = inst->DstReg.Index; - } - } - } - ++inst; - } - - /* We actually want highest index of used temporary register, - * not the number of temporaries used. - * These values aren't always the same. 
- */ - vp->num_temporaries = max + 1; + if (mesa_vp->IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, (struct gl_vertex_program *)compiler.program); } - translate_vertex_program(vp, &glvp->Base); + if (!r3xx_compile_vertex_program(&compiler, ctx)) + vp->error = GL_TRUE; - vp->InputsRead = glvp->Base.InputsRead; - vp->OutputsWritten = glvp->Base.OutputsWritten; + rc_destroy(&compiler.Base); return vp; } @@ -1639,7 +136,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_vertex_program_key wanted_key = { 0 }; + struct r300_vertex_program_external_state wanted_key = { 0 }; struct r300_vertex_program_cont *vpc; struct r300_vertex_program *vp; @@ -1669,7 +166,7 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ } while(0) -static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code) +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) { int i; @@ -1717,11 +214,11 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); - inst_count = (prog->hw_code.length / 4) - 1; + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); + inst_count = (prog->code.length / 4) - 1; - r300VapCntl(rmesa, _mesa_bitcount(prog->InputsRead), - _mesa_bitcount(prog->OutputsWritten), prog->num_temporaries); + r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead), + _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries); R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | diff --git 
a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 896699ffe2e..ccec896be40 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -3,34 +3,6 @@ #include "r300_reg.h" -#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ - (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ - | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ - | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ - | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ - | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) - -#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ - (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ - | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ - | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ - | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ - | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ - | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) - -#if 1 - -#define VSF_FLAG_X 1 -#define VSF_FLAG_Y 2 -#define VSF_FLAG_Z 4 -#define VSF_FLAG_W 8 -#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z) -#define VSF_FLAG_ALL 0xf -#define VSF_FLAG_NONE 0 - -#endif void r300SetupVertexProgram(r300ContextPtr rmesa); diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h index 40ad998f79d..39a221eeaba 100644 --- a/src/mesa/shader/prog_instruction.h +++ b/src/mesa/shader/prog_instruction.h @@ -133,6 +133,7 @@ #define NEGATE_Y 0x2 #define NEGATE_Z 0x4 #define NEGATE_W 0x8 +#define NEGATE_XYZ 0x7 #define NEGATE_XYZW 0xf #define NEGATE_NONE 0x0 /*@}*/ @@ -303,11 +304,11 
@@ struct prog_dst_register * Condition code swizzle value. */ GLuint CondSwizzle:12; - + /** * Selects the condition code register to use for conditional destination * update masking. In NV_fragmnet_program or NV_vertex_program2 mode, only - * condition code register 0 is available. In NV_vertex_program3 mode, + * condition code register 0 is available. In NV_vertex_program3 mode, * condition code registers 0 and 1 are available. */ GLuint CondSrc:1; @@ -359,7 +360,7 @@ struct prog_instruction * NV_fragment_program, NV_fragment_program_option, NV_vertex_program3. */ GLuint SaturateMode:2; - + /** * Per-instruction selectable precision: FLOAT32, FLOAT16, FIXED12. * @@ -374,7 +375,7 @@ struct prog_instruction /*@{*/ /** Source texture unit. */ GLuint TexSrcUnit:5; - + /** Source texture target, one of TEXTURE_{1D,2D,3D,CUBE,RECT}_INDEX */ GLuint TexSrcTarget:3; -- 2.30.2