X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Ftnl%2Ft_vb_arbprogram.c;h=524472a7772d27fee17a622f5063e3cc5026401d;hb=18d1fdebebcb52e7fcf50e62c4c02862d173af51;hp=30364ceb7818637373c6a8b25537412be65f3aa5;hpb=3eec2901db92dd91d96203ab515296eaf4b32927;p=mesa.git diff --git a/src/mesa/tnl/t_vb_arbprogram.c b/src/mesa/tnl/t_vb_arbprogram.c index 30364ceb781..524472a7772 100644 --- a/src/mesa/tnl/t_vb_arbprogram.c +++ b/src/mesa/tnl/t_vb_arbprogram.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 6.5.1 * - * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -38,22 +38,15 @@ #include "light.h" #include "program.h" #include "math/m_matrix.h" -#include "math/m_translate.h" #include "t_context.h" #include "t_pipeline.h" #include "t_vb_arbprogram.h" #include "tnl.h" +#include "program_instruction.h" #define DISASSEM 0 -/*--------------------------------------------------------------------------- */ - -struct opcode_info { - GLuint nr_args; - const char *string; - void (*print)( union instruction , const struct opcode_info * ); -}; struct compilation { GLuint reg_active; @@ -91,7 +84,10 @@ static GLfloat RoughApproxExp2(GLfloat t) static GLfloat RoughApproxPower(GLfloat x, GLfloat y) { - return RoughApproxExp2(y * RoughApproxLog2(x)); + if (x == 0.0 && y == 0.0) + return 1.0; /* spec requires this */ + else + return RoughApproxExp2(y * RoughApproxLog2(x)); } @@ -112,34 +108,57 @@ static GLfloat ApproxPower(GLfloat x, GLfloat y) return (GLfloat) _mesa_pow(x, y); } -static GLfloat rough_approx_log2_0_1(GLfloat x) + +/** + * Perform a reduced swizzle: + */ +static void do_RSW( struct arb_vp_machine *m, union instruction op ) { - return LOG2(x); -} + GLfloat *result = m->File[0][op.rsw.dst]; + const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0]; + const GLuint swz = op.rsw.swz; + const GLuint neg = op.rsw.neg; + GLfloat tmp[4]; + /* Need a temporary to be correct in the case where result == arg0. + */ + COPY_4V(tmp, arg0); + result[0] = tmp[GET_SWZ(swz, 0)]; + result[1] = tmp[GET_SWZ(swz, 1)]; + result[2] = tmp[GET_SWZ(swz, 2)]; + result[3] = tmp[GET_SWZ(swz, 3)]; + if (neg) { + if (neg & 0x1) result[0] = -result[0]; + if (neg & 0x2) result[1] = -result[1]; + if (neg & 0x4) result[2] = -result[2]; + if (neg & 0x8) result[3] = -result[3]; + } +} /** - * Perform a reduced swizzle: + * Perform a full swizzle */ -static void do_RSW( struct arb_vp_machine *m, union instruction op ) +static void do_SWZ( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.rsw.dst]; const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0]; - GLuint swz = op.rsw.swz; - GLuint neg = op.rsw.neg; - GLfloat tmp[4]; + const GLuint swz = op.rsw.swz; + const GLuint neg = op.rsw.neg; + GLfloat tmp[6]; + tmp[4] = 0.0; + tmp[5] = 1.0; /* Need a temporary to be correct in the case where result == arg0. */ COPY_4V(tmp, arg0); - - result[0] = tmp[GET_RSW(swz, 0)]; - result[1] = tmp[GET_RSW(swz, 1)]; - result[2] = tmp[GET_RSW(swz, 2)]; - result[3] = tmp[GET_RSW(swz, 3)]; - + + result[0] = tmp[GET_SWZ(swz, 0)]; + result[1] = tmp[GET_SWZ(swz, 1)]; + result[2] = tmp[GET_SWZ(swz, 2)]; + result[3] = tmp[GET_SWZ(swz, 3)]; + if (neg) { if (neg & 0x1) result[0] = -result[0]; if (neg & 0x2) result[1] = -result[1]; @@ -163,10 +182,10 @@ static void do_MSK( struct arb_vp_machine *m, union instruction op ) GLfloat *dst = m->File[0][op.msk.dst]; const GLfloat *arg = m->File[op.msk.file][op.msk.idx]; - if (op.msk.mask & 0x1) dst[0] = arg[0]; - if (op.msk.mask & 0x2) dst[1] = arg[1]; - if (op.msk.mask & 0x4) dst[2] = arg[2]; - if (op.msk.mask & 0x8) dst[3] = arg[3]; + if (op.msk.mask & WRITEMASK_X) dst[0] = arg[0]; + if (op.msk.mask & WRITEMASK_Y) dst[1] = arg[1]; + if (op.msk.mask & WRITEMASK_Z) dst[2] = arg[2]; + if (op.msk.mask & WRITEMASK_W) dst[3] = arg[3]; } @@ -284,13 +303,13 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; - GLfloat tmp = arg0[0]; - GLfloat flr_tmp = FLOORF(tmp); - GLfloat frac_tmp = tmp - flr_tmp; + const GLfloat tmp = arg0[0]; + const GLfloat flr_tmp = FLOORF(tmp); + const GLfloat frac_tmp = tmp - flr_tmp; result[0] = LDEXPF(1.0, (int)flr_tmp); result[1] = frac_tmp; - result[2] = LDEXPF(rough_approx_log2_0_1(frac_tmp), (int)flr_tmp); + result[2] = RoughApproxExp2(tmp); result[3] = 1.0F; } @@ -333,20 +352,17 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; - GLfloat tmp[4]; + GLfloat tmp[4]; /* use temp in case arg0 == result register */ tmp[0] = 1.0; - tmp[1] = 0.0; - tmp[2] = 0.0; - tmp[3] = 1.0; - + tmp[1] = arg0[0]; if (arg0[0] > 0.0) { - tmp[1] = arg0[0]; - - if (arg0[1] > 0.0) { - tmp[2] = RoughApproxPower(arg0[1], arg0[3]); - } + tmp[2] = RoughApproxPower(arg0[1], arg0[3]); + } + else { + tmp[2] = 0.0; } + tmp[3] = 1.0; COPY_4V(result, tmp); } @@ -358,9 +374,9 @@ static void do_LOG( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; - GLfloat tmp = FABSF(arg0[0]); + const GLfloat tmp = FABSF(arg0[0]); int exponent; - GLfloat mantissa = FREXPF(tmp, &exponent); + const GLfloat mantissa = FREXPF(tmp, &exponent); result[0] = (GLfloat) (exponent - 1); result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */ @@ -432,7 +448,7 @@ static void do_POW( struct arb_vp_machine *m, union instruction op ) static void do_REL( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; - GLuint idx = (op.alu.idx0 + (GLint)m->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1); + const GLuint idx = (op.alu.idx0 + (GLint)m->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1); const GLfloat *arg0 = m->File[op.alu.file0][idx]; result[0] = arg0[0]; @@ -526,10 +542,10 @@ static void do_NOP( struct arb_vp_machine *m, union instruction op ) static void print_mask( GLuint mask ) { _mesa_printf("."); - if (mask&0x1) _mesa_printf("x"); - if (mask&0x2) _mesa_printf("y"); - if (mask&0x4) _mesa_printf("z"); - if (mask&0x8) _mesa_printf("w"); + if (mask & WRITEMASK_X) _mesa_printf("x"); + if (mask & WRITEMASK_Y) _mesa_printf("y"); + if (mask & WRITEMASK_Z) _mesa_printf("z"); + if (mask & WRITEMASK_W) _mesa_printf("w"); } static void print_reg( GLuint file, GLuint reg ) @@ -564,43 +580,63 @@ static void print_reg( GLuint file, GLuint reg ) } -static void print_RSW( union instruction op, const struct opcode_info *info ) +static void print_RSW( union instruction op ) +{ + GLuint swz = op.rsw.swz; + GLuint neg = op.rsw.neg; + GLuint i; + + _mesa_printf("RSW "); + print_reg(0, op.rsw.dst); + _mesa_printf(", "); + print_reg(op.rsw.file0, op.rsw.idx0); + _mesa_printf("."); + for (i = 0; i < 4; i++, swz >>= 3) { + const char *cswz = "xyzw01"; + if (neg & (1<string); + _mesa_printf("SWZ "); print_reg(0, op.rsw.dst); _mesa_printf(", "); print_reg(op.rsw.file0, op.rsw.idx0); _mesa_printf("."); - for (i = 0; i < 4; i++, swz >>= 2) { - const char *cswz = "xyzw"; + for (i = 0; i < 4; i++, swz >>= 3) { + const char *cswz = "xyzw01"; if (neg & (1<string); + _mesa_printf("%s ", _mesa_opcode_string((enum prog_opcode) op.alu.opcode)); print_reg(0, op.alu.dst); _mesa_printf(", "); print_reg(op.alu.file0, op.alu.idx0); - if (info->nr_args > 1) { + if (_mesa_num_inst_src_regs((enum prog_opcode) op.alu.opcode) > 1) { _mesa_printf(", "); print_reg(op.alu.file1, op.alu.idx1); } _mesa_printf("\n"); } -static void print_MSK( union instruction op, const struct opcode_info *info ) +static void print_MSK( union instruction op ) { - _mesa_printf("%s ", info->string); + _mesa_printf("MSK "); print_reg(0, op.msk.dst); print_mask(op.msk.mask); _mesa_printf(", "); @@ -608,64 +644,88 @@ static void print_MSK( union instruction op, const struct opcode_info *info ) _mesa_printf("\n"); } - -static void print_NOP( union instruction op, const struct opcode_info *info ) +static void print_NOP( union instruction op ) { } -#define NOP 0 -#define ALU 1 -#define SWZ 2 - -static const struct opcode_info opcode_info[] = -{ - { 1, "ABS", print_ALU }, - { 2, "ADD", print_ALU }, - { 1, "ARL", print_NOP }, - { 2, "DP3", print_ALU }, - { 2, "DP4", print_ALU }, - { 2, "DPH", print_ALU }, - { 2, "DST", print_ALU }, - { 0, "END", print_NOP }, - { 1, "EX2", print_ALU }, - { 1, "EXP", print_ALU }, - { 1, "FLR", print_ALU }, - { 1, "FRC", print_ALU }, - { 1, "LG2", print_ALU }, - { 1, "LIT", print_ALU }, - { 1, "LOG", print_ALU }, - { 3, "MAD", print_NOP }, - { 2, "MAX", print_ALU }, - { 2, "MIN", print_ALU }, - { 1, "MOV", print_ALU }, - { 2, "MUL", print_ALU }, - { 2, "POW", print_ALU }, - { 1, "PRT", print_ALU }, /* PRINT */ - { 1, "RCC", print_NOP }, - { 1, "RCP", print_ALU }, - { 1, "RSQ", print_ALU }, - { 2, "SGE", print_ALU }, - { 2, "SLT", print_ALU }, - { 2, "SUB", print_ALU }, - { 1, "SWZ", print_NOP }, - { 2, "XPD", print_ALU }, - { 1, "RSW", print_RSW }, - { 2, "MSK", print_MSK }, - { 1, "REL", print_ALU }, -}; - -void _tnl_disassem_vba_insn( union instruction op ) +void +_tnl_disassem_vba_insn( union instruction op ) { - const struct opcode_info *info = &opcode_info[op.alu.opcode]; - info->print( op, info ); + switch (op.alu.opcode) { + case OPCODE_ABS: + case OPCODE_ADD: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_DPH: + case OPCODE_DST: + case OPCODE_EX2: + case OPCODE_EXP: + case OPCODE_FLR: + case OPCODE_FRC: + case OPCODE_LG2: + case OPCODE_LIT: + case OPCODE_LOG: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MOV: + case OPCODE_MUL: + case OPCODE_POW: + case OPCODE_PRINT: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SGE: + case OPCODE_SLT: + case OPCODE_SUB: + case OPCODE_XPD: + print_ALU(op); + break; + case OPCODE_ARA: + case OPCODE_ARL: + case OPCODE_ARL_NV: + case OPCODE_ARR: + case OPCODE_BRA: + case OPCODE_CAL: + case OPCODE_END: + case OPCODE_MAD: + case OPCODE_POPA: + case OPCODE_PUSHA: + case OPCODE_RCC: + case OPCODE_RET: + case OPCODE_SSG: + print_NOP(op); + break; + case OPCODE_SWZ: + print_SWZ(op); + break; + case RSW: + print_RSW(op); + break; + case MSK: + print_MSK(op); + break; + case REL: + print_ALU(op); + break; + default: + _mesa_problem(NULL, "Bad opcode in _tnl_disassem_vba_insn()"); + } } -static void (* const opcode_func[])(struct arb_vp_machine *, union instruction) = +static void (* const opcode_func[MAX_OPCODE+3])(struct arb_vp_machine *, union instruction) = { do_ABS, do_ADD, - do_NOP, + do_NOP,/*ARA*/ + do_NOP,/*ARL*/ + do_NOP,/*ARL_NV*/ + do_NOP,/*ARR*/ + do_NOP,/*BRA*/ + do_NOP,/*CAL*/ + do_NOP,/*CMP*/ + do_NOP,/*COS*/ + do_NOP,/*DDX*/ + do_NOP,/*DDY*/ do_DP3, do_DP4, do_DPH, @@ -675,23 +735,54 @@ static void (* const opcode_func[])(struct arb_vp_machine *, union instruction) do_EXP, do_FLR, do_FRC, + do_NOP,/*KIL*/ + do_NOP,/*KIL_NV*/ do_LG2, do_LIT, do_LOG, - do_NOP, + do_NOP,/*LRP*/ + do_NOP,/*MAD*/ do_MAX, do_MIN, do_MOV, do_MUL, + do_NOP,/*PK2H*/ + do_NOP,/*PK2US*/ + do_NOP,/*PK4B*/ + do_NOP,/*PK4UB*/ do_POW, + do_NOP,/*POPA*/ do_PRT, - do_NOP, - do_RCP, + do_NOP,/*PUSHA*/ + do_NOP,/*RCC*/ + do_RCP,/*RCP*/ + do_NOP,/*RET*/ + do_NOP,/*RFL*/ do_RSQ, + do_NOP,/*SCS*/ + do_NOP,/*SEQ*/ + do_NOP,/*SFL*/ do_SGE, + do_NOP,/*SGT*/ + do_NOP,/*SIN*/ + do_NOP,/*SLE*/ do_SLT, + do_NOP,/*SNE*/ + do_NOP,/*SSG*/ + do_NOP,/*STR*/ do_SUB, - do_RSW, + do_SWZ,/*SWZ*/ + do_NOP,/*TEX*/ + do_NOP,/*TXB*/ + do_NOP,/*TXD*/ + do_NOP,/*TXL*/ + do_NOP,/*TXP*/ + do_NOP,/*TXP_NV*/ + do_NOP,/*UP2H*/ + do_NOP,/*UP2US*/ + do_NOP,/*UP4B*/ + do_NOP,/*UP4UB*/ + do_NOP,/*X2D*/ do_XPD, do_RSW, do_MSK, @@ -701,7 +792,7 @@ static void (* const opcode_func[])(struct arb_vp_machine *, union instruction) static union instruction *cvp_next_instruction( struct compilation *cp ) { union instruction *op = cp->csr++; - op->dword = 0; + _mesa_bzero(op, sizeof(*op)); return op; } @@ -773,35 +864,42 @@ static struct reg cvp_load_reg( struct compilation *cp, case PROGRAM_WRITE_ONLY: case PROGRAM_ADDRESS: default: + _mesa_problem(NULL, "Invalid register file %d in cvp_load_reg()"); assert(0); return tmpreg; /* can't happen */ } } static struct reg cvp_emit_arg( struct compilation *cp, - const struct vp_src_register *src, + const struct prog_src_register *src, GLuint arg ) { struct reg reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr, arg ); union instruction rsw, noop; - + /* Emit any necessary swizzling. */ - rsw.dword = 0; - rsw.rsw.neg = src->Negate ? WRITEMASK_XYZW : 0; - rsw.rsw.swz = ((GET_SWZ(src->Swizzle, 0) << 0) | - (GET_SWZ(src->Swizzle, 1) << 2) | - (GET_SWZ(src->Swizzle, 2) << 4) | - (GET_SWZ(src->Swizzle, 3) << 6)); - - noop.dword = 0; + _mesa_bzero(&rsw, sizeof(rsw)); + rsw.rsw.neg = src->NegateBase ? WRITEMASK_XYZW : 0; + + /* we're expecting 2-bit swizzles below... */ +#if 1 /* XXX THESE ASSERTIONS CURRENTLY FAIL DURING GLEAN TESTS! */ +/* hopefully no longer happens? */ + ASSERT(GET_SWZ(src->Swizzle, 0) < 4); + ASSERT(GET_SWZ(src->Swizzle, 1) < 4); + ASSERT(GET_SWZ(src->Swizzle, 2) < 4); + ASSERT(GET_SWZ(src->Swizzle, 3) < 4); +#endif + rsw.rsw.swz = src->Swizzle; + + _mesa_bzero(&noop, sizeof(noop)); noop.rsw.neg = 0; - noop.rsw.swz = RSW_NOOP; + noop.rsw.swz = SWIZZLE_NOOP; - if (rsw.dword != noop.dword) { + if (_mesa_memcmp(&rsw, &noop, sizeof(rsw)) !=0) { union instruction *op = cvp_next_instruction(cp); struct reg rsw_reg = cvp_make_reg(FILE_REG, REG_ARG0 + arg); - op->dword = rsw.dword; + *op = rsw; op->rsw.opcode = RSW; op->rsw.file0 = reg.file; op->rsw.idx0 = reg.idx; @@ -813,7 +911,7 @@ static struct reg cvp_emit_arg( struct compilation *cp, } static GLuint cvp_choose_result( struct compilation *cp, - const struct vp_dst_register *dst, + const struct prog_dst_register *dst, union instruction *fixup ) { GLuint mask = dst->WriteMask; @@ -844,76 +942,33 @@ static GLuint cvp_choose_result( struct compilation *cp, return REG_RES; } else { - fixup->dword = 0; + _mesa_bzero(fixup, sizeof(*fixup)); cp->reg_active |= 1 << idx; return idx; } } -static struct reg cvp_emit_rsw( struct compilation *cp, - GLuint dst, - struct reg src, - GLuint neg, - GLuint swz, - GLboolean force) -{ - struct reg retval; - - if (swz != RSW_NOOP || neg != 0) { - union instruction *op = cvp_next_instruction(cp); - op->rsw.opcode = RSW; - op->rsw.dst = dst; - op->rsw.file0 = src.file; - op->rsw.idx0 = src.idx; - op->rsw.neg = neg; - op->rsw.swz = swz; - - retval.file = FILE_REG; - retval.idx = dst; - return retval; - } - else if (force) { - /* Oops. Degenerate case: - */ - union instruction *op = cvp_next_instruction(cp); - op->alu.opcode = VP_OPCODE_MOV; - op->alu.dst = dst; - op->alu.file0 = src.file; - op->alu.idx0 = src.idx; - - retval.file = FILE_REG; - retval.idx = dst; - return retval; - } - else { - return src; - } -} - static void cvp_emit_inst( struct compilation *cp, - const struct vp_instruction *inst ) + const struct prog_instruction *inst ) { - const struct opcode_info *info = &opcode_info[inst->Opcode]; union instruction *op; union instruction fixup; struct reg reg[3]; - GLuint result, i; - - assert(sizeof(*op) == sizeof(GLuint)); + GLuint result, nr_args, i; /* Need to handle SWZ, ARL specially. */ switch (inst->Opcode) { /* Split into mul and add: */ - case VP_OPCODE_MAD: + case OPCODE_MAD: result = cvp_choose_result( cp, &inst->DstReg, &fixup ); for (i = 0; i < 3; i++) reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0+i ); op = cvp_next_instruction(cp); - op->alu.opcode = VP_OPCODE_MUL; + op->alu.opcode = OPCODE_MUL; op->alu.file0 = reg[0].file; op->alu.idx0 = reg[0].idx; op->alu.file1 = reg[1].file; @@ -921,7 +976,7 @@ static void cvp_emit_inst( struct compilation *cp, op->alu.dst = REG_ARG0; op = cvp_next_instruction(cp); - op->alu.opcode = VP_OPCODE_ADD; + op->alu.opcode = OPCODE_ADD; op->alu.file0 = FILE_REG; op->alu.idx0 = REG_ARG0; op->alu.file1 = reg[2].file; @@ -930,82 +985,45 @@ static void cvp_emit_inst( struct compilation *cp, if (result == REG_RES) { op = cvp_next_instruction(cp); - op->dword = fixup.dword; + *op = fixup; } break; - case VP_OPCODE_ARL: + case OPCODE_ARL: reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 ); op = cvp_next_instruction(cp); - op->alu.opcode = VP_OPCODE_FLR; + op->alu.opcode = OPCODE_FLR; op->alu.dst = REG_ADDR; op->alu.file0 = reg[0].file; op->alu.idx0 = reg[0].idx; break; - case VP_OPCODE_SWZ: { - GLuint swz0 = 0, swz1 = 0; - GLuint neg0 = 0, neg1 = 0; - GLuint mask = 0; - - /* Translate 3-bit-per-element swizzle into two 2-bit swizzles, - * one from the source register the other from a constant - * {0,0,0,1}. - */ - for (i = 0; i < 4; i++) { - GLuint swzelt = GET_SWZ(inst->SrcReg[0].Swizzle, i); - if (swzelt >= SWIZZLE_ZERO) { - neg0 |= inst->SrcReg[0].Negate & (1<SrcReg[0].Negate & (1<DstReg, &fixup ); - reg[0].file = FILE_REG; - reg[0].idx = REG_ID; - reg[1] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 ); - - if (mask == WRITEMASK_XYZW) { - cvp_emit_rsw(cp, result, reg[0], neg0, swz0, GL_TRUE); - - } - else if (mask == 0) { - cvp_emit_rsw(cp, result, reg[1], neg1, swz1, GL_TRUE); - } - else { - cvp_emit_rsw(cp, result, reg[0], neg0, swz0, GL_TRUE); - reg[1] = cvp_emit_rsw(cp, REG_ARG0, reg[1], neg1, swz1, GL_FALSE); - - op = cvp_next_instruction(cp); - op->msk.opcode = MSK; - op->msk.dst = result; - op->msk.file = reg[1].file; - op->msk.idx = reg[1].idx; - op->msk.mask = mask; - } + reg[0] = cvp_load_reg( cp, inst->SrcReg[0].File, + inst->SrcReg[0].Index, inst->SrcReg[0].RelAddr, REG_ARG0 ); + op = cvp_next_instruction(cp); + op->rsw.opcode = inst->Opcode; + op->rsw.file0 = reg[0].file; + op->rsw.idx0 = reg[0].idx; + op->rsw.dst = result; + op->rsw.swz = inst->SrcReg[0].Swizzle; + op->rsw.neg = inst->SrcReg[0].NegateBase; if (result == REG_RES) { op = cvp_next_instruction(cp); - op->dword = fixup.dword; + *op = fixup; } break; - } - - case VP_OPCODE_END: - break; default: result = cvp_choose_result( cp, &inst->DstReg, &fixup ); - for (i = 0; i < info->nr_args; i++) + nr_args = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < nr_args; i++) reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i ); op = cvp_next_instruction(cp); @@ -1018,27 +1036,27 @@ static void cvp_emit_inst( struct compilation *cp, if (result == REG_RES) { op = cvp_next_instruction(cp); - op->dword = fixup.dword; - } + *op = fixup; + } break; } } -static void free_tnl_data( struct vertex_program *program ) +static void free_tnl_data( struct gl_vertex_program *program ) { - struct tnl_compiled_program *p = program->TnlData; + struct tnl_compiled_program *p = (struct tnl_compiled_program *) program->TnlData; if (p->compiled_func) _mesa_free((void *)p->compiled_func); _mesa_free(p); program->TnlData = NULL; } -static void compile_vertex_program( struct vertex_program *program, +static void compile_vertex_program( struct gl_vertex_program *program, GLboolean try_codegen ) { struct compilation cp; struct tnl_compiled_program *p = CALLOC_STRUCT(tnl_compiled_program); - GLuint i; + GLint i; if (program->TnlData) free_tnl_data( program ); @@ -1054,7 +1072,7 @@ static void compile_vertex_program( struct vertex_program *program, /* Compile instructions: */ for (i = 0; i < program->Base.NumInstructions; i++) { - cvp_emit_inst(&cp, &program->Instructions[i]); + cvp_emit_inst(&cp, &program->Base.Instructions[i]); } /* Finish up: @@ -1128,9 +1146,9 @@ static void userclip( GLcontext *ctx, } -static GLboolean do_ndc_cliptest( struct arb_vp_machine *m ) +static GLboolean +do_ndc_cliptest(GLcontext *ctx, struct arb_vp_machine *m) { - GLcontext *ctx = m->ctx; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = m->VB; @@ -1138,7 +1156,7 @@ static GLboolean do_ndc_cliptest( struct arb_vp_machine *m ) * the clipmask. */ m->ormask = 0; - m->andmask = CLIP_ALL_BITS; + m->andmask = CLIP_FRUSTUM_BITS; if (tnl->NeedNdcCoords) { VB->NdcPtr = @@ -1164,7 +1182,8 @@ static GLboolean do_ndc_cliptest( struct arb_vp_machine *m ) /* Test userclip planes. This contributes to VB->ClipMask. */ - if (ctx->Transform.ClipPlanesEnabled && !ctx->VertexProgram._Enabled) { + if (ctx->Transform.ClipPlanesEnabled && (!ctx->VertexProgram._Enabled || + ctx->VertexProgram.Current->IsPositionInvariant)) { userclip( ctx, VB->ClipPtr, m->clipmask, @@ -1202,19 +1221,25 @@ static INLINE void call_func( struct tnl_compiled_program *p, static GLboolean run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) { - struct vertex_program *program = (ctx->VertexProgram._Enabled ? - ctx->VertexProgram.Current : - ctx->_TnlProgram); + const struct gl_vertex_program *program; struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; struct arb_vp_machine *m = ARB_VP_MACHINE(stage); struct tnl_compiled_program *p; - GLuint i, j, outputs; + GLuint i, j; + GLbitfield outputs; + + if (ctx->ShaderObjects._VertexShaderPresent) + return GL_TRUE; + program = ctx->VertexProgram._Enabled ? ctx->VertexProgram.Current : NULL; + if (!program && ctx->_MaintainTnlProgram) { + program = ctx->_TnlProgram; + } if (!program || program->IsNVProgram) return GL_TRUE; - if (program->Parameters) { - _mesa_load_state_parameters(ctx, program->Parameters); + if (program->Base.Parameters) { + _mesa_load_state_parameters(ctx, program->Base.Parameters); } p = (struct tnl_compiled_program *)program->TnlData; @@ -1223,8 +1248,9 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) m->nr_inputs = m->nr_outputs = 0; - for (i = 0; i < _TNL_ATTRIB_MAX; i++) { - if (program->InputsRead & (1<Base.InputsRead & (1<IsPositionInvariant)) { GLuint j = m->nr_inputs++; m->input[j].idx = i; m->input[j].data = (GLfloat *)m->VB->AttribPtr[i]->data; @@ -1234,8 +1260,9 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) } } - for (i = 0; i < 15; i++) { - if (program->OutputsWritten & (1<Base.OutputsWritten & (1 << i) || + (i == VERT_RESULT_HPOS && program->IsPositionInvariant)) { GLuint j = m->nr_outputs++; m->output[j].idx = i; m->output[j].data = (GLfloat *)m->attribs[i].data; @@ -1258,16 +1285,27 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) STRIDE_F(m->input[j].data, m->input[j].stride); } + if (p->compiled_func) { call_func( p, m ); } else { + GLint j; for (j = 0; j < p->nr_instructions; j++) { union instruction inst = p->instructions[j]; opcode_func[inst.alu.opcode]( m, inst ); } } + /* If the program is position invariant, multiply the input position + * by the MVP matrix and store in the vertex position result register. + */ + if (program->IsPositionInvariant) { + TRANSFORM_POINT( m->File[0][REG_OUT0+0], + ctx->_ModelProjectMatrix.m, + m->File[0][REG_IN0+0]); + } + for (j = 0; j < m->nr_outputs; j++) { GLuint idx = REG_OUT0 + m->output[j].idx; m->output[j].data[0] = m->File[0][idx][0]; @@ -1276,6 +1314,7 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) m->output[j].data[3] = m->File[0][idx][3]; m->output[j].data += 4; } + } /* Setup the VB pointers so that the next pipeline stages get @@ -1291,11 +1330,16 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) VB->ClipPtr = &m->attribs[VERT_RESULT_HPOS]; VB->ClipPtr->count = VB->Count; - outputs = program->OutputsWritten; + /* XXX There seems to be confusion between using the VERT_ATTRIB_* + * values vs _TNL_ATTRIB_* tokens here: + */ + outputs = program->Base.OutputsWritten; + if (program->IsPositionInvariant) + outputs |= (1<ColorPtr[0] = &m->attribs[VERT_RESULT_COL0]; - VB->AttribPtr[VERT_ATTRIB_COLOR0] = VB->ColorPtr[0]; + VB->ColorPtr[0] = + VB->AttribPtr[VERT_ATTRIB_COLOR0] = &m->attribs[VERT_RESULT_COL0]; } if (outputs & (1<SecondaryColorPtr[0] = &m->attribs[VERT_RESULT_COL1]; - VB->AttribPtr[VERT_ATTRIB_COLOR1] = VB->SecondaryColorPtr[0]; + VB->SecondaryColorPtr[0] = + VB->AttribPtr[VERT_ATTRIB_COLOR1] = &m->attribs[VERT_RESULT_COL1]; } if (outputs & (1<FogCoordPtr = &m->attribs[VERT_RESULT_FOGC]; - VB->AttribPtr[VERT_ATTRIB_FOG] = VB->FogCoordPtr; + VB->FogCoordPtr = + VB->AttribPtr[VERT_ATTRIB_FOG] = &m->attribs[VERT_RESULT_FOGC]; } if (outputs & (1<PointSizePtr = &m->attribs[VERT_RESULT_PSIZ]; VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &m->attribs[VERT_RESULT_PSIZ]; } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (outputs & (1<<(VERT_RESULT_TEX0+i))) { - VB->TexCoordPtr[i] = &m->attribs[VERT_RESULT_TEX0 + i]; - VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i]; + VB->TexCoordPtr[i] = + VB->AttribPtr[VERT_ATTRIB_TEX0+i] = &m->attribs[VERT_RESULT_TEX0 + i]; } } @@ -1335,16 +1378,16 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) VEC_ELT(VB->ClipPtr, GLfloat, i)[1], VEC_ELT(VB->ClipPtr, GLfloat, i)[2], VEC_ELT(VB->ClipPtr, GLfloat, i)[3], - VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[0], - VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[1], - VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[2], - VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[3]); + VEC_ELT(VB->AttribPtr[VERT_ATTRIB_TEX0], GLfloat, i)[0], + VEC_ELT(VB->AttribPtr[VERT_ATTRIB_TEX0], GLfloat, i)[1], + VEC_ELT(VB->AttribPtr[VERT_ATTRIB_TEX0], GLfloat, i)[2], + VEC_ELT(VB->AttribPtr[VERT_ATTRIB_TEX0], GLfloat, i)[3]); } #endif /* Perform NDC and cliptest operations: */ - return do_ndc_cliptest(m); + return do_ndc_cliptest(ctx, m); } @@ -1352,9 +1395,12 @@ static void validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage ) { struct arb_vp_machine *m = ARB_VP_MACHINE(stage); - struct vertex_program *program = - (ctx->VertexProgram._Enabled ? ctx->VertexProgram.Current : 0); + struct gl_vertex_program *program; + + if (ctx->ShaderObjects._VertexShaderPresent) + return; + program = (ctx->VertexProgram._Enabled ? ctx->VertexProgram.Current : 0); if (!program && ctx->_MaintainTnlProgram) { program = ctx->_TnlProgram; } @@ -1368,8 +1414,8 @@ validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage ) m->File[FILE_LOCAL_PARAM] = program->Base.LocalParams; m->File[FILE_ENV_PARAM] = ctx->VertexProgram.Parameters; /* GL_NV_vertex_programs can't reference GL state */ - if (program->Parameters) - m->File[FILE_STATE_PARAM] = program->Parameters->ParameterValues; + if (program->Base.Parameters) + m->File[FILE_STATE_PARAM] = program->Base.Parameters->ParameterValues; else m->File[FILE_STATE_PARAM] = NULL; } @@ -1394,7 +1440,7 @@ static GLboolean init_vertex_program( GLcontext *ctx, const GLuint size = VB->Size; GLuint i; - stage->privatePtr = _mesa_malloc(sizeof(*m)); + stage->privatePtr = _mesa_calloc(sizeof(*m)); m = ARB_VP_MACHINE(stage); if (!m) return GL_FALSE; @@ -1402,21 +1448,20 @@ static GLboolean init_vertex_program( GLcontext *ctx, /* arb_vertex_machine struct should subsume the VB: */ m->VB = VB; - m->ctx = ctx; - m->File[0] = ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16); + m->File[0] = (GLfloat(*)[4])ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16); /* Initialize regs where necessary: */ ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1); ASSIGN_4V(m->File[0][REG_ONES], 1, 1, 1, 1); - ASSIGN_4V(m->File[0][REG_SWZ], -1, 1, 0, 0); + ASSIGN_4V(m->File[0][REG_SWZ], 1, -1, 0, 0); ASSIGN_4V(m->File[0][REG_NEG], -1, -1, -1, -1); ASSIGN_4V(m->File[0][REG_LIT], 1, 0, 0, 1); ASSIGN_4V(m->File[0][REG_LIT2], 1, .5, .2, 1); /* debug value */ if (_mesa_getenv("MESA_EXPERIMENTAL")) - m->try_codegen = 1; + m->try_codegen = GL_TRUE; /* Allocate arrays of vertex output values */ for (i = 0; i < VERT_RESULT_MAX; i++) { @@ -1469,7 +1514,7 @@ static void dtr( struct tnl_pipeline_stage *stage ) */ const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage = { - "vertex-program", + "arb-vertex-program", NULL, /* private_data */ init_vertex_program, /* create */ dtr, /* destroy */ @@ -1483,11 +1528,11 @@ const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage = * string has been parsed. */ void -_tnl_program_string(GLcontext *ctx, GLenum target, struct program *program) +_tnl_program_string(GLcontext *ctx, GLenum target, struct gl_program *program) { if (target == GL_VERTEX_PROGRAM_ARB) { /* free any existing tnl data hanging off the program */ - struct vertex_program *vprog = (struct vertex_program *) program; + struct gl_vertex_program *vprog = (struct gl_vertex_program *) program; if (vprog->TnlData) { free_tnl_data(vprog); }