From f20917de5bd2b1fc152e74304d3649a1f6042422 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Mon, 12 Sep 2005 21:20:10 +0000 Subject: [PATCH] add complete support for ATI_fragment_shader for the r200. Most of the new code is in the new file r200_fragshader.c. Reverse engineered by Dave Airlie and me --- src/mesa/drivers/dri/r200/Makefile | 1 + src/mesa/drivers/dri/r200/r200_cmdbuf.c | 4 +- src/mesa/drivers/dri/r200/r200_context.c | 7 + src/mesa/drivers/dri/r200/r200_context.h | 40 +- src/mesa/drivers/dri/r200/r200_fragshader.c | 543 ++++++++++++++++++++ src/mesa/drivers/dri/r200/r200_screen.c | 5 +- src/mesa/drivers/dri/r200/r200_screen.h | 3 +- src/mesa/drivers/dri/r200/r200_state.c | 32 +- src/mesa/drivers/dri/r200/r200_state_init.c | 120 +++-- src/mesa/drivers/dri/r200/r200_tex.h | 4 + src/mesa/drivers/dri/r200/r200_texstate.c | 115 +++-- 11 files changed, 795 insertions(+), 79 deletions(-) create mode 100644 src/mesa/drivers/dri/r200/r200_fragshader.c diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index e4fff5746d8..dfeebca5a29 100644 --- a/src/mesa/drivers/dri/r200/Makefile +++ b/src/mesa/drivers/dri/r200/Makefile @@ -31,6 +31,7 @@ DRIVER_SOURCES = r200_context.c \ r200_vtxfmt_c.c \ r200_vtxfmt_sse.c \ r200_vtxfmt_x86.c \ + r200_fragshader.c \ $(EGL_SOURCES) C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index 2891054b059..759175a82fb 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -88,13 +88,15 @@ void r200SetUpAtomList( r200ContextPtr rmesa ) insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog ); insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam ); insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf ); for (i = 0; i < mtu; ++i) insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] ); for (i = 0; i < mtu; 
++i) insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] ); for (i = 0; i < 6; ++i) insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] ); - + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] ); for (i = 0; i < 8; ++i) insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] ); for (i = 0; i < 3 + mtu; ++i) diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index cbf4a42ddc3..2c95e9a6c23 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -67,6 +67,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_ARB_texture_compression #define need_GL_ARB_vertex_buffer_object #define need_GL_ARB_vertex_program +#define need_GL_ATI_fragment_shader #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color @@ -177,6 +178,10 @@ const struct dri_extension NV_vp_extension[] = { { "GL_NV_vertex_program", GL_NV_vertex_program_functions } }; +const struct dri_extension ATI_fs_extension[] = { + { "GL_ATI_fragment_shader", GL_ATI_fragment_shader_functions } +}; + extern const struct tnl_pipeline_stage _r200_render_stage; extern const struct tnl_pipeline_stage _r200_tcl_stage; @@ -466,6 +471,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program")) driInitSingleExtension( ctx, NV_vp_extension ); + if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader) + driInitSingleExtension( ctx, ATI_fs_extension ); #if 0 r200InitDriverFuncs( ctx ); r200InitIoctlFuncs( ctx ); diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index d2902f9e6ab..aacc1327e6f 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -278,12 +278,17 @@ struct r200_state_atom { #define TEX_PP_TXSIZE 
4 /*2c0c*/ #define TEX_PP_TXPITCH 5 /*2c10*/ #define TEX_PP_BORDER_COLOR 6 /*2c14*/ -#define TEX_CMD_1 7 -#define TEX_PP_TXOFFSET 8 /*2d00 */ -#define TEX_STATE_SIZE 9 - -#define CUBE_CMD_0 0 /* 1 register follows */ -#define CUBE_PP_CUBIC_FACES 1 /* 0x2c18 */ +#define TEX_CMD_1_OLDDRM 7 +#define TEX_PP_TXOFFSET_OLDDRM 8 /*2d00 */ +#define TEX_STATE_SIZE_OLDDRM 9 +#define TEX_PP_CUBIC_FACES 7 +#define TEX_PP_TXMULTI_CTL 8 +#define TEX_CMD_1_NEWDRM 9 +#define TEX_PP_TXOFFSET_NEWDRM 10 +#define TEX_STATE_SIZE_NEWDRM 11 + +#define CUBE_CMD_0 0 /* 1 register follows */ /* this command unnecessary */ +#define CUBE_PP_CUBIC_FACES 1 /* 0x2c18 */ /* with new enough drm */ #define CUBE_CMD_1 2 /* 5 registers follow */ #define CUBE_PP_CUBIC_OFFSET_F1 3 /* 0x2d04 */ #define CUBE_PP_CUBIC_OFFSET_F2 4 /* 0x2d08 */ @@ -308,6 +313,25 @@ struct r200_state_atom { #define TF_TFACTOR_5 6 #define TF_STATE_SIZE 7 +#define ATF_CMD_0 0 +#define ATF_TFACTOR_0 1 +#define ATF_TFACTOR_1 2 +#define ATF_TFACTOR_2 3 +#define ATF_TFACTOR_3 4 +#define ATF_TFACTOR_4 5 +#define ATF_TFACTOR_5 6 +#define ATF_TFACTOR_6 7 +#define ATF_TFACTOR_7 8 +#define ATF_STATE_SIZE 9 + +/* ATI_FRAGMENT_SHADER */ +#define AFS_CMD_0 0 +#define AFS_IC0 1 /* 2f00 */ +#define AFS_IC1 2 /* 2f04 */ +#define AFS_IA0 3 /* 2f08 */ +#define AFS_IA1 4 /* 2f0c */ +#define AFS_STATE_SIZE 33 + #define TCL_CMD_0 0 #define TCL_LIGHT_MODEL_CTL_0 1 #define TCL_LIGHT_MODEL_CTL_1 2 @@ -533,6 +557,8 @@ struct r200_hw_state { struct r200_state_atom fog; struct r200_state_atom glt; struct r200_state_atom prf; + struct r200_state_atom afs[2]; + struct r200_state_atom atf; int max_state_size; /* Number of bytes necessary for a full state emit. 
*/ GLboolean is_dirty, all_dirty; @@ -942,6 +968,8 @@ struct r200_context { GLboolean using_hyperz; GLboolean texmicrotile; + + struct ati_fragment_shader *afs_loaded; }; #define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx)) diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c new file mode 100644 index 00000000000..70b75266a45 --- /dev/null +++ b/src/mesa/drivers/dri/r200/r200_fragshader.c @@ -0,0 +1,543 @@ +/************************************************************************** + * + * Copyright 2004 David Airlie + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL DAVID AIRLIE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "tnl/t_context.h" +#include "atifragshader.h" +#include "program.h" +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_tex.h" + +#define SET_INST(inst, type) afs_cmd[((inst<<2) + (type<<1) + 1)] +#define SET_INST_2(inst, type) afs_cmd[((inst<<2) + (type<<1) + 2)] + +static void r200SetFragShaderArg( GLuint *afs_cmd, GLuint opnum, GLuint optype, + const struct atifragshader_src_register srcReg, + GLuint argPos, GLuint *tfactor ) +{ + const GLuint index = srcReg.Index; + const GLuint srcmod = srcReg.argMod; + const GLuint srcrep = srcReg.argRep; + GLuint reg0 = 0; + GLuint reg2 = 0; + GLuint useOddSrc = 0; + + switch(srcrep) { + case GL_RED: + reg2 |= R200_TXC_REPL_RED << (R200_TXC_REPL_ARG_A_SHIFT + (2*argPos)); + if (optype) + useOddSrc = 1; + break; + case GL_GREEN: + reg2 |= R200_TXC_REPL_GREEN << (R200_TXC_REPL_ARG_A_SHIFT + (2*argPos)); + if (optype) + useOddSrc = 1; + break; + case GL_BLUE: + if (!optype) + reg2 |= R200_TXC_REPL_BLUE << (R200_TXC_REPL_ARG_A_SHIFT + (2*argPos)); + else + useOddSrc = 1; + break; + case GL_ALPHA: + if (!optype) + useOddSrc = 1; + break; + } + + if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI) + reg0 |= (((index - GL_REG_0_ATI)*2) + 10 + useOddSrc) << (5*argPos); + else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) { + if ((*tfactor == 0) || (index == *tfactor)) { + reg0 |= (R200_TXC_ARG_A_TFACTOR_COLOR + useOddSrc) << (5*argPos); + reg2 |= (index - GL_CON_0_ATI) << R200_TXC_TFACTOR_SEL_SHIFT; + *tfactor = index; + } + else { + reg0 |= (R200_TXC_ARG_A_TFACTOR1_COLOR + useOddSrc) << (5*argPos); + reg2 |= (index - GL_CON_0_ATI) << R200_TXC_TFACTOR1_SEL_SHIFT; + } + } + else if (index == GL_PRIMARY_COLOR_EXT) { + reg0 |= (R200_TXC_ARG_A_DIFFUSE_COLOR + useOddSrc) << (5*argPos); + } + else if (index == GL_SECONDARY_INTERPOLATOR_ATI) { + reg0 |= 
(R200_TXC_ARG_A_SPECULAR_COLOR + useOddSrc) << (5*argPos); + } + /* GL_ZERO is a noop, for GL_ONE we set the complement */ + else if (index == GL_ONE) { + reg0 |= R200_TXC_COMP_ARG_A << (4*argPos); + } + + if (srcmod & GL_COMP_BIT_ATI) + reg0 ^= R200_TXC_COMP_ARG_A << (4*argPos); + if (srcmod & GL_BIAS_BIT_ATI) + reg0 |= R200_TXC_BIAS_ARG_A << (4*argPos); + if (srcmod & GL_2X_BIT_ATI) + reg0 |= R200_TXC_SCALE_ARG_A << (4*argPos); + if (srcmod & GL_NEGATE_BIT_ATI) + reg0 ^= R200_TXC_NEG_ARG_A << (4*argPos); + + SET_INST(opnum, optype) |= reg0; + SET_INST_2(opnum, optype) |= reg2; +} + +static GLuint dstmask_table[8] = +{ + R200_TXC_OUTPUT_MASK_RGB, + R200_TXC_OUTPUT_MASK_R, + R200_TXC_OUTPUT_MASK_G, + R200_TXC_OUTPUT_MASK_RG, + R200_TXC_OUTPUT_MASK_B, + R200_TXC_OUTPUT_MASK_RB, + R200_TXC_OUTPUT_MASK_GB, + R200_TXC_OUTPUT_MASK_RGB +}; + +static void r200UpdateFSArith( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint *afs_cmd; + const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; + GLuint pass; + + R200_STATECHANGE( rmesa, afs[0] ); + R200_STATECHANGE( rmesa, afs[1] ); + + if (shader->NumPasses < 2) { + afs_cmd = rmesa->hw.afs[1].cmd; + } + else { + afs_cmd = rmesa->hw.afs[0].cmd; + } + for (pass = 0; pass < shader->NumPasses; pass++) { + GLuint opnum = 0; + GLuint pc; + for (pc = 0; pc < shader->numArithInstr[pass]; pc++) { + GLuint optype; + struct atifs_instruction *inst = &shader->Instructions[pass][pc]; + + SET_INST(opnum, 0) = 0; + SET_INST_2(opnum, 0) = 0; + SET_INST(opnum, 1) = 0; + SET_INST_2(opnum, 1) = 0; + + for (optype = 0; optype < 2; optype++) { + GLuint tfactor = 0; + + if (inst->Opcode[optype]) { + switch (inst->Opcode[optype]) { + /* these are all MADD in disguise + MADD is A * B + C + so for GL_ADD use arg B/C and make A complement 0 + for GL_SUB use arg B/C, negate C and make A complement 0 + for GL_MOV use arg C + for GL_MUL use arg A + for GL_MAD all good */ + case GL_SUB_ATI: + /* negate C */ + 
SET_INST(opnum, optype) |= R200_TXC_NEG_ARG_C; + /* fallthrough */ + case GL_ADD_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 2, &tfactor); + /* A = complement 0 */ + SET_INST(opnum, optype) |= R200_TXC_COMP_ARG_A; + SET_INST(opnum, optype) |= R200_TXC_OP_MADD; + break; + case GL_MOV_ATI: + /* put arg0 in C */ + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 2, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_MADD; + break; + case GL_MAD_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][2], 2, &tfactor); + /* fallthrough */ + case GL_MUL_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 1, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_MADD; + break; + case GL_LERP_ATI: + /* arg order is not native chip order, swap A and C */ + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 2, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][2], 0, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_LERP; + break; + case GL_CND_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][2], 2, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_CONDITIONAL; + break; + case GL_CND0_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][2], 2, &tfactor); + 
SET_INST(opnum, optype) |= R200_TXC_OP_CND0; + break; + /* cannot specify dot ops as alpha ops directly */ + case GL_DOT2_ADD_ATI: + if (optype) + SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA; + else { + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][2], 2, &tfactor); + SET_INST(opnum, 0) |= R200_TXC_OP_DOT2_ADD; + } + break; + case GL_DOT3_ATI: + if (optype) + SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA; + else { + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][1], 1, &tfactor); + SET_INST(opnum, 0) |= R200_TXC_OP_DOT3; + } + break; + case GL_DOT4_ATI: + /* experimental verification: for dot4 setup of alpha args is needed + (dstmod is ignored, though, so dot2/dot3 should be safe) + the hardware apparently does R1*R2 + G1*G2 + B1*B2 + A3*A4 + but the API doesn't allow it */ + if (optype) + SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA; + else { + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 1, + inst->SrcReg[0][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 1, + inst->SrcReg[0][1], 1, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_DOT4; + } + break; + } + } + + /* destination */ + if (inst->DstReg[optype].Index) { + GLuint dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI; + GLuint dstmask = inst->DstReg[optype].dstMask; + GLuint sat = inst->DstReg[optype].dstMod & GL_SATURATE_BIT_ATI; + GLuint dstmod = inst->DstReg[optype].dstMod; + + dstmod &= ~GL_SATURATE_BIT_ATI; + + SET_INST_2(opnum, optype) |= (dstreg + 1) << R200_TXC_OUTPUT_REG_SHIFT; + SET_INST_2(opnum, optype) |= dstmask_table[dstmask]; + + /* fglrx does clamp the last instructions to 0_1 it seems */ 
+ /* this won't necessarily catch the last instruction + which writes to reg0 */ + if (sat || (pc == (shader->numArithInstr[pass] - 1) && + ((pass == 1) || (shader->NumPasses == 1)))) + SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_0_1; + else + /*should we clamp or not? spec is vague, I would suppose yes but fglrx doesn't */ + SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_8_8; +/* SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_WRAP;*/ + switch(dstmod) { + case GL_2X_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_2X; + break; + case GL_4X_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_4X; + break; + case GL_8X_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_8X; + break; + case GL_HALF_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV2; + break; + case GL_QUARTER_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV4; + break; + case GL_EIGHTH_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV8; + break; + default: + break; + } + } + } +/* fprintf(stderr, "pass %d nr %d inst 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n", + pass, opnum, SET_INST(opnum, 0), SET_INST_2(opnum, 0), + SET_INST(opnum, 1), SET_INST_2(opnum, 1));*/ + opnum++; + } + afs_cmd = rmesa->hw.afs[1].cmd; + } + rmesa->afs_loaded = ctx->ATIFragmentShader.Current; +} + +static void r200UpdateFSRouting( GLcontext *ctx ) { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; + GLuint reg; + + R200_STATECHANGE( rmesa, ctx ); + R200_STATECHANGE( rmesa, cst ); + + for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) { + if (shader->swizzlerq & (1 << (2 * reg))) + /* r coord */ + set_re_cntl_d3d( ctx, reg, 1); + /* q coord */ + else set_re_cntl_d3d( ctx, reg, 0); + } + + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_MULTI_PASS_ENABLE | + R200_TEX_BLEND_ENABLE_MASK | + R200_TEX_ENABLE_MASK); + rmesa->hw.cst.cmd[CST_PP_CNTL_X] &= ~(R200_PPX_PFS_INST_ENABLE_MASK | + R200_PPX_TEX_ENABLE_MASK | + R200_PPX_OUTPUT_REG_MASK); + + /* 
first pass registers use slots 8 - 15 + but single pass shaders use slots 0 - 7 */ + if (shader->NumPasses < 2) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[0] == 8 ? + 0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) : + (0xff >> (8 - shader->numArithInstr[0])) << R200_TEX_BLEND_0_ENABLE_SHIFT; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_MULTI_PASS_ENABLE; + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[1] == 8 ? + 0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) : + (0xff >> (8 - shader->numArithInstr[1])) << R200_TEX_BLEND_0_ENABLE_SHIFT; + rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= + (0xff >> (8 - shader->numArithInstr[0])) << R200_PPX_FPS_INST0_ENABLE_SHIFT; + } + + if (shader->NumPasses < 2) { + for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) { + struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current; + R200_STATECHANGE( rmesa, tex[reg] ); + rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = 0; + if (shader->SetupInst[0][reg].Opcode) { + GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] + & ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE); + GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK; + txformat |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB) + << R200_TXFORMAT_ST_ROUTE_SHIFT; + /* fix up texcoords for proj/non-proj 2d (3d and cube are not defined when + using projection so don't have to worry there). 
+ When passing coords, need R200_TEXCOORD_VOLUME, otherwise lose a coord */ + /* FIXME: someone might rely on default tex coords r/q, which we unfortunately + don't provide (we have the same problem without shaders) */ + if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { + txformat |= R200_TXFORMAT_LOOKUP_DISABLE; + if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_x |= R200_TEXCOORD_VOLUME; + } + else { + txformat_x |= R200_TEXCOORD_PROJ; + } + } + else if (texObj->Target == GL_TEXTURE_3D) { + txformat_x |= R200_TEXCOORD_VOLUME; + } + else if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + txformat_x |= R200_TEXCOORD_CUBIC_ENV; + } + else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_x |= R200_TEXCOORD_NONPROJ; + } + else { + txformat_x |= R200_TEXCOORD_PROJ; + } + rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat; + rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x; + /* is this a good idea? Could potentially sample from not enabled unit. + results are probably undefined anyway (?) but I hope it doesn't lock up... 
*/ + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg; + } + } + + } else { + /* setup 1st pass */ + for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) { + struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current; + R200_STATECHANGE( rmesa, tex[reg] ); + GLuint txformat_multi = 0; + if (shader->SetupInst[0][reg].Opcode) { + txformat_multi |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB) + << R200_PASS1_ST_ROUTE_SHIFT; + if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { + txformat_multi |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE; + if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_multi |= R200_PASS1_TEXCOORD_VOLUME; + } + else { + txformat_multi |= R200_PASS1_TEXCOORD_PROJ; + } + } + else if (texObj->Target == GL_TEXTURE_3D) { + txformat_multi |= R200_PASS1_TEXCOORD_VOLUME; + } + else if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + txformat_multi |= R200_PASS1_TEXCOORD_CUBIC_ENV; + } + else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_multi |= R200_PASS1_TEXCOORD_NONPROJ; + } + else { + txformat_multi |= R200_PASS1_TEXCOORD_PROJ; + } + rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg; + } + rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi; + } + + /* setup 2nd pass */ + for (reg=0; reg < R200_MAX_TEXTURE_UNITS; reg++) { + struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current; + if (shader->SetupInst[1][reg].Opcode) { + GLuint coord = shader->SetupInst[1][reg].src; + GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] + & ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE); + GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK; + R200_STATECHANGE( rmesa, tex[reg] ); + if (shader->SetupInst[1][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { + txformat |= 
R200_TXFORMAT_LOOKUP_DISABLE; + txformat_x |= R200_TEXCOORD_VOLUME; + if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_x |= R200_TEXCOORD_VOLUME; + } + else { + txformat_x |= R200_TEXCOORD_PROJ; + } + } + else if (texObj->Target == GL_TEXTURE_3D) { + txformat_x |= R200_TEXCOORD_VOLUME; + } + else if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + txformat_x |= R200_TEXCOORD_CUBIC_ENV; + } + else if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_x |= R200_TEXCOORD_NONPROJ; + } + else { + txformat_x |= R200_TEXCOORD_PROJ; + } + if (coord >= GL_REG_0_ATI) { + GLuint txformat_multi = rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL]; + txformat_multi |= (coord - GL_REG_0_ATI + 2) << R200_PASS2_COORDS_REG_SHIFT; + rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi; + rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= 1 << + (R200_PPX_OUTPUT_REG_0_SHIFT + coord - GL_REG_0_ATI); + } else { + txformat |= (coord - GL_TEXTURE0_ARB) << R200_TXFORMAT_ST_ROUTE_SHIFT; + } + rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x; + rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat; + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg; + } + } + } +} + +static void r200UpdateFSConstants( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; + GLuint i; + + /* update constants */ + R200_STATECHANGE(rmesa, atf); + for (i = 0; i < 8; i++) + { + GLubyte con_byte[4]; + if ((shader->localConstDef >> i) & 1) { + CLAMPED_FLOAT_TO_UBYTE(con_byte[0], shader->Constants[i][0]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[1], shader->Constants[i][1]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[2], shader->Constants[i][2]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[3], shader->Constants[i][3]); + } + else { + CLAMPED_FLOAT_TO_UBYTE(con_byte[0], 
ctx->ATIFragmentShader.globalConstants[i][0]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[1], ctx->ATIFragmentShader.globalConstants[i][1]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.globalConstants[i][2]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.globalConstants[i][3]); + } + rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = r200PackColor ( + 4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] ); + } +} + +/* update routing, constants and arithmetic + * constants need to be updated always (globals can change, no separate notification) + * routing needs to be updated always too (non-shader code will overwrite state, plus + * some of the routing depends on what sort of texture is bound) + * for both of them, we need to update anyway because of disabling/enabling ati_fs which + * we'd need to track otherwise + * arithmetic is only updated if current shader changes (and probably the data should be + * stored in some DriverData object attached to the mesa atifs object, i.e. binding a + * shader wouldn't force us to "recompile" the shader). + */ +void r200UpdateFragmentShader( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + r200UpdateFSConstants( ctx ); + r200UpdateFSRouting( ctx ); + if (rmesa->afs_loaded != ctx->ATIFragmentShader.Current) + r200UpdateFSArith( ctx ); +} diff --git a/src/mesa/drivers/dri/r200/r200_screen.c b/src/mesa/drivers/dri/r200/r200_screen.c index 592d46c1e42..ff4b85915fd 100644 --- a/src/mesa/drivers/dri/r200/r200_screen.c +++ b/src/mesa/drivers/dri/r200/r200_screen.c @@ -94,6 +94,7 @@ extern const struct dri_extension card_extensions[]; extern const struct dri_extension blend_extensions[]; extern const struct dri_extension ARB_vp_extension[]; extern const struct dri_extension NV_vp_extension[]; +extern const struct dri_extension ATI_fs_extension[]; #if 1 /* Including xf86PciInfo.h introduces a bunch of errors... 
@@ -357,8 +358,9 @@ r200CreateScreen( __DRIscreenPrivate *sPriv ) /* Check if kernel module is new enough to support blend color and separate blend functions/equations */ screen->drmSupportsBlendColor = (sPriv->drmMinor >= 11); - screen->drmSupportsTriPerf = (sPriv->drmMinor >= 16); + screen->drmSupportsFragShader = (sPriv->drmMinor >= 18); + } /* Check if ddx has set up a surface reg to cover depth buffer */ screen->depthHasSurface = (sPriv->ddxMajor > 4); @@ -711,6 +713,7 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIsc driInitExtensions( NULL, blend_extensions, GL_FALSE ); driInitSingleExtension( NULL, ARB_vp_extension ); driInitSingleExtension( NULL, NV_vp_extension ); + driInitSingleExtension( NULL, ATI_fs_extension ); } return (void *) psp; diff --git a/src/mesa/drivers/dri/r200/r200_screen.h b/src/mesa/drivers/dri/r200/r200_screen.h index eb887974b96..47e6f2be720 100644 --- a/src/mesa/drivers/dri/r200/r200_screen.h +++ b/src/mesa/drivers/dri/r200/r200_screen.h @@ -94,9 +94,10 @@ typedef struct { unsigned int gart_texture_offset; /* offset in card memory space */ unsigned int gart_base; - GLboolean drmSupportsCubeMaps; /* need radeon kernel module >=1.7 */ + GLboolean drmSupportsCubeMaps; /* need radeon kernel module >= 1.7 */ GLboolean drmSupportsBlendColor; /* need radeon kernel module >= 1.11 */ GLboolean drmSupportsTriPerf; /* need radeon kernel module >= 1.16 */ + GLboolean drmSupportsFragShader; /* need radeon kernel module >= 1.18 */ GLboolean depthHasSurface; /* Configuration cache with default values for all contexts */ diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 43126b3378e..968783607de 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2087,6 +2087,34 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_VERTEX_PROGRAM, state); break; + case 
GL_FRAGMENT_SHADER_ATI: + if ( !state ) { + /* restore normal tex env colors and make sure tex env combine will get updated + mark env atoms dirty (as their data was overwritten by afs even + if they didn't change) and restore tex coord routing */ + GLuint unit; + for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) { + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= + ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT; + /* need to guard this with drmSupportsFragmentShader? Should never get here if + we don't announce ATI_fs, right? */ + rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0; + R200_STATECHANGE( rmesa, pix[unit] ); + R200_STATECHANGE( rmesa, tex[unit] ); + } + rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0; + R200_STATECHANGE( rmesa, cst ); + R200_STATECHANGE( rmesa, tf ); + } + else { + /* need to mark this dirty as pix/tf atoms have overwritten the data + even if the data in the atoms didn't change */ + R200_STATECHANGE( rmesa, atf ); + R200_STATECHANGE( rmesa, afs[1] ); + /* everything else picked up in r200UpdateTextureState hopefully */ + } + break; default: return; } @@ -2260,7 +2288,7 @@ void r200ValidateState( GLcontext *ctx ) r200UpdateDrawBuffer(ctx); } - if (new_state & _NEW_TEXTURE) { + if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) { r200UpdateTextureState( ctx ); new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */ } @@ -2282,7 +2310,7 @@ void r200ValidateState( GLcontext *ctx ) */ if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) { update_texturematrix( ctx ); - } + } if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) { update_light( ctx ); diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index 27c0b946b1e..db78afd3758 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -137,9 +137,13 @@ static GLboolean check_##NM( 
GLcontext *ctx, int idx ) \ CHECK( always, GL_TRUE ) CHECK( never, GL_FALSE ) CHECK( tex_any, ctx->Texture._EnabledUnits ) -CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded)) +CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) ); +CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) ) CHECK( tex, rmesa->state.texture.unit[idx].unitneeded ) -CHECK( texenv, rmesa->state.envneeded & (1 << idx) ) +CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled ) +CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) ) +CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) ) +CHECK( afs, ctx->ATIFragmentShader._Enabled ) CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT ) CHECK( fog, ctx->Fog.Enabled ) TCL_CHECK( tcl, GL_TRUE ) @@ -229,8 +233,8 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.ATOM.dirty = GL_FALSE; \ rmesa->hw.max_state_size += SZ * sizeof(int); \ } while (0) - - + + /* Allocate state buffers: */ if (rmesa->r200Screen->drmSupportsBlendColor) @@ -247,22 +251,46 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 ); ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 ); ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 ); - ALLOC_STATE( tf, tex_any, TF_STATE_SIZE, "TF/tfactor", 0 ); - if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) { - /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */ - ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); + ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 ); + if (rmesa->r200Screen->drmSupportsFragShader) { + if 
(rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) { + /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */ + ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + else { + ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-2", 2 ); + ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-3", 3 ); + ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-4", 4 ); + ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-5", 5 ); + ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 ); + ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); + ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); } else { - ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); + if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) { + ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + else { + ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-2", 2 ); + ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-3", 3 ); + ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-4", 4 ); + ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-5", 5 ); + 
ALLOC_STATE( atf, never, ATF_STATE_SIZE, "TF/tfactor", 0 ); + ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); + ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); } - ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE, "TEX/tex-2", 2 ); - ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE, "TEX/tex-3", 3 ); - ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE, "TEX/tex-4", 4 ); - ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE, "TEX/tex-5", 5 ); if (rmesa->r200Screen->drmSupportsCubeMaps) { ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 ); ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 ); @@ -312,7 +340,7 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 ); ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 ); ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 ); - ALLOC_STATE( pix[0], always, PIX_STATE_SIZE, "PIX/pixstage-0", 0 ); + ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 ); ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 ); ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 ); ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 ); @@ -348,19 +376,37 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE); rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0); rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3); - rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0); - rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0); - rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_0); - rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1); - rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_1); - rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2); - rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_2); - 
rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3); - rmesa->hw.tex[3].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_3); - rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4); - rmesa->hw.tex[4].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_4); - rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5); - rmesa->hw.tex[5].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_5); + rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0); + if (rmesa->r200Screen->drmSupportsFragShader) { + rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR); + rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0); + rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0); + rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1); + rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1); + rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2); + rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2); + rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3); + rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3); + rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4); + rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4); + rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5); + rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5); + } else { + rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0); + rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0); + rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1); + rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1); + rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2); + rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2); + rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3); + 
rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3); + rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4); + rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4); + rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5); + rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5); + } + rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0); + rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1); rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0); rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0); rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1); @@ -623,12 +669,20 @@ void r200InitState( r200ContextPtr rmesa ) ((i << R200_TXFORMAT_ST_ROUTE_SHIFT) | /* <-- note i */ (2 << R200_TXFORMAT_WIDTH_SHIFT) | (2 << R200_TXFORMAT_HEIGHT_SHIFT)); - rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] = - rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0; rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] = (/* R200_TEXCOORD_PROJ | */ 0x100000); /* Small default bias */ + if (rmesa->r200Screen->drmSupportsFragShader) { + rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] = + rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0; + rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0; + } + else { + rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] = + rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + } rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] = diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h index acabbc11bd1..4438cc02a82 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.h +++ b/src/mesa/drivers/dri/r200/r200_tex.h @@ -44,4 +44,8 @@ extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t ); extern void r200InitTextureFuncs( struct dd_function_table *functions ); 
+extern void r200UpdateFragmentShader( GLcontext *ctx ); + +extern void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ); + #endif /* __R200_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index b79f3acc2c1..db47f6fd79e 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -315,6 +315,7 @@ static void r200SetTexImages( r200ContextPtr rmesa, ASSERT(log2Width == log2Height); t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) | (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) | +/* don't think we need this bit, if it exists at all - fglrx does not set it */ (R200_TXFORMAT_CUBIC_MAP_ENABLE)); t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV; t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) | @@ -591,7 +592,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin break; case GL_PREVIOUS: if (replaceargs != unit) { - const GLint srcRGBreplace = ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0]; + const GLint srcRGBreplace = + ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0]; if (op >= 2) { op = op ^ replaceopa; } @@ -612,7 +614,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin if (slot == 0) color_arg[i] = r200_primary_color[op]; else - color_arg[i] = r200_register_color[op][rmesa->state.texture.unit[replaceargs - 1].outputreg]; + color_arg[i] = r200_register_color[op] + [rmesa->state.texture.unit[replaceargs - 1].outputreg]; break; case GL_ZERO: color_arg[i] = r200_zero_color[op]; @@ -636,7 +639,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin if (slot == 0) color_arg[i] = r200_primary_color[op]; else - color_arg[i] = r200_register_color[op][rmesa->state.texture.unit[unit - 1].outputreg]; + color_arg[i] = r200_register_color[op] + [rmesa->state.texture.unit[unit - 1].outputreg]; } break; case GL_ZERO: @@ 
-675,7 +679,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin break; case GL_PREVIOUS: if (replaceargs != unit) { - const GLint srcAreplace = ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0]; + const GLint srcAreplace = + ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0]; op = op ^ replaceopa; switch (srcAreplace) { case GL_TEXTURE: @@ -691,7 +696,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin if (slot == 0) alpha_arg[i] = r200_primary_alpha[op]; else - alpha_arg[i] = r200_register_alpha[op][rmesa->state.texture.unit[replaceargs - 1].outputreg]; + alpha_arg[i] = r200_register_alpha[op] + [rmesa->state.texture.unit[replaceargs - 1].outputreg]; break; case GL_ZERO: alpha_arg[i] = r200_zero_alpha[op]; @@ -715,7 +721,8 @@ static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuin if (slot == 0) alpha_arg[i] = r200_primary_alpha[op]; else - alpha_arg[i] = r200_register_alpha[op][rmesa->state.texture.unit[unit - 1].outputreg]; + alpha_arg[i] = r200_register_alpha[op] + [rmesa->state.texture.unit[unit - 1].outputreg]; } break; case GL_ZERO: @@ -1091,7 +1098,7 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx ) } R200_STATECHANGE( rmesa, ctx ); - rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_TEX_BLEND_ENABLE_MASK; + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_BLEND_ENABLE_MASK | R200_MULTI_PASS_ENABLE); rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= rmesa->state.envneeded << R200_TEX_BLEND_0_ENABLE_SHIFT; return ok; @@ -1114,11 +1121,11 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx ) #define TEXOBJ_TXFORMAT_MASK (R200_TXFORMAT_WIDTH_MASK | \ R200_TXFORMAT_HEIGHT_MASK | \ R200_TXFORMAT_FORMAT_MASK | \ - R200_TXFORMAT_F5_WIDTH_MASK | \ - R200_TXFORMAT_F5_HEIGHT_MASK | \ + R200_TXFORMAT_F5_WIDTH_MASK | \ + R200_TXFORMAT_F5_HEIGHT_MASK | \ R200_TXFORMAT_ALPHA_IN_MAP | \ R200_TXFORMAT_CUBIC_MAP_ENABLE | \ - R200_TXFORMAT_NON_POWER2) + R200_TXFORMAT_NON_POWER2) 
#define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK | \ R200_TEXCOORD_MASK | \ @@ -1140,15 +1147,24 @@ static void import_tex_obj_state( r200ContextPtr rmesa, cmd[TEX_PP_TXFORMAT_X] |= texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK; cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */ cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */ - cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset; cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; - R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] ); + if (rmesa->r200Screen->drmSupportsFragShader) { + cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset; + } + else { + cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset; + } if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { GLuint *cube_cmd = R200_DB_STATE( cube[unit] ); GLuint bytesPerFace = texobj->base.totalSize / 6; ASSERT(texobj->base.totalSize % 6 == 0); cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; + if (rmesa->r200Screen->drmSupportsFragShader) { + /* that value is submitted twice. 
could change cube atom + to not include that command when new drm is used */ + cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces; + } cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace; cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace; cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace; @@ -1156,6 +1172,7 @@ static void import_tex_obj_state( r200ContextPtr rmesa, cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace; R200_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] ); } + R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] ); texobj->dirty_state &= ~(1<state.texture.unit[unit].unitneeded; - if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_RECT_BIT) ) { + if ( unitneeded & (TEXTURE_RECT_BIT) ) { return (enable_tex_rect( ctx, unit ) && update_tex_common( ctx, unit )); } - else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) { + else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) { return (enable_tex_2d( ctx, unit ) && update_tex_common( ctx, unit )); } #if ENABLE_HW_3D_TEXTURE - else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_3D_BIT) ) { + else if ( unitneeded & (TEXTURE_3D_BIT) ) { return (enable_tex_3d( ctx, unit ) && update_tex_common( ctx, unit )); } #endif - else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_CUBE_BIT) ) { + else if ( unitneeded & (TEXTURE_CUBE_BIT) ) { return (enable_tex_cube( ctx, unit ) && update_tex_common( ctx, unit )); } - else if ( rmesa->state.texture.unit[unit].unitneeded ) { + else if ( unitneeded ) { return GL_FALSE; } else { @@ -1631,8 +1649,16 @@ void r200UpdateTextureState( GLcontext *ctx ) GLboolean ok; GLuint dbg; - ok = r200UpdateAllTexEnv( ctx ); - + if (ctx->ATIFragmentShader._Enabled) { + GLuint i; + for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) { + rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled; + } + ok = GL_TRUE; + } + 
else { + ok = r200UpdateAllTexEnv( ctx ); + } if (ok) { ok = (r200UpdateTextureUnit( ctx, 0 ) && r200UpdateTextureUnit( ctx, 1 ) && @@ -1642,6 +1668,10 @@ void r200UpdateTextureState( GLcontext *ctx ) r200UpdateTextureUnit( ctx, 5 )); } + if (ok && ctx->ATIFragmentShader._Enabled) { + r200UpdateFragmentShader(ctx); + } + FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok ); if (rmesa->TclFallback) @@ -1652,24 +1682,38 @@ void r200UpdateTextureState( GLcontext *ctx ) /* * T0 hang workaround ------------- - * not needed for r200 derivatives? - */ + * not needed for r200 derivatives + */ if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE && - (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) { - - R200_STATECHANGE(rmesa, ctx); - R200_STATECHANGE(rmesa, tex[1]); - rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE; - rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; - rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= 0x08000000; + (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) { + + R200_STATECHANGE(rmesa, ctx); + R200_STATECHANGE(rmesa, tex[1]); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE; + if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE)) + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE; } - else { - if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) && - (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) { - R200_STATECHANGE(rmesa, tex[1]); - rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000; + else if (!ctx->ATIFragmentShader._Enabled) { + if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) && + (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & R200_TXFORMAT_LOOKUP_DISABLE)) { + R200_STATECHANGE(rmesa, tex[1]); + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~R200_TXFORMAT_LOOKUP_DISABLE; } } + /* do the same workaround for the first pass of 
a fragment shader. + * completely unknown if necessary / sufficient. + */ + if ((rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_ENABLE_MASK) == R200_PPX_TEX_0_ENABLE && + (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) { + + R200_STATECHANGE(rmesa, cst); + R200_STATECHANGE(rmesa, tex[1]); + rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_1_ENABLE; + if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE)) + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + rmesa->hw.tex[1].cmd[TEX_PP_TXMULTI_CTL] |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE; + } /* maybe needs to be done pairwise due to 2 parallel (physical) tex units ? looks like that's not the case, if 8500/9100 owners don't complain remove this... @@ -1695,7 +1739,8 @@ void r200UpdateTextureState( GLcontext *ctx ) /* * Texture cache LRU hang workaround ------------- - * not needed for r200 derivatives? + * not needed for r200 derivatives + * hopefully this covers first pass of a shader as well */ /* While the cases below attempt to only enable the workaround in the -- 2.30.2