r200_vtxfmt_c.c \
r200_vtxfmt_sse.c \
r200_vtxfmt_x86.c \
+ r200_fragshader.c \
$(EGL_SOURCES)
C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
for (i = 0; i < mtu; ++i)
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
for (i = 0; i < mtu; ++i)
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
for (i = 0; i < 6; ++i)
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
-
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
+ insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
for (i = 0; i < 8; ++i)
insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
for (i = 0; i < 3 + mtu; ++i)
#define need_GL_ARB_texture_compression
#define need_GL_ARB_vertex_buffer_object
#define need_GL_ARB_vertex_program
+#define need_GL_ATI_fragment_shader
#define need_GL_EXT_blend_minmax
#define need_GL_EXT_fog_coord
#define need_GL_EXT_secondary_color
{ "GL_NV_vertex_program", GL_NV_vertex_program_functions }
};
+/* Extension list containing only GL_ATI_fragment_shader, kept separate so it
+ * can be handed to driInitSingleExtension() on its own. */
+const struct dri_extension ATI_fs_extension[] = {
+ { "GL_ATI_fragment_shader", GL_ATI_fragment_shader_functions }
+};
+
extern const struct tnl_pipeline_stage _r200_render_stage;
extern const struct tnl_pipeline_stage _r200_tcl_stage;
if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program"))
driInitSingleExtension( ctx, NV_vp_extension );
+ if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader)
+ driInitSingleExtension( ctx, ATI_fs_extension );
#if 0
r200InitDriverFuncs( ctx );
r200InitIoctlFuncs( ctx );
#define TEX_PP_TXSIZE 4 /*2c0c*/
#define TEX_PP_TXPITCH 5 /*2c10*/
#define TEX_PP_BORDER_COLOR 6 /*2c14*/
-#define TEX_CMD_1 7
-#define TEX_PP_TXOFFSET 8 /*2d00 */
-#define TEX_STATE_SIZE 9
-
-#define CUBE_CMD_0 0 /* 1 register follows */
-#define CUBE_PP_CUBIC_FACES 1 /* 0x2c18 */
+#define TEX_CMD_1_OLDDRM 7
+#define TEX_PP_TXOFFSET_OLDDRM 8 /*2d00 */
+#define TEX_STATE_SIZE_OLDDRM 9
+#define TEX_PP_CUBIC_FACES 7
+#define TEX_PP_TXMULTI_CTL 8
+#define TEX_CMD_1_NEWDRM 9
+#define TEX_PP_TXOFFSET_NEWDRM 10
+#define TEX_STATE_SIZE_NEWDRM 11
+
+#define CUBE_CMD_0 0 /* 1 register follows */ /* this command unnecessary */
+#define CUBE_PP_CUBIC_FACES 1 /* 0x2c18 */ /* with new enough drm */
#define CUBE_CMD_1 2 /* 5 registers follow */
#define CUBE_PP_CUBIC_OFFSET_F1 3 /* 0x2d04 */
#define CUBE_PP_CUBIC_OFFSET_F2 4 /* 0x2d08 */
#define TF_TFACTOR_5 6
#define TF_STATE_SIZE 7
+/* Layout of the "atf" state atom: one command dword (ATF_CMD_0) followed by
+ * eight constant-color dwords, which r200UpdateFSConstants() fills with the
+ * packed fragment shader constants. */
+#define ATF_CMD_0 0
+#define ATF_TFACTOR_0 1
+#define ATF_TFACTOR_1 2
+#define ATF_TFACTOR_2 3
+#define ATF_TFACTOR_3 4
+#define ATF_TFACTOR_4 5
+#define ATF_TFACTOR_5 6
+#define ATF_TFACTOR_6 7
+#define ATF_TFACTOR_7 8
+#define ATF_STATE_SIZE 9
+
+/* ATI_FRAGMENT_SHADER */
+/* Layout of an "afs" state atom: one command dword (AFS_CMD_0) followed by
+ * instruction dwords.  Only the first four dword indices are named here; the
+ * size of 33 leaves 32 dwords after the command, presumably 8 instructions of
+ * 4 dwords each as indexed by SET_INST/SET_INST_2 in r200_fragshader.c. */
+#define AFS_CMD_0 0
+#define AFS_IC0 1 /* 2f00 */
+#define AFS_IC1 2 /* 2f04 */
+#define AFS_IA0 3 /* 2f08 */
+#define AFS_IA1 4 /* 2f0c */
+#define AFS_STATE_SIZE 33
+
#define TCL_CMD_0 0
#define TCL_LIGHT_MODEL_CTL_0 1
#define TCL_LIGHT_MODEL_CTL_1 2
struct r200_state_atom fog;
struct r200_state_atom glt;
struct r200_state_atom prf;
+ struct r200_state_atom afs[2];
+ struct r200_state_atom atf;
int max_state_size; /* Number of bytes necessary for a full state emit. */
GLboolean is_dirty, all_dirty;
GLboolean using_hyperz;
GLboolean texmicrotile;
+
+ struct ati_fragment_shader *afs_loaded;
};
#define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx))
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2004 David Airlie
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL DAVID AIRLIE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "tnl/t_context.h"
+#include "atifragshader.h"
+#include "program.h"
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_tex.h"
+
+/* Accessors for the instruction dwords inside an afs command buffer.
+ * They expand to an lvalue in the array named afs_cmd, which must be in scope
+ * wherever the macros are used.  Each instruction occupies four dwords after
+ * the command header at index 0; "type" (0 or 1) selects the first or second
+ * pair of those dwords.  SET_INST is the first dword of the selected pair and
+ * SET_INST_2 the second.  Arguments are parenthesized so that expression
+ * arguments are shifted as a whole. */
+#define SET_INST(inst, type) afs_cmd[(((inst)<<2) + ((type)<<1) + 1)]
+#define SET_INST_2(inst, type) afs_cmd[(((inst)<<2) + ((type)<<1) + 2)]
+
+/*
+ * Encode one source operand of an arithmetic instruction and OR it into the
+ * instruction dwords addressed by SET_INST/SET_INST_2 for (opnum, optype).
+ *
+ * afs_cmd  command buffer the instruction is written to
+ * opnum    instruction slot within the buffer
+ * optype   selects which dword pair of the instruction is written; the
+ *          callers appear to use 0 for the color op and 1 for the alpha op
+ * srcReg   source register description (Index, argRep, argMod)
+ * argPos   hardware argument position (callers pass 0, 1 or 2 for A, B, C)
+ * tfactor  in/out: the constant already claimed for the first constant
+ *          selector of this op, or 0 if none yet; a different constant is
+ *          routed through the second selector instead
+ */
+static void r200SetFragShaderArg( GLuint *afs_cmd, GLuint opnum, GLuint optype,
+                                  const struct atifragshader_src_register srcReg,
+                                  GLuint argPos, GLuint *tfactor )
+{
+   const GLuint src = srcReg.Index;
+   const GLuint mods = srcReg.argMod;
+   const GLuint rep = srcReg.argRep;
+   /* bit offsets of this argument's fields */
+   const GLuint selShift = 5*argPos;
+   const GLuint modShift = 4*argPos;
+   const GLuint replShift = R200_TXC_REPL_ARG_A_SHIFT + (2*argPos);
+   GLuint word0 = 0;
+   GLuint word1 = 0;
+   GLuint oddSrc = 0;
+
+   /* component replication: either a replicate field in the second dword or
+      the "odd" variant of the source selector, depending on optype */
+   switch (rep) {
+   case GL_RED:
+      word1 |= R200_TXC_REPL_RED << replShift;
+      if (optype) {
+         oddSrc = 1;
+      }
+      break;
+   case GL_GREEN:
+      word1 |= R200_TXC_REPL_GREEN << replShift;
+      if (optype) {
+         oddSrc = 1;
+      }
+      break;
+   case GL_BLUE:
+      if (optype) {
+         oddSrc = 1;
+      }
+      else {
+         word1 |= R200_TXC_REPL_BLUE << replShift;
+      }
+      break;
+   case GL_ALPHA:
+      if (!optype) {
+         oddSrc = 1;
+      }
+      break;
+   }
+
+   /* source selector */
+   if (src >= GL_REG_0_ATI && src <= GL_REG_5_ATI) {
+      word0 |= (((src - GL_REG_0_ATI)*2) + 10 + oddSrc) << selShift;
+   }
+   else if (src >= GL_CON_0_ATI && src <= GL_CON_7_ATI) {
+      if ((*tfactor != 0) && (src != *tfactor)) {
+         /* first selector is taken by another constant, use the second */
+         word0 |= (R200_TXC_ARG_A_TFACTOR1_COLOR + oddSrc) << selShift;
+         word1 |= (src - GL_CON_0_ATI) << R200_TXC_TFACTOR1_SEL_SHIFT;
+      }
+      else {
+         word0 |= (R200_TXC_ARG_A_TFACTOR_COLOR + oddSrc) << selShift;
+         word1 |= (src - GL_CON_0_ATI) << R200_TXC_TFACTOR_SEL_SHIFT;
+         *tfactor = src;
+      }
+   }
+   else if (src == GL_PRIMARY_COLOR_EXT) {
+      word0 |= (R200_TXC_ARG_A_DIFFUSE_COLOR + oddSrc) << selShift;
+   }
+   else if (src == GL_SECONDARY_INTERPOLATOR_ATI) {
+      word0 |= (R200_TXC_ARG_A_SPECULAR_COLOR + oddSrc) << selShift;
+   }
+   /* GL_ZERO is a noop, for GL_ONE we set the complement */
+   else if (src == GL_ONE) {
+      word0 |= R200_TXC_COMP_ARG_A << modShift;
+   }
+
+   /* argument modifiers; complement and negate toggle, bias and 2x set */
+   if (mods & GL_COMP_BIT_ATI) {
+      word0 ^= R200_TXC_COMP_ARG_A << modShift;
+   }
+   if (mods & GL_BIAS_BIT_ATI) {
+      word0 |= R200_TXC_BIAS_ARG_A << modShift;
+   }
+   if (mods & GL_2X_BIT_ATI) {
+      word0 |= R200_TXC_SCALE_ARG_A << modShift;
+   }
+   if (mods & GL_NEGATE_BIT_ATI) {
+      word0 ^= R200_TXC_NEG_ARG_A << modShift;
+   }
+
+   SET_INST(opnum, optype) |= word0;
+   SET_INST_2(opnum, optype) |= word1;
+}
+
+/* Output write mask bits, indexed by the destination mask of an instruction
+ * (bit 0 = red, bit 1 = green, bit 2 = blue, as the entries below show).
+ * A mask of 0 selects all three components, the same as a mask of 7.
+ * The table is only ever read, so it is const. */
+static const GLuint dstmask_table[8] =
+{
+   R200_TXC_OUTPUT_MASK_RGB,
+   R200_TXC_OUTPUT_MASK_R,
+   R200_TXC_OUTPUT_MASK_G,
+   R200_TXC_OUTPUT_MASK_RG,
+   R200_TXC_OUTPUT_MASK_B,
+   R200_TXC_OUTPUT_MASK_RB,
+   R200_TXC_OUTPUT_MASK_GB,
+   R200_TXC_OUTPUT_MASK_RGB
+};
+
+/*
+ * Translate the arithmetic instructions of the current ATI fragment shader
+ * into the afs[] state atoms.
+ * A shader with fewer than two passes is written to afs[1]; otherwise the
+ * first pass goes to afs[0] and the second to afs[1].  Both atoms are flagged
+ * with R200_STATECHANGE, and rmesa->afs_loaded is set to the shader that was
+ * translated so the caller can skip this work while the same shader object
+ * stays current.
+ */
+static void r200UpdateFSArith( GLcontext *ctx )
+{
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ GLuint *afs_cmd;
+ const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+ GLuint pass;
+
+ R200_STATECHANGE( rmesa, afs[0] );
+ R200_STATECHANGE( rmesa, afs[1] );
+
+ /* pick the buffer for the first pass processed; SET_INST/SET_INST_2 below
+ write through afs_cmd */
+ if (shader->NumPasses < 2) {
+ afs_cmd = rmesa->hw.afs[1].cmd;
+ }
+ else {
+ afs_cmd = rmesa->hw.afs[0].cmd;
+ }
+ for (pass = 0; pass < shader->NumPasses; pass++) {
+ GLuint opnum = 0;
+ GLuint pc;
+ for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
+ GLuint optype;
+ struct atifs_instruction *inst = &shader->Instructions[pass][pc];
+
+ /* start from cleared instruction words, everything below only ORs bits in */
+ SET_INST(opnum, 0) = 0;
+ SET_INST_2(opnum, 0) = 0;
+ SET_INST(opnum, 1) = 0;
+ SET_INST_2(opnum, 1) = 0;
+
+ /* optype 0 and 1 are encoded in turn; the dot product cases below
+ treat optype 1 as the alpha op */
+ for (optype = 0; optype < 2; optype++) {
+ /* constant tracking for r200SetFragShaderArg starts fresh for each optype */
+ GLuint tfactor = 0;
+
+ if (inst->Opcode[optype]) {
+ switch (inst->Opcode[optype]) {
+ /* these are all MADD in disguise
+ MADD is A * B + C
+ so for GL_ADD use arg B/C and make A complement 0
+ for GL_SUB use arg B/C, negate C and make A complement 0
+ for GL_MOV use arg C
+ for GL_MUL use arg A
+ for GL_MAD all good */
+ case GL_SUB_ATI:
+ /* negate C */
+ SET_INST(opnum, optype) |= R200_TXC_NEG_ARG_C;
+ /* fallthrough */
+ case GL_ADD_ATI:
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][0], 1, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][1], 2, &tfactor);
+ /* A = complement 0 */
+ SET_INST(opnum, optype) |= R200_TXC_COMP_ARG_A;
+ SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
+ break;
+ case GL_MOV_ATI:
+ /* put arg0 in C */
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][0], 2, &tfactor);
+ SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
+ break;
+ case GL_MAD_ATI:
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][2], 2, &tfactor);
+ /* fallthrough */
+ case GL_MUL_ATI:
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][0], 0, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][1], 1, &tfactor);
+ SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
+ break;
+ case GL_LERP_ATI:
+ /* arg order is not native chip order, swap A and C */
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][0], 2, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][1], 1, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][2], 0, &tfactor);
+ SET_INST(opnum, optype) |= R200_TXC_OP_LERP;
+ break;
+ case GL_CND_ATI:
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][0], 0, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][1], 1, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][2], 2, &tfactor);
+ SET_INST(opnum, optype) |= R200_TXC_OP_CONDITIONAL;
+ break;
+ case GL_CND0_ATI:
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][0], 0, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][1], 1, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, optype,
+ inst->SrcReg[optype][2], 2, &tfactor);
+ SET_INST(opnum, optype) |= R200_TXC_OP_CND0;
+ break;
+ /* cannot specify dot ops as alpha ops directly */
+ case GL_DOT2_ADD_ATI:
+ if (optype)
+ SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
+ else {
+ r200SetFragShaderArg(afs_cmd, opnum, 0,
+ inst->SrcReg[0][0], 0, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, 0,
+ inst->SrcReg[0][1], 1, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, 0,
+ inst->SrcReg[0][2], 2, &tfactor);
+ SET_INST(opnum, 0) |= R200_TXC_OP_DOT2_ADD;
+ }
+ break;
+ case GL_DOT3_ATI:
+ if (optype)
+ SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
+ else {
+ r200SetFragShaderArg(afs_cmd, opnum, 0,
+ inst->SrcReg[0][0], 0, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, 0,
+ inst->SrcReg[0][1], 1, &tfactor);
+ SET_INST(opnum, 0) |= R200_TXC_OP_DOT3;
+ }
+ break;
+ case GL_DOT4_ATI:
+ /* experimental verification: for dot4 setup of alpha args is needed
+ (dstmod is ignored, though, so dot2/dot3 should be safe)
+ the hardware apparently does R1*R2 + G1*G2 + B1*B2 + A3*A4
+ but the API doesn't allow it */
+ if (optype)
+ SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
+ else {
+ r200SetFragShaderArg(afs_cmd, opnum, 0,
+ inst->SrcReg[0][0], 0, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, 0,
+ inst->SrcReg[0][1], 1, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, 1,
+ inst->SrcReg[0][0], 0, &tfactor);
+ r200SetFragShaderArg(afs_cmd, opnum, 1,
+ inst->SrcReg[0][1], 1, &tfactor);
+ SET_INST(opnum, optype) |= R200_TXC_OP_DOT4;
+ }
+ break;
+ }
+ }
+
+ /* destination */
+ if (inst->DstReg[optype].Index) {
+ GLuint dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
+ GLuint dstmask = inst->DstReg[optype].dstMask;
+ GLuint sat = inst->DstReg[optype].dstMod & GL_SATURATE_BIT_ATI;
+ GLuint dstmod = inst->DstReg[optype].dstMod;
+
+ /* saturate is handled through the clamp mode, the rest selects a scale */
+ dstmod &= ~GL_SATURATE_BIT_ATI;
+
+ SET_INST_2(opnum, optype) |= (dstreg + 1) << R200_TXC_OUTPUT_REG_SHIFT;
+ SET_INST_2(opnum, optype) |= dstmask_table[dstmask];
+
+ /* fglrx does clamp the last instructions to 0_1 it seems */
+ /* this won't necessarily catch the last instruction
+ which writes to reg0 */
+ if (sat || (pc == (shader->numArithInstr[pass] - 1) &&
+ ((pass == 1) || (shader->NumPasses == 1))))
+ SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_0_1;
+ else
+ /*should we clamp or not? spec is vague, I would suppose yes but fglrx doesn't */
+ SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_8_8;
+/* SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_WRAP;*/
+ switch(dstmod) {
+ case GL_2X_BIT_ATI:
+ SET_INST_2(opnum, optype) |= R200_TXC_SCALE_2X;
+ break;
+ case GL_4X_BIT_ATI:
+ SET_INST_2(opnum, optype) |= R200_TXC_SCALE_4X;
+ break;
+ case GL_8X_BIT_ATI:
+ SET_INST_2(opnum, optype) |= R200_TXC_SCALE_8X;
+ break;
+ case GL_HALF_BIT_ATI:
+ SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV2;
+ break;
+ case GL_QUARTER_BIT_ATI:
+ SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV4;
+ break;
+ case GL_EIGHTH_BIT_ATI:
+ SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV8;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+/* fprintf(stderr, "pass %d nr %d inst 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n",
+ pass, opnum, SET_INST(opnum, 0), SET_INST_2(opnum, 0),
+ SET_INST(opnum, 1), SET_INST_2(opnum, 1));*/
+ opnum++;
+ }
+ /* any further pass is written to afs[1] */
+ afs_cmd = rmesa->hw.afs[1].cmd;
+ }
+ /* remember which shader object the afs atoms now describe */
+ rmesa->afs_loaded = ctx->ATIFragmentShader.Current;
+}
+
+/*
+ * Program texture coordinate routing and the texture / blend-stage enable
+ * bits for the current ATI fragment shader.
+ *
+ * Marks the ctx and cst atoms dirty, selects r or q coordinate handling per
+ * unit from shader->swizzlerq, clears the enable fields in PP_CNTL and
+ * PP_CNTL_X and then rebuilds them:
+ *  - fewer than two passes: blend stages for numArithInstr[0] instructions
+ *    are enabled in PP_CNTL and each unit with a setup instruction gets its
+ *    coordinate route and coordinate mode in TXFORMAT / TXFORMAT_X;
+ *  - two passes: multi pass is enabled, the first pass is described through
+ *    PP_CNTL_X and TXMULTI_CTL, the second through PP_CNTL and
+ *    TXFORMAT / TXFORMAT_X (optionally sourcing coords from a register).
+ *
+ * Every local is declared at the top of its block (the original declared
+ * txformat_multi after a statement, which C89 does not allow).
+ */
+static void r200UpdateFSRouting( GLcontext *ctx ) {
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   GLuint reg;
+
+   R200_STATECHANGE( rmesa, ctx );
+   R200_STATECHANGE( rmesa, cst );
+
+   for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+      if (shader->swizzlerq & (1 << (2 * reg)))
+         /* r coord */
+         set_re_cntl_d3d( ctx, reg, 1);
+         /* q coord */
+      else set_re_cntl_d3d( ctx, reg, 0);
+   }
+
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_MULTI_PASS_ENABLE |
+                                       R200_TEX_BLEND_ENABLE_MASK |
+                                       R200_TEX_ENABLE_MASK);
+   rmesa->hw.cst.cmd[CST_PP_CNTL_X] &= ~(R200_PPX_PFS_INST_ENABLE_MASK |
+                                         R200_PPX_TEX_ENABLE_MASK |
+                                         R200_PPX_OUTPUT_REG_MASK);
+
+   /* first pass registers use slots 8 - 15
+      but single pass shaders use slots 0 - 7 */
+   if (shader->NumPasses < 2) {
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[0] == 8 ?
+         0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
+         (0xff >> (8 - shader->numArithInstr[0])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
+   } else {
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_MULTI_PASS_ENABLE;
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[1] == 8 ?
+         0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
+         (0xff >> (8 - shader->numArithInstr[1])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
+      rmesa->hw.cst.cmd[CST_PP_CNTL_X] |=
+         (0xff >> (8 - shader->numArithInstr[0])) << R200_PPX_FPS_INST0_ENABLE_SHIFT;
+   }
+
+   if (shader->NumPasses < 2) {
+      for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+         struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current;
+         R200_STATECHANGE( rmesa, tex[reg] );
+         rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = 0;
+         if (shader->SetupInst[0][reg].Opcode) {
+            GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
+               & ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+            GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
+            txformat |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
+               << R200_TXFORMAT_ST_ROUTE_SHIFT;
+            /* fix up texcoords for proj/non-proj 2d (3d and cube are not defined when
+               using projection so don't have to worry there).
+               When passing coords, need R200_TEXCOORD_VOLUME, otherwise lose a coord */
+            /* FIXME: someone might rely on default tex coords r/q, which we unfortunately
+               don't provide (we have the same problem without shaders) */
+            if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+               txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
+               if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+                   shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+                  txformat_x |= R200_TEXCOORD_VOLUME;
+               }
+               else {
+                  txformat_x |= R200_TEXCOORD_PROJ;
+               }
+            }
+            else if (texObj->Target == GL_TEXTURE_3D) {
+               txformat_x |= R200_TEXCOORD_VOLUME;
+            }
+            else if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+               txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+            }
+            else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+                     shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+               txformat_x |= R200_TEXCOORD_NONPROJ;
+            }
+            else {
+               txformat_x |= R200_TEXCOORD_PROJ;
+            }
+            rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
+            rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
+            /* is this a good idea? Could potentially sample from not enabled unit.
+               results are probably undefined anyway (?) but I hope it doesn't lock up... */
+            rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
+         }
+      }
+
+   } else {
+      /* setup 1st pass */
+      for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+         struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current;
+         /* accumulates the pass 1 routing; stays 0 for units without a
+            setup instruction in the first pass */
+         GLuint txformat_multi = 0;
+
+         R200_STATECHANGE( rmesa, tex[reg] );
+         if (shader->SetupInst[0][reg].Opcode) {
+            txformat_multi |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
+               << R200_PASS1_ST_ROUTE_SHIFT;
+            if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+               txformat_multi |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
+               if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+                   shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+                  txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
+               }
+               else {
+                  txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
+               }
+            }
+            else if (texObj->Target == GL_TEXTURE_3D) {
+               txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
+            }
+            else if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+               txformat_multi |= R200_PASS1_TEXCOORD_CUBIC_ENV;
+            }
+            else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+                     shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+               txformat_multi |= R200_PASS1_TEXCOORD_NONPROJ;
+            }
+            else {
+               txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
+            }
+            rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg;
+         }
+         rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
+      }
+
+      /* setup 2nd pass */
+      for (reg=0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+         struct gl_texture_object *texObj = ctx->Texture.Unit[reg]._Current;
+         if (shader->SetupInst[1][reg].Opcode) {
+            GLuint coord = shader->SetupInst[1][reg].src;
+            GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
+               & ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+            GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
+            R200_STATECHANGE( rmesa, tex[reg] );
+            if (shader->SetupInst[1][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+               txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
+               /* NOTE(review): VOLUME is OR'ed in unconditionally here and a
+                  mode is OR'ed in again just below; the other PASS_OP paths
+                  in this function only do the latter - confirm this is
+                  intended */
+               txformat_x |= R200_TEXCOORD_VOLUME;
+               if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+                   shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+                  txformat_x |= R200_TEXCOORD_VOLUME;
+               }
+               else {
+                  txformat_x |= R200_TEXCOORD_PROJ;
+               }
+            }
+            else if (texObj->Target == GL_TEXTURE_3D) {
+               txformat_x |= R200_TEXCOORD_VOLUME;
+            }
+            else if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+               txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+            }
+            else if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+                     shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+               txformat_x |= R200_TEXCOORD_NONPROJ;
+            }
+            else {
+               txformat_x |= R200_TEXCOORD_PROJ;
+            }
+            if (coord >= GL_REG_0_ATI) {
+               /* coords come from a register written by the first pass */
+               GLuint txformat_multi = rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL];
+               txformat_multi |= (coord - GL_REG_0_ATI + 2) << R200_PASS2_COORDS_REG_SHIFT;
+               rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
+               rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= 1 <<
+                  (R200_PPX_OUTPUT_REG_0_SHIFT + coord - GL_REG_0_ATI);
+            } else {
+               txformat |= (coord - GL_TEXTURE0_ARB) << R200_TXFORMAT_ST_ROUTE_SHIFT;
+            }
+            rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
+            rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
+            rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
+         }
+      }
+   }
+}
+
+/*
+ * Refresh the eight constant colors held in the atf state atom.
+ * For each constant the shader-local value is taken when the matching bit of
+ * shader->localConstDef is set, otherwise the context-wide value from
+ * ctx->ATIFragmentShader.globalConstants.  Each component is converted with
+ * CLAMPED_FLOAT_TO_UBYTE and the four bytes are packed by r200PackColor().
+ * The atom is always marked dirty.
+ */
+static void r200UpdateFSConstants( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   GLuint slot;
+
+   /* update constants */
+   R200_STATECHANGE(rmesa, atf);
+   for (slot = 0; slot < 8; slot++) {
+      const GLuint useLocal = (shader->localConstDef >> slot) & 1;
+      GLubyte rgba[4];
+      GLuint comp;
+
+      for (comp = 0; comp < 4; comp++) {
+         if (useLocal) {
+            CLAMPED_FLOAT_TO_UBYTE(rgba[comp], shader->Constants[slot][comp]);
+         }
+         else {
+            CLAMPED_FLOAT_TO_UBYTE(rgba[comp], ctx->ATIFragmentShader.globalConstants[slot][comp]);
+         }
+      }
+      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + slot] =
+         r200PackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
+   }
+}
+
+/* Bring all hardware state derived from the current ATI fragment shader up
+ * to date: constants, routing and arithmetic.
+ * - Constants are refreshed on every call (globals can change and there is no
+ *   separate notification).
+ * - Routing is refreshed on every call too (non-shader code overwrites the
+ *   same state, and part of the routing depends on which kind of texture is
+ *   bound).
+ *   Updating both unconditionally also covers ati_fs being disabled and
+ *   enabled again, which would otherwise have to be tracked.
+ * - Arithmetic is only re-translated when the current shader differs from the
+ *   one recorded in rmesa->afs_loaded.  (The translated data should probably
+ *   live in some DriverData object attached to the mesa atifs object, so that
+ *   binding a shader would not force a "recompile".)
+ * NOTE(review): the skip is by pointer identity only; whether redefining the
+ * bound shader in place is signalled some other way is not visible here -
+ * confirm.
+ */
+void r200UpdateFragmentShader( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   r200UpdateFSConstants( ctx );
+   r200UpdateFSRouting( ctx );
+
+   /* arithmetic already matches the bound shader object, nothing more to do */
+   if (rmesa->afs_loaded == ctx->ATIFragmentShader.Current)
+      return;
+
+   r200UpdateFSArith( ctx );
+}
extern const struct dri_extension blend_extensions[];
extern const struct dri_extension ARB_vp_extension[];
extern const struct dri_extension NV_vp_extension[];
+extern const struct dri_extension ATI_fs_extension[];
#if 1
/* Including xf86PciInfo.h introduces a bunch of errors...
/* Check if kernel module is new enough to support blend color and
separate blend functions/equations */
screen->drmSupportsBlendColor = (sPriv->drmMinor >= 11);
-
screen->drmSupportsTriPerf = (sPriv->drmMinor >= 16);
+ screen->drmSupportsFragShader = (sPriv->drmMinor >= 18);
+
}
/* Check if ddx has set up a surface reg to cover depth buffer */
screen->depthHasSurface = (sPriv->ddxMajor > 4);
driInitExtensions( NULL, blend_extensions, GL_FALSE );
driInitSingleExtension( NULL, ARB_vp_extension );
driInitSingleExtension( NULL, NV_vp_extension );
+ driInitSingleExtension( NULL, ATI_fs_extension );
}
return (void *) psp;
unsigned int gart_texture_offset; /* offset in card memory space */
unsigned int gart_base;
- GLboolean drmSupportsCubeMaps; /* need radeon kernel module >=1.7 */
+ GLboolean drmSupportsCubeMaps; /* need radeon kernel module >= 1.7 */
GLboolean drmSupportsBlendColor; /* need radeon kernel module >= 1.11 */
GLboolean drmSupportsTriPerf; /* need radeon kernel module >= 1.16 */
+ GLboolean drmSupportsFragShader; /* need radeon kernel module >= 1.18 */
GLboolean depthHasSurface;
/* Configuration cache with default values for all contexts */
TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_VERTEX_PROGRAM, state);
break;
+ case GL_FRAGMENT_SHADER_ATI:
+ if ( !state ) {
+ /* restore normal tex env colors and make sure tex env combine will get updated
+ mark env atoms dirty (as their data was overwritten by afs even
+ if they didn't change) and restore tex coord routing */
+ GLuint unit;
+ for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
+ rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
+ ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+ rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+ /* need to guard this with drmSupportsFragShader? Should never get here if
+ we don't announce ATI_fs, right? */
+ rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
+ R200_STATECHANGE( rmesa, pix[unit] );
+ R200_STATECHANGE( rmesa, tex[unit] );
+ }
+ rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+ R200_STATECHANGE( rmesa, cst );
+ R200_STATECHANGE( rmesa, tf );
+ }
+ else {
+ /* need to mark this dirty as pix/tf atoms have overwritten the data
+ even if the data in the atoms didn't change */
+ R200_STATECHANGE( rmesa, atf );
+ R200_STATECHANGE( rmesa, afs[1] );
+ /* everything else picked up in r200UpdateTextureState hopefully */
+ }
+ break;
default:
return;
}
r200UpdateDrawBuffer(ctx);
}
- if (new_state & _NEW_TEXTURE) {
+ if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
r200UpdateTextureState( ctx );
new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
}
*/
if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
update_texturematrix( ctx );
- }
+ }
if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
update_light( ctx );
CHECK( always, GL_TRUE )
CHECK( never, GL_FALSE )
CHECK( tex_any, ctx->Texture._EnabledUnits )
-CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded))
+CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) );
+CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) )
CHECK( tex, rmesa->state.texture.unit[idx].unitneeded )
-CHECK( texenv, rmesa->state.envneeded & (1 << idx) )
+CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled )
+CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) )
+CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) )
+CHECK( afs, ctx->ATIFragmentShader._Enabled )
CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT )
CHECK( fog, ctx->Fog.Enabled )
TCL_CHECK( tcl, GL_TRUE )
rmesa->hw.ATOM.dirty = GL_FALSE; \
rmesa->hw.max_state_size += SZ * sizeof(int); \
} while (0)
-
-
+
+
/* Allocate state buffers:
*/
if (rmesa->r200Screen->drmSupportsBlendColor)
ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
- ALLOC_STATE( tf, tex_any, TF_STATE_SIZE, "TF/tfactor", 0 );
- if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) {
- /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
- ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+ ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
+ if (rmesa->r200Screen->drmSupportsFragShader) {
+ if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) {
+ /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
+ ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
+ ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+ }
+ else {
+ ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
+ ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+ }
+ ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-2", 2 );
+ ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-3", 3 );
+ ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-4", 4 );
+ ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-5", 5 );
+ ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
+ ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+ ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
}
else {
- ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE, "TEX/tex-0", 0 );
- ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE, "TEX/tex-1", 1 );
- ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+ if (rmesa->r200Screen->chipset & R200_CHIPSET_REAL_R200) {
+ ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
+ ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+ }
+ else {
+ ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
+ ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
+ ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+ }
+ ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-2", 2 );
+ ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-3", 3 );
+ ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-4", 4 );
+ ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-5", 5 );
+ ALLOC_STATE( atf, never, ATF_STATE_SIZE, "TF/tfactor", 0 );
+ ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+ ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
}
- ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE, "TEX/tex-2", 2 );
- ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE, "TEX/tex-3", 3 );
- ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE, "TEX/tex-4", 4 );
- ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE, "TEX/tex-5", 5 );
if (rmesa->r200Screen->drmSupportsCubeMaps) {
ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 );
ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 );
ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 );
- ALLOC_STATE( pix[0], always, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
+ ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 );
ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE);
rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3);
- rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
- rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
- rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
- rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
- rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
- rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
- rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
- rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
- rmesa->hw.tex[3].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
- rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
- rmesa->hw.tex[4].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
- rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
- rmesa->hw.tex[5].cmd[TEX_CMD_1] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
+ rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
+ if (rmesa->r200Screen->drmSupportsFragShader) {
+ rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR);
+ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0);
+ rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
+ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1);
+ rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
+ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2);
+ rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
+ rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3);
+ rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
+ rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4);
+ rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
+ rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5);
+ rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
+ } else {
+ rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
+ rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
+ rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
+ rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
+ rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
+ rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
+ rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
+ rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
+ rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
+ rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
+ rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
+ rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
+ }
+ rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0);
+ rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1);
rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0);
rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0);
rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1);
((i << R200_TXFORMAT_ST_ROUTE_SHIFT) | /* <-- note i */
(2 << R200_TXFORMAT_WIDTH_SHIFT) |
(2 << R200_TXFORMAT_HEIGHT_SHIFT));
- rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
- rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
(/* R200_TEXCOORD_PROJ | */
0x100000); /* Small default bias */
+ if (rmesa->r200Screen->drmSupportsFragShader) {
+ rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
+ rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
+ rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
+ }
+ else {
+ rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] =
+ rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ }
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
extern void r200InitTextureFuncs( struct dd_function_table *functions );
+extern void r200UpdateFragmentShader( GLcontext *ctx );
+
+extern void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d );
+
#endif /* __R200_TEX_H__ */
ASSERT(log2Width == log2Height);
t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
(log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
+/* this bit is probably not needed (it may not even exist) - fglrx does not set it */
(R200_TXFORMAT_CUBIC_MAP_ENABLE));
t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
break;
case GL_PREVIOUS:
if (replaceargs != unit) {
- const GLint srcRGBreplace = ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
+ const GLint srcRGBreplace =
+ ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
if (op >= 2) {
op = op ^ replaceopa;
}
if (slot == 0)
color_arg[i] = r200_primary_color[op];
else
- color_arg[i] = r200_register_color[op][rmesa->state.texture.unit[replaceargs - 1].outputreg];
+ color_arg[i] = r200_register_color[op]
+ [rmesa->state.texture.unit[replaceargs - 1].outputreg];
break;
case GL_ZERO:
color_arg[i] = r200_zero_color[op];
if (slot == 0)
color_arg[i] = r200_primary_color[op];
else
- color_arg[i] = r200_register_color[op][rmesa->state.texture.unit[unit - 1].outputreg];
+ color_arg[i] = r200_register_color[op]
+ [rmesa->state.texture.unit[unit - 1].outputreg];
}
break;
case GL_ZERO:
break;
case GL_PREVIOUS:
if (replaceargs != unit) {
- const GLint srcAreplace = ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
+ const GLint srcAreplace =
+ ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
op = op ^ replaceopa;
switch (srcAreplace) {
case GL_TEXTURE:
if (slot == 0)
alpha_arg[i] = r200_primary_alpha[op];
else
- alpha_arg[i] = r200_register_alpha[op][rmesa->state.texture.unit[replaceargs - 1].outputreg];
+ alpha_arg[i] = r200_register_alpha[op]
+ [rmesa->state.texture.unit[replaceargs - 1].outputreg];
break;
case GL_ZERO:
alpha_arg[i] = r200_zero_alpha[op];
if (slot == 0)
alpha_arg[i] = r200_primary_alpha[op];
else
- alpha_arg[i] = r200_register_alpha[op][rmesa->state.texture.unit[unit - 1].outputreg];
+ alpha_arg[i] = r200_register_alpha[op]
+ [rmesa->state.texture.unit[unit - 1].outputreg];
}
break;
case GL_ZERO:
}
R200_STATECHANGE( rmesa, ctx );
- rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_TEX_BLEND_ENABLE_MASK;
+ rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_BLEND_ENABLE_MASK | R200_MULTI_PASS_ENABLE);
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= rmesa->state.envneeded << R200_TEX_BLEND_0_ENABLE_SHIFT;
return ok;
#define TEXOBJ_TXFORMAT_MASK (R200_TXFORMAT_WIDTH_MASK | \
R200_TXFORMAT_HEIGHT_MASK | \
R200_TXFORMAT_FORMAT_MASK | \
- R200_TXFORMAT_F5_WIDTH_MASK | \
- R200_TXFORMAT_F5_HEIGHT_MASK | \
+ R200_TXFORMAT_F5_WIDTH_MASK | \
+ R200_TXFORMAT_F5_HEIGHT_MASK | \
R200_TXFORMAT_ALPHA_IN_MAP | \
R200_TXFORMAT_CUBIC_MAP_ENABLE | \
- R200_TXFORMAT_NON_POWER2)
+ R200_TXFORMAT_NON_POWER2)
#define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK | \
R200_TEXCOORD_MASK | \
cmd[TEX_PP_TXFORMAT_X] |= texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK;
cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
- cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
- R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
+ if (rmesa->r200Screen->drmSupportsFragShader) {
+ cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
+ }
+ else {
+ cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
+ }
if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
GLuint *cube_cmd = R200_DB_STATE( cube[unit] );
GLuint bytesPerFace = texobj->base.totalSize / 6;
ASSERT(texobj->base.totalSize % 6 == 0);
cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+ if (rmesa->r200Screen->drmSupportsFragShader) {
+ /* This value is submitted twice (in the tex atom and in the cube atom).
+ The cube atom could be changed to omit that command when the new drm is used. */
+ cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+ }
cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
R200_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] );
}
+ R200_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
texobj->dirty_state &= ~(1<<unit);
}
}
}
-static void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
+void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
- if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_RECT_BIT) ) {
+ if ( unitneeded & (TEXTURE_RECT_BIT) ) {
return (enable_tex_rect( ctx, unit ) &&
update_tex_common( ctx, unit ));
}
- else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
+ else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
return (enable_tex_2d( ctx, unit ) &&
update_tex_common( ctx, unit ));
}
#if ENABLE_HW_3D_TEXTURE
- else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_3D_BIT) ) {
+ else if ( unitneeded & (TEXTURE_3D_BIT) ) {
return (enable_tex_3d( ctx, unit ) &&
update_tex_common( ctx, unit ));
}
#endif
- else if ( rmesa->state.texture.unit[unit].unitneeded & (TEXTURE_CUBE_BIT) ) {
+ else if ( unitneeded & (TEXTURE_CUBE_BIT) ) {
return (enable_tex_cube( ctx, unit ) &&
update_tex_common( ctx, unit ));
}
- else if ( rmesa->state.texture.unit[unit].unitneeded ) {
+ else if ( unitneeded ) {
return GL_FALSE;
}
else {
GLboolean ok;
GLuint dbg;
- ok = r200UpdateAllTexEnv( ctx );
-
+ if (ctx->ATIFragmentShader._Enabled) {
+ GLuint i;
+ for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
+ rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
+ }
+ ok = GL_TRUE;
+ }
+ else {
+ ok = r200UpdateAllTexEnv( ctx );
+ }
if (ok) {
ok = (r200UpdateTextureUnit( ctx, 0 ) &&
r200UpdateTextureUnit( ctx, 1 ) &&
r200UpdateTextureUnit( ctx, 5 ));
}
+ if (ok && ctx->ATIFragmentShader._Enabled) {
+ r200UpdateFragmentShader(ctx);
+ }
+
FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
if (rmesa->TclFallback)
/*
* T0 hang workaround -------------
- * not needed for r200 derivatives?
- */
+ * not needed for r200 derivatives
+ */
if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE &&
- (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
-
- R200_STATECHANGE(rmesa, ctx);
- R200_STATECHANGE(rmesa, tex[1]);
- rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
- rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
- rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= 0x08000000;
+ (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
+
+ R200_STATECHANGE(rmesa, ctx);
+ R200_STATECHANGE(rmesa, tex[1]);
+ rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
+ if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
+ rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+ rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
}
- else {
- if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
- (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) {
- R200_STATECHANGE(rmesa, tex[1]);
- rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000;
+ else if (!ctx->ATIFragmentShader._Enabled) {
+ if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
+ (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & R200_TXFORMAT_LOOKUP_DISABLE)) {
+ R200_STATECHANGE(rmesa, tex[1]);
+ rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~R200_TXFORMAT_LOOKUP_DISABLE;
}
}
+ /* Apply the same workaround to the first pass of a fragment shader.
+ * It is unknown whether this is necessary or sufficient.
+ */
+ if ((rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_ENABLE_MASK) == R200_PPX_TEX_0_ENABLE &&
+ (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
+
+ R200_STATECHANGE(rmesa, cst);
+ R200_STATECHANGE(rmesa, tex[1]);
+ rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_1_ENABLE;
+ if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE))
+ rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+ rmesa->hw.tex[1].cmd[TEX_PP_TXMULTI_CTL] |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
+ }
/* maybe needs to be done pairwise due to 2 parallel (physical) tex units ?
looks like that's not the case, if 8500/9100 owners don't complain remove this...
/*
* Texture cache LRU hang workaround -------------
- * not needed for r200 derivatives?
+ * not needed for r200 derivatives
+ * hopefully this also covers the first pass of a fragment shader
*/
/* While the cases below attempt to only enable the workaround in the