From eeada484979e9784ebb03f635f207e717a04e311 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 23 May 2005 14:14:19 +0000 Subject: [PATCH] Implement enough of ARB_fragment_program for Keith's texenv program generation in Mesa. Requires most recent Mesa cvs to work correctly. Swizzle code is possibly slow/buggy and will probably be replaced. Old behaviour can be controlled by a #define in r300_context.h, if no-one minds I'll remove the old code later on. Also, disabled cube map extension. AFAIK We don't support it yet, and it causes issues with UT2004. --- src/mesa/drivers/dri/r300/Makefile | 1 + src/mesa/drivers/dri/r300/r300_context.c | 23 +- src/mesa/drivers/dri/r300/r300_context.h | 71 +- src/mesa/drivers/dri/r300/r300_fragprog.c | 937 ++++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_fragprog.h | 46 ++ src/mesa/drivers/dri/r300/r300_reg.h | 13 +- src/mesa/drivers/dri/r300/r300_shader.c | 26 +- src/mesa/drivers/dri/r300/r300_state.c | 246 +++++- src/mesa/drivers/dri/r300/r300_texprog.c | 2 + 9 files changed, 1312 insertions(+), 53 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/r300_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/r300_fragprog.h diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 71454a37e99..520b395997e 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -37,6 +37,7 @@ DRIVER_SOURCES = \ r300_texstate.c \ r300_texprog.c \ r300_vertexprog.c \ + r300_fragprog.c \ r300_shader.c \ r300_maos.c # \ diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 5511d0b88f3..f4ed7159dd2 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -73,14 +73,17 @@ static const char *const card_extensions[] = { "GL_ARB_multitexture", "GL_ARB_texture_border_clamp", "GL_ARB_texture_compression", - "GL_ARB_texture_cube_map", +/* disable until we support it, fixes a few 
things in ut2004 */ +// "GL_ARB_texture_cube_map", "GL_ARB_texture_env_add", "GL_ARB_texture_env_combine", "GL_ARB_texture_env_dot3", "GL_ARB_texture_mirrored_repeat", "GL_ARB_vertex_buffer_object", "GL_ARB_vertex_program", - //"GL_ARB_fragment_program", +#if USE_ARB_F_P == 1 + "GL_ARB_fragment_program", +#endif "GL_EXT_blend_equation_separate", "GL_EXT_blend_func_separate", "GL_EXT_blend_minmax", @@ -101,6 +104,7 @@ static const char *const card_extensions[] = { "GL_NV_blend_square", "GL_NV_vertex_program", "GL_SGIS_generate_mipmap", + "GL_ARB_texture_env_crossbar", NULL }; @@ -325,7 +329,20 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxVertexProgramLocalParams=256; // r420 ctx->Const.MaxVertexProgramEnvParams=256; // r420 ctx->Const.MaxVertexProgramAddressRegs=1; - + +#if USE_ARB_F_P + ctx->Const.MaxFragmentProgramTemps = PFS_NUM_TEMP_REGS; + ctx->Const.MaxFragmentProgramAttribs = 11; /* copy i915... */ + ctx->Const.MaxFragmentProgramLocalParams = PFS_NUM_CONST_REGS; + ctx->Const.MaxFragmentProgramEnvParams = PFS_NUM_CONST_REGS; + ctx->Const.MaxFragmentProgramAluInstructions = PFS_MAX_ALU_INST; + ctx->Const.MaxFragmentProgramTexInstructions = PFS_MAX_TEX_INST; + ctx->Const.MaxFragmentProgramInstructions = PFS_MAX_ALU_INST+PFS_MAX_TEX_INST; + ctx->Const.MaxFragmentProgramTexIndirections = PFS_MAX_TEX_INDIRECT; + ctx->Const.MaxFragmentProgramAddressRegs = 0; /* and these are?? */ + ctx->_MaintainTexEnvProgram = GL_TRUE; +#endif + driInitExtensions(ctx, card_extensions, GL_TRUE); radeonInitSpanFuncs(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 0e6aa7f34e1..ebe6553797a 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "colormac.h" #include "radeon_context.h" +#define USE_ARB_F_P 1 + struct r300_context; typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; @@ -83,7 +85,11 @@ typedef GLubyte uint8_t; and pixel_shader structure later on */ #define CARD32 GLuint #include "vertex_shader.h" +#if USE_ARB_F_P == 1 +#include "r300_fragprog.h" +#else #include "pixel_shader.h" +#endif #undef CARD32 static __inline__ uint32_t r300PackFloat32(float fl) @@ -588,6 +594,67 @@ struct r300_vertex_program { int tex_regs[8]; }; +#if USE_ARB_F_P == 1 +#define PFS_MAX_ALU_INST 64 +#define PFS_MAX_TEX_INST 64 +#define PFS_MAX_TEX_INDIRECT 4 +#define PFS_NUM_TEMP_REGS 32 +#define PFS_NUM_CONST_REGS 32 +struct r300_fragment_program { + struct fragment_program mesa_program; + + GLcontext *ctx; + GLboolean translated; + GLboolean error; + + struct { + int length; + GLuint inst[PFS_MAX_TEX_INST]; + } tex; + + struct { + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + } inst[PFS_MAX_ALU_INST]; + } alu; + int v_pos; + int s_pos; + + struct { + int tex_offset; + int tex_end; + int alu_offset; + int alu_end; + } node[4]; + int cur_node; + int first_node_has_tex; + + int alu_offset; + int alu_end; + int tex_offset; + int tex_end; + + struct { + float x; + float y; + float z; + float w; + } param[32]; + int param_length; + + GLuint temps[PFS_NUM_TEMP_REGS]; + int temp_in_use; + GLuint used_in_node; + GLuint dest_in_node; + GLuint inputs[32]; /* don't actually need 32... */ + + int hwreg_in_use; + int max_temp_idx; +}; +#else /* 64 appears to be the maximum */ #define PSF_MAX_PROGRAM_LENGTH 64 @@ -652,6 +719,7 @@ struct r300_pixel_shader_state { float w; } param[MAX_PIXEL_SHADER_PARAMS]; }; +#endif // USE_ARB_F_P /* 8 is somewhat bogus... 
it is probably something like 24 */ #define R300_MAX_AOS_ARRAYS 8 @@ -682,8 +750,9 @@ struct r300_state { struct r300_texture_state texture; struct r300_vap_reg_state vap_reg; struct r300_vertex_shader_state vertex_shader; +#if USE_ARB_F_P == 0 struct r300_pixel_shader_state pixel_shader; - +#endif struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; int aos_count; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c new file mode 100644 index 00000000000..dbc650d437c --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -0,0 +1,937 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + */ + +/*TODO'S + * + * - Implement remaining arb_f_p opcodes + * - Depth write + * - Negate on individual components (implement in swizzle code?) 
+ * - Reuse input/temp regs, if they're no longer needed. + * - Find out whether there's any benefit in ordering registers the way + * fglrx does (see r300_reg.h). + * - and more... + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "program.h" +#include "nvfragprog.h" +#include "r300_context.h" +#if USE_ARB_F_P == 1 +#include "r300_fragprog.h" +#include "r300_reg.h" + +#define PFS_INVAL 0xFFFFFFFF + +static void dump_program(struct r300_fragment_program *rp); +static void emit_arith(struct r300_fragment_program *rp, int op, + pfs_reg_t dest, int mask, + pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2, + int flags); + +/*************************************** + * begin: useful data structures for fragment program generation + ***************************************/ + +/* description of r300 native hw instructions */ +const struct { + const char *name; + int argc; + int v_op; + int s_op; +} r300_fpop[] = { + { "MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD }, + { "DP3", 2, R300_FPI0_OUTC_DP3, PFS_INVAL }, + { "DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4 }, + { "MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN }, + { "MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX }, + { "CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP }, + { "FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC }, +/* should the vector insns below be REPL_ALPHA? 
*/ + { "EX2", 1, PFS_INVAL, R300_FPI2_OUTA_EX2 }, + { "LG2", 1, PFS_INVAL, R300_FPI2_OUTA_LG2 }, + { "RCP", 1, PFS_INVAL, R300_FPI2_OUTA_RCP }, + { "RSQ", 1, PFS_INVAL, R300_FPI2_OUTA_RSQ }, +}; + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ + SWIZZLE_##y, \ + SWIZZLE_##z, \ + SWIZZLE_ZERO)) + +/* vector swizzles r300 can support natively, with a couple of + * cases we handle specially + * + * pfs_reg_t.v_swz/pfs_reg_t.s_swz is an index into this table + **/ +static const struct r300_pfv_swizzle { + const char *name; + GLuint hash; /* swizzle value this matches */ + GLboolean native; + GLuint base; /* base value for hw swizzle */ + GLuint stride; /* difference in base between arg0/1/2 */ + GLboolean dep_sca; +} v_swiz[] = { +/* native swizzles */ + { "xyz", MAKE_SWZ3(X, Y, Z), GL_TRUE, R300_FPI0_ARGC_SRC0C_XYZ, 4, GL_FALSE }, + { "xxx", MAKE_SWZ3(X, X, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_XXX, 4, GL_FALSE }, + { "yyy", MAKE_SWZ3(Y, Y, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_YYY, 4, GL_FALSE }, + { "zzz", MAKE_SWZ3(Z, Z, Z), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZZZ, 4, GL_FALSE }, + { "yzx", MAKE_SWZ3(Y, Z, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_YZX, 1, GL_FALSE }, + { "zxy", MAKE_SWZ3(Z, X, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZXY, 1, GL_FALSE }, + { "wzy", MAKE_SWZ3(W, Z, Y), GL_TRUE, R300_FPI0_ARGC_SRC0CA_WZY, 1, GL_TRUE }, +/* special cases */ + { NULL, MAKE_SWZ3(W, W, W), GL_FALSE, 0, 0, GL_FALSE}, + { NULL, MAKE_SWZ3(ONE, ONE, ONE), GL_FALSE, R300_FPI0_ARGC_ONE, 0, GL_FALSE}, + { NULL, MAKE_SWZ3(ZERO, ZERO, ZERO), GL_FALSE, R300_FPI0_ARGC_ZERO, 0, GL_FALSE}, + { NULL, PFS_INVAL, GL_FALSE, R300_FPI0_ARGC_HALF, 0, GL_FALSE}, + { NULL, PFS_INVAL, GL_FALSE, 0, 0, 0 }, +}; +#define SWIZZLE_XYZ 0 +#define SWIZZLE_XXX 1 +#define SWIZZLE_WZY 6 +#define SWIZZLE_111 8 +#define SWIZZLE_000 9 +#define SWIZZLE_HHH 10 + +#define SWZ_X_MASK (7 << 0) +#define SWZ_Y_MASK (7 << 3) +#define SWZ_Z_MASK (7 << 6) +#define SWZ_W_MASK (7 << 9) +/* used during matching of non-native swizzles */ 
+static const struct { + GLuint hash; /* used to mask matching swizzle components */ + int mask; /* actual outmask */ + int count; /* count of components matched */ +} s_mask[] = { + { SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3}, + { SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2}, + { SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2}, + { SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2}, + { SWZ_X_MASK, 1, 1}, + { SWZ_Y_MASK, 2, 1}, + { SWZ_Z_MASK, 4, 1}, + { PFS_INVAL, PFS_INVAL, PFS_INVAL} +}; + +/* mapping from SWIZZLE_* to r300 native values for scalar insns */ +static const struct { + const char *name; + int base; /* hw value of swizzle */ + int stride; /* difference between SRC0/1/2 */ + GLboolean dep_vec; +} s_swiz[] = { + { "x", R300_FPI2_ARGA_SRC0C_X, 3, GL_TRUE }, + { "y", R300_FPI2_ARGA_SRC0C_Y, 3, GL_TRUE }, + { "z", R300_FPI2_ARGA_SRC0C_Z, 3, GL_TRUE }, + { "w", R300_FPI2_ARGA_SRC0A , 1, GL_FALSE }, + { "0", R300_FPI2_ARGA_ZERO , 0, GL_FALSE }, + { "1", R300_FPI2_ARGA_ONE , 0, GL_FALSE }, + { ".5", R300_FPI2_ARGA_HALF, 0, GL_FALSE } +}; +#define SWIZZLE_HALF 6 + +/* boiler-plate reg, for convenience */ +const pfs_reg_t pfs_default_reg = { + type: REG_TYPE_TEMP, + index: 0, + v_swz: 0 /* matches XYZ in table */, + s_swz: SWIZZLE_W, + vcross: 0, + scross: 0, + negate: 0, + has_w: GL_FALSE, + valid: GL_FALSE +}; + +/* constant one source */ +const pfs_reg_t pfs_one = { + type: REG_TYPE_CONST, + index: 0, + v_swz: SWIZZLE_111, + s_swz: SWIZZLE_ONE, + valid: GL_TRUE +}; + +/* constant zero source */ +const pfs_reg_t pfs_zero = { + type: REG_TYPE_CONST, + index: 0, + v_swz: SWIZZLE_000, + s_swz: SWIZZLE_ZERO, + valid: GL_TRUE +}; + +/*************************************** + * end: data structures + ***************************************/ + +#define ERROR(fmt, args...) 
do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", __FILE__, __func__, ##args); \ + rp->error = GL_TRUE; \ +} while(0) + +static int get_hw_temp(struct r300_fragment_program *rp) +{ + int r = ffs(~rp->hwreg_in_use); + if (!r) { + ERROR("Out of hardware temps\n"); + return 0; + } + + rp->hwreg_in_use |= (1 << --r); + if (r > rp->max_temp_idx) + rp->max_temp_idx = r; + + return r; +} + +static void free_hw_temp(struct r300_fragment_program *rp, int idx) +{ + rp->hwreg_in_use &= ~(1<temp_in_use); + if (!r.index) { + ERROR("Out of program temps\n"); + return r; + } + rp->temp_in_use |= (1 << --r.index); + + rp->temps[r.index] = get_hw_temp(rp); + r.valid = GL_TRUE; + return r; +} + +static void free_temp(struct r300_fragment_program *rp, pfs_reg_t r) +{ + if (!rp || !(rp->temp_in_use & (1<temps[r.index]); + rp->temp_in_use &= ~(1<param_length++; + rp->param[r.index].x = cp[0]; + rp->param[r.index].y = cp[1]; + rp->param[r.index].z = cp[2]; + rp->param[r.index].w = cp[3]; + } + + return r; +} + +static pfs_reg_t negate(pfs_reg_t r) +{ + r.negate = 1; + return r; +} + +static int swz_native(struct r300_fragment_program *rp, + pfs_reg_t src, pfs_reg_t *r) +{ + /* Native swizzle, nothing to see here */ + *r = src; + r->has_w = GL_TRUE; + return 3; +} + +static int swz_emit_partial(struct r300_fragment_program *rp, + pfs_reg_t src, pfs_reg_t *r, int mask) +{ + if (!r->valid) + *r = get_temp_reg(rp); + + /* A partial match, src.v_swz/mask define what parts of the + * desired swizzle we match */ + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, src, pfs_one, pfs_zero, 0); + + return s_mask[mask].count; +} + +static int swz_special_case(struct r300_fragment_program *rp, + pfs_reg_t src, pfs_reg_t *r, int mask) +{ + pfs_reg_t ssrc = pfs_default_reg; + + switch(GET_SWZ(v_swiz[src.v_swz].hash, 0)) { + case SWIZZLE_W: + ssrc = get_temp_reg(rp); + src.v_swz = SWIZZLE_WZY; + src.vcross = GL_TRUE; + if (s_mask[mask].count == 3) { + emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_XW, src, 
pfs_one, pfs_zero, 0); + *r = ssrc; + r->v_swz = SWIZZLE_XXX; + r->s_swz = SWIZZLE_W; + r->has_w = GL_TRUE; + } else { + if (!r->valid) + *r = get_temp_reg(rp); + emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_X, src, pfs_one, pfs_zero, 0); + ssrc.v_swz = SWIZZLE_XXX; + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, ssrc, pfs_one, pfs_zero, 0); + free_temp(rp, ssrc); + } + break; + case SWIZZLE_ONE: + case SWIZZLE_ZERO: + default: + ERROR("Unknown special-case swizzle! %d\n", src.v_swz); + return 0; + } + + return s_mask[mask].count; +} + +static pfs_reg_t swizzle(struct r300_fragment_program *rp, + pfs_reg_t src, + GLuint arbswz) +{ + pfs_reg_t r = pfs_default_reg; + + int c_mask = 0; + int v_matched = 0; + src.v_swz = SWIZZLE_XYZ; + src.s_swz = GET_SWZ(arbswz, 3); + if (src.s_swz >= SWIZZLE_X && src.s_swz <= SWIZZLE_Z) + src.scross = GL_TRUE; + + do { + do { +#define CUR_HASH (v_swiz[src.v_swz].hash & s_mask[c_mask].hash) + if (CUR_HASH == (arbswz & s_mask[c_mask].hash)) { + if (v_swiz[src.v_swz].native == GL_FALSE) + v_matched += swz_special_case(rp, src, &r, c_mask); + else if (s_mask[c_mask].count == 3) + v_matched += swz_native(rp, src, &r); + else + v_matched += swz_emit_partial(rp, src, &r, c_mask); + + if (v_matched == 3) { + if (!r.has_w) { + emit_arith(rp, PFS_OP_MAD, r, WRITEMASK_W, src, pfs_one, pfs_zero, 0); + r.s_swz = SWIZZLE_W; + } + + if (r.type != REG_TYPE_CONST) { + if (r.v_swz == SWIZZLE_WZY) + r.vcross = GL_TRUE; + if (r.s_swz >= SWIZZLE_X && r.s_swz <= SWIZZLE_Z) + r.scross = GL_TRUE; + } + return r; + } + + arbswz &= ~s_mask[c_mask].hash; + } + } while(v_swiz[++src.v_swz].hash != PFS_INVAL); + } while (s_mask[++c_mask].hash != PFS_INVAL); + + ERROR("should NEVER get here\n"); + return r; +} + +static pfs_reg_t t_src(struct r300_fragment_program *rp, + struct fp_src_register fpsrc) { + pfs_reg_t r = pfs_default_reg; + + switch (fpsrc.File) { + case PROGRAM_TEMPORARY: + r.index = fpsrc.Index; + r.valid = GL_TRUE; + break; + case PROGRAM_INPUT: 
+ r.index = fpsrc.Index; + r.type = REG_TYPE_INPUT; + r.valid = GL_TRUE; + break; + case PROGRAM_LOCAL_PARAM: + r = emit_const4fv(rp, rp->mesa_program.Base.LocalParams[fpsrc.Index]); + break; + case PROGRAM_ENV_PARAM: + r = emit_const4fv(rp, rp->ctx->FragmentProgram.Parameters[fpsrc.Index]); + break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + r = emit_const4fv(rp, rp->mesa_program.Parameters->ParameterValues[fpsrc.Index]); + break; + default: + ERROR("unknown SrcReg->File %x\n", fpsrc.File); + return r; + } + + /* no point swizzling ONE/ZERO/HALF constants... */ + if (r.v_swz < SWIZZLE_111 && r.s_swz < SWIZZLE_ZERO) + r = swizzle(rp, r, fpsrc.Swizzle); + + /* WRONG! Need to be able to do individual component negation, + * should probably handle this in the swizzling code unless + * all components are negated, then we can do this natively */ + if (fpsrc.NegateBase) + r.negate = GL_TRUE; + + return r; +} + +static pfs_reg_t t_dst(struct r300_fragment_program *rp, + struct fp_dst_register dest) { + pfs_reg_t r = pfs_default_reg; + + switch (dest.File) { + case PROGRAM_TEMPORARY: + r.index = dest.Index; + r.valid = GL_TRUE; + return r; + case PROGRAM_OUTPUT: + r.type = REG_TYPE_OUTPUT; + switch (dest.Index) { + case 0: + r.valid = GL_TRUE; + return r; + case 1: + ERROR("I don't know how to write depth!\n"); + return r; + default: + ERROR("Bad DstReg->Index 0x%x\n", dest.Index); + return r; + } + default: + ERROR("Bad DstReg->File 0x%x\n", dest.File); + return r; + } +} + +static void sync_streams(struct r300_fragment_program *rp) { + /* Bring vector/scalar streams into sync, inserting nops into + * whatever stream is lagging behind + * + * I'm using "MAD t0, t0, 1.0, 0.0" as a NOP + */ + while (rp->v_pos != rp->s_pos) { + if (rp->s_pos > rp->v_pos) { + rp->alu.inst[rp->v_pos].inst0 = 0x00050A80; + rp->alu.inst[rp->v_pos].inst1 = 0x03820800; + rp->v_pos++; + } else { + rp->alu.inst[rp->s_pos].inst2 = 0x00040889; + rp->alu.inst[rp->s_pos].inst3 = 0x00820800; 
+ rp->s_pos++; + } + } +} + +static void emit_tex(struct r300_fragment_program *rp, + struct fp_instruction *fpi, + int opcode) +{ + pfs_reg_t coord = t_src(rp, fpi->SrcReg[0]); + pfs_reg_t dest = t_dst(rp, fpi->DstReg); + int unit = fpi->TexSrcUnit; + int hwsrc, hwdest, flags = 0; + + switch (coord.type) { + case REG_TYPE_TEMP: + hwsrc = rp->temps[coord.index]; + break; + case REG_TYPE_INPUT: + hwsrc = rp->inputs[coord.index]; + break; + case REG_TYPE_CONST: + hwsrc = coord.index; + flags = R300_FPITX_SRC_CONST; + break; + default: + ERROR("Unknown coord.type = %d\n", coord.type); + return; + } + hwdest = rp->temps[dest.index]; + + /* Indirection if source has been written in this node, or if the dest has + * been read/written in this node + */ + if ((coord.type != REG_TYPE_CONST && (rp->dest_in_node & (1<used_in_node & (1<cur_node == 3) { /* We only support 4 natively */ + ERROR("too many levels of texture indirection\n"); + return; + } + /* Finish off current node */ + sync_streams(rp); + rp->node[rp->cur_node].alu_end = rp->v_pos - 1; + + /* Start new node */ + rp->cur_node++; + rp->used_in_node = 0; + rp->dest_in_node = 0; + rp->node[rp->cur_node].tex_offset = rp->tex.length; + rp->node[rp->cur_node].alu_offset = rp->v_pos; + rp->node[rp->cur_node].tex_end = -1; + rp->node[rp->cur_node].alu_end = -1; + } + + if (rp->cur_node == 0) rp->first_node_has_tex = 1; + + rp->tex.inst[rp->tex.length++] = 0 + | (hwsrc << R300_FPITX_SRC_SHIFT) + | (hwdest << R300_FPITX_DST_SHIFT) + | (unit << R300_FPITX_IMAGE_SHIFT) + | (opcode << R300_FPITX_OPCODE_SHIFT) /* not entirely sure about this */ + | flags; + rp->dest_in_node |= (1 << hwdest); + + rp->node[rp->cur_node].tex_end++; +} + +static void emit_arith(struct r300_fragment_program *rp, int op, + pfs_reg_t dest, int mask, + pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2, + int flags) +{ + pfs_reg_t src[3] = { src0, src1, src2 }; + int hwdest, hwsrc[3]; + int argc; + int v_idx = rp->v_pos, s_idx = rp->s_pos; + GLuint 
inst[4] = { 0, 0, 0, 0 }; + int i; + + if (!dest.valid || !src0.valid || !src1.valid || !src2.valid) { + ERROR("invalid register. dest/src0/src1/src2 valid = %d/%d/%d/%d\n", + dest.valid, src0.valid, src1.valid, src2.valid); + return; + } + + /* check opcode */ + if (op > MAX_PFS_OP) { + ERROR("unknown opcode!\n"); + return; + } + argc = r300_fpop[op].argc; + + /* grab hwregs of sources */ + for (i=0;iinputs[src[i].index]; + rp->used_in_node |= (1 << hwsrc[i]); + break; + case REG_TYPE_TEMP: + /* make sure insn ordering is right... */ + if ((src[i].vcross && v_idx < s_idx) || + (src[i].scross && s_idx < v_idx)) { + sync_streams(rp); + v_idx = s_idx = rp->v_pos; + } + + hwsrc[i] = rp->temps[src[i].index]; + rp->used_in_node |= (1 << hwsrc[i]); + break; + case REG_TYPE_CONST: + hwsrc[i] = src[i].index; + break; + default: + ERROR("invalid source reg\n"); + return; + } + } + + /* grab hwregs of dest */ + switch (dest.type) { + case REG_TYPE_TEMP: + hwdest = rp->temps[dest.index]; + rp->dest_in_node |= (1 << hwdest); + rp->used_in_node |= (1 << hwdest); + break; + case REG_TYPE_OUTPUT: + hwdest = 0; + break; + default: + ERROR("invalid dest reg type %d\n", dest.type); + return; + } + + for (i=0;i<3;i++) { + if (i < argc) { + inst[0] |= (v_swiz[src[i].v_swz].base + (i * v_swiz[src[i].v_swz].stride)) << (i * 7); + inst[2] |= (s_swiz[src[i].s_swz].base + (i * s_swiz[src[i].s_swz].stride)) << (i * 7); + if (src[i].negate) { + inst[0] |= (1<<5) << (i*7); + inst[2] |= (1<<5) << (i*7); + } + inst[1] |= hwsrc[i] << (i*6); + inst[3] |= hwsrc[i] << (i*6); + if (src[i].type == REG_TYPE_CONST) { + inst[1] |= (1<<5) << (i*6); + inst[3] |= (1<<5) << (i*6); + } + } else { + /* read constant zero, may aswell use a ZERO swizzle aswell.. 
*/ + inst[0] |= R300_FPI0_ARGC_ZERO << (i*7); + inst[2] |= R300_FPI2_ARGA_ZERO << (i*7); + inst[1] |= (1<<5) << (i*6); + inst[3] |= (1<<5) << (i*6); + } + } + + if (mask & WRITEMASK_XYZ) { + rp->alu.inst[v_idx].inst0 = inst[0] | r300_fpop[op].v_op | flags; + rp->alu.inst[v_idx].inst1 = inst[1] | + (hwdest << R300_FPI1_DSTC_SHIFT) | + ((mask & WRITEMASK_XYZ) << (dest.type == REG_TYPE_OUTPUT ? 26 : 23)); + rp->v_pos = v_idx + 1; + } + + if (mask & WRITEMASK_W) { + rp->alu.inst[s_idx].inst2 = inst[2] | r300_fpop[op].s_op | flags; + rp->alu.inst[s_idx].inst3 = inst[3] | + (hwdest << R300_FPI3_DSTA_SHIFT) | + (1 << (dest.type == REG_TYPE_OUTPUT ? 24 : 23)); + rp->s_pos = s_idx + 1; + } + +// sync_streams(rp); + return; +}; + +static GLboolean parse_program(struct r300_fragment_program *rp) +{ + struct fragment_program *mp = &rp->mesa_program; + const struct fp_instruction *inst = mp->Instructions; + struct fp_instruction *fpi; + pfs_reg_t src0, src1, src2, dest, temp; + int flags = 0; + + if (!inst || inst[0].Opcode == FP_OPCODE_END) { + ERROR("empty program?\n"); + return GL_FALSE; + } + + for (fpi=mp->Instructions; fpi->Opcode != FP_OPCODE_END; fpi++) { + if (inst->Saturate) flags = R300_FPI0_OUTC_SAT; /* same for OUTA */ + + switch (fpi->Opcode) { + case FP_OPCODE_ABS: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_ADD: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), + pfs_one, + t_src(rp, fpi->SrcReg[1]), + flags); + break; + case FP_OPCODE_CMP: + case FP_OPCODE_COS: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_DP3: + case FP_OPCODE_DP4: + case FP_OPCODE_DPH: + case FP_OPCODE_DST: + case FP_OPCODE_EX2: + case FP_OPCODE_FLR: + case FP_OPCODE_FRC: + case FP_OPCODE_KIL: + case FP_OPCODE_LG2: + case FP_OPCODE_LIT: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_LRP: + /* TODO: use the special LRP form if possible */ + src0 = 
t_src(rp, fpi->SrcReg[0]); + src1 = t_src(rp, fpi->SrcReg[1]); + src2 = t_src(rp, fpi->SrcReg[2]); + // result = tmp0tmp1 + (1 - tmp0)tmp2 + // = tmp0tmp1 + tmp2 + (-tmp0)tmp2 + // MAD temp, -tmp0, tmp2, tmp2 + // MAD result, tmp0, tmp1, temp + temp = get_temp_reg(rp); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW, + negate(src0), src2, src2, 0); + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + src0, src1, temp, flags); + free_temp(rp, temp); + break; + case FP_OPCODE_MAD: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), + t_src(rp, fpi->SrcReg[1]), + t_src(rp, fpi->SrcReg[2]), + flags); + break; + case FP_OPCODE_MAX: + case FP_OPCODE_MIN: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_MOV: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), pfs_one, pfs_zero, + flags); + break; + case FP_OPCODE_MUL: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), + t_src(rp, fpi->SrcReg[1]), + pfs_zero, + flags); + break; + case FP_OPCODE_POW: + case FP_OPCODE_RCP: + case FP_OPCODE_RSQ: + case FP_OPCODE_SCS: + case FP_OPCODE_SGE: + case FP_OPCODE_SIN: + case FP_OPCODE_SLT: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_SUB: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), + pfs_one, + negate(t_src(rp, fpi->SrcReg[1])), + flags); + break; + case FP_OPCODE_SWZ: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_TEX: + emit_tex(rp, fpi, R300_FPITX_OP_TEX); + break; + case FP_OPCODE_TXB: + emit_tex(rp, fpi, R300_FPITX_OP_TXB); + break; + case FP_OPCODE_TXP: + emit_tex(rp, fpi, R300_FPITX_OP_TXP); + break; + case FP_OPCODE_XPD: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + default: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; 
+ } + + if (rp->error) + return GL_FALSE; + } + + return GL_TRUE; +} + +/* - Init structures + * - Determine what hwregs each input corresponds to + */ +void init_program(struct r300_fragment_program *rp) +{ + struct fragment_program *mp = &rp->mesa_program; + struct fp_instruction *fpi; + GLuint InputsRead = mp->InputsRead; + GLuint fp_reg = 0; + GLuint temps_used = 0; /* for rp->temps[] */ + int i; + + rp->translated = GL_FALSE; + rp->error = GL_FALSE; + + rp->v_pos = 0; + rp->s_pos = 0; + + rp->tex.length = 0; + rp->node[0].alu_offset = 0; + rp->node[0].alu_end = -1; + rp->node[0].tex_offset = 0; + rp->node[0].tex_end = -1; + rp->cur_node = 0; + rp->first_node_has_tex = 0; + rp->used_in_node = 0; + rp->dest_in_node = 0; + + rp->param_length = 0; + rp->temp_in_use = 0; + rp->hwreg_in_use = 0; + rp->max_temp_idx = 0; + + /* Work out what temps the Mesa inputs correspond to, this must match + * what setup_rs_unit does, which shouldn't be a problem as rs_unit + * configures itself based on the fragprog's InputsRead + */ + + /* Texcoords come first */ + for (i=0;ictx->Const.MaxTextureUnits;i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + rp->hwreg_in_use |= (1<inputs[FRAG_ATTRIB_TEX0+i] = fp_reg++; + } + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) { + rp->hwreg_in_use |= (1<inputs[FRAG_ATTRIB_COL0] = fp_reg++; + } + InputsRead &= ~FRAG_BIT_COL0; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i=0;i<32;i++) + if (InputsRead & (1<inputs[i] = 0; + } + + /* Possibly the worst part of how I went about this... Find out what + * temps are used by the mesa program so we don't clobber something + * when we need a temp for other reasons. + * + * Possibly not too bad actually, as we could add to this later and + * find out when inputs are last used so we can reuse them as temps. 
+ */ + if (!mp->Instructions) { + ERROR("No instructions found in program\n"); + return; + } + for (fpi=mp->Instructions;fpi->Opcode != FP_OPCODE_END; fpi++) { + for (i=0;i<3;i++) { + if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) { + if (!(temps_used & (1 << fpi->SrcReg[i].Index))) { + temps_used |= (1 << fpi->SrcReg[i].Index); + rp->temps[fpi->SrcReg[i].Index] = get_hw_temp(rp); + } + } + } + /* needed? surely if a program writes a temp it'll read it again */ + if (fpi->DstReg.File == PROGRAM_TEMPORARY) { + if (!(temps_used & (1 << fpi->DstReg.Index))) { + temps_used |= (1 << fpi->DstReg.Index); + rp->temps[fpi->DstReg.Index] = get_hw_temp(rp); + } + } + } + rp->temp_in_use = temps_used; + + /* Ask Mesa nicely to fill in ParameterValues for us */ + _mesa_load_state_parameters(rp->ctx, rp->mesa_program.Parameters); +} + +void translate_fragment_shader(struct r300_fragment_program *rp) +{ + int i; + + init_program(rp); + + if (parse_program(rp) == GL_FALSE) { + dump_program(rp); + return; + } + + /* Finish off */ + sync_streams(rp); + rp->node[rp->cur_node].alu_end = rp->v_pos - 1; + rp->alu_offset = 0; + rp->alu_end = rp->v_pos - 1; + rp->tex_offset = 0; + rp->tex_end = rp->tex.length - 1; + + rp->translated = GL_TRUE; + if (0) dump_program(rp); +} + +/* just some random things... 
*/ +static void dump_program(struct r300_fragment_program *rp) +{ + int i; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Mesa program:\n"); + fprintf(stderr, "-------------\n"); + _mesa_debug_fp_inst(rp->mesa_program.NumTexInstructions + + rp->mesa_program.NumAluInstructions, + rp->mesa_program.Instructions); + fflush(stdout); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + for (i=0;i<(rp->cur_node+1);i++) { + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, alu_end: %d, tex_end: %d\n", i, + rp->node[i].alu_offset, + rp->node[i].tex_offset, + rp->node[i].alu_end, + rp->node[i].tex_end); + } + +/* dump program in pretty_print_command_stream.tcl-readable format */ + fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR0_0 >> 2))); + for (i=0;i<=rp->alu_end;i++) + fprintf(stderr, "%08x\n", rp->alu.inst[i].inst0); + fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR1_0 >> 2))); + for (i=0;i<=rp->alu_end;i++) + fprintf(stderr, "%08x\n", rp->alu.inst[i].inst1); + fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR2_0 >> 2))); + for (i=0;i<=rp->alu_end;i++) + fprintf(stderr, "%08x\n", rp->alu.inst[i].inst2); + fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR3_0 >> 2))); + for (i=0;i<=rp->alu_end;i++) + fprintf(stderr, "%08x\n", rp->alu.inst[i].inst3); + fprintf(stderr, "00000000\n"); + +} +#endif // USE_ARB_F_P == 1 diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h new file mode 100644 index 00000000000..b98c6c03a77 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -0,0 +1,46 @@ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "program.h" +#include "r300_context.h" +#include "nvfragprog.h" + +/* representation of a register for emit_arith/swizzle */ 
+typedef struct _pfs_reg_t { + enum { + REG_TYPE_INPUT, + REG_TYPE_OUTPUT, + REG_TYPE_TEMP, + REG_TYPE_CONST + } type:2; + GLuint index:6; + GLuint v_swz:5; + GLuint s_swz:5; + GLboolean vcross:1; + GLboolean scross:1; + GLuint negate:1; //XXX: we need to handle negate individually + GLboolean has_w:1; + GLboolean valid:1; +} pfs_reg_t; + +/* supported hw opcodes */ +#define PFS_OP_MAD 0 +#define PFS_OP_DP3 1 +#define PFS_OP_DP4 2 +#define PFS_OP_MIN 3 +#define PFS_OP_MAX 4 +#define PFS_OP_CMP 5 +#define PFS_OP_FRC 6 +#define PFS_OP_EX2 7 +#define PFS_OP_LG2 8 +#define PFS_OP_RCP 9 +#define PFS_OP_RSQ 10 +#define MAX_PFS_OP 10 +#define OP(n) PFS_OP_##n + +#endif + diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 72192324099..3d090c37108 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -823,6 +823,13 @@ I am fairly certain that they are correct unless stated otherwise in comments. # define R300_FPITX_DST_MASK (31 << 6) # define R300_FPITX_IMAGE_SHIFT 11 # define R300_FPITX_IMAGE_MASK (15 << 11) /* GUESS based on layout and native limits */ +/* Unsure if these are opcodes, or some kind of bitfield, but this is how + * they were set when I checked + */ +# define R300_FPITX_OPCODE_SHIFT 15 +# define R300_FPITX_OP_TEX 1 +# define R300_FPITX_OP_TXP 3 +# define R300_FPITX_OP_TXB 4 /* ALU // The ALU instructions register blocks are enumerated according to the order @@ -997,13 +1004,13 @@ I am fairly certain that they are correct unless stated otherwise in comments. 
# define R300_FPI2_ARG1A_MASK (31 << 7) # define R300_FPI2_ARG1A_NEG (1 << 12) # define R300_FPI2_ARG2A_SHIFT 14 -# define R300_FPI2_AEG2A_MASK (31 << 14) +# define R300_FPI2_ARG2A_MASK (31 << 14) # define R300_FPI2_ARG2A_NEG (1 << 19) # define R300_FPI2_SPECIAL_LRP (1 << 21) # define R300_FPI2_OUTA_MAD (0 << 23) # define R300_FPI2_OUTA_DP4 (1 << 23) -# define R300_RPI2_OUTA_MIN (2 << 23) -# define R300_RPI2_OUTA_MAX (3 << 23) +# define R300_FPI2_OUTA_MIN (2 << 23) +# define R300_FPI2_OUTA_MAX (3 << 23) # define R300_FPI2_OUTA_CMP (6 << 23) # define R300_FPI2_OUTA_FRC (7 << 23) # define R300_FPI2_OUTA_EX2 (8 << 23) diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index c1f5914aad1..c3e4a041d54 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -5,6 +5,9 @@ #include "program.h" #include "r300_context.h" #include "nvvertprog.h" +#if USE_ARB_F_P == 1 +#include "r300_fragprog.h" +#endif static void r300BindProgram(GLcontext *ctx, GLenum target, struct program *prog) { @@ -13,6 +16,9 @@ static void r300BindProgram(GLcontext *ctx, GLenum target, struct program *prog) switch(target){ case GL_VERTEX_PROGRAM_ARB: +#if USE_ARB_F_P == 1 + case GL_FRAGMENT_PROGRAM_ARB: +#endif //rmesa->current_vp = vp; break; default: @@ -24,7 +30,11 @@ static void r300BindProgram(GLcontext *ctx, GLenum target, struct program *prog) static struct program *r300NewProgram(GLcontext *ctx, GLenum target, GLuint id) { struct r300_vertex_program *vp; +#if USE_ARB_F_P == 1 + struct r300_fragment_program *fp; +#else struct fragment_program *fp; +#endif struct ati_fragment_shader *afs; switch(target){ @@ -33,9 +43,14 @@ static struct program *r300NewProgram(GLcontext *ctx, GLenum target, GLuint id) return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); case GL_FRAGMENT_PROGRAM_ARB: +#if USE_ARB_F_P == 1 + fp=CALLOC_STRUCT(r300_fragment_program); + fp->ctx = ctx; + return 
_mesa_init_fragment_program(ctx, &fp->mesa_program, target, id); +#else fp=CALLOC_STRUCT(fragment_program); return _mesa_init_fragment_program(ctx, fp, target, id); - +#endif case GL_FRAGMENT_PROGRAM_NV: fp=CALLOC_STRUCT(fragment_program); return _mesa_init_fragment_program(ctx, fp, target, id); @@ -64,15 +79,20 @@ void r300ProgramStringNotify(GLcontext *ctx, GLenum target, struct program *prog) { struct r300_vertex_program *vp=(void *)prog; - +#if USE_ARB_F_P == 1 + struct r300_fragment_program *fp=(void *)prog; +#endif + switch(target) { case GL_VERTEX_PROGRAM_ARB: /*vp->translated=GL_FALSE; translate_vertex_shader(vp);*/ //debug_vp(ctx, vp); break; - case GL_FRAGMENT_PROGRAM_ARB: +#if USE_ARB_F_P == 1 + fp->translated = GL_FALSE; +#endif break; } } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index a051706557a..b143ad279be 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -58,10 +58,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_reg.h" #include "r300_program.h" #include "r300_emit.h" +#if USE_ARB_F_P == 1 +#include "r300_fragprog.h" +#else #include "r300_fixed_pipelines.h" +#include "r300_texprog.h" +#endif #include "r300_tex.h" #include "r300_maos.h" -#include "r300_texprog.h" static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) { @@ -1094,6 +1098,99 @@ void r300_setup_textures(GLcontext *ctx) fprintf(stderr, "TX_ENABLE: %08x max_texture_unit=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], max_texture_unit); } +#if USE_ARB_F_P == 1 +void r300_setup_rs_unit(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int i, vp_reg, fp_reg, in_texcoords; + /* I'm still unsure if these are needed */ + GLuint interp_magic[8] = { + 0x00, + 0x40, + 0x80, + 0xC0, + 0x00, + 0x00, + 0x00, + 0x00 + }; + GLuint OutputsWritten; + GLuint InputsRead; + + if(hw_tcl_on) + OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->OutputsWritten; + else + OutputsWritten = r300->state.render_inputs; + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->InputsRead; + else { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; /* This should only ever happen once.. */ + } + /* This needs to be rewritten - it is a hack at best */ + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + vp_reg = fp_reg = in_texcoords = 0; + r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0; + + for (i=0;iConst.MaxTextureUnits;i++) { + if (OutputsWritten & (hw_tcl_on ? (1 << (VERT_RESULT_TEX0+i)) : (_TNL_BIT_TEX0<hw.ri.cmd[R300_RI_INTERP_0+i] = 0 + | R300_RS_INTERP_USED + | (vp_reg << R300_RS_INTERP_SRC_SHIFT) + | interp_magic[i]; + + if (InputsRead & (FRAG_BIT_TEX0<state.texture.tc_count != 0); + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0 + | R300_RS_ROUTE_ENABLE + | i /* source INTERP */ + | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); + + if (OutputsWritten & (hw_tcl_on ? 
(1 << (VERT_RESULT_TEX0+i)) : (_TNL_BIT_TEX0<hw.rr.cmd[R300_RR_ROUTE_0] |= 0 + | R300_RS_ROUTE_0_COLOR + | (fp_reg << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + InputsRead &= ~FRAG_BIT_COL0; + } + + r300->hw.rc.cmd[1] = 0 + | (in_texcoords << R300_RS_CNTL_TC_CNT_SHIFT) + | R300_RS_CNTL_0_UNKNOWN_7 + | R300_RS_CNTL_0_UNKNOWN_18; + + if (r300->state.texture.tc_count > 0) { + r300->hw.rr.cmd[R300_RR_CMD_0] = cmducs(R300_RS_ROUTE_0, fp_reg); + r300->hw.rc.cmd[2] = 0xC0 | (fp_reg-1); /* index of highest RS_ROUTE used*/ + } else { + r300->hw.rr.cmd[R300_RR_CMD_0] = cmducs(R300_RS_ROUTE_0, 1); + r300->hw.rc.cmd[2] = 0x0; + } + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); +} +#else void r300_setup_rs_unit(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -1120,7 +1217,6 @@ void r300_setup_rs_unit(GLcontext *ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); -#if 1 cur_reg = 0; r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0; @@ -1163,48 +1259,8 @@ void r300_setup_rs_unit(GLcontext *ctx) // fprintf(stderr, "rendering with %d texture co-ordinate sets\n", cur_reg); - -#else - for(i = 1; i <= 8; ++i) - r300->hw.ri.cmd[i] = 0x00d10000; - r300->hw.ri.cmd[R300_RI_INTERP_1] |= R300_RS_INTERP_1_UNKNOWN; - r300->hw.ri.cmd[R300_RI_INTERP_2] |= R300_RS_INTERP_2_UNKNOWN; - r300->hw.ri.cmd[R300_RI_INTERP_3] |= R300_RS_INTERP_3_UNKNOWN; - -#if 1 - for(i = 2; i <= 8; ++i) - r300->hw.ri.cmd[i] |= 4; -#endif - - for(i = 1; i <= 8; ++i) - r300->hw.rr.cmd[i] = 0; - /* textures enabled ? */ - if(r300->state.texture.tc_count>0){ - - /* This code only really works with one set of texture coordinates */ - - /* The second constant is needed to get glxgears display anything .. 
*/ - r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7 - | R300_RS_CNTL_0_UNKNOWN_18 - | (r300->state.texture.tc_count<hw.rc.cmd[2] = 0xc0; - - - ((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1; - r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x24008; - - } else { - - /* The second constant is needed to get glxgears display anything .. */ - r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7 | R300_RS_CNTL_0_UNKNOWN_18; - r300->hw.rc.cmd[2] = 0; - - ((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1; - r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x4000; - - } -#endif } +#endif // USE_ARB_F_P #define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) @@ -1477,7 +1533,109 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) #endif } +/* This is probably wrong for some values, I need to test this + * some more. Range checking would be a good idea also.. + * + * But it works for most things. I'll fix it later if someone + * else with a better clue doesn't + */ +unsigned int r300PackFloat24(float f) +{ + float mantissa; + int exponent; + unsigned int float24 = 0; + + if (f == 0.0) return 0; + + mantissa = frexpf(f, &exponent); + + /* Handle -ve */ + if (mantissa < 0) { + float24 |= (1<<23); + mantissa = mantissa * -1.0; + } + /* Handle exponent, bias of 63 */ + exponent += 62; + float24 |= (exponent << 16); + /* Kill 7 LSB of mantissa */ + float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7; + + return float24; +} + +#if USE_ARB_F_P == 1 +void r300SetupPixelShader(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_fragment_program *rp = ctx->FragmentProgram._Current; + int i,k; + if (!rp) /* should only happenen once, just after context is created */ + return; + + if (!rp->translated) { + translate_fragment_shader(ctx->FragmentProgram._Current); + if (!rp->translated) { + fprintf(stderr, "%s: No valid fragment shader, exiting\n", __func__); + exit(-1); + } + } + + R300_STATECHANGE(rmesa, fpt); + for(i=0;itex.length;i++) + 
rmesa->hw.fpt.cmd[R300_FPT_INSTR_0+i]=rp->tex.inst[i]; + rmesa->hw.fpt.cmd[R300_FPT_CMD_0]=cmducs(R300_PFS_TEXI_0, rp->tex.length); + +#define OUTPUT_FIELD(st, reg, field) \ + R300_STATECHANGE(rmesa, st); \ + for(i=0;i<=rp->alu_end;i++) \ + rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=rp->alu.inst[i].field;\ + rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmducs(reg, rp->alu_end+1); + + OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0); + OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1); + OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2); + OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3); +#undef OUTPUT_FIELD + + R300_STATECHANGE(rmesa, fp); + /* I just want to say, the way these nodes are stored.. weird.. */ + for (i=0,k=(4-(rp->cur_node+1));i<4;i++,k++) { + if (i<(rp->cur_node+1)) { + rmesa->hw.fp.cmd[R300_FP_NODE0+k]= + (rp->node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) + | (rp->node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT) + | (rp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) + | (rp->node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT) + | ( (k==3) ? 
R300_PFS_NODE_LAST_NODE : 0); + } else { + rmesa->hw.fp.cmd[R300_FP_NODE0+(3-i)] = 0; + } + } + + /* PFS_CNTL_0 */ + rmesa->hw.fp.cmd[R300_FP_CNTL0]= + rp->cur_node + | (rp->first_node_has_tex<<3); + /* PFS_CNTL_1 */ + rmesa->hw.fp.cmd[R300_FP_CNTL1]=rp->max_temp_idx; + /* PFS_CNTL_2 */ + rmesa->hw.fp.cmd[R300_FP_CNTL2]= + (rp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + | (rp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) + | (rp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + | (rp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); + + R300_STATECHANGE(rmesa, fpp); + for(i=0;iparam_length;i++){ + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+0]=r300PackFloat24(rp->param[i].x); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+1]=r300PackFloat24(rp->param[i].y); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+2]=r300PackFloat24(rp->param[i].z); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0+4*i+3]=r300PackFloat24(rp->param[i].w); + } + rmesa->hw.fpp.cmd[R300_FPP_CMD_0]=cmducs(R300_PFS_PARAM_0_X, rp->param_length*4); +} +#else /* just a skeleton for now.. */ void r300GenerateTexturePixelShader(r300ContextPtr r300) { @@ -1652,6 +1810,7 @@ int i,k; rmesa->hw.fpp.cmd[R300_FPP_CMD_0]=cmducs(R300_PFS_PARAM_0_X, rmesa->state.pixel_shader.param_length); } +#endif /** * Called by Mesa after an internal state update. 
@@ -2118,3 +2277,4 @@ void r300InitStateFuncs(struct dd_function_table* functions) functions->PolygonOffset = r300PolygonOffset; functions->PolygonMode = r300PolygonMode; } + diff --git a/src/mesa/drivers/dri/r300/r300_texprog.c b/src/mesa/drivers/dri/r300/r300_texprog.c index eab8dbe7ffd..71d28db0529 100644 --- a/src/mesa/drivers/dri/r300/r300_texprog.c +++ b/src/mesa/drivers/dri/r300/r300_texprog.c @@ -17,6 +17,7 @@ #include "radeon_ioctl.h" #include "radeon_state.h" #include "r300_context.h" +#if USE_ARB_F_P == 0 #include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" @@ -265,4 +266,5 @@ void r300GenerateTextureFragmentShader(r300ContextPtr r300) p->alu_end = ps->program.alu.length - 1; p->alu_offset = 0; } +#endif // USE_ARB_F_P == 0 -- 2.30.2