From: Corbin Simpson Date: Wed, 7 May 2008 05:18:28 +0000 (-0700) Subject: r5xx: Index inputs and temps. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=dc24fb51a31de8443e653655105d4e1c88847bcc;p=mesa.git r5xx: Index inputs and temps. This is not the same as r3xx indexing. It only tries to protect inputs on the pixel stack from getting clobbered by temps or texs. Texs don't need special treatment since they read from special input regs and write to the same temp regs as ALU/FC instructions. --- diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 980a26ffdd8..815a729969a 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -807,10 +807,7 @@ struct r500_fragment_program { int cur_node; int first_node_has_tex; - int alu_offset; - int alu_end; - int tex_offset; - int tex_end; + int temp_reg_offset; /* Hardware constants. * Contains a pointer to the value. The destination of the pointer diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 15bc5798e0b..c753c2b6f7a 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -65,6 +65,9 @@ #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs +#define R500_US_NUM_TEMP_REGS 128 +#define R500_US_NUM_CONST_REGS 256 + /* "Register" flags */ #define REG_CONSTANT (1 << 8) #define REG_SRC_REL (1 << 9) @@ -121,6 +124,30 @@ static inline GLuint make_strq_swizzle(struct prog_src_register src) { return swiz; } +static int get_temp(struct r500_fragment_program *fp, int slot) { + + COMPILE_STATE; + + int r = slot + fp->temp_reg_offset; + + while (cs->inputs[r].refcount != 0) { + /* Crap, taken. */ + r++; + } + + fp->temp_reg_offset = r - slot; + + if (r >= R500_US_NUM_TEMP_REGS) { + ERROR("Out of hardware temps!\n"); + return 0; + } + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + /* Borrowed verbatim from r300_fragprog since it hasn't changed. */ static GLuint emit_const4fv(struct r500_fragment_program *fp, const GLfloat * cp) @@ -134,8 +161,7 @@ static GLuint emit_const4fv(struct r500_fragment_program *fp, } if (index >= fp->const_nr) { - /* TODO: This should be r5xx nums, not r300 */ - if (index >= PFS_NUM_CONST_REGS) { + if (index >= R500_US_NUM_CONST_REGS) { ERROR("Out of hw constants!\n"); return reg; } @@ -152,15 +178,12 @@ static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_registe GLuint reg; switch (src.File) { case PROGRAM_TEMPORARY: - // reg = (src.Index << 0x1) | 0x1; - reg = src.Index; - if (src.Index > fp->max_temp_idx) - fp->max_temp_idx = src.Index; + reg = get_temp(fp, src.Index); break; case PROGRAM_INPUT: /* Ugly hack needed to work around Mesa; * fragments don't get loaded right otherwise! */ - reg = 0x0; + reg = src.Index; break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: @@ -180,10 +203,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist GLuint reg; switch (dest.File) { case PROGRAM_TEMPORARY: - // reg = (dest.Index << 0x1) | 0x1; - reg = dest.Index; - if (dest.Index > fp->max_temp_idx) - fp->max_temp_idx = dest.Index; + reg = get_temp(fp, dest.Index); break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple @@ -323,7 +343,7 @@ static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_ /* The r3xx shader uses MAD to implement MOV. We are using CMP, since * it is technically more accurate and recommended by ATI/AMD. */ GLuint src_reg = make_src(fp, src); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT; fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -511,7 +531,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: src0*src1+0 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -639,7 +659,10 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) fp->cur_node = 0; fp->first_node_has_tex = 0; fp->const_nr = 0; - fp->max_temp_idx = 0; + /* Size of pixel stack, plus 1. */ + fp->max_temp_idx = 1; + /* Temp register offset. */ + fp->temp_reg_offset = 0; fp->node[0].alu_end = -1; fp->node[0].tex_end = -1; @@ -659,49 +682,6 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) * starting from register 0. */ -#if 0 - /* Texcoords come first */ - for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; - cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - get_hw_temp(fp, 0); - } - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) { - cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); - insert_wpos(&mp->Base); - } - InputsRead &= ~FRAG_BIT_WPOS; - - /* Then primary colour */ - if (InputsRead & FRAG_BIT_COL0) { - cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); - } - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) { - cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); - } - InputsRead &= ~FRAG_BIT_COL1; - - /* Anything else */ - if (InputsRead) { - WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); - /* force read from hwreg 0 for now */ - for (i = 0; i < 32; i++) - if (InputsRead & (1 << i)) - cs->inputs[i].reg = 0; - } -#endif - /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. * That way, we can free up the reg when it's no longer needed */ @@ -712,35 +692,14 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { int idx; - for (i = 0; i < 3; i++) { idx = fpi->SrcReg[i].Index; - switch (fpi->SrcReg[i].File) { - case PROGRAM_TEMPORARY: - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - break; - case PROGRAM_INPUT: + if (fpi->SrcReg[i].File == PROGRAM_INPUT) { cs->inputs[idx].refcount++; - break; - default: - break; + if (fp->max_temp_idx < idx) + fp->max_temp_idx = idx; } } - - idx = fpi->DstReg.Index; - if (fpi->DstReg.File == PROGRAM_TEMPORARY) { - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - } } cs->temp_in_use = temps_used; } @@ -777,16 +736,6 @@ void r500TranslateFragmentShader(r300ContextPtr r300, return; } - /* Finish off */ - fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; - if (fp->node[fp->cur_node].tex_end < 0) - fp->node[fp->cur_node].tex_end = 0; - fp->alu_offset = 0; - fp->alu_end = cs->nrslots - 1; - //assert(fp->node[fp->cur_node].alu_end >= 0); - //assert(fp->alu_end >= 0); - fp->translated = GL_TRUE; r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); }