From e7d6bb9dc41433803bcaad671ea2cf3bf628e0be Mon Sep 17 00:00:00 2001 From: Aapo Tahkola Date: Tue, 1 Nov 2005 15:52:08 +0000 Subject: [PATCH] -Fix first frame -bug -Use 16-bit elts in vtxfmt_a path if possible -Optimize VSF param uploading -return in r300DepthMask looks suspicious, use r300Enable instead -Dont use r300ResetHwState in invalidate state(disabled, missing hooks, possible instabilities) --- src/mesa/drivers/dri/r300/r300_context.h | 14 +--- src/mesa/drivers/dri/r300/r300_render.c | 10 ++- src/mesa/drivers/dri/r300/r300_state.c | 72 ++++++++++--------- src/mesa/drivers/dri/r300/r300_state.h | 1 + src/mesa/drivers/dri/r300/r300_vertexprog.c | 16 ++--- src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c | 77 +++++++++++++-------- 6 files changed, 109 insertions(+), 81 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 0ece8f8d1f4..c03dd03daf5 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -826,6 +826,7 @@ struct r300_context { */ struct r300_dma dma; GLboolean save_on_next_unlock; + GLuint NewGLState; /* Texture object bookkeeping */ @@ -839,17 +840,6 @@ struct r300_context { */ GLuint prefer_gart_client_texturing; - /* TCL stuff - */ - GLmatrix TexGenMatrix[R300_MAX_TEXTURE_UNITS]; - GLboolean recheck_texgen[R300_MAX_TEXTURE_UNITS]; - GLboolean TexGenNeedNormals[R300_MAX_TEXTURE_UNITS]; - GLuint TexMatEnabled; - GLuint TexMatCompSel; - GLuint TexGenEnabled; - GLuint TexGenInputs; - GLuint TexGenCompSel; - GLmatrix tmpmat; #ifdef USER_BUFFERS key_t mm_ipc_key; int mm_shm_id; @@ -885,7 +875,7 @@ extern GLboolean r300CreateContext(const __GLcontextModes * glVisual, void translate_vertex_shader(struct r300_vertex_program *vp); extern void r300InitShaderFuncs(struct dd_function_table *functions); -extern void r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp); +extern int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp, float *dst); #ifdef RADEON_VTXFMT_A extern void radeon_init_vtxfmt_a(r300ContextPtr rmesa); diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 1599f0f6af4..98ddd821067 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -543,10 +543,13 @@ static GLboolean r300_run_vb_render(GLcontext *ctx, if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - + + r300UpdateShaders(rmesa); + r300ReleaseArrays(ctx); r300EmitArrays(ctx, GL_FALSE); + r300UpdateShaderStates(rmesa); // LOCK_HARDWARE(&(rmesa->radeon)); reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); @@ -692,6 +695,8 @@ void dump_dt(struct dt *dt, int count) if (rmesa->state.VB.LockCount == 0) { r300ReleaseArrays(ctx); r300EmitArraysVtx(ctx, GL_FALSE); + + r300UpdateShaderStates(rmesa); } else { /* TODO: Figure out why do we need these. */ R300_STATECHANGE(rmesa, vir[0]); @@ -891,7 +896,8 @@ static GLboolean r300_run_tcl_render(GLcontext *ctx, if(hw_tcl_on == GL_FALSE) return GL_TRUE; - r300UpdateShaderStates(rmesa); + //r300UpdateShaders(rmesa); + //r300UpdateShaderStates(rmesa); return r300_run_vb_render(ctx, stage); } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e3aa74b1023..343acc426a1 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -621,14 +621,7 @@ static void r300DepthFunc(GLcontext* ctx, GLenum func) static void r300DepthMask(GLcontext* ctx, GLboolean mask) { r300ContextPtr r300 = R300_CONTEXT(ctx); - - if (!ctx->Depth.Test) - return; - - R300_STATECHANGE(r300, zs); - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE; - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= mask - ? R300_RB3D_Z_TEST_AND_WRITE : R300_RB3D_Z_TEST; + r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); } @@ -1054,7 +1047,7 @@ void r300_setup_textures(GLcontext *ctx) if(t == NULL){ fprintf(stderr, "Texture unit %d enabled, but corresponding texobj is NULL, using default object.\n", i); - //exit(-1); + exit(-1); t=&default_tex_obj; } @@ -1506,24 +1499,24 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) struct r300_vertex_program *prog=(struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - /* Reset state, in case we don't use something */ ((drm_r300_cmd_header_t*)rmesa->hw.vpp.cmd)->vpu.count = 0; + R300_STATECHANGE(rmesa, vpp); + param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + bump_vpu_count(rmesa->hw.vpp.cmd, param_count); + param_count /= 4; + + /* Reset state, in case we don't use something */ ((drm_r300_cmd_header_t*)rmesa->hw.vpi.cmd)->vpu.count = 0; ((drm_r300_cmd_header_t*)rmesa->hw.vps.cmd)->vpu.count = 0; - r300VertexProgUpdateParams(ctx, prog); - setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(prog->program)); - setup_vertex_shader_fragment(rmesa, VSF_DEST_MATRIX0, &(prog->params)); - #if 0 setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, &(rmesa->state.vertex_shader.unknown1)); setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, &(rmesa->state.vertex_shader.unknown2)); #endif inst_count=prog->program.length/4 - 1; - param_count=prog->params.length/4; R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1]=(0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) @@ -1545,28 +1538,42 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) extern void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ); extern int future_hw_tcl_on; -void r300UpdateShaderStates(r300ContextPtr rmesa) +void r300UpdateShaders(r300ContextPtr rmesa) { GLcontext *ctx; struct r300_vertex_program *vp; - ctx = rmesa->radeon.glCtx; + ctx = rmesa->radeon.glCtx; - if(ctx->VertexProgram._Enabled == GL_FALSE){ - _tnl_UpdateFixedFunctionProgram(ctx); - } - vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - if(vp->translated == GL_FALSE) - translate_vertex_shader(vp); - if(vp->translated == GL_FALSE){ - fprintf(stderr, "Failing back to sw-tcl\n"); - debug_vp(ctx, &vp->mesa_program); - hw_tcl_on=future_hw_tcl_on=0; - r300ResetHwState(rmesa); - - return ; + if (rmesa->NewGLState && hw_tcl_on) { + rmesa->NewGLState = 0; + if (ctx->VertexProgram._Enabled == GL_FALSE) + _tnl_UpdateFixedFunctionProgram(ctx); + + vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + if (vp->translated == GL_FALSE) + translate_vertex_shader(vp); + if (vp->translated == GL_FALSE) { + fprintf(stderr, "Failing back to sw-tcl\n"); + debug_vp(ctx, &vp->mesa_program); + hw_tcl_on = future_hw_tcl_on = 0; + r300ResetHwState(rmesa); + + return ; + } } - + +} + +void r300UpdateShaderStates(r300ContextPtr rmesa) +{ + GLcontext *ctx; + ctx = rmesa->radeon.glCtx; + +#ifdef CB_DPATH + r300UpdateTextureState(ctx); +#endif + r300_setup_textures(ctx); r300_setup_rs_unit(ctx); @@ -1865,12 +1872,15 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) _tnl_InvalidateState(ctx, new_state); _ae_invalidate_state(ctx, new_state); +#ifndef CB_DPATH /* Go inefficiency! */ r300ResetHwState(r300); +#endif #ifdef HW_VBOS if(new_state & _NEW_ARRAY) r300->state.VB.lock_uptodate = GL_FALSE; #endif + r300->NewGLState |= new_state; } /** diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index ee54e4b3eff..9564c0716cb 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -68,6 +68,7 @@ extern void r300SetupPixelShader(r300ContextPtr rmesa); extern void r300_setup_textures(GLcontext *ctx); extern void r300_setup_rs_unit(GLcontext *ctx); +extern void r300UpdateShaders(r300ContextPtr rmesa); extern void r300UpdateShaderStates(r300ContextPtr rmesa); extern void r300_print_state_atom(r300ContextPtr r300, struct r300_state_atom *state); diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertexprog.c index 7526bb86e61..9bf92cde55a 100644 --- a/src/mesa/drivers/dri/r300/r300_vertexprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertexprog.c @@ -242,11 +242,11 @@ void debug_vp(GLcontext *ctx, struct vertex_program *vp) } -void r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp) +int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp, float *dst) { int pi; struct vertex_program *mesa_vp=(void *)vp; - int dst_index; + float *dst_o=dst; _mesa_load_state_parameters(ctx, mesa_vp->Parameters); @@ -255,7 +255,7 @@ void r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp) fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); exit(-1); } - dst_index=0; + for(pi=0; pi < mesa_vp->Parameters->NumParameters; pi++){ switch(mesa_vp->Parameters->Parameters[pi].Type){ @@ -263,10 +263,10 @@ void r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp) case NAMED_PARAMETER: //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); case CONSTANT: - vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][0]; - vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][1]; - vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][2]; - vp->params.body.f[dst_index++]=mesa_vp->Parameters->ParameterValues[pi][3]; + *dst++=mesa_vp->Parameters->ParameterValues[pi][0]; + *dst++=mesa_vp->Parameters->ParameterValues[pi][1]; + *dst++=mesa_vp->Parameters->ParameterValues[pi][2]; + *dst++=mesa_vp->Parameters->ParameterValues[pi][3]; break; default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); @@ -274,7 +274,7 @@ void r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program *vp) } - vp->params.length=dst_index; + return dst - dst_o; } static unsigned long t_dst_mask(GLuint mask) diff --git a/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c index b6292bb00e1..34fe0952fe3 100644 --- a/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c +++ b/src/mesa/drivers/dri/r300/radeon_vtxfmt_a.c @@ -136,12 +136,6 @@ void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const GLvoid * switch (type) { case GL_UNSIGNED_BYTE: - elt_size = 2; - - r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); - rvb.aos_offset = GET_START(&rvb); - ptr = rvb.address + rvb.start; - for (i=0; i < count; i++) { if(((unsigned char *)indices)[i] < min) min = ((unsigned char *)indices)[i]; @@ -149,17 +143,17 @@ void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const GLvoid * max = ((unsigned char *)indices)[i]; } - for (i=0; i < count; i++) - ((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min; - break; - - case GL_UNSIGNED_SHORT: elt_size = 2; r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); rvb.aos_offset = GET_START(&rvb); ptr = rvb.address + rvb.start; + + for (i=0; i < count; i++) + ((unsigned short int *)ptr)[i] = ((unsigned char *)indices)[i] - min; + break; + case GL_UNSIGNED_SHORT: for (i=0; i < count; i++) { if(((unsigned short int *)indices)[i] < min) min = ((unsigned short int *)indices)[i]; @@ -167,17 +161,17 @@ void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const GLvoid * max = ((unsigned short int *)indices)[i]; } - for (i=0; i < count; i++) - ((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min; - break; - - case GL_UNSIGNED_INT: - elt_size = 4; + elt_size = 2; r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); rvb.aos_offset = GET_START(&rvb); ptr = rvb.address + rvb.start; + for (i=0; i < count; i++) + ((unsigned short int *)ptr)[i] = ((unsigned short int *)indices)[i] - min; + break; + + case GL_UNSIGNED_INT: for (i=0; i < count; i++) { if(((unsigned int *)indices)[i] < min) min = ((unsigned int *)indices)[i]; @@ -185,8 +179,22 @@ void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const GLvoid * max = ((unsigned int *)indices)[i]; } - for (i=0; i < count; i++) - ((unsigned int *)ptr)[i] = ((unsigned int *)indices)[i] - min; + if (max - min <= 65535) + elt_size = 2; + else + elt_size = 4; + + r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); + rvb.aos_offset = GET_START(&rvb); + ptr = rvb.address + rvb.start; + + + if (max - min <= 65535) + for (i=0; i < count; i++) + ((unsigned short int *)ptr)[i] = ((unsigned int *)indices)[i] - min; + else + for (i=0; i < count; i++) + ((unsigned int *)ptr)[i] = ((unsigned int *)indices)[i] - min; break; default: @@ -199,7 +207,7 @@ void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const GLvoid * if (ctx->NewState) _mesa_update_state( ctx ); - r300UpdateShaderStates(rmesa); + r300UpdateShaders(rmesa); if (rmesa->state.VB.LockCount) { if (rmesa->state.VB.lock_uptodate == GL_FALSE) { @@ -230,6 +238,8 @@ void radeonDrawElements( GLenum mode, GLsizei count, GLenum type, const GLvoid * rmesa->state.VB.Count = max - min + 1; } + r300UpdateShaderStates(rmesa); + rmesa->state.VB.Primitive = &prim; rmesa->state.VB.PrimitiveCount = 1; @@ -256,7 +266,7 @@ void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei count, struct tnl_prim prim; int elt_size; int i; - static void *ptr = NULL; + void *ptr = NULL; static struct r300_dma_region rvb; if (ctx->Array.ElementArrayBufferObj->Name) { @@ -275,7 +285,7 @@ void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei count, FLUSH_CURRENT( ctx, 0 ); #ifdef OPTIMIZE_ELTS - start = 0; + min = 0; #endif r300ReleaseDmaRegion(rmesa, &rvb, __FUNCTION__); @@ -295,7 +305,7 @@ void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei count, elt_size = 2; #ifdef OPTIMIZE_ELTS - if (start == 0 && ctx->Array.ElementArrayBufferObj->Name){ + if (min == 0 && ctx->Array.ElementArrayBufferObj->Name){ ptr = indices; break; } @@ -309,14 +319,21 @@ void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei count, break; case GL_UNSIGNED_INT: - elt_size = 4; + if (max - min <= 65535) + elt_size = 2; + else + elt_size = 4; r300AllocDmaRegion(rmesa, &rvb, count * elt_size, elt_size); rvb.aos_offset = GET_START(&rvb); ptr = rvb.address + rvb.start; - for(i=0; i < count; i++) - ((unsigned int *)ptr)[i] = ((unsigned int *)indices)[i] - min; + if (max - min <= 65535) + for (i=0; i < count; i++) + ((unsigned short int *)ptr)[i] = ((unsigned int *)indices)[i] - min; + else + for (i=0; i < count; i++) + ((unsigned int *)ptr)[i] = ((unsigned int *)indices)[i] - min; break; default: @@ -330,7 +347,7 @@ void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei count, if (ctx->NewState) _mesa_update_state( ctx ); - r300UpdateShaderStates(rmesa); + r300UpdateShaders(rmesa); if (rmesa->state.VB.LockCount) { if (rmesa->state.VB.lock_uptodate == GL_FALSE) { @@ -361,6 +378,8 @@ void radeonDrawRangeElements(GLenum mode, GLuint min, GLuint max, GLsizei count, rmesa->state.VB.Count = max - min + 1; } + r300UpdateShaderStates(rmesa); + rmesa->state.VB.Primitive = &prim; rmesa->state.VB.PrimitiveCount = 1; @@ -398,7 +417,7 @@ void radeonDrawArrays( GLenum mode, GLint start, GLsizei count ) /* XXX: setup_arrays before state update? */ - r300UpdateShaderStates(rmesa); + r300UpdateShaders(rmesa); if (rmesa->state.VB.LockCount) { if (rmesa->state.VB.lock_uptodate == GL_FALSE) { @@ -429,6 +448,8 @@ void radeonDrawArrays( GLenum mode, GLint start, GLsizei count ) rmesa->state.VB.Count = count; } + r300UpdateShaderStates(rmesa); + rmesa->state.VB.Primitive = &prim; rmesa->state.VB.PrimitiveCount = 1; -- 2.30.2