X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fradeon%2Fradeon_tcl.c;h=3e2f4261600ee97777327e32fdeb8093f9cd0a0f;hb=ed65e6ef49e17e9cae93a8f98e2968346de2bc6e;hp=5887ab355d244a3614ed5174709ba6a7a3968e80;hpb=407e8ae5b167b0193e1e5b1266a5d61ed836dfb5;p=mesa.git diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index 5887ab355d2..3e2f4261600 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -1,7 +1,7 @@ /************************************************************************** Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and - Tungsten Graphics Inc., Austin, Texas. + VMware, Inc. All Rights Reserved. @@ -29,14 +29,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* * Authors: - * Keith Whitwell + * Keith Whitwell */ #include "main/glheader.h" #include "main/imports.h" -#include "main/light.h" #include "main/mtypes.h" +#include "main/light.h" #include "main/enums.h" +#include "main/state.h" #include "vbo/vbo.h" #include "tnl/tnl.h" @@ -46,10 +47,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_context.h" #include "radeon_state.h" #include "radeon_ioctl.h" -#include "radeon_tex.h" #include "radeon_tcl.h" #include "radeon_swtcl.h" #include "radeon_maos.h" +#include "radeon_common_context.h" @@ -64,7 +65,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define HAVE_LINE_STRIPS 1 #define HAVE_TRIANGLES 1 #define HAVE_TRI_STRIPS 1 -#define HAVE_TRI_STRIP_1 0 #define HAVE_TRI_FANS 1 #define HAVE_QUADS 0 #define HAVE_QUAD_STRIPS 0 @@ -147,13 +147,10 @@ static GLboolean discrete_prim[0x10] = { static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) { if (rmesa->radeon.dma.flush) - rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - - rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) + - AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__); + rmesa->radeon.dma.flush( &rmesa->radeon.glCtx ); radeonEmitAOS( rmesa, - rmesa->tcl.nr_aos_components, 0 ); + rmesa->radeon.tcl.aos_count, 0 ); return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format, rmesa->tcl.hw_primitive, nr ); @@ -167,7 +164,7 @@ static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) * discrete and there are no intervening state changes. (Somewhat * duplicates changes to DrawArrays code) */ -static void radeonEmitPrim( GLcontext *ctx, +static void radeonEmitPrim( struct gl_context *ctx, GLenum prim, GLuint hwprim, GLuint start, @@ -176,12 +173,8 @@ static void radeonEmitPrim( GLcontext *ctx, r100ContextPtr rmesa = R100_CONTEXT( ctx ); radeonTclPrimitive( ctx, prim, hwprim ); - rcommonEnsureCmdBufSpace( &rmesa->radeon, - AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); - radeonEmitAOS( rmesa, - rmesa->tcl.nr_aos_components, + rmesa->radeon.tcl.aos_count, start ); /* Why couldn't this packet have taken an offset param? @@ -196,6 +189,8 @@ static void radeonEmitPrim( GLcontext *ctx, radeonEmitPrim( ctx, prim, hwprim, start, count ); \ (void) rmesa; } while (0) +#define MAX_CONVERSION_SIZE 40 + /* Try & join small primitives */ #if 0 @@ -212,8 +207,8 @@ static void radeonEmitPrim( GLcontext *ctx, #ifdef MESA_BIG_ENDIAN /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... 
*/ #define EMIT_ELT(dest, offset, x) do { \ - int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 ); \ - GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 ); \ + int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 ); \ + GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 ); \ (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); \ (void)rmesa; } while (0) #else @@ -233,7 +228,7 @@ static void radeonEmitPrim( GLcontext *ctx, /* External entrypoints */ /**********************************************************************/ -void radeonEmitPrimitive( GLcontext *ctx, +void radeonEmitPrimitive( struct gl_context *ctx, GLuint first, GLuint last, GLuint flags ) @@ -241,7 +236,7 @@ void radeonEmitPrimitive( GLcontext *ctx, tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags ); } -void radeonEmitEltPrimitive( GLcontext *ctx, +void radeonEmitEltPrimitive( struct gl_context *ctx, GLuint first, GLuint last, GLuint flags ) @@ -249,7 +244,7 @@ void radeonEmitEltPrimitive( GLcontext *ctx, tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags ); } -void radeonTclPrimitive( GLcontext *ctx, +void radeonTclPrimitive( struct gl_context *ctx, GLenum prim, int hw_prim ) { @@ -257,6 +252,10 @@ void radeonTclPrimitive( GLcontext *ctx, GLuint se_cntl; GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; + radeon_prepare_render(&rmesa->radeon); + if (rmesa->radeon.NewGLState) + radeonValidateState( ctx ); + if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { RADEON_NEWPRIM( rmesa ); @@ -266,7 +265,7 @@ void radeonTclPrimitive( GLcontext *ctx, se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL]; se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST; - if (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE)) + if (prim == GL_POLYGON && ctx->Light.ShadeModel == GL_FLAT) se_cntl |= RADEON_FLAT_SHADE_VTX_0; else se_cntl |= RADEON_FLAT_SHADE_VTX_LAST; @@ -277,87 +276,72 @@ void radeonTclPrimitive( GLcontext *ctx, } } -/**********************************************************************/ -/* Fog blend factor computation for hw tcl */ -/* same calculation used as in t_vb_fog.c */ -/**********************************************************************/ - -#define FOG_EXP_TABLE_SIZE 256 -#define FOG_MAX (10.0) -#define EXP_FOG_MAX .0006595 -#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE) -static GLfloat exp_table[FOG_EXP_TABLE_SIZE]; - -#if 1 -#define NEG_EXP( result, narg ) \ -do { \ - GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR)); \ - GLint k = (GLint) f; \ - if (k > FOG_EXP_TABLE_SIZE-2) \ - result = (GLfloat) EXP_FOG_MAX; \ - else \ - result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]); \ -} while (0) -#else -#define NEG_EXP( result, narg ) \ -do { \ - result = exp(-narg); \ -} while (0) -#endif - - -/** - * Initialize the exp_table[] lookup table for approximating exp(). - */ -void -radeonInitStaticFogData( void ) -{ - GLfloat f = 0.0F; - GLint i = 0; - for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) { - exp_table[i] = (GLfloat) exp(-f); - } -} - - /** - * Compute per-vertex fog blend factors from fog coordinates by - * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function. - * Fog coordinates are distances from the eye (typically between the - * near and far clip plane distances). - * Note the fog (eye Z) coords may be negative so we use ABS(z) below. - * Fog blend factors are in the range [0,1]. 
+ * Predict total emit size for next rendering operation so there is no flush in middle of rendering + * Prediction has to aim towards the best possible value that is worse than worst case scenario */ -float -radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) +static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs ) { - GLfloat end = ctx->Fog.End; - GLfloat d, temp; - const GLfloat z = FABSF(fogcoord); - - switch (ctx->Fog.Mode) { - case GL_LINEAR: - if (ctx->Fog.Start == ctx->Fog.End) - d = 1.0F; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint space_required; + GLuint state_size; + GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */ + int i; + /* list of flags that are allocating aos object */ + const GLuint flags_to_check[] = { + VERT_BIT_NORMAL, + VERT_BIT_COLOR0, + VERT_BIT_COLOR1, + VERT_BIT_FOG + }; + /* predict number of aos to emit */ + for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i) + { + if (inputs & flags_to_check[i]) + ++nr_aos; + } + for (i = 0; i < ctx->Const.MaxTextureUnits; ++i) + { + if (inputs & VERT_BIT_TEX(i)) + ++nr_aos; + } + + { + /* count the prediction for state size */ + space_required = 0; + state_size = radeonCountStateEmitSize( &rmesa->radeon ); + /* tcl may be changed in radeonEmitArrays so account for it if not dirty */ + if (!rmesa->hw.tcl.dirty) + state_size += rmesa->hw.tcl.check( &rmesa->radeon.glCtx, &rmesa->hw.tcl ); + /* predict size for elements */ + for (i = 0; i < VB->PrimitiveCount; ++i) + { + /* If primitive.count is less than MAX_CONVERSION_SIZE + rendering code may decide convert to elts. + In that case we have to make pessimistic prediction. + and use larger of 2 paths. */ + const GLuint elts = ELTS_BUFSZ(nr_aos); + const GLuint index = INDEX_BUFSZ; + const GLuint vbuf = VBUF_BUFSZ; + if (!VB->Primitive[i].count) + continue; + if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) + || vbuf > index + elts) + space_required += vbuf; else - d = 1.0F / (ctx->Fog.End - ctx->Fog.Start); - temp = (end - z) * d; - return CLAMP(temp, 0.0F, 1.0F); - break; - case GL_EXP: - d = ctx->Fog.Density; - NEG_EXP( temp, d * z ); - return temp; - break; - case GL_EXP2: - d = ctx->Fog.Density*ctx->Fog.Density; - NEG_EXP( temp, d * z * z ); - return temp; - break; - default: - _mesa_problem(ctx, "Bad fog mode in make_fog_coord"); - return 0; - } + space_required += index + elts; + space_required += VB->Primitive[i].count * 3; + space_required += AOS_BUFSZ(nr_aos); + } + space_required += SCISSOR_BUFSZ; + } + /* flush the buffer in case we need more than is left. */ + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __func__)) + return space_required + radeonCountStateEmitSize( &rmesa->radeon ); + else + return space_required + state_size; } /**********************************************************************/ @@ -367,7 +351,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) /* TCL render. 
*/ -static GLboolean radeon_run_tcl_render( GLcontext *ctx, +static GLboolean radeon_run_tcl_render( struct gl_context *ctx, struct tnl_pipeline_stage *stage ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); @@ -375,6 +359,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, struct vertex_buffer *VB = &tnl->vb; GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0; GLuint i; + GLuint emit_end; /* TODO: separate this from the swtnl pipeline */ @@ -391,7 +376,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, inputs |= VERT_BIT_NORMAL; } - if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { + if (_mesa_need_secondary_color(ctx)) { inputs |= VERT_BIT_COLOR1; } @@ -400,7 +385,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, } for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._Current) { /* TODO: probably should not emit texture coords when texgen is enabled */ if (rmesa->TexGenNeedNormals[i]) { inputs |= VERT_BIT_NORMAL; @@ -410,6 +395,8 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, } radeonReleaseArrays( ctx, ~0 ); + emit_end = radeonEnsureEmitSize( ctx, inputs ) + + rmesa->radeon.cmdbuf.cs->cdw; radeonEmitArrays( ctx, inputs ); rmesa->tcl.Elts = VB->Elts; @@ -429,6 +416,10 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, radeonEmitPrimitive( ctx, start, start+length, prim ); } + if (emit_end < rmesa->radeon.cmdbuf.cs->cdw) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end); + return GL_FALSE; /* finished the pipe */ } @@ -458,7 +449,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage = */ -static void transition_to_swtnl( GLcontext *ctx ) +static void transition_to_swtnl( struct gl_context *ctx ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); @@ -470,10 +461,10 @@ static void transition_to_swtnl( GLcontext *ctx ) radeonChooseVertexState( ctx ); radeonChooseRenderState( ctx ); - _mesa_validate_all_lighting_tables( ctx ); + _tnl_validate_shine_tables( ctx ); tnl->Driver.NotifyMaterialChange = - _mesa_validate_all_lighting_tables; + _tnl_validate_shine_tables; radeonReleaseArrays( ctx, ~0 ); @@ -487,7 +478,7 @@ static void transition_to_swtnl( GLcontext *ctx ) } -static void transition_to_hwtnl( GLcontext *ctx ) +static void transition_to_hwtnl( struct gl_context *ctx ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); @@ -509,16 +500,16 @@ static void transition_to_hwtnl( GLcontext *ctx ) tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial; if ( rmesa->radeon.dma.flush ) - rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); + rmesa->radeon.dma.flush( &rmesa->radeon.glCtx ); rmesa->radeon.dma.flush = NULL; rmesa->swtcl.vertex_format = 0; // if (rmesa->swtcl.indexed_verts.buf) // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, - // __FUNCTION__ ); + // __func__ ); - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback\n"); } @@ -547,7 +538,7 @@ static char *getFallbackString(GLuint bit) -void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) +void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint oldfallback = rmesa->radeon.TclFallback; @@ -555,7 +546,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) if (mode) { rmesa->radeon.TclFallback 
|= bit; if (oldfallback == 0) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon begin tcl fallback %s\n", getFallbackString( bit )); transition_to_swtnl( ctx ); @@ -564,7 +555,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) else { rmesa->radeon.TclFallback &= ~bit; if (oldfallback == bit) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback %s\n", getFallbackString( bit )); transition_to_hwtnl( ctx );
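
The largest functional addition in this diff is radeonEnsureEmitSize(), which predicts a pessimistic command-buffer size for the upcoming draw so the buffer can be flushed before emission begins rather than in the middle of rendering (the WARN_ONCE at the end of radeon_run_tcl_render compares the prediction against the actual command count afterwards). Below is a simplified, self-contained sketch of that prediction pattern. It is not part of the patch: predict_emit_size, struct prim, ELT_SIZE, INDEX_SIZE, VBUF_SIZE, AOS_SIZE, SCISSOR_SIZE and SMALL_PRIM_LIMIT are hypothetical stand-ins for the real Mesa macros and types (ELTS_BUFSZ, INDEX_BUFSZ, VBUF_BUFSZ, AOS_BUFSZ, SCISSOR_BUFSZ, MAX_CONVERSION_SIZE, VB->Primitive), and the numeric constants are placeholders.

#define ELT_SIZE(naos)   (24 + 2 * (naos))   /* placeholder for ELTS_BUFSZ()  */
#define INDEX_SIZE       7                   /* placeholder for INDEX_BUFSZ   */
#define VBUF_SIZE        8                   /* placeholder for VBUF_BUFSZ    */
#define AOS_SIZE(naos)   (6 * (naos))        /* placeholder for AOS_BUFSZ()   */
#define SCISSOR_SIZE     8                   /* placeholder for SCISSOR_BUFSZ */
#define SMALL_PRIM_LIMIT 40                  /* mirrors MAX_CONVERSION_SIZE   */

struct prim { unsigned count; };             /* stand-in for VB->Primitive[i] */

/* Pessimistic command-buffer space for 'nprims' primitives drawn from
 * 'nr_aos' vertex arrays; 'have_elts' says whether an index list exists. */
static unsigned predict_emit_size(const struct prim *prims, unsigned nprims,
                                  unsigned nr_aos, int have_elts)
{
   unsigned space = 0;
   unsigned i;

   for (i = 0; i < nprims; i++) {
      if (!prims[i].count)
         continue;
      /* Small non-indexed primitives may still be converted to the indexed
       * (elt) path, so charge whichever path is larger unless the primitive
       * is big enough to be sure of staying on the vertex-buffer path. */
      if ((!have_elts && prims[i].count >= SMALL_PRIM_LIMIT) ||
          VBUF_SIZE > INDEX_SIZE + ELT_SIZE(nr_aos))
         space += VBUF_SIZE;
      else
         space += INDEX_SIZE + ELT_SIZE(nr_aos);
      space += prims[i].count * 3;   /* per-vertex allowance, as in the patch */
      space += AOS_SIZE(nr_aos);     /* array-of-structures setup             */
   }
   return space + SCISSOR_SIZE;
}

In the patch itself, this per-primitive total is added to the state size from radeonCountStateEmitSize() and handed to rcommonEnsureCmdBufSpace(), which flushes the command buffer up front if the space is not available; the deliberate over-estimate reflects the patch comment that the prediction "has to aim towards the best possible value that is worse than worst case scenario".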