X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fradeon%2Fradeon_tcl.c;h=3e2f4261600ee97777327e32fdeb8093f9cd0a0f;hb=ed65e6ef49e17e9cae93a8f98e2968346de2bc6e;hp=779e9ae5df04a2b2d0d9b7602d361fc552b7029a;hpb=e228433823b90127a217950433e31f0ef44df813;p=mesa.git diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index 779e9ae5df0..3e2f4261600 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -1,7 +1,7 @@ /************************************************************************** Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and - Tungsten Graphics Inc., Austin, Texas. + VMware, Inc. All Rights Reserved. @@ -29,26 +29,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* * Authors: - * Keith Whitwell + * Keith Whitwell */ #include "main/glheader.h" #include "main/imports.h" -#include "main/light.h" #include "main/mtypes.h" +#include "main/light.h" #include "main/enums.h" +#include "main/state.h" #include "vbo/vbo.h" #include "tnl/tnl.h" #include "tnl/t_pipeline.h" +#include "radeon_common.h" #include "radeon_context.h" #include "radeon_state.h" #include "radeon_ioctl.h" -#include "radeon_tex.h" #include "radeon_tcl.h" #include "radeon_swtcl.h" #include "radeon_maos.h" +#include "radeon_common_context.h" @@ -63,7 +65,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define HAVE_LINE_STRIPS 1 #define HAVE_TRIANGLES 1 #define HAVE_TRI_STRIPS 1 -#define HAVE_TRI_STRIP_1 0 #define HAVE_TRI_FANS 1 #define HAVE_QUADS 0 #define HAVE_QUAD_STRIPS 0 @@ -104,7 +105,7 @@ static GLboolean discrete_prim[0x10] = { }; -#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx) +#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx) #define ELT_TYPE GLushort #define ELT_INIT(prim, hw_prim) \ @@ -125,7 +126,7 @@ static GLboolean discrete_prim[0x10] = { #define RESET_STIPPLE() do { \ RADEON_STATECHANGE( rmesa, lin ); \ - radeonEmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) #define AUTO_STIPPLE( mode ) do { \ @@ -136,31 +137,26 @@ static GLboolean discrete_prim[0x10] = { else \ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \ ~RADEON_LINE_PATTERN_AUTO_RESET; \ - radeonEmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) #define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr ) -static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) +static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) { - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); + if (rmesa->radeon.dma.flush) + rmesa->radeon.dma.flush( &rmesa->radeon.glCtx ); - radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + ELTS_BUFSZ(nr)); - - radeonEmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, 0 ); + radeonEmitAOS( rmesa, + rmesa->radeon.tcl.aos_count, 0 ); - return radeonAllocEltsOpenEnded( rmesa, - rmesa->tcl.vertex_format, - rmesa->tcl.hw_primitive, nr ); + return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format, + rmesa->tcl.hw_primitive, nr ); } -#define CLOSE_ELTS() RADEON_NEWPRIM( rmesa ) +#define CLOSE_ELTS() if (0) RADEON_NEWPRIM( rmesa ) @@ -168,21 +164,17 @@ static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) * discrete and there are no intervening state changes. (Somewhat * duplicates changes to DrawArrays code) */ -static void radeonEmitPrim( GLcontext *ctx, +static void radeonEmitPrim( struct gl_context *ctx, GLenum prim, GLuint hwprim, GLuint start, GLuint count) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); radeonTclPrimitive( ctx, prim, hwprim ); - radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + VBUF_BUFSZ ); - radeonEmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, + rmesa->radeon.tcl.aos_count, start ); /* Why couldn't this packet have taken an offset param? @@ -197,6 +189,8 @@ static void radeonEmitPrim( GLcontext *ctx, radeonEmitPrim( ctx, prim, hwprim, start, count ); \ (void) rmesa; } while (0) +#define MAX_CONVERSION_SIZE 40 + /* Try & join small primitives */ #if 0 @@ -213,8 +207,8 @@ static void radeonEmitPrim( GLcontext *ctx, #ifdef MESA_BIG_ENDIAN /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */ #define EMIT_ELT(dest, offset, x) do { \ - int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 ); \ - GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 ); \ + int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 ); \ + GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 ); \ (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); \ (void)rmesa; } while (0) #else @@ -234,7 +228,7 @@ static void radeonEmitPrim( GLcontext *ctx, /* External entrypoints */ /**********************************************************************/ -void radeonEmitPrimitive( GLcontext *ctx, +void radeonEmitPrimitive( struct gl_context *ctx, GLuint first, GLuint last, GLuint flags ) @@ -242,7 +236,7 @@ void radeonEmitPrimitive( GLcontext *ctx, tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags ); } -void radeonEmitEltPrimitive( GLcontext *ctx, +void radeonEmitEltPrimitive( struct gl_context *ctx, GLuint first, GLuint last, GLuint flags ) @@ -250,14 +244,18 @@ void radeonEmitEltPrimitive( GLcontext *ctx, tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags ); } -void radeonTclPrimitive( GLcontext *ctx, +void radeonTclPrimitive( struct gl_context *ctx, GLenum prim, int hw_prim ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint se_cntl; GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; + radeon_prepare_render(&rmesa->radeon); + if (rmesa->radeon.NewGLState) + radeonValidateState( ctx ); + if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { RADEON_NEWPRIM( rmesa ); @@ -267,7 +265,7 @@ void radeonTclPrimitive( GLcontext *ctx, se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL]; se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST; - if (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE)) + if (prim == GL_POLYGON && ctx->Light.ShadeModel == GL_FLAT) se_cntl |= RADEON_FLAT_SHADE_VTX_0; else se_cntl |= RADEON_FLAT_SHADE_VTX_LAST; @@ -278,87 +276,72 @@ void radeonTclPrimitive( GLcontext *ctx, } } -/**********************************************************************/ -/* Fog blend factor computation for hw tcl */ -/* same calculation used as in t_vb_fog.c */ -/**********************************************************************/ - -#define FOG_EXP_TABLE_SIZE 256 -#define FOG_MAX (10.0) -#define EXP_FOG_MAX .0006595 -#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE) -static GLfloat exp_table[FOG_EXP_TABLE_SIZE]; - -#if 1 -#define NEG_EXP( result, narg ) \ -do { \ - GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR)); \ - GLint k = (GLint) f; \ - if (k > FOG_EXP_TABLE_SIZE-2) \ - result = (GLfloat) EXP_FOG_MAX; \ - else \ - result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]); \ -} while (0) -#else -#define NEG_EXP( result, narg ) \ -do { \ - result = exp(-narg); \ -} while (0) -#endif - - -/** - * Initialize the exp_table[] lookup table for approximating exp(). - */ -void -radeonInitStaticFogData( void ) -{ - GLfloat f = 0.0F; - GLint i = 0; - for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) { - exp_table[i] = (GLfloat) exp(-f); - } -} - - /** - * Compute per-vertex fog blend factors from fog coordinates by - * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function. - * Fog coordinates are distances from the eye (typically between the - * near and far clip plane distances). - * Note the fog (eye Z) coords may be negative so we use ABS(z) below. - * Fog blend factors are in the range [0,1]. + * Predict total emit size for next rendering operation so there is no flush in middle of rendering + * Prediction has to aim towards the best possible value that is worse than worst case scenario */ -float -radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) +static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs ) { - GLfloat end = ctx->Fog.End; - GLfloat d, temp; - const GLfloat z = FABSF(fogcoord); - - switch (ctx->Fog.Mode) { - case GL_LINEAR: - if (ctx->Fog.Start == ctx->Fog.End) - d = 1.0F; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint space_required; + GLuint state_size; + GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */ + int i; + /* list of flags that are allocating aos object */ + const GLuint flags_to_check[] = { + VERT_BIT_NORMAL, + VERT_BIT_COLOR0, + VERT_BIT_COLOR1, + VERT_BIT_FOG + }; + /* predict number of aos to emit */ + for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i) + { + if (inputs & flags_to_check[i]) + ++nr_aos; + } + for (i = 0; i < ctx->Const.MaxTextureUnits; ++i) + { + if (inputs & VERT_BIT_TEX(i)) + ++nr_aos; + } + + { + /* count the prediction for state size */ + space_required = 0; + state_size = radeonCountStateEmitSize( &rmesa->radeon ); + /* tcl may be changed in radeonEmitArrays so account for it if not dirty */ + if (!rmesa->hw.tcl.dirty) + state_size += rmesa->hw.tcl.check( &rmesa->radeon.glCtx, &rmesa->hw.tcl ); + /* predict size for elements */ + for (i = 0; i < VB->PrimitiveCount; ++i) + { + /* If primitive.count is less than MAX_CONVERSION_SIZE + rendering code may decide convert to elts. + In that case we have to make pessimistic prediction. + and use larger of 2 paths. */ + const GLuint elts = ELTS_BUFSZ(nr_aos); + const GLuint index = INDEX_BUFSZ; + const GLuint vbuf = VBUF_BUFSZ; + if (!VB->Primitive[i].count) + continue; + if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) + || vbuf > index + elts) + space_required += vbuf; else - d = 1.0F / (ctx->Fog.End - ctx->Fog.Start); - temp = (end - z) * d; - return CLAMP(temp, 0.0F, 1.0F); - break; - case GL_EXP: - d = ctx->Fog.Density; - NEG_EXP( temp, d * z ); - return temp; - break; - case GL_EXP2: - d = ctx->Fog.Density*ctx->Fog.Density; - NEG_EXP( temp, d * z * z ); - return temp; - break; - default: - _mesa_problem(ctx, "Bad fog mode in make_fog_coord"); - return 0; - } + space_required += index + elts; + space_required += VB->Primitive[i].count * 3; + space_required += AOS_BUFSZ(nr_aos); + } + space_required += SCISSOR_BUFSZ; + } + /* flush the buffer in case we need more than is left. */ + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __func__)) + return space_required + radeonCountStateEmitSize( &rmesa->radeon ); + else + return space_required + state_size; } /**********************************************************************/ @@ -368,18 +351,19 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) /* TCL render. */ -static GLboolean radeon_run_tcl_render( GLcontext *ctx, +static GLboolean radeon_run_tcl_render( struct gl_context *ctx, struct tnl_pipeline_stage *stage ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0; GLuint i; + GLuint emit_end; /* TODO: separate this from the swtnl pipeline */ - if (rmesa->TclFallback) + if (rmesa->radeon.TclFallback) return GL_TRUE; /* fallback to software t&l */ if (VB->Count == 0) @@ -392,7 +376,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, inputs |= VERT_BIT_NORMAL; } - if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { + if (_mesa_need_secondary_color(ctx)) { inputs |= VERT_BIT_COLOR1; } @@ -401,7 +385,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, } for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (ctx->Texture.Unit[i]._Current) { /* TODO: probably should not emit texture coords when texgen is enabled */ if (rmesa->TexGenNeedNormals[i]) { inputs |= VERT_BIT_NORMAL; @@ -411,6 +395,8 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, } radeonReleaseArrays( ctx, ~0 ); + emit_end = radeonEnsureEmitSize( ctx, inputs ) + + rmesa->radeon.cmdbuf.cs->cdw; radeonEmitArrays( ctx, inputs ); rmesa->tcl.Elts = VB->Elts; @@ -430,6 +416,10 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, radeonEmitPrimitive( ctx, start, start+length, prim ); } + if (emit_end < rmesa->radeon.cmdbuf.cs->cdw) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end); + return GL_FALSE; /* finished the pipe */ } @@ -459,9 +449,9 @@ const struct tnl_pipeline_stage _radeon_tcl_stage = */ -static void transition_to_swtnl( GLcontext *ctx ) +static void transition_to_swtnl( struct gl_context *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); GLuint se_cntl; @@ -471,10 +461,10 @@ static void transition_to_swtnl( GLcontext *ctx ) radeonChooseVertexState( ctx ); radeonChooseRenderState( ctx ); - _mesa_validate_all_lighting_tables( ctx ); + _tnl_validate_shine_tables( ctx ); tnl->Driver.NotifyMaterialChange = - _mesa_validate_all_lighting_tables; + _tnl_validate_shine_tables; radeonReleaseArrays( ctx, ~0 ); @@ -488,9 +478,9 @@ static void transition_to_swtnl( GLcontext *ctx ) } -static void transition_to_hwtnl( GLcontext *ctx ) +static void transition_to_hwtnl( struct gl_context *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; @@ -509,17 +499,17 @@ static void transition_to_hwtnl( GLcontext *ctx ) tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial; - if ( rmesa->dma.flush ) - rmesa->dma.flush( rmesa ); + if ( rmesa->radeon.dma.flush ) + rmesa->radeon.dma.flush( &rmesa->radeon.glCtx ); - rmesa->dma.flush = NULL; + rmesa->radeon.dma.flush = NULL; rmesa->swtcl.vertex_format = 0; - if (rmesa->swtcl.indexed_verts.buf) - radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, - __FUNCTION__ ); + // if (rmesa->swtcl.indexed_verts.buf) + // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, + // __func__ ); - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback\n"); } @@ -548,24 +538,24 @@ static char *getFallbackString(GLuint bit) -void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) +void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLuint oldfallback = rmesa->TclFallback; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint oldfallback = rmesa->radeon.TclFallback; if (mode) { - rmesa->TclFallback |= bit; + rmesa->radeon.TclFallback |= bit; if (oldfallback == 0) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon begin tcl fallback %s\n", getFallbackString( bit )); transition_to_swtnl( ctx ); } } else { - rmesa->TclFallback &= ~bit; + rmesa->radeon.TclFallback &= ~bit; if (oldfallback == bit) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) + if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback %s\n", getFallbackString( bit )); transition_to_hwtnl( ctx );