radeon: Fix legacy bo not to reuse dma buffers before refcount is 1.

[mesa.git] / src / mesa / drivers / dri / radeon / radeon_tcl.c
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c

index 779e9ae5df04a2b2d0d9b7602d361fc552b7029a..b334ea05e5b1ce2beb267b04cc34154ef256ab83 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #include "tnl/tnl.h"
  #include "tnl/t_pipeline.h"
  
+#include "radeon_common.h"
  #include "radeon_context.h"
  #include "radeon_state.h"
  #include "radeon_ioctl.h"
@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #include "radeon_tcl.h"
  #include "radeon_swtcl.h"
  #include "radeon_maos.h"
+#include "radeon_common_context.h"
  
  
  
@@ -104,7 +106,7 @@ static GLboolean discrete_prim[0x10] = {
  };
     
  
-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
  #define ELT_TYPE  GLushort
  
  #define ELT_INIT(prim, hw_prim) \
@@ -125,7 +127,7 @@ static GLboolean discrete_prim[0x10] = {
  
  #define RESET_STIPPLE() do {                   \
     RADEON_STATECHANGE( rmesa, lin );           \
-   radeonEmitState( rmesa );                   \
+   radeonEmitState(&rmesa->radeon);                    \
  } while (0)
  
  #define AUTO_STIPPLE( mode )  do {             \
@@ -136,31 +138,26 @@ static GLboolean discrete_prim[0x10] = {
     else                                                \
        rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=        \
          ~RADEON_LINE_PATTERN_AUTO_RESET;       \
-   radeonEmitState( rmesa );                   \
+   radeonEmitState(&rmesa->radeon);            \
  } while (0)
  
  
  
  #define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr )
  
-static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) 
+static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) 
  {
-   if (rmesa->dma.flush)
-      rmesa->dma.flush( rmesa );
+      if (rmesa->radeon.dma.flush)
+        rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
  
-   radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
-                          rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
+      radeonEmitAOS( rmesa,
+                    rmesa->radeon.tcl.aos_count, 0 );
  
-   radeonEmitAOS( rmesa,
-               rmesa->tcl.aos_components,
-               rmesa->tcl.nr_aos_components, 0 );
-
-   return radeonAllocEltsOpenEnded( rmesa,
-                                   rmesa->tcl.vertex_format, 
-                                   rmesa->tcl.hw_primitive, nr );
+      return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
+                                      rmesa->tcl.hw_primitive, nr );
  }
  
-#define CLOSE_ELTS()  RADEON_NEWPRIM( rmesa )
+#define CLOSE_ELTS() if (0)  RADEON_NEWPRIM( rmesa )
  
  
  
@@ -174,15 +171,11 @@ static void radeonEmitPrim( GLcontext *ctx,
                        GLuint start, 
                        GLuint count)    
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
     radeonTclPrimitive( ctx, prim, hwprim );
     
-   radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
-                           rmesa->hw.max_state_size + VBUF_BUFSZ );
-
     radeonEmitAOS( rmesa,
-                 rmesa->tcl.aos_components,
-                 rmesa->tcl.nr_aos_components,
+                 rmesa->radeon.tcl.aos_count,
                   start );
     
     /* Why couldn't this packet have taken an offset param?
@@ -197,6 +190,8 @@ static void radeonEmitPrim( GLcontext *ctx,
     radeonEmitPrim( ctx, prim, hwprim, start, count );           \
     (void) rmesa; } while (0)
  
+#define MAX_CONVERSION_SIZE 40
+
  /* Try & join small primitives
   */
  #if 0
@@ -254,7 +249,7 @@ void radeonTclPrimitive( GLcontext *ctx,
                          GLenum prim,
                          int hw_prim )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     GLuint se_cntl;
     GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
  
@@ -361,6 +356,73 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
     }
  }
  
+/**
+ * Predict total emit size for next rendering operation so there is no flush in middle of rendering
+ * Prediction has to aim towards the best possible value that is worse than worst case scenario
+ */
+static GLuint radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs )
+{
+  r100ContextPtr rmesa = R100_CONTEXT(ctx);
+  TNLcontext *tnl = TNL_CONTEXT(ctx);
+  struct vertex_buffer *VB = &tnl->vb;
+  GLuint space_required;
+  GLuint state_size;
+  GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
+  int i;
+  /* list of flags that are allocating aos object */
+  const GLuint flags_to_check[] = {
+    VERT_BIT_NORMAL,
+    VERT_BIT_COLOR0,
+    VERT_BIT_COLOR1,
+    VERT_BIT_FOG
+  };
+  /* predict number of aos to emit */
+  for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i)
+  {
+    if (inputs & flags_to_check[i])
+      ++nr_aos;
+  }
+  for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
+  {
+    if (inputs & VERT_BIT_TEX(i))
+      ++nr_aos;
+  }
+
+  {
+    /* count the prediction for state size */
+    space_required = 0;
+    state_size = radeonCountStateEmitSize( &rmesa->radeon );
+    /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
+    if (!rmesa->hw.tcl.dirty)
+      state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl );
+    /* predict size for elements */
+    for (i = 0; i < VB->PrimitiveCount; ++i)
+    {
+      if (!VB->Primitive[i].count)
+       continue;
+      /* If primitive.count is less than MAX_CONVERSION_SIZE
+        rendering code may decide convert to elts.
+        In that case we have to make pessimistic prediction.
+        and use larger of 2 paths. */
+      const GLuint elts = ELTS_BUFSZ(nr_aos);
+      const GLuint index = INDEX_BUFSZ;
+      const GLuint vbuf = VBUF_BUFSZ;
+      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+         || vbuf > index + elts)
+       space_required += vbuf;
+      else
+       space_required += index + elts;
+      space_required += AOS_BUFSZ(nr_aos);
+    }
+    space_required += SCISSOR_BUFSZ;
+  }
+  /* flush the buffer in case we need more than is left. */
+  if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
+    return space_required + radeonCountStateEmitSize( &rmesa->radeon );
+  else
+    return space_required + state_size;
+}
+
  /**********************************************************************/
  /*                          Render pipeline stage                     */
  /**********************************************************************/
@@ -371,7 +433,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
  static GLboolean radeon_run_tcl_render( GLcontext *ctx,
                                         struct tnl_pipeline_stage *stage )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     TNLcontext *tnl = TNL_CONTEXT(ctx);
     struct vertex_buffer *VB = &tnl->vb;
     GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
@@ -379,7 +441,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
  
     /* TODO: separate this from the swtnl pipeline 
      */
-   if (rmesa->TclFallback)
+   if (rmesa->radeon.TclFallback)
        return GL_TRUE;  /* fallback to software t&l */
  
     if (VB->Count == 0)
@@ -411,6 +473,8 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
     }
  
     radeonReleaseArrays( ctx, ~0 );
+   GLuint emit_end = radeonEnsureEmitSize( ctx, inputs )
+     + rmesa->radeon.cmdbuf.cs->cdw;
     radeonEmitArrays( ctx, inputs );
  
     rmesa->tcl.Elts = VB->Elts;
@@ -430,6 +494,10 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
          radeonEmitPrimitive( ctx, start, start+length, prim );
     }
  
+   if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
+      WARN_ONCE("Rendering was %d commands larger than predicted size."
+         " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
+
     return GL_FALSE;            /* finished the pipe */
  }
  
@@ -461,7 +529,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage =
  
  static void transition_to_swtnl( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     TNLcontext *tnl = TNL_CONTEXT(ctx);
     GLuint se_cntl;
  
@@ -490,7 +558,7 @@ static void transition_to_swtnl( GLcontext *ctx )
  
  static void transition_to_hwtnl( GLcontext *ctx )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
     TNLcontext *tnl = TNL_CONTEXT(ctx);
     GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
  
@@ -509,17 +577,17 @@ static void transition_to_hwtnl( GLcontext *ctx )
  
     tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
  
-   if ( rmesa->dma.flush )                     
-      rmesa->dma.flush( rmesa );       
+   if ( rmesa->radeon.dma.flush )                      
+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );  
  
-   rmesa->dma.flush = NULL;
+   rmesa->radeon.dma.flush = NULL;
     rmesa->swtcl.vertex_format = 0;
     
-   if (rmesa->swtcl.indexed_verts.buf) 
-      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
-                             __FUNCTION__ );
+   //   if (rmesa->swtcl.indexed_verts.buf) 
+   //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+   //                        __FUNCTION__ );
  
-   if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+   if (RADEON_DEBUG & RADEON_FALLBACKS)
        fprintf(stderr, "Radeon end tcl fallback\n");
  }
  
@@ -550,22 +618,22 @@ static char *getFallbackString(GLuint bit)
  
  void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
  {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint oldfallback = rmesa->TclFallback;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint oldfallback = rmesa->radeon.TclFallback;
  
     if (mode) {
-      rmesa->TclFallback |= bit;
+      rmesa->radeon.TclFallback |= bit;
        if (oldfallback == 0) {
-        if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+        if (RADEON_DEBUG & RADEON_FALLBACKS)
             fprintf(stderr, "Radeon begin tcl fallback %s\n",
                     getFallbackString( bit ));
          transition_to_swtnl( ctx );
        }
     }
     else {
-      rmesa->TclFallback &= ~bit;
+      rmesa->radeon.TclFallback &= ~bit;
        if (oldfallback == bit) {
-        if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+        if (RADEON_DEBUG & RADEON_FALLBACKS)
             fprintf(stderr, "Radeon end tcl fallback %s\n",
                     getFallbackString( bit ));
          transition_to_hwtnl( ctx );