radeon/r200/r300: Fix swtcl prediction to work after primitive change.
author Pauli Nieminen <suokkos@gmail.com>
Thu, 27 Aug 2009 11:21:13 +0000 (14:21 +0300)
committer Pauli Nieminen <suokkos@gmail.com>
Thu, 27 Aug 2009 15:57:13 +0000 (18:57 +0300)
Swtcl calls flush every time the primitive changes, so the prediction has to
be made again after flushing.

src/mesa/drivers/dri/r200/r200_swtcl.c
src/mesa/drivers/dri/r300/r300_swtcl.c
src/mesa/drivers/dri/radeon/radeon_common_context.h
src/mesa/drivers/dri/radeon/radeon_swtcl.c

index 6b7279e8db5be8cf66760a76205f91a628272806..3d4e70115578425a39169659009a01dea12d2f27 100644 (file)
@@ -201,27 +201,35 @@ static void r200SetVertexFormat( GLcontext *ctx )
    }
 }
 
-
-static void r200RenderStart( GLcontext *ctx )
+static void r200_predict_emit_size( GLcontext *ctx )
 {
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
    const int vertex_array_size = 7;
    const int prim_size = 3;
-   r200ContextPtr rmesa = R200_CONTEXT( ctx );
-   r200SetVertexFormat( ctx );
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s\n", __func__);
-   if (!rmesa->radeon.swtcl.primitive_counter) {
+   if (!rmesa->radeon.swtcl.emit_prediction) {
+      const int state_size = radeonCountStateEmitSize(&rmesa->radeon);
       if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
-              radeonCountStateEmitSize(&rmesa->radeon) +
+              state_size +
               vertex_array_size + prim_size,
               __FUNCTION__))
-        rmesa->radeon.swtcl.primitive_counter = 0;
+        rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon);
       else
-        rmesa->radeon.swtcl.primitive_counter = 1;
+        rmesa->radeon.swtcl.emit_prediction = state_size;
+      rmesa->radeon.swtcl.emit_prediction += vertex_array_size + prim_size
+        + rmesa->radeon.cmdbuf.cs->cdw;
    }
 }
 
 
+static void r200RenderStart( GLcontext *ctx )
+{
+   r200SetVertexFormat( ctx );
+   if (RADEON_DEBUG & DEBUG_VERTS)
+      fprintf(stderr, "%s\n", __func__);
+   r200_predict_emit_size( ctx );
+}
+
+
 /**
  * Set vertex state for SW TCL.  The primary purpose of this function is to
  * determine in advance whether or not the hardware can / should do the
@@ -296,8 +304,13 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
    r200EmitVbufPrim( rmesa,
                     rmesa->radeon.swtcl.hw_primitive,
                     rmesa->radeon.swtcl.numverts);
+   if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+      WARN_ONCE("Rendering was %d commands larger than predicted size."
+           " We might overflow  command buffer.\n",
+           rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
 
-   rmesa->radeon.swtcl.primitive_counter = 0;
+   rmesa->radeon.swtcl.emit_prediction = 0;
+   r200_predict_emit_size( ctx );
 
 }
 
@@ -905,7 +918,7 @@ void r200InitSwtcl( GLcontext *ctx )
       init_rast_tab();
       firsttime = 0;
    }
-   rmesa->radeon.swtcl.primitive_counter = 0;
+   rmesa->radeon.swtcl.emit_prediction = 0;
 
    tnl->Driver.Render.Start = r200RenderStart;
    tnl->Driver.Render.Finish = r200RenderFinish;
index 18af51e666addeedb23eab27af1cef9043fc5ada..ca17f306d14aef61c1d2ffb326c26d9eb69f44c4 100644 (file)
@@ -490,6 +490,27 @@ static void r300ChooseRenderState( GLcontext *ctx )
                rmesa->radeon.swtcl.RenderIndex = index;
        }
 }
+static void r300_predict_emit_size( GLcontext *ctx )
+{
+       r300ContextPtr rmesa = R300_CONTEXT( ctx );
+       if (!rmesa->radeon.swtcl.emit_prediction) {
+               const int vertex_size = 7;
+               const int prim_size = 3;
+               const int cache_flush_size = 4;
+               const int state_size = radeonCountStateEmitSize(&rmesa->radeon);
+
+               if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
+                                       state_size +
+                                       + vertex_size + prim_size,
+                                       __FUNCTION__))
+                       rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon);
+               else
+                       rmesa->radeon.swtcl.emit_prediction = state_size;
+
+               rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw
+                       + vertex_size + prim_size + cache_flush_size * 2;
+       }
+}
 
 
 void r300RenderStart(GLcontext *ctx)
@@ -508,20 +529,7 @@ void r300RenderStart(GLcontext *ctx)
 
        r300UpdateShaderStates(rmesa);
 
-       const int vertex_size = 7;
-       const int prim_size = 3;
-
-       if (!rmesa->radeon.swtcl.primitive_counter) {
-               if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
-                                       radeonCountStateEmitSize(&rmesa->radeon) +
-                                       + vertex_size + prim_size,
-                                       __FUNCTION__))
-                       rmesa->radeon.swtcl.primitive_counter = 0;
-               else
-                       rmesa->radeon.swtcl.primitive_counter = 1;
-       }
-
-       r300EmitCacheFlush(rmesa);
+       r300_predict_emit_size( ctx );
 
        /* investigate if we can put back flush optimisation if needed */
        if (rmesa->radeon.dma.flush != NULL) {
@@ -577,7 +585,7 @@ void r300InitSwtcl(GLcontext *ctx)
                init_rast_tab();
                firsttime = 0;
        }
-       rmesa->radeon.swtcl.primitive_counter = 0;
+       rmesa->radeon.swtcl.emit_prediction = 0;
 
        tnl->Driver.Render.Start = r300RenderStart;
        tnl->Driver.Render.Finish = r300RenderFinish;
@@ -644,6 +652,8 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
                fprintf(stderr, "%s\n", __func__);
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
+       r300EmitCacheFlush(rmesa);
+
        radeonEmitState(&rmesa->radeon);
     r300_emit_scissor(ctx);
        r300EmitVertexAOS(rmesa,
@@ -655,6 +665,11 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
                   rmesa->radeon.swtcl.hw_primitive,
                   rmesa->radeon.swtcl.numverts);
        r300EmitCacheFlush(rmesa);
-       rmesa->radeon.swtcl.primitive_counter = 0;
+       if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+               WARN_ONCE("Rendering was %d commands larger than predicted size."
+                       " We might overflow  command buffer.\n",
+                       rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
+       rmesa->radeon.swtcl.emit_prediction = 0;
+       r300_predict_emit_size( ctx );
        COMMIT_BATCH();
 }
index 07a0c4c05544ac9be1797b1050c0455f269610d4..427eb946ffa8316d0754567f3d33f3efb1aec55b 100644 (file)
@@ -342,7 +342,7 @@ struct radeon_swtcl_info {
        struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
        GLuint vertex_attr_count;
 
-       GLuint primitive_counter;
+       GLuint emit_prediction;
 };
 
 #define RADEON_MAX_AOS_ARRAYS          16
index c9e399f9b61dd4bd23f57a85772d3e5c34256bc5..7358e22255b4c9ee66914d6c02e3e953b34c7ca9 100644 (file)
@@ -220,6 +220,27 @@ static void radeonSetVertexFormat( GLcontext *ctx )
    }
 }
 
+static void radeon_predict_emit_size( GLcontext* ctx )
+{
+    r100ContextPtr rmesa = R100_CONTEXT( ctx );
+
+    if (!rmesa->radeon.swtcl.emit_prediction) {
+        const int state_size = radeonCountStateEmitSize( &rmesa->radeon );
+        const int scissor_size = 8;
+        const int prims_size = 8;
+        const int vertex_size = 7;
+
+        if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
+                    state_size +
+                    (scissor_size + prims_size + vertex_size),
+                    __FUNCTION__))
+            rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize( &rmesa->radeon );
+        else
+            rmesa->radeon.swtcl.emit_prediction = state_size;
+        rmesa->radeon.swtcl.emit_prediction += scissor_size + prims_size + vertex_size
+            + rmesa->radeon.cmdbuf.cs->cdw;
+    }
+}
 
 static void radeonRenderStart( GLcontext *ctx )
 {
@@ -230,16 +251,7 @@ static void radeonRenderStart( GLcontext *ctx )
     if (rmesa->radeon.dma.flush != 0 &&
             rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
         rmesa->radeon.dma.flush( ctx );
-
-    if (!rmesa->radeon.swtcl.primitive_counter) {
-        if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
-                    radeonCountStateEmitSize( &rmesa->radeon ) +
-                    (8 + 8 + 7), /* scissor + primis + VertexAOS */
-                    __FUNCTION__))
-            rmesa->radeon.swtcl.primitive_counter = 0;
-        else
-            rmesa->radeon.swtcl.primitive_counter = 1;
-    }
+    radeon_predict_emit_size( ctx );
 }
 
 
@@ -307,9 +319,14 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
                       rmesa->swtcl.vertex_format,
                       rmesa->radeon.swtcl.hw_primitive,
                       rmesa->radeon.swtcl.numverts);
+   if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+     WARN_ONCE("Rendering was %d commands larger than predicted size."
+        " We might overflow  command buffer.\n",
+        rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
 
+   rmesa->radeon.swtcl.emit_prediction = 0; /* reset first: the predictor only runs when this is 0 */

-   rmesa->radeon.swtcl.primitive_counter = 0;
+   radeon_predict_emit_size( ctx );
 
 }
 
@@ -814,8 +831,8 @@ void radeonInitSwtcl( GLcontext *ctx )
    if (firsttime) {
       init_rast_tab();
       firsttime = 0;
-      rmesa->radeon.swtcl.primitive_counter = 0;
    }
+   rmesa->radeon.swtcl.emit_prediction = 0;
 
    tnl->Driver.Render.Start = radeonRenderStart;
    tnl->Driver.Render.Finish = radeonRenderFinish;