i965/fs: Add a little whitespace between shader dumping debug.

[mesa.git] / src / mesa / drivers / dri / r200 / r200_tcl.c
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c

index 0f5e501c1a61172681c76702a4b964dad42645c2..7aed116f0b3f193ff8915593c26e8ca59d837e6c 100644 (file)
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -46,11 +46,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #include "r200_context.h"
  #include "r200_state.h"
  #include "r200_ioctl.h"
-#include "r200_tex.h"
  #include "r200_tcl.h"
  #include "r200_swtcl.h"
  #include "r200_maos.h"
  
+#include "radeon_common_context.h"
+
  
  
  #define HAVE_POINTS      1
@@ -67,9 +68,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #define HAVE_ELTS        1
  
  
-#define HW_POINTS           ((ctx->Point.PointSprite || \
-                               ((ctx->_TriangleCaps & (DD_POINT_SIZE | DD_POINT_ATTEN)) && \
-                               !(ctx->_TriangleCaps & (DD_POINT_SMOOTH)))) ? \
+#define HW_POINTS           (((R200_CONTEXT(ctx))->radeon.radeonScreen->drmSupportsPointSprites && \
+                             !(ctx->_TriangleCaps & DD_POINT_SMOOTH)) ? \
                                 R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS)
  #define HW_LINES            R200_VF_PRIM_LINES
  #define HW_LINE_LOOP        0
@@ -159,6 +159,7 @@ static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr )
        r200EmitAOS( rmesa,
                    rmesa->radeon.tcl.aos_count, 0 );
  
+      r200EmitMaxVtxIndex(rmesa, rmesa->radeon.tcl.aos[0].count);
        return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr );
     }
  }
@@ -175,7 +176,7 @@ while (0)
   * discrete and there are no intervening state changes.  (Somewhat
   * duplicates changes to DrawArrays code)
   */
-static void r200EmitPrim( GLcontext *ctx, 
+static void r200EmitPrim( struct gl_context *ctx, 
                           GLenum prim, 
                           GLuint hwprim, 
                           GLuint start, 
@@ -239,7 +240,7 @@ static void r200EmitPrim( GLcontext *ctx,
  /*                          External entrypoints                     */
  /**********************************************************************/
  
-void r200EmitPrimitive( GLcontext *ctx, 
+void r200EmitPrimitive( struct gl_context *ctx, 
                           GLuint first,
                           GLuint last,
                           GLuint flags )
@@ -247,7 +248,7 @@ void r200EmitPrimitive( GLcontext *ctx,
     tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
  }
  
-void r200EmitEltPrimitive( GLcontext *ctx, 
+void r200EmitEltPrimitive( struct gl_context *ctx, 
                              GLuint first,
                              GLuint last,
                              GLuint flags )
@@ -255,13 +256,17 @@ void r200EmitEltPrimitive( GLcontext *ctx,
     tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
  }
  
-void r200TclPrimitive( GLcontext *ctx, 
+void r200TclPrimitive( struct gl_context *ctx, 
                          GLenum prim,
                          int hw_prim )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
     GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE;
  
+   radeon_prepare_render(&rmesa->radeon);
+   if (rmesa->radeon.NewGLState)
+      r200ValidateState( ctx );
+
     if (newprim != rmesa->tcl.hw_primitive ||
         !discrete_prim[hw_prim&0xf]) {
        /* need to disable perspective-correct texturing for point sprites */
@@ -333,7 +338,7 @@ r200InitStaticFogData( void )
   * Fog blend factors are in the range [0,1].
   */
  float
-r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
+r200ComputeFogBlendFactor( struct gl_context *ctx, GLfloat fogcoord )
  {
     GLfloat end  = ctx->Fog.End;
     GLfloat d, temp;
@@ -368,12 +373,13 @@ r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
   * Predict the total emit size for the next rendering operation so that no flush happens in the middle of rendering.
   * The prediction should be as tight as possible while never being smaller than the real worst case.
   */
-static void r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
+static GLuint r200EnsureEmitSize( struct gl_context * ctx , GLubyte* vimap_rev )
  {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    GLuint space_required;
+  GLuint state_size;
    GLuint nr_aos = 0;
    int i;
    /* predict number of aos to emit */
@@ -387,10 +393,11 @@ static void r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
  
    {
      /* count the prediction for state size */
-    space_required = radeonCountStateEmitSize( &rmesa->radeon );
+    space_required = 0;
+    state_size = radeonCountStateEmitSize( &rmesa->radeon );
      /* vtx may be changed in r200EmitArrays so account for it if not dirty */
      if (!rmesa->hw.vtx.dirty)
-      space_required += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx);
+      state_size += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx);
      /* predict size for elements */
      for (i = 0; i < VB->PrimitiveCount; ++i)
      {
@@ -400,8 +407,9 @@ static void r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
           rendering code may decide to convert to elts.
          In that case we have to make a pessimistic prediction
          and use the larger of the 2 paths. */
-      const GLuint elts = ELTS_BUFSZ(nr_aos);
-      const GLuint index = INDEX_BUFSZ;
+      const GLuint elt_count =(VB->Primitive[i].count/GET_MAX_HW_ELTS() + 1);
+      const GLuint elts = ELTS_BUFSZ(nr_aos) * elt_count;
+      const GLuint index = INDEX_BUFSZ * elt_count;
        const GLuint vbuf = VBUF_BUFSZ;
        if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
           || vbuf > index + elts)
@@ -410,10 +418,16 @@ static void r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
         space_required += index + elts;
        space_required += AOS_BUFSZ(nr_aos);
      }
-    space_required += SCISSOR_BUFSZ;
    }
+
+  radeon_print(RADEON_RENDER,RADEON_VERBOSE,
+      "%s space %u, aos %d\n",
+      __func__, space_required, AOS_BUFSZ(nr_aos) );
    /* flush the buffer in case we need more than is left. */
-  rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__);
+  if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __FUNCTION__))
+    return space_required + radeonCountStateEmitSize( &rmesa->radeon );
+  else
+    return space_required + state_size;
  }
  
  
@@ -424,7 +438,7 @@ static void r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
  
  /* TCL render.
   */
-static GLboolean r200_run_tcl_render( GLcontext *ctx,
+static GLboolean r200_run_tcl_render( struct gl_context *ctx,
                                       struct tnl_pipeline_stage *stage )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
@@ -444,8 +458,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
     if (rmesa->radeon.TclFallback)
        return GL_TRUE;  /* fallback to software t&l */
  
-   if (R200_DEBUG & DEBUG_PRIMS)
-      fprintf(stderr, "%s\n", __FUNCTION__);
+   radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s\n", __FUNCTION__);
  
     if (VB->Count == 0)
        return GL_FALSE;
@@ -499,25 +512,26 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
          prog to a not enabled output however, so just don't mess with it.
          We only need to change compsel. */
        GLuint out_compsel = 0;
-      GLuint vp_out = rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten;
+      const GLbitfield64 vp_out =
+        rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten;
  
        vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0];
-      assert(vp_out & (1 << VERT_RESULT_HPOS));
+      assert(vp_out & BITFIELD64_BIT(VERT_RESULT_HPOS));
        out_compsel = R200_OUTPUT_XYZW;
-      if (vp_out & (1 << VERT_RESULT_COL0)) {
+      if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL0)) {
          out_compsel |= R200_OUTPUT_COLOR_0;
        }
-      if (vp_out & (1 << VERT_RESULT_COL1)) {
+      if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL1)) {
          out_compsel |= R200_OUTPUT_COLOR_1;
        }
-      if (vp_out & (1 << VERT_RESULT_FOGC)) {
+      if (vp_out & BITFIELD64_BIT(VERT_RESULT_FOGC)) {
           out_compsel |= R200_OUTPUT_DISCRETE_FOG;
        }
-      if (vp_out & (1 << VERT_RESULT_PSIZ)) {
+      if (vp_out & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
          out_compsel |= R200_OUTPUT_PT_SIZE;
        }
        for (i = VERT_RESULT_TEX0; i < VERT_RESULT_TEX6; i++) {
-        if (vp_out & (1 << i)) {
+        if (vp_out & BITFIELD64_BIT(i)) {
             out_compsel |= R200_OUTPUT_TEX_0 << (i - VERT_RESULT_TEX0);
          }
        }
@@ -530,7 +544,8 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
     /* Do the actual work:
      */
     radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
-   r200EnsureEmitSize( ctx, vimap_rev );
+   GLuint emit_end = r200EnsureEmitSize( ctx, vimap_rev )
+     + rmesa->radeon.cmdbuf.cs->cdw;
     r200EmitArrays( ctx, vimap_rev );
  
     for (i = 0 ; i < VB->PrimitiveCount ; i++)
@@ -547,6 +562,9 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
        else
          r200EmitPrimitive( ctx, start, start+length, prim );
     }
+   if ( emit_end < rmesa->radeon.cmdbuf.cs->cdw )
+     WARN_ONCE("Rendering was %d commands larger than predicted size."
+        " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
  
     return GL_FALSE;            /* finished the pipe */
  }
@@ -577,7 +595,7 @@ const struct tnl_pipeline_stage _r200_tcl_stage =
   */
  
  
-static void transition_to_swtnl( GLcontext *ctx )
+static void transition_to_swtnl( struct gl_context *ctx )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
     TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -601,7 +619,7 @@ static void transition_to_swtnl( GLcontext *ctx )
     rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~(R200_VAP_TCL_ENABLE|R200_VAP_PROG_VTX_SHADER_ENABLE);
  }
  
-static void transition_to_hwtnl( GLcontext *ctx )
+static void transition_to_hwtnl( struct gl_context *ctx )
  {
     r200ContextPtr rmesa = R200_CONTEXT(ctx);
     TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -637,7 +655,7 @@ static void transition_to_hwtnl( GLcontext *ctx )
     rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VTX_XY_FMT|R200_VTX_Z_FMT);
     rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] |= R200_VTX_W0_FMT;
  
-   if (R200_DEBUG & DEBUG_FALLBACKS) 
+   if (R200_DEBUG & RADEON_FALLBACKS)
        fprintf(stderr, "R200 end tcl fallback\n");
  }
  
@@ -671,27 +689,36 @@ static char *getFallbackString(GLuint bit)
  
  
  
-void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+void r200TclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
  {
-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint oldfallback = rmesa->radeon.TclFallback;
-
-   if (mode) {
-      rmesa->radeon.TclFallback |= bit;
-      if (oldfallback == 0) {
-        if (R200_DEBUG & DEBUG_FALLBACKS) 
-           fprintf(stderr, "R200 begin tcl fallback %s\n",
-                   getFallbackString( bit ));
-        transition_to_swtnl( ctx );
-      }
-   }
-   else {
-      rmesa->radeon.TclFallback &= ~bit;
-      if (oldfallback == bit) {
-        if (R200_DEBUG & DEBUG_FALLBACKS) 
-           fprintf(stderr, "R200 end tcl fallback %s\n",
-                   getFallbackString( bit ));
-        transition_to_hwtnl( ctx );
-      }
-   }
+       r200ContextPtr rmesa = R200_CONTEXT(ctx);
+       GLuint oldfallback = rmesa->radeon.TclFallback;
+
+       if (mode) {
+               if (oldfallback == 0) {
+                       /* We have to flush before transition */
+                       if ( rmesa->radeon.dma.flush )
+                               rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+
+                       if (R200_DEBUG & RADEON_FALLBACKS)
+                               fprintf(stderr, "R200 begin tcl fallback %s\n",
+                                               getFallbackString( bit ));
+                       rmesa->radeon.TclFallback |= bit;
+                       transition_to_swtnl( ctx );
+               } else
+                       rmesa->radeon.TclFallback |= bit;
+       } else {
+               if (oldfallback == bit) {
+                       /* We have to flush before transition */
+                       if ( rmesa->radeon.dma.flush )
+                               rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+
+                       if (R200_DEBUG & RADEON_FALLBACKS)
+                               fprintf(stderr, "R200 end tcl fallback %s\n",
+                                               getFallbackString( bit ));
+                       rmesa->radeon.TclFallback &= ~bit;
+                       transition_to_hwtnl( ctx );
+               } else
+                       rmesa->radeon.TclFallback &= ~bit;
+       }
  }