r300: fix vertex program parameters limits
[mesa.git] / src / mesa / drivers / dri / r300 / r300_swtcl.c
index ce4179208ebba26579c219b74147c76242a67fa0..ee2c71e1a7f81382afc0801f0ce5d224d3ee5555 100644 (file)
@@ -39,12 +39,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r300_emit.h"
 #include "r300_tex.h"
 #include "r300_render.h"
+#include "main/simple_list.h"
 
 #define EMIT_ATTR( ATTR, STYLE )                                       \
 do {                                                                   \
-   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);    \
-   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);   \
-   rmesa->radeon.swtcl.vertex_attr_count++;                                    \
+       rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);        \
+       rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);       \
+       rmesa->radeon.swtcl.vertex_attr_count++; /* entry filled: step to the next vertex_attrs slot */ \
 } while (0)
 
 #define EMIT_PAD( N )                                                  \
@@ -76,12 +77,16 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead,  GLuint *_
        GLuint InputsRead = 0;
        GLuint OutputsWritten = 0;
        int num_attrs = 0;
-       GLuint fp_reads = ctx->FragmentProgram._Current->Base.InputsRead;
+       GLuint fp_reads = rmesa->selected_fp->InputsRead;
        struct vertex_attribute *attrs = rmesa->vbuf.attribs;
 
+       radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
        rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
        rmesa->radeon.swtcl.vertex_attr_count = 0;
 
+       if (RADEON_DEBUG & RADEON_VERTS)
+               fprintf(stderr, "%s\n", __func__);
+
        /* We always want non Ndc coords format */
        VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
 
@@ -150,6 +155,22 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead,  GLuint *_
                ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
        }
 
+       if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+               int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
+
+               VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+               VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+               RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+       }
+
+       if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+               int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
+
+               VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+               VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+               RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+       }
+
        /**
         *  Sending only one texcoord component may lead to lock up,
         *  so for all textures always output 4 texcoord components to RS.
@@ -192,31 +213,9 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead,  GLuint *_
                }
        }
 
-       /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */
-       if (fp_reads & FRAG_BIT_WPOS) {
-               if (first_free_tex >= ctx->Const.MaxTextureUnits) {
-                       fprintf(stderr, "\tout of free texcoords to write w pos\n");
-                       _mesa_exit(-1);
-               }
-
-               InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex);
-               OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex);
-               EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
-               ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW, 0);
-               ++first_free_tex;
-       }
-
-       if (fp_reads & FRAG_BIT_FOGC) {
-               if (first_free_tex >= ctx->Const.MaxTextureUnits) {
-                       fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
-                       _mesa_exit(-1);
-               }
-
-               InputsRead |= 1 << VERT_ATTRIB_FOG;
-               OutputsWritten |= 1 << VERT_RESULT_FOGC;
-               GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
-               EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F );
-               ADD_ATTR(VERT_ATTRIB_FOG, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0);
+       if (first_free_tex >= ctx->Const.MaxTextureUnits) {
+               fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
+               _mesa_exit(-1);
        }
 
        R300_NEWPRIM(rmesa);
@@ -231,6 +230,7 @@ static void r300PrepareVertices(GLcontext *ctx)
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        GLuint InputsRead, OutputsWritten;
+       radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
 
        r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten);
        r300SetupVAP(ctx, InputsRead, OutputsWritten);
@@ -244,6 +244,33 @@ static void r300PrepareVertices(GLcontext *ctx)
        rmesa->radeon.swtcl.vertex_size /= 4;
 }
 
+static void r300_predict_emit_size( r300ContextPtr rmesa ) /* fills swtcl.emit_prediction; does nothing while it is already non-zero */
+{
+       if (!rmesa->radeon.swtcl.emit_prediction) {
+               const int vertex_size = 7; /* same count as BEGIN_BATCH(7) in r300EmitVertexAOS */
+               const int prim_size = 3; /* presumably the r300EmitVbufPrim packet -- TODO confirm */
+               const int cache_flush_size = 4; /* counted twice below: r300_swtcl_flush calls r300EmitCacheFlush twice */
+               const int pre_emit_state = 4; /* NOTE(review): what this covers is not visible here -- confirm */
+               const int scissor_size = 3; /* presumably r300_emit_scissor -- TODO confirm */
+               const int state_size = radeonCountStateEmitSize(&rmesa->radeon);
+
+               if (rcommonEnsureCmdBufSpace(&rmesa->radeon, /* request room for state plus all fixed-size packets */
+                                       state_size + pre_emit_state + scissor_size
+                                       + vertex_size + prim_size + cache_flush_size * 2,
+                                       __FUNCTION__))
+                       rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); /* recount; presumably the buffer was flushed and state is dirty again -- TODO confirm */
+               else
+                       rmesa->radeon.swtcl.emit_prediction = state_size;
+
+               rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw /* prediction = state size + current cs->cdw + fixed packet sizes */
+                       + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state;
+               radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, /* the logged size omits the state-size term of the prediction */
+                               "%s, size %d\n",
+                               __func__, rmesa->radeon.cmdbuf.cs->cdw
+                               + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state);
+       }
+}
+
 
 static GLuint reduced_prim[] = {
        GL_POINTS,
@@ -277,11 +304,21 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint prim );
 #define HAVE_POLYGONS    1
 #define HAVE_ELTS        1
 
+static void* r300_alloc_verts(r300ContextPtr rmesa, GLuint n, GLuint size) /* ALLOC_VERTS backend: never returns NULL, loops instead */
+{
+       void *rv;
+       do {
+               r300_predict_emit_size( rmesa ); /* run before every attempt; it is a no-op once a prediction is set */
+               rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ); /* size is scaled by 4 (presumably dwords -> bytes -- TODO confirm) */
+       } while (!rv); /* retry for as long as the allocator returns NULL */
+       return rv;
+}
+
 #undef LOCAL_VARS
 #undef ALLOC_VERTS
 #define CTX_ARG r300ContextPtr rmesa
 #define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
-#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
+#define ALLOC_VERTS( n, size ) r300_alloc_verts(rmesa, n, size) /* expression macro: no trailing ';', callers add their own */
 #define LOCAL_VARS                                             \
    r300ContextPtr rmesa = R300_CONTEXT(ctx);           \
    const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
@@ -467,6 +504,7 @@ static void r300ChooseRenderState( GLcontext *ctx )
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        GLuint index = 0;
        GLuint flags = ctx->_TriangleCaps;
+       radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
 
        if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
 
@@ -491,20 +529,21 @@ static void r300ChooseRenderState( GLcontext *ctx )
        }
 }
 
-
 void r300RenderStart(GLcontext *ctx)
 {
+       radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
        r300ContextPtr rmesa = R300_CONTEXT( ctx );
 
        r300ChooseRenderState(ctx);
+
+       r300UpdateShaders(rmesa);
+
        r300PrepareVertices(ctx);
 
        r300ValidateBuffers(ctx);
 
-       r300UpdateShaders(rmesa);
        r300UpdateShaderStates(rmesa);
 
-       r300EmitCacheFlush(rmesa);
 
        /* investigate if we can put back flush optimisation if needed */
        if (rmesa->radeon.dma.flush != NULL) {
@@ -519,6 +558,7 @@ void r300RenderFinish(GLcontext *ctx)
 static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
+       radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
 
        if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
                R300_NEWPRIM( rmesa );
@@ -531,6 +571,7 @@ void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
 
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        rmesa->radeon.swtcl.render_primitive = prim;
+       radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
 
        if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
                return;
@@ -540,6 +581,8 @@ void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
 
 void r300ResetLineStipple(GLcontext *ctx)
 {
+       if (RADEON_DEBUG & RADEON_VERTS) /* only traced when the RADEON_VERTS debug bit is set */
+               fprintf(stderr, "%s\n", __func__); /* the trace is the whole body: no stipple state is modified here */
 }
 
 void r300InitSwtcl(GLcontext *ctx)
@@ -547,11 +590,13 @@ void r300InitSwtcl(GLcontext *ctx)
        TNLcontext *tnl = TNL_CONTEXT(ctx);
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        static int firsttime = 1;
+       radeon_print(RADEON_SWRENDER, RADEON_NORMAL, "%s\n", __func__);
 
        if (firsttime) {
                init_rast_tab();
                firsttime = 0;
        }
+       rmesa->radeon.swtcl.emit_prediction = 0;
 
        tnl->Driver.Render.Start = r300RenderStart;
        tnl->Driver.Render.Finish = r300RenderFinish;
@@ -584,8 +629,8 @@ static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct r
 {
        BATCH_LOCALS(&rmesa->radeon);
 
-       if (RADEON_DEBUG & DEBUG_VERTS)
-               fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
+       radeon_print(RADEON_SWRENDER, RADEON_TRACE,
+               "%s:  vertex_size %d, offset 0x%x \n",
                        __FUNCTION__, vertex_size, offset);
 
        BEGIN_BATCH(7);
@@ -600,6 +645,8 @@ static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vert
 {
        BATCH_LOCALS(&rmesa->radeon);
        int type, num_verts;
+       if (RADEON_DEBUG & RADEON_VERTS)
+               fprintf(stderr, "%s\n", __func__);
 
        type = r300PrimitiveType(rmesa, primitive);
        num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
@@ -612,21 +659,26 @@ static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vert
 
 void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
 {
+       radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
-       rcommonEnsureCmdBufSpace(&rmesa->radeon,
-                          rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
-                          __FUNCTION__);
+       r300EmitCacheFlush(rmesa);
+
        radeonEmitState(&rmesa->radeon);
     r300_emit_scissor(ctx);
        r300EmitVertexAOS(rmesa,
                        rmesa->radeon.swtcl.vertex_size,
-                       rmesa->radeon.dma.current,
+                       first_elem(&rmesa->radeon.dma.reserved)->bo,
                        current_offset);
 
        r300EmitVbufPrim(rmesa,
                   rmesa->radeon.swtcl.hw_primitive,
                   rmesa->radeon.swtcl.numverts);
        r300EmitCacheFlush(rmesa);
+       if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+               WARN_ONCE("Rendering was %d commands larger than predicted size."
+                       " We might overflow  command buffer.\n",
+                       rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
+       rmesa->radeon.swtcl.emit_prediction = 0;
        COMMIT_BATCH();
 }