 * are available, you will also need to add rmesa->state.max_state_size because
* r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
*/
-#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
-#define VERT_AOS_BUFSZ (5 * sizeof(int))
+#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + (nr * 2)))
+#define VERT_AOS_BUFSZ (5)
#define ELTS_BUFSZ(nr) (12 + nr * 2)
-#define VBUF_BUFSZ (3 * sizeof(int))
+#define VBUF_BUFSZ (3)
+#define SCISSOR_BUFSZ (8)
+#define INDEX_BUFSZ (8+2)
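+/* Note (comment added for clarity): the sizes above are now counted in
+ * dwords rather than bytes (the old `* sizeof(int)` factor is gone), to
+ * match what radeonCountEmitSize() and rcommonEnsureCmdBufSpace() expect.
+ * Illustrative arithmetic: AOS_BUFSZ(2) = 3 + (1 * 3) + (0 * 2) + (2 * 2)
+ * = 10 dwords.
+ */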
static inline uint32_t cmdpacket3(int cmd_type)
{
r200EmitPrim( ctx, prim, hwprim, start, count ); \
(void) rmesa; } while (0)
+#define MAX_CONVERSION_SIZE 40
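+/* Assumed meaning (comment added for clarity): primitives with fewer
+ * vertices than this may be converted to indexed (elts) rendering, so the
+ * size prediction in r200EnsureEmitSize() below must cover both paths.
+ */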
/* Try & join small primitives
*/
#if 0
}
}
+/**
+ * Predict the total emit size for the next rendering operation so that
+ * there is no flush in the middle of rendering.
+ * The prediction has to be an upper bound: as close to the real cost as
+ * possible, but never smaller than the worst case.
+ */
+static void r200EnsureEmitSize( GLcontext *ctx, GLubyte *vimap_rev )
+{
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint space_required;
+ GLuint nr_aos = 0;
+ int i;
+ /* predict number of aos to emit */
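+ /* assumption (comment added for clarity): vimap_rev has 15 entries and
+    255 marks an unused slot */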
+ for (i = 0; i < 15; ++i)
+ {
+ if (vimap_rev[i] != 255)
+ {
+ ++nr_aos;
+ }
+ }
+
+ {
+ /* count the predicted state emit size */
+ space_required = radeonCountEmitSize( &rmesa->radeon );
+ /* the vtx atom may be changed by r200EmitArrays; if it is not dirty yet,
+    its size was not counted above, so account for it here */
+ if (!rmesa->hw.vtx.dirty)
+ space_required += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx);
+ /* predict size for elements */
+ for (i = 0; i < VB->PrimitiveCount; ++i)
+ {
+ if (!VB->Primitive[i].count)
+ continue;
+ /* If primitive.count is less than MAX_CONVERSION_SIZE the rendering
+    code may decide to convert the primitive to elts. In that case we
+    have to make a pessimistic prediction and use the larger of the
+    two paths. */
+ const GLuint elts = ELTS_BUFSZ(nr_aos);
+ const GLuint index = INDEX_BUFSZ;
+ const GLuint vbuf = VBUF_BUFSZ;
+ if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+ || vbuf > index + elts)
+ space_required += vbuf;
+ else
+ space_required += index + elts;
+ space_required += AOS_BUFSZ(nr_aos);
+ }
+ space_required += SCISSOR_BUFSZ;
+ }
+ /* flush the buffer now if the remaining space may not be enough. */
+ rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__);
+}
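+
+/* Illustrative worst case (assumed example numbers, not from the patch):
+ * with nr_aos == 2 and one convertible primitive, the elts path costs
+ * INDEX_BUFSZ + ELTS_BUFSZ(2) = 10 + 16 = 26 dwords versus VBUF_BUFSZ = 3,
+ * so the larger value is booked: 26 + AOS_BUFSZ(2) = 36 dwords, plus
+ * SCISSOR_BUFSZ = 8 on top of the counted state size.
+ */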
+
/**********************************************************************/
/* Render pipeline stage */
/* Do the actual work:
*/
radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
+ r200EnsureEmitSize( ctx, vimap_rev );
r200EmitArrays( ctx, vimap_rev );
rmesa->tcl.Elts = VB->Elts;
}
}
+/**
+ * Count the total size, in dwords, of the next state emit.
+ */
+GLuint radeonCountEmitSize(radeonContextPtr radeon)
+{
+ struct radeon_state_atom *atom;
+ int dwords = 0;
+ /* check if we are going to emit full state */
+ if (radeon->cmdbuf.cs->cdw && !radeon->hw.all_dirty) {
+ if (!radeon->hw.is_dirty)
+ return dwords;
+ foreach(atom, &radeon->hw.atomlist) {
+ if (atom->dirty)
+ dwords += atom->check(radeon->glCtx, atom);
+ }
+ } else {
+ foreach(atom, &radeon->hw.atomlist) {
+ dwords += atom->check(radeon->glCtx, atom);
+ }
+ }
+ return dwords;
+}
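+
+/* Usage sketch (assumption, mirroring r200EnsureEmitSize above): count the
+ * state size first, then reserve command buffer space before emitting, e.g.
+ *
+ *   GLuint dwords = radeonCountEmitSize(&rmesa->radeon);
+ *   rcommonEnsureCmdBufSpace(&rmesa->radeon, dwords + per_draw_cost, __FUNCTION__);
+ *
+ * where per_draw_cost is a hypothetical placeholder for the AOS, element,
+ * and scissor sizes of the pending draw.
+ */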
+
static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty)
{
BATCH_LOCALS(radeon);