r300: rework index buffer setup
author: Maciej Cencora <m.cencora@gmail.com>
Fri, 14 Aug 2009 14:59:26 +0000 (16:59 +0200)
committer: Maciej Cencora <m.cencora@gmail.com>
Fri, 14 Aug 2009 15:10:15 +0000 (17:10 +0200)
Copy elements directly to DMA bo to get rid of one memcpy, and prepare for using VBOs for index buffer.

src/mesa/drivers/dri/r300/r300_context.h
src/mesa/drivers/dri/r300/r300_draw.c
src/mesa/drivers/dri/r300/r300_render.c

index 09de898748e0b349b2681c9baa04eaa1eb9c5c6d..d6204174229a5218c1522a36308336105b369611 100644 (file)
@@ -498,9 +498,10 @@ struct r300_vertex_buffer {
 };
 
 struct r300_index_buffer {
-       GLvoid *ptr;
+       struct radeon_bo *bo;
+       int bo_offset;
+
        GLboolean is_32bit;
-       GLboolean free_needed;
        GLuint count;
 };
 
index 99c73d27a2cc8c90853b7d11643acbe334ac5aa2..1d6e6db773db1ce246933bb9b4434f8773f08a8a 100644 (file)
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
 
+
+static int getTypeSize(GLenum type)
+{
+       switch (type) {
+               case GL_DOUBLE:
+                       return sizeof(GLdouble);
+               case GL_FLOAT:
+                       return sizeof(GLfloat);
+               case GL_INT:
+                       return sizeof(GLint);
+               case GL_UNSIGNED_INT:
+                       return sizeof(GLuint);
+               case GL_SHORT:
+                       return sizeof(GLshort);
+               case GL_UNSIGNED_SHORT:
+                       return sizeof(GLushort);
+               case GL_BYTE:
+                       return sizeof(GLbyte);
+               case GL_UNSIGNED_BYTE:
+                       return sizeof(GLubyte);
+               default:
+                       assert(0);
+                       return 0;
+       }
+}
+
 static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-       struct r300_index_buffer *ind_buf = &r300->ind_buf;
        GLvoid *src_ptr;
-       GLboolean mapped_bo = GL_FALSE;
+       GLuint *out;
+       int i;
 
-       if (!mesa_ind_buf) {
-               ind_buf->ptr = NULL;
-               return;
-       }
-
-       ind_buf->count = mesa_ind_buf->count;
        if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
                ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
-               mapped_bo = GL_TRUE;
                assert(mesa_ind_buf->obj->Pointer != NULL);
        }
        src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
 
        if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) {
+               GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
                GLubyte *in = (GLubyte *)src_ptr;
-               GLuint *out = _mesa_malloc(sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1));
-               int i;
 
-               ind_buf->ptr = out;
+               radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+
+               assert(r300->ind_buf.bo->ptr != NULL);
+               out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
 
                for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
                        *out++ = in[i] | in[i + 1] << 16;
@@ -82,16 +103,15 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
                        *out++ = in[i];
                }
 
-               ind_buf->free_needed = GL_TRUE;
-               ind_buf->is_32bit = GL_FALSE;
-       } else if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) {
 #if MESA_BIG_ENDIAN
+       } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
                GLushort *in = (GLushort *)src_ptr;
-               GLuint *out = _mesa_malloc(sizeof(GLushort) *
-                                          ((mesa_ind_buf->count + 1) & ~1));
-               int i;
+               size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+               radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offet, size, 4);
 
-               ind_buf->ptr = out;
+               assert(r300->ind_buf.bo->ptr != NULL)
+               out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
 
                for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
                        *out++ = in[i] | in[i + 1] << 16;
@@ -100,46 +120,52 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
                if (i < mesa_ind_buf->count) {
                        *out++ = in[i];
                }
-
-               ind_buf->free_needed = GL_TRUE;
-#else
-               ind_buf->ptr = src_ptr;
-               ind_buf->free_needed = GL_FALSE;
 #endif
-               ind_buf->is_32bit = GL_FALSE;
-       } else {
-               ind_buf->ptr = src_ptr;
-               ind_buf->free_needed = GL_FALSE;
-               ind_buf->is_32bit = GL_TRUE;
        }
 
-       if (mapped_bo) {
-               ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
-       }
+       r300->ind_buf.is_32bit = GL_FALSE;
+       r300->ind_buf.count = mesa_ind_buf->count;
 }
 
-static int getTypeSize(GLenum type)
+
+static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
 {
-       switch (type) {
-               case GL_DOUBLE:
-                       return sizeof(GLdouble);
-               case GL_FLOAT:
-                       return sizeof(GLfloat);
-               case GL_INT:
-                       return sizeof(GLint);
-               case GL_UNSIGNED_INT:
-                       return sizeof(GLuint);
-               case GL_SHORT:
-                       return sizeof(GLshort);
-               case GL_UNSIGNED_SHORT:
-                       return sizeof(GLushort);
-               case GL_BYTE:
-                       return sizeof(GLbyte);
-               case GL_UNSIGNED_BYTE:
-                       return sizeof(GLubyte);
-               default:
-                       assert(0);
-                       return 0;
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
+       GLboolean mapped_named_bo = GL_FALSE;
+
+       if (!mesa_ind_buf) {
+               r300->ind_buf.bo = NULL;
+               return;
+       }
+
+#if MESA_BIG_ENDIAN
+       if (mesa_ind_buf->type == GL_UNSIGNED_INT) {
+#else
+       if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) {
+#endif
+               const GLvoid *src_ptr;
+               GLvoid *dst_ptr;
+
+               if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
+                       ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+                       assert(mesa_ind_buf->obj->Pointer != NULL);
+                       mapped_named_bo = GL_TRUE;
+               }
+
+               src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+               const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+               radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+
+               assert(r300->ind_buf.bo->ptr != NULL);
+               dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+               _mesa_memcpy(dst_ptr, src_ptr, size);
+
+               r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+               r300->ind_buf.count = mesa_ind_buf->count;
+       } else {
+               r300FixupIndexBuffer(ctx, mesa_ind_buf);
        }
 }
 
@@ -473,13 +499,22 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
                                                                                          RADEON_GEM_DOMAIN_GTT, 0);
                        }
                }
-
                r300->radeon.tcl.aos_count = vbuf->num_attribs;
+
+               if (r300->ind_buf.bo) {
+                       radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
+                                                                                 r300->ind_buf.bo,
+                                                                                 RADEON_GEM_DOMAIN_GTT, 0);
+               }
        }
 }
 
 static void r300FreeData(GLcontext *ctx)
 {
+       /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
+        * to prevent double unref in radeonReleaseArrays
+        * called during context destroy
+        */
        r300ContextPtr r300 = R300_CONTEXT(ctx);
        {
                int i;
@@ -493,15 +528,9 @@ static void r300FreeData(GLcontext *ctx)
        }
 
        {
-               struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf;
-               if (ind_buf->free_needed) {
-                       _mesa_free(ind_buf->ptr);
-               }
-
-               if (r300->radeon.tcl.elt_dma_bo) {
-                       radeon_bo_unref(r300->radeon.tcl.elt_dma_bo);
+               if (r300->ind_buf.bo != NULL) {
+                       radeon_bo_unref(r300->ind_buf.bo);
                }
-               r300->radeon.tcl.elt_dma_bo = NULL;
        }
 }
 
@@ -526,7 +555,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
 
        r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx));
 
-       r300FixupIndexBuffer(ctx, ib);
+       r300SetupIndexBuffer(ctx, ib);
 
        /* ensure we have the cmd buf space in advance to cover
         * the state + DMA AOS pointers */
index 22b0d316cfdea8b2c015e99f9c93442854ce0eb8..196cb47fef74a2c8410999ce322a674fb589cd46 100644 (file)
@@ -172,64 +172,42 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
        return num_verts - verts_off;
 }
 
-static void r300EmitElts(GLcontext * ctx, unsigned long n_elts)
-{
-       r300ContextPtr rmesa = R300_CONTEXT(ctx);
-       void *out;
-       GLuint size;
-
-       size = ((rmesa->ind_buf.is_32bit ? 4 : 2) * n_elts + 3) & ~3;
-
-       radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
-                            &rmesa->radeon.tcl.elt_dma_offset, size, 4);
-       radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
-       out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
-       memcpy(out, rmesa->ind_buf.ptr, size);
-       radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
-}
-
 static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
 {
        BATCH_LOCALS(&rmesa->radeon);
+       int size;
 
-    r300_emit_scissor(rmesa->radeon.glCtx);
-       if (vertex_count > 0) {
-               int size;
-
-               BEGIN_BATCH(10);
-               OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
-               if (rmesa->ind_buf.is_32bit) {
-                       size = vertex_count;
-                       OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
-                         ((vertex_count + 0) << 16) | type |
-                         R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
-               } else {
-                       size = (vertex_count + 1) >> 1;
-                       OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
-                          ((vertex_count + 0) << 16) | type);
-               }
+       r300_emit_scissor(rmesa->radeon.glCtx);
 
-               if (!rmesa->radeon.radeonScreen->kernel_mm) {
-                       OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
-                       OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
-                                (R300_VAP_PORT_IDX0 >> 2));
-                       OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
-                                       rmesa->radeon.tcl.elt_dma_bo,
-                                       rmesa->radeon.tcl.elt_dma_offset,
-                                       RADEON_GEM_DOMAIN_GTT, 0, 0);
-                       OUT_BATCH(size);
-               } else {
-                       OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
-                       OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
-                                (R300_VAP_PORT_IDX0 >> 2));
-                       OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
-                       OUT_BATCH(size);
-                       radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
-                                             rmesa->radeon.tcl.elt_dma_bo,
-                                             RADEON_GEM_DOMAIN_GTT, 0, 0);
-               }
-               END_BATCH();
+       BEGIN_BATCH(10);
+       OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+       if (rmesa->ind_buf.is_32bit) {
+               size = vertex_count;
+               OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+                 (vertex_count << 16) | type |
+                 R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+       } else {
+               size = (vertex_count + 1) >> 1;
+               OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+                  (vertex_count << 16) | type);
+       }
+
+       if (!rmesa->radeon.radeonScreen->kernel_mm) {
+               OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+               OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+                                (R300_VAP_PORT_IDX0 >> 2));
+               OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+               OUT_BATCH(size);
+       } else {
+               OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+               OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+                                (R300_VAP_PORT_IDX0 >> 2));
+               OUT_BATCH(rmesa->ind_buf.bo_offset);
+               OUT_BATCH(size);
+               radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+                                     rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
        }
+       END_BATCH();
 }
 
 static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
@@ -365,8 +343,7 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
         */
        rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__);
 
-       if (rmesa->ind_buf.ptr) {
-               r300EmitElts(ctx, num_verts);
+       if (rmesa->ind_buf.bo) {
                r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0);
                if (rmesa->radeon.radeonScreen->kernel_mm) {
                        BEGIN_BATCH_NO_AUTOSTATE(2);