nouveau: new bufferobj code.
author	Ben Skeggs <darktama@iinet.net.au>
Mon, 5 Feb 2007 13:39:50 +0000 (00:39 +1100)
committer	Ben Skeggs <darktama@iinet.net.au>
Mon, 5 Feb 2007 13:39:50 +0000 (00:39 +1100)
The old code suffered from a number of issues, the most severe being that
since the Mesa VBO merge even swtcl goes through the driver's bufferobj
interface.  For most VBO types (or on non-AGP cards) the buffer ended up in
VRAM, which killed swtcl performance.  All buffer objects now start out in
system memory, and stay there until they get referenced as a "real" VBO.

The other big change is that only the potentially "damaged" (dirty) ranges
of a buffer are uploaded to or downloaded from the hardware.
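
In outline, each buffer object now tracks a list of dirty byte ranges per
side (one for CPU writes, one for GPU writes); marking a range merges it
into any overlapping region, and a flush transfers just those ranges and
then clears the list.  Below is a standalone sketch of that idea, with
made-up names, simplified from (and not identical to) the code in the diff:

    /* Illustration only: these types and functions are made up and do not
     * exist in the driver. */
    #include <stdint.h>
    #include <stdlib.h>

    struct dirty_region { uint32_t start, end; };   /* dirty bytes [start, end) */

    struct dirty_list {
            struct dirty_region *region;
            int nr;
    };

    /* Mark [offset, offset+size) dirty, growing an overlapping region where
     * possible instead of appending a new one. */
    static void dirty_mark(struct dirty_list *d, uint32_t offset, uint32_t size)
    {
            uint32_t start = offset, end = offset + size;
            int i;

            for (i = 0; i < d->nr; i++) {
                    struct dirty_region *r = &d->region[i];

                    if (start >= r->start && end <= r->end)
                            return;                 /* already covered */
                    if (start <= r->end && end >= r->start) {
                            if (start < r->start)
                                    r->start = start;  /* extend start */
                            if (end > r->end)
                                    r->end = end;      /* extend end */
                            return;
                    }
            }

            d->region = realloc(d->region, sizeof(*d->region) * (d->nr + 1));
            d->region[d->nr].start = start;
            d->region[d->nr].end   = end;
            d->nr++;
    }

    /* Flush each dirty range through a transfer callback, then forget them. */
    static void dirty_flush(struct dirty_list *d,
                            void (*xfer)(uint32_t offset, uint32_t size))
    {
            int i;

            for (i = 0; i < d->nr; i++)
                    xfer(d->region[i].start,
                         d->region[i].end - d->region[i].start);

            free(d->region);
            d->region = NULL;
            d->nr = 0;
    }

In the patch, the role of the transfer callback is played by
nouveau_bo_upload_to_screen() and nouveau_bo_download_from_screen().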

src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h
src/mesa/drivers/dri/nouveau/nv30_fragprog.c

src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index d36196aeef28277aeb96e00c55bb38bf18efc194..684ed7b017d5fe366ce19e375c94d1647207c280 100644
 #include "nouveau_object.h"
 #include "nouveau_msg.h"
 
+#define NOUVEAU_MEM_FREE(mem) do {      \
+       nouveau_mem_free(ctx, (mem));   \
+       (mem) = NULL;                   \
+} while(0)
+
 #define DEBUG(fmt,args...) do {                \
        if (NOUVEAU_DEBUG & DEBUG_BUFFEROBJ) { \
                fprintf(stderr, "%s: "fmt, __func__, ##args);  \
        }                                      \
 } while(0)
 
-/* Wrapper for nouveau_mem_gpu_offset_get() that marks the bufferobj dirty
- * if the GPU modifies the data.
- */
+static GLboolean
+nouveau_bo_download_from_screen(GLcontext *ctx,        GLuint offset, GLuint size,
+                                               struct gl_buffer_object *bo)
+{
+       nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+       nouveau_mem *in_mem;
+
+       DEBUG("bo=%p, offset=%d, size=%d\n", bo, offset, size);
+
+       /* If there's a permanent backing store, blit directly into it */
+       if (nbo->cpu_mem) {
+               if (nbo->cpu_mem != nbo->gpu_mem) {
+                       DEBUG("..cpu_mem\n");
+                       nouveau_memformat_flat_emit(ctx, nbo->cpu_mem,
+                                                   nbo->gpu_mem,
+                                                   offset, offset, size);
+               }
+       } else {
+               DEBUG("..sys_mem\n");
+               in_mem = nouveau_mem_alloc(ctx, NOUVEAU_MEM_AGP |
+                                               NOUVEAU_MEM_MAPPED, size, 0);
+               if (in_mem) {
+                       DEBUG("....via AGP\n");
+                       /* otherwise, try blitting to faster memory and
+                        * copying from there
+                        */
+                       nouveau_memformat_flat_emit(ctx, in_mem, nbo->gpu_mem,
+                                                        0, offset, size);
+                       nouveau_notifier_wait_nop(ctx, nmesa->syncNotifier,
+                                                      NvSubMemFormat);
+                       _mesa_memcpy(nbo->cpu_mem_sys + offset,
+                                       in_mem->map, size);
+                       NOUVEAU_MEM_FREE(in_mem);
+               } else {
+                       DEBUG("....direct VRAM copy\n");
+                       /* worst case, copy directly from vram */
+                       _mesa_memcpy(nbo->cpu_mem_sys + offset,
+                                    nbo->gpu_mem->map + offset,
+                                    size);
+               }
+       }
+
+       return GL_TRUE;
+}
+
+static GLboolean
+nouveau_bo_upload_to_screen(GLcontext *ctx, GLuint offset, GLuint size,
+                                           struct gl_buffer_object *bo)
+{
+       nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+       nouveau_mem *out_mem;
+
+       DEBUG("bo=%p, offset=%d, size=%d\n", bo, offset, size);
+
+       if (nbo->cpu_mem) {
+               if (nbo->cpu_mem != nbo->gpu_mem) {
+                       DEBUG("..cpu_mem\n");
+                       nouveau_memformat_flat_emit(ctx, nbo->gpu_mem,
+                                                   nbo->cpu_mem,
+                                                   offset, offset, size);
+               }
+       } else {
+               out_mem = nouveau_mem_alloc(ctx, NOUVEAU_MEM_AGP |
+                                                NOUVEAU_MEM_MAPPED,
+                                                size, 0);
+               if (out_mem) {
+                       DEBUG("....via AGP\n");
+                       _mesa_memcpy(out_mem->map,
+                                       nbo->cpu_mem_sys + offset, size);
+                       nouveau_memformat_flat_emit(ctx, nbo->gpu_mem, out_mem,
+                                                   offset, 0, size);
+                       nouveau_notifier_wait_nop(ctx, nmesa->syncNotifier,
+                                                      NvSubMemFormat);
+                       NOUVEAU_MEM_FREE(out_mem);
+               } else {
+                       DEBUG("....direct VRAM copy\n");
+                       _mesa_memcpy(nbo->gpu_mem->map + offset,
+                                    nbo->cpu_mem_sys + offset,
+                                    size);
+               }
+       }
+
+       return GL_TRUE;
+}
+
+GLboolean
+nouveau_bo_move_in(GLcontext *ctx, struct gl_buffer_object *bo)
+{
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+
+       DEBUG("bo=%p\n", bo);
+
+       if (bo->OnCard)
+               return GL_TRUE;
+       assert(nbo->gpu_mem_flags);
+
+       nbo->gpu_mem = nouveau_mem_alloc(ctx, nbo->gpu_mem_flags |
+                                             NOUVEAU_MEM_MAPPED,
+                                             bo->Size, 0);
+       assert(nbo->gpu_mem);
+
+       if (nbo->cpu_mem_flags) {
+               if ((nbo->cpu_mem_flags|NOUVEAU_MEM_MAPPED) != nbo->gpu_mem->type) {
+                       DEBUG("..need cpu_mem buffer\n");
+
+                       nbo->cpu_mem = nouveau_mem_alloc(ctx,
+                                                        nbo->cpu_mem_flags |
+                                                        NOUVEAU_MEM_MAPPED,
+                                                        bo->Size, 0);
+
+                       if (nbo->cpu_mem) {
+                               DEBUG("....alloc ok, kill sys_mem buffer\n");
+                               _mesa_memcpy(nbo->cpu_mem->map,
+                                            nbo->cpu_mem_sys, bo->Size);
+                               FREE(nbo->cpu_mem_sys);
+                               nbo->cpu_mem_sys = NULL;
+                       }
+               } else {
+                       DEBUG("..cpu direct access to GPU buffer\n");
+                       nbo->cpu_mem = nbo->gpu_mem;
+               }
+       }
+       nouveau_bo_upload_to_screen(ctx, 0, bo->Size, bo);
+
+       bo->OnCard = GL_TRUE;
+       return GL_TRUE;
+}
+
+GLboolean
+nouveau_bo_move_out(GLcontext *ctx, struct gl_buffer_object *bo)
+{
+       nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+       GLuint nr_dirty;
+
+       DEBUG("bo=%p\n", bo);
+       if (!bo->OnCard)
+               return GL_TRUE;
+
+       nr_dirty = nouveau_bo_download_dirty(ctx, bo);
+       if (nbo->cpu_mem) {
+               if (nr_dirty && nbo->cpu_mem != nbo->gpu_mem)
+                       nouveau_notifier_wait_nop(ctx, nmesa->syncNotifier,
+                                                      NvSubMemFormat);
+               DEBUG("..destroy cpu_mem buffer\n");
+               nbo->cpu_mem_sys = malloc(bo->Size);
+               assert(nbo->cpu_mem_sys);
+               _mesa_memcpy(nbo->cpu_mem_sys, nbo->cpu_mem->map, bo->Size);
+               if (nbo->cpu_mem == nbo->gpu_mem)
+                       nbo->cpu_mem = NULL;
+               else
+                       NOUVEAU_MEM_FREE(nbo->cpu_mem);
+       }
+       NOUVEAU_MEM_FREE(nbo->gpu_mem);
+
+       bo->OnCard = GL_FALSE;
+       return GL_TRUE;
+}
+
+static void
+nouveau_bo_choose_storage_method(GLcontext *ctx, GLenum usage,
+                                                struct gl_buffer_object *bo)
+{
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+       GLuint gpu_type = 0;
+       GLuint cpu_type = 0;
+
+       switch (usage) {
+       /* Client source, changes often, used by GL many times */
+       case GL_DYNAMIC_DRAW_ARB:
+               gpu_type = NOUVEAU_MEM_AGP | NOUVEAU_MEM_FB_ACCEPTABLE;
+               cpu_type = NOUVEAU_MEM_AGP;
+               break;
+       /* GL source, changes often, client reads many times */
+       case GL_DYNAMIC_READ_ARB:
+       /* Client source, specified once, used by GL many times */
+       case GL_STATIC_DRAW_ARB:
+       /* GL source, specified once, client reads many times */
+       case GL_STATIC_READ_ARB:
+       /* Client source, specified once, used by GL a few times */
+       case GL_STREAM_DRAW_ARB:
+       /* GL source, specified once, client reads a few times */
+       case GL_STREAM_READ_ARB:
+       /* GL source, changes often, used by GL many times */
+       case GL_DYNAMIC_COPY_ARB:
+       /* GL source, specified once, used by GL many times */
+       case GL_STATIC_COPY_ARB:
+       /* GL source, specified once, used by GL a few times */
+       case GL_STREAM_COPY_ARB:
+               gpu_type = NOUVEAU_MEM_FB;
+               break;
+       default: 
+               assert(0);
+       }
+
+       nbo->gpu_mem_flags = gpu_type;
+       nbo->cpu_mem_flags = cpu_type;
+       nbo->usage         = usage;
+}
+
+void
+nouveau_bo_init_storage(GLcontext *ctx,        GLuint valid_gpu_access,
+                                       GLsizeiptrARB size,
+                                       const GLvoid *data,
+                                       GLenum usage,
+                                       struct gl_buffer_object *bo)
+{
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+
+       DEBUG("bo=%p\n", bo);
+
+       /* Free up previous buffers if we can't reuse them */
+       if (nbo->usage != usage ||
+                       (nbo->gpu_mem && (nbo->gpu_mem->size != size))) {
+               if (nbo->cpu_mem_sys)
+                       FREE(nbo->cpu_mem_sys);
+               if (nbo->cpu_mem) {
+                       if (nbo->cpu_mem != nbo->gpu_mem)
+                               NOUVEAU_MEM_FREE(nbo->cpu_mem);
+                       else
+                               nbo->cpu_mem = NULL;
+               }
+               if (nbo->gpu_mem)
+                       NOUVEAU_MEM_FREE(nbo->gpu_mem);
+
+               bo->OnCard = GL_FALSE;
+               nbo->cpu_mem_sys = calloc(1, size);
+       }
+
+       nouveau_bo_choose_storage_method(ctx, usage, bo);
+       /* Force off flags that may not be ok for a given buffer */
+       nbo->gpu_mem_flags &= valid_gpu_access;
+
+       bo->Usage  = usage;
+       bo->Size   = size;
+
+       if (data) {
+               GLvoid *map = nouveau_bo_map(ctx, GL_WRITE_ONLY_ARB, bo);
+               _mesa_memcpy(map, data, size);
+               nouveau_bo_dirty_all(ctx, GL_FALSE, bo);
+               nouveau_bo_unmap(ctx, bo);
+       }
+}
+
+void *
+nouveau_bo_map(GLcontext *ctx, GLenum access, struct gl_buffer_object *bo)
+{
+       nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+
+       DEBUG("bo=%p, access=%s\n", bo, _mesa_lookup_enum_by_nr(access));
+
+       if (bo->OnCard && 
+               (access == GL_READ_ONLY_ARB || access == GL_READ_WRITE_ARB)) {
+               GLuint nr_dirty;
+
+               DEBUG("..on card\n");
+               nr_dirty = nouveau_bo_download_dirty(ctx, bo);
+
+               /* nouveau_bo_download_dirty won't wait unless it needs to
+                * free a temp buffer, which isn't the case if cpu_mem is
+                * present.
+                */
+               if (nr_dirty && nbo->cpu_mem && nbo->cpu_mem != nbo->gpu_mem)
+                       nouveau_notifier_wait_nop(ctx, nmesa->syncNotifier,
+                                                      NvSubMemFormat);
+       }
+
+       if (nbo->cpu_mem) {
+               DEBUG("..access via cpu_mem\n");
+               return nbo->cpu_mem->map;
+       } else {
+               DEBUG("..access via cpu_mem_sys\n");
+               return nbo->cpu_mem_sys;
+       }
+}
+
+void
+nouveau_bo_unmap(GLcontext *ctx, struct gl_buffer_object *bo)
+{
+       DEBUG("unmap bo=%p\n", bo);
+}
+
 uint32_t
-nouveau_bufferobj_gpu_ref(GLcontext *ctx, GLenum access,
-                         struct gl_buffer_object *obj)
+nouveau_bo_gpu_ref(GLcontext *ctx, struct gl_buffer_object *bo)
 {
-       nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj;
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
 
-       DEBUG("obj=%p, access=%s\n", obj, _mesa_lookup_enum_by_nr(access));
+       assert(nbo->mapped == GL_FALSE);
 
-       if (access == GL_WRITE_ONLY_ARB || access == GL_READ_WRITE_ARB)
-               nbo->gpu_dirty = GL_TRUE;
+       DEBUG("gpu_ref\n");
+       
+       if (!bo->OnCard) {
+               nouveau_bo_move_in(ctx, bo);
+               bo->OnCard = GL_TRUE;
+       }
+       nouveau_bo_upload_dirty(ctx, bo);
 
        return nouveau_mem_gpu_offset_get(ctx, nbo->gpu_mem);
 }
 
+void
+nouveau_bo_dirty_linear(GLcontext *ctx, GLboolean on_card,
+                       uint32_t offset, uint32_t size,
+                       struct gl_buffer_object *bo)
+{
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+       nouveau_bufferobj_dirty *dirty;
+       uint32_t start = offset;
+       uint32_t end = offset + size;
+       int i;
+
+       if (nbo->cpu_mem == nbo->gpu_mem)
+               return;
+
+       dirty = on_card ? &nbo->gpu_dirty : &nbo->cpu_dirty;
+
+       DEBUG("on_card=%d, offset=%d, size=%d, bo=%p\n",
+                       on_card, offset, size, bo);
+
+       for (i=0; i<dirty->nr_dirty; i++) {
+               nouveau_bufferobj_region *r = &dirty->dirty[i];
+
+               /* already dirty */
+               if (start >= r->start && end <= r->end) {
+                       DEBUG("..already dirty\n");
+                       return;
+               }
+
+               /* add to the end of a region */
+               if (start >= r->start && start <= r->end) {
+                       if (end > r->end) {
+                               DEBUG("..extend end of region\n");
+                               r->end = end;
+                               return;
+                       }
+               }
+
+               /* add to the start of a region */
+               if (start < r->start && end >= r->end) {
+                       DEBUG("..extend start of region\n");
+                       r->start = start;
+                       /* .. and to the end */
+                       if (end > r->end) {
+                               DEBUG("....and end\n");
+                               r->end = end;
+                       }
+                       return;
+               }
+       }
+
+       /* new region */
+       DEBUG("..new dirty\n");
+       dirty->nr_dirty++;
+       dirty->dirty = realloc(dirty->dirty,
+                              sizeof(nouveau_bufferobj_region) *
+                              dirty->nr_dirty);
+       dirty->dirty[dirty->nr_dirty - 1].start = start;
+       dirty->dirty[dirty->nr_dirty - 1].end   = end;
+}
+
+void
+nouveau_bo_dirty_all(GLcontext *ctx, GLboolean on_card,
+                    struct gl_buffer_object *bo)
+{
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+       nouveau_bufferobj_dirty *dirty;
+
+       dirty = on_card ? &nbo->gpu_dirty : &nbo->cpu_dirty;
+       
+       DEBUG("dirty all\n");
+       if (dirty->nr_dirty) {
+               FREE(dirty->dirty);
+               dirty->dirty    = NULL;
+               dirty->nr_dirty = 0;
+       }
+
+       nouveau_bo_dirty_linear(ctx, on_card, 0, bo->Size, bo);
+}
+
+GLuint
+nouveau_bo_upload_dirty(GLcontext *ctx, struct gl_buffer_object *bo)
+{
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+       nouveau_bufferobj_dirty *dirty = &nbo->cpu_dirty;
+       GLuint nr_dirty;
+       int i;
+
+       nr_dirty = dirty->nr_dirty;
+       if (!nr_dirty) {
+               DEBUG("clean\n");
+               return nr_dirty;
+       }
+
+       for (i=0; i<nr_dirty; i++) {
+               nouveau_bufferobj_region *r = &dirty->dirty[i];
+
+               DEBUG("dirty %d: o=0x%08x, s=0x%08x\n",
+                               i, r->start, r->end - r->start);
+               nouveau_bo_upload_to_screen(ctx,
+                                           r->start, r->end - r->start, bo);
+       }
+
+       FREE(dirty->dirty);
+       dirty->dirty    = NULL;
+       dirty->nr_dirty = 0;
+
+       return nr_dirty;
+}
+
+GLuint
+nouveau_bo_download_dirty(GLcontext *ctx, struct gl_buffer_object *bo)
+{
+       nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo;
+       nouveau_bufferobj_dirty *dirty = &nbo->gpu_dirty;
+       GLuint nr_dirty;
+       int i;
+
+       nr_dirty = dirty->nr_dirty;
+       if (!nr_dirty) {
+               DEBUG("clean\n");
+               return nr_dirty;
+       }
+       
+       for (i=0; i<nr_dirty; i++) {
+               nouveau_bufferobj_region *r = &dirty->dirty[i];
+
+               DEBUG("dirty %d: o=0x%08x, s=0x%08x\n",
+                               i, r->start, r->end - r->start);
+               nouveau_bo_download_from_screen(ctx,
+                                               r->start,
+                                               r->end - r->start, bo);
+       }
+
+       FREE(dirty->dirty);
+       dirty->dirty    = NULL;
+       dirty->nr_dirty = 0;
+
+       return nr_dirty;
+}
+
 static void
 nouveauBindBuffer(GLcontext *ctx, GLenum target, struct gl_buffer_object *obj)
 {
@@ -42,10 +471,11 @@ nouveauNewBufferObject(GLcontext *ctx, GLuint buffer, GLenum target)
        nouveau_buffer_object *nbo;
 
        nbo = CALLOC_STRUCT(nouveau_buffer_object_t);
-       DEBUG("name=0x%08x, target=%s, obj=%p\n",
-                       buffer, _mesa_lookup_enum_by_nr(target), nbo);
-       _mesa_initialize_buffer_object(&nbo->mesa, buffer, target);
-       return &nbo->mesa;
+       if (nbo)
+               _mesa_initialize_buffer_object(&nbo->mesa, buffer, target);
+       DEBUG("bo=%p\n", nbo);
+
+       return nbo ? &nbo->mesa : NULL;
 }
 
 static void
@@ -53,11 +483,13 @@ nouveauDeleteBuffer(GLcontext *ctx, struct gl_buffer_object *obj)
 {
        nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj;
 
-       DEBUG("obj=%p\n", obj);
+       if (nbo->gpu_dirty.nr_dirty)
+               FREE(nbo->gpu_dirty.dirty);
+       if (nbo->cpu_dirty.nr_dirty)
+               FREE(nbo->cpu_dirty.dirty);
+       if (nbo->cpu_mem && nbo->cpu_mem != nbo->gpu_mem)
+               nouveau_mem_free(ctx, nbo->cpu_mem);
+       if (nbo->gpu_mem) nouveau_mem_free(ctx, nbo->gpu_mem);
+       if (nbo->cpu_mem_sys) FREE(nbo->cpu_mem_sys);
 
-       if (nbo->gpu_mem) {
-               nouveau_mem_free(ctx, nbo->gpu_mem);
-       }
        _mesa_delete_buffer_object(ctx, obj);
 }
 
@@ -66,193 +498,105 @@ nouveauBufferData(GLcontext *ctx, GLenum target, GLsizeiptrARB size,
                  const GLvoid *data, GLenum usage,
                  struct gl_buffer_object *obj)
 {
-       nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj;
+       GLuint gpu_flags;
 
-       DEBUG("obj=%p, target=%s, usage=%s, size=%d, data=%p\n",
-                       obj,
+       DEBUG("target=%s, size=%d, data=%p, usage=%s, obj=%p\n",
                        _mesa_lookup_enum_by_nr(target),
+                       (GLuint)size, data,
                        _mesa_lookup_enum_by_nr(usage),
-                       (unsigned int)size,
-                       data);
-
-       if (nbo->gpu_mem && nbo->gpu_mem->size != size)
-               nouveau_mem_free(ctx, nbo->gpu_mem);
-
-       /* Always have the GPU access the data from VRAM if possible.  For
-        * some "usage" values it may be better from AGP be default?
-        *
-        * TODO: At some point we should drop the NOUVEAU_MEM_MAPPED flag.
-        * TODO: Use the NOUVEAU_MEM_AGP_ACCEPTABLE flag.
-        * TODO: What about PCI-E and shared system memory?
-        */
-       if (!nbo->gpu_mem)
-               nbo->gpu_mem = nouveau_mem_alloc(ctx,
-                                                NOUVEAU_MEM_FB |
-                                                NOUVEAU_MEM_MAPPED,
-                                                size,
-                                                0);
-
-       if (!nbo->gpu_mem) {
-               MESSAGE("AIII bufferobj malloc failed\n");
-               return;
+                       obj);
+
+       switch (target) {
+       case GL_ELEMENT_ARRAY_BUFFER_ARB:
+               gpu_flags = 0;
+               break;
+       default:
+               gpu_flags = NOUVEAU_BO_VRAM_OK | NOUVEAU_BO_AGP_OK;
+               break;
        }
-
-       obj->Usage = usage;
-       obj->Size  = size;
-       if (!data)
-               return;
-
-       ctx->Driver.MapBuffer(ctx, target, GL_WRITE_ONLY_ARB, obj);
-       _mesa_memcpy(nbo->cpu_mem->map, data, size);
-       ctx->Driver.UnmapBuffer(ctx, target, obj);
+       nouveau_bo_init_storage(ctx, gpu_flags, size, data, usage, obj);
 }
 
-/*TODO: we don't need to DMA the entire buffer like MapBuffer does.. */
 static void
 nouveauBufferSubData(GLcontext *ctx, GLenum target, GLintptrARB offset,
                     GLsizeiptrARB size, const GLvoid *data,
                     struct gl_buffer_object *obj)
 {
-       DEBUG("obj=%p, target=%s, offset=0x%x, size=%d, data=%p\n",
-                       obj,
+       GLvoid *out;
+
+       DEBUG("target=%s, offset=0x%x, size=%d, data=%p, obj=%p\n",
                        _mesa_lookup_enum_by_nr(target),
-                       (unsigned int)offset,
-                       (unsigned int)size,
-                       data);
+                       (GLuint)offset, (GLuint)size, data, obj);
 
-       ctx->Driver.MapBuffer(ctx, target, GL_WRITE_ONLY_ARB, obj);
-       _mesa_memcpy((GLubyte *)obj->Pointer + offset, data, size);
-       ctx->Driver.UnmapBuffer(ctx, target, obj);
+       out = nouveau_bo_map(ctx, GL_WRITE_ONLY_ARB, obj);
+       _mesa_memcpy(out + offset, data, size);
+       nouveau_bo_dirty_linear(ctx, GL_FALSE, offset, size, obj);
+       nouveau_bo_unmap(ctx, obj);
 }
 
-/*TODO: we don't need to DMA the entire buffer like MapBuffer does.. */
 static void
 nouveauGetBufferSubData(GLcontext *ctx, GLenum target, GLintptrARB offset,
                     GLsizeiptrARB size, GLvoid *data,
                     struct gl_buffer_object *obj)
 {
-       DEBUG("obj=%p, target=%s, offset=0x%x, size=%d, data=%p\n",
-                       obj,
+       const GLvoid *in;
+
+       DEBUG("target=%s, offset=0x%x, size=%d, data=%p, obj=%p\n",
                        _mesa_lookup_enum_by_nr(target),
-                       (unsigned int)offset,
-                       (unsigned int)size,
-                       data);
+                       (GLuint)offset, (GLuint)size, data, obj);
 
-       ctx->Driver.MapBuffer(ctx, target, GL_READ_ONLY_ARB, obj);
-       _mesa_memcpy(data, (GLubyte *)obj->Pointer + offset, size);
-       ctx->Driver.UnmapBuffer(ctx, target, obj);
+       in = nouveau_bo_map(ctx, GL_READ_ONLY_ARB, obj);
+       _mesa_memcpy(data, in + offset, size);
+       nouveau_bo_unmap(ctx, obj);
 }
 
 static void *
 nouveauMapBuffer(GLcontext *ctx, GLenum target, GLenum access,
                 struct gl_buffer_object *obj)
 {
-       nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
-       nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj;
-
-       DEBUG("obj=%p, target=%s, access=%s\n",
-                       obj,
+       DEBUG("target=%s, access=%s, obj=%p\n",
                        _mesa_lookup_enum_by_nr(target),
-                       _mesa_lookup_enum_by_nr(access));
+                       _mesa_lookup_enum_by_nr(access),
+                       obj
+                       );
 
-       if (obj->Pointer) {
-               DEBUG("already mapped, return NULL\n");
+       /* Already mapped.. */
+       if (obj->Pointer)
                return NULL;
-       }
 
-#ifdef ALLOW_MULTI_SUBCHANNEL
-       /* If GPU is accessing the data from VRAM, copy to faster AGP memory
-        * before CPU access to the buffer.
+       /* Have to pass READ_WRITE here; nouveau_bo_map only ensures that
+        * the cpu_mem buffer is up-to-date if we ask for read access.
+        *
+        * However, even if the client only asks for write access we still
+        * end up reuploading the entire buffer, so the cpu_mem buffer has
+        * to hold correct data at all times.
         */
-       if (nbo->gpu_mem->type & NOUVEAU_MEM_FB) {
-               DEBUG("Data in VRAM, copying to AGP for CPU access\n");
-
-               /* This can happen if BufferData grows the GPU-access buffer */
-               if (nbo->cpu_mem && nbo->cpu_mem->size != nbo->gpu_mem->size) {
-                       nouveau_mem_free(ctx, nbo->cpu_mem);
-                       nbo->cpu_mem = NULL;
-               }
-
-               if (!nbo->cpu_mem) {
-                       nbo->cpu_mem = nouveau_mem_alloc(ctx,
-                                                        NOUVEAU_MEM_AGP |
-                                                        NOUVEAU_MEM_MAPPED,
-                                                        nbo->gpu_mem->size,
-                                                        0);
+       obj->Pointer = nouveau_bo_map(ctx, GL_READ_WRITE_ARB, obj);
 
-                       /* Mark GPU data as modified, so it gets copied to
-                        * the new buffer */
-                       nbo->gpu_dirty = GL_TRUE;
-               }
-
-               if (nbo->cpu_mem && nbo->gpu_dirty) {
-                       nouveau_memformat_flat_emit(ctx, nbo->cpu_mem,
-                                                        nbo->gpu_mem,
-                                                        0, 0,
-                                                        nbo->gpu_mem->size);
-
-                       nouveau_notifier_wait_nop(ctx,
-                                                 nmesa->syncNotifier,
-                                                 NvSubMemFormat);
-                       nbo->gpu_dirty = GL_FALSE;
-               }
-
-               /* buffer isn't guaranteed to be up-to-date on the card now */
-               nbo->cpu_dirty = GL_TRUE;
-       }
-#endif
-
-       /* If the copy to AGP failed for some reason, just return a pointer
-        * directly to vram..
+       /* The GL spec says that a client attempting to write to a buffer
+        * mapped READ_ONLY may see unpredictable results, possibly even
+        * program termination.
+        *
+        * We rely on this, and only mark the buffer as dirtied if the
+        * client asks for write access.
+        */
-       if (!nbo->cpu_mem) {
-               DEBUG("Returning direct pointer to VRAM\n");
-               nbo->cpu_mem   = nbo->gpu_mem;
-               nbo->cpu_dirty = GL_FALSE;
+       if (access != GL_READ_ONLY_ARB) {
+               /* We have no way of knowing what was modified by the client,
+                * so the entire buffer gets dirtied. */
+               nouveau_bo_dirty_all(ctx, GL_FALSE, obj);
        }
 
-       obj->Pointer = nbo->cpu_mem->map;
        return obj->Pointer;
 }
 
 static GLboolean
 nouveauUnmapBuffer(GLcontext *ctx, GLenum target, struct gl_buffer_object *obj)
 {
-       nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
-       nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj;
-
-       DEBUG("obj=%p, target=%s\n", obj, _mesa_lookup_enum_by_nr(target));
+       DEBUG("target=%s, obj=%p\n", _mesa_lookup_enum_by_nr(target), obj);
 
-#ifdef ALLOW_MULTI_SUBCHANNEL
-       if (nbo->cpu_dirty && nbo->cpu_mem != nbo->gpu_mem) {
-               DEBUG("Copying potentially modified data back to GPU\n");
-
-               /* blit from GPU buffer -> CPU  buffer */
-               nouveau_memformat_flat_emit(ctx, nbo->gpu_mem, nbo->cpu_mem,
-                                           0, 0, nbo->cpu_mem->size);
-
-               /* buffer is now up-to-date on the hardware (or rather, will
-                * be by the time any other commands in this channel reference
-                * the data.)
-                */
-               nbo->cpu_dirty = GL_FALSE;
-
-               /* we can avoid this wait in some cases.. */
-               nouveau_notifier_wait_nop(ctx,
-                                         nmesa->syncNotifier,
-                                         NvSubMemFormat);
-
-               /* If it's likely CPU access to the buffer will occur often,
-                * keep the cpu_mem around to avoid repeated allocs.
-                */
-               if (obj->Usage != GL_DYNAMIC_DRAW_ARB) {
-
-                       nouveau_mem_free(ctx, nbo->cpu_mem);
-                       nbo->cpu_mem = NULL;
-               }
-       }
-#endif
+       assert(obj->Pointer);
 
+       nouveau_bo_unmap(ctx, obj);
        obj->Pointer = NULL;
        return GL_TRUE;
 }
src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h
index fccc349b836615a161fcb1eecd89ee2fe2c06369..932450fd8777a375a87658b68c9f01382518cd2f 100644
@@ -4,24 +4,74 @@
 #include "mtypes.h"
 #include "nouveau_buffers.h"
 
+#define NOUVEAU_BO_VRAM_OK (NOUVEAU_MEM_FB | NOUVEAU_MEM_FB_ACCEPTABLE)
+#define NOUVEAU_BO_AGP_OK  (NOUVEAU_MEM_AGP | NOUVEAU_MEM_AGP_ACCEPTABLE)
+
+typedef struct nouveau_bufferobj_region_t {
+       uint32_t start;
+       uint32_t end;
+} nouveau_bufferobj_region;
+
+typedef struct nouveau_bufferobj_dirty_t {
+       nouveau_bufferobj_region *dirty;
+       int nr_dirty;
+} nouveau_bufferobj_dirty;
+
 typedef struct nouveau_buffer_object_t {
        /* Base class, must be first */
        struct gl_buffer_object mesa;
 
+       GLboolean               mapped;
+       GLenum                  usage;
+
        /* Memory used for GPU access to the buffer*/
+       GLuint                  gpu_mem_flags;
        nouveau_mem *           gpu_mem;
-       /* Buffer has been dirtied by the GPU */
-       GLboolean               gpu_dirty;
+       nouveau_bufferobj_dirty gpu_dirty;
 
        /* Memory used for CPU access to the buffer */
+       GLuint                  cpu_mem_flags;
        nouveau_mem *           cpu_mem;
-       /* Buffer has possibly been dirtied by the CPU */
-       GLboolean               cpu_dirty;
+       GLvoid *                cpu_mem_sys;
+       nouveau_bufferobj_dirty cpu_dirty;
 } nouveau_buffer_object;
 
-extern uint32_t nouveau_bufferobj_gpu_ref(GLcontext *ctx, GLenum access,
-                                         struct gl_buffer_object *obj);
+extern void
+nouveau_bo_init_storage(GLcontext *ctx, GLuint valid_gpu_access,
+                       GLsizeiptrARB size, const GLvoid *data, GLenum usage,
+                       struct gl_buffer_object *bo);
+
+extern GLboolean
+nouveau_bo_move_in(GLcontext *ctx, struct gl_buffer_object *bo);
+
+extern GLboolean
+nouveau_bo_move_out(GLcontext *ctx, struct gl_buffer_object *bo);
+
+extern void *
+nouveau_bo_map(GLcontext *ctx, GLenum access, struct gl_buffer_object *bo);
+
+extern void
+nouveau_bo_unmap(GLcontext *ctx, struct gl_buffer_object *bo);
+
+extern uint32_t
+nouveau_bo_gpu_ref(GLcontext *ctx, struct gl_buffer_object *bo);
+
+extern void
+nouveau_bo_dirty_linear(GLcontext *ctx, GLboolean on_card,
+                       uint32_t offset, uint32_t size,
+                       struct gl_buffer_object *bo);
+
+extern void
+nouveau_bo_dirty_all(GLcontext *ctx, GLboolean on_card,
+                    struct gl_buffer_object *bo);
+
+extern GLuint
+nouveau_bo_upload_dirty(GLcontext *ctx, struct gl_buffer_object *bo);
+
+extern GLuint
+nouveau_bo_download_dirty(GLcontext *ctx, struct gl_buffer_object *bo);
 
-extern void nouveauInitBufferObjects(GLcontext *ctx);
+extern void
+nouveauInitBufferObjects(GLcontext *ctx);
 
 #endif
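
For context, a driver-internal caller would use this interface roughly as
follows.  This is a hypothetical usage sketch (the function below does not
exist in the patch); it assumes a valid GLcontext and an already-created
gl_buffer_object:

    /* Hypothetical example, not part of the patch. */
    #include <string.h>
    #include "nouveau_bufferobj.h"

    static void example_fill_vertex_bo(GLcontext *ctx,
                                       struct gl_buffer_object *bo,
                                       const GLfloat *verts, GLuint nbytes)
    {
            GLvoid *map;

            /* (Re)allocate backing storage; VRAM or AGP are both acceptable
             * destinations once the buffer is promoted to the GPU. */
            nouveau_bo_init_storage(ctx, NOUVEAU_BO_VRAM_OK | NOUVEAU_BO_AGP_OK,
                                    nbytes, NULL, GL_DYNAMIC_DRAW_ARB, bo);

            /* CPU writes go to the CPU-visible copy and are recorded as a
             * dirty range, so only this span gets uploaded later. */
            map = nouveau_bo_map(ctx, GL_WRITE_ONLY_ARB, bo);
            memcpy(map, verts, nbytes);
            nouveau_bo_dirty_linear(ctx, GL_FALSE, 0, nbytes, bo);
            nouveau_bo_unmap(ctx, bo);

            /* Referencing the buffer for GPU use moves it on-card if needed,
             * flushes the dirty ranges and returns the GPU offset. */
            (void)nouveau_bo_gpu_ref(ctx, bo);
    }
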
src/mesa/drivers/dri/nouveau/nv30_fragprog.c
index 02bd8014cc10bd191c60816fe7dbb39e61e8effc..f868ec9293184294cf542c118c5a4452575a38c4 100644
@@ -32,14 +32,13 @@ NV30FPUploadToHW(GLcontext *ctx, nouveauShader *nvs)
                                                        GL_ARRAY_BUFFER_ARB);
 
    /* Should use STATIC_DRAW_ARB if shader doesn't use changeable params */
-   ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB,
+   nouveau_bo_init_storage(ctx, NOUVEAU_BO_VRAM_OK,
                          nvs->program_size * sizeof(uint32_t),
                          (const GLvoid *)nvs->program,
                          GL_DYNAMIC_DRAW_ARB,
                          nvs->program_buffer);
 
-   offset = nouveau_bufferobj_gpu_ref(ctx, GL_READ_ONLY_ARB,
-                                     nvs->program_buffer);
+   offset = nouveau_bo_gpu_ref(ctx, nvs->program_buffer);
 
    /* Not using state cache here, updated programs at the same address don't
     * seem to take effect unless the ACTIVE_PROGRAM method is called again.