From e7654b22aa02636d17a88a9a5ee1eeb213d81f30 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 6 Feb 2007 00:39:50 +1100 Subject: [PATCH] nouveau: new bufferobj code. The old code suffered from a number of issues, the most severe being that with the Mesa VBO merge even swtcl used the driver's bufferobj interface. On most VBO types (or non-AGP cards) the buffer ended up in vram, and killed swtcl performance greatly. All bufferobj's start in system memory now, until they get referenced as a "real" VBO. The other big change is that only potentially "damaged" areas are uploaded/downloaded to/from the hardware. --- .../drivers/dri/nouveau/nouveau_bufferobj.c | 662 +++++++++++++----- .../drivers/dri/nouveau/nouveau_bufferobj.h | 64 +- src/mesa/drivers/dri/nouveau/nv30_fragprog.c | 5 +- 3 files changed, 562 insertions(+), 169 deletions(-) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c index d36196aeef2..684ed7b017d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c @@ -8,29 +8,458 @@ #include "nouveau_object.h" #include "nouveau_msg.h" +#define NOUVEAU_MEM_FREE(mem) do { \ + nouveau_mem_free(ctx, (mem)); \ + (mem) = NULL; \ +} while(0) + #define DEBUG(fmt,args...) do { \ if (NOUVEAU_DEBUG & DEBUG_BUFFEROBJ) { \ fprintf(stderr, "%s: "fmt, __func__, ##args); \ } \ } while(0) -/* Wrapper for nouveau_mem_gpu_offset_get() that marks the bufferobj dirty - * if the GPU modifies the data. - */ +static GLboolean +nouveau_bo_download_from_screen(GLcontext *ctx, GLuint offset, GLuint size, + struct gl_buffer_object *bo) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + nouveau_mem *in_mem; + + DEBUG("bo=%p, offset=%d, size=%d\n", bo, offset, size); + + /* If there's a permanent backing store, blit directly into it */ + if (nbo->cpu_mem) { + if (nbo->cpu_mem != nbo->gpu_mem) { + DEBUG("..cpu_mem\n"); + nouveau_memformat_flat_emit(ctx, nbo->cpu_mem, + nbo->gpu_mem, + offset, offset, size); + } + } else { + DEBUG("..sys_mem\n"); + in_mem = nouveau_mem_alloc(ctx, NOUVEAU_MEM_AGP, size, 0); + if (in_mem) { + DEBUG("....via AGP\n"); + /* otherwise, try blitting to faster memory and + * copying from there + */ + nouveau_memformat_flat_emit(ctx, in_mem, nbo->gpu_mem, + 0, offset, size); + nouveau_notifier_wait_nop(ctx, nmesa->syncNotifier, + NvSubMemFormat); + _mesa_memcpy(nbo->cpu_mem_sys + offset, + in_mem->map, size); + NOUVEAU_MEM_FREE(in_mem); + } else { + DEBUG("....direct VRAM copy\n"); + /* worst case, copy directly from vram */ + _mesa_memcpy(nbo->cpu_mem_sys + offset, + nbo->gpu_mem + offset, + size); + } + } + + return GL_TRUE; +} + +static GLboolean +nouveau_bo_upload_to_screen(GLcontext *ctx, GLuint offset, GLuint size, + struct gl_buffer_object *bo) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + nouveau_mem *out_mem; + + DEBUG("bo=%p, offset=%d, size=%d\n", bo, offset, size); + + if (nbo->cpu_mem) { + if (nbo->cpu_mem != nbo->gpu_mem) { + DEBUG("..cpu_mem\n"); + nouveau_memformat_flat_emit(ctx, nbo->gpu_mem, + nbo->cpu_mem, + offset, offset, size); + } + } else { + out_mem = nouveau_mem_alloc(ctx, NOUVEAU_MEM_AGP | + NOUVEAU_MEM_MAPPED, + size, 0); + if (out_mem) { + DEBUG("....via AGP\n"); + _mesa_memcpy(out_mem->map, + nbo->cpu_mem_sys + offset, size); + nouveau_memformat_flat_emit(ctx, nbo->gpu_mem, out_mem, + offset, 0, size); + nouveau_notifier_wait_nop(ctx, nmesa->syncNotifier, + NvSubMemFormat); + NOUVEAU_MEM_FREE(out_mem); + } else { + DEBUG("....direct VRAM copy\n"); + _mesa_memcpy(nbo->gpu_mem->map + offset, + nbo->cpu_mem_sys + offset, + size); + } + } + + return GL_TRUE; +} + +GLboolean +nouveau_bo_move_in(GLcontext *ctx, struct gl_buffer_object *bo) +{ + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + + DEBUG("bo=%p\n", bo); + + if (bo->OnCard) + return GL_TRUE; + assert(nbo->gpu_mem_flags); + + nbo->gpu_mem = nouveau_mem_alloc(ctx, nbo->gpu_mem_flags | + NOUVEAU_MEM_MAPPED, + bo->Size, 0); + assert(nbo->gpu_mem); + + if (nbo->cpu_mem_flags) { + if ((nbo->cpu_mem_flags|NOUVEAU_MEM_MAPPED) != nbo->gpu_mem->type) { + DEBUG("..need cpu_mem buffer\n"); + + nbo->cpu_mem = nouveau_mem_alloc(ctx, + nbo->cpu_mem_flags | + NOUVEAU_MEM_MAPPED, + bo->Size, 0); + + if (nbo->cpu_mem) { + DEBUG("....alloc ok, kill sys_mem buffer\n"); + _mesa_memcpy(nbo->cpu_mem->map, + nbo->cpu_mem_sys, bo->Size); + FREE(nbo->cpu_mem_sys); + } + } else { + DEBUG("..cpu direct access to GPU buffer\n"); + nbo->cpu_mem = nbo->gpu_mem; + } + } + nouveau_bo_upload_to_screen(ctx, 0, bo->Size, bo); + + bo->OnCard = GL_TRUE; + return GL_TRUE; +} + +GLboolean +nouveau_bo_move_out(GLcontext *ctx, struct gl_buffer_object *bo) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + GLuint nr_dirty; + + DEBUG("bo=%p\n", bo); + if (!bo->OnCard) + return GL_TRUE; + + nr_dirty = nouveau_bo_download_dirty(ctx, bo); + if (nbo->cpu_mem) { + if (nr_dirty && nbo->cpu_mem != nbo->gpu_mem) + nouveau_notifier_wait_nop(ctx, nmesa->syncNotifier, + NvSubMemFormat); + DEBUG("..destroy cpu_mem buffer\n"); + nbo->cpu_mem_sys = malloc(bo->Size); + assert(nbo->cpu_mem_sys); + _mesa_memcpy(nbo->cpu_mem_sys, nbo->cpu_mem->map, bo->Size); + if (nbo->cpu_mem == nbo->gpu_mem) + nbo->cpu_mem = NULL; + else + NOUVEAU_MEM_FREE(nbo->cpu_mem); + } + NOUVEAU_MEM_FREE(nbo->gpu_mem); + + bo->OnCard = GL_FALSE; + return GL_TRUE; +} + +static void +nouveau_bo_choose_storage_method(GLcontext *ctx, GLenum usage, + struct gl_buffer_object *bo) +{ + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + GLuint gpu_type = 0; + GLuint cpu_type = 0; + + switch (usage) { + /* Client source, changes often, used by GL many times */ + case GL_DYNAMIC_DRAW_ARB: + gpu_type = NOUVEAU_MEM_AGP | NOUVEAU_MEM_FB_ACCEPTABLE; + cpu_type = NOUVEAU_MEM_AGP; + break; + /* GL source, changes often, client reads many times */ + case GL_DYNAMIC_READ_ARB: + /* Client source, specified once, used by GL many times */ + case GL_STATIC_DRAW_ARB: + /* GL source, specified once, client reads many times */ + case GL_STATIC_READ_ARB: + /* Client source, specified once, used by GL a few times */ + case GL_STREAM_DRAW_ARB: + /* GL source, specified once, client reads a few times */ + case GL_STREAM_READ_ARB: + /* GL source, changes often, used by GL many times*/ + case GL_DYNAMIC_COPY_ARB: + /* GL source, specified once, used by GL many times */ + case GL_STATIC_COPY_ARB: + /* GL source, specified once, used by GL a few times */ + case GL_STREAM_COPY_ARB: + gpu_type = NOUVEAU_MEM_FB; + break; + default: + assert(0); + } + + nbo->gpu_mem_flags = gpu_type; + nbo->cpu_mem_flags = cpu_type; + nbo->usage = usage; +} + +void +nouveau_bo_init_storage(GLcontext *ctx, GLuint valid_gpu_access, + GLsizeiptrARB size, + const GLvoid *data, + GLenum usage, + struct gl_buffer_object *bo) +{ + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + + DEBUG("bo=%p\n", bo); + + /* Free up previous buffers if we can't reuse them */ + if (nbo->usage != usage || + (nbo->gpu_mem && (nbo->gpu_mem->size != size))) { + if (nbo->cpu_mem_sys) + FREE(nbo->cpu_mem_sys); + if (nbo->cpu_mem) { + if (nbo->cpu_mem != nbo->gpu_mem) + NOUVEAU_MEM_FREE(nbo->cpu_mem); + else + nbo->cpu_mem = NULL; + } + if (nbo->gpu_mem) + NOUVEAU_MEM_FREE(nbo->gpu_mem); + + bo->OnCard = GL_FALSE; + nbo->cpu_mem_sys = calloc(1, size); + } + + nouveau_bo_choose_storage_method(ctx, usage, bo); + /* Force off flags that may not be ok for a given buffer */ + nbo->gpu_mem_flags &= valid_gpu_access; + + bo->Usage = usage; + bo->Size = size; + + if (data) { + GLvoid *map = nouveau_bo_map(ctx, GL_WRITE_ONLY_ARB, bo); + _mesa_memcpy(map, data, size); + nouveau_bo_dirty_all(ctx, GL_FALSE, bo); + nouveau_bo_unmap(ctx, bo); + } +} + +void * +nouveau_bo_map(GLcontext *ctx, GLenum access, struct gl_buffer_object *bo) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + + DEBUG("bo=%p, access=%s\n", bo, _mesa_lookup_enum_by_nr(access)); + + if (bo->OnCard && + (access == GL_READ_ONLY_ARB || access == GL_READ_WRITE_ARB)) { + GLuint nr_dirty; + + DEBUG("..on card\n"); + nr_dirty = nouveau_bo_download_dirty(ctx, bo); + + /* nouveau_bo_download_dirty won't wait unless it needs to + * free a temp buffer, which isn't the case if cpu_mem is + * present. + */ + if (nr_dirty && nbo->cpu_mem && nbo->cpu_mem != nbo->gpu_mem) + nouveau_notifier_wait_nop(ctx, nmesa->syncNotifier, + NvSubMemFormat); + } + + if (nbo->cpu_mem) { + DEBUG("..access via cpu_mem\n"); + return nbo->cpu_mem->map; + } else { + DEBUG("..access via cpu_mem_sys\n"); + return nbo->cpu_mem_sys; + } +} + +void +nouveau_bo_unmap(GLcontext *ctx, struct gl_buffer_object *bo) +{ + DEBUG("unmap bo=%p\n", bo); +} + uint32_t -nouveau_bufferobj_gpu_ref(GLcontext *ctx, GLenum access, - struct gl_buffer_object *obj) +nouveau_bo_gpu_ref(GLcontext *ctx, struct gl_buffer_object *bo) { - nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj; + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; - DEBUG("obj=%p, access=%s\n", obj, _mesa_lookup_enum_by_nr(access)); + assert(nbo->mapped == GL_FALSE); - if (access == GL_WRITE_ONLY_ARB || access == GL_READ_WRITE_ARB) - nbo->gpu_dirty = GL_TRUE; + DEBUG("gpu_ref\n"); + + if (!bo->OnCard) { + nouveau_bo_move_in(ctx, bo); + bo->OnCard = GL_TRUE; + } + nouveau_bo_upload_dirty(ctx, bo); return nouveau_mem_gpu_offset_get(ctx, nbo->gpu_mem); } +void +nouveau_bo_dirty_linear(GLcontext *ctx, GLboolean on_card, + uint32_t offset, uint32_t size, + struct gl_buffer_object *bo) +{ + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + nouveau_bufferobj_dirty *dirty; + uint32_t start = offset; + uint32_t end = offset + size; + int i; + + if (nbo->cpu_mem == nbo->gpu_mem) + return; + + dirty = on_card ? &nbo->gpu_dirty : &nbo->cpu_dirty; + + DEBUG("on_card=%d, offset=%d, size=%d, bo=%p\n", + on_card, offset, size, bo); + + for (i=0; inr_dirty; i++) { + nouveau_bufferobj_region *r = &dirty->dirty[i]; + + /* already dirty */ + if (start >= r->start && end <= r->end) { + DEBUG("..already dirty\n"); + return; + } + + /* add to the end of a region */ + if (start >= r->start && start <= r->end) { + if (end > r->end) { + DEBUG("..extend end of region\n"); + r->end = end; + return; + } + } + + /* add to the start of a region */ + if (start < r->start && end >= r->end) { + DEBUG("..extend start of region\n"); + r->start = start; + /* .. and to the end */ + if (end > r->end) { + DEBUG("....and end\n"); + r->end = end; + } + return; + } + } + + /* new region */ + DEBUG("..new dirty\n"); + dirty->nr_dirty++; + dirty->dirty = realloc(dirty->dirty, + sizeof(nouveau_bufferobj_region) * + dirty->nr_dirty); + dirty->dirty[dirty->nr_dirty - 1].start = start; + dirty->dirty[dirty->nr_dirty - 1].end = end; +} + +void +nouveau_bo_dirty_all(GLcontext *ctx, GLboolean on_card, + struct gl_buffer_object *bo) +{ + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + nouveau_bufferobj_dirty *dirty; + + dirty = on_card ? &nbo->gpu_dirty : &nbo->cpu_dirty; + + DEBUG("dirty all\n"); + if (dirty->nr_dirty) { + FREE(dirty->dirty); + dirty->dirty = NULL; + dirty->nr_dirty = 0; + } + + nouveau_bo_dirty_linear(ctx, on_card, 0, bo->Size, bo); +} + +GLuint +nouveau_bo_upload_dirty(GLcontext *ctx, struct gl_buffer_object *bo) +{ + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + nouveau_bufferobj_dirty *dirty = &nbo->cpu_dirty; + GLuint nr_dirty; + int i; + + nr_dirty = dirty->nr_dirty; + if (!nr_dirty) { + DEBUG("clean\n"); + return nr_dirty; + } + + for (i=0; idirty[i]; + + DEBUG("dirty %d: o=0x%08x, s=0x%08x\n", + i, r->start, r->end - r->start); + nouveau_bo_upload_to_screen(ctx, + r->start, r->end - r->start, bo); + } + + FREE(dirty->dirty); + dirty->dirty = NULL; + dirty->nr_dirty = 0; + + return nr_dirty; +} + +GLuint +nouveau_bo_download_dirty(GLcontext *ctx, struct gl_buffer_object *bo) +{ + nouveau_buffer_object *nbo = (nouveau_buffer_object *)bo; + nouveau_bufferobj_dirty *dirty = &nbo->gpu_dirty; + GLuint nr_dirty; + int i; + + nr_dirty = dirty->nr_dirty; + if (nr_dirty) { + DEBUG("clean\n"); + return nr_dirty; + } + + for (i=0; idirty[i]; + + DEBUG("dirty %d: o=0x%08x, s=0x%08x\n", + i, r->start, r->end - r->start); + nouveau_bo_download_from_screen(ctx, + r->start, + r->end - r->start, bo); + } + + FREE(dirty->dirty); + dirty->dirty = NULL; + dirty->nr_dirty = 0; + + return nr_dirty; +} + static void nouveauBindBuffer(GLcontext *ctx, GLenum target, struct gl_buffer_object *obj) { @@ -42,10 +471,11 @@ nouveauNewBufferObject(GLcontext *ctx, GLuint buffer, GLenum target) nouveau_buffer_object *nbo; nbo = CALLOC_STRUCT(nouveau_buffer_object_t); - DEBUG("name=0x%08x, target=%s, obj=%p\n", - buffer, _mesa_lookup_enum_by_nr(target), nbo); - _mesa_initialize_buffer_object(&nbo->mesa, buffer, target); - return &nbo->mesa; + if (nbo) + _mesa_initialize_buffer_object(&nbo->mesa, buffer, target); + DEBUG("bo=%p\n", nbo); + + return nbo ? &nbo->mesa : NULL; } static void @@ -53,11 +483,13 @@ nouveauDeleteBuffer(GLcontext *ctx, struct gl_buffer_object *obj) { nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj; - DEBUG("obj=%p\n", obj); + if (nbo->gpu_dirty.nr_dirty) + FREE(nbo->gpu_dirty.dirty); + if (nbo->cpu_dirty.nr_dirty) + FREE(nbo->cpu_dirty.dirty); + if (nbo->cpu_mem) nouveau_mem_free(ctx, nbo->cpu_mem); + if (nbo->gpu_mem) nouveau_mem_free(ctx, nbo->gpu_mem); - if (nbo->gpu_mem) { - nouveau_mem_free(ctx, nbo->gpu_mem); - } _mesa_delete_buffer_object(ctx, obj); } @@ -66,193 +498,105 @@ nouveauBufferData(GLcontext *ctx, GLenum target, GLsizeiptrARB size, const GLvoid *data, GLenum usage, struct gl_buffer_object *obj) { - nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj; + GLuint gpu_flags; - DEBUG("obj=%p, target=%s, usage=%s, size=%d, data=%p\n", - obj, + DEBUG("target=%s, size=%d, data=%p, usage=%s, obj=%p\n", _mesa_lookup_enum_by_nr(target), + (GLuint)size, data, _mesa_lookup_enum_by_nr(usage), - (unsigned int)size, - data); - - if (nbo->gpu_mem && nbo->gpu_mem->size != size) - nouveau_mem_free(ctx, nbo->gpu_mem); - - /* Always have the GPU access the data from VRAM if possible. For - * some "usage" values it may be better from AGP be default? - * - * TODO: At some point we should drop the NOUVEAU_MEM_MAPPED flag. - * TODO: Use the NOUVEAU_MEM_AGP_ACCEPTABLE flag. - * TODO: What about PCI-E and shared system memory? - */ - if (!nbo->gpu_mem) - nbo->gpu_mem = nouveau_mem_alloc(ctx, - NOUVEAU_MEM_FB | - NOUVEAU_MEM_MAPPED, - size, - 0); - - if (!nbo->gpu_mem) { - MESSAGE("AIII bufferobj malloc failed\n"); - return; + obj); + + switch (target) { + case GL_ELEMENT_ARRAY_BUFFER_ARB: + gpu_flags = 0; + break; + default: + gpu_flags = NOUVEAU_BO_VRAM_OK | NOUVEAU_BO_AGP_OK; + break; } - - obj->Usage = usage; - obj->Size = size; - if (!data) - return; - - ctx->Driver.MapBuffer(ctx, target, GL_WRITE_ONLY_ARB, obj); - _mesa_memcpy(nbo->cpu_mem->map, data, size); - ctx->Driver.UnmapBuffer(ctx, target, obj); + nouveau_bo_init_storage(ctx, gpu_flags, size, data, usage, obj); } -/*TODO: we don't need to DMA the entire buffer like MapBuffer does.. */ static void nouveauBufferSubData(GLcontext *ctx, GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid *data, struct gl_buffer_object *obj) { - DEBUG("obj=%p, target=%s, offset=0x%x, size=%d, data=%p\n", - obj, + GLvoid *out; + + DEBUG("target=%s, offset=0x%x, size=%d, data=%p, obj=%p\n", _mesa_lookup_enum_by_nr(target), - (unsigned int)offset, - (unsigned int)size, - data); + (GLuint)offset, (GLuint)size, data, obj); - ctx->Driver.MapBuffer(ctx, target, GL_WRITE_ONLY_ARB, obj); - _mesa_memcpy((GLubyte *)obj->Pointer + offset, data, size); - ctx->Driver.UnmapBuffer(ctx, target, obj); + out = nouveau_bo_map(ctx, GL_WRITE_ONLY_ARB, obj); + _mesa_memcpy(out + offset, data, size); + nouveau_bo_dirty_linear(ctx, GL_FALSE, offset, size, obj); + nouveau_bo_unmap(ctx, obj); } -/*TODO: we don't need to DMA the entire buffer like MapBuffer does.. */ static void nouveauGetBufferSubData(GLcontext *ctx, GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid *data, struct gl_buffer_object *obj) { - DEBUG("obj=%p, target=%s, offset=0x%x, size=%d, data=%p\n", - obj, + const GLvoid *in; + + DEBUG("target=%s, offset=0x%x, size=%d, data=%p, obj=%p\n", _mesa_lookup_enum_by_nr(target), - (unsigned int)offset, - (unsigned int)size, - data); + (GLuint)offset, (GLuint)size, data, obj); - ctx->Driver.MapBuffer(ctx, target, GL_READ_ONLY_ARB, obj); - _mesa_memcpy(data, (GLubyte *)obj->Pointer + offset, size); - ctx->Driver.UnmapBuffer(ctx, target, obj); + in = nouveau_bo_map(ctx, GL_READ_ONLY_ARB, obj); + _mesa_memcpy(data, in + offset, size); + nouveau_bo_unmap(ctx, obj); } static void * nouveauMapBuffer(GLcontext *ctx, GLenum target, GLenum access, struct gl_buffer_object *obj) { - nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); - nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj; - - DEBUG("obj=%p, target=%s, access=%s\n", - obj, + DEBUG("target=%s, access=%s, obj=%p\n", _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(access)); + _mesa_lookup_enum_by_nr(access), + obj + ); - if (obj->Pointer) { - DEBUG("already mapped, return NULL\n"); + /* Already mapped.. */ + if (obj->Pointer) return NULL; - } -#ifdef ALLOW_MULTI_SUBCHANNEL - /* If GPU is accessing the data from VRAM, copy to faster AGP memory - * before CPU access to the buffer. + /* Have to pass READ_WRITE here, nouveau_bo_map will only ensure that + * the cpu_mem buffer is up-to-date if we ask for read access. + * + * However, even if the client only asks for write access, we're still + * forced to reupload the entire buffer. So, we need the cpu_mem buffer + * to have the correct data all the time. */ - if (nbo->gpu_mem->type & NOUVEAU_MEM_FB) { - DEBUG("Data in VRAM, copying to AGP for CPU access\n"); - - /* This can happen if BufferData grows the GPU-access buffer */ - if (nbo->cpu_mem && nbo->cpu_mem->size != nbo->gpu_mem->size) { - nouveau_mem_free(ctx, nbo->cpu_mem); - nbo->cpu_mem = NULL; - } - - if (!nbo->cpu_mem) { - nbo->cpu_mem = nouveau_mem_alloc(ctx, - NOUVEAU_MEM_AGP | - NOUVEAU_MEM_MAPPED, - nbo->gpu_mem->size, - 0); + obj->Pointer = nouveau_bo_map(ctx, GL_READ_WRITE_ARB, obj); - /* Mark GPU data as modified, so it gets copied to - * the new buffer */ - nbo->gpu_dirty = GL_TRUE; - } - - if (nbo->cpu_mem && nbo->gpu_dirty) { - nouveau_memformat_flat_emit(ctx, nbo->cpu_mem, - nbo->gpu_mem, - 0, 0, - nbo->gpu_mem->size); - - nouveau_notifier_wait_nop(ctx, - nmesa->syncNotifier, - NvSubMemFormat); - nbo->gpu_dirty = GL_FALSE; - } - - /* buffer isn't guaranteed to be up-to-date on the card now */ - nbo->cpu_dirty = GL_TRUE; - } -#endif - - /* If the copy to AGP failed for some reason, just return a pointer - * directly to vram.. + /* The GL spec says that a client attempting to write to a bufferobj + * mapped READ_ONLY object may have unpredictable results, possibly + * even program termination. + * + * We're going to use this, and only mark the buffer as dirtied if + * the client asks for write access. */ - if (!nbo->cpu_mem) { - DEBUG("Returning direct pointer to VRAM\n"); - nbo->cpu_mem = nbo->gpu_mem; - nbo->cpu_dirty = GL_FALSE; + if (target != GL_READ_ONLY_ARB) { + /* We have no way of knowing what was modified by the client, + * so the entire buffer gets dirtied. */ + nouveau_bo_dirty_all(ctx, GL_FALSE, obj); } - obj->Pointer = nbo->cpu_mem->map; return obj->Pointer; } static GLboolean nouveauUnmapBuffer(GLcontext *ctx, GLenum target, struct gl_buffer_object *obj) { - nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); - nouveau_buffer_object *nbo = (nouveau_buffer_object *)obj; - - DEBUG("obj=%p, target=%s\n", obj, _mesa_lookup_enum_by_nr(target)); + DEBUG("target=%s, obj=%p\n", _mesa_lookup_enum_by_nr(target), obj); -#ifdef ALLOW_MULTI_SUBCHANNEL - if (nbo->cpu_dirty && nbo->cpu_mem != nbo->gpu_mem) { - DEBUG("Copying potentially modified data back to GPU\n"); - - /* blit from GPU buffer -> CPU buffer */ - nouveau_memformat_flat_emit(ctx, nbo->gpu_mem, nbo->cpu_mem, - 0, 0, nbo->cpu_mem->size); - - /* buffer is now up-to-date on the hardware (or rather, will - * be by the time any other commands in this channel reference - * the data.) - */ - nbo->cpu_dirty = GL_FALSE; - - /* we can avoid this wait in some cases.. */ - nouveau_notifier_wait_nop(ctx, - nmesa->syncNotifier, - NvSubMemFormat); - - /* If it's likely CPU access to the buffer will occur often, - * keep the cpu_mem around to avoid repeated allocs. - */ - if (obj->Usage != GL_DYNAMIC_DRAW_ARB) { - - nouveau_mem_free(ctx, nbo->cpu_mem); - nbo->cpu_mem = NULL; - } - } -#endif + assert(obj->Pointer); + nouveau_bo_unmap(ctx, obj); obj->Pointer = NULL; return GL_TRUE; } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h index fccc349b836..932450fd877 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h @@ -4,24 +4,74 @@ #include "mtypes.h" #include "nouveau_buffers.h" +#define NOUVEAU_BO_VRAM_OK (NOUVEAU_MEM_FB | NOUVEAU_MEM_FB_ACCEPTABLE) +#define NOUVEAU_BO_AGP_OK (NOUVEAU_MEM_AGP | NOUVEAU_MEM_AGP_ACCEPTABLE) + +typedef struct nouveau_bufferobj_region_t { + uint32_t start; + uint32_t end; +} nouveau_bufferobj_region; + +typedef struct nouveau_bufferobj_dirty_t { + nouveau_bufferobj_region *dirty; + int nr_dirty; +} nouveau_bufferobj_dirty; + typedef struct nouveau_buffer_object_t { /* Base class, must be first */ struct gl_buffer_object mesa; + GLboolean mapped; + GLenum usage; + /* Memory used for GPU access to the buffer*/ + GLuint gpu_mem_flags; nouveau_mem * gpu_mem; - /* Buffer has been dirtied by the GPU */ - GLboolean gpu_dirty; + nouveau_bufferobj_dirty gpu_dirty; /* Memory used for CPU access to the buffer */ + GLuint cpu_mem_flags; nouveau_mem * cpu_mem; - /* Buffer has possibly been dirtied by the CPU */ - GLboolean cpu_dirty; + GLvoid * cpu_mem_sys; + nouveau_bufferobj_dirty cpu_dirty; } nouveau_buffer_object; -extern uint32_t nouveau_bufferobj_gpu_ref(GLcontext *ctx, GLenum access, - struct gl_buffer_object *obj); +extern void +nouveau_bo_init_storage(GLcontext *ctx, GLuint valid_gpu_access, + GLsizeiptrARB size, const GLvoid *data, GLenum usage, + struct gl_buffer_object *bo); + +extern GLboolean +nouveau_bo_move_in(GLcontext *ctx, struct gl_buffer_object *bo); + +extern GLboolean +nouveau_bo_move_out(GLcontext *ctx, struct gl_buffer_object *bo); + +extern void * +nouveau_bo_map(GLcontext *ctx, GLenum usage, struct gl_buffer_object *bo); + +extern void +nouveau_bo_unmap(GLcontext *ctx, struct gl_buffer_object *bo); + +extern uint32_t +nouveau_bo_gpu_ref(GLcontext *ctx, struct gl_buffer_object *bo); + +extern void +nouveau_bo_dirty_linear(GLcontext *ctx, GLboolean on_card, + uint32_t offset, uint32_t size, + struct gl_buffer_object *bo); + +extern void +nouveau_bo_dirty_all(GLcontext *ctx, GLboolean on_card, + struct gl_buffer_object *bo); + +extern GLuint +nouveau_bo_upload_dirty(GLcontext *ctx, struct gl_buffer_object *bo); + +extern GLuint +nouveau_bo_download_dirty(GLcontext *ctx, struct gl_buffer_object *bo); -extern void nouveauInitBufferObjects(GLcontext *ctx); +extern void +nouveauInitBufferObjects(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c index 02bd8014cc1..f868ec92931 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c +++ b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c @@ -32,14 +32,13 @@ NV30FPUploadToHW(GLcontext *ctx, nouveauShader *nvs) GL_ARRAY_BUFFER_ARB); /* Should use STATIC_DRAW_ARB if shader doesn't use changable params */ - ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB, + nouveau_bo_init_storage(ctx, NOUVEAU_BO_VRAM_OK, nvs->program_size * sizeof(uint32_t), (const GLvoid *)nvs->program, GL_DYNAMIC_DRAW_ARB, nvs->program_buffer); - offset = nouveau_bufferobj_gpu_ref(ctx, GL_READ_ONLY_ARB, - nvs->program_buffer); + offset = nouveau_bo_gpu_ref(ctx, nvs->program_buffer); /* Not using state cache here, updated programs at the same address don't * seem to take effect unless the ACTIVE_PROGRAM method is called again. -- 2.30.2