diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index 0b4782b894d..56da2da08a8 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -1,30 +1,33 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+/*
+ * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
- *
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
+ * distribute, sublicense, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- *
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- *
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
+ */
+
+/**
+ * @file intel_buffer_objects.c
+ *
+ * This provides core GL buffer object functionality.
+ */
 
 #include "main/imports.h"
 #include "main/mtypes.h"
@@ -35,26 +38,82 @@
 #include "intel_blit.h"
 #include "intel_buffer_objects.h"
 #include "intel_batchbuffer.h"
-#include "intel_fbo.h"
-#include "intel_mipmap_tree.h"
-#include "intel_regions.h"
-#include "brw_context.h"
 
+/**
+ * Map a buffer object; issue performance warnings if mapping causes stalls.
+ *
+ * This matches the drm_intel_bo_map API, but takes an additional human-readable
+ * name for the buffer object to use in the performance debug message.
+ */
+int
+brw_bo_map(struct brw_context *brw,
+           drm_intel_bo *bo, int write_enable,
+           const char *bo_name)
+{
+   if (likely(!brw->perf_debug) || !drm_intel_bo_busy(bo))
+      return drm_intel_bo_map(bo, write_enable);
 
-static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);
+   double start_time = get_time();
 
+   int ret = drm_intel_bo_map(bo, write_enable);
+
+   perf_debug("CPU mapping a busy %s BO stalled and took %.03f ms.\n",
+              bo_name, (get_time() - start_time) * 1000);
+
+   return ret;
+}
+
+int
+brw_bo_map_gtt(struct brw_context *brw, drm_intel_bo *bo, const char *bo_name)
+{
+   if (likely(!brw->perf_debug) || !drm_intel_bo_busy(bo))
+      return drm_intel_gem_bo_map_gtt(bo);
+
+   double start_time = get_time();
+
+   int ret = drm_intel_gem_bo_map_gtt(bo);
+
+   perf_debug("GTT mapping a busy %s BO stalled and took %.03f ms.\n",
+              bo_name, (get_time() - start_time) * 1000);
+
+   return ret;
+}
+
+static void
+mark_buffer_gpu_usage(struct intel_buffer_object *intel_obj,
+                      uint32_t offset, uint32_t size)
+{
+   intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset);
+   intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size);
+}
+
+static void
+mark_buffer_inactive(struct intel_buffer_object *intel_obj)
+{
+   intel_obj->gpu_active_start = ~0;
+   intel_obj->gpu_active_end = 0;
+}
 
 /** Allocates a new drm_intel_bo to store the data for the buffer object.
  */
 static void
-intel_bufferobj_alloc_buffer(struct brw_context *brw,
-                             struct intel_buffer_object *intel_obj)
+alloc_buffer_object(struct brw_context *brw,
+                    struct intel_buffer_object *intel_obj)
 {
    intel_obj->buffer = drm_intel_bo_alloc(brw->bufmgr, "bufferobj",
                                           intel_obj->Base.Size, 64);
 
    /* the buffer might be bound as a uniform buffer, need to update it
    */
-   brw->state.dirty.brw |= BRW_NEW_UNIFORM_BUFFER;
+   if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+   if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
+   if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
+   if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
+      brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER;
+
+   mark_buffer_inactive(intel_obj);
 }
 
 static void
@@ -62,21 +121,27 @@ release_buffer(struct intel_buffer_object *intel_obj)
 {
    drm_intel_bo_unreference(intel_obj->buffer);
    intel_obj->buffer = NULL;
-   intel_obj->offset = 0;
 }
 
 /**
+ * The NewBufferObject() driver hook.
+ *
+ * Allocates a new intel_buffer_object structure and initializes it.
+ *
  * There is some duplication between mesa's bufferobjects and our
  * bufmgr buffers.  Both have an integer handle and a hashtable to
  * lookup an opaque structure.  It would be nice if the handles and
  * internal structure were somehow shared.
  */
 static struct gl_buffer_object *
-intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target)
+brw_new_buffer_object(struct gl_context * ctx, GLuint name)
 {
    struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);
+   if (!obj) {
+      _mesa_error_no_memory(__func__);
+      return NULL;
+   }
 
-   _mesa_initialize_buffer_object(ctx, &obj->Base, name, target);
+   _mesa_initialize_buffer_object(ctx, &obj->Base, name);
 
    obj->buffer = NULL;
 
@@ -84,11 +149,12 @@ intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target)
 }
 
 /**
- * Deallocate/free a vertex/pixel buffer object.
- * Called via glDeleteBuffersARB().
+ * The DeleteBuffer() driver hook. + * + * Deletes a single OpenGL buffer object. Used by glDeleteBuffers(). */ static void -intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) +brw_delete_buffer(struct gl_context * ctx, struct gl_buffer_object *obj) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj); @@ -98,28 +164,31 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy * (though it does if you call glDeleteBuffers) */ - if (obj->Pointer) - intel_bufferobj_unmap(ctx, obj); + _mesa_buffer_unmap_all_mappings(ctx, obj); drm_intel_bo_unreference(intel_obj->buffer); - free(intel_obj); + _mesa_delete_buffer_object(ctx, obj); } - /** - * Allocate space for and store data in a buffer object. Any data that was - * previously stored in the buffer object is lost. If data is NULL, - * memory will be allocated, but no copy will occur. - * Called via ctx->Driver.BufferData(). + * The BufferData() driver hook. + * + * Implements glBufferData(), which recreates a buffer object's data store + * and populates it with the given data, if present. + * + * Any data that was previously stored in the buffer object is lost. + * * \return true for success, false if out of memory */ static GLboolean -intel_bufferobj_data(struct gl_context * ctx, - GLenum target, - GLsizeiptrARB size, - const GLvoid * data, - GLenum usage, struct gl_buffer_object *obj) +brw_buffer_data(struct gl_context *ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid *data, + GLenum usage, + GLbitfield storageFlags, + struct gl_buffer_object *obj) { struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); @@ -130,14 +199,16 @@ intel_bufferobj_data(struct gl_context * ctx, intel_obj->Base.Size = size; intel_obj->Base.Usage = usage; + intel_obj->Base.StorageFlags = storageFlags; - assert(!obj->Pointer); /* Mesa should have unmapped it */ + assert(!obj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */ + assert(!obj->Mappings[MAP_INTERNAL].Pointer); if (intel_obj->buffer != NULL) release_buffer(intel_obj); if (size != 0) { - intel_bufferobj_alloc_buffer(brw, intel_obj); + alloc_buffer_object(brw, intel_obj); if (!intel_obj->buffer) return false; @@ -150,16 +221,20 @@ intel_bufferobj_data(struct gl_context * ctx, /** - * Replace data in a subrange of buffer object. If the data range - * specified by size + offset extends beyond the end of the buffer or - * if data is NULL, no copy is performed. - * Called via glBufferSubDataARB(). + * The BufferSubData() driver hook. + * + * Implements glBufferSubData(), which replaces a portion of the data in a + * buffer object. + * + * If the data range specified by (size + offset) extends beyond the end of + * the buffer or if data is NULL, no copy is performed. */ static void -intel_bufferobj_subdata(struct gl_context * ctx, - GLintptrARB offset, - GLsizeiptrARB size, - const GLvoid * data, struct gl_buffer_object *obj) +brw_buffer_subdata(struct gl_context *ctx, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid *data, + struct gl_buffer_object *obj) { struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); @@ -170,20 +245,46 @@ intel_bufferobj_subdata(struct gl_context * ctx, assert(intel_obj); + /* See if we can unsynchronized write the data into the user's BO. 
This + * avoids GPU stalls in unfortunately common user patterns (uploading + * sequentially into a BO, with draw calls in between each upload). + * + * Once we've hit this path, we mark this GL BO as preferring stalling to + * blits, so that we can hopefully hit this path again in the future + * (otherwise, an app that might occasionally stall but mostly not will end + * up with blitting all the time, at the cost of bandwidth) + */ + if (offset + size <= intel_obj->gpu_active_start || + intel_obj->gpu_active_end <= offset) { + if (brw->has_llc) { + drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer); + memcpy(intel_obj->buffer->virtual + offset, data, size); + drm_intel_bo_unmap(intel_obj->buffer); + + if (intel_obj->gpu_active_end > intel_obj->gpu_active_start) + intel_obj->prefer_stall_to_blit = true; + return; + } else { + perf_debug("BufferSubData could be unsynchronized, but !LLC doesn't support it yet\n"); + } + } + busy = drm_intel_bo_busy(intel_obj->buffer) || drm_intel_bo_references(brw->batch.bo, intel_obj->buffer); if (busy) { if (size == intel_obj->Base.Size) { - /* Replace the current busy bo with fresh data. */ + /* Replace the current busy bo so the subdata doesn't stall. */ drm_intel_bo_unreference(intel_obj->buffer); - intel_bufferobj_alloc_buffer(brw, intel_obj); - drm_intel_bo_subdata(intel_obj->buffer, 0, size, data); - } else { - perf_debug("Using a blit copy to avoid stalling on %ldb " - "glBufferSubData() to a busy buffer object.\n", - (long)size); + alloc_buffer_object(brw, intel_obj); + } else if (!intel_obj->prefer_stall_to_blit) { + perf_debug("Using a blit copy to avoid stalling on " + "glBufferSubData(%ld, %ld) (%ldkb) to a busy " + "(%d-%d) buffer object.\n", + (long)offset, (long)offset + size, (long)(size/1024), + intel_obj->gpu_active_start, + intel_obj->gpu_active_end); drm_intel_bo *temp_bo = drm_intel_bo_alloc(brw->bufmgr, "subdata temp", size, 64); @@ -195,21 +296,35 @@ intel_bufferobj_subdata(struct gl_context * ctx, size); drm_intel_bo_unreference(temp_bo); + return; + } else { + perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy " + "(%d-%d) buffer object. Use glMapBufferRange() to " + "avoid this.\n", + (long)offset, (long)offset + size, (long)(size/1024), + intel_obj->gpu_active_start, + intel_obj->gpu_active_end); + intel_batchbuffer_flush(brw); } - } else { - drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); } + + drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); + mark_buffer_inactive(intel_obj); } /** - * Called via glGetBufferSubDataARB(). + * The GetBufferSubData() driver hook. + * + * Implements glGetBufferSubData(), which copies a subrange of a buffer + * object into user memory. */ static void -intel_bufferobj_get_subdata(struct gl_context * ctx, - GLintptrARB offset, - GLsizeiptrARB size, - GLvoid * data, struct gl_buffer_object *obj) +brw_get_buffer_subdata(struct gl_context *ctx, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid *data, + struct gl_buffer_object *obj) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj); struct brw_context *brw = brw_context(ctx); @@ -219,12 +334,15 @@ intel_bufferobj_get_subdata(struct gl_context * ctx, intel_batchbuffer_flush(brw); } drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); -} + mark_buffer_inactive(intel_obj); +} /** - * Called via glMapBufferRange and glMapBuffer + * The MapBufferRange() driver hook. + * + * This implements both glMapBufferRange() and glMapBuffer(). 
* * The goal of this extension is to allow apps to accumulate their rendering * at the same time as they accumulate their buffer object. Without it, @@ -240,9 +358,10 @@ intel_bufferobj_get_subdata(struct gl_context * ctx, * and blit it into the real BO at unmap time. */ static void * -intel_bufferobj_map_range(struct gl_context * ctx, - GLintptr offset, GLsizeiptr length, - GLbitfield access, struct gl_buffer_object *obj) +brw_map_buffer_range(struct gl_context *ctx, + GLintptr offset, GLsizeiptr length, + GLbitfield access, struct gl_buffer_object *obj, + gl_map_buffer_index index) { struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); @@ -252,12 +371,12 @@ intel_bufferobj_map_range(struct gl_context * ctx, /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also * internally uses our functions directly. */ - obj->Offset = offset; - obj->Length = length; - obj->AccessFlags = access; + obj->Mappings[index].Offset = offset; + obj->Mappings[index].Length = length; + obj->Mappings[index].AccessFlags = access; if (intel_obj->buffer == NULL) { - obj->Pointer = NULL; + obj->Mappings[index].Pointer = NULL; return NULL; } @@ -273,16 +392,16 @@ intel_bufferobj_map_range(struct gl_context * ctx, if (drm_intel_bo_references(brw->batch.bo, intel_obj->buffer)) { if (access & GL_MAP_INVALIDATE_BUFFER_BIT) { drm_intel_bo_unreference(intel_obj->buffer); - intel_bufferobj_alloc_buffer(brw, intel_obj); + alloc_buffer_object(brw, intel_obj); } else { perf_debug("Stalling on the GPU for mapping a busy buffer " "object\n"); - intel_flush(ctx); + intel_batchbuffer_flush(brw); } } else if (drm_intel_bo_busy(intel_obj->buffer) && (access & GL_MAP_INVALIDATE_BUFFER_BIT)) { drm_intel_bo_unreference(intel_obj->buffer); - intel_bufferobj_alloc_buffer(brw, intel_obj); + alloc_buffer_object(brw, intel_obj); } } @@ -290,447 +409,242 @@ intel_bufferobj_map_range(struct gl_context * ctx, * doesn't require the current contents of that range, make a new * BO, and we'll copy what they put in there out at unmap or * FlushRange time. + * + * That is, unless they're looking for a persistent mapping -- we would + * need to do blits in the MemoryBarrier call, and it's easier to just do a + * GPU stall and do a mapping. */ - if ((access & GL_MAP_INVALIDATE_RANGE_BIT) && + if (!(access & (GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_PERSISTENT_BIT)) && + (access & GL_MAP_INVALIDATE_RANGE_BIT) && drm_intel_bo_busy(intel_obj->buffer)) { - if (access & GL_MAP_FLUSH_EXPLICIT_BIT) { - intel_obj->range_map_buffer = malloc(length); - obj->Pointer = intel_obj->range_map_buffer; + /* Ensure that the base alignment of the allocation meets the alignment + * guarantees the driver has advertised to the application. 
+ */ + const unsigned alignment = ctx->Const.MinMapBufferAlignment; + + intel_obj->map_extra[index] = (uintptr_t) offset % alignment; + intel_obj->range_map_bo[index] = drm_intel_bo_alloc(brw->bufmgr, + "BO blit temp", + length + + intel_obj->map_extra[index], + alignment); + if (brw->has_llc) { + brw_bo_map(brw, intel_obj->range_map_bo[index], + (access & GL_MAP_WRITE_BIT) != 0, "range-map"); } else { - intel_obj->range_map_bo = drm_intel_bo_alloc(brw->bufmgr, - "range map", - length, 64); - if (!(access & GL_MAP_READ_BIT)) { - drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo); - } else { - drm_intel_bo_map(intel_obj->range_map_bo, - (access & GL_MAP_WRITE_BIT) != 0); - } - obj->Pointer = intel_obj->range_map_bo->virtual; + drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo[index]); } - return obj->Pointer; + obj->Mappings[index].Pointer = + intel_obj->range_map_bo[index]->virtual + intel_obj->map_extra[index]; + return obj->Mappings[index].Pointer; } - if (access & GL_MAP_UNSYNCHRONIZED_BIT) + if (access & GL_MAP_UNSYNCHRONIZED_BIT) { + if (!brw->has_llc && brw->perf_debug && + drm_intel_bo_busy(intel_obj->buffer)) { + perf_debug("MapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT stalling (it's actually synchronized on non-LLC platforms)\n"); + } drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer); - else if (!(access & GL_MAP_READ_BIT)) { + } else if (!brw->has_llc && (!(access & GL_MAP_READ_BIT) || + (access & GL_MAP_PERSISTENT_BIT))) { drm_intel_gem_bo_map_gtt(intel_obj->buffer); + mark_buffer_inactive(intel_obj); } else { - drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0); + brw_bo_map(brw, intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0, + "MapBufferRange"); + mark_buffer_inactive(intel_obj); } - obj->Pointer = intel_obj->buffer->virtual + offset; - return obj->Pointer; + obj->Mappings[index].Pointer = intel_obj->buffer->virtual + offset; + return obj->Mappings[index].Pointer; } -/* Ideally we'd use a BO to avoid taking up cache space for the temporary +/** + * The FlushMappedBufferRange() driver hook. + * + * Implements glFlushMappedBufferRange(), which signifies that modifications + * have been made to a range of a mapped buffer, and it should be flushed. + * + * This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT. + * + * Ideally we'd use a BO to avoid taking up cache space for the temporary * data, but FlushMappedBufferRange may be followed by further writes to * the pointer, so we would have to re-map after emitting our blit, which * would defeat the point. */ static void -intel_bufferobj_flush_mapped_range(struct gl_context *ctx, - GLintptr offset, GLsizeiptr length, - struct gl_buffer_object *obj) +brw_flush_mapped_buffer_range(struct gl_context *ctx, + GLintptr offset, GLsizeiptr length, + struct gl_buffer_object *obj, + gl_map_buffer_index index) { struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - drm_intel_bo *temp_bo; - /* Unless we're in the range map using a temporary system buffer, - * there's no work to do. + assert(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT); + + /* If we gave a direct mapping of the buffer instead of using a temporary, + * then there's nothing to do. 
    */
-   if (intel_obj->range_map_buffer == NULL)
+   if (intel_obj->range_map_bo[index] == NULL)
       return;
 
    if (length == 0)
       return;
 
-   temp_bo = drm_intel_bo_alloc(brw->bufmgr, "range map flush", length, 64);
-
-   drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer);
-
+   /* Note that we're not unmapping our buffer while executing the blit.  We
+    * need to have a mapping still at the end of this call, since the user
+    * gets to make further modifications and glFlushMappedBufferRange() calls.
+    * This is safe, because:
+    *
+    * - On LLC platforms, we're using a CPU mapping that's coherent with the
+    *   GPU (except for the render caches), so the kernel doesn't need to do
+    *   any flushing work for us except for what happens at batch exec time
+    *   anyway.
+    *
+    * - On non-LLC platforms, we're using a GTT mapping that writes directly
+    *   to system memory (except for the chipset cache that gets flushed at
+    *   batch exec time).
+    *
+    * In both cases we don't need to stall for the previous blit to complete
+    * so we can re-map (and we definitely don't want to, since that would be
+    * slow): If the user edits a part of their buffer that's previously been
+    * blitted, then our lack of synchronization is fine, because either
+    * they'll get some too-new data in the first blit and not do another blit
+    * of that area (but in that case the results are undefined), or they'll do
+    * another blit of that area and the complete newer data will land the
+    * second time.
+    */
    intel_emit_linear_blit(brw,
-                          intel_obj->buffer, obj->Offset + offset,
-                          temp_bo, 0,
+                          intel_obj->buffer,
+                          obj->Mappings[index].Offset + offset,
+                          intel_obj->range_map_bo[index],
+                          intel_obj->map_extra[index] + offset,
                           length);
-
-   drm_intel_bo_unreference(temp_bo);
+   mark_buffer_gpu_usage(intel_obj,
+                         obj->Mappings[index].Offset + offset,
+                         length);
 }
 
 /**
- * Called via glUnmapBuffer().
+ * The UnmapBuffer() driver hook.
+ *
+ * Implements glUnmapBuffer().
  */
 static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
+brw_unmap_buffer(struct gl_context *ctx,
+                 struct gl_buffer_object *obj,
+                 gl_map_buffer_index index)
 {
    struct brw_context *brw = brw_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
    assert(intel_obj);
-   assert(obj->Pointer);
-   if (intel_obj->range_map_buffer != NULL) {
-      /* Since we've emitted some blits to buffers that will (likely) be used
-       * in rendering operations in other cache domains in this batch, emit a
-       * flush.  Once again, we wish for a domain tracker in libdrm to cover
-       * usage inside of a batchbuffer.
- */ - intel_batchbuffer_emit_mi_flush(brw); - free(intel_obj->range_map_buffer); - intel_obj->range_map_buffer = NULL; - } else if (intel_obj->range_map_bo != NULL) { - drm_intel_bo_unmap(intel_obj->range_map_bo); - - intel_emit_linear_blit(brw, - intel_obj->buffer, obj->Offset, - intel_obj->range_map_bo, 0, - obj->Length); + assert(obj->Mappings[index].Pointer); + if (intel_obj->range_map_bo[index] != NULL) { + drm_intel_bo_unmap(intel_obj->range_map_bo[index]); + + if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) { + intel_emit_linear_blit(brw, + intel_obj->buffer, obj->Mappings[index].Offset, + intel_obj->range_map_bo[index], + intel_obj->map_extra[index], + obj->Mappings[index].Length); + mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset, + obj->Mappings[index].Length); + } /* Since we've emitted some blits to buffers that will (likely) be used * in rendering operations in other cache domains in this batch, emit a * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); - drm_intel_bo_unreference(intel_obj->range_map_bo); - intel_obj->range_map_bo = NULL; + drm_intel_bo_unreference(intel_obj->range_map_bo[index]); + intel_obj->range_map_bo[index] = NULL; } else if (intel_obj->buffer != NULL) { drm_intel_bo_unmap(intel_obj->buffer); } - obj->Pointer = NULL; - obj->Offset = 0; - obj->Length = 0; + obj->Mappings[index].Pointer = NULL; + obj->Mappings[index].Offset = 0; + obj->Mappings[index].Length = 0; return true; } +/** + * Gets a pointer to the object's BO, and marks the given range as being used + * on the GPU. + * + * Anywhere that uses buffer objects in the pipeline should be using this to + * mark the range of the buffer that is being accessed by the pipeline. + */ drm_intel_bo * intel_bufferobj_buffer(struct brw_context *brw, struct intel_buffer_object *intel_obj, - GLuint flag) + uint32_t offset, uint32_t size) { + /* This is needed so that things like transform feedback and texture buffer + * objects that need a BO but don't want to check that they exist for + * draw-time validation can just always get a BO from a GL buffer object. 
+ */ if (intel_obj->buffer == NULL) - intel_bufferobj_alloc_buffer(brw, intel_obj); - - return intel_obj->buffer; -} - -#define INTEL_UPLOAD_SIZE (64*1024) - -void -intel_upload_finish(struct brw_context *brw) -{ - if (!brw->upload.bo) - return; - - if (brw->upload.buffer_len) { - drm_intel_bo_subdata(brw->upload.bo, - brw->upload.buffer_offset, - brw->upload.buffer_len, - brw->upload.buffer); - brw->upload.buffer_len = 0; - } - - drm_intel_bo_unreference(brw->upload.bo); - brw->upload.bo = NULL; -} - -static void wrap_buffers(struct brw_context *brw, GLuint size) -{ - intel_upload_finish(brw); - - if (size < INTEL_UPLOAD_SIZE) - size = INTEL_UPLOAD_SIZE; - - brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "upload", size, 0); - brw->upload.offset = 0; -} - -void intel_upload_data(struct brw_context *brw, - const void *ptr, GLuint size, GLuint align, - drm_intel_bo **return_bo, - GLuint *return_offset) -{ - GLuint base, delta; - - base = (brw->upload.offset + align - 1) / align * align; - if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) { - wrap_buffers(brw, size); - base = 0; - } - - drm_intel_bo_reference(brw->upload.bo); - *return_bo = brw->upload.bo; - *return_offset = base; - - delta = base - brw->upload.offset; - if (brw->upload.buffer_len && - brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) - { - drm_intel_bo_subdata(brw->upload.bo, - brw->upload.buffer_offset, - brw->upload.buffer_len, - brw->upload.buffer); - brw->upload.buffer_len = 0; - } - - if (size < sizeof(brw->upload.buffer)) - { - if (brw->upload.buffer_len == 0) - brw->upload.buffer_offset = base; - else - brw->upload.buffer_len += delta; - - memcpy(brw->upload.buffer + brw->upload.buffer_len, ptr, size); - brw->upload.buffer_len += size; - } - else - { - drm_intel_bo_subdata(brw->upload.bo, base, size, ptr); - } - - brw->upload.offset = base + size; -} - -void *intel_upload_map(struct brw_context *brw, GLuint size, GLuint align) -{ - GLuint base, delta; - char *ptr; - - base = (brw->upload.offset + align - 1) / align * align; - if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) { - wrap_buffers(brw, size); - base = 0; - } - - delta = base - brw->upload.offset; - if (brw->upload.buffer_len && - brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) - { - drm_intel_bo_subdata(brw->upload.bo, - brw->upload.buffer_offset, - brw->upload.buffer_len, - brw->upload.buffer); - brw->upload.buffer_len = 0; - } - - if (size <= sizeof(brw->upload.buffer)) { - if (brw->upload.buffer_len == 0) - brw->upload.buffer_offset = base; - else - brw->upload.buffer_len += delta; + alloc_buffer_object(brw, intel_obj); - ptr = brw->upload.buffer + brw->upload.buffer_len; - brw->upload.buffer_len += size; - } else - ptr = malloc(size); - - return ptr; -} - -void intel_upload_unmap(struct brw_context *brw, - const void *ptr, GLuint size, GLuint align, - drm_intel_bo **return_bo, - GLuint *return_offset) -{ - GLuint base; - - base = (brw->upload.offset + align - 1) / align * align; - if (size > sizeof(brw->upload.buffer)) { - drm_intel_bo_subdata(brw->upload.bo, base, size, ptr); - free((void*)ptr); - } + mark_buffer_gpu_usage(intel_obj, offset, size); - drm_intel_bo_reference(brw->upload.bo); - *return_bo = brw->upload.bo; - *return_offset = base; - - brw->upload.offset = base + size; -} - -drm_intel_bo * -intel_bufferobj_source(struct brw_context *brw, - struct intel_buffer_object *intel_obj, - GLuint align, GLuint *offset) -{ - *offset = intel_obj->offset; return intel_obj->buffer; 
} +/** + * The CopyBufferSubData() driver hook. + * + * Implements glCopyBufferSubData(), which copies a portion of one buffer + * object's data to another. Independent source and destination offsets + * are allowed. + */ static void -intel_bufferobj_copy_subdata(struct gl_context *ctx, - struct gl_buffer_object *src, - struct gl_buffer_object *dst, - GLintptr read_offset, GLintptr write_offset, - GLsizeiptr size) +brw_copy_buffer_subdata(struct gl_context *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr read_offset, GLintptr write_offset, + GLsizeiptr size) { struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_src = intel_buffer_object(src); struct intel_buffer_object *intel_dst = intel_buffer_object(dst); drm_intel_bo *src_bo, *dst_bo; - GLuint src_offset; if (size == 0) return; - dst_bo = intel_bufferobj_buffer(brw, intel_dst, INTEL_WRITE_PART); - src_bo = intel_bufferobj_source(brw, intel_src, 64, &src_offset); + dst_bo = intel_bufferobj_buffer(brw, intel_dst, write_offset, size); + src_bo = intel_bufferobj_buffer(brw, intel_src, read_offset, size); intel_emit_linear_blit(brw, dst_bo, write_offset, - src_bo, read_offset + src_offset, size); + src_bo, read_offset, size); /* Since we've emitted some blits to buffers that will (likely) be used * in rendering operations in other cache domains in this batch, emit a * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(brw); -} - -static GLenum -intel_buffer_purgeable(drm_intel_bo *buffer) -{ - int retained = 0; - - if (buffer != NULL) - retained = drm_intel_bo_madvise (buffer, I915_MADV_DONTNEED); - - return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE; -} - -static GLenum -intel_buffer_object_purgeable(struct gl_context * ctx, - struct gl_buffer_object *obj, - GLenum option) -{ - struct intel_buffer_object *intel_obj = intel_buffer_object (obj); - - if (intel_obj->buffer != NULL) - return intel_buffer_purgeable(intel_obj->buffer); - - if (option == GL_RELEASED_APPLE) { - return GL_RELEASED_APPLE; - } else { - /* XXX Create the buffer and madvise(MADV_DONTNEED)? */ - struct brw_context *brw = brw_context(ctx); - drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_obj, INTEL_READ); - - return intel_buffer_purgeable(bo); - } -} - -static GLenum -intel_texture_object_purgeable(struct gl_context * ctx, - struct gl_texture_object *obj, - GLenum option) -{ - struct intel_texture_object *intel; - - (void) ctx; - (void) option; - - intel = intel_texture_object(obj); - if (intel->mt == NULL || intel->mt->region == NULL) - return GL_RELEASED_APPLE; - - return intel_buffer_purgeable(intel->mt->region->bo); -} - -static GLenum -intel_render_object_purgeable(struct gl_context * ctx, - struct gl_renderbuffer *obj, - GLenum option) -{ - struct intel_renderbuffer *intel; - - (void) ctx; - (void) option; - - intel = intel_renderbuffer(obj); - if (intel->mt == NULL) - return GL_RELEASED_APPLE; - - return intel_buffer_purgeable(intel->mt->region->bo); -} - -static GLenum -intel_buffer_unpurgeable(drm_intel_bo *buffer) -{ - int retained; - - retained = 0; - if (buffer != NULL) - retained = drm_intel_bo_madvise (buffer, I915_MADV_WILLNEED); - - return retained ? 
GL_RETAINED_APPLE : GL_UNDEFINED_APPLE; -} - -static GLenum -intel_buffer_object_unpurgeable(struct gl_context * ctx, - struct gl_buffer_object *obj, - GLenum option) -{ - (void) ctx; - (void) option; - - return intel_buffer_unpurgeable(intel_buffer_object (obj)->buffer); -} - -static GLenum -intel_texture_object_unpurgeable(struct gl_context * ctx, - struct gl_texture_object *obj, - GLenum option) -{ - struct intel_texture_object *intel; - - (void) ctx; - (void) option; - - intel = intel_texture_object(obj); - if (intel->mt == NULL || intel->mt->region == NULL) - return GL_UNDEFINED_APPLE; - - return intel_buffer_unpurgeable(intel->mt->region->bo); -} - -static GLenum -intel_render_object_unpurgeable(struct gl_context * ctx, - struct gl_renderbuffer *obj, - GLenum option) -{ - struct intel_renderbuffer *intel; - - (void) ctx; - (void) option; - - intel = intel_renderbuffer(obj); - if (intel->mt == NULL) - return GL_UNDEFINED_APPLE; - - return intel_buffer_unpurgeable(intel->mt->region->bo); + brw_emit_mi_flush(brw); } void intelInitBufferObjectFuncs(struct dd_function_table *functions) { - functions->NewBufferObject = intel_bufferobj_alloc; - functions->DeleteBuffer = intel_bufferobj_free; - functions->BufferData = intel_bufferobj_data; - functions->BufferSubData = intel_bufferobj_subdata; - functions->GetBufferSubData = intel_bufferobj_get_subdata; - functions->MapBufferRange = intel_bufferobj_map_range; - functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; - functions->UnmapBuffer = intel_bufferobj_unmap; - functions->CopyBufferSubData = intel_bufferobj_copy_subdata; - - functions->BufferObjectPurgeable = intel_buffer_object_purgeable; - functions->TextureObjectPurgeable = intel_texture_object_purgeable; - functions->RenderObjectPurgeable = intel_render_object_purgeable; - - functions->BufferObjectUnpurgeable = intel_buffer_object_unpurgeable; - functions->TextureObjectUnpurgeable = intel_texture_object_unpurgeable; - functions->RenderObjectUnpurgeable = intel_render_object_unpurgeable; + functions->NewBufferObject = brw_new_buffer_object; + functions->DeleteBuffer = brw_delete_buffer; + functions->BufferData = brw_buffer_data; + functions->BufferSubData = brw_buffer_subdata; + functions->GetBufferSubData = brw_get_buffer_subdata; + functions->MapBufferRange = brw_map_buffer_range; + functions->FlushMappedBufferRange = brw_flush_mapped_buffer_range; + functions->UnmapBuffer = brw_unmap_buffer; + functions->CopyBufferSubData = brw_copy_buffer_subdata; }
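
The heart of the patch is the busy-range bookkeeping: mark_buffer_gpu_usage() and
mark_buffer_inactive() maintain one conservative interval per buffer object,
[gpu_active_start, gpu_active_end), covering every byte the GPU might still
access, and brw_buffer_subdata() only takes the unsynchronized-write fast path
when the incoming range cannot overlap that interval. Below is a minimal
standalone sketch of that logic; the struct and helper names (active_range,
can_write_unsynchronized, and so on) are illustrative stand-ins, not Mesa code.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-in for the gpu_active_start/gpu_active_end fields the
 * patch adds to struct intel_buffer_object. */
struct active_range {
   uint32_t start;   /* ~0 when no GPU access is outstanding */
   uint32_t end;     /* 0 when no GPU access is outstanding */
};

static void
mark_gpu_usage(struct active_range *r, uint32_t offset, uint32_t size)
{
   /* Same effect as the MIN2/MAX2 pair in mark_buffer_gpu_usage(). */
   if (offset < r->start)
      r->start = offset;
   if (offset + size > r->end)
      r->end = offset + size;
}

static void
mark_inactive(struct active_range *r)
{
   r->start = ~0u;
   r->end = 0;
}

/* The overlap test from brw_buffer_subdata(): writing [offset, offset+size)
 * is safe to do unsynchronized iff it lies entirely before or entirely after
 * the GPU-active interval. */
static bool
can_write_unsynchronized(const struct active_range *r,
                         uint32_t offset, uint32_t size)
{
   return offset + size <= r->start || r->end <= offset;
}

int
main(void)
{
   struct active_range r;
   mark_inactive(&r);

   /* Sequential uploads with draw calls in between stay on the fast path:
    * each new write lands past everything the GPU has been handed. */
   assert(can_write_unsynchronized(&r, 0, 256));
   mark_gpu_usage(&r, 0, 256);          /* a draw reads bytes 0..255 */
   assert(can_write_unsynchronized(&r, 256, 256));
   mark_gpu_usage(&r, 256, 256);

   /* Rewriting an already-used region would need a blit or a stall. */
   assert(!can_write_unsynchronized(&r, 128, 64));
   return 0;
}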
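
The temporary-BO mapping path also has to honor GL_MIN_MAP_BUFFER_ALIGNMENT
(ARB_map_buffer_alignment): the pointer returned to the application, minus the
offset it passed in, must be a multiple of the advertised alignment. The
temporary BO starts at an aligned address rather than at the original buffer's
base, so the patch over-allocates by map_extra = offset % alignment and returns
virtual + map_extra, restoring the required congruence. For example, with a
64-byte alignment and offset 100, map_extra is 36, and (base + 36) - 100 =
base - 64, which is 64-byte aligned. A small sketch of the arithmetic, using
C11 aligned_alloc as a stand-in for drm_intel_bo_alloc's aligned allocation:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int
main(void)
{
   const uintptr_t alignment = 64;  /* ctx->Const.MinMapBufferAlignment */
   const uintptr_t offset = 100;    /* user's glMapBufferRange offset */
   const size_t length = 1000;      /* user's glMapBufferRange length */

   /* Same computation as intel_obj->map_extra[index] in the patch. */
   const uintptr_t map_extra = offset % alignment;

   /* aligned_alloc (C11) wants a size that is a multiple of the alignment;
    * the real code simply passes length + map_extra to drm_intel_bo_alloc. */
   size_t size = (length + map_extra + alignment - 1) & ~(alignment - 1);
   char *base = aligned_alloc(alignment, size);
   assert(base);

   char *ptr = base + map_extra;    /* what the application gets back */

   /* (ptr - offset) % alignment == 0, as the extension requires: base is
    * 0 mod 64 and map_extra == offset mod 64, so the remainders cancel. */
   assert(((uintptr_t) ptr - offset) % alignment == 0);

   free(base);
   return 0;
}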
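
Finally, for reference, this is the application-side pattern that
brw_map_buffer_range() and brw_flush_mapped_buffer_range() are built to serve:
GL_MAP_INVALIDATE_RANGE_BIT on a busy buffer (without GL_MAP_UNSYNCHRONIZED_BIT
or GL_MAP_PERSISTENT_BIT) lets the driver hand out a freshly allocated
temporary BO instead of stalling, and GL_MAP_FLUSH_EXPLICIT_BIT restricts the
blit back into the real BO to the bytes actually flushed. A sketch of such a
client follows; stream_vertices is a hypothetical helper, it assumes a current
GL 3.0+ context with these entry points already resolved, and error handling
is omitted.

#define GL_GLEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>
#include <string.h>

static void
stream_vertices(GLuint vbo, GLintptr offset,
                const void *verts, GLsizeiptr size)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);

   /* INVALIDATE_RANGE tells the driver we don't need the old contents of
    * this range, so on a busy BO i965 can map a temporary BO with no CPU/GPU
    * stall; FLUSH_EXPLICIT means only the ranges we flush get blitted back. */
   void *ptr = glMapBufferRange(GL_ARRAY_BUFFER, offset, size,
                                GL_MAP_WRITE_BIT |
                                GL_MAP_INVALIDATE_RANGE_BIT |
                                GL_MAP_FLUSH_EXPLICIT_BIT);
   memcpy(ptr, verts, size);

   /* Flush offsets are relative to the start of the mapped range. */
   glFlushMappedBufferRange(GL_ARRAY_BUFFER, 0, size);
   glUnmapBuffer(GL_ARRAY_BUFFER);
}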