X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fintel_buffer_objects.c;h=e932badaafe4ccf27b078da25053af30e41a69ce;hb=bdae2ddff89004c199b71cb6a4a306dee616f7f9;hp=f568864f4b4c38b73dbc6a7055a8bccbd169cf87;hpb=3dbba95b72262344b82fba018b7c2c1208754cd2;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index f568864f4b4..e932badaafe 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -1,90 +1,142 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. +/* + * Copyright 2003 VMware, Inc. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to + * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ + */ +/** + * @file intel_buffer_objects.c + * + * This provides core GL buffer object functionality. 
+ */ #include "main/imports.h" #include "main/mtypes.h" #include "main/macros.h" #include "main/bufferobj.h" +#include "brw_context.h" #include "intel_blit.h" #include "intel_buffer_objects.h" #include "intel_batchbuffer.h" -#include "intel_context.h" -#include "intel_fbo.h" -#include "intel_mipmap_tree.h" -#include "intel_regions.h" -#ifndef I915 -#include "brw_context.h" -#endif +static void +mark_buffer_gpu_usage(struct intel_buffer_object *intel_obj, + uint32_t offset, uint32_t size) +{ + intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset); + intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size); +} -static GLboolean -intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj); +static void +mark_buffer_inactive(struct intel_buffer_object *intel_obj) +{ + intel_obj->gpu_active_start = ~0; + intel_obj->gpu_active_end = 0; +} + +static void +mark_buffer_valid_data(struct intel_buffer_object *intel_obj, + uint32_t offset, uint32_t size) +{ + intel_obj->valid_data_start = MIN2(intel_obj->valid_data_start, offset); + intel_obj->valid_data_end = MAX2(intel_obj->valid_data_end, offset + size); +} + +static void +mark_buffer_invalid(struct intel_buffer_object *intel_obj) +{ + intel_obj->valid_data_start = ~0; + intel_obj->valid_data_end = 0; +} -/** Allocates a new drm_intel_bo to store the data for the buffer object. */ +/** Allocates a new brw_bo to store the data for the buffer object. */ static void -intel_bufferobj_alloc_buffer(struct intel_context *intel, - struct intel_buffer_object *intel_obj) +alloc_buffer_object(struct brw_context *brw, + struct intel_buffer_object *intel_obj) { - intel_obj->buffer = drm_intel_bo_alloc(intel->bufmgr, "bufferobj", - intel_obj->Base.Size, 64); + const struct gl_context *ctx = &brw->ctx; + + uint64_t size = intel_obj->Base.Size; + if (ctx->Const.RobustAccess) { + /* Pad out buffer objects with an extra 2kB (half a page). + * + * When pushing UBOs, we need to safeguard against 3DSTATE_CONSTANT_* + * reading out of bounds memory. The application might bind a UBO that's + * smaller than what the program expects. Ideally, we'd bind an extra + * push buffer containing zeros, but we have a limited number of those, + * so it's not always viable. Our only safe option is to pad all buffer + * objects by the maximum push data length, so that it will never read + * past the end of a BO. + * + * This is unfortunate, but it should result in at most 1 extra page, + * which probably isn't too terrible. 
+ */ + size += 64 * 32; /* max read length of 64 256-bit units */ + } + intel_obj->buffer = brw_bo_alloc(brw->bufmgr, "bufferobj", size, 64); -#ifndef I915 /* the buffer might be bound as a uniform buffer, need to update it */ - { - struct brw_context *brw = brw_context(&intel->ctx); - brw->state.dirty.brw |= BRW_NEW_UNIFORM_BUFFER; - } -#endif + if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER; + if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER) + brw->ctx.NewDriverState |= BRW_NEW_ATOMIC_BUFFER; + + mark_buffer_inactive(intel_obj); + mark_buffer_invalid(intel_obj); } static void release_buffer(struct intel_buffer_object *intel_obj) { - drm_intel_bo_unreference(intel_obj->buffer); + brw_bo_unreference(intel_obj->buffer); intel_obj->buffer = NULL; - intel_obj->offset = 0; - intel_obj->source = 0; } /** + * The NewBufferObject() driver hook. + * + * Allocates a new intel_buffer_object structure and initializes it. + * * There is some duplication between mesa's bufferobjects and our * bufmgr buffers. Both have an integer handle and a hashtable to * lookup an opaque structure. It would be nice if the handles and * internal structure where somehow shared. */ static struct gl_buffer_object * -intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target) +brw_new_buffer_object(struct gl_context * ctx, GLuint name) { struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object); + if (!obj) { + _mesa_error_no_memory(__func__); + return NULL; + } - _mesa_initialize_buffer_object(ctx, &obj->Base, name, target); + _mesa_initialize_buffer_object(ctx, &obj->Base, name); obj->buffer = NULL; @@ -92,11 +144,12 @@ intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target) } /** - * Deallocate/free a vertex/pixel buffer object. - * Called via glDeleteBuffersARB(). + * The DeleteBuffer() driver hook. + * + * Deletes a single OpenGL buffer object. Used by glDeleteBuffers(). */ static void -intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) +brw_delete_buffer(struct gl_context * ctx, struct gl_buffer_object *obj) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj); @@ -106,71 +159,58 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy * (though it does if you call glDeleteBuffers) */ - if (obj->Pointer) - intel_bufferobj_unmap(ctx, obj); + _mesa_buffer_unmap_all_mappings(ctx, obj); - free(intel_obj->sys_buffer); - - drm_intel_bo_unreference(intel_obj->buffer); - free(intel_obj); + brw_bo_unreference(intel_obj->buffer); + _mesa_delete_buffer_object(ctx, obj); } - /** - * Allocate space for and store data in a buffer object. Any data that was - * previously stored in the buffer object is lost. If data is NULL, - * memory will be allocated, but no copy will occur. - * Called via ctx->Driver.BufferData(). + * The BufferData() driver hook. + * + * Implements glBufferData(), which recreates a buffer object's data store + * and populates it with the given data, if present. + * + * Any data that was previously stored in the buffer object is lost. 
+ * * \return true for success, false if out of memory */ static GLboolean -intel_bufferobj_data(struct gl_context * ctx, - GLenum target, - GLsizeiptrARB size, - const GLvoid * data, - GLenum usage, struct gl_buffer_object *obj) +brw_buffer_data(struct gl_context *ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid *data, + GLenum usage, + GLbitfield storageFlags, + struct gl_buffer_object *obj) { - struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); /* Part of the ABI, but this function doesn't use it. */ -#ifndef I915 (void) target; -#endif intel_obj->Base.Size = size; intel_obj->Base.Usage = usage; + intel_obj->Base.StorageFlags = storageFlags; - assert(!obj->Pointer); /* Mesa should have unmapped it */ + assert(!obj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */ + assert(!obj->Mappings[MAP_INTERNAL].Pointer); if (intel_obj->buffer != NULL) release_buffer(intel_obj); - free(intel_obj->sys_buffer); - intel_obj->sys_buffer = NULL; - if (size != 0) { -#ifdef I915 - /* On pre-965, stick VBOs in system memory, as we're always doing - * swtnl with their contents anyway. - */ - if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) { - intel_obj->sys_buffer = malloc(size); - if (intel_obj->sys_buffer != NULL) { - if (data != NULL) - memcpy(intel_obj->sys_buffer, data, size); - return true; - } - } -#endif - intel_bufferobj_alloc_buffer(intel, intel_obj); + alloc_buffer_object(brw, intel_obj); if (!intel_obj->buffer) return false; - if (data != NULL) - drm_intel_bo_subdata(intel_obj->buffer, 0, size, data); + if (data != NULL) { + brw_bo_subdata(intel_obj->buffer, 0, size, data); + mark_buffer_valid_data(intel_obj, 0, size); + } } return true; @@ -178,18 +218,22 @@ intel_bufferobj_data(struct gl_context * ctx, /** - * Replace data in a subrange of buffer object. If the data range - * specified by size + offset extends beyond the end of the buffer or - * if data is NULL, no copy is performed. - * Called via glBufferSubDataARB(). + * The BufferSubData() driver hook. + * + * Implements glBufferSubData(), which replaces a portion of the data in a + * buffer object. + * + * If the data range specified by (size + offset) extends beyond the end of + * the buffer or if data is NULL, no copy is performed. */ static void -intel_bufferobj_subdata(struct gl_context * ctx, - GLintptrARB offset, - GLsizeiptrARB size, - const GLvoid * data, struct gl_buffer_object *obj) +brw_buffer_subdata(struct gl_context *ctx, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid *data, + struct gl_buffer_object *obj) { - struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); bool busy; @@ -198,80 +242,119 @@ intel_bufferobj_subdata(struct gl_context * ctx, assert(intel_obj); - /* If we have a single copy in system memory, update that */ - if (intel_obj->sys_buffer) { - if (intel_obj->source) - release_buffer(intel_obj); - - if (intel_obj->buffer == NULL) { - memcpy((char *)intel_obj->sys_buffer + offset, data, size); - return; - } - - free(intel_obj->sys_buffer); - intel_obj->sys_buffer = NULL; + /* See if we can unsynchronized write the data into the user's BO. This + * avoids GPU stalls in unfortunately common user patterns (uploading + * sequentially into a BO, with draw calls in between each upload). 
+ * + * Once we've hit this path, we mark this GL BO as preferring stalling to + * blits, so that we can hopefully hit this path again in the future + * (otherwise, an app that might occasionally stall but mostly not will end + * up with blitting all the time, at the cost of bandwidth) + */ + if (offset + size <= intel_obj->gpu_active_start || + intel_obj->gpu_active_end <= offset || + offset + size <= intel_obj->valid_data_start || + intel_obj->valid_data_end <= offset) { + void *map = brw_bo_map(brw, intel_obj->buffer, MAP_WRITE | MAP_ASYNC); + memcpy(map + offset, data, size); + brw_bo_unmap(intel_obj->buffer); + + if (intel_obj->gpu_active_end > intel_obj->gpu_active_start) + intel_obj->prefer_stall_to_blit = true; + + mark_buffer_valid_data(intel_obj, offset, size); + return; } - /* Otherwise we need to update the copy in video memory. */ busy = - drm_intel_bo_busy(intel_obj->buffer) || - drm_intel_bo_references(intel->batch.bo, intel_obj->buffer); + brw_bo_busy(intel_obj->buffer) || + brw_batch_references(&brw->batch, intel_obj->buffer); if (busy) { - if (size == intel_obj->Base.Size) { - /* Replace the current busy bo with fresh data. */ - drm_intel_bo_unreference(intel_obj->buffer); - intel_bufferobj_alloc_buffer(intel, intel_obj); - drm_intel_bo_subdata(intel_obj->buffer, 0, size, data); - } else { - perf_debug("Using a blit copy to avoid stalling on %ldb " - "glBufferSubData() to a busy buffer object.\n", - (long)size); - drm_intel_bo *temp_bo = - drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64); - - drm_intel_bo_subdata(temp_bo, 0, size, data); - - intel_emit_linear_blit(intel, + if (size == intel_obj->Base.Size || + (intel_obj->valid_data_start >= offset && + intel_obj->valid_data_end <= offset + size)) { + /* Replace the current busy bo so the subdata doesn't stall. */ + brw_bo_unreference(intel_obj->buffer); + alloc_buffer_object(brw, intel_obj); + } else if (!intel_obj->prefer_stall_to_blit) { + perf_debug("Using a blit copy to avoid stalling on " + "glBufferSubData(%ld, %ld) (%ldkb) to a busy " + "(%d-%d) / valid (%d-%d) buffer object.\n", + (long)offset, (long)offset + size, (long)(size/1024), + intel_obj->gpu_active_start, + intel_obj->gpu_active_end, + intel_obj->valid_data_start, + intel_obj->valid_data_end); + struct brw_bo *temp_bo = + brw_bo_alloc(brw->bufmgr, "subdata temp", size, 64); + + brw_bo_subdata(temp_bo, 0, size, data); + + intel_emit_linear_blit(brw, intel_obj->buffer, offset, temp_bo, 0, size); - drm_intel_bo_unreference(temp_bo); + brw_bo_unreference(temp_bo); + mark_buffer_valid_data(intel_obj, offset, size); + return; + } else { + perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy " + "(%d-%d) buffer object. Use glMapBufferRange() to " + "avoid this.\n", + (long)offset, (long)offset + size, (long)(size/1024), + intel_obj->gpu_active_start, + intel_obj->gpu_active_end); + intel_batchbuffer_flush(brw); } - } else { - drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); } + + brw_bo_subdata(intel_obj->buffer, offset, size, data); + mark_buffer_inactive(intel_obj); + mark_buffer_valid_data(intel_obj, offset, size); } /** - * Called via glGetBufferSubDataARB(). + * The GetBufferSubData() driver hook. + * + * Implements glGetBufferSubData(), which copies a subrange of a buffer + * object into user memory. 
*/ static void -intel_bufferobj_get_subdata(struct gl_context * ctx, - GLintptrARB offset, - GLsizeiptrARB size, - GLvoid * data, struct gl_buffer_object *obj) +brw_get_buffer_subdata(struct gl_context *ctx, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid *data, + struct gl_buffer_object *obj) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); assert(intel_obj); - if (intel_obj->sys_buffer) - memcpy(data, (char *)intel_obj->sys_buffer + offset, size); - else { - if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { - intel_batchbuffer_flush(intel); - } - drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); + if (brw_batch_references(&brw->batch, intel_obj->buffer)) { + intel_batchbuffer_flush(brw); + } + + void *map = brw_bo_map(brw, intel_obj->buffer, MAP_READ); + + if (unlikely(!map)) { + _mesa_error_no_memory(__func__); + return; } -} + memcpy(data, map + offset, size); + brw_bo_unmap(intel_obj->buffer); + + mark_buffer_inactive(intel_obj); +} /** - * Called via glMapBufferRange and glMapBuffer + * The MapBufferRange() driver hook. + * + * This implements both glMapBufferRange() and glMapBuffer(). * * The goal of this extension is to allow apps to accumulate their rendering * at the same time as they accumulate their buffer object. Without it, @@ -287,40 +370,32 @@ intel_bufferobj_get_subdata(struct gl_context * ctx, * and blit it into the real BO at unmap time. */ static void * -intel_bufferobj_map_range(struct gl_context * ctx, - GLintptr offset, GLsizeiptr length, - GLbitfield access, struct gl_buffer_object *obj) +brw_map_buffer_range(struct gl_context *ctx, + GLintptr offset, GLsizeiptr length, + GLbitfield access, struct gl_buffer_object *obj, + gl_map_buffer_index index) { - struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); + STATIC_ASSERT(GL_MAP_UNSYNCHRONIZED_BIT == MAP_ASYNC); + STATIC_ASSERT(GL_MAP_WRITE_BIT == MAP_WRITE); + STATIC_ASSERT(GL_MAP_READ_BIT == MAP_READ); + STATIC_ASSERT(GL_MAP_PERSISTENT_BIT == MAP_PERSISTENT); + STATIC_ASSERT(GL_MAP_COHERENT_BIT == MAP_COHERENT); + assert((access & MAP_INTERNAL_MASK) == 0); + /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also * internally uses our functions directly. */ - obj->Offset = offset; - obj->Length = length; - obj->AccessFlags = access; - - if (intel_obj->sys_buffer) { - const bool read_only = - (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT; - - if (!read_only && intel_obj->source) - release_buffer(intel_obj); - - if (!intel_obj->buffer || intel_obj->source) { - obj->Pointer = intel_obj->sys_buffer + offset; - return obj->Pointer; - } - - free(intel_obj->sys_buffer); - intel_obj->sys_buffer = NULL; - } + obj->Mappings[index].Offset = offset; + obj->Mappings[index].Length = length; + obj->Mappings[index].AccessFlags = access; if (intel_obj->buffer == NULL) { - obj->Pointer = NULL; + obj->Mappings[index].Pointer = NULL; return NULL; } @@ -333,521 +408,252 @@ intel_bufferobj_map_range(struct gl_context * ctx, * achieve the required synchronization. 
*/ if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { - if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + if (brw_batch_references(&brw->batch, intel_obj->buffer)) { if (access & GL_MAP_INVALIDATE_BUFFER_BIT) { - drm_intel_bo_unreference(intel_obj->buffer); - intel_bufferobj_alloc_buffer(intel, intel_obj); + brw_bo_unreference(intel_obj->buffer); + alloc_buffer_object(brw, intel_obj); } else { perf_debug("Stalling on the GPU for mapping a busy buffer " "object\n"); - intel_flush(ctx); + intel_batchbuffer_flush(brw); } - } else if (drm_intel_bo_busy(intel_obj->buffer) && + } else if (brw_bo_busy(intel_obj->buffer) && (access & GL_MAP_INVALIDATE_BUFFER_BIT)) { - drm_intel_bo_unreference(intel_obj->buffer); - intel_bufferobj_alloc_buffer(intel, intel_obj); + brw_bo_unreference(intel_obj->buffer); + alloc_buffer_object(brw, intel_obj); } } + if (access & MAP_WRITE) + mark_buffer_valid_data(intel_obj, offset, length); + /* If the user is mapping a range of an active buffer object but * doesn't require the current contents of that range, make a new * BO, and we'll copy what they put in there out at unmap or * FlushRange time. + * + * That is, unless they're looking for a persistent mapping -- we would + * need to do blits in the MemoryBarrier call, and it's easier to just do a + * GPU stall and do a mapping. */ - if ((access & GL_MAP_INVALIDATE_RANGE_BIT) && - drm_intel_bo_busy(intel_obj->buffer)) { - if (access & GL_MAP_FLUSH_EXPLICIT_BIT) { - intel_obj->range_map_buffer = malloc(length); - obj->Pointer = intel_obj->range_map_buffer; - } else { - intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr, - "range map", - length, 64); - if (!(access & GL_MAP_READ_BIT)) { - drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo); - } else { - drm_intel_bo_map(intel_obj->range_map_bo, - (access & GL_MAP_WRITE_BIT) != 0); - } - obj->Pointer = intel_obj->range_map_bo->virtual; - } - return obj->Pointer; + if (!(access & (GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_PERSISTENT_BIT)) && + (access & GL_MAP_INVALIDATE_RANGE_BIT) && + brw_bo_busy(intel_obj->buffer)) { + /* Ensure that the base alignment of the allocation meets the alignment + * guarantees the driver has advertised to the application. + */ + const unsigned alignment = ctx->Const.MinMapBufferAlignment; + + intel_obj->map_extra[index] = (uintptr_t) offset % alignment; + intel_obj->range_map_bo[index] = brw_bo_alloc(brw->bufmgr, + "BO blit temp", + length + + intel_obj->map_extra[index], + alignment); + void *map = brw_bo_map(brw, intel_obj->range_map_bo[index], access); + obj->Mappings[index].Pointer = map + intel_obj->map_extra[index]; + return obj->Mappings[index].Pointer; } - if (access & GL_MAP_UNSYNCHRONIZED_BIT) - drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer); - else if (!(access & GL_MAP_READ_BIT)) { - drm_intel_gem_bo_map_gtt(intel_obj->buffer); - } else { - drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0); + void *map = brw_bo_map(brw, intel_obj->buffer, access); + if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { + mark_buffer_inactive(intel_obj); } - obj->Pointer = intel_obj->buffer->virtual + offset; - return obj->Pointer; + obj->Mappings[index].Pointer = map + offset; + return obj->Mappings[index].Pointer; } -/* Ideally we'd use a BO to avoid taking up cache space for the temporary +/** + * The FlushMappedBufferRange() driver hook. + * + * Implements glFlushMappedBufferRange(), which signifies that modifications + * have been made to a range of a mapped buffer, and it should be flushed. 
+ * + * This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT. + * + * Ideally we'd use a BO to avoid taking up cache space for the temporary * data, but FlushMappedBufferRange may be followed by further writes to * the pointer, so we would have to re-map after emitting our blit, which * would defeat the point. */ static void -intel_bufferobj_flush_mapped_range(struct gl_context *ctx, - GLintptr offset, GLsizeiptr length, - struct gl_buffer_object *obj) +brw_flush_mapped_buffer_range(struct gl_context *ctx, + GLintptr offset, GLsizeiptr length, + struct gl_buffer_object *obj, + gl_map_buffer_index index) { - struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - drm_intel_bo *temp_bo; - /* Unless we're in the range map using a temporary system buffer, - * there's no work to do. + assert(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT); + + /* If we gave a direct mapping of the buffer instead of using a temporary, + * then there's nothing to do. */ - if (intel_obj->range_map_buffer == NULL) + if (intel_obj->range_map_bo[index] == NULL) return; if (length == 0) return; - temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64); - - drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer); - - intel_emit_linear_blit(intel, - intel_obj->buffer, obj->Offset + offset, - temp_bo, 0, + /* Note that we're not unmapping our buffer while executing the blit. We + * need to have a mapping still at the end of this call, since the user + * gets to make further modifications and glFlushMappedBufferRange() calls. + * This is safe, because: + * + * - On LLC platforms, we're using a CPU mapping that's coherent with the + * GPU (except for the render caches), so the kernel doesn't need to do + * any flushing work for us except for what happens at batch exec time + * anyway. + * + * - On non-LLC platforms, we're using a GTT mapping that writes directly + * to system memory (except for the chipset cache that gets flushed at + * batch exec time). + * + * In both cases we don't need to stall for the previous blit to complete + * so we can re-map (and we definitely don't want to, since that would be + * slow): If the user edits a part of their buffer that's previously been + * blitted, then our lack of synchoronization is fine, because either + * they'll get some too-new data in the first blit and not do another blit + * of that area (but in that case the results are undefined), or they'll do + * another blit of that area and the complete newer data will land the + * second time. + */ + intel_emit_linear_blit(brw, + intel_obj->buffer, + obj->Mappings[index].Offset + offset, + intel_obj->range_map_bo[index], + intel_obj->map_extra[index] + offset, length); - - drm_intel_bo_unreference(temp_bo); + mark_buffer_gpu_usage(intel_obj, + obj->Mappings[index].Offset + offset, + length); } /** - * Called via glUnmapBuffer(). + * The UnmapBuffer() driver hook. + * + * Implements glUnmapBuffer(). 
*/ static GLboolean -intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj) +brw_unmap_buffer(struct gl_context *ctx, + struct gl_buffer_object *obj, + gl_map_buffer_index index) { - struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - assert(obj->Pointer); - if (intel_obj->sys_buffer != NULL) { - /* always keep the mapping around. */ - } else if (intel_obj->range_map_buffer != NULL) { - /* Since we've emitted some blits to buffers that will (likely) be used - * in rendering operations in other cache domains in this batch, emit a - * flush. Once again, we wish for a domain tracker in libdrm to cover - * usage inside of a batchbuffer. - */ - intel_batchbuffer_emit_mi_flush(intel); - free(intel_obj->range_map_buffer); - intel_obj->range_map_buffer = NULL; - } else if (intel_obj->range_map_bo != NULL) { - drm_intel_bo_unmap(intel_obj->range_map_bo); - - intel_emit_linear_blit(intel, - intel_obj->buffer, obj->Offset, - intel_obj->range_map_bo, 0, - obj->Length); + assert(obj->Mappings[index].Pointer); + if (intel_obj->range_map_bo[index] != NULL) { + brw_bo_unmap(intel_obj->range_map_bo[index]); + + if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) { + intel_emit_linear_blit(brw, + intel_obj->buffer, obj->Mappings[index].Offset, + intel_obj->range_map_bo[index], + intel_obj->map_extra[index], + obj->Mappings[index].Length); + mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset, + obj->Mappings[index].Length); + } /* Since we've emitted some blits to buffers that will (likely) be used * in rendering operations in other cache domains in this batch, emit a * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(intel); + brw_emit_mi_flush(brw); - drm_intel_bo_unreference(intel_obj->range_map_bo); - intel_obj->range_map_bo = NULL; + brw_bo_unreference(intel_obj->range_map_bo[index]); + intel_obj->range_map_bo[index] = NULL; } else if (intel_obj->buffer != NULL) { - drm_intel_bo_unmap(intel_obj->buffer); + brw_bo_unmap(intel_obj->buffer); } - obj->Pointer = NULL; - obj->Offset = 0; - obj->Length = 0; + obj->Mappings[index].Pointer = NULL; + obj->Mappings[index].Offset = 0; + obj->Mappings[index].Length = 0; return true; } -drm_intel_bo * -intel_bufferobj_buffer(struct intel_context *intel, +/** + * Gets a pointer to the object's BO, and marks the given range as being used + * on the GPU. + * + * Anywhere that uses buffer objects in the pipeline should be using this to + * mark the range of the buffer that is being accessed by the pipeline. 
+ */ +struct brw_bo * +intel_bufferobj_buffer(struct brw_context *brw, struct intel_buffer_object *intel_obj, - GLuint flag) + uint32_t offset, uint32_t size, bool write) { - if (intel_obj->source) - release_buffer(intel_obj); - - if (intel_obj->buffer == NULL) { - intel_bufferobj_alloc_buffer(intel, intel_obj); - drm_intel_bo_subdata(intel_obj->buffer, - 0, intel_obj->Base.Size, - intel_obj->sys_buffer); - - free(intel_obj->sys_buffer); - intel_obj->sys_buffer = NULL; - intel_obj->offset = 0; - } - - return intel_obj->buffer; -} - -#define INTEL_UPLOAD_SIZE (64*1024) - -void -intel_upload_finish(struct intel_context *intel) -{ - if (!intel->upload.bo) - return; - - if (intel->upload.buffer_len) { - drm_intel_bo_subdata(intel->upload.bo, - intel->upload.buffer_offset, - intel->upload.buffer_len, - intel->upload.buffer); - intel->upload.buffer_len = 0; - } - - drm_intel_bo_unreference(intel->upload.bo); - intel->upload.bo = NULL; -} - -static void wrap_buffers(struct intel_context *intel, GLuint size) -{ - intel_upload_finish(intel); - - if (size < INTEL_UPLOAD_SIZE) - size = INTEL_UPLOAD_SIZE; - - intel->upload.bo = drm_intel_bo_alloc(intel->bufmgr, "upload", size, 0); - intel->upload.offset = 0; -} - -void intel_upload_data(struct intel_context *intel, - const void *ptr, GLuint size, GLuint align, - drm_intel_bo **return_bo, - GLuint *return_offset) -{ - GLuint base, delta; - - base = (intel->upload.offset + align - 1) / align * align; - if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { - wrap_buffers(intel, size); - base = 0; - } - - drm_intel_bo_reference(intel->upload.bo); - *return_bo = intel->upload.bo; - *return_offset = base; - - delta = base - intel->upload.offset; - if (intel->upload.buffer_len && - intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) - { - drm_intel_bo_subdata(intel->upload.bo, - intel->upload.buffer_offset, - intel->upload.buffer_len, - intel->upload.buffer); - intel->upload.buffer_len = 0; - } - - if (size < sizeof(intel->upload.buffer)) - { - if (intel->upload.buffer_len == 0) - intel->upload.buffer_offset = base; - else - intel->upload.buffer_len += delta; - - memcpy(intel->upload.buffer + intel->upload.buffer_len, ptr, size); - intel->upload.buffer_len += size; - } - else - { - drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); - } - - intel->upload.offset = base + size; -} - -void *intel_upload_map(struct intel_context *intel, GLuint size, GLuint align) -{ - GLuint base, delta; - char *ptr; - - base = (intel->upload.offset + align - 1) / align * align; - if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { - wrap_buffers(intel, size); - base = 0; - } - - delta = base - intel->upload.offset; - if (intel->upload.buffer_len && - intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) - { - drm_intel_bo_subdata(intel->upload.bo, - intel->upload.buffer_offset, - intel->upload.buffer_len, - intel->upload.buffer); - intel->upload.buffer_len = 0; - } - - if (size <= sizeof(intel->upload.buffer)) { - if (intel->upload.buffer_len == 0) - intel->upload.buffer_offset = base; - else - intel->upload.buffer_len += delta; - - ptr = intel->upload.buffer + intel->upload.buffer_len; - intel->upload.buffer_len += size; - } else - ptr = malloc(size); - - return ptr; -} - -void intel_upload_unmap(struct intel_context *intel, - const void *ptr, GLuint size, GLuint align, - drm_intel_bo **return_bo, - GLuint *return_offset) -{ - GLuint base; - - base = (intel->upload.offset + align - 1) / align * 
align; - if (size > sizeof(intel->upload.buffer)) { - drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); - free((void*)ptr); - } - - drm_intel_bo_reference(intel->upload.bo); - *return_bo = intel->upload.bo; - *return_offset = base; + /* This is needed so that things like transform feedback and texture buffer + * objects that need a BO but don't want to check that they exist for + * draw-time validation can just always get a BO from a GL buffer object. + */ + if (intel_obj->buffer == NULL) + alloc_buffer_object(brw, intel_obj); - intel->upload.offset = base + size; -} + mark_buffer_gpu_usage(intel_obj, offset, size); -drm_intel_bo * -intel_bufferobj_source(struct intel_context *intel, - struct intel_buffer_object *intel_obj, - GLuint align, GLuint *offset) -{ - if (intel_obj->buffer == NULL) { - intel_upload_data(intel, - intel_obj->sys_buffer, intel_obj->Base.Size, align, - &intel_obj->buffer, &intel_obj->offset); - intel_obj->source = 1; - } + /* If writing, (conservatively) mark this section as having valid data. */ + if (write) + mark_buffer_valid_data(intel_obj, offset, size); - *offset = intel_obj->offset; return intel_obj->buffer; } +/** + * The CopyBufferSubData() driver hook. + * + * Implements glCopyBufferSubData(), which copies a portion of one buffer + * object's data to another. Independent source and destination offsets + * are allowed. + */ static void -intel_bufferobj_copy_subdata(struct gl_context *ctx, - struct gl_buffer_object *src, - struct gl_buffer_object *dst, - GLintptr read_offset, GLintptr write_offset, - GLsizeiptr size) +brw_copy_buffer_subdata(struct gl_context *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr read_offset, GLintptr write_offset, + GLsizeiptr size) { - struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); struct intel_buffer_object *intel_src = intel_buffer_object(src); struct intel_buffer_object *intel_dst = intel_buffer_object(dst); - drm_intel_bo *src_bo, *dst_bo; - GLuint src_offset; + struct brw_bo *src_bo, *dst_bo; if (size == 0) return; - /* If we're in system memory, just map and memcpy. */ - if (intel_src->sys_buffer || intel_dst->sys_buffer) { - /* The same buffer may be used, but note that regions copied may - * not overlap. - */ - if (src == dst) { - char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, - GL_MAP_READ_BIT | - GL_MAP_WRITE_BIT, - dst); - memmove(ptr + write_offset, ptr + read_offset, size); - intel_bufferobj_unmap(ctx, dst); - } else { - const char *src_ptr; - char *dst_ptr; - - src_ptr = intel_bufferobj_map_range(ctx, 0, src->Size, - GL_MAP_READ_BIT, src); - dst_ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, - GL_MAP_WRITE_BIT, dst); - - memcpy(dst_ptr + write_offset, src_ptr + read_offset, size); - - intel_bufferobj_unmap(ctx, src); - intel_bufferobj_unmap(ctx, dst); - } - return; - } - - /* Otherwise, we have real BOs, so blit them. */ - - dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART); - src_bo = intel_bufferobj_source(intel, intel_src, 64, &src_offset); + dst_bo = intel_bufferobj_buffer(brw, intel_dst, write_offset, size, true); + src_bo = intel_bufferobj_buffer(brw, intel_src, read_offset, size, false); - intel_emit_linear_blit(intel, + intel_emit_linear_blit(brw, dst_bo, write_offset, - src_bo, read_offset + src_offset, size); + src_bo, read_offset, size); /* Since we've emitted some blits to buffers that will (likely) be used * in rendering operations in other cache domains in this batch, emit a * flush. 
Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(intel); -} - -static GLenum -intel_buffer_purgeable(drm_intel_bo *buffer) -{ - int retained = 0; - - if (buffer != NULL) - retained = drm_intel_bo_madvise (buffer, I915_MADV_DONTNEED); - - return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE; -} - -static GLenum -intel_buffer_object_purgeable(struct gl_context * ctx, - struct gl_buffer_object *obj, - GLenum option) -{ - struct intel_buffer_object *intel_obj = intel_buffer_object (obj); - - if (intel_obj->buffer != NULL) - return intel_buffer_purgeable(intel_obj->buffer); - - if (option == GL_RELEASED_APPLE) { - free(intel_obj->sys_buffer); - intel_obj->sys_buffer = NULL; - - return GL_RELEASED_APPLE; - } else { - /* XXX Create the buffer and madvise(MADV_DONTNEED)? */ - struct intel_context *intel = intel_context(ctx); - drm_intel_bo *bo = intel_bufferobj_buffer(intel, intel_obj, INTEL_READ); - - return intel_buffer_purgeable(bo); - } -} - -static GLenum -intel_texture_object_purgeable(struct gl_context * ctx, - struct gl_texture_object *obj, - GLenum option) -{ - struct intel_texture_object *intel; - - (void) ctx; - (void) option; - - intel = intel_texture_object(obj); - if (intel->mt == NULL || intel->mt->region == NULL) - return GL_RELEASED_APPLE; - - return intel_buffer_purgeable(intel->mt->region->bo); -} - -static GLenum -intel_render_object_purgeable(struct gl_context * ctx, - struct gl_renderbuffer *obj, - GLenum option) -{ - struct intel_renderbuffer *intel; - - (void) ctx; - (void) option; - - intel = intel_renderbuffer(obj); - if (intel->mt == NULL) - return GL_RELEASED_APPLE; - - return intel_buffer_purgeable(intel->mt->region->bo); -} - -static GLenum -intel_buffer_unpurgeable(drm_intel_bo *buffer) -{ - int retained; - - retained = 0; - if (buffer != NULL) - retained = drm_intel_bo_madvise (buffer, I915_MADV_WILLNEED); - - return retained ? 
GL_RETAINED_APPLE : GL_UNDEFINED_APPLE; -} - -static GLenum -intel_buffer_object_unpurgeable(struct gl_context * ctx, - struct gl_buffer_object *obj, - GLenum option) -{ - (void) ctx; - (void) option; - - return intel_buffer_unpurgeable(intel_buffer_object (obj)->buffer); -} - -static GLenum -intel_texture_object_unpurgeable(struct gl_context * ctx, - struct gl_texture_object *obj, - GLenum option) -{ - struct intel_texture_object *intel; - - (void) ctx; - (void) option; - - intel = intel_texture_object(obj); - if (intel->mt == NULL || intel->mt->region == NULL) - return GL_UNDEFINED_APPLE; - - return intel_buffer_unpurgeable(intel->mt->region->bo); -} - -static GLenum -intel_render_object_unpurgeable(struct gl_context * ctx, - struct gl_renderbuffer *obj, - GLenum option) -{ - struct intel_renderbuffer *intel; - - (void) ctx; - (void) option; - - intel = intel_renderbuffer(obj); - if (intel->mt == NULL) - return GL_UNDEFINED_APPLE; - - return intel_buffer_unpurgeable(intel->mt->region->bo); + brw_emit_mi_flush(brw); } void intelInitBufferObjectFuncs(struct dd_function_table *functions) { - functions->NewBufferObject = intel_bufferobj_alloc; - functions->DeleteBuffer = intel_bufferobj_free; - functions->BufferData = intel_bufferobj_data; - functions->BufferSubData = intel_bufferobj_subdata; - functions->GetBufferSubData = intel_bufferobj_get_subdata; - functions->MapBufferRange = intel_bufferobj_map_range; - functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; - functions->UnmapBuffer = intel_bufferobj_unmap; - functions->CopyBufferSubData = intel_bufferobj_copy_subdata; - - functions->BufferObjectPurgeable = intel_buffer_object_purgeable; - functions->TextureObjectPurgeable = intel_texture_object_purgeable; - functions->RenderObjectPurgeable = intel_render_object_purgeable; - - functions->BufferObjectUnpurgeable = intel_buffer_object_unpurgeable; - functions->TextureObjectUnpurgeable = intel_texture_object_unpurgeable; - functions->RenderObjectUnpurgeable = intel_render_object_unpurgeable; + functions->NewBufferObject = brw_new_buffer_object; + functions->DeleteBuffer = brw_delete_buffer; + functions->BufferData = brw_buffer_data; + functions->BufferSubData = brw_buffer_subdata; + functions->GetBufferSubData = brw_get_buffer_subdata; + functions->MapBufferRange = brw_map_buffer_range; + functions->FlushMappedBufferRange = brw_flush_mapped_buffer_range; + functions->UnmapBuffer = brw_unmap_buffer; + functions->CopyBufferSubData = brw_copy_buffer_subdata; }
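
The glBufferSubData() fast path in this patch hinges on two byte ranges tracked per buffer object: the span the GPU may still read (gpu_active_start/end) and the span that currently holds defined data (valid_data_start/end). Below is a minimal standalone sketch of that bookkeeping and of the disjointness test that decides when a write may go in unsynchronized. The struct and helper names are invented for illustration, and Mesa's MIN2/MAX2 macros are open-coded, so this is not the driver code itself.

/* Standalone sketch (not driver code): models the gpu_active / valid_data
 * range bookkeeping that brw_buffer_subdata() uses to decide whether an
 * unsynchronized CPU write is safe.  In the real driver these fields live
 * in struct intel_buffer_object.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct range_tracker {
   uint32_t gpu_active_start, gpu_active_end;   /* bytes the GPU may read */
   uint32_t valid_data_start, valid_data_end;   /* bytes with defined data */
};

static void
tracker_reset(struct range_tracker *t)
{
   /* Empty ranges: start > end, matching mark_buffer_inactive()/invalid(). */
   t->gpu_active_start = t->valid_data_start = ~0u;
   t->gpu_active_end = t->valid_data_end = 0;
}

static void
tracker_mark_gpu_usage(struct range_tracker *t, uint32_t offset, uint32_t size)
{
   if (offset < t->gpu_active_start)
      t->gpu_active_start = offset;
   if (offset + size > t->gpu_active_end)
      t->gpu_active_end = offset + size;
}

static void
tracker_mark_valid(struct range_tracker *t, uint32_t offset, uint32_t size)
{
   if (offset < t->valid_data_start)
      t->valid_data_start = offset;
   if (offset + size > t->valid_data_end)
      t->valid_data_end = offset + size;
}

/* The write can skip synchronization when it cannot disturb anything the GPU
 * might read: it lies entirely outside the GPU-active range, or entirely
 * outside the range that currently holds defined data.
 */
static bool
can_write_unsynchronized(const struct range_tracker *t,
                         uint32_t offset, uint32_t size)
{
   return offset + size <= t->gpu_active_start ||
          t->gpu_active_end <= offset ||
          offset + size <= t->valid_data_start ||
          t->valid_data_end <= offset;
}

int
main(void)
{
   struct range_tracker t;
   tracker_reset(&t);

   /* App uploads 0..1024 and issues a draw that reads from it. */
   tracker_mark_valid(&t, 0, 1024);
   tracker_mark_gpu_usage(&t, 0, 1024);

   /* Appending at 1024..2048 is safe without a stall or blit... */
   printf("append ok: %d\n", can_write_unsynchronized(&t, 1024, 1024));
   /* ...but rewriting 512..1024 would race the GPU. */
   printf("overwrite ok: %d\n", can_write_unsynchronized(&t, 512, 512));
   return 0;
}

In the driver, a write that takes this path while the buffer is still GPU-active also sets prefer_stall_to_blit, so that later overlapping writes to a busy buffer stall rather than bounce through a temporary BO and a blit.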