From 2d5c74fac3cc0f9d45a9e11b2fcdea1bc67928c4 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 26 Aug 2009 18:08:52 -0700
Subject: [PATCH] intel: Add support for GL_ARB_map_buffer_range.

Passes glean's bufferObject test, and should provide good performance in the
cases applications are expected to use.
---
Note: this copy of the patch was re-flowed from a whitespace-collapsed
version.  Indentation and blank lines in context/removed lines are
reconstructed, so applying it may need --ignore-whitespace.  Compared to
the original commit, intel_emit_linear_blit() returns early for size == 0
(avoiding a division by zero) and the invalidate-buffer path calls
drm_intel_bo_alloc() directly.  The index hashes below are those of the
original commit.

 src/mesa/drivers/dri/intel/intel_blit.c       |  48 +++++
 src/mesa/drivers/dri/intel/intel_blit.h       |   6 +
 .../drivers/dri/intel/intel_buffer_objects.c  | 171 ++++++++++++++----
 .../drivers/dri/intel/intel_buffer_objects.h  |   5 +
 src/mesa/drivers/dri/intel/intel_extensions.c |   2 +
 5 files changed, 197 insertions(+), 35 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 979f2025842..0c5be4c7989 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -636,3 +636,51 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 
    return GL_TRUE;
 }
+
+/**
+ * Copy size bytes from src_bo at src_offset to dst_bo at dst_offset by
+ * emitting copy blits.
+ *
+ * We don't have a memmove-type blit like some other hardware, so we'll do a
+ * rectangular blit covering a large space, then emit a 1-scanline blit at
+ * the end to cover the remainder if needed.
+ */
+void
+intel_emit_linear_blit(struct intel_context *intel,
+                       drm_intel_bo *dst_bo,
+                       unsigned int dst_offset,
+                       drm_intel_bo *src_bo,
+                       unsigned int src_offset,
+                       unsigned int size)
+{
+   GLuint pitch, height;
+
+   /* A zero-length copy would make pitch 0 and divide by zero below. */
+   if (size == 0)
+      return;
+
+   /* The pitch is a signed value. */
+   pitch = MIN2(size, (1 << 15) - 1);
+   height = size / pitch;
+   intelEmitCopyBlit(intel, 1,
+                     pitch, src_bo, src_offset, I915_TILING_NONE,
+                     pitch, dst_bo, dst_offset, I915_TILING_NONE,
+                     0, 0, /* src x/y */
+                     0, 0, /* dst x/y */
+                     pitch, height, /* w, h */
+                     GL_COPY);
+
+   src_offset += pitch * height;
+   dst_offset += pitch * height;
+   size -= pitch * height;
+   assert (size < (1 << 15));
+   if (size != 0) {
+      intelEmitCopyBlit(intel, 1,
+                        size, src_bo, src_offset, I915_TILING_NONE,
+                        size, dst_bo, dst_offset, I915_TILING_NONE,
+                        0, 0, /* src x/y */
+                        0, 0, /* dst x/y */
+                        size, 1, /* w, h */
+                        GL_COPY);
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h
index 152fa3f17bf..240cb7cd1b6 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.h
+++ b/src/mesa/drivers/dri/intel/intel_blit.h
@@ -63,5 +63,11 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
                                   GLshort x, GLshort y,
                                   GLshort w, GLshort h,
                                   GLenum logic_op);
+void intel_emit_linear_blit(struct intel_context *intel,
+                            drm_intel_bo *dst_bo,
+                            unsigned int dst_offset,
+                            drm_intel_bo *src_bo,
+                            unsigned int src_offset,
+                            unsigned int size);
 
 #endif
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index a7403979439..3cf0ea0991d 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -270,29 +270,155 @@ intel_bufferobj_map(GLcontext * ctx,
    return obj->Pointer;
 }
 
+/**
+ * Called via glMapBufferRange().
+ *
+ * The goal of this extension is to allow apps to accumulate their rendering
+ * at the same time as they accumulate their buffer object.  Without it,
+ * you'd end up blocking on execution of rendering every time you mapped
+ * the buffer to put new data in.
+ *
+ * We support it in 3 ways: If unsynchronized, then don't bother
+ * flushing the batchbuffer before mapping the buffer, which can save blocking
+ * in many cases.  If we would still block, and they allow the whole buffer
+ * to be invalidated, then just allocate a new buffer to replace the old one.
+ * If not, and we'd block, and they allow the subrange of the buffer to be
+ * invalidated, then we can make a new little BO, let them write into that,
+ * and blit it into the real BO at unmap time.
+ */
+static void *
+intel_bufferobj_map_range(GLcontext * ctx,
+                          GLenum target, GLsizei offset, GLsizeiptr length,
+                          GLbitfield access, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+
+   if (intel_obj->sys_buffer) {
+      obj->Pointer = intel_obj->sys_buffer + offset;
+      return obj->Pointer;
+   }
+
+   if (intel_obj->region)
+      intel_bufferobj_cow(intel, intel_obj);
+
+   /* If the mapping is synchronized with other GL operations, flush
+    * the batchbuffer so that GEM knows about the buffer access for later
+    * syncing.
+    */
+   if ((access & GL_MAP_WRITE_BIT) && !(access & GL_MAP_UNSYNCHRONIZED_BIT))
+      intelFlush(ctx);
+
+   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
+    * internally uses our functions directly.
+    */
+   obj->Offset = offset;
+   obj->Length = length;
+   obj->AccessFlags = access;
+
+   if (intel_obj->buffer == NULL) {
+      obj->Pointer = NULL;
+      return NULL;
+   }
+
+   /* If the user doesn't care about existing buffer contents and mapping
+    * would cause us to block, then throw out the old buffer.
+    */
+   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
+       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
+       drm_intel_bo_busy(intel_obj->buffer)) {
+      drm_intel_bo_unreference(intel_obj->buffer);
+      intel_obj->buffer = drm_intel_bo_alloc(intel->bufmgr, "bufferobj",
+                                             intel_obj->Base.Size, 64);
+   }
+
+   /* If the user is mapping a range of an active buffer object but
+    * doesn't require the current contents of that range, make a new
+    * BO, and we'll copy what they put in there out at unmap or
+    * FlushRange time.
+    */
+   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
+       drm_intel_bo_busy(intel_obj->buffer)) {
+      intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
+                                                   "range map",
+                                                   length, 64);
+      if (!(access & GL_MAP_READ_BIT) &&
+          intel->intelScreen->kernel_exec_fencing) {
+         drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
+         intel_obj->mapped_gtt = GL_TRUE;
+      } else {
+         drm_intel_bo_map(intel_obj->range_map_bo,
+                          (access & GL_MAP_WRITE_BIT) != 0);
+         intel_obj->mapped_gtt = GL_FALSE;
+      }
+      obj->Pointer = intel_obj->range_map_bo->virtual;
+      return obj->Pointer;
+   }
+
+   if (!(access & GL_MAP_READ_BIT) &&
+       intel->intelScreen->kernel_exec_fencing) {
+      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
+      intel_obj->mapped_gtt = GL_TRUE;
+   } else {
+      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
+      intel_obj->mapped_gtt = GL_FALSE;
+   }
+
+   obj->Pointer = intel_obj->buffer->virtual + offset;
+   return obj->Pointer;
+}
+
 
 /**
- * Called via glMapBufferARB().
+ * Called via glUnmapBuffer().
  */
 static GLboolean
 intel_bufferobj_unmap(GLcontext * ctx,
                       GLenum target, struct gl_buffer_object *obj)
 {
+   struct intel_context *intel = intel_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
    assert(intel_obj);
+   assert(obj->Pointer);
    if (intel_obj->sys_buffer != NULL) {
-      assert(obj->Pointer);
-      obj->Pointer = NULL;
+      /* always keep the mapping around. */
+   } else if (intel_obj->range_map_bo != NULL) {
+      if (intel_obj->mapped_gtt) {
+         drm_intel_gem_bo_unmap_gtt(intel_obj->range_map_bo);
+      } else {
+         drm_intel_bo_unmap(intel_obj->range_map_bo);
+      }
+
+      /* We ignore the FLUSH_EXPLICIT bit and the calls associated with it.
+       * It would be a small win to support that, but for now we just copy
+       * the whole mapped range into place.
+       */
+      intel_emit_linear_blit(intel,
+                             intel_obj->buffer, obj->Offset,
+                             intel_obj->range_map_bo, 0,
+                             obj->Length);
+
+      /* Since we've emitted some blits to buffers that will (likely) be used
+       * in rendering operations in other cache domains in this batch, emit a
+       * flush.  Once again, we wish for a domain tracker in libdrm to cover
+       * usage inside of a batchbuffer.
+       */
+      intel_batchbuffer_emit_mi_flush(intel->batch);
+
+      drm_intel_bo_unreference(intel_obj->range_map_bo);
+      intel_obj->range_map_bo = NULL;
    } else if (intel_obj->buffer != NULL) {
-      assert(obj->Pointer);
       if (intel_obj->mapped_gtt) {
          drm_intel_gem_bo_unmap_gtt(intel_obj->buffer);
       } else {
          drm_intel_bo_unmap(intel_obj->buffer);
       }
-      obj->Pointer = NULL;
    }
+   obj->Pointer = NULL;
+
    return GL_TRUE;
 }
 
@@ -340,7 +466,6 @@ intel_bufferobj_copy_subdata(GLcontext *ctx,
    struct intel_buffer_object *intel_src = intel_buffer_object(src);
    struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
    drm_intel_bo *src_bo, *dst_bo;
-   GLuint pitch, height;
 
    if (size == 0)
       return;
@@ -371,39 +496,14 @@ intel_bufferobj_copy_subdata(GLcontext *ctx,
       }
    }
 
-   /* Otherwise, we have real BOs, so blit them.  We don't have a memmove-type
-    * blit like some other hardware, so we'll do a rectangular blit covering
-    * a large space, then emit a scanline blit at the end to cover the last
-    * if we need.
-    */
+   /* Otherwise, we have real BOs, so blit them. */
 
    dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART);
    src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ);
 
-   /* The pitch is a signed value. */
-   pitch = MIN2(size, (1 << 15) - 1);
-   height = size / pitch;
-   intelEmitCopyBlit(intel, 1,
-                     pitch, src_bo, read_offset, I915_TILING_NONE,
-                     pitch, dst_bo, write_offset, I915_TILING_NONE,
-                     0, 0, /* src x/y */
-                     0, 0, /* dst x/y */
-                     pitch, height, /* w, h */
-                     GL_COPY);
-
-   read_offset += pitch * height;
-   write_offset += pitch * height;
-   size -= pitch * height;
-   assert (size < (1 << 15));
-   if (size != 0) {
-      intelEmitCopyBlit(intel, 1,
-                        size, src_bo, read_offset, I915_TILING_NONE,
-                        size, dst_bo, write_offset, I915_TILING_NONE,
-                        0, 0, /* src x/y */
-                        0, 0, /* dst x/y */
-                        size, 1, /* w, h */
-                        GL_COPY);
-   }
+   intel_emit_linear_blit(intel,
+                          dst_bo, write_offset,
+                          src_bo, read_offset, size);
 
    /* Since we've emitted some blits to buffers that will (likely) be used
     * in rendering operations in other cache domains in this batch, emit a
@@ -422,6 +522,7 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions)
    functions->BufferSubData = intel_bufferobj_subdata;
    functions->GetBufferSubData = intel_bufferobj_get_subdata;
    functions->MapBuffer = intel_bufferobj_map;
+   functions->MapBufferRange = intel_bufferobj_map_range;
    functions->UnmapBuffer = intel_bufferobj_unmap;
    functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
index 8164407f079..06a8ab9824c 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
@@ -48,6 +48,11 @@ struct intel_buffer_object
    struct intel_region *region; /* Is there a zero-copy texture
                                    associated with this (pixel)
                                    buffer object? */
+
+   drm_intel_bo *range_map_bo;
+   unsigned int range_map_offset;
+   GLsizei range_map_size;
+
    GLboolean mapped_gtt;
 };
 
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index ff9ad5acced..7aee70f0a8f 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -32,6 +32,7 @@
 
 #define need_GL_ARB_copy_buffer
 #define need_GL_ARB_framebuffer_object
+#define need_GL_ARB_map_buffer_range
 #define need_GL_ARB_occlusion_query
 #define need_GL_ARB_point_parameters
 #define need_GL_ARB_shader_objects
@@ -72,6 +73,7 @@
 static const struct dri_extension card_extensions[] = {
    { "GL_ARB_copy_buffer",                GL_ARB_copy_buffer_functions },
    { "GL_ARB_half_float_pixel",           NULL },
+   { "GL_ARB_map_buffer_range",           GL_ARB_map_buffer_range_functions },
    { "GL_ARB_multitexture",               NULL },
    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
    { "GL_ARB_point_sprite",               NULL },
-- 
2.30.2