From: Chad Versace
Date: Wed, 6 May 2015 02:05:32 +0000 (-0700)
Subject: i965/sync: Implement DRI2_Fence extension
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c636284;p=mesa.git

i965/sync: Implement DRI2_Fence extension

This enables EGL_KHR_fence_sync and EGL_KHR_wait_sync.

Below is the difference in piglit results, before and after this patch. No regressions and several tests improve from 'skip' to 'pass'. Out of EGL_KHR_fence_sync tests, two of the multithreaded tests skip; all other tests pass.

    cmdline: piglit run -p gbm -t sync tests/quick.py
    mesa:    master@1ac7db0
    piglit:  4069bec
    hw:      Ivybridge

          | before  after
    ------+-------------
    pass  |   32     46
    fail  |    0      0
    crash |    0      0
    skip  |   35     21
    total |   67     67

v2:
  - Set fence->signalled = true in brw_fence_has_completed() too.

Reviewed-by: Daniel Stone
Reviewed-by: Kenneth Graunke
---

diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html
index fb1a928d948..b0042030901 100644
--- a/docs/relnotes/10.6.0.html
+++ b/docs/relnotes/10.6.0.html
@@ -60,8 +60,8 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_viewport_array, GL_AMD_vertex_shader_viewport_index on i965/gen6
  • GL_EXT_draw_buffers2 on freedreno
  • GL_OES_EGL_sync on all drivers
- • EGL_KHR_fence_sync on freedreno, nv50, nvc0, r600, radeonsi
- • EGL_KHR_wait_sync on freedreno, nv50, nvc0, r600, radeonsi
+ • EGL_KHR_fence_sync on i965, freedreno, nv50, nvc0, r600, radeonsi
+ • EGL_KHR_wait_sync on i965, freedreno, nv50, nvc0, r600, radeonsi
  • EGL_KHR_cl_event2 on freedreno, nv50, nvc0, r600, radeonsi
  • GL_AMD_performance_monitor on nvc0
  • diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 015eaf19ea1..dda16389f8a 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -909,6 +909,7 @@ static const __DRIrobustnessExtension dri2Robustness = { static const __DRIextension *intelScreenExtensions[] = { &intelTexBufferExtension.base, + &intelFenceExtension.base, &intelFlushExtension.base, &intelImageExtension.base, &intelRendererQueryExtension.base, @@ -918,6 +919,7 @@ static const __DRIextension *intelScreenExtensions[] = { static const __DRIextension *intelRobustScreenExtensions[] = { &intelTexBufferExtension.base, + &intelFenceExtension.base, &intelFlushExtension.base, &intelImageExtension.base, &intelRendererQueryExtension.base, diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index f814ed017b1..e7a14903d6e 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -30,6 +30,9 @@ #include #include + +#include + #include "dri_util.h" #include "intel_bufmgr.h" #include "brw_device_info.h" @@ -76,6 +79,7 @@ extern void intelDestroyContext(__DRIcontext * driContextPriv); extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv); PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void); +extern const __DRI2fenceExtension intelFenceExtension; extern GLboolean intelMakeCurrent(__DRIcontext * driContextPriv, diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c index dea6dba340f..3cfa7e593ab 100644 --- a/src/mesa/drivers/dri/i965/intel_syncobj.c +++ b/src/mesa/drivers/dri/i965/intel_syncobj.c @@ -25,11 +25,11 @@ * */ -/** @file intel_syncobj.c +/** + * \file + * \brief Support for GL_ARB_sync and EGL_KHR_fence_sync. 
* - * Support for ARB_sync - * - * ARB_sync is implemented by flushing the current batchbuffer and keeping a + * GL_ARB_sync is implemented by flushing the current batchbuffer and keeping a * reference on it. We can then check for completion or wait for completion * using the normal buffer object mechanisms. This does mean that if an * application is using many sync objects, it will emit small batchbuffers @@ -44,13 +44,94 @@ #include "intel_batchbuffer.h" #include "intel_reg.h" +struct brw_fence { + /** The fence waits for completion of this batch. */ + drm_intel_bo *batch_bo; + + bool signalled; +}; + struct intel_gl_sync_object { struct gl_sync_object Base; - - /** Batch associated with this sync object */ - drm_intel_bo *bo; + struct brw_fence fence; }; +static void +brw_fence_finish(struct brw_fence *fence) +{ + if (fence->batch_bo) + drm_intel_bo_unreference(fence->batch_bo); +} + +static void +brw_fence_insert(struct brw_context *brw, struct brw_fence *fence) +{ + assert(!fence->batch_bo); + assert(!fence->signalled); + + intel_batchbuffer_emit_mi_flush(brw); + fence->batch_bo = brw->batch.bo; + drm_intel_bo_reference(fence->batch_bo); + intel_batchbuffer_flush(brw); +} + +static bool +brw_fence_has_completed(struct brw_fence *fence) +{ + if (fence->signalled) + return true; + + if (fence->batch_bo && !drm_intel_bo_busy(fence->batch_bo)) { + drm_intel_bo_unreference(fence->batch_bo); + fence->batch_bo = NULL; + fence->signalled = true; + return true; + } + + return false; +} + +/** + * Return true if the function successfully signals or has already signalled. + * (This matches the behavior expected from __DRI2fence::client_wait_sync). + */ +static bool +brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence, + uint64_t timeout) +{ + if (fence->signalled) + return true; + + assert(fence->batch_bo); + + /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns + * immediately for timeouts <= 0. 
The best we can do is to clamp the + * timeout to INT64_MAX. This limits the maximum timeout from 584 years to + * 292 years - likely not a big deal. + */ + if (timeout > INT64_MAX) + timeout = INT64_MAX; + + if (drm_intel_gem_bo_wait(fence->batch_bo, timeout) != 0) + return false; + + fence->signalled = true; + drm_intel_bo_unreference(fence->batch_bo); + fence->batch_bo = NULL; + + return true; +} + +static void +brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence) +{ + /* We have nothing to do for WaitSync. Our GL command stream is sequential, + * so given that the sync object has already flushed the batchbuffer, any + * batchbuffers coming after this waitsync will naturally not occur until + * the previous one is done. + */ +} + static struct gl_sync_object * intel_gl_new_sync_object(struct gl_context *ctx, GLuint id) { @@ -68,9 +149,7 @@ intel_gl_delete_sync_object(struct gl_context *ctx, struct gl_sync_object *s) { struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s; - if (sync->bo) - drm_intel_bo_unreference(sync->bo); - + brw_fence_finish(&sync->fence); free(sync); } @@ -81,56 +160,37 @@ intel_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *s, struct brw_context *brw = brw_context(ctx); struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s; - assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE); - intel_batchbuffer_emit_mi_flush(brw); - - sync->bo = brw->batch.bo; - drm_intel_bo_reference(sync->bo); - - intel_batchbuffer_flush(brw); + brw_fence_insert(brw, &sync->fence); } static void intel_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *s, GLbitfield flags, GLuint64 timeout) { + struct brw_context *brw = brw_context(ctx); struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s; - /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns - * immediately for timeouts <= 0. The best we can do is to clamp the - * timeout to INT64_MAX. 
This limits the maximum timeout from 584 years to - * 292 years - likely not a big deal. - */ - if (timeout > INT64_MAX) - timeout = INT64_MAX; - - if (sync->bo && drm_intel_gem_bo_wait(sync->bo, timeout) == 0) { + if (brw_fence_client_wait(brw, &sync->fence, timeout)) s->StatusFlag = 1; - drm_intel_bo_unreference(sync->bo); - sync->bo = NULL; - } } -/* We have nothing to do for WaitSync. Our GL command stream is sequential, - * so given that the sync object has already flushed the batchbuffer, - * any batchbuffers coming after this waitsync will naturally not occur until - * the previous one is done. - */ static void intel_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *s, GLbitfield flags, GLuint64 timeout) { + struct brw_context *brw = brw_context(ctx); + struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s; + + brw_fence_server_wait(brw, &sync->fence); } -static void intel_check_sync(struct gl_context *ctx, struct gl_sync_object *s) +static void +intel_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *s) { struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s; - if (sync->bo && !drm_intel_bo_busy(sync->bo)) { - drm_intel_bo_unreference(sync->bo); - sync->bo = NULL; + if (brw_fence_has_completed(&sync->fence)) s->StatusFlag = 1; - } } void @@ -143,3 +203,56 @@ intel_init_syncobj_functions(struct dd_function_table *functions) functions->ClientWaitSync = intel_gl_client_wait_sync; functions->ServerWaitSync = intel_gl_server_wait_sync; } + +static void * +intel_dri_create_fence(__DRIcontext *ctx) +{ + struct brw_context *brw = ctx->driverPrivate; + struct brw_fence *fence; + + fence = calloc(1, sizeof(*fence)); + if (!fence) + return NULL; + + brw_fence_insert(brw, fence); + + return fence; +} + +static void +intel_dri_destroy_fence(__DRIscreen *screen, void *driver_fence) +{ + struct brw_fence *fence = driver_fence; + + brw_fence_finish(fence); + free(fence); +} + +static GLboolean 
+intel_dri_client_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags, + uint64_t timeout) +{ + struct brw_context *brw = ctx->driverPrivate; + struct brw_fence *fence = driver_fence; + + return brw_fence_client_wait(brw, fence, timeout); +} + +static void +intel_dri_server_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags) +{ + struct brw_context *brw = ctx->driverPrivate; + struct brw_fence *fence = driver_fence; + + brw_fence_server_wait(brw, fence); +} + +const __DRI2fenceExtension intelFenceExtension = { + .base = { __DRI2_FENCE, 1 }, + + .create_fence = intel_dri_create_fence, + .destroy_fence = intel_dri_destroy_fence, + .client_wait_sync = intel_dri_client_wait_sync, + .server_wait_sync = intel_dri_server_wait_sync, + .get_fence_from_cl_event = NULL, +};