From aa79cc2bc8e27febc159bafe7bdb52e866b8a9ec Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Tue, 10 Apr 2018 23:22:03 -0700 Subject: [PATCH] i965: Implement EGL_KHR_mutable_render_buffer MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Testing: - Manually tested a low-latency handwriting demo that toggles EGL_RENDER_BUFFER. Toggling changed the display latency as expected. Used Android on Chrome OS, Kabylake GT2. - No change in dEQP-EGL.functional.* on Fedora 27, Wayland, Skylake GT2. Used deqp at tag android-p-preview-5. - No regressions in dEQP-EGL.functional.*, ran on Android on Chrome OS, Kabylake GT2. Some dEQP-EGL.functional.mutable_render_buffer.* test change from NotSupported to Pass. Reviewed-by: Tapani Pälli --- src/mesa/drivers/dri/i965/brw_context.c | 86 +++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_context.h | 12 ++++ src/mesa/drivers/dri/i965/intel_screen.c | 13 +++- 3 files changed, 107 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 0c551d774c9..6ba64e4e06d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -239,6 +239,35 @@ intel_flush_front(struct gl_context *ctx) } } +static void +brw_display_shared_buffer(struct brw_context *brw) +{ + __DRIcontext *dri_context = brw->driContext; + __DRIdrawable *dri_drawable = dri_context->driDrawablePriv; + __DRIscreen *dri_screen = brw->screen->driScrnPriv; + int fence_fd = -1; + + if (!brw->is_shared_buffer_bound) + return; + + if (!brw->is_shared_buffer_dirty) + return; + + if (brw->screen->has_exec_fence) { + /* This function is always called during a flush operation, so there is + * no need to flush again here. But we want to provide a fence_fd to the + * loader, and a redundant flush is the easiest way to acquire one. 
+ */ + if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd)) + return; + } + + dri_screen->mutableRenderBuffer.loader + ->displaySharedBuffer(dri_drawable, fence_fd, + dri_drawable->loaderPrivate); + brw->is_shared_buffer_dirty = false; +} + static void intel_glFlush(struct gl_context *ctx) { @@ -246,7 +275,7 @@ intel_glFlush(struct gl_context *ctx) intel_batchbuffer_flush(brw); intel_flush_front(ctx); - + brw_display_shared_buffer(brw); brw->need_flush_throttle = true; } @@ -1460,6 +1489,11 @@ intel_prepare_render(struct brw_context *brw) */ if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) brw->front_buffer_dirty = true; + + if (brw->is_shared_buffer_bound) { + /* Subsequent rendering will probably dirty the shared buffer. */ + brw->is_shared_buffer_dirty = true; + } } /** @@ -1693,8 +1727,12 @@ intel_update_image_buffer(struct brw_context *intel, else last_mt = rb->singlesample_mt; - if (last_mt && last_mt->bo == buffer->bo) + if (last_mt && last_mt->bo == buffer->bo) { + if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) { + intel_miptree_make_shareable(intel, last_mt); + } return; + } /* Only allow internal compression if samples == 0. For multisampled * window system buffers, the only thing the single-sampled buffer is used @@ -1723,6 +1761,35 @@ intel_update_image_buffer(struct brw_context *intel, rb->Base.Base.NumSamples > 1) { intel_renderbuffer_upsample(intel, rb); } + + if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) { + /* The compositor and the application may access this image + * concurrently. The display hardware may even scanout the image while + * the GPU is rendering to it. Aux surfaces cause difficulty with + * concurrent access, so permanently disable aux for this miptree. + * + * Perhaps we could improve overall application performance by + * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to + * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER + * returns to EGL_SINGLE_BUFFER. 
I expect the wins and losses with this + * approach to be highly dependent on the application's GL usage. + * + * I [chadv] expect clever disabling/reenabling to be counterproductive + * in the use cases I care about: applications that render nearly + * realtime handwriting to the surface while possibly undergoing + * simultaneous scanout as a display plane. The app requires low + * render latency. Even though the app spends most of its time in + * shared-buffer mode, it also frequently transitions between + * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER) + * mode. Visual stutter during the transitions should be avoided. + * + * In this case, I [chadv] believe reducing the GPU workload at + * shared-buffer/double-buffer transitions would offer a smoother app + * experience than any savings due to aux compression. But I've + * collected no data to prove my theory. + */ + intel_miptree_make_shareable(intel, mt); + } } static void @@ -1783,4 +1850,19 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable) images.back, __DRI_IMAGE_BUFFER_BACK); } + + if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) { + assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED); + drawable->w = images.back->width; + drawable->h = images.back->height; + intel_update_image_buffer(brw, + drawable, + back_rb, + images.back, + __DRI_IMAGE_BUFFER_SHARED); + brw->is_shared_buffer_bound = true; + } else { + brw->is_shared_buffer_bound = false; + brw->is_shared_buffer_dirty = false; + } } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c32def7c3d7..d3b96953467 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -791,6 +791,18 @@ struct brw_context */ bool front_buffer_dirty; + /** + * True if the __DRIdrawable's current __DRIimageBufferMask is + * __DRI_IMAGE_BUFFER_SHARED. 
+ */ + bool is_shared_buffer_bound; + + /** + * True if a shared buffer is bound and it has received any rendering since + * the previous __DRImutableRenderBufferLoaderExtension::displaySharedBuffer(). + */ + bool is_shared_buffer_dirty; + /** Framerate throttling: @{ */ struct brw_bo *throttle_batch[2]; diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 923f9be3fbb..eaf5a3b9feb 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1587,12 +1587,17 @@ static const __DRI2blobExtension intelBlobExtension = { .set_cache_funcs = brw_set_cache_funcs }; +static const __DRImutableRenderBufferDriverExtension intelMutableRenderBufferExtension = { + .base = { __DRI_MUTABLE_RENDER_BUFFER_DRIVER, 1 }, +}; + static const __DRIextension *screenExtensions[] = { &intelTexBufferExtension.base, &intelFenceExtension.base, &intelFlushExtension.base, &intelImageExtension.base, &intelRendererQueryExtension.base, + &intelMutableRenderBufferExtension.base, &dri2ConfigQueryExtension.base, &dri2NoErrorExtension.base, &intelBlobExtension.base, @@ -1605,6 +1610,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = { &intelFlushExtension.base, &intelImageExtension.base, &intelRendererQueryExtension.base, + &intelMutableRenderBufferExtension.base, &dri2ConfigQueryExtension.base, &dri2Robustness.base, &dri2NoErrorExtension.base, @@ -2158,7 +2164,9 @@ intel_screen_make_configs(__DRIscreen *dri_screen) bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache, "allow_rgb10_configs"); - /* Generate singlesample configs without accumulation buffer. */ + /* Generate singlesample configs, each without accumulation buffer + * and with EGL_MUTABLE_RENDER_BUFFER_BIT_KHR. 
+ */ for (unsigned i = 0; i < num_formats; i++) { __DRIconfig **new_configs; int num_depth_stencil_bits = 2; @@ -2194,7 +2202,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen) num_depth_stencil_bits, back_buffer_modes, 2, singlesample_samples, 1, - false, false, false); + false, false, + /*mutable_render_buffer*/ true); configs = driConcatConfigs(configs, new_configs); } -- 2.30.2