From 84ed8b67c56b323cd834d463151e1a3c0430c53e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 5 Jun 2017 14:50:26 -0700 Subject: [PATCH] vc4: Set shareable BOs as T tiled if possible X11 and GL compositor performance on VC4 has been terrible because of our SHARED-usage buffers all being forced to linear. This swaps SHARED && !LINEAR buffers over to being tiled. This is an expected win for all GL compositors during rendering (a full copy of each shared texture per draw call), allows X11 to be used with decent performance without a GL compositor, and improves X11 windowed swapbuffers performance as well. It also halves the memory usage of shared buffers that get textured from. The only cost should be idle systems with a scanout-only buffer that isn't flagged as LINEAR, in which case the memory bandwidth cost of scanout goes up ~25%. This implements the EGL_EXT_image_dma_buf_import_modifiers extension, supporting the VC4 T_TILED modifier. v2: Added modifier support to resource creation/import, and advertisement (by daniels). v3: Fix old-kernel fallback path, fix compiler error and warnings, and comment touchups (by anholt). Reviewed-by: Daniel Stone --- src/gallium/drivers/vc4/vc4_resource.c | 157 ++++++++++++++++++++++-- src/gallium/drivers/vc4/vc4_screen.c | 30 +++++ src/gallium/drivers/vc4/vc4_screen.h | 1 + src/gallium/drivers/vc4/vc4_simulator.c | 7 ++ 4 files changed, 182 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 00706762b8a..94301bd4114 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -29,11 +29,17 @@ #include "util/u_surface.h" #include "util/u_upload_mgr.h" +#include "drm_fourcc.h" +#include "vc4_drm.h" #include "vc4_screen.h" #include "vc4_context.h" #include "vc4_resource.h" #include "vc4_tiling.h" +#ifndef DRM_FORMAT_MOD_INVALID +#define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1) +#endif + static bool vc4_resource_bo_alloc(struct vc4_resource *rsc) { @@ -387,6 +393,7 @@ vc4_resource_get_handle(struct pipe_screen *pscreen, struct vc4_resource *rsc = vc4_resource(prsc); whandle->stride = rsc->slices[0].stride; + whandle->offset = 0; /* If we're passing some reference to our BO out to some other part of * the system, then we can't do any optimizations about only us being @@ -394,6 +401,11 @@ vc4_resource_get_handle(struct pipe_screen *pscreen, */ rsc->bo->private = false; + if (rsc->tiled) + whandle->modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; + else + whandle->modifier = DRM_FORMAT_MOD_LINEAR; + switch (whandle->type) { case DRM_API_HANDLE_TYPE_SHARED: if (screen->ro) { @@ -561,26 +573,77 @@ get_resource_texture_format(struct pipe_resource *prsc) return format; } -struct pipe_resource * -vc4_resource_create(struct pipe_screen *pscreen, - const struct pipe_resource *tmpl) +static bool +find_modifier(uint64_t needle, const uint64_t *haystack, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (haystack[i] == needle) + return true; + } + + return false; +} + +static struct pipe_resource * +vc4_resource_create_with_modifiers(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + const uint64_t *modifiers, + int count) { struct vc4_screen *screen = vc4_screen(pscreen); struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl); struct pipe_resource *prsc = &rsc->base; + bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); + /* Use a tiled layout if we can, for better 3D performance. */ + bool should_tile = true; - /* We have to make shared be untiled, since we don't have any way to - * communicate metadata about tiling currently. + /* VBOs/PBOs are untiled (and 1 height). */ + if (tmpl->target == PIPE_BUFFER) + should_tile = false; + + /* MSAA buffers are linear. */ + if (tmpl->nr_samples > 1) + should_tile = false; + + /* No tiling when we're sharing with another device (pl111). */ + if (screen->ro && (tmpl->bind & PIPE_BIND_SCANOUT)) + should_tile = false; + + /* Cursors are always linear, and the user can request linear as well. + */ + if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR)) + should_tile = false; + + /* No shared objects with LT format -- the kernel only has T-format + * metadata. LT objects are small enough it's not worth the trouble to + * give them metadata to tile. + */ + if ((tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) && + vc4_size_is_lt(prsc->width0, prsc->height0, rsc->cpp)) + should_tile = false; + + /* If we're sharing or scanning out, we need the ioctl present to + * inform the kernel or the other side. */ - if (tmpl->target == PIPE_BUFFER || - tmpl->nr_samples > 1 || - (tmpl->bind & (PIPE_BIND_SCANOUT | - PIPE_BIND_LINEAR | - PIPE_BIND_SHARED | - PIPE_BIND_CURSOR))) { + if ((tmpl->bind & (PIPE_BIND_SHARED | + PIPE_BIND_SCANOUT)) && !screen->has_tiling_ioctl) + should_tile = false; + + /* No user-specified modifier; determine our own. */ + if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) { + linear_ok = true; + rsc->tiled = should_tile; + } else if (should_tile && + find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, + modifiers, count)) { + rsc->tiled = true; + } else if (linear_ok) { rsc->tiled = false; } else { - rsc->tiled = true; + fprintf(stderr, "Unsupported modifier requested\n"); + return NULL; } if (tmpl->target != PIPE_BUFFER) @@ -590,6 +653,22 @@ vc4_resource_create(struct pipe_screen *pscreen, if (!vc4_resource_bo_alloc(rsc)) goto fail; + if (screen->has_tiling_ioctl) { + uint64_t modifier; + if (rsc->tiled) + modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; + else + modifier = DRM_FORMAT_MOD_LINEAR; + struct drm_vc4_set_tiling set_tiling = { + .handle = rsc->bo->handle, + .modifier = modifier, + }; + int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_SET_TILING, + &set_tiling); + if (ret != 0) + goto fail; + } + if (screen->ro && tmpl->bind & PIPE_BIND_SCANOUT) { rsc->scanout = renderonly_scanout_for_resource(prsc, screen->ro); @@ -603,6 +682,14 @@ fail: return NULL; } +struct pipe_resource * +vc4_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + const uint64_t mod = DRM_FORMAT_MOD_INVALID; + return vc4_resource_create_with_modifiers(pscreen, tmpl, &mod, 1); +} + static struct pipe_resource * vc4_resource_from_handle(struct pipe_screen *pscreen, const struct pipe_resource *tmpl, @@ -642,7 +729,36 @@ vc4_resource_from_handle(struct pipe_screen *pscreen, if (!rsc->bo) goto fail; - rsc->tiled = false; + struct drm_vc4_get_tiling get_tiling = { + .handle = rsc->bo->handle, + }; + int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_TILING, &get_tiling); + + if (ret != 0) { + whandle->modifier = DRM_FORMAT_MOD_LINEAR; + } else if (whandle->modifier == DRM_FORMAT_MOD_INVALID) { + whandle->modifier = get_tiling.modifier; + } else if (whandle->modifier != get_tiling.modifier) { + fprintf(stderr, + "Modifier 0x%llx vs. tiling (0x%llx) mismatch\n", + (long long)whandle->modifier, get_tiling.modifier); + goto fail; + } + + switch (whandle->modifier) { + case DRM_FORMAT_MOD_LINEAR: + rsc->tiled = false; + break; + case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: + rsc->tiled = true; + break; + default: + fprintf(stderr, + "Attempt to import unsupported modifier 0x%llx\n", + (long long)whandle->modifier); + goto fail; + } + rsc->vc4_format = get_resource_texture_format(prsc); vc4_setup_slices(rsc, "import"); @@ -1067,11 +1183,26 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx, void vc4_resource_screen_init(struct pipe_screen *pscreen) { + struct vc4_screen *screen = vc4_screen(pscreen); + pscreen->resource_create = vc4_resource_create; + pscreen->resource_create_with_modifiers = + vc4_resource_create_with_modifiers; pscreen->resource_from_handle = vc4_resource_from_handle; pscreen->resource_destroy = u_resource_destroy_vtbl; pscreen->resource_get_handle = vc4_resource_get_handle; pscreen->resource_destroy = vc4_resource_destroy; + + /* Test if the kernel has GET_TILING; it will return -EINVAL if the + * ioctl does not exist, but -ENOENT if we pass an impossible handle. + * 0 cannot be a valid GEM object, so use that. + */ + struct drm_vc4_get_tiling get_tiling = { + .handle = 0x0, + }; + int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_TILING, &get_tiling); + if (ret == -1 && errno == ENOENT) + screen->has_tiling_ioctl = true; } void diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 07395487d77..f3b47ca8903 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -35,6 +35,7 @@ #include "util/ralloc.h" #include +#include "drm_fourcc.h" #include "vc4_drm.h" #include "vc4_screen.h" #include "vc4_context.h" @@ -534,6 +535,34 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen, return retval == usage; } +static void +vc4_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen, + enum pipe_format format, int max, + uint64_t *modifiers, + unsigned int *external_only, + int *count) +{ + if (!modifiers) { + *count = 2; + return; + } + + *count = MIN2(max, 2); + + /* We support both modifiers (tiled and linear) for all sampler + * formats. + */ + modifiers[0] = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; + if (external_only) + external_only[0] = false; + if (max < 2) + return; + + modifiers[1] = DRM_FORMAT_MOD_LINEAR; + if (external_only) + external_only[1] = false; +} + #define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) static unsigned handle_hash(void *key) @@ -666,6 +695,7 @@ vc4_screen_create(int fd, struct renderonly *ro) pscreen->get_vendor = vc4_screen_get_vendor; pscreen->get_device_vendor = vc4_screen_get_vendor; pscreen->get_compiler_options = vc4_screen_get_compiler_options; + pscreen->query_dmabuf_modifiers = vc4_screen_query_dmabuf_modifiers; return pscreen; diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index 7887adee941..85108219ee3 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ b/src/gallium/drivers/vc4/vc4_screen.h @@ -95,6 +95,7 @@ struct vc4_screen { bool has_control_flow; bool has_etc1; bool has_threaded_fs; + bool has_tiling_ioctl; struct vc4_simulator_file *sim_file; }; diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 6d99e9f2eea..ff306f29610 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -653,6 +653,13 @@ vc4_simulator_ioctl(int fd, unsigned long request, void *args) */ return 0; + case DRM_IOCTL_VC4_GET_TILING: + case DRM_IOCTL_VC4_SET_TILING: + /* Disable these for now, since the sharing with i965 requires + * linear buffers. + */ + return -1; + case DRM_IOCTL_VC4_GET_PARAM: return vc4_simulator_get_param_ioctl(fd, args); -- 2.30.2