vc4: Set shareable BOs as T tiled if possible
authorEric Anholt <eric@anholt.net>
Mon, 5 Jun 2017 21:50:26 +0000 (14:50 -0700)
committerEric Anholt <eric@anholt.net>
Wed, 12 Jul 2017 17:58:33 +0000 (10:58 -0700)
X11 and GL compositor performance on VC4 has been terrible because of our
SHARED-usage buffers all being forced to linear.  This swaps SHARED &&
!LINEAR buffers over to being tiled.

This is an expected win for all GL compositors during rendering (a full
copy of each shared texture per draw call), allows X11 to be used with
decent performance without a GL compositor, and improves X11 windowed
swapbuffers performance as well.  It also halves the memory usage of
shared buffers that get textured from.  The only cost should be idle
systems with a scanout-only buffer that isn't flagged as LINEAR, in which
case the memory bandwidth cost of scanout goes up ~25%.

This implements the EGL_EXT_image_dma_buf_import_modifiers extension,
supporting the VC4 T_TILED modifier.

v2: Added modifier support to resource creation/import, and
    advertisement (by daniels).
v3: Fix old-kernel fallback path, fix compiler error and warnings, and
    comment touchups (by anholt).

Reviewed-by: Daniel Stone <daniels@collabora.com>
src/gallium/drivers/vc4/vc4_resource.c
src/gallium/drivers/vc4/vc4_screen.c
src/gallium/drivers/vc4/vc4_screen.h
src/gallium/drivers/vc4/vc4_simulator.c

index 00706762b8a23719107f01a9deb2c6ece7780a3d..94301bd41149525fefe61e72e40a1c9aed33550b 100644 (file)
 #include "util/u_surface.h"
 #include "util/u_upload_mgr.h"
 
+#include "drm_fourcc.h"
+#include "vc4_drm.h"
 #include "vc4_screen.h"
 #include "vc4_context.h"
 #include "vc4_resource.h"
 #include "vc4_tiling.h"
 
+#ifndef DRM_FORMAT_MOD_INVALID
+#define DRM_FORMAT_MOD_INVALID ((1ULL << 56) - 1)
+#endif
+
 static bool
 vc4_resource_bo_alloc(struct vc4_resource *rsc)
 {
@@ -387,6 +393,7 @@ vc4_resource_get_handle(struct pipe_screen *pscreen,
         struct vc4_resource *rsc = vc4_resource(prsc);
 
         whandle->stride = rsc->slices[0].stride;
+        whandle->offset = 0;
 
         /* If we're passing some reference to our BO out to some other part of
          * the system, then we can't do any optimizations about only us being
@@ -394,6 +401,11 @@ vc4_resource_get_handle(struct pipe_screen *pscreen,
          */
         rsc->bo->private = false;
 
+        if (rsc->tiled)
+                whandle->modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED;
+        else
+                whandle->modifier = DRM_FORMAT_MOD_LINEAR;
+
         switch (whandle->type) {
         case DRM_API_HANDLE_TYPE_SHARED:
                 if (screen->ro) {
@@ -561,26 +573,77 @@ get_resource_texture_format(struct pipe_resource *prsc)
         return format;
 }
 
-struct pipe_resource *
-vc4_resource_create(struct pipe_screen *pscreen,
-                    const struct pipe_resource *tmpl)
+static bool
+find_modifier(uint64_t needle, const uint64_t *haystack, int count)
+{
+        int i;
+
+        for (i = 0; i < count; i++) {
+                if (haystack[i] == needle)
+                        return true;
+        }
+
+        return false;
+}
+
+static struct pipe_resource *
+vc4_resource_create_with_modifiers(struct pipe_screen *pscreen,
+                                   const struct pipe_resource *tmpl,
+                                   const uint64_t *modifiers,
+                                   int count)
 {
         struct vc4_screen *screen = vc4_screen(pscreen);
         struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl);
         struct pipe_resource *prsc = &rsc->base;
+        bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
+        /* Use a tiled layout if we can, for better 3D performance. */
+        bool should_tile = true;
 
-        /* We have to make shared be untiled, since we don't have any way to
-         * communicate metadata about tiling currently.
+        /* VBOs/PBOs are untiled (and 1 height). */
+        if (tmpl->target == PIPE_BUFFER)
+                should_tile = false;
+
+        /* MSAA buffers are linear. */
+        if (tmpl->nr_samples > 1)
+                should_tile = false;
+
+        /* No tiling when we're sharing with another device (pl111). */
+        if (screen->ro && (tmpl->bind & PIPE_BIND_SCANOUT))
+                should_tile = false;
+
+        /* Cursors are always linear, and the user can request linear as well.
+         */
+        if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR))
+                should_tile = false;
+
+        /* No shared objects with LT format -- the kernel only has T-format
+         * metadata.  LT objects are small enough it's not worth the trouble to
+         * give them metadata to tile.
+         */
+        if ((tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) &&
+            vc4_size_is_lt(prsc->width0, prsc->height0, rsc->cpp))
+                should_tile = false;
+
+        /* If we're sharing or scanning out, we need the ioctl present to
+         * inform the kernel or the other side.
          */
-        if (tmpl->target == PIPE_BUFFER ||
-            tmpl->nr_samples > 1 ||
-            (tmpl->bind & (PIPE_BIND_SCANOUT |
-                           PIPE_BIND_LINEAR |
-                           PIPE_BIND_SHARED |
-                           PIPE_BIND_CURSOR))) {
+        if ((tmpl->bind & (PIPE_BIND_SHARED |
+                           PIPE_BIND_SCANOUT)) && !screen->has_tiling_ioctl)
+                should_tile = false;
+
+        /* No user-specified modifier; determine our own. */
+        if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) {
+                linear_ok = true;
+                rsc->tiled = should_tile;
+        } else if (should_tile &&
+                   find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+                                 modifiers, count)) {
+                rsc->tiled = true;
+        } else if (linear_ok) {
                 rsc->tiled = false;
         } else {
-                rsc->tiled = true;
+                fprintf(stderr, "Unsupported modifier requested\n");
+                return NULL;
         }
 
         if (tmpl->target != PIPE_BUFFER)
@@ -590,6 +653,22 @@ vc4_resource_create(struct pipe_screen *pscreen,
         if (!vc4_resource_bo_alloc(rsc))
                 goto fail;
 
+        if (screen->has_tiling_ioctl) {
+                uint64_t modifier;
+                if (rsc->tiled)
+                        modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED;
+                else
+                        modifier = DRM_FORMAT_MOD_LINEAR;
+                struct drm_vc4_set_tiling set_tiling = {
+                        .handle = rsc->bo->handle,
+                        .modifier = modifier,
+                };
+                int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_SET_TILING,
+                                    &set_tiling);
+                if (ret != 0)
+                        goto fail;
+        }
+
         if (screen->ro && tmpl->bind & PIPE_BIND_SCANOUT) {
                 rsc->scanout =
                         renderonly_scanout_for_resource(prsc, screen->ro);
@@ -603,6 +682,14 @@ fail:
         return NULL;
 }
 
+struct pipe_resource *
+vc4_resource_create(struct pipe_screen *pscreen,
+                    const struct pipe_resource *tmpl)
+{
+        const uint64_t mod = DRM_FORMAT_MOD_INVALID;
+        return vc4_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
+}
+
 static struct pipe_resource *
 vc4_resource_from_handle(struct pipe_screen *pscreen,
                          const struct pipe_resource *tmpl,
@@ -642,7 +729,36 @@ vc4_resource_from_handle(struct pipe_screen *pscreen,
         if (!rsc->bo)
                 goto fail;
 
-        rsc->tiled = false;
+        struct drm_vc4_get_tiling get_tiling = {
+                .handle = rsc->bo->handle,
+        };
+        int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_TILING, &get_tiling);
+
+        if (ret != 0) {
+                whandle->modifier = DRM_FORMAT_MOD_LINEAR;
+        } else if (whandle->modifier == DRM_FORMAT_MOD_INVALID) {
+                whandle->modifier = get_tiling.modifier;
+        } else if (whandle->modifier != get_tiling.modifier) {
+                fprintf(stderr,
+                        "Modifier 0x%llx vs. tiling (0x%llx) mismatch\n",
+                        (long long)whandle->modifier, get_tiling.modifier);
+                goto fail;
+        }
+
+        switch (whandle->modifier) {
+        case DRM_FORMAT_MOD_LINEAR:
+                rsc->tiled = false;
+                break;
+        case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
+                rsc->tiled = true;
+                break;
+        default:
+                fprintf(stderr,
+                        "Attempt to import unsupported modifier 0x%llx\n",
+                        (long long)whandle->modifier);
+                goto fail;
+        }
+
         rsc->vc4_format = get_resource_texture_format(prsc);
         vc4_setup_slices(rsc, "import");
 
@@ -1067,11 +1183,26 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx,
 void
 vc4_resource_screen_init(struct pipe_screen *pscreen)
 {
+        struct vc4_screen *screen = vc4_screen(pscreen);
+
         pscreen->resource_create = vc4_resource_create;
+        pscreen->resource_create_with_modifiers =
+                vc4_resource_create_with_modifiers;
         pscreen->resource_from_handle = vc4_resource_from_handle;
         pscreen->resource_destroy = u_resource_destroy_vtbl;
         pscreen->resource_get_handle = vc4_resource_get_handle;
         pscreen->resource_destroy = vc4_resource_destroy;
+
+        /* Test if the kernel has GET_TILING; it will return -EINVAL if the
+         * ioctl does not exist, but -ENOENT if we pass an impossible handle.
+         * 0 cannot be a valid GEM object, so use that.
+         */
+        struct drm_vc4_get_tiling get_tiling = {
+                .handle = 0x0,
+        };
+        int ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_GET_TILING, &get_tiling);
+        if (ret == -1 && errno == ENOENT)
+                screen->has_tiling_ioctl = true;
 }
 
 void
index 07395487d7768d51f63276af20cea12ecd23cb63..f3b47ca8903e0a563015f8f8f1818e5c4d3e0752 100644 (file)
@@ -35,6 +35,7 @@
 #include "util/ralloc.h"
 
 #include <xf86drm.h>
+#include "drm_fourcc.h"
 #include "vc4_drm.h"
 #include "vc4_screen.h"
 #include "vc4_context.h"
@@ -534,6 +535,34 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
         return retval == usage;
 }
 
+static void
+vc4_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen,
+                                  enum pipe_format format, int max,
+                                  uint64_t *modifiers,
+                                  unsigned int *external_only,
+                                  int *count)
+{
+        if (!modifiers) {
+                *count = 2;
+                return;
+        }
+
+        *count = MIN2(max, 2);
+
+        /* We support both modifiers (tiled and linear) for all sampler
+         * formats.
+         */
+        modifiers[0] = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED;
+        if (external_only)
+                external_only[0] = false;
+        if (max < 2)
+                return;
+
+        modifiers[1] = DRM_FORMAT_MOD_LINEAR;
+        if (external_only)
+                external_only[1] = false;
+}
+
 #define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
 
 static unsigned handle_hash(void *key)
@@ -666,6 +695,7 @@ vc4_screen_create(int fd, struct renderonly *ro)
         pscreen->get_vendor = vc4_screen_get_vendor;
         pscreen->get_device_vendor = vc4_screen_get_vendor;
         pscreen->get_compiler_options = vc4_screen_get_compiler_options;
+        pscreen->query_dmabuf_modifiers = vc4_screen_query_dmabuf_modifiers;
 
         return pscreen;
 
index 7887adee94185e57d4c8b8aff26c272528a082c2..85108219ee3babc434c3c3ac3921aa4d8b8b9d32 100644 (file)
@@ -95,6 +95,7 @@ struct vc4_screen {
         bool has_control_flow;
         bool has_etc1;
         bool has_threaded_fs;
+        bool has_tiling_ioctl;
 
         struct vc4_simulator_file *sim_file;
 };
index 6d99e9f2eea2448a162f36041fcffa5f126d22b6..ff306f2961042a79ef7c7fff10b8e2eb94e21805 100644 (file)
@@ -653,6 +653,13 @@ vc4_simulator_ioctl(int fd, unsigned long request, void *args)
                  */
                 return 0;
 
+        case DRM_IOCTL_VC4_GET_TILING:
+        case DRM_IOCTL_VC4_SET_TILING:
+                /* Disable these for now, since the sharing with i965 requires
+                 * linear buffers.
+                 */
+                return -1;
+
         case DRM_IOCTL_VC4_GET_PARAM:
                 return vc4_simulator_get_param_ioctl(fd, args);