v3d: compute appropriate VPM memory configuration for geometry shader workloads
[mesa.git] / src / gallium / drivers / v3d / v3d_resource.c
index b91ab30708ea9f0187b462ca760e405948f387ba..84f6b8c9122cc0ac7e82f61ac65c792d145ef564 100644 (file)
 #include "pipe/p_defines.h"
 #include "util/u_blit.h"
 #include "util/u_memory.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_surface.h"
 #include "util/u_transfer_helper.h"
 #include "util/u_upload_mgr.h"
-#include "util/u_format_zs.h"
+#include "util/format/u_format_zs.h"
+#include "util/u_drm.h"
 
-#include "drm_fourcc.h"
+#include "drm-uapi/drm_fourcc.h"
 #include "v3d_screen.h"
 #include "v3d_context.h"
 #include "v3d_resource.h"
@@ -146,37 +147,13 @@ v3d_resource_transfer_unmap(struct pipe_context *pctx,
         slab_free(&v3d->transfer_pool, ptrans);
 }
 
-static void *
-v3d_resource_transfer_map(struct pipe_context *pctx,
-                          struct pipe_resource *prsc,
-                          unsigned level, unsigned usage,
-                          const struct pipe_box *box,
-                          struct pipe_transfer **pptrans)
+static void
+v3d_map_usage_prep(struct pipe_context *pctx,
+                   struct pipe_resource *prsc,
+                   unsigned usage)
 {
         struct v3d_context *v3d = v3d_context(pctx);
         struct v3d_resource *rsc = v3d_resource(prsc);
-        struct v3d_transfer *trans;
-        struct pipe_transfer *ptrans;
-        enum pipe_format format = prsc->format;
-        char *buf;
-
-        /* MSAA maps should have been handled by u_transfer_helper. */
-        assert(prsc->nr_samples <= 1);
-
-        /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
-         * being mapped.
-         */
-        if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
-            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
-            !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) &&
-            prsc->last_level == 0 &&
-            prsc->width0 == box->width &&
-            prsc->height0 == box->height &&
-            prsc->depth0 == box->depth &&
-            prsc->array_size == 1 &&
-            rsc->bo->private) {
-                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
-        }
 
         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
                 if (v3d_resource_bo_alloc(rsc)) {
@@ -192,7 +169,8 @@ v3d_resource_transfer_map(struct pipe_context *pctx,
                         /* If we failed to reallocate, flush users so that we
                          * don't violate any syncing requirements.
                          */
-                        v3d_flush_jobs_reading_resource(v3d, prsc);
+                        v3d_flush_jobs_reading_resource(v3d, prsc,
+                                                        V3D_FLUSH_DEFAULT);
                 }
         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                 /* If we're writing and the buffer is being used by the CL, we
@@ -200,15 +178,52 @@ v3d_resource_transfer_map(struct pipe_context *pctx,
                  * to flush if the CL has written our buffer.
                  */
                 if (usage & PIPE_TRANSFER_WRITE)
-                        v3d_flush_jobs_reading_resource(v3d, prsc);
+                        v3d_flush_jobs_reading_resource(v3d, prsc,
+                                                        V3D_FLUSH_ALWAYS);
                 else
-                        v3d_flush_jobs_writing_resource(v3d, prsc);
+                        v3d_flush_jobs_writing_resource(v3d, prsc,
+                                                        V3D_FLUSH_ALWAYS);
         }
 
         if (usage & PIPE_TRANSFER_WRITE) {
                 rsc->writes++;
                 rsc->initialized_buffers = ~0;
         }
+}
+
+static void *
+v3d_resource_transfer_map(struct pipe_context *pctx,
+                          struct pipe_resource *prsc,
+                          unsigned level, unsigned usage,
+                          const struct pipe_box *box,
+                          struct pipe_transfer **pptrans)
+{
+        struct v3d_context *v3d = v3d_context(pctx);
+        struct v3d_resource *rsc = v3d_resource(prsc);
+        struct v3d_transfer *trans;
+        struct pipe_transfer *ptrans;
+        enum pipe_format format = prsc->format;
+        char *buf;
+
+        /* MSAA maps should have been handled by u_transfer_helper. */
+        assert(prsc->nr_samples <= 1);
+
+        /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
+         * being mapped.
+         */
+        if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+            !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
+            prsc->last_level == 0 &&
+            prsc->width0 == box->width &&
+            prsc->height0 == box->height &&
+            prsc->depth0 == box->depth &&
+            prsc->array_size == 1 &&
+            rsc->bo->private) {
+                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+        }
+
+        v3d_map_usage_prep(pctx, prsc, usage);
 
         trans = slab_alloc(&v3d->transfer_pool);
         if (!trans)
@@ -281,7 +296,7 @@ v3d_resource_transfer_map(struct pipe_context *pctx,
                 return trans->map;
         } else {
                 ptrans->stride = slice->stride;
-                ptrans->layer_stride = ptrans->stride;
+                ptrans->layer_stride = rsc->cube_map_stride;
 
                 return buf + slice->offset +
                         ptrans->box.y * ptrans->stride +
@@ -295,27 +310,78 @@ fail:
         return NULL;
 }
 
+static void
+v3d_texture_subdata(struct pipe_context *pctx,
+                    struct pipe_resource *prsc,
+                    unsigned level,
+                    unsigned usage,
+                    const struct pipe_box *box,
+                    const void *data,
+                    unsigned stride,
+                    unsigned layer_stride)
+{
+        struct v3d_resource *rsc = v3d_resource(prsc);
+        struct v3d_resource_slice *slice = &rsc->slices[level];
+
+        /* Untiled resources map directly, so just take the u_transfer path. */
+        if (!rsc->tiled) {
+                return u_default_texture_subdata(pctx, prsc, level, usage, box,
+                                                 data, stride, layer_stride);
+        }
+
+        /* Otherwise, store the texture data directly into the tiled BO.
+         * Gallium's texture_subdata may be called with obvious usage flags
+         * missing, so WRITE and DISCARD_RANGE are ORed in for the prep.
+         */
+        v3d_map_usage_prep(pctx, prsc, usage | (PIPE_TRANSFER_WRITE |
+                                                PIPE_TRANSFER_DISCARD_RANGE));
+
+        void *buf;
+        if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+                buf = v3d_bo_map_unsynchronized(rsc->bo);
+        else
+                buf = v3d_bo_map(rsc->bo);
+        /* One tiled store per box layer; data advances by layer_stride. */
+        for (int i = 0; i < box->depth; i++) {
+                v3d_store_tiled_image(buf +
+                                      v3d_layer_offset(&rsc->base,
+                                                       level,
+                                                       box->z + i),
+                                      slice->stride,
+                                      (void *)data + layer_stride * i,
+                                      stride,
+                                      slice->tiling, rsc->cpp, slice->padded_height,
+                                      box);
+        }
+}
+
 static void
 v3d_resource_destroy(struct pipe_screen *pscreen,
                      struct pipe_resource *prsc)
 {
+        struct v3d_screen *screen = v3d_screen(pscreen);
         struct v3d_resource *rsc = v3d_resource(prsc);
 
+        if (rsc->scanout)
+                renderonly_scanout_destroy(rsc->scanout, screen->ro);
+
         v3d_bo_unreference(&rsc->bo);
         free(rsc);
 }
 
-static boolean
+static bool
 v3d_resource_get_handle(struct pipe_screen *pscreen,
                         struct pipe_context *pctx,
                         struct pipe_resource *prsc,
                         struct winsys_handle *whandle,
                         unsigned usage)
 {
+        struct v3d_screen *screen = v3d_screen(pscreen);
         struct v3d_resource *rsc = v3d_resource(prsc);
         struct v3d_bo *bo = rsc->bo;
 
         whandle->stride = rsc->slices[0].stride;
+        whandle->offset = 0;
 
         /* If we're passing some reference to our BO out to some other part of
          * the system, then we can't do any optimizations about only us being
@@ -323,18 +389,35 @@ v3d_resource_get_handle(struct pipe_screen *pscreen,
          */
         bo->private = false;
 
+        if (rsc->tiled) {
+                /* A shared tiled buffer should always be allocated as UIF,
+                 * not UBLINEAR or LT.
+                 */
+                assert(rsc->slices[0].tiling == VC5_TILING_UIF_XOR ||
+                       rsc->slices[0].tiling == VC5_TILING_UIF_NO_XOR);
+                whandle->modifier = DRM_FORMAT_MOD_BROADCOM_UIF;
+        } else {
+                whandle->modifier = DRM_FORMAT_MOD_LINEAR;
+        }
+
         switch (whandle->type) {
         case WINSYS_HANDLE_TYPE_SHARED:
                 return v3d_bo_flink(bo, &whandle->handle);
         case WINSYS_HANDLE_TYPE_KMS:
+                if (screen->ro) {
+                        assert(rsc->scanout);
+                        bool ok = renderonly_get_handle(rsc->scanout, whandle);
+                        whandle->stride = rsc->slices[0].stride;
+                        return ok;
+                }
                 whandle->handle = bo->handle;
-                return TRUE;
+                return true;
         case WINSYS_HANDLE_TYPE_FD:
                 whandle->handle = v3d_bo_get_dmabuf(bo);
                 return whandle->handle != -1;
         }
 
-        return FALSE;
+        return false;
 }
 
 #define PAGE_UB_ROWS (VC5_UIFCFG_PAGE_SIZE / VC5_UIFBLOCK_ROW_SIZE)
@@ -384,7 +467,8 @@ v3d_get_ub_pad(struct v3d_resource *rsc, uint32_t height)
 }
 
 static void
-v3d_setup_slices(struct v3d_resource *rsc)
+v3d_setup_slices(struct v3d_resource *rsc, uint32_t winsys_stride,
+                 bool uif_top)
 {
         struct pipe_resource *prsc = &rsc->base;
         uint32_t width = prsc->width0;
@@ -406,10 +490,17 @@ v3d_setup_slices(struct v3d_resource *rsc)
         uint32_t block_width = util_format_get_blockwidth(prsc->format);
         uint32_t block_height = util_format_get_blockheight(prsc->format);
         bool msaa = prsc->nr_samples > 1;
+
         /* MSAA textures/renderbuffers are always laid out as single-level
          * UIF.
          */
-        bool uif_top = msaa;
+        uif_top |= msaa;
+
+        /* Check some easy mistakes to make in a resource_create() call that
+         * will break our setup.
+         */
+        assert(prsc->array_size != 0);
+        assert(prsc->depth0 != 0);
 
         for (int i = prsc->last_level; i >= 0; i--) {
                 struct v3d_resource_slice *slice = &rsc->slices[i];
@@ -486,7 +577,10 @@ v3d_setup_slices(struct v3d_resource *rsc)
                 }
 
                 slice->offset = offset;
-                slice->stride = level_width * rsc->cpp;
+                if (winsys_stride)
+                        slice->stride = winsys_stride;
+                else
+                        slice->stride = level_width * rsc->cpp;
                 slice->padded_height = level_height;
                 slice->size = level_height * slice->stride;
 
@@ -599,26 +693,15 @@ v3d_resource_setup(struct pipe_screen *pscreen,
         return rsc;
 }
 
-static bool
-find_modifier(uint64_t needle, const uint64_t *haystack, int count)
-{
-        int i;
-
-        for (i = 0; i < count; i++) {
-                if (haystack[i] == needle)
-                        return true;
-        }
-
-        return false;
-}
-
 static struct pipe_resource *
 v3d_resource_create_with_modifiers(struct pipe_screen *pscreen,
                                    const struct pipe_resource *tmpl,
                                    const uint64_t *modifiers,
                                    int count)
 {
-        bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
+        struct v3d_screen *screen = v3d_screen(pscreen);
+
+        bool linear_ok = drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
         struct v3d_resource *rsc = v3d_resource_setup(pscreen, tmpl);
         struct pipe_resource *prsc = &rsc->base;
         /* Use a tiled layout if we can, for better 3D performance. */
@@ -645,26 +728,74 @@ v3d_resource_create_with_modifiers(struct pipe_screen *pscreen,
             tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
                 should_tile = false;
 
+        /* If using the old-school SCANOUT flag, we don't know what the screen
+         * might support other than linear. Just force linear.
+         */
+        if (tmpl->bind & PIPE_BIND_SCANOUT)
+                should_tile = false;
+
         /* No user-specified modifier; determine our own. */
         if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) {
                 linear_ok = true;
                 rsc->tiled = should_tile;
         } else if (should_tile &&
-                   find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+                   drm_find_modifier(DRM_FORMAT_MOD_BROADCOM_UIF,
                                  modifiers, count)) {
                 rsc->tiled = true;
         } else if (linear_ok) {
                 rsc->tiled = false;
         } else {
                 fprintf(stderr, "Unsupported modifier requested\n");
-                return NULL;
+                goto fail;
         }
 
         rsc->internal_format = prsc->format;
 
-        v3d_setup_slices(rsc);
-        if (!v3d_resource_bo_alloc(rsc))
-                goto fail;
+        v3d_setup_slices(rsc, 0, tmpl->bind & PIPE_BIND_SHARED);
+
+        /* If we're in a renderonly setup, use the other device to perform our
+         * allocation and just import it to v3d.  The other device may be
+         * using CMA, and V3D can import from CMA but doesn't do CMA
+         * allocations on its own.
+         *
+         * We always allocate this way for SHARED, because get_handle will
+         * need a resource on the display fd.
+         */
+        if (screen->ro && (tmpl->bind & (PIPE_BIND_SCANOUT |
+                                         PIPE_BIND_SHARED))) {
+                struct winsys_handle handle;
+                struct pipe_resource scanout_tmpl = {
+                        .target = prsc->target,
+                        .format = PIPE_FORMAT_RGBA8888_UNORM,
+                        .width0 = 1024, /* one page */
+                        .height0 = align(rsc->size, 4096) / 4096,
+                        .depth0 = 1,
+                        .array_size = 1,
+                };
+
+                rsc->scanout =
+                        renderonly_scanout_for_resource(&scanout_tmpl,
+                                                        screen->ro,
+                                                        &handle);
+
+                if (!rsc->scanout) {
+                        fprintf(stderr, "Failed to create scanout resource\n");
+                        return NULL;
+                }
+                assert(handle.type == WINSYS_HANDLE_TYPE_FD);
+                rsc->bo = v3d_bo_open_dmabuf(screen, handle.handle);
+                close(handle.handle);
+
+                if (!rsc->bo)
+                        goto fail;
+
+                v3d_debug_resource_layout(rsc, "renderonly");
+
+                return prsc;
+        } else {
+                if (!v3d_resource_bo_alloc(rsc))
+                        goto fail;
+        }
 
         return prsc;
 fail:
@@ -696,10 +827,14 @@ v3d_resource_from_handle(struct pipe_screen *pscreen,
 
         switch (whandle->modifier) {
         case DRM_FORMAT_MOD_LINEAR:
-        case DRM_FORMAT_MOD_INVALID:
                 rsc->tiled = false;
                 break;
-        /* XXX: UIF */
+        case DRM_FORMAT_MOD_BROADCOM_UIF:
+                rsc->tiled = true;
+                break;
+        case DRM_FORMAT_MOD_INVALID:
+                rsc->tiled = screen->ro == NULL;
+                break;
         default:
                 fprintf(stderr,
                         "Attempt to import unsupported modifier 0x%llx\n",
@@ -707,21 +842,12 @@ v3d_resource_from_handle(struct pipe_screen *pscreen,
                 goto fail;
         }
 
-        if (whandle->offset != 0) {
-                fprintf(stderr,
-                        "Attempt to import unsupported winsys offset %u\n",
-                        whandle->offset);
-                goto fail;
-        }
-
         switch (whandle->type) {
         case WINSYS_HANDLE_TYPE_SHARED:
-                rsc->bo = v3d_bo_open_name(screen,
-                                           whandle->handle, whandle->stride);
+                rsc->bo = v3d_bo_open_name(screen, whandle->handle);
                 break;
         case WINSYS_HANDLE_TYPE_FD:
-                rsc->bo = v3d_bo_open_dmabuf(screen,
-                                             whandle->handle, whandle->stride);
+                rsc->bo = v3d_bo_open_dmabuf(screen, whandle->handle);
                 break;
         default:
                 fprintf(stderr,
@@ -735,10 +861,45 @@ v3d_resource_from_handle(struct pipe_screen *pscreen,
 
         rsc->internal_format = prsc->format;
 
-        v3d_setup_slices(rsc);
+        v3d_setup_slices(rsc, whandle->stride, true);
         v3d_debug_resource_layout(rsc, "import");
 
-        if (whandle->stride != slice->stride) {
+        if (whandle->offset != 0) {
+                if (rsc->tiled) {
+                        fprintf(stderr,
+                                "Attempt to import unsupported winsys offset %u\n",
+                                whandle->offset);
+                        goto fail;
+                }
+                rsc->slices[0].offset += whandle->offset;
+
+                if (rsc->slices[0].offset + rsc->slices[0].size >
+                    rsc->bo->size) {
+                        fprintf(stderr, "Attempt to import "
+                                "with overflowing offset (%d + %d > %d)\n",
+                                whandle->offset,
+                                rsc->slices[0].size,
+                                rsc->bo->size);
+                         goto fail;
+                 }
+        }
+
+        if (screen->ro) {
+                /* Make sure that renderonly has a handle to our buffer in the
+                 * display's fd, so that a later renderonly_get_handle()
+                 * returns correct handles or GEM names.
+                 */
+                rsc->scanout =
+                        renderonly_create_gpu_import_for_resource(prsc,
+                                                                  screen->ro,
+                                                                  NULL);
+                if (!rsc->scanout) {
+                        fprintf(stderr, "Failed to create scanout resource.\n");
+                        goto fail;
+                }
+        }
+
+        if (rsc->tiled && whandle->stride != slice->stride) {
                 static bool warned = false;
                 if (!warned) {
                         warned = true;
@@ -751,6 +912,8 @@ v3d_resource_from_handle(struct pipe_screen *pscreen,
                                 slice->stride);
                 }
                 goto fail;
+        } else if (!rsc->tiled) {
+                slice->stride = whandle->stride;
         }
 
         return prsc;
@@ -760,6 +923,62 @@ fail:
         return NULL;
 }
 
+void
+v3d_update_shadow_texture(struct pipe_context *pctx,
+                          struct pipe_sampler_view *pview)
+{
+        struct v3d_context *v3d = v3d_context(pctx);
+        struct v3d_sampler_view *view = v3d_sampler_view(pview);
+        struct v3d_resource *shadow = v3d_resource(view->texture);
+        struct v3d_resource *orig = v3d_resource(pview->texture);
+
+        assert(view->texture != pview->texture);
+        /* Up to date: same write count and the original BO is private. */
+        if (shadow->writes == orig->writes && orig->bo->private)
+                return;
+
+        perf_debug("Updating %dx%d@%d shadow for linear texture\n",
+                   orig->base.width0, orig->base.height0,
+                   pview->u.tex.first_level);
+        /* Blit each shadow level i from original level first_level + i. */
+        for (int i = 0; i <= shadow->base.last_level; i++) {
+                unsigned width = u_minify(shadow->base.width0, i);
+                unsigned height = u_minify(shadow->base.height0, i);
+                struct pipe_blit_info info = {
+                        .dst = {
+                                .resource = &shadow->base,
+                                .level = i,
+                                .box = {
+                                        .x = 0,
+                                        .y = 0,
+                                        .z = 0,
+                                        .width = width,
+                                        .height = height,
+                                        .depth = 1,
+                                },
+                                .format = shadow->base.format,
+                        },
+                        .src = {
+                                .resource = &orig->base,
+                                .level = pview->u.tex.first_level + i,
+                                .box = {
+                                        .x = 0,
+                                        .y = 0,
+                                        .z = 0,
+                                        .width = width,
+                                        .height = height,
+                                        .depth = 1,
+                                },
+                                .format = orig->base.format,
+                        },
+                        .mask = util_format_get_mask(orig->base.format),
+                };
+                pctx->blit(pctx, &info);
+        }
+        /* Remember which write count of the original this copy reflects. */
+        shadow->writes = orig->writes;
+}
+
 static struct pipe_surface *
 v3d_create_surface(struct pipe_context *pctx,
                    struct pipe_resource *ptex,
@@ -796,6 +1015,12 @@ v3d_create_surface(struct pipe_context *pctx,
 
         surface->format = v3d_get_rt_format(&screen->devinfo, psurf->format);
 
+        const struct util_format_description *desc =
+                util_format_description(psurf->format);
+
+        surface->swap_rb = (desc->swizzle[0] == PIPE_SWIZZLE_Z &&
+                            psurf->format != PIPE_FORMAT_B5G6R5_UNORM);
+
         if (util_format_is_depth_or_stencil(psurf->format)) {
                 switch (psurf->format) {
                 case PIPE_FORMAT_Z16_UNORM:
@@ -895,7 +1120,8 @@ v3d_resource_screen_init(struct pipe_screen *pscreen)
         pscreen->resource_get_handle = v3d_resource_get_handle;
         pscreen->resource_destroy = u_transfer_helper_resource_destroy;
         pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
-                                                            true, true, true);
+                                                            true, false,
+                                                            true, true);
 }
 
 void
@@ -905,10 +1131,11 @@ v3d_resource_context_init(struct pipe_context *pctx)
         pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
         pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
         pctx->buffer_subdata = u_default_buffer_subdata;
-        pctx->texture_subdata = u_default_texture_subdata;
+        pctx->texture_subdata = v3d_texture_subdata;
         pctx->create_surface = v3d_create_surface;
         pctx->surface_destroy = v3d_surface_destroy;
         pctx->resource_copy_region = util_resource_copy_region;
         pctx->blit = v3d_blit;
+        pctx->generate_mipmap = v3d_generate_mipmap;
         pctx->flush_resource = v3d_flush_resource;
 }