panfrost: Rewrite texture descriptor creation logic
authorAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tue, 18 Feb 2020 19:20:16 +0000 (14:20 -0500)
committerAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 21 Feb 2020 12:27:05 +0000 (07:27 -0500)
Rather than creating partially within the Gallium create function and
monkeypatching on draw time with code split across N different files
with tight Gallium dependencies, let's streamline everything into a
series of maintainable routines in mesa/src/panfrost with no Gallium
dependencies, doing the entire texture creation in one-shot and thus
adding absolutely zero draw-time overhead (since we can allocate a BO
for the descriptor and upload ahead-of-time, so switching textures is as
cheap as switching pointers).

Was this worth it? You know, I'm not sure :|

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3858>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3858>

src/gallium/drivers/panfrost/pan_context.c
src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_resource.c
src/panfrost/encoder/pan_texture.c
src/panfrost/encoder/pan_texture.h

index 0d69036b9a22ea92961118aaebe2d2d0a007755e..ff00c2129bf1ee76acca620f704e0c5a40e00ccf 100644 (file)
@@ -475,95 +475,19 @@ panfrost_upload_tex(
 
         struct pipe_sampler_view *pview = &view->base;
         struct panfrost_resource *rsrc = pan_resource(pview->texture);
-        mali_ptr descriptor_gpu;
-        void *descriptor;
-
-        /* Do we interleave an explicit stride with every element? */
-
-        bool has_manual_stride = view->manual_stride;
-
-        /* For easy access */
-
-        bool is_buffer = pview->target == PIPE_BUFFER;
-        unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level;
-        unsigned last_level  = is_buffer ? 0 : pview->u.tex.last_level;
-        unsigned first_layer = is_buffer ? 0 : pview->u.tex.first_layer;
-        unsigned last_layer  = is_buffer ? 0 : pview->u.tex.last_layer;
-        unsigned first_face  = 0;
-        unsigned last_face   = 0;
-        unsigned face_mult   = 1;
-
-        /* Cubemaps have 6 faces as layers in between each actual layer.
-         * There's a bit of an impedence mismatch between Gallium and the
-         * hardware, let's fixup for it */
-
-        if (pview->target == PIPE_TEXTURE_CUBE || pview->target == PIPE_TEXTURE_CUBE_ARRAY) {
-                /* TODO: logic wrong in the asserted out cases ... can they happen? */
-
-                first_face = first_layer % 6;
-                last_face = last_layer % 6;
-                first_layer /= 6;
-                last_layer /= 6;
-
-                assert((first_layer == last_layer) || (first_face == 0 && last_face == 5));
-                face_mult = 6;
-        }
-
-        /* Lower-bit is set when sampling from colour AFBC */
-        bool is_afbc = rsrc->layout == MALI_TEXTURE_AFBC;
-        bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL;
-        unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0;
 
         /* Add the BO to the job so it's retained until the job is done. */
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+
         panfrost_batch_add_bo(batch, rsrc->bo,
                               PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
                               panfrost_bo_access_for_stage(st));
 
-        /* Add the usage flags in, since they can change across the CSO
-         * lifetime due to layout switches */
-
-        view->hw.format.layout = rsrc->layout;
-        view->hw.format.manual_stride = has_manual_stride;
-
-        /* Inject the addresses in, interleaving array indices, mip levels,
-         * cube faces, and strides in that order */
-
-        unsigned idx = 0;
-        unsigned levels = 1 + last_level - first_level;
-        unsigned layers = 1 + last_layer - first_layer;
-        unsigned faces  = 1 + last_face  - first_face;
-        unsigned num_elements = levels * layers * faces;
-        if (has_manual_stride)
-                num_elements *= 2;
-
-        descriptor = malloc(sizeof(struct mali_texture_descriptor) +
-                            sizeof(mali_ptr) * num_elements);
-        memcpy(descriptor, &view->hw, sizeof(struct mali_texture_descriptor));
-
-        mali_ptr *pointers_and_strides = descriptor +
-                                         sizeof(struct mali_texture_descriptor);
-
-        for (unsigned w = first_layer; w <= last_layer; ++w) {
-                for (unsigned l = first_level; l <= last_level; ++l) {
-                        for (unsigned f = first_face; f <= last_face; ++f) {
-                                pointers_and_strides[idx++] =
-                                        panfrost_get_texture_address(rsrc, l, w * face_mult + f)
-                                                + afbc_bit + view->astc_stretch;
-                                if (has_manual_stride) {
-                                        pointers_and_strides[idx++] =
-                                                rsrc->slices[l].stride;
-                                }
-                        }
-                }
-        }
-
-        descriptor_gpu = panfrost_upload_transient(batch, descriptor,
-                                  sizeof(struct mali_texture_descriptor) +
-                                          num_elements * sizeof(*pointers_and_strides));
-        free(descriptor);
+        panfrost_batch_add_bo(batch, view->bo,
+                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+                              panfrost_bo_access_for_stage(st));
 
-        return descriptor_gpu;
+        return view->bo->gpu;
 }
 
 static void
@@ -2052,29 +1976,14 @@ panfrost_translate_texture_type(enum pipe_texture_target t) {
         }
 }
 
-static uint8_t
-panfrost_compute_astc_stretch(
-        const struct util_format_description *desc)
-{
-        unsigned width = desc->block.width;
-        unsigned height = desc->block.height;
-        assert(width >= 4 && width <= 12);
-        assert(height >= 4 && height <= 12);
-        if (width == 12)
-                width = 11;
-        if (height == 12)
-                height = 11;
-        return ((height - 4) * 8) + (width - 4);
-}
-
 static struct pipe_sampler_view *
 panfrost_create_sampler_view(
         struct pipe_context *pctx,
         struct pipe_resource *texture,
         const struct pipe_sampler_view *template)
 {
+        struct panfrost_screen *screen = pan_screen(pctx->screen);
         struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);
-        int bytes_per_pixel = util_format_get_blocksize(texture->format);
 
         pipe_reference(NULL, &texture->reference);
 
@@ -2086,12 +1995,6 @@ panfrost_create_sampler_view(
         so->base.reference.count = 1;
         so->base.context = pctx;
 
-        /* sampler_views correspond to texture descriptors, minus the texture
-         * (data) itself. So, we serialise the descriptor here and cache it for
-         * later. */
-
-        const struct util_format_description *desc = util_format_description(prsrc->base.format);
-
         unsigned char user_swizzle[4] = {
                 template->swizzle_r,
                 template->swizzle_g,
@@ -2099,32 +2002,6 @@ panfrost_create_sampler_view(
                 template->swizzle_a
         };
 
-        enum mali_format format = panfrost_find_format(desc);
-
-        if (format == MALI_ASTC_HDR_SUPP || format == MALI_ASTC_SRGB_SUPP)
-                so->astc_stretch = panfrost_compute_astc_stretch(desc);
-
-        /* Check if we need to set a custom stride by computing the "expected"
-         * stride and comparing it to what the BO actually wants. Only applies
-         * to linear textures, since tiled/compressed textures have strict
-         * alignment requirements for their strides as it is */
-
-        unsigned first_level = template->u.tex.first_level;
-        unsigned last_level = template->u.tex.last_level;
-
-        if (prsrc->layout == MALI_TEXTURE_LINEAR) {
-                for (unsigned l = first_level; l <= last_level; ++l) {
-                        unsigned actual_stride = prsrc->slices[l].stride;
-                        unsigned width = u_minify(texture->width0, l);
-                        unsigned comp_stride = width * bytes_per_pixel;
-
-                        if (comp_stride != actual_stride) {
-                                so->manual_stride = true;
-                                break;
-                        }
-                }
-        }
-
         /* In the hardware, array_size refers specifically to array textures,
          * whereas in Gallium, it also covers cubemaps */
 
@@ -2136,26 +2013,32 @@ panfrost_create_sampler_view(
                 array_size /= 6;
         }
 
-        struct mali_texture_descriptor texture_descriptor = {
-                .width = MALI_POSITIVE(u_minify(texture->width0, first_level)),
-                .height = MALI_POSITIVE(u_minify(texture->height0, first_level)),
-                .depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)),
-                .array_size = MALI_POSITIVE(array_size),
-
-                .format = {
-                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
-                        .format = format,
-                        .srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB,
-                        .type = panfrost_translate_texture_type(template->target),
-                        .unknown2 = 0x1,
-                },
-
-                .swizzle = panfrost_translate_swizzle_4(user_swizzle)
-        };
-
-        texture_descriptor.levels = last_level - first_level;
-
-        so->hw = texture_descriptor;
+        enum mali_texture_type type =
+                panfrost_translate_texture_type(template->target);
+
+        unsigned size = panfrost_estimate_texture_size(
+                        template->u.tex.first_level,
+                        template->u.tex.last_level,
+                        template->u.tex.first_layer,
+                        template->u.tex.last_layer,
+                        type, prsrc->layout);
+
+        so->bo = panfrost_bo_create(screen, size, 0);
+
+        panfrost_new_texture(
+                        so->bo->cpu,
+                        texture->width0, texture->height0,
+                        texture->depth0, array_size,
+                        texture->format,
+                        type, prsrc->layout,
+                        template->u.tex.first_level,
+                        template->u.tex.last_level,
+                        template->u.tex.first_layer,
+                        template->u.tex.last_layer,
+                        prsrc->cubemap_stride,
+                        panfrost_translate_swizzle_4(user_swizzle),
+                        prsrc->bo->gpu,
+                        prsrc->slices);
 
         return (struct pipe_sampler_view *) so;
 }
@@ -2190,9 +2073,12 @@ panfrost_set_sampler_views(
 static void
 panfrost_sampler_view_destroy(
         struct pipe_context *pctx,
-        struct pipe_sampler_view *view)
+        struct pipe_sampler_view *pview)
 {
-        pipe_resource_reference(&view->texture, NULL);
+        struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview; /* driver-side view of the same object */
+
+        pipe_resource_reference(&pview->texture, NULL); /* clears the view's texture pointer */
+        panfrost_bo_unreference(view->bo); /* release the descriptor BO set up in panfrost_create_sampler_view */
         ralloc_free(view);
 }
 
index 1a5702613638e95e893868e70d3f49492245decb..375569d7c735288e531c303edd50b98f353e0115 100644 (file)
@@ -262,9 +262,7 @@ struct panfrost_sampler_state {
 
 struct panfrost_sampler_view {
         struct pipe_sampler_view base;
-        struct mali_texture_descriptor hw;
-        uint8_t astc_stretch;
-        bool manual_stride;
+        struct panfrost_bo *bo; /* BO the texture descriptor is written into at view creation */
 };
 
 static inline struct panfrost_context *
index 5c90a5e9b4ceef9fad47624100a58d6abff73a02..d4c134f6246166c5c1abb7c4ab98a58549459692 100644 (file)
@@ -533,15 +533,6 @@ panfrost_resource_destroy(struct pipe_screen *screen,
         ralloc_free(rsrc);
 }
 
-static unsigned
-panfrost_get_layer_stride(struct panfrost_resource *rsrc, unsigned level)
-{
-        if (rsrc->base.target == PIPE_TEXTURE_3D)
-                return rsrc->slices[level].size0;
-        else
-                return rsrc->cubemap_stride;
-}
-
 static void *
 panfrost_transfer_map(struct pipe_context *pctx,
                       struct pipe_resource *resource,
@@ -645,7 +636,9 @@ panfrost_transfer_map(struct pipe_context *pctx,
                 return transfer->map;
         } else {
                 transfer->base.stride = rsrc->slices[level].stride;
-                transfer->base.layer_stride = panfrost_get_layer_stride(rsrc, level);
+                transfer->base.layer_stride = panfrost_get_layer_stride(
+                                rsrc->slices, rsrc->base.target == PIPE_TEXTURE_3D,
+                                rsrc->cubemap_stride, level);
 
                 /* By mapping direct-write, we're implicitly already
                  * initialized (maybe), so be conservative */
@@ -792,10 +785,8 @@ panfrost_get_texture_address(
         struct panfrost_resource *rsrc,
         unsigned level, unsigned face)
 {
-        unsigned level_offset = rsrc->slices[level].offset;
-        unsigned face_offset = face * panfrost_get_layer_stride(rsrc, level);
-
-        return rsrc->bo->gpu + level_offset + face_offset;
+        bool is_3d = rsrc->base.target == PIPE_TEXTURE_3D;
+        return rsrc->bo->gpu + panfrost_texture_offset(rsrc->slices, is_3d, rsrc->cubemap_stride, level, face);
 }
 
 /* Given a resource that has already been allocated, hint that it should use a
@@ -856,6 +847,8 @@ panfrost_resource_hint_layout(
                 panfrost_bo_unreference(rsrc->bo);
                 rsrc->bo = panfrost_bo_create(screen, new_size, PAN_BO_DELAY_MMAP);
         }
+
+        /* TODO: If there are textures bound, regenerate their descriptors */
 }
 
 static void
index 8e4b00e9055b205f644c13038770cd5b1ad58b93..0c92464af084c3f051ffcdd334c347f616c98265 100644 (file)
  */
 
 #include "util/macros.h"
+#include "util/u_math.h"
 #include "pan_texture.h"
 
+/* Generates a texture descriptor. Ideally, descriptors are immutable after the
+ * texture is created, so we can keep these hanging around in GPU memory in a
+ * dedicated BO and not have to worry. In practice there are some minor gotchas
+ * with this (the driver sometimes will change the format of a texture on the
+ * fly for compression) but it's fast enough to just regenerate the descriptor
+ * in those cases, rather than monkeypatching at drawtime.
+ *
+ * A texture descriptor consists of a 32-byte mali_texture_descriptor structure
+ * followed by a variable number of pointers. Due to this variance and
+ * potentially large size, we actually upload directly rather than returning
+ * the descriptor. Whether the user does a copy themselves or not is irrelevant
+ * to us here.
+ */
+
+/* Check if we need to set a custom stride: returns true as soon as any level
+ * in [first_level, last_level] has a slice stride differing from the expected
+ * u_minify(width, l) * bytes_per_pixel. Only applies to linear textures, since
+ * tiled/compressed textures have strict stride alignment requirements as is */
+
+static bool
+panfrost_needs_explicit_stride(
+                struct panfrost_slice *slices,
+                uint16_t width,
+                unsigned first_level, unsigned last_level,
+                unsigned bytes_per_pixel)
+{
+        for (unsigned l = first_level; l <= last_level; ++l) {
+                unsigned actual = slices[l].stride; /* stride recorded for this level */
+                unsigned expected = u_minify(width, l) * bytes_per_pixel; /* minified width, no padding */
+
+                if (actual != expected)
+                        return true;
+        }
+
+        return false;
+}
+
+/* Adaptive Scalable Texture Compression (ASTC) corresponds to just a few
+ * texture types in the hardware, but in fact can be parametrized to have
+ * various widths and heights for the so-called "stretch factor". It turns out
+ * these parameters are stuffed in the bottom bits of the payload pointers.
+ * This function computes the magic stuffing constant for one block dimension
+ * (4..12, asserted). The constant in a given dimension is 3-bits, and two are
+ * stored side-by-side for each active dimension.
+ */
+
+static unsigned
+panfrost_astc_stretch(unsigned dim)
+{
+        assert(dim >= 4 && dim <= 12);
+        return MIN2(dim, 11) - 4; /* 12 maps to the same value as 11, so the result is 0..7 */
+}
+
+/* Texture addresses are tagged with information about AFBC (colour AFBC?) xor
+ * ASTC (stretch factor) if in use. Returns the tag value, or 0 if neither. */
+
+static unsigned
+panfrost_compression_tag(
+                const struct util_format_description *desc,
+                enum mali_format format, enum mali_texture_layout layout)
+{
+        if (layout == MALI_TEXTURE_AFBC)
+                return util_format_has_depth(desc) ? 0x0 : 0x1; /* bit 0 set only for formats without depth */
+        else if (format == MALI_ASTC_HDR_SUPP || format == MALI_ASTC_SRGB_SUPP)
+                return (panfrost_astc_stretch(desc->block.height) << 3) | /* height constant above the low 3 bits */
+                        panfrost_astc_stretch(desc->block.width); /* width constant in the low 3 bits */
+        else
+                return 0;
+}
+
+
+/* Cubemaps have 6 faces as "layers" in between each actual layer. We fix this
+ * up in place: faces = layer % 6, layers = layer / 6. TODO: logic wrong in the
+ * asserted out cases ... can they happen, perhaps from cubemap arrays? */
+
+static void
+panfrost_adjust_cube_dimensions(
+                unsigned *first_face, unsigned *last_face,
+                unsigned *first_layer, unsigned *last_layer)
+{
+        *first_face = *first_layer % 6; /* face index within its cube */
+        *last_face = *last_layer % 6;
+        *first_layer /= 6; /* cube index, overwriting the face-granular input */
+        *last_layer /= 6;
+
+        assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5));
+}
+
+/* Number of entries following the texture descriptor (pointers + strides) */
+
+static unsigned
+panfrost_texture_num_elements(
+                unsigned first_level, unsigned last_level,
+                unsigned first_layer, unsigned last_layer,
+                bool is_cube, bool manual_stride)
+{
+        unsigned first_face  = 0, last_face = 0; /* a single face unless this is a cube */
+
+        if (is_cube) {
+                panfrost_adjust_cube_dimensions(&first_face, &last_face,
+                                &first_layer, &last_layer);
+        }
+
+        unsigned levels = 1 + last_level - first_level; /* all ranges are inclusive */
+        unsigned layers = 1 + last_layer - first_layer;
+        unsigned faces  = 1 + last_face  - first_face;
+        unsigned num_elements = levels * layers * faces;
+
+        if (manual_stride)
+                num_elements *= 2; /* one stride entry per pointer */
+
+        return num_elements;
+}
+
+/* Conservative estimate of the size in bytes of the texture descriptor a
+ * priori: the fixed mali_texture_descriptor plus one mali_ptr per element.
+ * Average case, size equal to the actual size. Worst case, off by 2x (if a
+ * manual stride is not needed on a linear texture). Returned value must be
+ * >= the actual size, so it's safe to use as an allocation amount */
+
+unsigned
+panfrost_estimate_texture_size(
+                unsigned first_level, unsigned last_level,
+                unsigned first_layer, unsigned last_layer,
+                enum mali_texture_type type, enum mali_texture_layout layout)
+{
+        /* Assume worst case: count stride entries for every linear texture */
+        unsigned manual_stride = (layout == MALI_TEXTURE_LINEAR);
+
+        unsigned elements = panfrost_texture_num_elements(
+                        first_level, last_level,
+                        first_layer, last_layer,
+                        type == MALI_TEX_CUBE, manual_stride);
+
+        return sizeof(struct mali_texture_descriptor) +
+                sizeof(mali_ptr) * elements;
+}
+
+void
+panfrost_new_texture(
+        void *out, /* destination: receives the descriptor, then the pointer/stride payload */
+        uint16_t width, uint16_t height, /* minified by first_level below */
+        uint16_t depth, uint16_t array_size,
+        enum pipe_format format,
+        enum mali_texture_type type,
+        enum mali_texture_layout layout,
+        unsigned first_level, unsigned last_level, /* inclusive range */
+        unsigned first_layer, unsigned last_layer, /* inclusive; face-granular for cubes */
+        unsigned cube_stride,
+        unsigned swizzle, /* stored as-is in the descriptor */
+        mali_ptr base, /* address the per-level/face offsets are added to */
+        struct panfrost_slice *slices) /* indexed by mip level */
+{
+        const struct util_format_description *desc =
+                util_format_description(format);
+
+        unsigned bytes_per_pixel = util_format_get_blocksize(format);
+
+        enum mali_format mali_format = panfrost_find_format(desc);
+
+        bool manual_stride = (layout == MALI_TEXTURE_LINEAR) /* strides are only checked for linear layouts */
+                && panfrost_needs_explicit_stride(slices, width,
+                                first_level, last_level, bytes_per_pixel);
+
+        struct mali_texture_descriptor descriptor = {
+                .width = MALI_POSITIVE(u_minify(width, first_level)),
+                .height = MALI_POSITIVE(u_minify(height, first_level)),
+                .depth = MALI_POSITIVE(u_minify(depth, first_level)),
+                .array_size = MALI_POSITIVE(array_size),
+                .format = {
+                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
+                        .format = mali_format,
+                        .srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB),
+                        .type = type,
+                        .layout = layout,
+                        .manual_stride = manual_stride,
+                        .unknown2 = 1,
+                },
+                .levels = last_level - first_level, /* level count minus one */
+                .swizzle = swizzle
+        };
+
+        memcpy(out, &descriptor, sizeof(descriptor)); /* fixed-size header goes first */
+
+        base |= panfrost_compression_tag(desc, mali_format, layout); /* tag ends up in every payload pointer */
+
+        /* Inject the addresses in, interleaving array indices, mip levels,
+         * cube faces, and strides in that order */
+
+        unsigned first_face  = 0, last_face = 0, face_mult = 1;
+
+        if (type == MALI_TEX_CUBE) {
+                face_mult = 6; /* six faces per cube layer */
+                panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer);
+        }
+        /* NOTE(review): the arithmetic on void * below relies on a GNU extension */
+        mali_ptr *payload = (mali_ptr *) (out + sizeof(struct mali_texture_descriptor));
+        unsigned idx = 0;
+
+        for (unsigned w = first_layer; w <= last_layer; ++w) {
+                for (unsigned l = first_level; l <= last_level; ++l) {
+                        for (unsigned f = first_face; f <= last_face; ++f) {
+                                payload[idx++] = base + panfrost_texture_offset(
+                                                slices, type == MALI_TEX_3D,
+                                                cube_stride, l, w * face_mult + f);
+
+                                if (manual_stride)
+                                        payload[idx++] = slices[l].stride; /* stride directly follows its pointer */
+                        }
+                }
+        }
+}
+
 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
  * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
  * This feature is also known as "transaction elimination". */
@@ -52,3 +266,19 @@ panfrost_compute_checksum_size(
 
         return slice->checksum_stride * tile_count_y;
 }
+
+unsigned
+panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level)
+{
+        return is_3d ? slices[level].size0 : cube_stride; /* 3D: this level's size0; otherwise cube_stride */
+}
+
+/* Computes the offset into a texture at a particular level/face: the level's
+ * slice offset plus face * layer stride. Add to the texture's base address */
+
+unsigned
+panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face)
+{
+        unsigned layer_stride = panfrost_get_layer_stride(slices, is_3d, cube_stride, level);
+        return slices[level].offset + (face * layer_stride);
+}
index 29be91cca830a7fff0b89b5e1d182bd214dc6b14..241e1f8b4e67052b86c8d922da773b16206f0b59 100644 (file)
@@ -30,6 +30,7 @@
 
 #include <stdbool.h>
 #include "util/format/u_format.h"
+#include "panfrost-job.h"
 
 struct panfrost_slice {
         unsigned offset;
@@ -63,6 +64,36 @@ panfrost_format_supports_afbc(enum pipe_format format);
 unsigned
 panfrost_afbc_header_size(unsigned width, unsigned height);
 
+/* mali_texture_descriptor */
+
+unsigned
+panfrost_estimate_texture_size(
+                unsigned first_level, unsigned last_level,
+                unsigned first_layer, unsigned last_layer,
+                enum mali_texture_type type, enum mali_texture_layout layout);
+
+void
+panfrost_new_texture(
+        void *out,
+        uint16_t width, uint16_t height,
+        uint16_t depth, uint16_t array_size,
+        enum pipe_format format,
+        enum mali_texture_type type,
+        enum mali_texture_layout layout,
+        unsigned first_level, unsigned last_level,
+        unsigned first_layer, unsigned last_layer,
+        unsigned cube_stride,
+        unsigned swizzle,
+        mali_ptr base,
+        struct panfrost_slice *slices);
+
+
+unsigned
+panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level);
+
+unsigned
+panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face);
+
 /* Formats */
 
 enum mali_format