panfrost: split index cache into shared part
[mesa.git] / src / gallium / drivers / panfrost / pan_context.c
index f234d9435c367c58755ddf0db84a83d45bd4a75f..630f6753fd413f149005768309c1a996041b7823 100644 (file)
@@ -29,7 +29,7 @@
 
 #include "pan_bo.h"
 #include "pan_context.h"
-#include "pan_format.h"
+#include "pan_minmax_cache.h"
 #include "panfrost-quirks.h"
 
 #include "util/macros.h"
@@ -431,7 +431,11 @@ panfrost_stage_attributes(struct panfrost_context *ctx)
         };
 
         /* See mali_attr_meta specification for the magic number */
+
+        builtin.index = so->vertexid_index;
         memcpy(&target[PAN_VERTEX_ID], &builtin, 4);
+
+        builtin.index = so->vertexid_index + 1;
         memcpy(&target[PAN_INSTANCE_ID], &builtin, 4);
 
         ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu;
@@ -476,95 +480,19 @@ panfrost_upload_tex(
 
         struct pipe_sampler_view *pview = &view->base;
         struct panfrost_resource *rsrc = pan_resource(pview->texture);
-        mali_ptr descriptor_gpu;
-        void *descriptor;
-
-        /* Do we interleave an explicit stride with every element? */
-
-        bool has_manual_stride = view->manual_stride;
-
-        /* For easy access */
-
-        bool is_buffer = pview->target == PIPE_BUFFER;
-        unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level;
-        unsigned last_level  = is_buffer ? 0 : pview->u.tex.last_level;
-        unsigned first_layer = is_buffer ? 0 : pview->u.tex.first_layer;
-        unsigned last_layer  = is_buffer ? 0 : pview->u.tex.last_layer;
-        unsigned first_face  = 0;
-        unsigned last_face   = 0;
-        unsigned face_mult   = 1;
-
-        /* Cubemaps have 6 faces as layers in between each actual layer.
-         * There's a bit of an impedence mismatch between Gallium and the
-         * hardware, let's fixup for it */
-
-        if (pview->target == PIPE_TEXTURE_CUBE || pview->target == PIPE_TEXTURE_CUBE_ARRAY) {
-                /* TODO: logic wrong in the asserted out cases ... can they happen? */
-
-                first_face = first_layer % 6;
-                last_face = last_layer % 6;
-                first_layer /= 6;
-                last_layer /= 6;
-
-                assert((first_layer == last_layer) || (first_face == 0 && last_face == 5));
-                face_mult = 6;
-        }
-
-        /* Lower-bit is set when sampling from colour AFBC */
-        bool is_afbc = rsrc->layout == MALI_TEXTURE_AFBC;
-        bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL;
-        unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0;
 
         /* Add the BO to the job so it's retained until the job is done. */
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+
         panfrost_batch_add_bo(batch, rsrc->bo,
                               PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
                               panfrost_bo_access_for_stage(st));
 
-        /* Add the usage flags in, since they can change across the CSO
-         * lifetime due to layout switches */
-
-        view->hw.format.layout = rsrc->layout;
-        view->hw.format.manual_stride = has_manual_stride;
-
-        /* Inject the addresses in, interleaving array indices, mip levels,
-         * cube faces, and strides in that order */
-
-        unsigned idx = 0;
-        unsigned levels = 1 + last_level - first_level;
-        unsigned layers = 1 + last_layer - first_layer;
-        unsigned faces  = 1 + last_face  - first_face;
-        unsigned num_elements = levels * layers * faces;
-        if (has_manual_stride)
-                num_elements *= 2;
-
-        descriptor = malloc(sizeof(struct mali_texture_descriptor) +
-                            sizeof(mali_ptr) * num_elements);
-        memcpy(descriptor, &view->hw, sizeof(struct mali_texture_descriptor));
-
-        mali_ptr *pointers_and_strides = descriptor +
-                                         sizeof(struct mali_texture_descriptor);
-
-        for (unsigned w = first_layer; w <= last_layer; ++w) {
-                for (unsigned l = first_level; l <= last_level; ++l) {
-                        for (unsigned f = first_face; f <= last_face; ++f) {
-                                pointers_and_strides[idx++] =
-                                        panfrost_get_texture_address(rsrc, l, w * face_mult + f)
-                                                + afbc_bit + view->astc_stretch;
-                                if (has_manual_stride) {
-                                        pointers_and_strides[idx++] =
-                                                rsrc->slices[l].stride;
-                                }
-                        }
-                }
-        }
-
-        descriptor_gpu = panfrost_upload_transient(batch, descriptor,
-                                  sizeof(struct mali_texture_descriptor) +
-                                          num_elements * sizeof(*pointers_and_strides));
-        free(descriptor);
+        panfrost_batch_add_bo(batch, view->bo,
+                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+                              panfrost_bo_access_for_stage(st));
 
-        return descriptor_gpu;
+        return view->bo->gpu;
 }
 
 static void
@@ -1329,15 +1257,27 @@ panfrost_translate_index_size(unsigned size)
 }
 
 /* Gets a GPU address for the associated index buffer. Only gauranteed to be
- * good for the duration of the draw (transient), could last longer */
+ * good for the duration of the draw (transient), could last longer. Also get
+ * the bounds on the index buffer for the range accessed by the draw. We do
+ * these operations together because there are natural optimizations which
+ * require them to be together. */
 
 static mali_ptr
-panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info)
+panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pipe_draw_info *info, unsigned *min_index, unsigned *max_index)
 {
         struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
 
         off_t offset = info->start * info->index_size;
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+        mali_ptr out = 0;
+
+        bool needs_indices = true;
+
+        if (info->max_index != ~0u) {
+                *min_index = info->min_index;
+                *max_index = info->max_index;
+                needs_indices = false;
+        }
 
         if (!info->has_user_indices) {
                 /* Only resources can be directly mapped */
@@ -1345,12 +1285,29 @@ panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe
                                       PAN_BO_ACCESS_SHARED |
                                       PAN_BO_ACCESS_READ |
                                       PAN_BO_ACCESS_VERTEX_TILER);
-                return rsrc->bo->gpu + offset;
+                out = rsrc->bo->gpu + offset;
+
+                /* Consult the cache only if the draw did not already carry
+                 * bounds; otherwise a miss would force a needless rescan */
+                needs_indices = needs_indices && !panfrost_minmax_cache_get(rsrc->index_cache, info->start, info->count, min_index, max_index);
         } else {
                 /* Otherwise, we need to upload to transient memory */
                 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
-                return panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
+                out = panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
+        }
+
+        if (needs_indices) {
+                /* Fallback */
+                u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
+
+                if (!info->has_user_indices) {
+                        panfrost_minmax_cache_add(rsrc->index_cache, info->start, info->count,
+                                                  *min_index, *max_index);
+                }
         }
+
+
+        return out;
 }
 
 static bool
@@ -1468,18 +1425,9 @@ panfrost_draw_vbo(
         panfrost_statistics_record(ctx, info);
 
         if (info->index_size) {
-                /* Calculate the min/max index used so we can figure out how
-                 * many times to invoke the vertex shader */
-
-                /* Fetch / calculate index bounds */
                 unsigned min_index = 0, max_index = 0;
-
-                if (info->max_index == ~0u) {
-                        u_vbuf_get_minmax_index(pipe, info, &min_index, &max_index);
-                } else {
-                        min_index = info->min_index;
-                        max_index = info->max_index;
-                }
+                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices =
+                        panfrost_get_index_buffer_bounded(ctx, info, &min_index, &max_index);
 
                 /* Use the corresponding values */
                 vertex_count = max_index - min_index + 1;
@@ -1490,7 +1438,6 @@ panfrost_draw_vbo(
                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
 
                 draw_flags |= panfrost_translate_index_size(info->index_size);
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
         } else {
                 /* Index count == vertex count, if no indexing is applied, as
                  * if it is internally indexed in the expected order */
@@ -2053,29 +2000,14 @@ panfrost_translate_texture_type(enum pipe_texture_target t) {
         }
 }
 
-static uint8_t
-panfrost_compute_astc_stretch(
-        const struct util_format_description *desc)
-{
-        unsigned width = desc->block.width;
-        unsigned height = desc->block.height;
-        assert(width >= 4 && width <= 12);
-        assert(height >= 4 && height <= 12);
-        if (width == 12)
-                width = 11;
-        if (height == 12)
-                height = 11;
-        return ((height - 4) * 8) + (width - 4);
-}
-
 static struct pipe_sampler_view *
 panfrost_create_sampler_view(
         struct pipe_context *pctx,
         struct pipe_resource *texture,
         const struct pipe_sampler_view *template)
 {
+        struct panfrost_screen *screen = pan_screen(pctx->screen);
         struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);
-        int bytes_per_pixel = util_format_get_blocksize(texture->format);
 
         pipe_reference(NULL, &texture->reference);
 
@@ -2087,12 +2019,6 @@ panfrost_create_sampler_view(
         so->base.reference.count = 1;
         so->base.context = pctx;
 
-        /* sampler_views correspond to texture descriptors, minus the texture
-         * (data) itself. So, we serialise the descriptor here and cache it for
-         * later. */
-
-        const struct util_format_description *desc = util_format_description(prsrc->base.format);
-
         unsigned char user_swizzle[4] = {
                 template->swizzle_r,
                 template->swizzle_g,
@@ -2100,32 +2026,6 @@ panfrost_create_sampler_view(
                 template->swizzle_a
         };
 
-        enum mali_format format = panfrost_find_format(desc);
-
-        if (format == MALI_ASTC_HDR_SUPP || format == MALI_ASTC_SRGB_SUPP)
-                so->astc_stretch = panfrost_compute_astc_stretch(desc);
-
-        /* Check if we need to set a custom stride by computing the "expected"
-         * stride and comparing it to what the BO actually wants. Only applies
-         * to linear textures, since tiled/compressed textures have strict
-         * alignment requirements for their strides as it is */
-
-        unsigned first_level = template->u.tex.first_level;
-        unsigned last_level = template->u.tex.last_level;
-
-        if (prsrc->layout == MALI_TEXTURE_LINEAR) {
-                for (unsigned l = first_level; l <= last_level; ++l) {
-                        unsigned actual_stride = prsrc->slices[l].stride;
-                        unsigned width = u_minify(texture->width0, l);
-                        unsigned comp_stride = width * bytes_per_pixel;
-
-                        if (comp_stride != actual_stride) {
-                                so->manual_stride = true;
-                                break;
-                        }
-                }
-        }
-
         /* In the hardware, array_size refers specifically to array textures,
          * whereas in Gallium, it also covers cubemaps */
 
@@ -2137,26 +2037,32 @@ panfrost_create_sampler_view(
                 array_size /= 6;
         }
 
-        struct mali_texture_descriptor texture_descriptor = {
-                .width = MALI_POSITIVE(u_minify(texture->width0, first_level)),
-                .height = MALI_POSITIVE(u_minify(texture->height0, first_level)),
-                .depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)),
-                .array_size = MALI_POSITIVE(array_size),
-
-                .format = {
-                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
-                        .format = format,
-                        .srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB,
-                        .type = panfrost_translate_texture_type(template->target),
-                        .unknown2 = 0x1,
-                },
-
-                .swizzle = panfrost_translate_swizzle_4(user_swizzle)
-        };
-
-        texture_descriptor.levels = last_level - first_level;
-
-        so->hw = texture_descriptor;
+        enum mali_texture_type type =
+                panfrost_translate_texture_type(template->target);
+
+        unsigned size = panfrost_estimate_texture_size(
+                        template->u.tex.first_level,
+                        template->u.tex.last_level,
+                        template->u.tex.first_layer,
+                        template->u.tex.last_layer,
+                        type, prsrc->layout);
+
+        so->bo = panfrost_bo_create(screen, size, 0);
+
+        panfrost_new_texture(
+                        so->bo->cpu,
+                        texture->width0, texture->height0,
+                        texture->depth0, array_size,
+                        texture->format,
+                        type, prsrc->layout,
+                        template->u.tex.first_level,
+                        template->u.tex.last_level,
+                        template->u.tex.first_layer,
+                        template->u.tex.last_layer,
+                        prsrc->cubemap_stride,
+                        panfrost_translate_swizzle_4(user_swizzle),
+                        prsrc->bo->gpu,
+                        prsrc->slices);
 
         return (struct pipe_sampler_view *) so;
 }
@@ -2191,9 +2097,12 @@ panfrost_set_sampler_views(
 static void
 panfrost_sampler_view_destroy(
         struct pipe_context *pctx,
-        struct pipe_sampler_view *view)
+        struct pipe_sampler_view *pview)
 {
-        pipe_resource_reference(&view->texture, NULL);
+        struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview;
+
+        pipe_resource_reference(&pview->texture, NULL);
+        panfrost_bo_unreference(view->bo);
         ralloc_free(view);
 }