panfrost: split index cache into shared part
[mesa.git] / src / gallium / drivers / panfrost / pan_context.c
index 60ccf66f8415e962ca92dbd329c24ea29f3c7949..630f6753fd413f149005768309c1a996041b7823 100644 (file)
@@ -29,7 +29,7 @@
 
 #include "pan_bo.h"
 #include "pan_context.h"
-#include "pan_format.h"
+#include "pan_minmax_cache.h"
 #include "panfrost-quirks.h"
 
 #include "util/macros.h"
@@ -177,12 +177,6 @@ panfrost_invalidate_frame(struct panfrost_context *ctx)
         for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
                 ctx->payloads[i].postfix.shared_memory = 0;
 
-        if (ctx->rasterizer)
-                ctx->dirty |= PAN_DIRTY_RASTERIZER;
-
-        /* XXX */
-        ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES;
-
         /* TODO: When does this need to be handled? */
         ctx->active_queries = true;
 }
@@ -437,7 +431,11 @@ panfrost_stage_attributes(struct panfrost_context *ctx)
         };
 
         /* See mali_attr_meta specification for the magic number */
+
+        builtin.index = so->vertexid_index;
         memcpy(&target[PAN_VERTEX_ID], &builtin, 4);
+
+        builtin.index = so->vertexid_index + 1;
         memcpy(&target[PAN_INSTANCE_ID], &builtin, 4);
 
         ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu;
@@ -471,21 +469,6 @@ panfrost_upload_sampler_descriptors(struct panfrost_context *ctx)
         }
 }
 
-static enum mali_texture_layout
-panfrost_layout_for_texture(struct panfrost_resource *rsrc)
-{
-        switch (rsrc->layout) {
-        case PAN_AFBC:
-                return MALI_TEXTURE_AFBC;
-        case PAN_TILED:
-                return MALI_TEXTURE_TILED;
-        case PAN_LINEAR:
-                return MALI_TEXTURE_LINEAR;
-        default:
-                unreachable("Invalid texture layout");
-        }
-}
-
 static mali_ptr
 panfrost_upload_tex(
         struct panfrost_context *ctx,
@@ -497,95 +480,19 @@ panfrost_upload_tex(
 
         struct pipe_sampler_view *pview = &view->base;
         struct panfrost_resource *rsrc = pan_resource(pview->texture);
-        mali_ptr descriptor_gpu;
-        void *descriptor;
-
-        /* Do we interleave an explicit stride with every element? */
-
-        bool has_manual_stride = view->manual_stride;
-
-        /* For easy access */
-
-        bool is_buffer = pview->target == PIPE_BUFFER;
-        unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level;
-        unsigned last_level  = is_buffer ? 0 : pview->u.tex.last_level;
-        unsigned first_layer = is_buffer ? 0 : pview->u.tex.first_layer;
-        unsigned last_layer  = is_buffer ? 0 : pview->u.tex.last_layer;
-        unsigned first_face  = 0;
-        unsigned last_face   = 0;
-        unsigned face_mult   = 1;
-
-        /* Cubemaps have 6 faces as layers in between each actual layer.
-         * There's a bit of an impedence mismatch between Gallium and the
-         * hardware, let's fixup for it */
-
-        if (pview->target == PIPE_TEXTURE_CUBE || pview->target == PIPE_TEXTURE_CUBE_ARRAY) {
-                /* TODO: logic wrong in the asserted out cases ... can they happen? */
-
-                first_face = first_layer % 6;
-                last_face = last_layer % 6;
-                first_layer /= 6;
-                last_layer /= 6;
-
-                assert((first_layer == last_layer) || (first_face == 0 && last_face == 5));
-                face_mult = 6;
-        }
-
-        /* Lower-bit is set when sampling from colour AFBC */
-        bool is_afbc = rsrc->layout == PAN_AFBC;
-        bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL;
-        unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0;
 
         /* Add the BO to the job so it's retained until the job is done. */
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+
         panfrost_batch_add_bo(batch, rsrc->bo,
                               PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
                               panfrost_bo_access_for_stage(st));
 
-        /* Add the usage flags in, since they can change across the CSO
-         * lifetime due to layout switches */
-
-        view->hw.format.layout = panfrost_layout_for_texture(rsrc);
-        view->hw.format.manual_stride = has_manual_stride;
-
-        /* Inject the addresses in, interleaving array indices, mip levels,
-         * cube faces, and strides in that order */
-
-        unsigned idx = 0;
-        unsigned levels = 1 + last_level - first_level;
-        unsigned layers = 1 + last_layer - first_layer;
-        unsigned faces  = 1 + last_face  - first_face;
-        unsigned num_elements = levels * layers * faces;
-        if (has_manual_stride)
-                num_elements *= 2;
-
-        descriptor = malloc(sizeof(struct mali_texture_descriptor) +
-                            sizeof(mali_ptr) * num_elements);
-        memcpy(descriptor, &view->hw, sizeof(struct mali_texture_descriptor));
-
-        mali_ptr *pointers_and_strides = descriptor +
-                                         sizeof(struct mali_texture_descriptor);
-
-        for (unsigned w = first_layer; w <= last_layer; ++w) {
-                for (unsigned l = first_level; l <= last_level; ++l) {
-                        for (unsigned f = first_face; f <= last_face; ++f) {
-                                pointers_and_strides[idx++] =
-                                        panfrost_get_texture_address(rsrc, l, w * face_mult + f)
-                                                + afbc_bit + view->astc_stretch;
-                                if (has_manual_stride) {
-                                        pointers_and_strides[idx++] =
-                                                rsrc->slices[l].stride;
-                                }
-                        }
-                }
-        }
-
-        descriptor_gpu = panfrost_upload_transient(batch, descriptor,
-                                  sizeof(struct mali_texture_descriptor) +
-                                          num_elements * sizeof(*pointers_and_strides));
-        free(descriptor);
+        panfrost_batch_add_bo(batch, view->bo,
+                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+                              panfrost_bo_access_for_stage(st));
 
-        return descriptor_gpu;
+        return view->bo->gpu;
 }
 
 static void
@@ -861,9 +768,9 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 panfrost_emit_varying_descriptor(ctx, total_count);
         }
 
-        bool msaa = ctx->rasterizer->base.multisample;
 
-        if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
+        if (ctx->rasterizer) {
+                bool msaa = ctx->rasterizer->base.multisample;
                 ctx->payloads[PIPE_SHADER_FRAGMENT].gl_enables = ctx->rasterizer->tiler_gl_enables;
 
                 /* TODO: Sample size */
@@ -881,7 +788,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX);
         panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE);
 
-        if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
+        if (ctx->shader[PIPE_SHADER_VERTEX] && ctx->shader[PIPE_SHADER_FRAGMENT]) {
                 /* Check if we need to link the gl_PointSize varying */
                 if (!panfrost_writes_point_size(ctx)) {
                         /* If the size is constant, write it out. Otherwise,
@@ -896,12 +803,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 }
         }
 
-        /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */
-        if (ctx->shader[PIPE_SHADER_FRAGMENT])
-                ctx->dirty |= PAN_DIRTY_FS;
-
-        if (ctx->dirty & PAN_DIRTY_FS) {
-                assert(ctx->shader[PIPE_SHADER_FRAGMENT]);
+        if (ctx->shader[PIPE_SHADER_FRAGMENT]) {
                 struct panfrost_shader_state *variant = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
 
                 panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT);
@@ -1049,11 +951,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         if (ctx->vertex)
                 panfrost_stage_attributes(ctx);
 
-        if (ctx->dirty & PAN_DIRTY_SAMPLERS)
-                panfrost_upload_sampler_descriptors(ctx);
-
-        if (ctx->dirty & PAN_DIRTY_TEXTURES)
-                panfrost_upload_texture_descriptors(ctx);
+        panfrost_upload_sampler_descriptors(ctx);
+        panfrost_upload_texture_descriptors(ctx);
 
         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
 
@@ -1097,12 +996,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 unsigned ubo_count = panfrost_ubo_count(ctx, i);
                 assert(ubo_count >= 1);
 
-                size_t sz = sizeof(struct mali_uniform_buffer_meta) * ubo_count;
-                struct mali_uniform_buffer_meta ubos[PAN_MAX_CONST_BUFFERS];
+                size_t sz = sizeof(uint64_t) * ubo_count;
+                uint64_t ubos[PAN_MAX_CONST_BUFFERS];
 
                 /* Upload uniforms as a UBO */
-                ubos[0].size = MALI_POSITIVE((2 + uniform_count));
-                ubos[0].ptr = transfer.gpu >> 2;
+                ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
 
                 /* The rest are honest-to-goodness UBOs */
 
@@ -1114,9 +1012,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
 
                         if (!enabled || empty) {
                                 /* Stub out disabled UBOs to catch accesses */
-
-                                ubos[ubo].size = 0;
-                                ubos[ubo].ptr = 0xDEAD0000;
+                                ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
                                 continue;
                         }
 
@@ -1124,10 +1020,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
 
                         unsigned bytes_per_field = 16;
                         unsigned aligned = ALIGN_POT(usz, bytes_per_field);
-                        unsigned fields = aligned / bytes_per_field;
-
-                        ubos[ubo].size = MALI_POSITIVE(fields);
-                        ubos[ubo].ptr = gpu >> 2;
+                        ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
                 }
 
                 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
@@ -1233,8 +1126,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 panfrost_upload_transient(batch,
                                           &view,
                                           sizeof(struct mali_viewport));
-
-        ctx->dirty = 0;
 }
 
 /* Corresponds to exactly one draw, but does not submit anything */
@@ -1366,15 +1257,27 @@ panfrost_translate_index_size(unsigned size)
 }
 
 /* Gets a GPU address for the associated index buffer. Only gauranteed to be
- * good for the duration of the draw (transient), could last longer */
+ * good for the duration of the draw (transient), could last longer. Also get
+ * the bounds on the index buffer for the range accessed by the draw. We do
+ * these operations together because there are natural optimizations which
+ * require them to be together. */
 
 static mali_ptr
-panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info)
+panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pipe_draw_info *info, unsigned *min_index, unsigned *max_index)
 {
         struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
 
         off_t offset = info->start * info->index_size;
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+        mali_ptr out = 0;
+
+        bool needs_indices = true;
+
+        if (info->max_index != ~0u) {
+                *min_index = info->min_index;
+                *max_index = info->max_index;
+                needs_indices = false;
+        }
 
         if (!info->has_user_indices) {
                 /* Only resources can be directly mapped */
@@ -1382,12 +1285,29 @@ panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe
                                       PAN_BO_ACCESS_SHARED |
                                       PAN_BO_ACCESS_READ |
                                       PAN_BO_ACCESS_VERTEX_TILER);
-                return rsrc->bo->gpu + offset;
+                out = rsrc->bo->gpu + offset;
+
+                /* Check the cache */
+                needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, info->start, info->count,
+                                                           min_index, max_index);
         } else {
                 /* Otherwise, we need to upload to transient memory */
                 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
-                return panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
+                out = panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
+        }
+
+        if (needs_indices) {
+                /* Fallback */
+                u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
+
+                if (!info->has_user_indices) {
+                        panfrost_minmax_cache_add(rsrc->index_cache, info->start, info->count,
+                                                  *min_index, *max_index);
+                }
         }
+
+
+        return out;
 }
 
 static bool
@@ -1505,18 +1425,9 @@ panfrost_draw_vbo(
         panfrost_statistics_record(ctx, info);
 
         if (info->index_size) {
-                /* Calculate the min/max index used so we can figure out how
-                 * many times to invoke the vertex shader */
-
-                /* Fetch / calculate index bounds */
                 unsigned min_index = 0, max_index = 0;
-
-                if (info->max_index == ~0u) {
-                        u_vbuf_get_minmax_index(pipe, info, &min_index, &max_index);
-                } else {
-                        min_index = info->min_index;
-                        max_index = info->max_index;
-                }
+                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices =
+                        panfrost_get_index_buffer_bounded(ctx, info, &min_index, &max_index);
 
                 /* Use the corresponding values */
                 vertex_count = max_index - min_index + 1;
@@ -1526,10 +1437,7 @@ panfrost_draw_vbo(
                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index;
                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
 
-                //assert(!info->restart_index); /* TODO: Research */
-
                 draw_flags |= panfrost_translate_index_size(info->index_size);
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
         } else {
                 /* Index count == vertex count, if no indexing is applied, as
                  * if it is internally indexed in the expected order */
@@ -1627,13 +1535,11 @@ panfrost_bind_rasterizer_state(
 {
         struct panfrost_context *ctx = pan_context(pctx);
 
-        /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */
+        ctx->rasterizer = hwcso;
+
         if (!hwcso)
                 return;
 
-        ctx->rasterizer = hwcso;
-        ctx->dirty |= PAN_DIRTY_RASTERIZER;
-
         ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f;
         ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale;
 
@@ -1688,9 +1594,7 @@ panfrost_bind_vertex_elements_state(
         void *hwcso)
 {
         struct panfrost_context *ctx = pan_context(pctx);
-
         ctx->vertex = hwcso;
-        ctx->dirty |= PAN_DIRTY_VERTEX;
 }
 
 static void *
@@ -1822,8 +1726,6 @@ panfrost_bind_sampler_states(
         /* XXX: Should upload, not just copy? */
         ctx->sampler_count[shader] = num_sampler;
         memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *));
-
-        ctx->dirty |= PAN_DIRTY_SAMPLERS;
 }
 
 static bool
@@ -1917,14 +1819,8 @@ panfrost_bind_shader_state(
         enum pipe_shader_type type)
 {
         struct panfrost_context *ctx = pan_context(pctx);
-
         ctx->shader[type] = hwcso;
 
-        if (type == PIPE_SHADER_FRAGMENT)
-                ctx->dirty |= PAN_DIRTY_FS;
-        else
-                ctx->dirty |= PAN_DIRTY_VS;
-
         if (!hwcso) return;
 
         /* Match the appropriate variant */
@@ -2076,9 +1972,6 @@ panfrost_set_stencil_ref(
 {
         struct panfrost_context *ctx = pan_context(pctx);
         ctx->stencil_ref = *ref;
-
-        /* Shader core dirty */
-        ctx->dirty |= PAN_DIRTY_FS;
 }
 
 static enum mali_texture_type
@@ -2107,29 +2000,14 @@ panfrost_translate_texture_type(enum pipe_texture_target t) {
         }
 }
 
-static uint8_t
-panfrost_compute_astc_stretch(
-        const struct util_format_description *desc)
-{
-        unsigned width = desc->block.width;
-        unsigned height = desc->block.height;
-        assert(width >= 4 && width <= 12);
-        assert(height >= 4 && height <= 12);
-        if (width == 12)
-                width = 11;
-        if (height == 12)
-                height = 11;
-        return ((height - 4) * 8) + (width - 4);
-}
-
 static struct pipe_sampler_view *
 panfrost_create_sampler_view(
         struct pipe_context *pctx,
         struct pipe_resource *texture,
         const struct pipe_sampler_view *template)
 {
+        struct panfrost_screen *screen = pan_screen(pctx->screen);
         struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);
-        int bytes_per_pixel = util_format_get_blocksize(texture->format);
 
         pipe_reference(NULL, &texture->reference);
 
@@ -2141,12 +2019,6 @@ panfrost_create_sampler_view(
         so->base.reference.count = 1;
         so->base.context = pctx;
 
-        /* sampler_views correspond to texture descriptors, minus the texture
-         * (data) itself. So, we serialise the descriptor here and cache it for
-         * later. */
-
-        const struct util_format_description *desc = util_format_description(prsrc->base.format);
-
         unsigned char user_swizzle[4] = {
                 template->swizzle_r,
                 template->swizzle_g,
@@ -2154,32 +2026,6 @@ panfrost_create_sampler_view(
                 template->swizzle_a
         };
 
-        enum mali_format format = panfrost_find_format(desc);
-
-        if (format == MALI_ASTC_HDR_SUPP || format == MALI_ASTC_SRGB_SUPP)
-                so->astc_stretch = panfrost_compute_astc_stretch(desc);
-
-        /* Check if we need to set a custom stride by computing the "expected"
-         * stride and comparing it to what the BO actually wants. Only applies
-         * to linear textures, since tiled/compressed textures have strict
-         * alignment requirements for their strides as it is */
-
-        unsigned first_level = template->u.tex.first_level;
-        unsigned last_level = template->u.tex.last_level;
-
-        if (prsrc->layout == PAN_LINEAR) {
-                for (unsigned l = first_level; l <= last_level; ++l) {
-                        unsigned actual_stride = prsrc->slices[l].stride;
-                        unsigned width = u_minify(texture->width0, l);
-                        unsigned comp_stride = width * bytes_per_pixel;
-
-                        if (comp_stride != actual_stride) {
-                                so->manual_stride = true;
-                                break;
-                        }
-                }
-        }
-
         /* In the hardware, array_size refers specifically to array textures,
          * whereas in Gallium, it also covers cubemaps */
 
@@ -2191,26 +2037,32 @@ panfrost_create_sampler_view(
                 array_size /= 6;
         }
 
-        struct mali_texture_descriptor texture_descriptor = {
-                .width = MALI_POSITIVE(u_minify(texture->width0, first_level)),
-                .height = MALI_POSITIVE(u_minify(texture->height0, first_level)),
-                .depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)),
-                .array_size = MALI_POSITIVE(array_size),
-
-                .format = {
-                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
-                        .format = format,
-                        .srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB,
-                        .type = panfrost_translate_texture_type(template->target),
-                        .unknown2 = 0x1,
-                },
-
-                .swizzle = panfrost_translate_swizzle_4(user_swizzle)
-        };
-
-        texture_descriptor.levels = last_level - first_level;
-
-        so->hw = texture_descriptor;
+        enum mali_texture_type type =
+                panfrost_translate_texture_type(template->target);
+
+        unsigned size = panfrost_estimate_texture_size(
+                        template->u.tex.first_level,
+                        template->u.tex.last_level,
+                        template->u.tex.first_layer,
+                        template->u.tex.last_layer,
+                        type, prsrc->layout);
+
+        so->bo = panfrost_bo_create(screen, size, 0);
+
+        panfrost_new_texture(
+                        so->bo->cpu,
+                        texture->width0, texture->height0,
+                        texture->depth0, array_size,
+                        texture->format,
+                        type, prsrc->layout,
+                        template->u.tex.first_level,
+                        template->u.tex.last_level,
+                        template->u.tex.first_layer,
+                        template->u.tex.last_layer,
+                        prsrc->cubemap_stride,
+                        panfrost_translate_swizzle_4(user_swizzle),
+                        prsrc->bo->gpu,
+                        prsrc->slices);
 
         return (struct pipe_sampler_view *) so;
 }
@@ -2240,16 +2092,17 @@ panfrost_set_sampler_views(
                                            NULL);
         }
         ctx->sampler_view_count[shader] = new_nr;
-
-        ctx->dirty |= PAN_DIRTY_TEXTURES;
 }
 
 static void
 panfrost_sampler_view_destroy(
         struct pipe_context *pctx,
-        struct pipe_sampler_view *view)
+        struct pipe_sampler_view *pview)
 {
-        pipe_resource_reference(&view->texture, NULL);
+        struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview;
+
+        pipe_resource_reference(&pview->texture, NULL);
+        panfrost_bo_unreference(view->bo);
         ralloc_free(view);
 }
 
@@ -2282,14 +2135,14 @@ panfrost_hint_afbc(
         for (unsigned i = 0; i < fb->nr_cbufs; ++i) {
                 struct pipe_surface *surf = fb->cbufs[i];
                 struct panfrost_resource *rsrc = pan_resource(surf->texture);
-                panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1);
+                panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1);
         }
 
         /* Also hint it to the depth buffer */
 
         if (fb->zsbuf) {
                 struct panfrost_resource *rsrc = pan_resource(fb->zsbuf->texture);
-                panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1);
+                panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1);
         }
 }
 
@@ -2355,8 +2208,6 @@ panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
 
         /* Bounds test not implemented */
         assert(!depth_stencil->depth.bounds_test);
-
-        ctx->dirty |= PAN_DIRTY_FS;
 }
 
 static void