panfrost: split index cache into shared part
[mesa.git] / src / gallium / drivers / panfrost / pan_context.c
index 417c900c961a2b23010e6d66aa35220aa0017ff0..630f6753fd413f149005768309c1a996041b7823 100644 (file)
@@ -29,7 +29,7 @@
 
 #include "pan_bo.h"
 #include "pan_context.h"
-#include "pan_format.h"
+#include "pan_minmax_cache.h"
 #include "panfrost-quirks.h"
 
 #include "util/macros.h"
@@ -141,6 +141,9 @@ panfrost_clear(
         panfrost_batch_clear(batch, buffers, color, depth, stencil);
 }
 
+/* TODO: Bifrost requires just a mali_shared_memory, without the rest of the
+ * framebuffer */
+
 static void
 panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
 {
@@ -152,7 +155,7 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
         if (!batch->framebuffer.gpu) {
                 unsigned size = (screen->quirks & MIDGARD_SFBD) ?
                         sizeof(struct mali_single_framebuffer) :
-                        sizeof(struct bifrost_framebuffer);
+                        sizeof(struct mali_framebuffer);
 
                 batch->framebuffer = panfrost_allocate_transient(batch, size);
 
@@ -162,7 +165,7 @@ panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
         }
 
         for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
-                ctx->payloads[i].postfix.framebuffer = batch->framebuffer.gpu;
+                ctx->payloads[i].postfix.shared_memory = batch->framebuffer.gpu;
 }
 
 /* Reset per-frame context, called on context initialisation as well as after
@@ -172,13 +175,7 @@ void
 panfrost_invalidate_frame(struct panfrost_context *ctx)
 {
         for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
-                ctx->payloads[i].postfix.framebuffer = 0;
-
-        if (ctx->rasterizer)
-                ctx->dirty |= PAN_DIRTY_RASTERIZER;
-
-        /* XXX */
-        ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES;
+                ctx->payloads[i].postfix.shared_memory = 0;
 
         /* TODO: When does this need to be handled? */
         ctx->active_queries = true;
@@ -356,38 +353,6 @@ panfrost_default_shader_backend(struct panfrost_context *ctx)
         memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
 }
 
-/* Generates a vertex/tiler job. This is, in some sense, the heart of the
- * graphics command stream. It should be called once per draw, accordding to
- * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in
- * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for
- * vertex jobs. */
-
-struct panfrost_transfer
-panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler)
-{
-        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        struct mali_job_descriptor_header job = {
-                .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
-                .job_descriptor_size = 1,
-        };
-
-        struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payloads[PIPE_SHADER_FRAGMENT] : &ctx->payloads[PIPE_SHADER_VERTEX];
-
-        struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sizeof(job) + sizeof(*payload));
-        memcpy(transfer.cpu, &job, sizeof(job));
-        memcpy(transfer.cpu + sizeof(job), payload, sizeof(*payload));
-        return transfer;
-}
-
-mali_ptr
-panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i)
-{
-        struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
-        struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
-
-        return rsrc->bo->gpu + buf->buffer_offset;
-}
-
 static bool
 panfrost_writes_point_size(struct panfrost_context *ctx)
 {
@@ -439,25 +404,39 @@ panfrost_stage_attributes(struct panfrost_context *ctx)
         for (unsigned i = 0; i < so->num_elements; ++i) {
                 unsigned vbi = so->pipe[i].vertex_buffer_index;
                 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
-                mali_ptr addr = panfrost_vertex_buffer_address(ctx, vbi);
+                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
+                mali_ptr addr = rsrc->bo->gpu + buf->buffer_offset;
+
+                /* Adjust by the masked off bits of the offset. Make sure we
+                 * read src_offset from so->hw (which is not GPU visible)
+                 * rather than target (which is) due to caching effects */
 
-                /* Adjust by the masked off bits of the offset */
-                target[i].src_offset += (addr & 63);
+                unsigned src_offset = so->hw[i].src_offset;
+                src_offset += (addr & 63);
 
                 /* Also, somewhat obscurely per-instance data needs to be
                  * offset in response to a delayed start in an indexed draw */
 
                 if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start)
-                        target[i].src_offset -= buf->stride * start;
+                        src_offset -= buf->stride * start;
+
+                target[i].src_offset = src_offset;
         }
 
         /* Let's also include vertex builtins */
 
-        target[PAN_VERTEX_ID].format = MALI_R32UI;
-        target[PAN_VERTEX_ID].swizzle = panfrost_get_default_swizzle(1);
+        struct mali_attr_meta builtin = {
+                .format = MALI_R32UI,
+                .swizzle = panfrost_get_default_swizzle(1)
+        };
+
+        /* The 4-byte memcpy size below is a magic number; see the mali_attr_meta specification */
 
-        target[PAN_INSTANCE_ID].format = MALI_R32UI;
-        target[PAN_INSTANCE_ID].swizzle = panfrost_get_default_swizzle(1);
+        builtin.index = so->vertexid_index;
+        memcpy(&target[PAN_VERTEX_ID], &builtin, 4);
+
+        builtin.index = so->vertexid_index + 1;
+        memcpy(&target[PAN_INSTANCE_ID], &builtin, 4);
 
         ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu;
 }
@@ -490,21 +469,6 @@ panfrost_upload_sampler_descriptors(struct panfrost_context *ctx)
         }
 }
 
-static enum mali_texture_layout
-panfrost_layout_for_texture(struct panfrost_resource *rsrc)
-{
-        switch (rsrc->layout) {
-        case PAN_AFBC:
-                return MALI_TEXTURE_AFBC;
-        case PAN_TILED:
-                return MALI_TEXTURE_TILED;
-        case PAN_LINEAR:
-                return MALI_TEXTURE_LINEAR;
-        default:
-                unreachable("Invalid texture layout");
-        }
-}
-
 static mali_ptr
 panfrost_upload_tex(
         struct panfrost_context *ctx,
@@ -516,95 +480,19 @@ panfrost_upload_tex(
 
         struct pipe_sampler_view *pview = &view->base;
         struct panfrost_resource *rsrc = pan_resource(pview->texture);
-        mali_ptr descriptor_gpu;
-        void *descriptor;
-
-        /* Do we interleave an explicit stride with every element? */
-
-        bool has_manual_stride = view->manual_stride;
-
-        /* For easy access */
-
-        bool is_buffer = pview->target == PIPE_BUFFER;
-        unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level;
-        unsigned last_level  = is_buffer ? 0 : pview->u.tex.last_level;
-        unsigned first_layer = is_buffer ? 0 : pview->u.tex.first_layer;
-        unsigned last_layer  = is_buffer ? 0 : pview->u.tex.last_layer;
-        unsigned first_face  = 0;
-        unsigned last_face   = 0;
-        unsigned face_mult   = 1;
-
-        /* Cubemaps have 6 faces as layers in between each actual layer.
-         * There's a bit of an impedence mismatch between Gallium and the
-         * hardware, let's fixup for it */
-
-        if (pview->target == PIPE_TEXTURE_CUBE || pview->target == PIPE_TEXTURE_CUBE_ARRAY) {
-                /* TODO: logic wrong in the asserted out cases ... can they happen? */
-
-                first_face = first_layer % 6;
-                last_face = last_layer % 6;
-                first_layer /= 6;
-                last_layer /= 6;
-
-                assert((first_layer == last_layer) || (first_face == 0 && last_face == 5));
-                face_mult = 6;
-        }
-
-        /* Lower-bit is set when sampling from colour AFBC */
-        bool is_afbc = rsrc->layout == PAN_AFBC;
-        bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL;
-        unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0;
 
         /* Add the BO to the job so it's retained until the job is done. */
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+
         panfrost_batch_add_bo(batch, rsrc->bo,
                               PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
                               panfrost_bo_access_for_stage(st));
 
-        /* Add the usage flags in, since they can change across the CSO
-         * lifetime due to layout switches */
-
-        view->hw.format.layout = panfrost_layout_for_texture(rsrc);
-        view->hw.format.manual_stride = has_manual_stride;
-
-        /* Inject the addresses in, interleaving array indices, mip levels,
-         * cube faces, and strides in that order */
-
-        unsigned idx = 0;
-        unsigned levels = 1 + last_level - first_level;
-        unsigned layers = 1 + last_layer - first_layer;
-        unsigned faces  = 1 + last_face  - first_face;
-        unsigned num_elements = levels * layers * faces;
-        if (has_manual_stride)
-                num_elements *= 2;
-
-        descriptor = malloc(sizeof(struct mali_texture_descriptor) +
-                            sizeof(mali_ptr) * num_elements);
-        memcpy(descriptor, &view->hw, sizeof(struct mali_texture_descriptor));
-
-        mali_ptr *pointers_and_strides = descriptor +
-                                         sizeof(struct mali_texture_descriptor);
-
-        for (unsigned w = first_layer; w <= last_layer; ++w) {
-                for (unsigned l = first_level; l <= last_level; ++l) {
-                        for (unsigned f = first_face; f <= last_face; ++f) {
-                                pointers_and_strides[idx++] =
-                                        panfrost_get_texture_address(rsrc, l, w * face_mult + f)
-                                                + afbc_bit + view->astc_stretch;
-                                if (has_manual_stride) {
-                                        pointers_and_strides[idx++] =
-                                                rsrc->slices[l].stride;
-                                }
-                        }
-                }
-        }
-
-        descriptor_gpu = panfrost_upload_transient(batch, descriptor,
-                                  sizeof(struct mali_texture_descriptor) +
-                                          num_elements * sizeof(*pointers_and_strides));
-        free(descriptor);
+        panfrost_batch_add_bo(batch, view->bo,
+                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+                              panfrost_bo_access_for_stage(st));
 
-        return descriptor_gpu;
+        return view->bo->gpu;
 }
 
 static void
@@ -880,9 +768,9 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 panfrost_emit_varying_descriptor(ctx, total_count);
         }
 
-        bool msaa = ctx->rasterizer->base.multisample;
 
-        if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
+        if (ctx->rasterizer) {
+                bool msaa = ctx->rasterizer->base.multisample;
                 ctx->payloads[PIPE_SHADER_FRAGMENT].gl_enables = ctx->rasterizer->tiler_gl_enables;
 
                 /* TODO: Sample size */
@@ -900,7 +788,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         panfrost_patch_shader_state(ctx, PIPE_SHADER_VERTEX);
         panfrost_patch_shader_state(ctx, PIPE_SHADER_COMPUTE);
 
-        if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
+        if (ctx->shader[PIPE_SHADER_VERTEX] && ctx->shader[PIPE_SHADER_FRAGMENT]) {
                 /* Check if we need to link the gl_PointSize varying */
                 if (!panfrost_writes_point_size(ctx)) {
                         /* If the size is constant, write it out. Otherwise,
@@ -915,12 +803,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 }
         }
 
-        /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */
-        if (ctx->shader[PIPE_SHADER_FRAGMENT])
-                ctx->dirty |= PAN_DIRTY_FS;
-
-        if (ctx->dirty & PAN_DIRTY_FS) {
-                assert(ctx->shader[PIPE_SHADER_FRAGMENT]);
+        if (ctx->shader[PIPE_SHADER_FRAGMENT]) {
                 struct panfrost_shader_state *variant = &ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
 
                 panfrost_patch_shader_state(ctx, PIPE_SHADER_FRAGMENT);
@@ -1068,11 +951,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         if (ctx->vertex)
                 panfrost_stage_attributes(ctx);
 
-        if (ctx->dirty & PAN_DIRTY_SAMPLERS)
-                panfrost_upload_sampler_descriptors(ctx);
-
-        if (ctx->dirty & PAN_DIRTY_TEXTURES)
-                panfrost_upload_texture_descriptors(ctx);
+        panfrost_upload_sampler_descriptors(ctx);
+        panfrost_upload_texture_descriptors(ctx);
 
         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
 
@@ -1116,12 +996,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 unsigned ubo_count = panfrost_ubo_count(ctx, i);
                 assert(ubo_count >= 1);
 
-                size_t sz = sizeof(struct mali_uniform_buffer_meta) * ubo_count;
-                struct mali_uniform_buffer_meta ubos[PAN_MAX_CONST_BUFFERS];
+                size_t sz = sizeof(uint64_t) * ubo_count;
+                uint64_t ubos[PAN_MAX_CONST_BUFFERS];
 
                 /* Upload uniforms as a UBO */
-                ubos[0].size = MALI_POSITIVE((2 + uniform_count));
-                ubos[0].ptr = transfer.gpu >> 2;
+                ubos[0] = MALI_MAKE_UBO(2 + uniform_count, transfer.gpu);
 
                 /* The rest are honest-to-goodness UBOs */
 
@@ -1133,9 +1012,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
 
                         if (!enabled || empty) {
                                 /* Stub out disabled UBOs to catch accesses */
-
-                                ubos[ubo].size = 0;
-                                ubos[ubo].ptr = 0xDEAD0000;
+                                ubos[ubo] = MALI_MAKE_UBO(0, 0xDEAD0000);
                                 continue;
                         }
 
@@ -1143,10 +1020,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
 
                         unsigned bytes_per_field = 16;
                         unsigned aligned = ALIGN_POT(usz, bytes_per_field);
-                        unsigned fields = aligned / bytes_per_field;
-
-                        ubos[ubo].size = MALI_POSITIVE(fields);
-                        ubos[ubo].ptr = gpu >> 2;
+                        ubos[ubo] = MALI_MAKE_UBO(aligned / bytes_per_field, gpu);
                 }
 
                 mali_ptr ubufs = panfrost_upload_transient(batch, ubos, sz);
@@ -1252,8 +1126,6 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                 panfrost_upload_transient(batch,
                                           &view,
                                           sizeof(struct mali_viewport));
-
-        ctx->dirty = 0;
 }
 
 /* Corresponds to exactly one draw, but does not submit anything */
@@ -1269,20 +1141,23 @@ panfrost_queue_draw(struct panfrost_context *ctx)
         bool rasterizer_discard = ctx->rasterizer
                                   && ctx->rasterizer->base.rasterizer_discard;
 
-        struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false);
-        struct panfrost_transfer tiler;
 
-        if (!rasterizer_discard)
-                tiler = panfrost_vertex_tiler_job(ctx, true);
+        struct midgard_payload_vertex_tiler *vertex_payload = &ctx->payloads[PIPE_SHADER_VERTEX];
+        struct midgard_payload_vertex_tiler *tiler_payload = &ctx->payloads[PIPE_SHADER_FRAGMENT];
 
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+        bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
 
-        if (rasterizer_discard)
-                panfrost_scoreboard_queue_vertex_job(batch, vertex, FALSE);
-        else if (ctx->wallpaper_batch && batch->first_tiler.gpu)
-                panfrost_scoreboard_queue_fused_job_prepend(batch, vertex, tiler);
-        else
-                panfrost_scoreboard_queue_fused_job(batch, vertex, tiler);
+        if (wallpapering) {
+                /* Inject in reverse order, with "predicted" job indices. THIS IS A HACK XXX */
+                panfrost_new_job(batch, JOB_TYPE_TILER, false, batch->job_index + 2, tiler_payload, sizeof(*tiler_payload), true);
+                panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), true);
+        } else  {
+                unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), false);
+
+                if (!rasterizer_discard)
+                        panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tiler_payload, sizeof(*tiler_payload), false);
+        }
 
         for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
                 struct panfrost_shader_variants *all = ctx->shader[i];
@@ -1382,15 +1257,27 @@ panfrost_translate_index_size(unsigned size)
 }
 
 /* Gets a GPU address for the associated index buffer. Only gauranteed to be
- * good for the duration of the draw (transient), could last longer */
+ * good for the duration of the draw (transient), could last longer. Also get
+ * the bounds on the index buffer for the range accessed by the draw. We do
+ * these operations together because there are natural optimizations which
+ * require them to be together. */
 
 static mali_ptr
-panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info)
+panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pipe_draw_info *info, unsigned *min_index, unsigned *max_index)
 {
         struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
 
         off_t offset = info->start * info->index_size;
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+        mali_ptr out = 0;
+
+        bool needs_indices = true;
+
+        if (info->max_index != ~0u) {
+                *min_index = info->min_index;
+                *max_index = info->max_index;
+                needs_indices = false;
+        }
 
         if (!info->has_user_indices) {
                 /* Only resources can be directly mapped */
@@ -1398,12 +1285,29 @@ panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe
                                       PAN_BO_ACCESS_SHARED |
                                       PAN_BO_ACCESS_READ |
                                       PAN_BO_ACCESS_VERTEX_TILER);
-                return rsrc->bo->gpu + offset;
+                out = rsrc->bo->gpu + offset;
+
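+                /* Check the cache. NOTE(review): this reassigns needs_indices, so bounds already taken from info->max_index are recomputed on a cache miss */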
+                needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, info->start, info->count,
+                                                           min_index, max_index);
         } else {
                 /* Otherwise, we need to upload to transient memory */
                 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
-                return panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
+                out = panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
         }
+
+        if (needs_indices) {
+                /* Fallback */
+                u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
+
+                if (!info->has_user_indices) {
+                        panfrost_minmax_cache_add(rsrc->index_cache, info->start, info->count,
+                                                  *min_index, *max_index);
+                }
+        }
+
+
+        return out;
 }
 
 static bool
@@ -1521,18 +1425,9 @@ panfrost_draw_vbo(
         panfrost_statistics_record(ctx, info);
 
         if (info->index_size) {
-                /* Calculate the min/max index used so we can figure out how
-                 * many times to invoke the vertex shader */
-
-                /* Fetch / calculate index bounds */
                 unsigned min_index = 0, max_index = 0;
-
-                if (info->max_index == ~0u) {
-                        u_vbuf_get_minmax_index(pipe, info, &min_index, &max_index);
-                } else {
-                        min_index = info->min_index;
-                        max_index = info->max_index;
-                }
+                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices =
+                        panfrost_get_index_buffer_bounded(ctx, info, &min_index, &max_index);
 
                 /* Use the corresponding values */
                 vertex_count = max_index - min_index + 1;
@@ -1542,10 +1437,7 @@ panfrost_draw_vbo(
                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index;
                 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
 
-                //assert(!info->restart_index); /* TODO: Research */
-
                 draw_flags |= panfrost_translate_index_size(info->index_size);
-                ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
         } else {
                 /* Index count == vertex count, if no indexing is applied, as
                  * if it is internally indexed in the expected order */
@@ -1643,13 +1535,11 @@ panfrost_bind_rasterizer_state(
 {
         struct panfrost_context *ctx = pan_context(pctx);
 
-        /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */
+        ctx->rasterizer = hwcso;
+
         if (!hwcso)
                 return;
 
-        ctx->rasterizer = hwcso;
-        ctx->dirty |= PAN_DIRTY_RASTERIZER;
-
         ctx->fragment_shader_core.depth_units = ctx->rasterizer->base.offset_units * 2.0f;
         ctx->fragment_shader_core.depth_factor = ctx->rasterizer->base.offset_scale;
 
@@ -1704,9 +1594,7 @@ panfrost_bind_vertex_elements_state(
         void *hwcso)
 {
         struct panfrost_context *ctx = pan_context(pctx);
-
         ctx->vertex = hwcso;
-        ctx->dirty |= PAN_DIRTY_VERTEX;
 }
 
 static void *
@@ -1838,8 +1726,6 @@ panfrost_bind_sampler_states(
         /* XXX: Should upload, not just copy? */
         ctx->sampler_count[shader] = num_sampler;
         memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *));
-
-        ctx->dirty |= PAN_DIRTY_SAMPLERS;
 }
 
 static bool
@@ -1933,14 +1819,8 @@ panfrost_bind_shader_state(
         enum pipe_shader_type type)
 {
         struct panfrost_context *ctx = pan_context(pctx);
-
         ctx->shader[type] = hwcso;
 
-        if (type == PIPE_SHADER_FRAGMENT)
-                ctx->dirty |= PAN_DIRTY_FS;
-        else
-                ctx->dirty |= PAN_DIRTY_VS;
-
         if (!hwcso) return;
 
         /* Match the appropriate variant */
@@ -2092,9 +1972,6 @@ panfrost_set_stencil_ref(
 {
         struct panfrost_context *ctx = pan_context(pctx);
         ctx->stencil_ref = *ref;
-
-        /* Shader core dirty */
-        ctx->dirty |= PAN_DIRTY_FS;
 }
 
 static enum mali_texture_type
@@ -2123,29 +2000,14 @@ panfrost_translate_texture_type(enum pipe_texture_target t) {
         }
 }
 
-static uint8_t
-panfrost_compute_astc_stretch(
-        const struct util_format_description *desc)
-{
-        unsigned width = desc->block.width;
-        unsigned height = desc->block.height;
-        assert(width >= 4 && width <= 12);
-        assert(height >= 4 && height <= 12);
-        if (width == 12)
-                width = 11;
-        if (height == 12)
-                height = 11;
-        return ((height - 4) * 8) + (width - 4);
-}
-
 static struct pipe_sampler_view *
 panfrost_create_sampler_view(
         struct pipe_context *pctx,
         struct pipe_resource *texture,
         const struct pipe_sampler_view *template)
 {
+        struct panfrost_screen *screen = pan_screen(pctx->screen);
         struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);
-        int bytes_per_pixel = util_format_get_blocksize(texture->format);
 
         pipe_reference(NULL, &texture->reference);
 
@@ -2157,12 +2019,6 @@ panfrost_create_sampler_view(
         so->base.reference.count = 1;
         so->base.context = pctx;
 
-        /* sampler_views correspond to texture descriptors, minus the texture
-         * (data) itself. So, we serialise the descriptor here and cache it for
-         * later. */
-
-        const struct util_format_description *desc = util_format_description(prsrc->base.format);
-
         unsigned char user_swizzle[4] = {
                 template->swizzle_r,
                 template->swizzle_g,
@@ -2170,32 +2026,6 @@ panfrost_create_sampler_view(
                 template->swizzle_a
         };
 
-        enum mali_format format = panfrost_find_format(desc);
-
-        if (format == MALI_ASTC_HDR_SUPP || format == MALI_ASTC_SRGB_SUPP)
-                so->astc_stretch = panfrost_compute_astc_stretch(desc);
-
-        /* Check if we need to set a custom stride by computing the "expected"
-         * stride and comparing it to what the BO actually wants. Only applies
-         * to linear textures, since tiled/compressed textures have strict
-         * alignment requirements for their strides as it is */
-
-        unsigned first_level = template->u.tex.first_level;
-        unsigned last_level = template->u.tex.last_level;
-
-        if (prsrc->layout == PAN_LINEAR) {
-                for (unsigned l = first_level; l <= last_level; ++l) {
-                        unsigned actual_stride = prsrc->slices[l].stride;
-                        unsigned width = u_minify(texture->width0, l);
-                        unsigned comp_stride = width * bytes_per_pixel;
-
-                        if (comp_stride != actual_stride) {
-                                so->manual_stride = true;
-                                break;
-                        }
-                }
-        }
-
         /* In the hardware, array_size refers specifically to array textures,
          * whereas in Gallium, it also covers cubemaps */
 
@@ -2207,26 +2037,32 @@ panfrost_create_sampler_view(
                 array_size /= 6;
         }
 
-        struct mali_texture_descriptor texture_descriptor = {
-                .width = MALI_POSITIVE(u_minify(texture->width0, first_level)),
-                .height = MALI_POSITIVE(u_minify(texture->height0, first_level)),
-                .depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)),
-                .array_size = MALI_POSITIVE(array_size),
-
-                .format = {
-                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
-                        .format = format,
-                        .srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB,
-                        .type = panfrost_translate_texture_type(template->target),
-                        .unknown2 = 0x1,
-                },
-
-                .swizzle = panfrost_translate_swizzle_4(user_swizzle)
-        };
-
-        texture_descriptor.levels = last_level - first_level;
-
-        so->hw = texture_descriptor;
+        enum mali_texture_type type =
+                panfrost_translate_texture_type(template->target);
+
+        unsigned size = panfrost_estimate_texture_size(
+                        template->u.tex.first_level,
+                        template->u.tex.last_level,
+                        template->u.tex.first_layer,
+                        template->u.tex.last_layer,
+                        type, prsrc->layout);
+
+        so->bo = panfrost_bo_create(screen, size, 0);
+
+        panfrost_new_texture(
+                        so->bo->cpu,
+                        texture->width0, texture->height0,
+                        texture->depth0, array_size,
+                        texture->format,
+                        type, prsrc->layout,
+                        template->u.tex.first_level,
+                        template->u.tex.last_level,
+                        template->u.tex.first_layer,
+                        template->u.tex.last_layer,
+                        prsrc->cubemap_stride,
+                        panfrost_translate_swizzle_4(user_swizzle),
+                        prsrc->bo->gpu,
+                        prsrc->slices);
 
         return (struct pipe_sampler_view *) so;
 }
@@ -2256,16 +2092,17 @@ panfrost_set_sampler_views(
                                            NULL);
         }
         ctx->sampler_view_count[shader] = new_nr;
-
-        ctx->dirty |= PAN_DIRTY_TEXTURES;
 }
 
 static void
 panfrost_sampler_view_destroy(
         struct pipe_context *pctx,
-        struct pipe_sampler_view *view)
+        struct pipe_sampler_view *pview)
 {
-        pipe_resource_reference(&view->texture, NULL);
+        struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview;
+
+        pipe_resource_reference(&pview->texture, NULL);
+        panfrost_bo_unreference(view->bo);
         ralloc_free(view);
 }
 
@@ -2298,14 +2135,14 @@ panfrost_hint_afbc(
         for (unsigned i = 0; i < fb->nr_cbufs; ++i) {
                 struct pipe_surface *surf = fb->cbufs[i];
                 struct panfrost_resource *rsrc = pan_resource(surf->texture);
-                panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1);
+                panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1);
         }
 
         /* Also hint it to the depth buffer */
 
         if (fb->zsbuf) {
                 struct panfrost_resource *rsrc = pan_resource(fb->zsbuf->texture);
-                panfrost_resource_hint_layout(screen, rsrc, PAN_AFBC, 1);
+                panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1);
         }
 }
 
@@ -2371,8 +2208,6 @@ panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
 
         /* Bounds test not implemented */
         assert(!depth_stencil->depth.bounds_test);
-
-        ctx->dirty |= PAN_DIRTY_FS;
 }
 
 static void