panfrost: Refactor texture/sampler upload
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 7 Jun 2019 21:25:28 +0000 (14:25 -0700)
committer Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Mon, 10 Jun 2019 13:45:33 +0000 (06:45 -0700)
We move some code packing the texture/sampler descriptors into
dedicated functions (out of the terrifyingly long emit_for_draw
monolith), cleaning them up as we go.

The discovery triggering the cleanup is the format for including manual
strides in the presence of mipmaps/cubemaps. Rather than being placed at the
end as previously assumed, they are interleaved after each address.
This difference is relevant when handling NPOT linear mipmaps.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/gallium/drivers/panfrost/include/panfrost-job.h
src/gallium/drivers/panfrost/pan_context.c
src/gallium/drivers/panfrost/pandecode/decode.c

index 8a4a7644070c2462e3fa9dfe07fad558be2835df..fd23499a00cd54cc82ed583ace0d009f5fae1a7a 100644 (file)
@@ -1112,6 +1112,9 @@ enum mali_wrap_mode {
 /* Cubemap bloats everything up */
 #define MAX_FACES (6)
 
+/* For each pointer, there is an address and optionally also a stride */
+#define MAX_ELEMENTS (2)
+
 /* Corresponds to the type passed to glTexImage2D and so forth */
 
 /* Flags for usage2 */
@@ -1155,7 +1158,7 @@ struct mali_texture_descriptor {
         uint32_t unknown6;
         uint32_t unknown7;
 
-        mali_ptr swizzled_bitmaps[MAX_MIP_LEVELS * MAX_FACES];
+        mali_ptr payload[MAX_MIP_LEVELS * MAX_FACES * MAX_ELEMENTS];
 } __attribute__((packed));
 
 /* Used as part of filter_mode */
index 94fcae5fe4e57fd0cf978cffa3d4092f21ea4968..b08f50291fba604bd5d8dca50bd370a95b9dfcf0 100644 (file)
@@ -848,6 +848,109 @@ panfrost_stage_attributes(struct panfrost_context *ctx)
         ctx->payload_vertex.postfix.attribute_meta = transfer.gpu;
 }
 
+/* Upload each stage's sampler descriptors back to back, with no padding */
+static void
+panfrost_upload_sampler_descriptors(struct panfrost_context *ctx)
+{
+        for (int stage = 0; stage <= PIPE_SHADER_FRAGMENT; ++stage) {
+                if (ctx->sampler_count[stage] == 0)
+                        continue;
+
+                size_t bytes = sizeof(struct mali_sampler_descriptor) *
+                               ctx->sampler_count[stage];
+                struct panfrost_transfer xfer =
+                        panfrost_allocate_transient(ctx, bytes);
+                struct mali_sampler_descriptor *out =
+                        (struct mali_sampler_descriptor *) xfer.cpu;
+
+                for (int s = 0; s < ctx->sampler_count[stage]; ++s)
+                        out[s] = ctx->samplers[stage][s]->hw;
+
+                /* Point the matching job payload at the fresh copy */
+                if (stage == PIPE_SHADER_VERTEX)
+                        ctx->payload_vertex.postfix.sampler_descriptor = xfer.gpu;
+                else if (stage == PIPE_SHADER_FRAGMENT)
+                        ctx->payload_tiler.postfix.sampler_descriptor = xfer.gpu;
+                else
+                        assert(0);
+        }
+}
+
+/* Computes the GPU address of one slice of a texture, i.e. the BO's base
+ * address plus the offset of the mip level plus `face` cubemap strides */
+static mali_ptr
+panfrost_get_texture_address(
+                struct panfrost_resource *rsrc,
+                unsigned level, unsigned face)
+{
+        /* Offset of the requested face, then of the requested mip level */
+        unsigned into_faces = face * rsrc->bo->cubemap_stride;
+        unsigned into_levels = rsrc->bo->slices[level].offset;
+
+        return rsrc->bo->gpu + into_levels + into_faces;
+}
+
+/* Fills in the payload of a sampler view's descriptor and uploads it to
+ * transient memory, returning its GPU address (0 for a NULL view) */
+static mali_ptr
+panfrost_upload_tex(
+                struct panfrost_context *ctx,
+                struct panfrost_sampler_view *view)
+{
+        if (!view)
+                return 0;
+
+        struct pipe_resource *tex_rsrc = view->base.texture;
+        struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc;
+
+        /* Do we interleave an explicit stride with every element? */
+        bool has_manual_stride =
+                view->hw.format.usage2 & MALI_TEX_MANUAL_STRIDE;
+
+        /* Inject the addresses in, interleaving mip levels, cube faces, and
+         * strides in that order; assert each element fits in the payload */
+        unsigned idx = 0;
+        for (unsigned l = 0; l <= tex_rsrc->last_level; ++l) {
+                for (unsigned f = 0; f < tex_rsrc->array_size; ++f) {
+                        assert(idx + 1 + has_manual_stride <=
+                               sizeof(view->hw.payload) / sizeof(view->hw.payload[0]));
+                        view->hw.payload[idx++] =
+                                panfrost_get_texture_address(rsrc, l, f);
+                        if (has_manual_stride) {
+                                view->hw.payload[idx++] =
+                                        rsrc->bo->slices[l].stride;
+                        }
+                }
+        }
+
+        return panfrost_upload_transient(ctx, &view->hw,
+                        sizeof(struct mali_texture_descriptor));
+}
+
+/* Upload every bound texture descriptor, then a table of their addresses */
+static void
+panfrost_upload_texture_descriptors(struct panfrost_context *ctx)
+{
+        for (int stage = 0; stage <= PIPE_SHADER_FRAGMENT; ++stage) {
+                if (ctx->sampler_view_count[stage] == 0)
+                        continue;
+
+                uint64_t table[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+                for (int v = 0; v < ctx->sampler_view_count[stage]; ++v)
+                        table[v] = panfrost_upload_tex(ctx, ctx->sampler_views[stage][v]);
+
+                mali_ptr table_gpu = panfrost_upload_transient(ctx, table,
+                                sizeof(uint64_t) * ctx->sampler_view_count[stage]);
+
+                if (stage == PIPE_SHADER_VERTEX)
+                        ctx->payload_vertex.postfix.texture_trampoline = table_gpu;
+                else if (stage == PIPE_SHADER_FRAGMENT)
+                        ctx->payload_tiler.postfix.texture_trampoline = table_gpu;
+                else
+                        assert(0);
+        }
+}
+
 /* Go through dirty flags and actualise them in the cmdstream. */
 
 void
@@ -1040,80 +1143,11 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
         /* We stage to transient, so always dirty.. */
         panfrost_stage_attributes(ctx);
 
-        if (ctx->dirty & PAN_DIRTY_SAMPLERS) {
-                /* Upload samplers back to back, no padding */
-
-                for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
-                        if (!ctx->sampler_count[t]) continue;
-
-                        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]);
-                        struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu;
-
-                        for (int i = 0; i < ctx->sampler_count[t]; ++i) {
-                                desc[i] = ctx->samplers[t][i]->hw;
-                        }
-
-                        if (t == PIPE_SHADER_FRAGMENT)
-                                ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu;
-                        else if (t == PIPE_SHADER_VERTEX)
-                                ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu;
-                        else
-                                assert(0);
-                }
-        }
+        if (ctx->dirty & PAN_DIRTY_SAMPLERS)
+                panfrost_upload_sampler_descriptors(ctx);
 
-        if (ctx->dirty & PAN_DIRTY_TEXTURES) {
-                for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
-                        /* Shortcircuit */
-                        if (!ctx->sampler_view_count[t]) continue;
-
-                        uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-
-                        for (int i = 0; i < ctx->sampler_view_count[t]; ++i) {
-                                if (!ctx->sampler_views[t][i])
-                                        continue;
-
-                                struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture;
-                                struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc;
-
-                                /* Inject the addresses in, interleaving cube
-                                 * faces and mip levels appropriately. */
-
-                                for (int l = 0; l <= tex_rsrc->last_level; ++l) {
-                                        for (int f = 0; f < tex_rsrc->array_size; ++f) {
-                                                unsigned idx = (l * tex_rsrc->array_size) + f;
-
-                                                ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] =
-                                                        rsrc->bo->gpu +
-                                                        rsrc->bo->slices[l].offset +
-                                                        f * rsrc->bo->cubemap_stride;
-                                        }
-                                }
-
-                                /* Inject the strides */
-                                unsigned usage2 = ctx->sampler_views[t][i]->hw.format.usage2;
-
-                                if (usage2 & MALI_TEX_MANUAL_STRIDE) {
-                                        unsigned idx = tex_rsrc->last_level * tex_rsrc->array_size;
-                                        idx += tex_rsrc->array_size;
-
-                                        ctx->sampler_views[t][i]->hw.swizzled_bitmaps[idx] =
-                                                rsrc->bo->slices[0].stride;
-                                }
-
-                                trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor));
-                        }
-
-                        mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
-
-                        if (t == PIPE_SHADER_FRAGMENT)
-                                ctx->payload_tiler.postfix.texture_trampoline = trampoline;
-                        else if (t == PIPE_SHADER_VERTEX)
-                                ctx->payload_vertex.postfix.texture_trampoline = trampoline;
-                        else
-                                assert(0);
-                }
-        }
+        if (ctx->dirty & PAN_DIRTY_TEXTURES)
+                panfrost_upload_texture_descriptors(ctx);
 
         const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
 
index dac27c366848a4c01bf4e7d7d3cc18e8145aba4d..00678a4c5edb7b02f9b90107df404393313315db 100644 (file)
@@ -1508,7 +1508,7 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix
                                         pandecode_prop("unknown6 = 0x%" PRIx32, t->unknown6);
                                         pandecode_prop("unknown7 = 0x%" PRIx32, t->unknown7);
 
-                                        pandecode_log(".swizzled_bitmaps = {\n");
+                                        pandecode_log(".payload = {\n");
                                         pandecode_indent++;
 
                                         /* A bunch of bitmap pointers follow.
@@ -1518,32 +1518,19 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix
                                          * possibilities to futureproof */
 
                                         int bitmap_count = MALI_NEGATIVE(t->nr_mipmap_levels);
+                                        bool manual_stride = f.usage2 & MALI_TEX_MANUAL_STRIDE;
 
-                                        if (!f.is_not_cubemap) {
-                                                /* Miptree for each face */
+                                        /* Miptree for each face */
+                                        if (!f.is_not_cubemap)
                                                 bitmap_count *= 6;
-                                        }
-
-                                        if (f.usage2 & MALI_TEX_MANUAL_STRIDE) {
-                                                /* Stride for each... what exactly? TODO More traces */
-
-                                                if (bitmap_count > 1) {
-                                                        pandecode_msg("Manual stride with mip/cubemaps, decode uncertain");
-                                                }
 
-                                                /* This is a guess, we've only
-                                                 * seen for 1-level non-mip 2D
-                                                 * */
+                                        /* Stride for each element */
+                                        if (manual_stride)
+                                                bitmap_count *= 2;
 
-                                                bitmap_count += 1;
-                                        }
-
-                                        int max_count = sizeof(t->swizzled_bitmaps) / sizeof(t->swizzled_bitmaps[0]);
-
-                                        if (bitmap_count > max_count) {
-                                                pandecode_msg("XXX: bitmap count tripped");
-                                                bitmap_count = max_count;
-                                        }
+                                        /* Sanity check the size */
+                                        int max_count = sizeof(t->payload) / sizeof(t->payload[0]);
+                                        assert (bitmap_count <= max_count);
 
                                         /* Dump more to be safe, but not _that_ much more */
                                         int safe_count = MIN2(bitmap_count * 2, max_count);
@@ -1553,15 +1540,15 @@ pandecode_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix
 
                                                 /* How we dump depends if this is a stride or a pointer */
 
-                                                if ((f.usage2 & MALI_TEX_MANUAL_STRIDE) && ((i + 1) == bitmap_count)) {
+                                                if ((f.usage2 & MALI_TEX_MANUAL_STRIDE) && (i & 1)) {
                                                         /* signed 32-bit snuck in as a 64-bit pointer */
-                                                        uint64_t stride_set = t->swizzled_bitmaps[i];
+                                                        uint64_t stride_set = t->payload[i];
                                                         uint32_t clamped_stride = stride_set;
                                                         int32_t stride = clamped_stride;
                                                         assert(stride_set == clamped_stride);
                                                         pandecode_log("%s(mali_ptr) %d /* stride */, \n", prefix, stride);
                                                 } else {
-                                                        char *a = pointer_as_memory_reference(t->swizzled_bitmaps[i]);
+                                                        char *a = pointer_as_memory_reference(t->payload[i]);
                                                         pandecode_log("%s%s, \n", prefix, a);
                                                         free(a);
                                                 }