panfrost: Implement tiled rendering
[mesa.git] / src / gallium / drivers / panfrost / pan_mfbd.c
index a141fd314c00829d02d29599df9eeb462d1adfdd..f9a69e47e45a74ccbfb2c3940915c599e58aea75 100644 (file)
@@ -36,17 +36,26 @@ panfrost_mfbd_format(struct pipe_surface *surf)
         const struct util_format_description *desc =
                 util_format_description(surf->texture->format);
 
-        /* Fill in accordingly */
+        /* Fill in accordingly, defaulting to RGBA8888 (UNORM) */
 
         struct mali_rt_format fmt = {
                 .unk1 = 0x4000000,
                 .unk2 = 0x1,
                 .nr_channels = MALI_POSITIVE(desc->nr_channels),
-                .flags = 0x444,
+                .unk3 = 0x4,
+                .flags = 0x8,
                 .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
                 .unk4 = 0x8
         };
 
+        /* Set flags for alternative formats */
+
+        if (surf->texture->format == PIPE_FORMAT_B5G6R5_UNORM) {
+                fmt.unk1 = 0x14000000;
+                fmt.nr_channels = MALI_POSITIVE(2);
+                fmt.unk3 |= 0x1;
+        }
+
         return fmt;
 }
 
@@ -58,8 +67,6 @@ panfrost_mfbd_clear(
                 struct bifrost_fb_extra *fbx,
                 struct bifrost_render_target *rt)
 {
-        struct panfrost_context *ctx = job->ctx;
-
         if (job->clear & PIPE_CLEAR_COLOR) {
                 rt->clear_color_1 = job->clear_color;
                 rt->clear_color_2 = job->clear_color;
@@ -74,48 +81,40 @@ panfrost_mfbd_clear(
         if (job->clear & PIPE_CLEAR_STENCIL) {
                 fb->clear_stencil = job->clear_stencil;
         }
-
-        if (job->clear & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
-                /* Setup combined 24/8 depth/stencil */
-                fb->unk3 |= MALI_MFBD_EXTRA;
-                fbx->flags = 0x405;
-                fbx->ds_linear.depth = ctx->depth_stencil_buffer.gpu;
-                fbx->ds_linear.depth_stride = ctx->pipe_framebuffer.width * 4;
-        }
 }
 
 static void
 panfrost_mfbd_set_cbuf(
                 struct bifrost_render_target *rt,
-                struct pipe_surface *surf,
-                bool flip_y)
+                struct pipe_surface *surf)
 {
         struct panfrost_resource *rsrc = pan_resource(surf->texture);
 
-        signed stride =
-                util_format_get_stride(surf->format, surf->texture->width0);
+        unsigned level = surf->u.tex.level;
+        assert(surf->u.tex.first_layer == 0);
+
+        int stride = rsrc->bo->slices[level].stride;
+        unsigned offset = rsrc->bo->slices[level].offset;
 
         rt->format = panfrost_mfbd_format(surf);
 
         /* Now, we set the layout specific pieces */
 
         if (rsrc->bo->layout == PAN_LINEAR) {
-                mali_ptr framebuffer = rsrc->bo->gpu[0];
-
-                if (flip_y) {
-                        framebuffer += stride * (surf->texture->height0 - 1);
-                        stride = -stride;
-                }
-
-                /* MFBD specifies stride in tiles */
-                rt->framebuffer = framebuffer;
+                rt->format.block = MALI_MFBD_BLOCK_LINEAR;
+                rt->framebuffer = rsrc->bo->gpu + offset;
                 rt->framebuffer_stride = stride / 16;
+        } else if (rsrc->bo->layout == PAN_TILED) {
+                rt->format.block = MALI_MFBD_BLOCK_TILED;
+                rt->framebuffer = rsrc->bo->gpu + offset;
+                rt->framebuffer_stride = stride;
         } else if (rsrc->bo->layout == PAN_AFBC) {
+                assert(level == 0);
                 rt->afbc.metadata = rsrc->bo->afbc_slab.gpu;
                 rt->afbc.stride = 0;
                 rt->afbc.unk = 0x30009;
 
-                rt->format.flags |= MALI_MFBD_FORMAT_AFBC;
+                rt->format.block = MALI_MFBD_BLOCK_AFBC;
 
                 mali_ptr afbc_main = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;
                 rt->framebuffer = afbc_main;
@@ -136,8 +135,14 @@ panfrost_mfbd_set_zsbuf(
 {
         struct panfrost_resource *rsrc = pan_resource(surf->texture);
 
+        unsigned level = surf->u.tex.level;
+        assert(surf->u.tex.first_layer == 0);
+
+        unsigned offset = rsrc->bo->slices[level].offset;
+
         if (rsrc->bo->layout == PAN_AFBC) {
-                fb->unk3 |= MALI_MFBD_EXTRA;
+                assert(level == 0);
+                fb->mfbd_flags |= MALI_MFBD_EXTRA;
 
                 fbx->flags =
                         MALI_EXTRA_PRESENT |
@@ -153,8 +158,16 @@ panfrost_mfbd_set_zsbuf(
 
                 fbx->ds_afbc.zero1 = 0x10009;
                 fbx->ds_afbc.padding = 0x1000;
+        } else if (rsrc->bo->layout == PAN_LINEAR) {
+                int stride = rsrc->bo->slices[level].stride;
+                fb->mfbd_flags |= MALI_MFBD_EXTRA;
+
+                fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS | 0x1;
 
-                fb->unk3 |= MALI_MFBD_DEPTH_WRITE;
+                fbx->ds_linear.depth = rsrc->bo->gpu + offset;
+                fbx->ds_linear.depth_stride = stride;
+        } else {
+                assert(0);
         }
 }
 
@@ -178,7 +191,7 @@ panfrost_mfbd_upload(
         off_t offset = 0;
 
         /* There may be extra data stuck in the middle */
-        bool has_extra = fb->unk3 & MALI_MFBD_EXTRA;
+        bool has_extra = fb->mfbd_flags & MALI_MFBD_EXTRA;
 
         /* Compute total size for transfer */
 
@@ -201,7 +214,7 @@ panfrost_mfbd_upload(
                 UPLOAD(m_f_trans, offset, &rts[c], total_sz);
         }
 
-        /* Return pointer suitable for the fragment seciton */
+        /* Return pointer suitable for the fragment section */
         return m_f_trans.gpu | MALI_MFBD | (has_extra ? 2 : 0);
 }
 
@@ -210,24 +223,24 @@ panfrost_mfbd_upload(
 /* Creates an MFBD for the FRAGMENT section of the bound framebuffer */
 
 mali_ptr
-panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
+panfrost_mfbd_fragment(struct panfrost_context *ctx, bool has_draws)
 {
         struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
 
-        struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx);
+        struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx, has_draws);
         struct bifrost_fb_extra fbx = {};
         struct bifrost_render_target rts[4] = {};
 
         /* XXX: MRT case */
         fb.rt_count_2 = 1;
-        fb.unk3 = 0x100;
+        fb.mfbd_flags = 0x100;
 
         /* TODO: MRT clear */
         panfrost_mfbd_clear(job, &fb, &fbx, &rts[0]);
 
         for (int cb = 0; cb < ctx->pipe_framebuffer.nr_cbufs; ++cb) {
                 struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[cb];
-                panfrost_mfbd_set_cbuf(&rts[cb], surf, flip_y);
+                panfrost_mfbd_set_cbuf(&rts[cb], surf);
         }
 
         if (ctx->pipe_framebuffer.zsbuf) {
@@ -247,7 +260,21 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
                 rts[0].framebuffer_stride = 0;
         }
 
-        if (job->msaa) {
+        /* When scanning out, the depth buffer is immediately invalidated, so
+         * we don't need to waste bandwidth writing it out. This can improve
+         * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
+         * memory bandwidth!).
+         *
+         * The exception is ReadPixels on the depth buffer, but reading back
+         * depth is not supported on GLES so we can safely ignore it. */
+
+        if (panfrost_is_scanout(ctx)) {
+                job->requirements &= ~PAN_REQ_DEPTH_WRITE;
+        }
+
+        /* Actualize the requirements */
+
+        if (job->requirements & PAN_REQ_MSAA) {
                 rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
 
                 /* XXX */
@@ -255,16 +282,17 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y)
                 fb.rt_count_2 = 4;
         }
 
+        if (job->requirements & PAN_REQ_DEPTH_WRITE)
+                fb.mfbd_flags |= MALI_MFBD_DEPTH_WRITE;
+
         if (ctx->pipe_framebuffer.nr_cbufs == 1) {
                 struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;
 
                 if (rsrc->bo->has_checksum) {
-                        int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0);
-
-                        fb.unk3 |= MALI_MFBD_EXTRA;
+                        fb.mfbd_flags |= MALI_MFBD_EXTRA;
                         fbx.flags |= MALI_EXTRA_PRESENT;
                         fbx.checksum_stride = rsrc->bo->checksum_stride;
-                        fbx.checksum = rsrc->bo->gpu[0] + stride * rsrc->base.height0;
+                        fbx.checksum = rsrc->bo->gpu + rsrc->bo->slices[0].stride * rsrc->base.height0;
                 }
         }