panfrost: Remove MRT indirection in blend shaders
[mesa.git] / src / gallium / drivers / panfrost / pan_mfbd.c
index a35acc311bcf23a3153a784d29472380c0bd6d6f..0f2931a58b3adece33965d00db0f18a12b4e4f2d 100644 (file)
  *
  */
 
+#include "pan_bo.h"
 #include "pan_context.h"
 #include "pan_util.h"
 #include "pan_format.h"
 
-#include "util/u_format.h"
-
-static void
-panfrost_invert_swizzle(const unsigned char *in, unsigned char *out)
-{
-        /* First, default to all zeroes to prevent uninitialized junk */
-
-        for (unsigned c = 0; c < 4; ++c)
-                out[c] = PIPE_SWIZZLE_0;
-
-        /* Now "do" what the swizzle says */
-
-        for (unsigned c = 0; c < 4; ++c) {
-                unsigned char i = in[c];
-
-                /* Who cares? */
-                if (i < PIPE_SWIZZLE_X || i > PIPE_SWIZZLE_W)
-                        continue;
-
-                /* Invert */
-                unsigned idx = i - PIPE_SWIZZLE_X;
-                out[idx] = PIPE_SWIZZLE_X + c;
-        }
-}
+#include "util/format/u_format.h"
 
 static struct mali_rt_format
 panfrost_mfbd_format(struct pipe_surface *surf)
@@ -73,7 +51,7 @@ panfrost_mfbd_format(struct pipe_surface *surf)
                 .unk3 = 0x4,
                 .flags = 0x8,
                 .swizzle = panfrost_translate_swizzle_4(swizzle),
-                .unk4 = 0x8
+                .no_preload = true
         };
 
         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
@@ -178,28 +156,28 @@ panfrost_mfbd_format(struct pipe_surface *surf)
 
 static void
 panfrost_mfbd_clear(
-        struct panfrost_job *job,
+        struct panfrost_batch *batch, /* source of the clear mask and clear values copied below; fbx is not referenced in this body */
         struct bifrost_framebuffer *fb,
         struct bifrost_fb_extra *fbx,
         struct bifrost_render_target *rts,
         unsigned rt_count)
 {
         for (unsigned i = 0; i < rt_count; ++i) {
-                if (!(job->clear & (PIPE_CLEAR_COLOR0 << i)))
+                if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i))) /* color bit for render target i is not set in the clear mask: leave rts[i] untouched */
                         continue;
 
-                rts[i].clear_color_1 = job->clear_color[i][0];
-                rts[i].clear_color_2 = job->clear_color[i][1];
-                rts[i].clear_color_3 = job->clear_color[i][2];
-                rts[i].clear_color_4 = job->clear_color[i][3];
+                rts[i].clear_color_1 = batch->clear_color[i][0]; /* clear_color[i][0..3] map to clear_color_1..4 in order */
+                rts[i].clear_color_2 = batch->clear_color[i][1];
+                rts[i].clear_color_3 = batch->clear_color[i][2];
+                rts[i].clear_color_4 = batch->clear_color[i][3];
         }
 
-        if (job->clear & PIPE_CLEAR_DEPTH) {
-                fb->clear_depth = job->clear_depth;
+        if (batch->clear & PIPE_CLEAR_DEPTH) { /* depth and stencil clear values are written to the framebuffer descriptor, not to a render target */
+                fb->clear_depth = batch->clear_depth;
         }
 
-        if (job->clear & PIPE_CLEAR_STENCIL) {
-                fb->clear_stencil = job->clear_stencil;
+        if (batch->clear & PIPE_CLEAR_STENCIL) {
+                fb->clear_stencil = batch->clear_stencil;
         }
 }
 
@@ -222,15 +200,15 @@ panfrost_mfbd_set_cbuf(
         /* Now, we set the layout specific pieces */
 
         if (rsrc->layout == PAN_LINEAR) {
-                rt->format.block = MALI_MFBD_BLOCK_LINEAR;
+                rt->format.block = MALI_BLOCK_LINEAR;
                 rt->framebuffer = base;
                 rt->framebuffer_stride = stride / 16;
         } else if (rsrc->layout == PAN_TILED) {
-                rt->format.block = MALI_MFBD_BLOCK_TILED;
+                rt->format.block = MALI_BLOCK_TILED;
                 rt->framebuffer = base;
                 rt->framebuffer_stride = stride;
         } else if (rsrc->layout == PAN_AFBC) {
-                rt->format.block = MALI_MFBD_BLOCK_AFBC;
+                rt->format.block = MALI_BLOCK_AFBC;
 
                 unsigned header_size = rsrc->slices[level].header_size;
 
@@ -248,20 +226,6 @@ panfrost_mfbd_set_cbuf(
         }
 }
 
-/* Is a format encoded like Z24S8 and therefore compatible for render? */
-
-static bool
-panfrost_is_z24s8_variant(enum pipe_format fmt)
-{
-        switch (fmt) {
-                case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-                case PIPE_FORMAT_Z24X8_UNORM:
-                        return true;
-                default:
-                        return false;
-        }
-}
-
 static void
 panfrost_mfbd_set_zsbuf(
         struct bifrost_framebuffer *fb,
@@ -313,6 +277,20 @@ panfrost_mfbd_set_zsbuf(
                         fbx->flags |= 0x1;
                 } else if (surf->format == PIPE_FORMAT_Z32_UNORM) {
                         /* default flags (0 in bottom place) */
+                } else if (surf->format == PIPE_FORMAT_Z32_FLOAT) {
+                        fbx->flags |= 0xA;
+                        fb->mfbd_flags ^= 0x100;
+                        fb->mfbd_flags |= 0x200;
+                } else if (surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+                        fbx->flags |= 0x1000A;
+                        fb->mfbd_flags ^= 0x100;
+                        fb->mfbd_flags |= 0x201;
+
+                        struct panfrost_resource *stencil = rsrc->separate_stencil;
+                        struct panfrost_slice stencil_slice = stencil->slices[level];
+
+                        fbx->ds_linear.stencil = stencil->bo->gpu + stencil_slice.offset;
+                        fbx->ds_linear.stencil_stride = stencil_slice.stride;
                 }
 
         } else {
@@ -330,12 +308,11 @@ panfrost_mfbd_set_zsbuf(
 }
 
 static mali_ptr
-panfrost_mfbd_upload(
-        struct panfrost_context *ctx,
+panfrost_mfbd_upload(struct panfrost_batch *batch,
         struct bifrost_framebuffer *fb,
         struct bifrost_fb_extra *fbx,
         struct bifrost_render_target *rts,
-        unsigned cbufs)
+        unsigned rt_count)
 {
         off_t offset = 0;
 
@@ -347,10 +324,10 @@ panfrost_mfbd_upload(
         size_t total_sz =
                 sizeof(struct bifrost_framebuffer) +
                 (has_extra ? sizeof(struct bifrost_fb_extra) : 0) +
-                sizeof(struct bifrost_render_target) * cbufs;
+                sizeof(struct bifrost_render_target) * 4;
 
         struct panfrost_transfer m_f_trans =
-                panfrost_allocate_transient(ctx, total_sz);
+                panfrost_allocate_transient(batch, total_sz);
 
         /* Do the transfer */
 
@@ -359,61 +336,116 @@ panfrost_mfbd_upload(
         if (has_extra)
                 UPLOAD(m_f_trans, offset, fbx, total_sz);
 
-        for (unsigned c = 0; c < cbufs; ++c) {
+        for (unsigned c = 0; c < 4; ++c) {
                 UPLOAD(m_f_trans, offset, &rts[c], total_sz);
         }
 
         /* Return pointer suitable for the fragment section */
-        return m_f_trans.gpu | MALI_MFBD | (has_extra ? 2 : 0);
+        unsigned tag =
+                MALI_MFBD |
+                (has_extra ? MALI_MFBD_TAG_EXTRA : 0) |
+                (MALI_POSITIVE(rt_count) << 2);
+
+        return m_f_trans.gpu | tag;
 }
 
 #undef UPLOAD
 
+static struct bifrost_framebuffer
+panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
+{ /* Fills in and returns, by value, a struct bifrost_framebuffer for this batch; vertex_count is only forwarded to panfrost_emit_midg_tiler() */
+        struct panfrost_context *ctx = batch->ctx;
+        struct pipe_context *gallium = (struct pipe_context *) ctx;
+        struct panfrost_screen *screen = pan_screen(gallium->screen);
+        /* Dimensions are read from batch->key */
+        unsigned width = batch->key.width;
+        unsigned height = batch->key.height;
+        /* The shift derived from batch->stack_size is stored in the descriptor and also passed to the scratchpad lookup */
+        unsigned shift = panfrost_get_stack_shift(batch->stack_size);
+
+        struct bifrost_framebuffer framebuffer = {
+                .width1 = MALI_POSITIVE(width), /* NOTE(review): MALI_POSITIVE presumably encodes value - 1; confirm against its definition */
+                .height1 = MALI_POSITIVE(height),
+                .width2 = MALI_POSITIVE(width),
+                .height2 = MALI_POSITIVE(height),
+
+                .unk1 = 0x1080, /* magic value; its meaning is not established in this file */
+                /* NOTE(review): if nr_cbufs can be 0 here, check what MALI_POSITIVE(0) yields; panfrost_mfbd_fragment() overwrites both counts using MAX2(nr_cbufs, 1) */
+                .rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs),
+                .rt_count_2 = 4,
+
+                .unknown2 = 0x1f,
+                .tiler = panfrost_emit_midg_tiler(batch, vertex_count),
+                /* Stack/scratchpad setup: the GPU address of the scratchpad returned for this shift and the screen's thread_tls_alloc/core_count */
+                .stack_shift = shift,
+                .unk0 = 0x1e,
+                .scratchpad = panfrost_batch_get_scratchpad(batch, shift, screen->thread_tls_alloc, screen->core_count)->gpu
+        };
+
+        return framebuffer;
+}
+
+void
+panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
+{ /* Builds the batch's MFBD via panfrost_emit_mfbd() and copies it to batch->framebuffer.cpu */
+        struct bifrost_framebuffer mfbd =
+                panfrost_emit_mfbd(batch, vertex_count);
+        /* NOTE(review): assumes batch->framebuffer.cpu is already allocated with at least sizeof(mfbd) bytes; confirm against the allocation site */
+        memcpy(batch->framebuffer.cpu, &mfbd, sizeof(mfbd));
+}
+
 /* Creates an MFBD for the FRAGMENT section of the bound framebuffer */
 
 mali_ptr
-panfrost_mfbd_fragment(struct panfrost_context *ctx, bool has_draws)
+panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
 {
-        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
+        struct bifrost_framebuffer fb = panfrost_emit_mfbd(batch, has_draws);
+        struct bifrost_fb_extra fbx = {0};
+        struct bifrost_render_target rts[4] = {0};
+
+        /* We always upload at least one dummy GL_NONE render target */
 
-        struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx, has_draws);
-        struct bifrost_fb_extra fbx = {};
-        struct bifrost_render_target rts[4] = {};
+        unsigned rt_descriptors = MAX2(batch->key.nr_cbufs, 1);
 
-        /* XXX: MRT case */
-        fb.rt_count_2 = 1;
+        fb.rt_count_1 = MALI_POSITIVE(rt_descriptors);
+        fb.rt_count_2 = rt_descriptors;
         fb.mfbd_flags = 0x100;
 
         /* TODO: MRT clear */
-        panfrost_mfbd_clear(job, &fb, &fbx, rts, fb.rt_count_2);
+        panfrost_mfbd_clear(batch, &fb, &fbx, rts, fb.rt_count_2);
 
-        for (int cb = 0; cb < ctx->pipe_framebuffer.nr_cbufs; ++cb) {
-                struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[cb];
-                unsigned bpp = util_format_get_blocksize(surf->format);
 
-                panfrost_mfbd_set_cbuf(&rts[cb], surf);
+        /* Upload either the render target or a dummy GL_NONE target */
 
-                /* What is this? Looks like some extension of the bpp field.
-                 * Maybe it establishes how much internal tilebuffer space is
-                 * reserved? */
-                fb.rt_count_2 = MAX2(fb.rt_count_2, ALIGN_POT(bpp, 4) / 4);
-        }
+        for (int cb = 0; cb < rt_descriptors; ++cb) {
+                struct pipe_surface *surf = batch->key.cbufs[cb];
 
-        if (ctx->pipe_framebuffer.zsbuf) {
-                panfrost_mfbd_set_zsbuf(&fb, &fbx, ctx->pipe_framebuffer.zsbuf);
-        }
+                if (surf) {
+                        panfrost_mfbd_set_cbuf(&rts[cb], surf);
 
-        /* For the special case of a depth-only FBO, we need to attach a dummy render target */
+                        /* What is this? Looks like some extension of the bpp
+                         * field. Maybe it establishes how much internal
+                         * tilebuffer space is reserved? */
 
-        if (ctx->pipe_framebuffer.nr_cbufs == 0) {
-                struct mali_rt_format null_rt = {
-                        .unk1 = 0x4000000,
-                        .unk4 = 0x8
-                };
+                        unsigned bpp = util_format_get_blocksize(surf->format);
+                        fb.rt_count_2 = MAX2(fb.rt_count_2, ALIGN_POT(bpp, 4) / 4);
+                } else {
+                        struct mali_rt_format null_rt = {
+                                .unk1 = 0x4000000,
+                                .no_preload = true
+                        };
 
-                rts[0].format = null_rt;
-                rts[0].framebuffer = 0;
-                rts[0].framebuffer_stride = 0;
+                        rts[cb].format = null_rt;
+                        rts[cb].framebuffer = 0;
+                        rts[cb].framebuffer_stride = 0;
+                }
+
+                /* TODO: Break out the field */
+                rts[cb].format.unk1 |= (cb * 0x400);
+        }
+
+        if (batch->key.zsbuf) {
+                panfrost_mfbd_set_zsbuf(&fb, &fbx, batch->key.zsbuf);
         }
 
         /* When scanning out, the depth buffer is immediately invalidated, so
@@ -424,13 +456,12 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool has_draws)
          * The exception is ReadPixels, but this is not supported on GLES so we
          * can safely ignore it. */
 
-        if (panfrost_is_scanout(ctx)) {
-                job->requirements &= ~PAN_REQ_DEPTH_WRITE;
-        }
+        if (panfrost_batch_is_scanout(batch))
+                batch->requirements &= ~PAN_REQ_DEPTH_WRITE;
 
         /* Actualize the requirements */
 
-        if (job->requirements & PAN_REQ_MSAA) {
+        if (batch->requirements & PAN_REQ_MSAA) {
                 rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
 
                 /* XXX */
@@ -438,13 +469,13 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool has_draws)
                 fb.rt_count_2 = 4;
         }
 
-        if (job->requirements & PAN_REQ_DEPTH_WRITE)
+        if (batch->requirements & PAN_REQ_DEPTH_WRITE)
                 fb.mfbd_flags |= MALI_MFBD_DEPTH_WRITE;
 
         /* Checksumming only works with a single render target */
 
-        if (ctx->pipe_framebuffer.nr_cbufs == 1) {
-                struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0];
+        if (batch->key.nr_cbufs == 1) {
+                struct pipe_surface *surf = batch->key.cbufs[0];
                 struct panfrost_resource *rsrc = pan_resource(surf->texture);
                 struct panfrost_bo *bo = rsrc->bo;
 
@@ -459,8 +490,5 @@ panfrost_mfbd_fragment(struct panfrost_context *ctx, bool has_draws)
                 }
         }
 
-        /* We always upload at least one (dummy) cbuf */
-        unsigned cbufs = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
-
-        return panfrost_mfbd_upload(ctx, &fb, &fbx, rts, cbufs);
+        return panfrost_mfbd_upload(batch, &fb, &fbx, rts, rt_descriptors);
 }