+/* Determines the # of bytes per pixel we need to reserve for a given format in
+ * the tilebuffer (compared to 128-bit budget, etc). Usually the same as the
+ * bytes per pixel of the format itself, but there are some special cases I
+ * don't understand. */
+
+static unsigned
+pan_bytes_per_pixel_tib(enum pipe_format format)
+{
+ const struct util_format_description *desc =
+ util_format_description(format);
+
+ if (util_format_is_unorm8(desc) || format == PIPE_FORMAT_B5G6R5_UNORM)
+ return 4;
+
+ return desc->block.bits / 8;
+}
+
+/* Determines whether a framebuffer uses too much tilebuffer space (requiring
+ * us to scale up the tile at a performance penalty). This is conservative but
+ * afaict you get 128-bits per pixel normally */
+
+static unsigned
+pan_tib_size(struct panfrost_batch *batch)
+{
+ unsigned size = 0;
+
+ for (int cb = 0; cb < batch->key.nr_cbufs; ++cb) {
+ struct pipe_surface *surf = batch->key.cbufs[cb];
+ assert(surf);
+ size += pan_bytes_per_pixel_tib(surf->format);
+ }
+
+ return size;
+}
+
/* Maps the aggregate tilebuffer footprint to the tile-size shift encoded in
 * the framebuffer descriptor: 8 for footprints up to 16 bytes/pixel, and one
 * less for each doubling beyond that, bottoming out at 4 past 128 B/px. */

static unsigned
pan_tib_shift(struct panfrost_batch *batch)
{
        unsigned bytes = pan_tib_size(batch);
        unsigned shift = 8;

        /* Each time the footprint exceeds the next power-of-two budget,
         * halve the tile (decrement the shift), down to a floor of 4. */
        for (unsigned budget = 16; budget <= 128 && bytes > budget; budget *= 2)
                shift--;

        return shift;
}
+
/* Builds a Multi-Framebuffer Descriptor (MFBD) for the batch on the stack.
 * Fills in the framebuffer dimensions, tilebuffer sizing, and render-target
 * count, then the GPU-generation-specific shared-memory/tiler fields.
 * vertex_count is forwarded to the tiler setup helpers. */

static struct mali_framebuffer
panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_device *dev = pan_device(gallium->screen);

        unsigned width = batch->key.width;
        unsigned height = batch->key.height;

        struct mali_framebuffer mfbd = {
                /* MALI_POSITIVE presumably biases by -1 for the hardware
                 * encoding; width/height are written to two field pairs —
                 * why the duplication is needed is not visible here. */
                .width1 = MALI_POSITIVE(width),
                .height1 = MALI_POSITIVE(height),
                .width2 = MALI_POSITIVE(width),
                .height2 = MALI_POSITIVE(height),

                /* Configures tib size; the 0x80 bit's meaning is unknown
                 * (NOTE(review): magic constant — confirm against HW docs) */
                .unk1 = (pan_tib_shift(batch) << 9) | 0x80,

                /* At least one render target is always encoded, even for a
                 * depth/stencil-only framebuffer. */
                .rt_count_1 = MALI_POSITIVE(MAX2(batch->key.nr_cbufs, 1)),
                .rt_count_2 = 4,
        };

        if (dev->quirks & IS_BIFROST) {
                /* Bifrost: sample positions and tiler metadata live in
                 * dedicated descriptor fields. */
                mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
                mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count);
        } else {
                /* Midgard: point the descriptor at a scratchpad BO only if
                 * the shaders actually spill (stack_size != 0). */
                if (batch->stack_size) {
                        unsigned shift = panfrost_get_stack_shift(batch->stack_size);
                        struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch,
                                        batch->stack_size,
                                        dev->thread_tls_alloc,
                                        dev->core_count);
                        mfbd.shared_memory.stack_shift = shift;
                        mfbd.shared_memory.scratchpad = bo->gpu;
                }

                /* All-ones sentinel; exact semantics unclear from here. */
                mfbd.shared_memory.shared_workgroup_count = ~0;

                mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count);
        }

        return mfbd;
}
+
+void
+panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
+{
+ struct mali_framebuffer mfbd =
+ panfrost_emit_mfbd(batch, vertex_count);
+
+ memcpy(batch->framebuffer.cpu, &mfbd, sizeof(mfbd));
+}
+