panfrost: RGBA4 and RGB5_A1 framebuffer support
[mesa.git] / src / gallium / drivers / panfrost / pan_mfbd.c
index 0f2931a58b3adece33965d00db0f18a12b4e4f2d..41f21128198fe36624417877d772f43a8e87dea8 100644 (file)
 
 #include "pan_bo.h"
 #include "pan_context.h"
+#include "pan_cmdstream.h"
 #include "pan_util.h"
-#include "pan_format.h"
-
-#include "util/format/u_format.h"
+#include "panfrost-quirks.h"
 
 static struct mali_rt_format
 panfrost_mfbd_format(struct pipe_surface *surf)
@@ -75,6 +74,7 @@ panfrost_mfbd_format(struct pipe_surface *surf)
 
         case PIPE_FORMAT_A4B4G4R4_UNORM:
         case PIPE_FORMAT_B4G4R4A4_UNORM:
+        case PIPE_FORMAT_R4G4B4A4_UNORM:
                 fmt.unk1 = 0x10000000;
                 fmt.unk3 = 0x5;
                 fmt.nr_channels = MALI_POSITIVE(1);
@@ -89,6 +89,14 @@ panfrost_mfbd_format(struct pipe_surface *surf)
                 fmt.nr_channels = MALI_POSITIVE(1);
                 break;
 
+        case PIPE_FORMAT_B5G5R5A1_UNORM:
+        case PIPE_FORMAT_R5G5B5A1_UNORM:
+        case PIPE_FORMAT_B5G5R5X1_UNORM:
+                fmt.unk1 = 0x18000000;
+                fmt.unk3 = 0x7;
+                fmt.nr_channels = MALI_POSITIVE(2);
+                break;
+
         /* Generic 8-bit */
         case PIPE_FORMAT_R8_UINT:
         case PIPE_FORMAT_R8_SINT:
@@ -119,7 +127,6 @@ panfrost_mfbd_format(struct pipe_surface *surf)
         case PIPE_FORMAT_R16_FLOAT:
         case PIPE_FORMAT_R16_UINT:
         case PIPE_FORMAT_R16_SINT:
-        case PIPE_FORMAT_B5G5R5A1_UNORM:
                 fmt.unk1 = 0x84000000;
                 fmt.unk3 = 0x0;
                 fmt.nr_channels = MALI_POSITIVE(2);
@@ -157,11 +164,15 @@ panfrost_mfbd_format(struct pipe_surface *surf)
 static void
 panfrost_mfbd_clear(
         struct panfrost_batch *batch,
-        struct bifrost_framebuffer *fb,
-        struct bifrost_fb_extra *fbx,
-        struct bifrost_render_target *rts,
+        struct mali_framebuffer *fb,
+        struct mali_framebuffer_extra *fbx,
+        struct mali_render_target *rts,
         unsigned rt_count)
 {
+        struct panfrost_context *ctx = batch->ctx;
+        struct pipe_context *gallium = (struct pipe_context *) ctx;
+        struct panfrost_device *dev = pan_device(gallium->screen);
+
         for (unsigned i = 0; i < rt_count; ++i) {
                 if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
                         continue;
@@ -179,14 +190,21 @@ panfrost_mfbd_clear(
         if (batch->clear & PIPE_CLEAR_STENCIL) {
                 fb->clear_stencil = batch->clear_stencil;
         }
+
+        if (dev->quirks & IS_BIFROST) {
+                fbx->clear_color_1 = batch->clear_color[0][0];
+                fbx->clear_color_2 = 0xc0000000 | (fbx->clear_color_1 & 0xffff); /* WTF? */
+        }
 }
 
 static void
 panfrost_mfbd_set_cbuf(
-        struct bifrost_render_target *rt,
+        struct mali_render_target *rt,
         struct pipe_surface *surf)
 {
         struct panfrost_resource *rsrc = pan_resource(surf->texture);
+        struct panfrost_device *dev = pan_device(surf->context->screen);
+        bool is_bifrost = dev->quirks & IS_BIFROST;
 
         unsigned level = surf->u.tex.level;
         unsigned first_layer = surf->u.tex.first_layer;
@@ -199,15 +217,25 @@ panfrost_mfbd_set_cbuf(
 
         /* Now, we set the layout specific pieces */
 
-        if (rsrc->layout == PAN_LINEAR) {
-                rt->format.block = MALI_BLOCK_LINEAR;
+        if (rsrc->layout == MALI_TEXTURE_LINEAR) {
+                if (is_bifrost) {
+                        rt->format.unk4 = 0x1;
+                } else {
+                        rt->format.block = MALI_BLOCK_LINEAR;
+                }
+
                 rt->framebuffer = base;
                 rt->framebuffer_stride = stride / 16;
-        } else if (rsrc->layout == PAN_TILED) {
-                rt->format.block = MALI_BLOCK_TILED;
+        } else if (rsrc->layout == MALI_TEXTURE_TILED) {
+                if (is_bifrost) {
+                        rt->format.unk3 |= 0x8;
+                } else {
+                        rt->format.block = MALI_BLOCK_TILED;
+                }
+
                 rt->framebuffer = base;
                 rt->framebuffer_stride = stride;
-        } else if (rsrc->layout == PAN_AFBC) {
+        } else if (rsrc->layout == MALI_TEXTURE_AFBC) {
                 rt->format.block = MALI_BLOCK_AFBC;
 
                 unsigned header_size = rsrc->slices[level].header_size;
@@ -215,7 +243,13 @@ panfrost_mfbd_set_cbuf(
                 rt->framebuffer = base + header_size;
                 rt->afbc.metadata = base;
                 rt->afbc.stride = 0;
-                rt->afbc.unk = 0x30009;
+                rt->afbc.flags = MALI_AFBC_FLAGS;
+
+                unsigned components = util_format_get_nr_components(surf->format);
+
+                /* The "lossless colorspace transform" is lossy for R and RG formats */
+                if (components >= 3)
+                   rt->afbc.flags |= MALI_AFBC_YTR;
 
                 /* TODO: The blob sets this to something nonzero, but it's not
                  * clear what/how to calculate/if it matters */
@@ -228,68 +262,81 @@ panfrost_mfbd_set_cbuf(
 
 static void
 panfrost_mfbd_set_zsbuf(
-        struct bifrost_framebuffer *fb,
-        struct bifrost_fb_extra *fbx,
+        struct mali_framebuffer *fb,
+        struct mali_framebuffer_extra *fbx,
         struct pipe_surface *surf)
 {
+        struct panfrost_device *dev = pan_device(surf->context->screen);
+        bool is_bifrost = dev->quirks & IS_BIFROST;
         struct panfrost_resource *rsrc = pan_resource(surf->texture);
 
         unsigned level = surf->u.tex.level;
-        assert(surf->u.tex.first_layer == 0);
+        unsigned first_layer = surf->u.tex.first_layer;
+        assert(surf->u.tex.last_layer == first_layer);
 
-        unsigned offset = rsrc->slices[level].offset;
+        mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer);
 
-        if (rsrc->layout == PAN_AFBC) {
+        if (rsrc->layout == MALI_TEXTURE_AFBC) {
                 /* The only Z/S format we can compress is Z24S8 or variants
-                 * thereof (handled by the state tracker) */
+                 * thereof (handled by the gallium frontend) */
                 assert(panfrost_is_z24s8_variant(surf->format));
 
-                mali_ptr base = rsrc->bo->gpu + offset;
                 unsigned header_size = rsrc->slices[level].header_size;
 
                 fb->mfbd_flags |= MALI_MFBD_EXTRA;
 
-                fbx->flags =
-                        MALI_EXTRA_PRESENT |
-                        MALI_EXTRA_AFBC |
-                        MALI_EXTRA_AFBC_ZS |
-                        MALI_EXTRA_ZS |
-                        0x1; /* unknown */
+                fbx->flags_hi |= MALI_EXTRA_PRESENT;
+                fbx->flags_lo |= MALI_EXTRA_ZS | 0x1; /* unknown */
+                fbx->zs_block = MALI_BLOCK_AFBC;
 
                 fbx->ds_afbc.depth_stencil = base + header_size;
                 fbx->ds_afbc.depth_stencil_afbc_metadata = base;
                 fbx->ds_afbc.depth_stencil_afbc_stride = 0;
 
-                fbx->ds_afbc.zero1 = 0x10009;
+                fbx->ds_afbc.flags = MALI_AFBC_FLAGS;
                 fbx->ds_afbc.padding = 0x1000;
-        } else if (rsrc->layout == PAN_LINEAR) {
+        } else if (rsrc->layout == MALI_TEXTURE_LINEAR || rsrc->layout == MALI_TEXTURE_TILED) {
                 /* TODO: Z32F(S8) support, which is always linear */
 
                 int stride = rsrc->slices[level].stride;
 
                 fb->mfbd_flags |= MALI_MFBD_EXTRA;
-                fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS;
+                fbx->flags_hi |= MALI_EXTRA_PRESENT;
+                fbx->flags_lo |= MALI_EXTRA_ZS;
+
+                fbx->ds_linear.depth = base;
 
-                fbx->ds_linear.depth = rsrc->bo->gpu + offset;
-                fbx->ds_linear.depth_stride = stride;
+                if (rsrc->layout == MALI_TEXTURE_LINEAR) {
+                        fbx->zs_block = MALI_BLOCK_LINEAR;
+                        fbx->ds_linear.depth_stride = stride / 16;
+                } else {
+                        if (is_bifrost) {
+                                fbx->zs_block = MALI_BLOCK_UNKNOWN;
+                                fbx->flags_hi |= 0x4400;
+                                fbx->flags_lo |= 0x1;
+                        } else {
+                                fbx->zs_block = MALI_BLOCK_TILED;
+                        }
+
+                        fbx->ds_linear.depth_stride = stride;
+                }
 
                 if (panfrost_is_z24s8_variant(surf->format)) {
-                        fbx->flags |= 0x1;
-                } else if (surf->format == PIPE_FORMAT_Z32_UNORM) {
-                        /* default flags (0 in bottom place) */
+                        fbx->flags_lo |= 0x1;
                 } else if (surf->format == PIPE_FORMAT_Z32_FLOAT) {
-                        fbx->flags |= 0xA;
+                        fbx->flags_lo |= 0xA;
                         fb->mfbd_flags ^= 0x100;
                         fb->mfbd_flags |= 0x200;
                 } else if (surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
-                        fbx->flags |= 0x1000A;
+                        fbx->flags_hi |= 0x400;
+                        fbx->flags_lo |= 0xA;
                         fb->mfbd_flags ^= 0x100;
                         fb->mfbd_flags |= 0x201;
 
                         struct panfrost_resource *stencil = rsrc->separate_stencil;
                         struct panfrost_slice stencil_slice = stencil->slices[level];
 
-                        fbx->ds_linear.stencil = stencil->bo->gpu + stencil_slice.offset;
+                        fbx->ds_linear.stencil = panfrost_get_texture_address(stencil, level, first_layer);
                         fbx->ds_linear.stencil_stride = stencil_slice.stride;
                 }
 
@@ -309,9 +356,9 @@ panfrost_mfbd_set_zsbuf(
 
 static mali_ptr
 panfrost_mfbd_upload(struct panfrost_batch *batch,
-        struct bifrost_framebuffer *fb,
-        struct bifrost_fb_extra *fbx,
-        struct bifrost_render_target *rts,
+        struct mali_framebuffer *fb,
+        struct mali_framebuffer_extra *fbx,
+        struct mali_render_target *rts,
         unsigned rt_count)
 {
         off_t offset = 0;
@@ -322,9 +369,9 @@ panfrost_mfbd_upload(struct panfrost_batch *batch,
         /* Compute total size for transfer */
 
         size_t total_sz =
-                sizeof(struct bifrost_framebuffer) +
-                (has_extra ? sizeof(struct bifrost_fb_extra) : 0) +
-                sizeof(struct bifrost_render_target) * 4;
+                sizeof(struct mali_framebuffer) +
+                (has_extra ? sizeof(struct mali_framebuffer_extra) : 0) +
+                sizeof(struct mali_render_target) * 4;
 
         struct panfrost_transfer m_f_trans =
                 panfrost_allocate_transient(batch, total_sz);
@@ -351,44 +398,71 @@ panfrost_mfbd_upload(struct panfrost_batch *batch,
 
 #undef UPLOAD
 
-static struct bifrost_framebuffer
+/* Determines whether a framebuffer uses too much tilebuffer space (requiring
+ * us to scale up the tile at a performance penalty). Every bound colour
+ * buffer is charged its format block size aligned to 4 with ALIGN_POT, and
+ * the framebuffer counts as "large" once the sum exceeds 16. This is
+ * conservative but afaict you get 128-bits per pixel normally.
+ *
+ * Colour buffer slots below nr_cbufs may be unbound (NULL); this file emits a
+ * dummy GL_NONE render target for such slots. They are skipped here instead
+ * of being asserted on, since the assert aborts debug builds and leaves a
+ * NULL dereference once asserts are compiled out. Skipped slots are assumed
+ * to need no tilebuffer space. */
+
+static bool
+pan_is_large_tib(struct panfrost_batch *batch)
+{
+        unsigned size = 0;
+
+        for (int cb = 0; cb < batch->key.nr_cbufs; ++cb) {
+                struct pipe_surface *surf = batch->key.cbufs[cb];
+
+                /* Unbound render target: nothing to account for */
+                if (!surf)
+                        continue;
+
+                unsigned bpp = util_format_get_blocksize(surf->format);
+                size += ALIGN_POT(bpp, 4);
+        }
+
+        return (size > 16);
+}
+
+/* Builds the base MFBD descriptor for a batch and returns it by value.
+ *
+ * Both width/height pairs are taken from batch->key, the unk1 word is picked
+ * by pan_is_large_tib(), and rt_count_1 encodes the number of colour buffers.
+ * The remaining fields depend on the device quirks: with IS_BIFROST set, the
+ * sample locations and tiler metadata are filled in; otherwise the scratchpad
+ * (stack shift and BO GPU address) and the descriptor returned by
+ * panfrost_emit_midg_tiler() are. vertex_count is only forwarded to the
+ * tiler helpers. */
+
+static struct mali_framebuffer
 panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
 {
         struct panfrost_context *ctx = batch->ctx;
         struct pipe_context *gallium = (struct pipe_context *) ctx;
-        struct panfrost_screen *screen = pan_screen(gallium->screen);
+        struct panfrost_device *dev = pan_device(gallium->screen);
 
         unsigned width = batch->key.width;
         unsigned height = batch->key.height;
 
-        unsigned shift = panfrost_get_stack_shift(batch->stack_size);
-
-        struct bifrost_framebuffer framebuffer = {
+        struct mali_framebuffer mfbd = {
                 .width1 = MALI_POSITIVE(width),
                 .height1 = MALI_POSITIVE(height),
                 .width2 = MALI_POSITIVE(width),
                 .height2 = MALI_POSITIVE(height),
 
-                .unk1 = 0x1080,
+                /* Seems to configure tib size */
+                .unk1 = pan_is_large_tib(batch) ? 0xc80 : 0x1080,
 
                 .rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs),
+                /* NOTE(review): hard-coded 4, independent of nr_cbufs; the
+                 * upload path in this file also sizes for 4 render targets.
+                 * Exact meaning of the field is not visible here - confirm */
                 .rt_count_2 = 4,
-
-                .unknown2 = 0x1f,
-                .tiler = panfrost_emit_midg_tiler(batch, vertex_count),
-                
-                .stack_shift = shift,
-                .unk0 = 0x1e,
-                .scratchpad = panfrost_batch_get_scratchpad(batch, shift, screen->thread_tls_alloc, screen->core_count)->gpu
         };
 
-        return framebuffer;
+        /* The rest of the descriptor is filled differently depending on
+         * whether the device reports the IS_BIFROST quirk */
+        if (dev->quirks & IS_BIFROST) {
+                mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
+                mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count);
+        } else {
+                /* The scratchpad BO is requested using the stack shift
+                 * derived from the batch's stack size */
+                unsigned shift = panfrost_get_stack_shift(batch->stack_size);
+                struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch,
+                                                                       shift,
+                                                                       dev->thread_tls_alloc,
+                                                                       dev->core_count);
+                mfbd.shared_memory.stack_shift = shift;
+                mfbd.shared_memory.scratchpad = bo->gpu;
+                /* NOTE(review): ~0 sets every bit of the field; the intended
+                 * meaning is not visible here - confirm */
+                mfbd.shared_memory.shared_workgroup_count = ~0;
+
+                mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count);
+        }
+
+        return mfbd;
 }
 
 void
 panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
 {
-        struct bifrost_framebuffer mfbd =
+        struct mali_framebuffer mfbd =
                 panfrost_emit_mfbd(batch, vertex_count);
 
         memcpy(batch->framebuffer.cpu, &mfbd, sizeof(mfbd));
@@ -399,9 +473,12 @@ panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
 mali_ptr
 panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
 {
-        struct bifrost_framebuffer fb = panfrost_emit_mfbd(batch, has_draws);
-        struct bifrost_fb_extra fbx = {0};
-        struct bifrost_render_target rts[4] = {0};
+        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
+        bool is_bifrost = dev->quirks & IS_BIFROST;
+
+        struct mali_framebuffer fb = panfrost_emit_mfbd(batch, has_draws);
+        struct mali_framebuffer_extra fbx = {0};
+        struct mali_render_target rts[4] = {0};
 
         /* We always upload at least one dummy GL_NONE render target */
 
@@ -435,6 +512,11 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
                                 .no_preload = true
                         };
 
+                        if (is_bifrost) {
+                                null_rt.flags = 0x8;
+                                null_rt.unk3 = 0x8;
+                        }
+
                         rts[cb].format = null_rt;
                         rts[cb].framebuffer = 0;
                         rts[cb].framebuffer_stride = 0;
@@ -450,7 +532,7 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
 
         /* When scanning out, the depth buffer is immediately invalidated, so
          * we don't need to waste bandwidth writing it out. This can improve
-         * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
+         * performance substantially (Z24X8_UNORM 1080p @ 60fps is 475 MB/s of
          * memory bandwidth!).
          *
          * The exception is ReadPixels, but this is not supported on GLES so we
@@ -477,16 +559,18 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
         if (batch->key.nr_cbufs == 1) {
                 struct pipe_surface *surf = batch->key.cbufs[0];
                 struct panfrost_resource *rsrc = pan_resource(surf->texture);
-                struct panfrost_bo *bo = rsrc->bo;
 
                 if (rsrc->checksummed) {
                         unsigned level = surf->u.tex.level;
                         struct panfrost_slice *slice = &rsrc->slices[level];
 
                         fb.mfbd_flags |= MALI_MFBD_EXTRA;
-                        fbx.flags |= MALI_EXTRA_PRESENT;
+                        fbx.flags_hi |= MALI_EXTRA_PRESENT;
                         fbx.checksum_stride = slice->checksum_stride;
-                        fbx.checksum = bo->gpu + slice->checksum_offset;
+                        if (slice->checksum_bo)
+                                fbx.checksum = slice->checksum_bo->gpu;
+                        else
+                                fbx.checksum = rsrc->bo->gpu + slice->checksum_offset;
                 }
         }