#include "pan_bo.h"
#include "pan_context.h"
+#include "pan_cmdstream.h"
#include "pan_util.h"
-#include "pan_format.h"
-
-#include "util/format/u_format.h"
+#include "panfrost-quirks.h"
static struct mali_rt_format
panfrost_mfbd_format(struct pipe_surface *surf)
case PIPE_FORMAT_A4B4G4R4_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
+ case PIPE_FORMAT_R4G4B4A4_UNORM:
fmt.unk1 = 0x10000000;
fmt.unk3 = 0x5;
fmt.nr_channels = MALI_POSITIVE(1);
fmt.nr_channels = MALI_POSITIVE(1);
break;
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_R5G5B5A1_UNORM:
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ fmt.unk1 = 0x18000000;
+ fmt.unk3 = 0x7;
+ fmt.nr_channels = MALI_POSITIVE(2);
+ break;
+
/* Generic 8-bit */
case PIPE_FORMAT_R8_UINT:
case PIPE_FORMAT_R8_SINT:
case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_R16_UINT:
case PIPE_FORMAT_R16_SINT:
- case PIPE_FORMAT_B5G5R5A1_UNORM:
fmt.unk1 = 0x84000000;
fmt.unk3 = 0x0;
fmt.nr_channels = MALI_POSITIVE(2);
static void
panfrost_mfbd_clear(
struct panfrost_batch *batch,
- struct bifrost_framebuffer *fb,
- struct bifrost_fb_extra *fbx,
- struct bifrost_render_target *rts,
+ struct mali_framebuffer *fb,
+ struct mali_framebuffer_extra *fbx,
+ struct mali_render_target *rts,
unsigned rt_count)
{
+ struct panfrost_context *ctx = batch->ctx;
+ struct pipe_context *gallium = (struct pipe_context *) ctx;
+ struct panfrost_device *dev = pan_device(gallium->screen);
+
for (unsigned i = 0; i < rt_count; ++i) {
if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
continue;
if (batch->clear & PIPE_CLEAR_STENCIL) {
fb->clear_stencil = batch->clear_stencil;
}
+
+ if (dev->quirks & IS_BIFROST) {
+ fbx->clear_color_1 = batch->clear_color[0][0];
+ fbx->clear_color_2 = 0xc0000000 | (fbx->clear_color_1 & 0xffff); /* WTF? */
+ }
}
static void
panfrost_mfbd_set_cbuf(
- struct bifrost_render_target *rt,
+ struct mali_render_target *rt,
struct pipe_surface *surf)
{
struct panfrost_resource *rsrc = pan_resource(surf->texture);
+ struct panfrost_device *dev = pan_device(surf->context->screen);
+ bool is_bifrost = dev->quirks & IS_BIFROST;
unsigned level = surf->u.tex.level;
unsigned first_layer = surf->u.tex.first_layer;
/* Now, we set the layout specific pieces */
- if (rsrc->layout == PAN_LINEAR) {
- rt->format.block = MALI_BLOCK_LINEAR;
+ if (rsrc->layout == MALI_TEXTURE_LINEAR) {
+ if (is_bifrost) {
+ rt->format.unk4 = 0x1;
+ } else {
+ rt->format.block = MALI_BLOCK_LINEAR;
+ }
+
rt->framebuffer = base;
rt->framebuffer_stride = stride / 16;
- } else if (rsrc->layout == PAN_TILED) {
- rt->format.block = MALI_BLOCK_TILED;
+ } else if (rsrc->layout == MALI_TEXTURE_TILED) {
+ if (is_bifrost) {
+ rt->format.unk3 |= 0x8;
+ } else {
+ rt->format.block = MALI_BLOCK_TILED;
+ }
+
rt->framebuffer = base;
rt->framebuffer_stride = stride;
- } else if (rsrc->layout == PAN_AFBC) {
+ } else if (rsrc->layout == MALI_TEXTURE_AFBC) {
rt->format.block = MALI_BLOCK_AFBC;
unsigned header_size = rsrc->slices[level].header_size;
rt->framebuffer = base + header_size;
rt->afbc.metadata = base;
rt->afbc.stride = 0;
- rt->afbc.unk = 0x30009;
+ rt->afbc.flags = MALI_AFBC_FLAGS;
+
+ unsigned components = util_format_get_nr_components(surf->format);
+
+ /* The "lossless colorspace transform" is lossy for R and RG formats */
+ if (components >= 3)
+ rt->afbc.flags |= MALI_AFBC_YTR;
/* TODO: The blob sets this to something nonzero, but it's not
* clear what/how to calculate/if it matters */
static void
panfrost_mfbd_set_zsbuf(
- struct bifrost_framebuffer *fb,
- struct bifrost_fb_extra *fbx,
+ struct mali_framebuffer *fb,
+ struct mali_framebuffer_extra *fbx,
struct pipe_surface *surf)
{
+ struct panfrost_device *dev = pan_device(surf->context->screen);
+ bool is_bifrost = dev->quirks & IS_BIFROST;
struct panfrost_resource *rsrc = pan_resource(surf->texture);
unsigned level = surf->u.tex.level;
- assert(surf->u.tex.first_layer == 0);
+ unsigned first_layer = surf->u.tex.first_layer;
+ assert(surf->u.tex.last_layer == first_layer);
- unsigned offset = rsrc->slices[level].offset;
+ mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer);
- if (rsrc->layout == PAN_AFBC) {
+ if (rsrc->layout == MALI_TEXTURE_AFBC) {
/* The only Z/S format we can compress is Z24S8 or variants
- * thereof (handled by the state tracker) */
+ * thereof (handled by the gallium frontend) */
assert(panfrost_is_z24s8_variant(surf->format));
- mali_ptr base = rsrc->bo->gpu + offset;
unsigned header_size = rsrc->slices[level].header_size;
fb->mfbd_flags |= MALI_MFBD_EXTRA;
- fbx->flags =
- MALI_EXTRA_PRESENT |
- MALI_EXTRA_AFBC |
- MALI_EXTRA_AFBC_ZS |
- MALI_EXTRA_ZS |
- 0x1; /* unknown */
+ fbx->flags_hi |= MALI_EXTRA_PRESENT;
+ fbx->flags_lo |= MALI_EXTRA_ZS | 0x1; /* unknown */
+ fbx->zs_block = MALI_BLOCK_AFBC;
fbx->ds_afbc.depth_stencil = base + header_size;
fbx->ds_afbc.depth_stencil_afbc_metadata = base;
fbx->ds_afbc.depth_stencil_afbc_stride = 0;
- fbx->ds_afbc.zero1 = 0x10009;
+ fbx->ds_afbc.flags = MALI_AFBC_FLAGS;
fbx->ds_afbc.padding = 0x1000;
- } else if (rsrc->layout == PAN_LINEAR) {
+ } else if (rsrc->layout == MALI_TEXTURE_LINEAR || rsrc->layout == MALI_TEXTURE_TILED) {
/* TODO: Z32F(S8) support, which is always linear */
int stride = rsrc->slices[level].stride;
fb->mfbd_flags |= MALI_MFBD_EXTRA;
- fbx->flags |= MALI_EXTRA_PRESENT | MALI_EXTRA_ZS;
+ fbx->flags_hi |= MALI_EXTRA_PRESENT;
+ fbx->flags_lo |= MALI_EXTRA_ZS;
+
+ fbx->ds_linear.depth = base;
- fbx->ds_linear.depth = rsrc->bo->gpu + offset;
- fbx->ds_linear.depth_stride = stride;
+ if (rsrc->layout == MALI_TEXTURE_LINEAR) {
+ fbx->zs_block = MALI_BLOCK_LINEAR;
+ fbx->ds_linear.depth_stride = stride / 16;
+ } else {
+ if (is_bifrost) {
+ fbx->zs_block = MALI_BLOCK_UNKNOWN;
+ fbx->flags_hi |= 0x4400;
+ fbx->flags_lo |= 0x1;
+ } else {
+ fbx->zs_block = MALI_BLOCK_TILED;
+ }
+
+ fbx->ds_linear.depth_stride = stride;
+ }
if (panfrost_is_z24s8_variant(surf->format)) {
- fbx->flags |= 0x1;
- } else if (surf->format == PIPE_FORMAT_Z32_UNORM) {
- /* default flags (0 in bottom place) */
+ fbx->flags_lo |= 0x1;
} else if (surf->format == PIPE_FORMAT_Z32_FLOAT) {
- fbx->flags |= 0xA;
+ fbx->flags_lo |= 0xA;
fb->mfbd_flags ^= 0x100;
fb->mfbd_flags |= 0x200;
} else if (surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
- fbx->flags |= 0x1000A;
+ fbx->flags_hi |= 0x400;
+ fbx->flags_lo |= 0xA;
fb->mfbd_flags ^= 0x100;
fb->mfbd_flags |= 0x201;
struct panfrost_resource *stencil = rsrc->separate_stencil;
struct panfrost_slice stencil_slice = stencil->slices[level];
- fbx->ds_linear.stencil = stencil->bo->gpu + stencil_slice.offset;
+ fbx->ds_linear.stencil = panfrost_get_texture_address(stencil, level, first_layer);
fbx->ds_linear.stencil_stride = stencil_slice.stride;
}
static mali_ptr
panfrost_mfbd_upload(struct panfrost_batch *batch,
- struct bifrost_framebuffer *fb,
- struct bifrost_fb_extra *fbx,
- struct bifrost_render_target *rts,
+ struct mali_framebuffer *fb,
+ struct mali_framebuffer_extra *fbx,
+ struct mali_render_target *rts,
unsigned rt_count)
{
off_t offset = 0;
/* Compute total size for transfer */
size_t total_sz =
- sizeof(struct bifrost_framebuffer) +
- (has_extra ? sizeof(struct bifrost_fb_extra) : 0) +
- sizeof(struct bifrost_render_target) * 4;
+ sizeof(struct mali_framebuffer) +
+ (has_extra ? sizeof(struct mali_framebuffer_extra) : 0) +
+ sizeof(struct mali_render_target) * 4;
struct panfrost_transfer m_f_trans =
panfrost_allocate_transient(batch, total_sz);
#undef UPLOAD
-static struct bifrost_framebuffer
+/* Determines whether a framebuffer uses too much tilebuffer space (requiring
+ * us to scale up the tile at a performance penalty). This is conservative but
+ * as far as I can tell you normally get 128 bits per pixel.
+ *
+ * Walks every colour buffer slot of the batch (batch->key.cbufs[0 ..
+ * batch->key.nr_cbufs)), takes the block size of each surface format, aligns
+ * it with ALIGN_POT(bpp, 4) and sums the results. Returns true when the sum
+ * is strictly greater than 16, false otherwise (including when there are no
+ * colour buffers, since the sum is then 0).
+ *
+ * Every colour buffer slot is asserted to be non-NULL.
+ *
+ * NOTE(review): 16 presumably means 16 bytes, i.e. the 128 bits mentioned
+ * above, assuming util_format_get_blocksize() returns bytes -- TODO confirm. */
+
+static bool
+pan_is_large_tib(struct panfrost_batch *batch)
+{
+ unsigned size = 0;
+
+ for (int cb = 0; cb < batch->key.nr_cbufs; ++cb) {
+ struct pipe_surface *surf = batch->key.cbufs[cb];
+ assert(surf);
+ unsigned bpp = util_format_get_blocksize(surf->format);
+ size += ALIGN_POT(bpp, 4); /* per-target footprint, aligned to 4 */
+ }
+
+ return (size > 16);
+}
+/*
+ * Builds and returns (by value) the struct mali_framebuffer descriptor for a
+ * batch. This comment describes the added ('+') side of the change.
+ *
+ * Common fields: width1/height1 and width2/height2 are MALI_POSITIVE() of
+ * batch->key.width / batch->key.height, rt_count_1 is MALI_POSITIVE() of
+ * batch->key.nr_cbufs, rt_count_2 is fixed at 4, and unk1 is chosen from
+ * pan_is_large_tib(batch).
+ *
+ * Hardware-specific fields depend on the IS_BIFROST quirk of the device:
+ *  - set: msaa.sample_locations and tiler_meta are filled in;
+ *  - clear: the shared_memory section (stack shift, scratchpad GPU address,
+ *    shared_workgroup_count) and the tiler descriptor are filled in.
+ *
+ * vertex_count is only forwarded to the tiler helpers.
+ */
+static struct mali_framebuffer
panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
{
struct panfrost_context *ctx = batch->ctx;
struct pipe_context *gallium = (struct pipe_context *) ctx;
- struct panfrost_screen *screen = pan_screen(gallium->screen);
+ struct panfrost_device *dev = pan_device(gallium->screen);
unsigned width = batch->key.width;
unsigned height = batch->key.height;
- unsigned shift = panfrost_get_stack_shift(batch->stack_size);
-
- struct bifrost_framebuffer framebuffer = {
+ struct mali_framebuffer mfbd = {
.width1 = MALI_POSITIVE(width),
.height1 = MALI_POSITIVE(height),
.width2 = MALI_POSITIVE(width),
.height2 = MALI_POSITIVE(height),
- .unk1 = 0x1080,
+ /* Seems to configure tib size: 0xc80 is used when pan_is_large_tib()
+  * reports a large tilebuffer footprint, 0x1080 otherwise */
+ .unk1 = pan_is_large_tib(batch) ? 0xc80 : 0x1080,
.rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs),
.rt_count_2 = 4,
-
- .unknown2 = 0x1f,
- .tiler = panfrost_emit_midg_tiler(batch, vertex_count),
-
- .stack_shift = shift,
- .unk0 = 0x1e,
- .scratchpad = panfrost_batch_get_scratchpad(batch, shift, screen->thread_tls_alloc, screen->core_count)->gpu
};
- return framebuffer;
+ if (dev->quirks & IS_BIFROST) { /* Bifrost-only descriptor fields */
+ mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
+ mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count);
+ } else { /* non-Bifrost: scratchpad/shared memory plus the midg tiler */
+ unsigned shift = panfrost_get_stack_shift(batch->stack_size);
+ struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch,
+ shift,
+ dev->thread_tls_alloc,
+ dev->core_count);
+ mfbd.shared_memory.stack_shift = shift;
+ mfbd.shared_memory.scratchpad = bo->gpu;
+ mfbd.shared_memory.shared_workgroup_count = ~0; /* all bits set; NOTE(review): meaning not evident here */
+
+ mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count);
+ }
+
+ return mfbd;
}
void
panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
{
- struct bifrost_framebuffer mfbd =
+ struct mali_framebuffer mfbd =
panfrost_emit_mfbd(batch, vertex_count);
memcpy(batch->framebuffer.cpu, &mfbd, sizeof(mfbd));
mali_ptr
panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
{
- struct bifrost_framebuffer fb = panfrost_emit_mfbd(batch, has_draws);
- struct bifrost_fb_extra fbx = {0};
- struct bifrost_render_target rts[4] = {0};
+ struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
+ bool is_bifrost = dev->quirks & IS_BIFROST;
+
+ struct mali_framebuffer fb = panfrost_emit_mfbd(batch, has_draws);
+ struct mali_framebuffer_extra fbx = {0};
+ struct mali_render_target rts[4] = {0};
/* We always upload at least one dummy GL_NONE render target */
.no_preload = true
};
+ if (is_bifrost) {
+ null_rt.flags = 0x8;
+ null_rt.unk3 = 0x8;
+ }
+
rts[cb].format = null_rt;
rts[cb].framebuffer = 0;
rts[cb].framebuffer_stride = 0;
/* When scanning out, the depth buffer is immediately invalidated, so
* we don't need to waste bandwidth writing it out. This can improve
- * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
+ * performance substantially (Z24X8_UNORM 1080p @ 60fps is 475 MB/s of
* memory bandwidth!).
*
* The exception is ReadPixels, but this is not supported on GLES so we
if (batch->key.nr_cbufs == 1) {
struct pipe_surface *surf = batch->key.cbufs[0];
struct panfrost_resource *rsrc = pan_resource(surf->texture);
- struct panfrost_bo *bo = rsrc->bo;
if (rsrc->checksummed) {
unsigned level = surf->u.tex.level;
struct panfrost_slice *slice = &rsrc->slices[level];
fb.mfbd_flags |= MALI_MFBD_EXTRA;
- fbx.flags |= MALI_EXTRA_PRESENT;
+ fbx.flags_hi |= MALI_EXTRA_PRESENT;
fbx.checksum_stride = slice->checksum_stride;
- fbx.checksum = bo->gpu + slice->checksum_offset;
+ if (slice->checksum_bo)
+ fbx.checksum = slice->checksum_bo->gpu;
+ else
+ fbx.checksum = rsrc->bo->gpu + slice->checksum_offset;
}
}