X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fpanfrost%2Fpan_mfbd.c;h=deccd3a42397e0d69cb531cb84ad09d7dfc70c04;hb=02e768e6a96e1a0aad6d1cbb002bfb883078ad40;hp=4b762d1b6466ea42953cb223cd9ccaa3d3a86803;hpb=9ac106defe351428fbe3c62547e6be918b603d32;p=mesa.git diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c index 4b762d1b646..deccd3a4239 100644 --- a/src/gallium/drivers/panfrost/pan_mfbd.c +++ b/src/gallium/drivers/panfrost/pan_mfbd.c @@ -48,7 +48,7 @@ panfrost_mfbd_format(struct pipe_surface *surf) .unk2 = 0x1, .nr_channels = MALI_POSITIVE(desc->nr_channels), .unk3 = 0x4, - .flags = 0x8, + .flags = 0x2, .swizzle = panfrost_translate_swizzle_4(swizzle), .no_preload = true }; @@ -74,6 +74,7 @@ panfrost_mfbd_format(struct pipe_surface *surf) case PIPE_FORMAT_A4B4G4R4_UNORM: case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_R4G4B4A4_UNORM: fmt.unk1 = 0x10000000; fmt.unk3 = 0x5; fmt.nr_channels = MALI_POSITIVE(1); @@ -89,6 +90,7 @@ panfrost_mfbd_format(struct pipe_surface *surf) break; case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_R5G5B5A1_UNORM: case PIPE_FORMAT_B5G5R5X1_UNORM: fmt.unk1 = 0x18000000; fmt.unk3 = 0x7; @@ -209,46 +211,63 @@ panfrost_mfbd_set_cbuf( assert(surf->u.tex.last_layer == first_layer); int stride = rsrc->slices[level].stride; - mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer); + /* Only set layer_stride for layered MSAA rendering */ + + unsigned nr_samples = surf->texture->nr_samples; + unsigned layer_stride = (nr_samples > 1) ? rsrc->slices[level].size0 : 0; + + mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer, 0); rt->format = panfrost_mfbd_format(surf); - /* Now, we set the layout specific pieces */ + if (layer_stride) + rt->format.msaa = MALI_MSAA_LAYERED; + else if (surf->nr_samples) + rt->format.msaa = MALI_MSAA_AVERAGE; + else + rt->format.msaa = MALI_MSAA_SINGLE; - if (rsrc->layout == MALI_TEXTURE_LINEAR) { + /* Now, we set the modifier specific pieces */ + + if (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) { if (is_bifrost) { rt->format.unk4 = 0x1; } else { - rt->format.block = MALI_BLOCK_LINEAR; + rt->format.block = MALI_BLOCK_FORMAT_LINEAR; } rt->framebuffer = base; rt->framebuffer_stride = stride / 16; - } else if (rsrc->layout == MALI_TEXTURE_TILED) { + rt->layer_stride = layer_stride; + } else if (rsrc->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) { if (is_bifrost) { rt->format.unk3 |= 0x8; } else { - rt->format.block = MALI_BLOCK_TILED; + rt->format.block = MALI_BLOCK_FORMAT_TILED; } rt->framebuffer = base; rt->framebuffer_stride = stride; - } else if (rsrc->layout == MALI_TEXTURE_AFBC) { - rt->format.block = MALI_BLOCK_AFBC; + rt->layer_stride = layer_stride; + } else if (drm_is_afbc(rsrc->modifier)) { + rt->format.block = MALI_BLOCK_FORMAT_AFBC; unsigned header_size = rsrc->slices[level].header_size; rt->framebuffer = base + header_size; + rt->layer_stride = layer_stride; rt->afbc.metadata = base; rt->afbc.stride = 0; - rt->afbc.flags = MALI_AFBC_FLAGS | MALI_AFBC_YTR; + rt->afbc.flags = MALI_AFBC_FLAGS; + + if (rsrc->modifier & AFBC_FORMAT_MOD_YTR) + rt->afbc.flags |= MALI_AFBC_YTR; /* TODO: The blob sets this to something nonzero, but it's not * clear what/how to calculate/if it matters */ rt->framebuffer_stride = 0; } else { - fprintf(stderr, "Invalid render layout (cbuf)"); - assert(0); + unreachable("Invalid mod"); } } @@ -262,24 +281,29 @@ panfrost_mfbd_set_zsbuf( bool is_bifrost = dev->quirks & IS_BIFROST; struct panfrost_resource *rsrc = pan_resource(surf->texture); + unsigned nr_samples = surf->texture->nr_samples; + nr_samples = MAX2(nr_samples, 1); + + fbx->zs_samples = MALI_POSITIVE(nr_samples); + unsigned level = surf->u.tex.level; unsigned first_layer = surf->u.tex.first_layer; assert(surf->u.tex.last_layer == first_layer); - mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer); + mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer, 0); - if (rsrc->layout == MALI_TEXTURE_AFBC) { + if (drm_is_afbc(rsrc->modifier)) { /* The only Z/S format we can compress is Z24S8 or variants * thereof (handled by the gallium frontend) */ assert(panfrost_is_z24s8_variant(surf->format)); unsigned header_size = rsrc->slices[level].header_size; - fb->mfbd_flags |= MALI_MFBD_EXTRA; + fb->mfbd_flags |= MALI_MFBD_EXTRA | MALI_MFBD_DEPTH_WRITE; fbx->flags_hi |= MALI_EXTRA_PRESENT; fbx->flags_lo |= MALI_EXTRA_ZS | 0x1; /* unknown */ - fbx->zs_block = MALI_BLOCK_AFBC; + fbx->zs_block = MALI_BLOCK_FORMAT_AFBC; fbx->ds_afbc.depth_stencil = base + header_size; fbx->ds_afbc.depth_stencil_afbc_metadata = base; @@ -287,30 +311,36 @@ panfrost_mfbd_set_zsbuf( fbx->ds_afbc.flags = MALI_AFBC_FLAGS; fbx->ds_afbc.padding = 0x1000; - } else if (rsrc->layout == MALI_TEXTURE_LINEAR || rsrc->layout == MALI_TEXTURE_TILED) { + } else { + assert(rsrc->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || rsrc->modifier == DRM_FORMAT_MOD_LINEAR); /* TODO: Z32F(S8) support, which is always linear */ int stride = rsrc->slices[level].stride; - fb->mfbd_flags |= MALI_MFBD_EXTRA; + unsigned layer_stride = (nr_samples > 1) ? rsrc->slices[level].size0 : 0; + + fb->mfbd_flags |= MALI_MFBD_EXTRA | MALI_MFBD_DEPTH_WRITE; fbx->flags_hi |= MALI_EXTRA_PRESENT; fbx->flags_lo |= MALI_EXTRA_ZS; fbx->ds_linear.depth = base; - if (rsrc->layout == MALI_TEXTURE_LINEAR) { - fbx->zs_block = MALI_BLOCK_LINEAR; + if (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) { + fbx->zs_block = MALI_BLOCK_FORMAT_LINEAR; fbx->ds_linear.depth_stride = stride / 16; + fbx->ds_linear.depth_layer_stride = layer_stride; } else { if (is_bifrost) { - fbx->zs_block = MALI_BLOCK_UNKNOWN; - fbx->flags_hi |= 0x4400; + /* XXX: Bifrost fields are different here */ + fbx->zs_block = 1; + fbx->flags_hi |= 0x440; fbx->flags_lo |= 0x1; } else { - fbx->zs_block = MALI_BLOCK_TILED; + fbx->zs_block = MALI_BLOCK_FORMAT_TILED; } fbx->ds_linear.depth_stride = stride; + fbx->ds_linear.depth_layer_stride = layer_stride; } if (panfrost_is_z24s8_variant(surf->format)) { @@ -320,20 +350,19 @@ panfrost_mfbd_set_zsbuf( fb->mfbd_flags ^= 0x100; fb->mfbd_flags |= 0x200; } else if (surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { - fbx->flags_hi |= 0x400; + fbx->flags_hi |= 0x40; fbx->flags_lo |= 0xA; fb->mfbd_flags ^= 0x100; fb->mfbd_flags |= 0x201; struct panfrost_resource *stencil = rsrc->separate_stencil; struct panfrost_slice stencil_slice = stencil->slices[level]; + unsigned stencil_layer_stride = (nr_samples > 1) ? stencil_slice.size0 : 0; - fbx->ds_linear.stencil = panfrost_get_texture_address(stencil, level, first_layer); + fbx->ds_linear.stencil = panfrost_get_texture_address(stencil, level, first_layer, 0); fbx->ds_linear.stencil_stride = stencil_slice.stride; + fbx->ds_linear.stencil_layer_stride = stencil_layer_stride; } - - } else { - assert(0); } } @@ -363,10 +392,10 @@ panfrost_mfbd_upload(struct panfrost_batch *batch, size_t total_sz = sizeof(struct mali_framebuffer) + (has_extra ? sizeof(struct mali_framebuffer_extra) : 0) + - sizeof(struct mali_render_target) * 4; + sizeof(struct mali_render_target) * 8; struct panfrost_transfer m_f_trans = - panfrost_allocate_transient(batch, total_sz); + panfrost_pool_alloc_aligned(&batch->pool, total_sz, 64); /* Do the transfer */ @@ -375,7 +404,7 @@ panfrost_mfbd_upload(struct panfrost_batch *batch, if (has_extra) UPLOAD(m_f_trans, offset, fbx, total_sz); - for (unsigned c = 0; c < 4; ++c) { + for (unsigned c = 0; c < 8; ++c) { UPLOAD(m_f_trans, offset, &rts[c], total_sz); } @@ -390,23 +419,56 @@ panfrost_mfbd_upload(struct panfrost_batch *batch, #undef UPLOAD +/* Determines the # of bytes per pixel we need to reserve for a given format in + * the tilebuffer (compared to 128-bit budget, etc). Usually the same as the + * bytes per pixel of the format itself, but there are some special cases I + * don't understand. */ + +static unsigned +pan_bytes_per_pixel_tib(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + if (util_format_is_unorm8(desc) || format == PIPE_FORMAT_B5G6R5_UNORM) + return 4; + + return desc->block.bits / 8; +} + /* Determines whether a framebuffer uses too much tilebuffer space (requiring * us to scale up the tile at a performance penalty). This is conservative but * afaict you get 128-bits per pixel normally */ -static bool -pan_is_large_tib(struct panfrost_batch *batch) +static unsigned +pan_tib_size(struct panfrost_batch *batch) { unsigned size = 0; for (int cb = 0; cb < batch->key.nr_cbufs; ++cb) { struct pipe_surface *surf = batch->key.cbufs[cb]; assert(surf); - unsigned bpp = util_format_get_blocksize(surf->format); - size += ALIGN_POT(bpp, 4); + size += pan_bytes_per_pixel_tib(surf->format); } - return (size > 16); + return size; +} + +static unsigned +pan_tib_shift(struct panfrost_batch *batch) +{ + unsigned size = pan_tib_size(batch); + + if (size > 128) + return 4; + else if (size > 64) + return 5; + else if (size > 32) + return 6; + else if (size > 16) + return 7; + else + return 8; } static struct mali_framebuffer @@ -425,10 +487,10 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count) .width2 = MALI_POSITIVE(width), .height2 = MALI_POSITIVE(height), - /* Seems to configure tib size */ - .unk1 = pan_is_large_tib(batch) ? 0xc80 : 0x1080, + /* Configures tib size */ + .unk1 = (pan_tib_shift(batch) << 9) | 0x80, - .rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs), + .rt_count_1 = MALI_POSITIVE(MAX2(batch->key.nr_cbufs, 1)), .rt_count_2 = 4, }; @@ -436,13 +498,16 @@ panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count) mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch); mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count); } else { - unsigned shift = panfrost_get_stack_shift(batch->stack_size); - struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch, - shift, - dev->thread_tls_alloc, - dev->core_count); - mfbd.shared_memory.stack_shift = shift; - mfbd.shared_memory.scratchpad = bo->gpu; + if (batch->stack_size) { + unsigned shift = panfrost_get_stack_shift(batch->stack_size); + struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch, + batch->stack_size, + dev->thread_tls_alloc, + dev->core_count); + mfbd.shared_memory.stack_shift = shift; + mfbd.shared_memory.scratchpad = bo->gpu; + } + mfbd.shared_memory.shared_workgroup_count = ~0; mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count); @@ -470,34 +535,33 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws) struct mali_framebuffer fb = panfrost_emit_mfbd(batch, has_draws); struct mali_framebuffer_extra fbx = {0}; - struct mali_render_target rts[4] = {0}; + struct mali_render_target rts[8] = {0}; /* We always upload at least one dummy GL_NONE render target */ unsigned rt_descriptors = MAX2(batch->key.nr_cbufs, 1); fb.rt_count_1 = MALI_POSITIVE(rt_descriptors); - fb.rt_count_2 = rt_descriptors; fb.mfbd_flags = 0x100; - /* TODO: MRT clear */ - panfrost_mfbd_clear(batch, &fb, &fbx, rts, fb.rt_count_2); - + panfrost_mfbd_clear(batch, &fb, &fbx, rts, rt_descriptors); /* Upload either the render target or a dummy GL_NONE target */ + unsigned offset = 0; + unsigned tib_shift = pan_tib_shift(batch); + for (int cb = 0; cb < rt_descriptors; ++cb) { struct pipe_surface *surf = batch->key.cbufs[cb]; + unsigned rt_offset = offset << tib_shift; - if (surf) { - panfrost_mfbd_set_cbuf(&rts[cb], surf); + if (surf && ((batch->clear | batch->draws) & (PIPE_CLEAR_COLOR0 << cb))) { + if (MAX2(surf->nr_samples, surf->texture->nr_samples) > 1) + batch->requirements |= PAN_REQ_MSAA; - /* What is this? Looks like some extension of the bpp - * field. Maybe it establishes how much internal - * tilebuffer space is reserved? */ + panfrost_mfbd_set_cbuf(&rts[cb], surf); - unsigned bpp = util_format_get_blocksize(surf->format); - fb.rt_count_2 = MAX2(fb.rt_count_2, ALIGN_POT(bpp, 4) / 4); + offset += pan_bytes_per_pixel_tib(surf->format); } else { struct mali_rt_format null_rt = { .unk1 = 0x4000000, @@ -505,7 +569,7 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws) }; if (is_bifrost) { - null_rt.flags = 0x8; + null_rt.flags = 0x2; null_rt.unk3 = 0x8; } @@ -515,10 +579,15 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws) } /* TODO: Break out the field */ - rts[cb].format.unk1 |= (cb * 0x400); + rts[cb].format.unk1 |= rt_offset; } - if (batch->key.zsbuf) { + fb.rt_count_2 = MAX2(DIV_ROUND_UP(offset, 1 << (10 - tib_shift)), 1); + + if (batch->key.zsbuf && ((batch->clear | batch->draws) & PIPE_CLEAR_DEPTHSTENCIL)) { + if (MAX2(batch->key.zsbuf->nr_samples, batch->key.zsbuf->nr_samples) > 1) + batch->requirements |= PAN_REQ_MSAA; + panfrost_mfbd_set_zsbuf(&fb, &fbx, batch->key.zsbuf); } @@ -536,8 +605,6 @@ panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws) /* Actualize the requirements */ if (batch->requirements & PAN_REQ_MSAA) { - rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA; - /* XXX */ fb.unk1 |= (1 << 4) | (1 << 1); fb.rt_count_2 = 4;