From f5c293425fa457d91be8e689501619dfb16ccefe Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 19 Aug 2019 14:30:53 -0700 Subject: [PATCH] panfrost: Correct polygon size computations While the algorithm for computing the header size has been correct for a while, we used a major hack to conservatively guess the body size. Let's scrap that and figure out the algorithm we actually need to use to be bit-identical with what the hardware expects. We do have to be careful to add the header size to total comptued BO size. It's not clear how big the polygon list needs to be in practice -- but it has to be somewhat bigger than the polygon list itself. This needs more investigation. If we size the polygon list exactly based on the polygon_list_size field, we get faults like: [ 1224.219886] panfrost ff9a0000.gpu: Unhandled Page fault in AS0 at VA 0x000000001BDE8000 Reason: TODO raw fault status: 0x660003C3 decoded fault status: SLAVE FAULT exception type 0xC3: TRANSLATION_FAULT_LEVEL3 access type 0x3: WRITE source id 0x6600 Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_context.c | 9 +- src/panfrost/encoder/pan_encoder.h | 4 +- src/panfrost/encoder/pan_tiler.c | 97 +++++++++++++++------- src/panfrost/pandecode/decode.c | 3 +- 4 files changed, 73 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 3d4b837697c..a07273c7da1 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -71,13 +71,15 @@ panfrost_emit_midg_tiler( unsigned header_size = panfrost_tiler_header_size( width, height, t.hierarchy_mask); - unsigned body_size = panfrost_tiler_body_size( + t.polygon_list_size = panfrost_tiler_full_size( width, height, t.hierarchy_mask); /* Sanity check */ if (t.hierarchy_mask) { - t.polygon_list = panfrost_job_get_polygon_list(batch, header_size + body_size); + t.polygon_list = panfrost_job_get_polygon_list(batch, + header_size + t.polygon_list_size); + /* Allow the entire tiler heap */ t.heap_start = ctx->tiler_heap.bo->gpu; @@ -98,9 +100,6 @@ panfrost_emit_midg_tiler( t.polygon_list_body = t.polygon_list + header_size; - t.polygon_list_size = - header_size + body_size; - return t; } diff --git a/src/panfrost/encoder/pan_encoder.h b/src/panfrost/encoder/pan_encoder.h index 0e135d0f511..8aa2df7240b 100644 --- a/src/panfrost/encoder/pan_encoder.h +++ b/src/panfrost/encoder/pan_encoder.h @@ -56,10 +56,10 @@ panfrost_pack_work_groups_fused( /* Tiler structure size computation */ unsigned -panfrost_tiler_header_size(unsigned width, unsigned height, uint8_t mask); +panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask); unsigned -panfrost_tiler_body_size(unsigned width, unsigned height, uint8_t mask); +panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask); unsigned panfrost_choose_hierarchy_mask( diff --git a/src/panfrost/encoder/pan_tiler.c b/src/panfrost/encoder/pan_tiler.c index 7718ad9fe48..af1e12e4805 100644 --- a/src/panfrost/encoder/pan_tiler.c +++ b/src/panfrost/encoder/pan_tiler.c @@ -40,7 +40,7 @@ * tile size, but Midgard features "hierarchical tiling", where power-of-two * multiples of the base tile size can be used: hierarchy level 0 (16x16), * level 1 (32x32), level 2 (64x64), per public information about Midgard's - * tiling. In fact, tiling goes up to 2048x2048 (!), although in practice + * tiling. In fact, tiling goes up to 4096x4096 (!), although in practice * 128x128 is the largest usually used (though higher modes are enabled). The * idea behind hierarchical tiling is to use low tiling levels for small * triangles and high levels for large triangles, to minimize memory bandwidth @@ -147,17 +147,17 @@ * * From this heuristic (or whatever), we determine the minimum allowable tile * size, and we use that to decide the hierarchy masking, selecting from the - * minimum "ideal" tile size to the maximum tile size (2048x2048). + * minimum "ideal" tile size to the maximum tile size (2048x2048 in practice). * * Once we have that mask and the framebuffer dimensions, we can compute the * size of the statically-sized polygon list structures, allocate them, and go! * */ -/* Hierarchical tiling spans from 16x16 to 2048x2048 tiles */ +/* Hierarchical tiling spans from 16x16 to 4096x4096 tiles */ #define MIN_TILE_SIZE 16 -#define MAX_TILE_SIZE 2048 +#define MAX_TILE_SIZE 4096 /* Constants as shifts for easier power-of-two iteration */ @@ -170,9 +170,16 @@ /* For each tile (across all hierarchy levels), there is 8 bytes of header */ #define HEADER_BYTES_PER_TILE 0x8 +/* Likewise, each tile per level has 512 bytes of body */ +#define FULL_BYTES_PER_TILE 0x200 + /* Absent any geometry, the minimum size of the header */ #define MINIMUM_HEADER_SIZE 0x200 +/* Mask of valid hierarchy levels: one bit for each level from min...max + * inclusive */ +#define HIERARCHY_MASK (((MAX_TILE_SIZE / MIN_TILE_SIZE) << 1) - 1) + /* If the width-x-height framebuffer is divided into tile_size-x-tile_size * tiles, how many tiles are there? Rounding up in each direction. For the * special case of tile_size=16, this aligns with the usual Midgard count. @@ -201,7 +208,12 @@ pan_tile_count(unsigned width, unsigned height, unsigned tile_size) * byte counts across the levels to find a byte count for all levels. */ static unsigned -panfrost_raw_header_size(unsigned width, unsigned height, unsigned masked_count) +panfrost_raw_segment_size( + unsigned width, + unsigned height, + unsigned masked_count, + unsigned end_level, + unsigned bytes_per_tile) { unsigned size = PROLOGUE_SIZE; @@ -212,66 +224,89 @@ panfrost_raw_header_size(unsigned width, unsigned height, unsigned masked_count) /* Iterate hierarchy levels / tile sizes */ - for (unsigned i = start_level; i < MAX_TILE_SHIFT; ++i) { + for (unsigned i = start_level; i <= end_level; ++i) { /* Shift from a level to a tile size */ unsigned tile_size = (1 << i); unsigned tile_count = pan_tile_count(width, height, tile_size); - unsigned header_bytes = HEADER_BYTES_PER_TILE * tile_count; + unsigned level_count = bytes_per_tile * tile_count; - size += header_bytes; + size += level_count; } /* This size will be used as an offset, so ensure it's aligned */ return ALIGN_POT(size, 512); } -/* Given a hierarchy mask and a framebuffer size, compute the header size */ +/* Given a hierarchy mask and a framebuffer size, compute the size of one of + * the segments (header or body) */ -unsigned -panfrost_tiler_header_size(unsigned width, unsigned height, uint8_t mask) +static unsigned +panfrost_segment_size( + unsigned width, unsigned height, + unsigned mask, unsigned bytes_per_tile) { - /* If no hierarchy levels are enabled, that means there is no geometry - * for the tiler to process, so use a minimum size. Used for clears */ - - if (mask == 0x00) - return MINIMUM_HEADER_SIZE; + /* The tiler-disabled case should have been handled by the caller */ + assert(mask); /* Some levels are enabled. Ensure that only smaller levels are * disabled and there are no gaps. Theoretically the hardware is more * flexible, but there's no known reason to use other configurations - * and this keeps the code simple. Since we know the 0x80 bit is set, - * ctz(mask) will return the number of masked off levels. */ + * and this keeps the code simple. Since we know the 0x80 or 0x100 bit + * is set, ctz(mask) will return the number of masked off levels. */ unsigned masked_count = __builtin_ctz(mask); - assert(mask & 0x80); + assert(mask & (0x80 | 0x100)); assert(((mask >> masked_count) & ((mask >> masked_count) + 1)) == 0); + /* Figure out the top level */ + unsigned unused_count = __builtin_clz(mask); + unsigned top_bit = ((8 * sizeof(mask)) - 1) - unused_count; + + /* We don't have bits for nonexistant levels below 16x16 */ + unsigned top_level = top_bit + 4; + /* Everything looks good. Use the number of trailing zeroes we found to * figure out how many smaller levels are disabled to compute the * actual header size */ - return panfrost_raw_header_size(width, height, masked_count); + return panfrost_raw_segment_size(width, height, + masked_count, top_level, bytes_per_tile); } -/* The body seems to be about 512 bytes per tile. Noting that the header is - * about 8 bytes per tile, we can be a little sloppy and estimate the body size - * to be equal to the header size * (512/8). Given the header size is a - * considerable overestimate, this is fine. Eventually, we should maybe figure - * out how to actually implement this. */ + +/* Given a hierarchy mask and a framebuffer size, compute the header size */ unsigned -panfrost_tiler_body_size(unsigned width, unsigned height, uint8_t mask) +panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask) { - /* No levels means no body */ - if (!mask) - return 0x00; + mask &= HIERARCHY_MASK; - unsigned header_size = panfrost_tiler_header_size(width, height, mask); - return ALIGN_POT(header_size * 512 / 8, 512); + /* If no hierarchy levels are enabled, that means there is no geometry + * for the tiler to process, so use a minimum size. Used for clears */ + + if (mask == 0x00) + return MINIMUM_HEADER_SIZE; + + return panfrost_segment_size(width, height, mask, HEADER_BYTES_PER_TILE); } +/* The combined header/body is sized similarly (but it is significantly + * larger), except that it can be empty when the tiler disabled, rather than + * getting clamped to a minimum size. + */ + +unsigned +panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask) +{ + mask &= HIERARCHY_MASK; + + if (mask == 0x00) + return MINIMUM_HEADER_SIZE; + + return panfrost_segment_size(width, height, mask, FULL_BYTES_PER_TILE); +} /* In the future, a heuristic to choose a tiler hierarchy mask would go here. * At the moment, we just default to 0xFF, which enables all possible hierarchy diff --git a/src/panfrost/pandecode/decode.c b/src/panfrost/pandecode/decode.c index 0af688f5729..476838d23fc 100644 --- a/src/panfrost/pandecode/decode.c +++ b/src/panfrost/pandecode/decode.c @@ -506,8 +506,7 @@ pandecode_midgard_tiler_descriptor( * ourselves for comparison */ unsigned ref_header = panfrost_tiler_header_size(width, height, t->hierarchy_mask); - unsigned ref_body = panfrost_tiler_body_size(width, height, t->hierarchy_mask); - unsigned ref_size = ref_header + ref_body; + unsigned ref_size = panfrost_tiler_full_size(width, height, t->hierarchy_mask); if (!((ref_header == body_offset) && (ref_size == t->polygon_list_size))) { pandecode_msg("XXX: bad polygon list size (expected %d / 0x%x)\n", -- 2.30.2