From: Rob Clark Date: Sat, 25 Apr 2020 19:16:35 +0000 (-0700) Subject: freedreno/a6xx: pre-calculate expected vsc stream sizes X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=f561e516c8a01993ea83f5d48e0126d0b7b6528b freedreno/a6xx: pre-calculate expected vsc stream sizes We should only rely on overflow detection for indirect draws, where we have no other option. This doesn't use quite the worst-possible-case sizes, which in practice seem to be ~20x larger than what is required. But instead uses roughly half of that. Signed-off-by: Rob Clark Part-of: --- diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 0fd7bcf5ddd..0268bb11a55 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -195,6 +195,8 @@ a6xx_SOURCES := \ a6xx/fd6_screen.h \ a6xx/fd6_texture.c \ a6xx/fd6_texture.h \ + a6xx/fd6_vsc.c \ + a6xx/fd6_vsc.h \ a6xx/fd6_zsa.c \ a6xx/fd6_zsa.h diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index 25d81018ccc..f8ad3be75c8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -38,6 +38,7 @@ #include "fd6_emit.h" #include "fd6_program.h" #include "fd6_format.h" +#include "fd6_vsc.h" #include "fd6_zsa.h" static void @@ -192,6 +193,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, if (emit.key.gs) emit.key.key.has_gs = true; + if (!(emit.key.hs || emit.key.ds || emit.key.gs || info->indirect)) + fd6_vsc_update_sizes(ctx->batch, info); + fixup_shader_state(ctx, &emit.key.key); if (!(ctx->dirty & FD_DIRTY_PROG)) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index 3ee55f98adf..befe19aadfa 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -326,6 +326,27 @@ 
update_vsc_pipe(struct fd_batch *batch) struct fd_ringbuffer *ring = batch->gmem; int i; + if (batch->draw_strm_bits/8 > fd6_ctx->vsc_draw_strm_pitch) { + if (fd6_ctx->vsc_draw_strm) + fd_bo_del(fd6_ctx->vsc_draw_strm); + fd6_ctx->vsc_draw_strm = NULL; + /* Note: probably only need to align to 0x40, but aligning stronger + * reduces the odds that we will have to realloc again on the next + * frame: + */ + fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits/8, 0x4000); + debug_printf("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x\n", + fd6_ctx->vsc_draw_strm_pitch); + } + + if (batch->prim_strm_bits/8 > fd6_ctx->vsc_prim_strm_pitch) { + if (fd6_ctx->vsc_prim_strm) + fd_bo_del(fd6_ctx->vsc_prim_strm); + fd6_ctx->vsc_prim_strm = NULL; + fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits/8, 0x4000); + debug_printf("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x\n", + fd6_ctx->vsc_prim_strm_pitch); + } if (!fd6_ctx->vsc_draw_strm) { fd6_ctx->vsc_draw_strm = fd_bo_new(ctx->screen->dev, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c new file mode 100644 index 00000000000..daf97fe48ac --- /dev/null +++ b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c @@ -0,0 +1,160 @@ +/* + * Copyright © 2020 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_prim.h"
+
+#include "freedreno_batch.h"
+#include "freedreno_gmem.h"
+
+#include "fd6_vsc.h"
+
+/*
+ * Helper util to update expected vsc draw and primitive stream sizes, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format
+ */
+
+enum bits_per {
+	byte = 8,
+	dword = 4 * byte,
+};
+
+/**
+ * Determine # of bits required to store a given number, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#numbers
+ */
+static unsigned
+number_size_bits(unsigned nr)
+{
+	unsigned n = util_last_bit(nr);
+	assert(n);  /* encoding 0 is not possible */
+	return n + (n - 1);
+}
+
+/**
+ * Determine # of bits required to store a given bitfield, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#bitfields
+ */
+static unsigned
+bitfield_size_bits(unsigned n)
+{
+	return n + 1;  /* worst case is always 1 + nr of bits */
+}
+
+/* Primitives in the draw, at least 1: number_size_bits() cannot encode 0. */
+static unsigned
+prim_count(const struct pipe_draw_info *info)
+{
+	/* PIPE_PRIM_MAX used internally for RECTLIST blits on 3d pipe: */
+	unsigned vtx_per_prim = (info->mode == PIPE_PRIM_MAX) ? 2 :
+			u_vertices_per_prim(info->mode);
+	return MAX2(1, (info->count * info->instance_count) / vtx_per_prim);
+}
+
+/**
+ * The primitive stream uses a run-length encoding, where each packet contains a
+ * bitfield of bins covered and then the number of primitives which have the same
+ * bitfield.
+ * Each packet consists of the following, in order:
+ *
+ *  - The (compressed) bitfield of bins covered
+ *  - The number of primitives with this bitset
+ *  - Checksum
+ *
+ * In the worst case every primitive has a different bitmask.  Assuming instead
+ * that every other primitive does still yields conservatively large primitive
+ * stream sizes in practice.  (Ie. 10x what is needed, vs. 20x)
+ *
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#primitive-streams
+ */
+static unsigned
+primitive_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins)
+{
+	/* One packet for every second primitive, per the estimate above: */
+	unsigned num_pkts = DIV_ROUND_UP(prim_count(info), 2);
+	unsigned pkt_bits = bitfield_size_bits(num_bins)  /* bins covered */
+		+ number_size_bits(1)   /* primitive count for this bitfield */
+		+ 1;                    /* checksum */
+	return align(pkt_bits * num_pkts, dword);
+}
+
+/**
+ * Each draw stream packet contains the following:
+ *
+ *  - Bin bitfield
+ *  - Last instance bit
+ *  - If bitfield is empty, the number of draws it is empty for, otherwise
+ *    the size of the corresponding primitive stream in DWORD's.
+ *  - Checksum
+ *
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#draw-streams
+ */
+static unsigned
+draw_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins,
+		unsigned prim_strm_bits)
+{
+	unsigned ndwords = MAX2(1, prim_strm_bits / dword);  /* 0 is not encodable */
+	assert(info->instance_count > 0);
+	return (bitfield_size_bits(num_bins)    /* bitfield of bins */
+		+ 1                             /* last-instance-bit */
+		+ number_size_bits(ndwords)     /* size of corresponding prim strm */
+		+ 1                             /* checksum */
+		) * info->instance_count;
+}
+
+/* Add the expected vsc prim/draw stream sizes (in bits) of one draw to batch. */
+void
+fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info)
+{
+	if (!batch->num_bins_per_pipe) {
+		batch->num_bins_per_pipe = fd_gmem_estimate_bins_per_pipe(batch);
+
+		/* Also account (in the draw stream) for the final draw-stream packet:
+		 *
+		 * If there are N bins, the final packet, after all the draws are
+		 * done, consists of a 1 followed by N + 17 0's, plus a final 1.
+		 * This uses the otherwise-unused pattern of a non-empty bitfield
+		 * (initial 1) that is nonetheless empty (has all 0's)
+		 */
+		unsigned final_pkt_sz = 1 + batch->num_bins_per_pipe + 17 + 1;
+		batch->draw_strm_bits += align(final_pkt_sz, dword);
+	}
+
+	unsigned prim_strm_bits =
+		primitive_stream_size_bits(info, batch->num_bins_per_pipe);
+	unsigned draw_strm_bits =
+		draw_stream_size_bits(info, batch->num_bins_per_pipe, prim_strm_bits);
+
+#if 0
+	printf("vsc: prim_strm_bits=%d, draw_strm_bits=%d, nb=%u, ic=%u, c=%u, pc=%u (%s)\n",
+			prim_strm_bits, draw_strm_bits, batch->num_bins_per_pipe,
+			info->instance_count, info->count,
+			(info->count * info->instance_count) /
+				u_vertices_per_prim(info->mode),
+			u_prim_name(info->mode));
+#endif
+
+	batch->prim_strm_bits += prim_strm_bits;
+	batch->draw_strm_bits += draw_strm_bits;
+}
+
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h
new file mode 100644
index 00000000000..50470f683e0
--- /dev/null
+++ 
b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h @@ -0,0 +1,29 @@ +/* + * Copyright © 2020 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef FD6_VSC_H_ +#define FD6_VSC_H_ + +void fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info); + +#endif /* FD6_VSC_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 106959b4fed..082329eb59c 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -84,6 +84,9 @@ batch_init(struct fd_batch *batch) batch->gmem_reason = 0; batch->num_draws = 0; batch->num_vertices = 0; + batch->num_bins_per_pipe = 0; + batch->prim_strm_bits = 0; + batch->draw_strm_bits = 0; batch->stage = FD_STAGE_NULL; fd_reset_wfi(batch); diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index 9266790bb80..479d78d5eca 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -129,9 +129,16 @@ struct fd_batch { */ const struct fd_gmem_stateobj *gmem_state; - unsigned num_draws; /* number of draws in current batch */ + unsigned num_draws; /* number of draws in current batch */ unsigned num_vertices; /* number of vertices in current batch */ + /* Currently only used on a6xx, to calculate vsc prim/draw stream + * sizes: + */ + unsigned num_bins_per_pipe; + unsigned prim_strm_bits; + unsigned draw_strm_bits; + /* Track the maximal bounds of the scissor of all the draws within a * batch. Used at the tile rendering step (fd_gmem_render_tiles(), * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem. 
diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index a5c0935e07e..919baf070de 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -205,6 +205,8 @@ files_libfreedreno = files( 'a6xx/fd6_screen.h', 'a6xx/fd6_texture.c', 'a6xx/fd6_texture.h', + 'a6xx/fd6_vsc.c', + 'a6xx/fd6_vsc.h', 'a6xx/fd6_zsa.c', 'a6xx/fd6_zsa.h', 'ir3/ir3_cache.c',