freedreno/a6xx: pre-calculate expected vsc stream sizes
author Rob Clark <robdclark@chromium.org>
Sat, 25 Apr 2020 19:16:35 +0000 (12:16 -0700)
committer Marge Bot <eric+marge@anholt.net>
Tue, 28 Apr 2020 23:31:58 +0000 (23:31 +0000)
We should only rely on overflow detection for indirect draws, where we
have no other option.

This doesn't use quite the worst-possible-case sizes, which in practice
seem to be ~20x larger than what is required.  But instead uses roughly
half of that.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4750>

src/gallium/drivers/freedreno/Makefile.sources
src/gallium/drivers/freedreno/a6xx/fd6_draw.c
src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
src/gallium/drivers/freedreno/a6xx/fd6_vsc.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a6xx/fd6_vsc.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_batch.c
src/gallium/drivers/freedreno/freedreno_batch.h
src/gallium/drivers/freedreno/meson.build

index 0fd7bcf5dddaaa791b9053a64be041c31dc93a87..0268bb11a553b4838f4513a9265398610a99d23e 100644 (file)
@@ -195,6 +195,8 @@ a6xx_SOURCES := \
        a6xx/fd6_screen.h \
        a6xx/fd6_texture.c \
        a6xx/fd6_texture.h \
+       a6xx/fd6_vsc.c \
+       a6xx/fd6_vsc.h \
        a6xx/fd6_zsa.c \
        a6xx/fd6_zsa.h
 
index 25d81018cccf0c971423c92d51d095a1d12051fb..f8ad3be75c8f4725151f1797bda2844b57e906da 100644 (file)
@@ -38,6 +38,7 @@
 #include "fd6_emit.h"
 #include "fd6_program.h"
 #include "fd6_format.h"
+#include "fd6_vsc.h"
 #include "fd6_zsa.h"
 
 static void
@@ -192,6 +193,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
        if (emit.key.gs)
                emit.key.key.has_gs = true;
 
+       if (!(emit.key.hs || emit.key.ds || emit.key.gs || info->indirect))
+               fd6_vsc_update_sizes(ctx->batch, info);
+
        fixup_shader_state(ctx, &emit.key.key);
 
        if (!(ctx->dirty & FD_DIRTY_PROG)) {
index 3ee55f98adf471bd42014899ef19b857680a4241..befe19aadfae6bd29358d2f0ed1373b5b227da0c 100644 (file)
@@ -326,6 +326,27 @@ update_vsc_pipe(struct fd_batch *batch)
        struct fd_ringbuffer *ring = batch->gmem;
        int i;
 
+       if (batch->draw_strm_bits/8 > fd6_ctx->vsc_draw_strm_pitch) {
+               if (fd6_ctx->vsc_draw_strm)
+                       fd_bo_del(fd6_ctx->vsc_draw_strm);
+               fd6_ctx->vsc_draw_strm = NULL;
+               /* Note: probably only need to align to 0x40, but aligning stronger
+                * reduces the odds that we will have to realloc again on the next
+                * frame:
+                */
+               fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits/8, 0x4000);
+               debug_printf("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x\n",
+                               fd6_ctx->vsc_draw_strm_pitch);
+       }
+
+       if (batch->prim_strm_bits/8 > fd6_ctx->vsc_prim_strm_pitch) {
+               if (fd6_ctx->vsc_prim_strm)
+                       fd_bo_del(fd6_ctx->vsc_prim_strm);
+               fd6_ctx->vsc_prim_strm = NULL;
+               fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits/8, 0x4000);
+               debug_printf("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x\n",
+                               fd6_ctx->vsc_prim_strm_pitch);
+       }
 
        if (!fd6_ctx->vsc_draw_strm) {
                fd6_ctx->vsc_draw_strm = fd_bo_new(ctx->screen->dev,
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.c
new file mode 100644 (file)
index 0000000..daf97fe
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_prim.h"
+
+#include "freedreno_batch.h"
+#include "freedreno_gmem.h"
+
+#include "fd6_vsc.h"
+
+/*
+ * Helper util to update expected vsc draw and primitive stream sizes, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format
+ */
+
+enum {
+       byte = 8,               /* number of bits in a byte */
+       dword = 4 * byte,       /* number of bits in a dword (four bytes) */
+} bits_per;                    /* besides the constants, this also declares a file-scope object named bits_per; nothing in the visible code reads it */
+
+/**
+ * Number of bits needed to store the value 'nr' in a visibility stream, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#numbers
+ *
+ * With n = util_last_bit(nr), the size is n + (n - 1) bits.  Zero cannot be
+ * encoded, so 'nr' must be non-zero.
+ */
+static unsigned
+number_size_bits(unsigned nr)
+{
+       const unsigned last_bit = util_last_bit(nr);
+
+       /* encoding 0 is not possible */
+       assert(last_bit);
+
+       return (2 * last_bit) - 1;
+}
+
+/**
+ * Number of bits required to store a bitfield of 'n' entries, see
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#bitfields
+ *
+ * This is the worst case size, which is always one bit more than the number
+ * of bits in the bitfield.
+ */
+static unsigned
+bitfield_size_bits(unsigned n)
+{
+       const unsigned worst_case_bits = n + 1;
+
+       return worst_case_bits;
+}
+
+/**
+ * Estimate the number of primitives generated by a draw.
+ *
+ * The vertex count across all instances (count * instance_count) is divided
+ * by the number of vertices per primitive for the draw's mode.
+ *
+ * The result is clamped to a minimum of one.  A draw with too few vertices
+ * to form a primitive (or with zero instances) would otherwise yield zero,
+ * which makes the primitive stream estimate zero dwords long, and
+ * draw_stream_size_bits() then passes that zero to number_size_bits(), which
+ * cannot encode it.
+ */
+static unsigned
+prim_count(const struct pipe_draw_info *info)
+{
+       /* PIPE_PRIM_MAX used internally for RECTLIST blits on 3d pipe: */
+       unsigned vtx_per_prim = (info->mode == PIPE_PRIM_MAX) ? 2 :
+                       u_vertices_per_prim(info->mode);
+       unsigned num_prims = (info->count * info->instance_count) / vtx_per_prim;
+
+       /* Never report zero primitives, see above: */
+       return num_prims ? num_prims : 1;
+}
+
+/**
+ * Estimate the size, in bits, of the primitive stream for a draw.  The
+ * result is aligned to a whole number of dwords.
+ *
+ * The primitive stream is run-length encoded.  Each packet holds, in order:
+ *
+ *  - The (compressed) bitfield of bins covered
+ *  - The number of primitives with this bitset
+ *  - Checksum
+ *
+ * The worst case would be every primitive having a different bitmask.  In
+ * practice, assuming every other primitive has a different bitmask still
+ * gets us conservatively large primitive stream sizes.  (Ie. 10x what is
+ * needed, vs. 20x)
+ *
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#primitive-streams
+ */
+static unsigned
+primitive_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins)
+{
+       const unsigned total_prims = prim_count(info);
+
+       /* one packet for every second primitive: */
+       const unsigned num_pkts = DIV_ROUND_UP(total_prims, 2);
+
+       unsigned pkt_bits = 0;
+       pkt_bits += bitfield_size_bits(num_bins);       /* bitfield of bins covered */
+       pkt_bits += number_size_bits(1);                /* number of primitives with this bitset */
+       pkt_bits += 1;                                  /* checksum */
+
+       return align(pkt_bits * num_pkts, dword);
+}
+
+/**
+ * Estimate the size, in bits, of the draw stream for a draw.  One packet is
+ * counted per instance, and each packet is made up of:
+ *
+ *  - Bin bitfield
+ *  - Last instance bit
+ *  - If bitfield is empty, the number of draws it is empty for, otherwise
+ *    the size of the corresponding primitive stream in DWORD's.
+ *  - Checksum
+ *
+ * 'prim_strm_bits' is the primitive stream size estimate (in bits) for the
+ * same draw.
+ *
+ * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#draw-streams
+ */
+static unsigned
+draw_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins,
+               unsigned prim_strm_bits)
+{
+       const unsigned prim_strm_dwords = prim_strm_bits / dword;
+       unsigned pkt_bits = 0;
+
+       assert(info->instance_count > 0);
+
+       pkt_bits += bitfield_size_bits(num_bins);       /* bitfield of bins */
+       pkt_bits += 1;                                  /* last-instance-bit */
+       pkt_bits += number_size_bits(prim_strm_dwords); /* size of corresponding prim strm */
+       pkt_bits += 1;                                  /* checksum */
+
+       return pkt_bits * info->instance_count;
+}
+
+void
+fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info)
+{
+       if (!batch->num_bins_per_pipe) {
+               batch->num_bins_per_pipe = fd_gmem_estimate_bins_per_pipe(batch);
+
+               /* This is a convenient spot to add the size of the final draw-
+                * stream packet:
+                *
+                * If there are N bins, the final packet, after all the draws are
+                * done, consists of a 1 followed by N + 17 0's, plus a final 1.
+                * This uses the otherwise-unused pattern of a non-empty bitfield
+                * (initial 1) that is nontheless empty (has all 0's)
+                */
+               unsigned final_pkt_sz = 1 + batch->num_bins_per_pipe + 17 + 1;
+               batch->prim_strm_bits = align(final_pkt_sz, dword);
+       }
+
+       unsigned prim_strm_bits =
+               primitive_stream_size_bits(info, batch->num_bins_per_pipe);
+       unsigned draw_strm_bits =
+               draw_stream_size_bits(info, batch->num_bins_per_pipe, prim_strm_bits);
+
+#if 0
+       printf("vsc: prim_strm_bits=%d, draw_strm_bits=%d, nb=%u, ic=%u, c=%u, pc=%u (%s)\n",
+                       prim_strm_bits, draw_strm_bits, batch->num_bins_per_pipe,
+                       info->instance_count, info->count,
+                       (info->count * info->instance_count) /
+                       u_vertices_per_prim(info->mode),
+                       u_prim_name(info->mode));
+#endif
+
+       batch->prim_strm_bits += prim_strm_bits;
+       batch->draw_strm_bits += draw_strm_bits;
+}
+
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.h
new file mode 100644 (file)
index 0000000..50470f6
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef FD6_VSC_H_
+#define FD6_VSC_H_
+
+/* Forward declarations, so this header does not depend on include order: */
+struct fd_batch;
+struct pipe_draw_info;
+
+/**
+ * Add the estimated visibility stream sizes for the draw described by
+ * 'info' to the running totals kept in 'batch' (prim_strm_bits and
+ * draw_strm_bits).
+ */
+void fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info);
+
+#endif /* FD6_VSC_H_ */
index 106959b4fed9d1bfb952c94cd1cce982ebb2996e..082329eb59c50171a3f45c310a5628b64bf29186 100644 (file)
@@ -84,6 +84,9 @@ batch_init(struct fd_batch *batch)
        batch->gmem_reason = 0;
        batch->num_draws = 0;
        batch->num_vertices = 0;
+       batch->num_bins_per_pipe = 0;
+       batch->prim_strm_bits = 0;
+       batch->draw_strm_bits = 0;
        batch->stage = FD_STAGE_NULL;
 
        fd_reset_wfi(batch);
index 9266790bb80cf4d4180d5c186e5fe2ca3b5b2479..479d78d5eca9ba7967397e8b4e3dad3b75d2a503 100644 (file)
@@ -129,9 +129,16 @@ struct fd_batch {
         */
        const struct fd_gmem_stateobj *gmem_state;
 
-       unsigned num_draws;   /* number of draws in current batch */
+       unsigned num_draws;      /* number of draws in current batch */
        unsigned num_vertices;   /* number of vertices in current batch */
 
+       /* Currently only used on a6xx, to calculate vsc prim/draw stream
+        * sizes:
+        */
+       unsigned num_bins_per_pipe;
+       unsigned prim_strm_bits;
+       unsigned draw_strm_bits;
+
        /* Track the maximal bounds of the scissor of all the draws within a
         * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
         * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
index a5c0935e07ecc0973489c543aaf61b8977896c85..919baf070de10ad0e6255666d2880b305806fec1 100644 (file)
@@ -205,6 +205,8 @@ files_libfreedreno = files(
   'a6xx/fd6_screen.h',
   'a6xx/fd6_texture.c',
   'a6xx/fd6_texture.h',
+  'a6xx/fd6_vsc.c',
+  'a6xx/fd6_vsc.h',
   'a6xx/fd6_zsa.c',
   'a6xx/fd6_zsa.h',
   'ir3/ir3_cache.c',