From 2a2fd4c5308dee51d48630863255f1c6a04768a9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 1 Apr 2020 08:44:08 +1000 Subject: [PATCH] gallium/llvmpipe: add an optimised 32-bit memset MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This might have other users beyond filling/clearing buffers, increase a fullscreen 4k gears from 68->74 fps on my Ryzen since gears is really just a clear benchmark, and this helps clearing. Reviewed-by: Marek Olšák Tested-by: Marge Bot Part-of: --- src/gallium/auxiliary/util/u_surface.c | 10 ++---- src/gallium/drivers/llvmpipe/lp_rast.c | 6 ++-- src/util/Makefile.sources | 1 + src/util/meson.build | 1 + src/util/u_memset.h | 47 ++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 11 deletions(-) create mode 100644 src/util/u_memset.h diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 847c0008079..527e6662f0b 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -41,7 +41,7 @@ #include "util/u_rect.h" #include "util/u_surface.h" #include "util/u_pack_color.h" - +#include "util/u_memset.h" /** * Initialize a pipe_surface object. 
'view' is considered to have @@ -141,9 +141,7 @@ util_fill_rect(ubyte * dst, break; case 4: for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst; - for (j = 0; j < width; j++) - *row++ = uc->ui[0]; + util_memset32(dst, uc->ui[0], width); dst += dst_stride; } break; @@ -492,9 +490,7 @@ util_clear_depth_stencil_texture(struct pipe_context *pipe, case 4: if (!need_rmw) { for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst_map; - for (j = 0; j < width; j++) - *row++ = (uint32_t) zstencil; + util_memset32(dst_map, (uint32_t)zstencil, width); dst_map += dst_stride; } } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index ef783ea6fb1..ad55ed7be79 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -33,7 +33,7 @@ #include "util/u_pack_color.h" #include "util/u_string.h" #include "util/u_thread.h" - +#include "util/u_memset.h" #include "util/os_time.h" #include "lp_scene_queue.h" @@ -236,9 +236,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, case 4: if (clear_mask == 0xffffffff) { for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst; - for (j = 0; j < width; j++) - *row++ = clear_value; + util_memset32(dst, clear_value, width); dst += dst_stride; } } diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources index 644839bd09a..0e8fbb7ee85 100644 --- a/src/util/Makefile.sources +++ b/src/util/Makefile.sources @@ -119,6 +119,7 @@ MESA_UTIL_FILES := \ os_memory_stdc.h \ os_memory.h \ u_memory.h \ + u_memset.h \ u_mm.h \ u_mm.c \ vma.c \ diff --git a/src/util/meson.build b/src/util/meson.build index 0a2b2e4284e..ff25a9ea851 100644 --- a/src/util/meson.build +++ b/src/util/meson.build @@ -112,6 +112,7 @@ files_mesa_util = files( 'u_vector.h', 'u_math.c', 'u_math.h', + 'u_memset.h', 'u_mm.c', 'u_mm.h', 'u_debug.c', diff --git a/src/util/u_memset.h b/src/util/u_memset.h new file mode 100644 index 00000000000..df8cf80b64a --- 
/dev/null
+++ b/src/util/u_memset.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * Copyright 2020 Red Hat
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/* NOTE(review): no include guard here; confirm p_config.h supplies uint32_t/size_t. */
+#include "pipe/p_config.h"
+/* Writes the 32-bit value ui to n consecutive uint32_t slots at s; returns s. */
+static inline void *
+util_memset32(void *s, uint32_t ui, size_t n)
+{
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64) /* inline-asm fast path */
+   long d0, d1; /* write-only outputs: tell the compiler rcx/rdi are modified */
+   __asm__ volatile("rep\n\t" /* repeat the following store rcx times */
+                    "stosl" /* store eax at [rdi]; forward step assumes DF is clear */
+                    : "=&c" (d0), "=&D" (d1) /* outputs: final rcx and rdi, never read */
+                    : "a" (ui), "1" (s), "0" (n) /* eax = ui, rdi = s, rcx = n */
+                    : "memory"); /* the stores touch memory not named as an operand */
+   return s;
+#else /* portable fallback: n is a count of 32-bit elements, not bytes */
+   uint32_t *xs = (uint32_t *)s;
+   while (n--)
+      *xs++ = ui;
+   return s;
+#endif
+}
-- 
2.30.2