From 6f2e81ce4cc4a03d22e2670191651bfa9bdb0d0c Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Mon, 4 Nov 2013 12:02:24 -0800 Subject: [PATCH] mesa: Add a streaming load memcpy implementation. Uses SSE 4.1's MOVNTDQA instruction (streaming load) to read from uncached memory without polluting the cache. Reviewed-by: Chad Versace --- src/mesa/Makefile.am | 10 +++- src/mesa/main/streaming-load-memcpy.c | 85 +++++++++++++++++++++++++++ src/mesa/main/streaming-load-memcpy.h | 33 +++++++++++ 3 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 src/mesa/main/streaming-load-memcpy.c create mode 100644 src/mesa/main/streaming-load-memcpy.h diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index 4387415facf..a60600e0364 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -81,7 +81,7 @@ main/get_hash.h: $(GLAPI)/gl_and_es_API.xml main/get_hash_params.py \ -f $< > $@.tmp; \ mv $@.tmp $@; -noinst_LTLIBRARIES = +noinst_LTLIBRARIES = $(ARCH_LIBS) if NEED_LIBMESA noinst_LTLIBRARIES += libmesa.la else @@ -103,6 +103,8 @@ noinst_PROGRAMS = gen_matypes gen_matypes_SOURCES = x86/gen_matypes.c BUILT_SOURCES += matypes.h +ARCH_LIBS = libmesa_sse41.la + if HAVE_X86_64_ASM MESA_ASM_FILES_FOR_ARCH += $(X86_64_FILES) AM_CPPFLAGS += -I$(builddir)/x86-64 -I$(srcdir)/x86-64 @@ -123,6 +125,7 @@ libmesa_la_SOURCES = \ libmesa_la_LIBADD = \ $(top_builddir)/src/glsl/libglsl.la \ + $(ARCH_LIBS) \ $() libmesagallium_la_SOURCES = \ @@ -132,8 +135,13 @@ libmesagallium_la_SOURCES = \ libmesagallium_la_LIBADD = \ $(top_builddir)/src/glsl/libglsl.la \ + $(ARCH_LIBS) \ $() +libmesa_sse41_la_SOURCES = \ + main/streaming-load-memcpy.c +libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) -msse4.1 + pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = gl.pc diff --git a/src/mesa/main/streaming-load-memcpy.c b/src/mesa/main/streaming-load-memcpy.c new file mode 100644 index 00000000000..d7147afdc5c --- /dev/null +++ b/src/mesa/main/streaming-load-memcpy.c @@ -0,0 +1,85 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * Matt Turner + * + */ + +#include "main/macros.h" +#include "main/streaming-load-memcpy.h" +#include + +/* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get streaming + * read performance from uncached memory. + */ +void +_mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len) +{ + char *restrict d = dst; + char *restrict s = src; + + /* If dst and src are not co-aligned, fallback to memcpy(). */ + if (((uintptr_t)d & 15) != ((uintptr_t)s & 15)) { + memcpy(d, s, len); + return; + } + + /* memcpy() the misaligned header. At the end of this if block, and + * are aligned to a 16-byte boundary or == 0. + */ + if ((uintptr_t)d & 15) { + uintptr_t bytes_before_alignment_boundary = 16 - ((uintptr_t)d & 15); + assert(bytes_before_alignment_boundary < 16); + + memcpy(d, s, MIN2(bytes_before_alignment_boundary, len)); + + d = (char *)ALIGN((uintptr_t)d, 16); + s = (char *)ALIGN((uintptr_t)s, 16); + len -= MIN2(bytes_before_alignment_boundary, len); + } + + while (len >= 64) { + __m128i *dst_cacheline = (__m128i *)d; + __m128i *src_cacheline = (__m128i *)s; + + __m128i temp1 = _mm_stream_load_si128(src_cacheline + 0); + __m128i temp2 = _mm_stream_load_si128(src_cacheline + 1); + __m128i temp3 = _mm_stream_load_si128(src_cacheline + 2); + __m128i temp4 = _mm_stream_load_si128(src_cacheline + 3); + + _mm_store_si128(dst_cacheline + 0, temp1); + _mm_store_si128(dst_cacheline + 1, temp2); + _mm_store_si128(dst_cacheline + 2, temp3); + _mm_store_si128(dst_cacheline + 3, temp4); + + d += 64; + s += 64; + len -= 64; + } + + /* memcpy() the tail. */ + if (len) { + memcpy(d, s, len); + } +} diff --git a/src/mesa/main/streaming-load-memcpy.h b/src/mesa/main/streaming-load-memcpy.h new file mode 100644 index 00000000000..41eeeeca0f1 --- /dev/null +++ b/src/mesa/main/streaming-load-memcpy.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * Matt Turner + * + */ + +/* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get streaming + * read performance from uncached memory. + */ +void +_mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len); -- 2.30.2