From a354c389f524c2aa0fa64ac8b7e3d93c2cea4b81 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nils=20Wallm=C3=A9nius?= Date: Fri, 22 Jul 2016 13:10:03 +0200 Subject: [PATCH] main: memcpy larger chunks in _mesa_propagate_uniforms_to_driver_storage MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When possible, do the memcpy on larger blocks. This reduces cycles spent in _mesa_propagate_uniforms_to_driver_storage from 1.51% to 0.62% according to perf during the Unigine Heaven benchmark. It did not affect the framerate of the benchmark. The system used for testing was an i5 6600K with a Radeon R9 380. Piglit hangs randomly on this system both with and without the patch, so I could not make a comparison. v2: fixed whitespace Signed-off-by: Nils Wallménius Reviewed-by: Nicolai Hähnle --- src/mesa/main/uniform_query.cpp | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index ab22a0ed86a..b9b9ff23ffa 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -578,14 +578,31 @@ _mesa_propagate_uniforms_to_driver_storage(struct gl_uniform_storage *uni, unsigned j; unsigned v; - for (j = 0; j < count; j++) { - for (v = 0; v < vectors; v++) { - memcpy(dst, src, src_vector_byte_stride); - src += src_vector_byte_stride; - dst += store->vector_stride; + if (src_vector_byte_stride == store->vector_stride) { + if (extra_stride) { + for (j = 0; j < count; j++) { + memcpy(dst, src, src_vector_byte_stride * vectors); + src += src_vector_byte_stride * vectors; + dst += store->vector_stride * vectors; + + dst += extra_stride; + } + } else { + /* Unigine Heaven benchmark gets here */ + memcpy(dst, src, src_vector_byte_stride * vectors * count); + src += src_vector_byte_stride * vectors * count; + dst += store->vector_stride * vectors * count; } + } else { + for (j = 0; j < count; j++) { + for (v = 0; v < vectors; v++) 
{ + memcpy(dst, src, src_vector_byte_stride); + src += src_vector_byte_stride; + dst += store->vector_stride; + } - dst += extra_stride; + dst += extra_stride; + } } break; } -- 2.30.2