From 3b1ce49bc1e5aff87805b0bab255885c84bf5052 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Wed, 31 May 2017 01:32:01 +0200
Subject: [PATCH] gallium/u_threaded: align batches and call slots to 16 bytes

not sure if this helps

Reviewed-by: Samuel Pitoiset
---
 src/gallium/auxiliary/util/u_threaded_context.c | 11 +++++++++--
 src/gallium/auxiliary/util/u_threaded_context.h |  9 ++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 8ea7f8aa260..34206bfbf40 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -2125,7 +2125,7 @@ tc_destroy(struct pipe_context *_pipe)
 
    slab_destroy_child(&tc->pool_transfers);
    pipe->destroy(pipe);
-   FREE(tc);
+   os_free_aligned(tc);
 }
 
 static const tc_execute execute_func[TC_NUM_CALLS] = {
@@ -2165,11 +2165,18 @@ threaded_context_create(struct pipe_context *pipe,
    if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1))
       return pipe;
 
-   tc = CALLOC_STRUCT(threaded_context);
+   tc = os_malloc_aligned(sizeof(struct threaded_context), 16);
    if (!tc) {
       pipe->destroy(pipe);
       return NULL;
    }
+   memset(tc, 0, sizeof(*tc));
+
+   assert((uintptr_t)tc % 16 == 0);
+   STATIC_ASSERT(offsetof(struct threaded_context, batch_slots[0]) % 16 == 0);
+   STATIC_ASSERT(offsetof(struct threaded_context, batch_slots[0].call[0]) % 16 == 0);
+   STATIC_ASSERT(offsetof(struct threaded_context, batch_slots[0].call[1]) % 16 == 0);
+   STATIC_ASSERT(offsetof(struct threaded_context, batch_slots[1].call[0]) % 16 == 0);
 
    /* The driver context isn't wrapped, so set its "priv" to NULL. */
    pipe->priv = NULL;
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index f13923050a0..5d2a10cb125 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -273,7 +273,14 @@ union tc_payload {
    uint64_t __use_8_bytes;
 };
 
-struct tc_call {
+#ifdef _MSC_VER
+#define ALIGN16 __declspec(align(16))
+#else
+#define ALIGN16 __attribute__((aligned(16)))
+#endif
+
+/* Each call slot should be aligned to its own size for optimal cache usage. */
+struct ALIGN16 tc_call {
    unsigned sentinel;
    ushort num_call_slots;
    ushort call_id;
-- 
2.30.2