From cdc331c6f9f6b2ffc035018de4445dba9b67c1f7 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Mon, 30 Sep 2019 14:08:11 -0700 Subject: [PATCH] anv/block_pool: Align anv_block_pool state to 64 bits. On 64-bit platforms, some atomic operations like __sync_fetch_and_add() run in constant time, but on 32-bit platforms they are implemented with a loop and might take much longer. Additionally, it seems that if their operands are not aligned to 64 bits, they also require extra memory accesses. From the Intel 64 and IA-32 Architectures Software Developer's Manual, Vol. 1, Section 4.1.1: "A word or doubleword operand that crosses a 4-byte boundary or a quadword operand that crosses an 8-byte boundary is considered unaligned and requires two separate memory bus cycles for access." Forcing the u64 field to be aligned to 64 bits seems to make the unit tests that stress this finish much faster. Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_private.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 754f69b6c62..ab35f127970 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -632,7 +632,10 @@ union anv_free_list { /* A simple count that is incremented every time the head changes. */ uint32_t count; }; - uint64_t u64; + /* Make sure it's aligned to 64 bits. This will make atomic operations + * faster on 32 bit platforms. + */ + uint64_t u64 __attribute__ ((aligned (8))); }; #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } }) @@ -643,7 +646,10 @@ struct anv_block_state { uint32_t next; uint32_t end; }; - uint64_t u64; + /* Make sure it's aligned to 64 bits. This will make atomic operations + * faster on 32 bit platforms. + */ + uint64_t u64 __attribute__ ((aligned (8))); }; }; -- 2.30.2