/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "sparse_array.h"
#include "os_memory.h"
/* Aligning our allocations to 64 has two advantages:
 *
 *  1. On x86 platforms, it means that they are cache-line aligned so we
 *     reduce the likelihood that one of our allocations shares a cache line
 *     with some other allocation.
 *
 *  2. It lets us use the bottom 6 bits of the pointer to store the tree
 *     level of the node so we can avoid some pointer indirections.
 */
#define NODE_ALLOC_ALIGN 64
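
/* Illustrative sketch (not from the original source): with 64-byte
 * alignment the low 6 bits of every node pointer are zero, so a handle
 * can pack the tree level into those bits.  Assuming a hypothetical node
 * address of 0x7f32a1c0 at tree level 3:
 *
 *    handle = 0x7f32a1c0 | 3   = 0x7f32a1c3
 *    data   = handle & ~63ull  = 0x7f32a1c0
 *    level  = handle &  63     = 3
 */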
void
util_sparse_array_init(struct util_sparse_array *arr,
                       size_t elem_size, size_t node_size)
{
   memset(arr, 0, sizeof(*arr));
   arr->elem_size = elem_size;
   arr->node_size_log2 = util_logbase2_64(node_size);
   assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
}
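
/* Minimal usage sketch (illustrative; "struct foo_elem" is a hypothetical
 * caller-defined type).  node_size must be a power of two:
 *
 *    struct util_sparse_array arr;
 *    util_sparse_array_init(&arr, sizeof(struct foo_elem), 64);
 *    struct foo_elem *e = util_sparse_array_get(&arr, 12345);
 *    ...
 *    util_sparse_array_finish(&arr);
 */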
#define NODE_PTR_MASK (~((uintptr_t)NODE_ALLOC_ALIGN - 1))
#define NODE_LEVEL_MASK ((uintptr_t)NODE_ALLOC_ALIGN - 1)
#define NULL_NODE 0
static inline uintptr_t
_util_sparse_array_node(void *data, unsigned level)
{
   assert(((uintptr_t)data & NODE_LEVEL_MASK) == 0);
   assert((level & NODE_PTR_MASK) == 0);
   return (uintptr_t)data | level;
}
static inline void *
_util_sparse_array_node_data(uintptr_t handle)
{
   return (void *)(handle & NODE_PTR_MASK);
}
static inline unsigned
_util_sparse_array_node_level(uintptr_t handle)
{
   return handle & NODE_LEVEL_MASK;
}
static inline void
_util_sparse_array_node_finish(struct util_sparse_array *arr,
                               uintptr_t node)
{
   if (_util_sparse_array_node_level(node) > 0) {
      uintptr_t *children = _util_sparse_array_node_data(node);
      size_t node_size = 1ull << arr->node_size_log2;
      for (size_t i = 0; i < node_size; i++) {
         if (children[i])
            _util_sparse_array_node_finish(arr, children[i]);
      }
   }

   os_free_aligned(_util_sparse_array_node_data(node));
}
void
util_sparse_array_finish(struct util_sparse_array *arr)
{
   if (arr->root)
      _util_sparse_array_node_finish(arr, arr->root);
}
static inline uintptr_t
_util_sparse_array_node_alloc(struct util_sparse_array *arr,
                              unsigned level)
{
   size_t size;
   if (level == 0) {
      size = arr->elem_size << arr->node_size_log2;
   } else {
      size = sizeof(uintptr_t) << arr->node_size_log2;
   }

   void *data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
   memset(data, 0, size);

   return _util_sparse_array_node(data, level);
}
static inline uintptr_t
_util_sparse_array_set_or_free_node(uintptr_t *node_ptr,
                                    uintptr_t cmp_node,
                                    uintptr_t node)
{
   uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);

   if (prev_node != cmp_node) {
      /* We lost the race.  Free this one and return the one that was already
       * set.
       */
      os_free_aligned(_util_sparse_array_node_data(node));
      return prev_node;
   } else {
      return node;
   }
}
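
/* Race sketch (illustrative): if threads A and B both read *node_ptr ==
 * cmp_node and each allocates a replacement node, only one cmpxchg can
 * succeed.  The loser frees its freshly allocated, never-published node
 * and adopts the winner's, so exactly one node ever becomes visible at
 * *node_ptr and nothing leaks.
 */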
void *
util_sparse_array_get(struct util_sparse_array *arr, uint64_t idx)
{
   const unsigned node_size_log2 = arr->node_size_log2;
   uintptr_t root = p_atomic_read(&arr->root);
   if (unlikely(!root)) {
      unsigned root_level = 0;
      uint64_t idx_iter = idx >> node_size_log2;
      while (idx_iter) {
         idx_iter >>= node_size_log2;
         root_level++;
      }

      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
      root = _util_sparse_array_set_or_free_node(&arr->root,
                                                 NULL_NODE, new_root);
   }

   while (1) {
      unsigned root_level = _util_sparse_array_node_level(root);
      uint64_t root_idx = idx >> (root_level * node_size_log2);
      if (likely(root_idx < (1ull << node_size_log2)))
         break;

      /* In this case, we have a root but its level is low enough that the
       * requested index is out-of-bounds.
       */
      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);

      uintptr_t *new_root_children = _util_sparse_array_node_data(new_root);
      new_root_children[0] = root;

      /* We only add one at a time instead of the whole tree because it's
       * easier to ensure correctness of both the tree building and the
       * clean-up path.  Because we're only adding one node we never have to
       * worry about trying to free multiple things without freeing the old
       * things.
       */
      root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
   }

   void *node_data = _util_sparse_array_node_data(root);
   unsigned node_level = _util_sparse_array_node_level(root);
   while (node_level > 0) {
      uint64_t child_idx = (idx >> (node_level * node_size_log2)) &
                           ((1ull << node_size_log2) - 1);

      uintptr_t *children = node_data;
      uintptr_t child = p_atomic_read(&children[child_idx]);

      if (unlikely(!child)) {
         child = _util_sparse_array_node_alloc(arr, node_level - 1);
         child = _util_sparse_array_set_or_free_node(&children[child_idx],
                                                     NULL_NODE, child);
      }

      node_data = _util_sparse_array_node_data(child);
      node_level = _util_sparse_array_node_level(child);
   }

   uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
   return (void *)((char *)node_data + (elem_idx * arr->elem_size));
}
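
/* Worked example (illustrative): with node_size = 64 (node_size_log2 = 6)
 * and idx = 70000, the root must sit at level 2 because 70000 >> 6 and
 * 70000 >> 12 are non-zero while 70000 >> 18 is zero.  The walk then
 * decomposes the index as:
 *
 *    level-2 child:  (70000 >> 12) & 63 = 17
 *    level-1 child:  (70000 >>  6) & 63 =  5
 *    element index:   70000        & 63 = 48
 *
 * so the element lives at byte offset 48 * elem_size within that leaf.
 */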
static void
validate_node_level(struct util_sparse_array *arr,
                    uintptr_t node, unsigned level)
{
   assert(_util_sparse_array_node_level(node) == level);

   if (_util_sparse_array_node_level(node) > 0) {
      uintptr_t *children = _util_sparse_array_node_data(node);
      size_t node_size = 1ull << arr->node_size_log2;
      for (size_t i = 0; i < node_size; i++) {
         if (children[i])
            validate_node_level(arr, children[i], level - 1);
      }
   }
}
void
util_sparse_array_validate(struct util_sparse_array *arr)
{
   uintptr_t root = p_atomic_read(&arr->root);
   validate_node_level(arr, root, _util_sparse_array_node_level(root));
}
void
util_sparse_array_free_list_init(struct util_sparse_array_free_list *fl,
                                 struct util_sparse_array *arr,
                                 uint32_t sentinel,
                                 uint32_t next_offset)
{
   fl->head = sentinel;
   fl->arr = arr;
   fl->sentinel = sentinel;
   fl->next_offset = next_offset;
}
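
/* Usage sketch (illustrative; "struct foo_elem" and "free_list_next" are
 * hypothetical names): each element embeds a 32-bit next index, and
 * next_offset tells the free list where to find it.  The sentinel must
 * be an index no live element ever uses:
 *
 *    struct foo_elem { ... uint32_t free_list_next; ... };
 *
 *    util_sparse_array_free_list_init(&fl, &arr, UINT32_MAX,
 *                                     offsetof(struct foo_elem,
 *                                              free_list_next));
 */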
static uint64_t
free_list_head(uint64_t old, uint32_t next)
{
   return ((old & 0xffffffff00000000ull) + 0x100000000ull) | next;
}
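
/* The head is a single 64-bit word: the element index lives in the bottom
 * 32 bits and a transaction count in the top 32 bits.  Bumping the count
 * on every update keeps a cmpxchg from succeeding against a stale head
 * (the classic ABA problem).  Illustrative values:
 *
 *    old head = 0x0000000500000007                 (count 5, index 7)
 *    free_list_head(old, 9) = 0x0000000600000009   (count 6, index 9)
 */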
void
util_sparse_array_free_list_push(struct util_sparse_array_free_list *fl,
                                 uint32_t *items, unsigned num_items)
{
   assert(num_items > 0);
   assert(items[0] != fl->sentinel);
   void *last_elem = util_sparse_array_get(fl->arr, items[0]);
   uint32_t *last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
   for (unsigned i = 1; i < num_items; i++) {
      p_atomic_set(last_next, items[i]);
      assert(items[i] != fl->sentinel);
      last_elem = util_sparse_array_get(fl->arr, items[i]);
      last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
   }

   uint64_t current_head, old_head;
   old_head = p_atomic_read(&fl->head);
   do {
      current_head = old_head;
      p_atomic_set(last_next, (uint32_t)current_head); /* Index is the bottom 32 bits */
      uint64_t new_head = free_list_head(current_head, items[0]);
      old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
   } while (old_head != current_head);
}
uint32_t
util_sparse_array_free_list_pop_idx(struct util_sparse_array_free_list *fl)
{
   uint64_t current_head;

   current_head = p_atomic_read(&fl->head);
   while (1) {
      if ((uint32_t)current_head == fl->sentinel)
         return fl->sentinel;

      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
      if (old_head == current_head)
         return head_idx;
      current_head = old_head;
   }
}
void *
util_sparse_array_free_list_pop_elem(struct util_sparse_array_free_list *fl)
{
   uint64_t current_head;

   current_head = p_atomic_read(&fl->head);
   while (1) {
      if ((uint32_t)current_head == fl->sentinel)
         return NULL;

      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
      if (old_head == current_head)
         return head_elem;
      current_head = old_head;
   }
}
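
/* Push/pop usage sketch (illustrative indices, continuing the free-list
 * sketch above):
 *
 *    uint32_t idxs[2] = { 7, 9 };
 *    util_sparse_array_free_list_push(&fl, idxs, 2);
 *
 *    uint32_t idx = util_sparse_array_free_list_pop_idx(&fl);
 *    if (idx != UINT32_MAX) {
 *       struct foo_elem *e = util_sparse_array_get(&arr, idx);
 *       ...
 *    }
 */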
;