util/sparse_free_list: manipulate node pointers using atomic primitives
[mesa.git] / src / util / sparse_array.c
/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "sparse_array.h"
#include "os_memory.h"

/* Aligning our allocations to 64 has two advantages:
 *
 * 1. On x86 platforms, it means that they are cache-line aligned so we
 *    reduce the likelihood that one of our allocations shares a cache line
 *    with some other allocation.
 *
 * 2. It lets us use the bottom 6 bits of the pointer to store the tree level
 *    of the node so we can avoid some pointer indirections.
 */
#define NODE_ALLOC_ALIGN 64
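
/* Illustration only (not used by the code): because node data is 64-byte
 * aligned, the low 6 bits of the pointer are always zero, so a node handle
 * packs both the pointer and the tree level into a single uintptr_t:
 *
 *    handle = (uintptr_t)data | level;                        // level < 64
 *    data   = (void *)(handle & ~(uintptr_t)(NODE_ALLOC_ALIGN - 1));
 *    level  = handle & (NODE_ALLOC_ALIGN - 1);
 *
 * The _util_sparse_array_node*() helpers below are the canonical
 * encode/decode for this layout.
 */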

void
util_sparse_array_init(struct util_sparse_array *arr,
                       size_t elem_size, size_t node_size)
{
   memset(arr, 0, sizeof(*arr));
   arr->elem_size = elem_size;
   arr->node_size_log2 = util_logbase2_64(node_size);
   assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
}
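
/* Sketch of typical usage (the element type and sizes here are illustrative
 * only):
 *
 *    struct my_elem { uint32_t a, b; };
 *
 *    struct util_sparse_array arr;
 *    util_sparse_array_init(&arr, sizeof(struct my_elem), 256);
 *
 *    struct my_elem *e = util_sparse_array_get(&arr, 12345);
 *    e->a = 42;
 *
 *    util_sparse_array_finish(&arr);
 *
 * node_size must be a power of two.  util_sparse_array_get() allocates any
 * missing tree nodes on demand and is designed to be safe to call
 * concurrently; nodes are published with atomic compare-and-swap.
 */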

#define NODE_PTR_MASK (~((uintptr_t)NODE_ALLOC_ALIGN - 1))
#define NODE_LEVEL_MASK ((uintptr_t)NODE_ALLOC_ALIGN - 1)
#define NULL_NODE 0

static inline uintptr_t
_util_sparse_array_node(void *data, unsigned level)
{
   assert(data != NULL);
   assert(((uintptr_t)data & NODE_LEVEL_MASK) == 0);
   assert((level & NODE_PTR_MASK) == 0);
   return (uintptr_t)data | level;
}

static inline void *
_util_sparse_array_node_data(uintptr_t handle)
{
   return (void *)(handle & NODE_PTR_MASK);
}

static inline unsigned
_util_sparse_array_node_level(uintptr_t handle)
{
   return handle & NODE_LEVEL_MASK;
}

static inline void
_util_sparse_array_node_finish(struct util_sparse_array *arr,
                               uintptr_t node)
{
   if (_util_sparse_array_node_level(node) > 0) {
      uintptr_t *children = _util_sparse_array_node_data(node);
      size_t node_size = 1ull << arr->node_size_log2;
      for (size_t i = 0; i < node_size; i++) {
         if (children[i])
            _util_sparse_array_node_finish(arr, children[i]);
      }
   }

   os_free_aligned(_util_sparse_array_node_data(node));
}

void
util_sparse_array_finish(struct util_sparse_array *arr)
{
   if (arr->root)
      _util_sparse_array_node_finish(arr, arr->root);
}

static inline uintptr_t
_util_sparse_array_node_alloc(struct util_sparse_array *arr,
                              unsigned level)
{
   size_t size;
   if (level == 0) {
      size = arr->elem_size << arr->node_size_log2;
   } else {
      size = sizeof(uintptr_t) << arr->node_size_log2;
   }

   void *data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
   memset(data, 0, size);

   return _util_sparse_array_node(data, level);
}
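
/* A level-0 (leaf) node stores node_size elements of elem_size bytes each,
 * while higher-level nodes store node_size child handles, which is why the
 * allocation size above depends on the level.  New nodes are zero-filled so
 * that unused children read as NULL_NODE and fresh elements start zeroed.
 */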

static inline uintptr_t
_util_sparse_array_set_or_free_node(uintptr_t *node_ptr,
                                    uintptr_t cmp_node,
                                    uintptr_t node)
{
   uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);

   if (prev_node != cmp_node) {
      /* We lost the race. Free this one and return the one that was already
       * allocated.
       */
      os_free_aligned(_util_sparse_array_node_data(node));
      return prev_node;
   } else {
      return node;
   }
}
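
/* Every node pointer in the tree (including the root) is published with the
 * speculate-then-CAS pattern above: allocate a node, try to install it with
 * p_atomic_cmpxchg(), and if another thread won the race, free our copy and
 * use theirs.  This lets util_sparse_array_get() grow the tree without locks
 * while guaranteeing that only one node is ever visible at a given slot.
 */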

void *
util_sparse_array_get(struct util_sparse_array *arr, uint64_t idx)
{
   const unsigned node_size_log2 = arr->node_size_log2;
   uintptr_t root = p_atomic_read(&arr->root);
   if (unlikely(!root)) {
      unsigned root_level = 0;
      uint64_t idx_iter = idx >> node_size_log2;
      while (idx_iter) {
         idx_iter >>= node_size_log2;
         root_level++;
      }
      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
      root = _util_sparse_array_set_or_free_node(&arr->root,
                                                 NULL_NODE, new_root);
   }

   while (1) {
      unsigned root_level = _util_sparse_array_node_level(root);
      uint64_t root_idx = idx >> (root_level * node_size_log2);
      if (likely(root_idx < (1ull << node_size_log2)))
         break;

      /* In this case, we have a root but its level is low enough that the
       * requested index is out-of-bounds.
       */
      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);

      uintptr_t *new_root_children = _util_sparse_array_node_data(new_root);
      new_root_children[0] = root;

      /* We only add one level at a time instead of building the whole tree
       * because it's easier to ensure correctness of both the tree building
       * and the clean-up path.  Because we only ever add a single node, the
       * failure path only has to free that one node; we never have to unwind
       * a partially-built subtree.
       */
      root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
   }

   void *node_data = _util_sparse_array_node_data(root);
   unsigned node_level = _util_sparse_array_node_level(root);
   while (node_level > 0) {
      uint64_t child_idx = (idx >> (node_level * node_size_log2)) &
                           ((1ull << node_size_log2) - 1);

      uintptr_t *children = node_data;
      uintptr_t child = p_atomic_read(&children[child_idx]);

      if (unlikely(!child)) {
         child = _util_sparse_array_node_alloc(arr, node_level - 1);
         child = _util_sparse_array_set_or_free_node(&children[child_idx],
                                                     NULL_NODE, child);
      }

      node_data = _util_sparse_array_node_data(child);
      node_level = _util_sparse_array_node_level(child);
   }

   uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
   return (void *)((char *)node_data + (elem_idx * arr->elem_size));
}
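
/* Worked example (illustrative only): with node_size = 256 (node_size_log2
 * == 8), looking up idx = 0x012345 requires a level-2 root, because
 * 0x012345 >> 16 == 0x01 < 256.  The walk above then peels off 8 bits of the
 * index per level:
 *
 *    level-2 child index: (0x012345 >> 16) & 0xff == 0x01
 *    level-1 child index: (0x012345 >>  8) & 0xff == 0x23
 *    leaf element index:   0x012345        & 0xff == 0x45
 *
 * so only the three nodes along that path ever get allocated.
 */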

static void
validate_node_level(struct util_sparse_array *arr,
                    uintptr_t node, unsigned level)
{
   assert(_util_sparse_array_node_level(node) == level);

   if (_util_sparse_array_node_level(node) > 0) {
      uintptr_t *children = _util_sparse_array_node_data(node);
      size_t node_size = 1ull << arr->node_size_log2;
      for (size_t i = 0; i < node_size; i++) {
         if (children[i])
            validate_node_level(arr, children[i], level - 1);
      }
   }
}

void
util_sparse_array_validate(struct util_sparse_array *arr)
{
   uintptr_t root = p_atomic_read(&arr->root);
   validate_node_level(arr, root, _util_sparse_array_node_level(root));
}

void
util_sparse_array_free_list_init(struct util_sparse_array_free_list *fl,
                                 struct util_sparse_array *arr,
                                 uint32_t sentinel,
                                 uint32_t next_offset)
{
   fl->head = sentinel;
   fl->arr = arr;
   fl->sentinel = sentinel;
   fl->next_offset = next_offset;
}
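
/* Sketch of typical free-list usage (names and the sentinel value are
 * illustrative only):
 *
 *    struct my_elem { uint32_t a, b; uint32_t free_next; };
 *
 *    struct util_sparse_array_free_list fl;
 *    util_sparse_array_free_list_init(&fl, &arr, UINT32_MAX,
 *                                     offsetof(struct my_elem, free_next));
 *
 *    uint32_t idx = util_sparse_array_free_list_pop_idx(&fl);
 *    if (idx == UINT32_MAX) {
 *       // list empty; allocate a fresh index some other way
 *    }
 *    util_sparse_array_free_list_push(&fl, &idx, 1);
 *
 * The sentinel is an index value that can never name a real element; the pop
 * functions return it (or NULL) when the list is empty, and next_offset is
 * the byte offset of the 32-bit "next" field inside each element.
 */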

static uint64_t
free_list_head(uint64_t old, uint32_t next)
{
   return ((old & 0xffffffff00000000ull) + 0x100000000ull) | next;
}
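
/* The free-list head packs the head index into the low 32 bits and keeps a
 * transaction count in the high 32 bits; free_list_head() bumps that count on
 * every update.  The count is what protects the compare-and-swap loops below
 * from the classic ABA problem: even if the same index ends up back at the
 * head of the list, the full 64-bit head value will still differ.
 */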

void
util_sparse_array_free_list_push(struct util_sparse_array_free_list *fl,
                                 uint32_t *items, unsigned num_items)
{
   assert(num_items > 0);
   assert(items[0] != fl->sentinel);
   void *last_elem = util_sparse_array_get(fl->arr, items[0]);
   uint32_t *last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
   for (unsigned i = 1; i < num_items; i++) {
      p_atomic_set(last_next, items[i]);
      assert(items[i] != fl->sentinel);
      last_elem = util_sparse_array_get(fl->arr, items[i]);
      last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
   }

   uint64_t current_head, old_head;
   old_head = p_atomic_read(&fl->head);
   do {
      current_head = old_head;
      p_atomic_set(last_next, (uint32_t)current_head); /* Index is the bottom 32 bits */
      uint64_t new_head = free_list_head(current_head, items[0]);
      old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
   } while (old_head != current_head);
}
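
/* After a push, items[0] is the new head and the items are linked in the
 * order given, with the last item pointing at the previous head.  For
 * example, pushing {7, 9} onto a list whose head was 3 leaves the list as
 * 7 -> 9 -> 3.  Only the final head update needs the CAS retry loop; the
 * intra-batch links are private to this thread until that update lands.
 */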

uint32_t
util_sparse_array_free_list_pop_idx(struct util_sparse_array_free_list *fl)
{
   uint64_t current_head;

   current_head = p_atomic_read(&fl->head);
   while (1) {
      if ((uint32_t)current_head == fl->sentinel)
         return fl->sentinel;

      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
      if (old_head == current_head)
         return head_idx;
      current_head = old_head;
   }
}

void *
util_sparse_array_free_list_pop_elem(struct util_sparse_array_free_list *fl)
{
   uint64_t current_head;

   current_head = p_atomic_read(&fl->head);
   while (1) {
      if ((uint32_t)current_head == fl->sentinel)
         return NULL;

      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
      if (old_head == current_head)
         return head_elem;
      current_head = old_head;
   }
}
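
/* util_sparse_array_free_list_pop_elem() is pop_idx() plus the final
 * util_sparse_array_get() lookup.  The returned pointer stays valid for the
 * lifetime of the array: element storage is only ever freed by
 * util_sparse_array_finish(), never by the free list itself.
 */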