u_queue: add a futex-based implementation of fences
[mesa.git] src/util/u_queue.c
/*
 * Copyright © 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "u_queue.h"
#include "util/u_string.h"

static void util_queue_killall_and_wait(struct util_queue *queue);

/****************************************************************************
 * Wait for all queues to assert idle when exit() is called.
 *
 * Otherwise, C++ static variable destructors can be called while threads
 * are using the static variables.
 */

static once_flag atexit_once_flag = ONCE_FLAG_INIT;
static struct list_head queue_list;
static mtx_t exit_mutex = _MTX_INITIALIZER_NP;

static void
atexit_handler(void)
{
   struct util_queue *iter;

   mtx_lock(&exit_mutex);
   /* Wait for all queues to assert idle. */
   LIST_FOR_EACH_ENTRY(iter, &queue_list, head) {
      util_queue_killall_and_wait(iter);
   }
   mtx_unlock(&exit_mutex);
}

static void
global_init(void)
{
   LIST_INITHEAD(&queue_list);
   atexit(atexit_handler);
}

static void
add_to_atexit_list(struct util_queue *queue)
{
   call_once(&atexit_once_flag, global_init);

   mtx_lock(&exit_mutex);
   LIST_ADD(&queue->head, &queue_list);
   mtx_unlock(&exit_mutex);
}

static void
remove_from_atexit_list(struct util_queue *queue)
{
   struct util_queue *iter, *tmp;

   mtx_lock(&exit_mutex);
   LIST_FOR_EACH_ENTRY_SAFE(iter, tmp, &queue_list, head) {
      if (iter == queue) {
         LIST_DEL(&iter->head);
         break;
      }
   }
   mtx_unlock(&exit_mutex);
}

/****************************************************************************
 * util_queue_fence
 */

#ifdef UTIL_QUEUE_FENCE_STANDARD
void
util_queue_fence_signal(struct util_queue_fence *fence)
{
   mtx_lock(&fence->mutex);
   fence->signalled = true;
   cnd_broadcast(&fence->cond);
   mtx_unlock(&fence->mutex);
}

void
util_queue_fence_wait(struct util_queue_fence *fence)
{
   mtx_lock(&fence->mutex);
   while (!fence->signalled)
      cnd_wait(&fence->cond, &fence->mutex);
   mtx_unlock(&fence->mutex);
}

void
util_queue_fence_init(struct util_queue_fence *fence)
{
   memset(fence, 0, sizeof(*fence));
   (void) mtx_init(&fence->mutex, mtx_plain);
   cnd_init(&fence->cond);
   fence->signalled = true;
}

void
util_queue_fence_destroy(struct util_queue_fence *fence)
{
   assert(fence->signalled);

   /* Ensure that another thread is not in the middle of
    * util_queue_fence_signal (having set the fence to signalled but still
    * holding the fence mutex).
    *
    * A common contract between threads is that as soon as a fence is
    * signalled by thread A, thread B is allowed to destroy it. Since
    * util_queue_fence_is_signalled does not lock the fence mutex (for
    * performance reasons), we must do so here.
    */
   mtx_lock(&fence->mutex);
   mtx_unlock(&fence->mutex);

   cnd_destroy(&fence->cond);
   mtx_destroy(&fence->mutex);
}
#endif
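
/* Illustration only, not part of this file: the futex-based fence path that
 * gives this commit its name is built when UTIL_QUEUE_FENCE_STANDARD is not
 * defined and is implemented as inline functions in the header. The sketch
 * below shows the general idea using a 32-bit state word (0 = signalled,
 * 1 = unsignalled, 2 = unsignalled with waiters). The helper names
 * futex_wake()/futex_wait() and the p_atomic_* calls are assumptions for
 * this sketch, not the literal code of the commit.
 */
#if 0
static inline void
sketch_fence_signal(uint32_t *val)
{
   /* Mark the fence signalled; wake waiters only if somebody slept. */
   uint32_t old = p_atomic_xchg(val, 0);
   if (old == 2)
      futex_wake(val, INT_MAX);
}

static inline void
sketch_fence_wait(uint32_t *val)
{
   uint32_t v = *val;

   while (v != 0) {
      /* Advertise that we are about to sleep, then sleep on the word. */
      if (v != 2) {
         v = p_atomic_cmpxchg(val, 1, 2);
         if (v == 0)
            return;
      }
      futex_wait(val, 2);
      v = *val;
   }
}
#endif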

/****************************************************************************
 * util_queue implementation
 */

struct thread_input {
   struct util_queue *queue;
   int thread_index;
};

static int
util_queue_thread_func(void *input)
{
   struct util_queue *queue = ((struct thread_input*)input)->queue;
   int thread_index = ((struct thread_input*)input)->thread_index;

   free(input);

   if (queue->name) {
      char name[16];
      util_snprintf(name, sizeof(name), "%s:%i", queue->name, thread_index);
      u_thread_setname(name);
   }

   while (1) {
      struct util_queue_job job;

      mtx_lock(&queue->lock);
      assert(queue->num_queued >= 0 && queue->num_queued <= queue->max_jobs);

      /* wait if the queue is empty */
      while (!queue->kill_threads && queue->num_queued == 0)
         cnd_wait(&queue->has_queued_cond, &queue->lock);

      if (queue->kill_threads) {
         mtx_unlock(&queue->lock);
         break;
      }

      job = queue->jobs[queue->read_idx];
      memset(&queue->jobs[queue->read_idx], 0, sizeof(struct util_queue_job));
      queue->read_idx = (queue->read_idx + 1) % queue->max_jobs;

      queue->num_queued--;
      cnd_signal(&queue->has_space_cond);
      mtx_unlock(&queue->lock);

      if (job.job) {
         job.execute(job.job, thread_index);
         util_queue_fence_signal(job.fence);
         if (job.cleanup)
            job.cleanup(job.job, thread_index);
      }
   }

   /* signal remaining jobs before terminating */
   mtx_lock(&queue->lock);
   for (unsigned i = queue->read_idx; i != queue->write_idx;
        i = (i + 1) % queue->max_jobs) {
      if (queue->jobs[i].job) {
         util_queue_fence_signal(queue->jobs[i].fence);
         queue->jobs[i].job = NULL;
      }
   }
   queue->read_idx = queue->write_idx;
   queue->num_queued = 0;
   mtx_unlock(&queue->lock);
   return 0;
}

bool
util_queue_init(struct util_queue *queue,
                const char *name,
                unsigned max_jobs,
                unsigned num_threads,
                unsigned flags)
{
   unsigned i;

   memset(queue, 0, sizeof(*queue));
   queue->name = name;
   queue->flags = flags;
   queue->num_threads = num_threads;
   queue->max_jobs = max_jobs;

   queue->jobs = (struct util_queue_job*)
                 calloc(max_jobs, sizeof(struct util_queue_job));
   if (!queue->jobs)
      goto fail;

   (void) mtx_init(&queue->lock, mtx_plain);

   queue->num_queued = 0;
   cnd_init(&queue->has_queued_cond);
   cnd_init(&queue->has_space_cond);

   queue->threads = (thrd_t*) calloc(num_threads, sizeof(thrd_t));
   if (!queue->threads)
      goto fail;

   /* start threads */
   for (i = 0; i < num_threads; i++) {
      struct thread_input *input =
         (struct thread_input *) malloc(sizeof(struct thread_input));
      input->queue = queue;
      input->thread_index = i;

      queue->threads[i] = u_thread_create(util_queue_thread_func, input);

      if (!queue->threads[i]) {
         free(input);

         if (i == 0) {
            /* no threads created, fail */
            goto fail;
         } else {
            /* at least one thread created, so use it */
            queue->num_threads = i;
            break;
         }
      }

      if (flags & UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY) {
#if defined(__linux__) && defined(SCHED_IDLE)
         struct sched_param sched_param = {0};

         /* The nice() function can only set a maximum of 19.
          * SCHED_IDLE is the same as nice = 20.
          *
          * Note that Linux only allows decreasing the priority. The original
          * priority can't be restored.
          */
         pthread_setschedparam(queue->threads[i], SCHED_IDLE, &sched_param);
#endif
      }
   }

   add_to_atexit_list(queue);
   return true;

fail:
   free(queue->threads);

   if (queue->jobs) {
      cnd_destroy(&queue->has_space_cond);
      cnd_destroy(&queue->has_queued_cond);
      mtx_destroy(&queue->lock);
      free(queue->jobs);
   }
   /* Zeroing the struct also lets util_queue_is_initialized be used to
    * check for success.
    */
   memset(queue, 0, sizeof(*queue));
   return false;
}

static void
util_queue_killall_and_wait(struct util_queue *queue)
{
   unsigned i;

   /* Signal all threads to terminate. */
   mtx_lock(&queue->lock);
   queue->kill_threads = 1;
   cnd_broadcast(&queue->has_queued_cond);
   mtx_unlock(&queue->lock);

   for (i = 0; i < queue->num_threads; i++)
      thrd_join(queue->threads[i], NULL);
   queue->num_threads = 0;
}

void
util_queue_destroy(struct util_queue *queue)
{
   util_queue_killall_and_wait(queue);
   remove_from_atexit_list(queue);

   cnd_destroy(&queue->has_space_cond);
   cnd_destroy(&queue->has_queued_cond);
   mtx_destroy(&queue->lock);
   free(queue->jobs);
   free(queue->threads);
}

void
util_queue_add_job(struct util_queue *queue,
                   void *job,
                   struct util_queue_fence *fence,
                   util_queue_execute_func execute,
                   util_queue_execute_func cleanup)
{
   struct util_queue_job *ptr;

   mtx_lock(&queue->lock);
   if (queue->kill_threads) {
      mtx_unlock(&queue->lock);
      /* There is no good option here, but any leaks will be short-lived,
       * since the process is shutting down anyway.
       */
      return;
   }

   util_queue_fence_reset(fence);

   assert(queue->num_queued >= 0 && queue->num_queued <= queue->max_jobs);

   if (queue->num_queued == queue->max_jobs) {
      if (queue->flags & UTIL_QUEUE_INIT_RESIZE_IF_FULL) {
         /* If the queue is full, make it larger to avoid waiting for a free
          * slot.
          */
         unsigned new_max_jobs = queue->max_jobs + 8;
         struct util_queue_job *jobs =
            (struct util_queue_job*)calloc(new_max_jobs,
                                           sizeof(struct util_queue_job));
         assert(jobs);

         /* Copy all queued jobs into the new list. */
         unsigned num_jobs = 0;
         unsigned i = queue->read_idx;

         do {
            jobs[num_jobs++] = queue->jobs[i];
            i = (i + 1) % queue->max_jobs;
         } while (i != queue->write_idx);

         assert(num_jobs == queue->num_queued);

         free(queue->jobs);
         queue->jobs = jobs;
         queue->read_idx = 0;
         queue->write_idx = num_jobs;
         queue->max_jobs = new_max_jobs;
      } else {
         /* Wait until there is a free slot. */
         while (queue->num_queued == queue->max_jobs)
            cnd_wait(&queue->has_space_cond, &queue->lock);
      }
   }

   ptr = &queue->jobs[queue->write_idx];
   assert(ptr->job == NULL);
   ptr->job = job;
   ptr->fence = fence;
   ptr->execute = execute;
   ptr->cleanup = cleanup;
   queue->write_idx = (queue->write_idx + 1) % queue->max_jobs;

   queue->num_queued++;
   cnd_signal(&queue->has_queued_cond);
   mtx_unlock(&queue->lock);
}

/**
 * Remove a queued job. If the job hasn't started execution, it's removed from
 * the queue. If the job has started execution, the function waits for it to
 * complete.
 *
 * In all cases, the fence is signalled when the function returns.
 *
 * The function can be used when destroying an object associated with the job
 * when you don't care about the job completion state.
 */
void
util_queue_drop_job(struct util_queue *queue, struct util_queue_fence *fence)
{
   bool removed = false;

   if (util_queue_fence_is_signalled(fence))
      return;

   mtx_lock(&queue->lock);
   for (unsigned i = queue->read_idx; i != queue->write_idx;
        i = (i + 1) % queue->max_jobs) {
      if (queue->jobs[i].fence == fence) {
         if (queue->jobs[i].cleanup)
            queue->jobs[i].cleanup(queue->jobs[i].job, -1);

         /* Just clear it. The threads will treat it as a no-op job. */
         memset(&queue->jobs[i], 0, sizeof(queue->jobs[i]));
         removed = true;
         break;
      }
   }
   mtx_unlock(&queue->lock);

   if (removed)
      util_queue_fence_signal(fence);
   else
      util_queue_fence_wait(fence);
}

int64_t
util_queue_get_thread_time_nano(struct util_queue *queue, unsigned thread_index)
{
   /* Allow some flexibility by not raising an error. */
   if (thread_index >= queue->num_threads)
      return 0;

   return u_thread_get_time_nano(queue->threads[thread_index]);
}
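
/* A hedged usage sketch, not part of this file: how a caller typically
 * drives this API. The my_job struct, my_job_execute callback, and the
 * thread/job counts below are made up for illustration; only the
 * util_queue_* calls come from this file and its header.
 */
#if 0
struct my_job {
   struct util_queue_fence fence;
   int input, result;
};

static void
my_job_execute(void *data, int thread_index)
{
   struct my_job *job = (struct my_job *)data;
   job->result = job->input * 2;   /* the actual work runs on a queue thread */
}

static void
example(void)
{
   struct util_queue queue;
   struct my_job job = { .input = 21 };

   /* One worker thread, room for 8 queued jobs, no special flags. */
   util_queue_init(&queue, "example", 8, 1, 0);
   util_queue_fence_init(&job.fence);

   /* util_queue_add_job resets the fence and hands the job to a worker. */
   util_queue_add_job(&queue, &job, &job.fence, my_job_execute, NULL);

   /* Block until the worker has signalled the fence. */
   util_queue_fence_wait(&job.fence);

   util_queue_fence_destroy(&job.fence);
   util_queue_destroy(&queue);
}
#endif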