From 404d0d50d8aaf60597668e65a2d7c96cdea53aea Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sat, 11 Jun 2016 15:40:28 +0200
Subject: [PATCH] gallium/u_queue: add an option to have multiple worker threads
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

independent jobs don't have to be stuck on only one thread

v2: use CALLOC & FREE

Reviewed-by: Nicolai Hähnle
---
Note (review): this copy differs from the original patch in one added line
of util_queue_init(). When pipe_thread_create() fails for index i > 0, only
threads[0..i-1] were created, so num_threads is set to i instead of i+1.
With i+1, util_queue_destroy() would call pipe_thread_wait() on the
zero-initialized threads[i] entry.

 src/gallium/auxiliary/util/u_queue.c          | 66 ++++++++++++++++---
 src/gallium/auxiliary/util/u_queue.h          | 12 ++--
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c     |  4 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h     |  2 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |  4 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |  2 +-
 .../winsys/radeon/drm/radeon_drm_winsys.c     |  2 +-
 8 files changed, 71 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_queue.c b/src/gallium/auxiliary/util/u_queue.c
index 7a67c1169d9..a958c04d5db 100644
--- a/src/gallium/auxiliary/util/u_queue.c
+++ b/src/gallium/auxiliary/util/u_queue.c
@@ -25,6 +25,7 @@
  */
 
 #include "u_queue.h"
+#include "u_memory.h"
 #include "os/os_time.h"
 
 static void
@@ -48,15 +49,23 @@ util_queue_job_wait(struct util_queue_fence *fence)
    pipe_mutex_unlock(fence->mutex);
 }
 
-static PIPE_THREAD_ROUTINE(util_queue_thread_func, param)
+struct thread_input {
+   struct util_queue *queue;
+   int thread_index;
+};
+
+static PIPE_THREAD_ROUTINE(util_queue_thread_func, input)
 {
-   struct util_queue *queue = (struct util_queue*)param;
+   struct util_queue *queue = ((struct thread_input*)input)->queue;
+   int thread_index = ((struct thread_input*)input)->thread_index;
+
+   FREE(input);
 
    while (1) {
       struct util_queue_job job;
 
       pipe_semaphore_wait(&queue->queued);
-      if (queue->kill_thread)
+      if (queue->kill_threads)
          break;
 
       pipe_mutex_lock(queue->lock);
@@ -68,7 +77,7 @@ static PIPE_THREAD_ROUTINE(util_queue_thread_func, param)
       pipe_semaphore_signal(&queue->has_space);
 
       if (job.job) {
-         queue->execute_job(job.job);
+         queue->execute_job(job.job, thread_index);
          util_queue_fence_signal(job.fence);
       }
    }
@@ -88,9 +97,13 @@ static PIPE_THREAD_ROUTINE(util_queue_thread_func, param)
 
 bool
 util_queue_init(struct util_queue *queue,
                 unsigned max_jobs,
-                void (*execute_job)(void *))
+                unsigned num_threads,
+                void (*execute_job)(void *, int))
 {
+   unsigned i;
+
    memset(queue, 0, sizeof(*queue));
+   queue->num_threads = num_threads;
    queue->max_jobs = max_jobs;
    queue->jobs = (struct util_queue_job*)
@@ -103,13 +116,36 @@ util_queue_init(struct util_queue *queue,
    pipe_semaphore_init(&queue->has_space, max_jobs);
    pipe_semaphore_init(&queue->queued, 0);
 
-   queue->thread = pipe_thread_create(util_queue_thread_func, queue);
-   if (!queue->thread)
+   queue->threads = (pipe_thread*)CALLOC(num_threads, sizeof(pipe_thread));
+   if (!queue->threads)
       goto fail;
 
+   /* start threads */
+   for (i = 0; i < num_threads; i++) {
+      struct thread_input *input = MALLOC_STRUCT(thread_input);
+      input->queue = queue;
+      input->thread_index = i;
+
+      queue->threads[i] = pipe_thread_create(util_queue_thread_func, input);
+
+      if (!queue->threads[i]) {
+         FREE(input);
+
+         if (i == 0) {
+            /* no threads created, fail */
+            goto fail;
+         } else {
+            /* threads 0..i-1 were created, so use only those */
+            queue->num_threads = i;
+            break;
+         }
+      }
+   }
    return true;
 
 fail:
+   FREE(queue->threads);
+
    if (queue->jobs) {
       pipe_semaphore_destroy(&queue->has_space);
       pipe_semaphore_destroy(&queue->queued);
@@ -124,13 +160,23 @@ fail:
 void
 util_queue_destroy(struct util_queue *queue)
 {
-   queue->kill_thread = 1;
-   pipe_semaphore_signal(&queue->queued);
-   pipe_thread_wait(queue->thread);
+   unsigned i;
+
+   /* Signal all threads to terminate. */
+   pipe_mutex_lock(queue->queued.mutex);
+   queue->kill_threads = 1;
+   queue->queued.counter = queue->num_threads;
+   pipe_condvar_broadcast(queue->queued.cond);
+   pipe_mutex_unlock(queue->queued.mutex);
+
+   for (i = 0; i < queue->num_threads; i++)
+      pipe_thread_wait(queue->threads[i]);
+
    pipe_semaphore_destroy(&queue->has_space);
    pipe_semaphore_destroy(&queue->queued);
    pipe_mutex_destroy(queue->lock);
    FREE(queue->jobs);
+   FREE(queue->threads);
 }
 
 void
diff --git a/src/gallium/auxiliary/util/u_queue.h b/src/gallium/auxiliary/util/u_queue.h
index acebb51382f..f3aa4f6f5c6 100644
--- a/src/gallium/auxiliary/util/u_queue.h
+++ b/src/gallium/auxiliary/util/u_queue.h
@@ -54,17 +54,19 @@ struct util_queue {
    pipe_mutex lock;
    pipe_semaphore has_space;
    pipe_semaphore queued;
-   pipe_thread thread;
-   int kill_thread;
+   pipe_thread *threads;
+   unsigned num_threads;
+   int kill_threads;
    int max_jobs;
    int write_idx, read_idx; /* ring buffer pointers */
    struct util_queue_job *jobs;
-   void (*execute_job)(void *job);
+   void (*execute_job)(void *job, int thread_index);
 };
 
 bool util_queue_init(struct util_queue *queue,
                      unsigned max_jobs,
-                     void (*execute_job)(void *));
+                     unsigned num_threads,
+                     void (*execute_job)(void *, int));
 void util_queue_destroy(struct util_queue *queue);
 void util_queue_fence_init(struct util_queue_fence *fence);
 void util_queue_fence_destroy(struct util_queue_fence *fence);
@@ -78,7 +80,7 @@ void util_queue_job_wait(struct util_queue_fence *fence);
 static inline bool
 util_queue_is_initialized(struct util_queue *queue)
 {
-   return queue->thread != 0;
+   return queue->threads != NULL;
 }
 
 static inline bool
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 4a7302ab2cb..5636f834de1 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -872,7 +872,7 @@ static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs)
    }
 }
 
-void amdgpu_cs_submit_ib(void *job)
+void amdgpu_cs_submit_ib(void *job, int thread_index)
 {
    struct amdgpu_cs *acs = (struct amdgpu_cs*)job;
    struct amdgpu_winsys *ws = acs->ctx->ws;
@@ -1054,7 +1054,7 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
           util_queue_is_initialized(&ws->cs_queue)) {
          util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed);
       } else {
-         amdgpu_cs_submit_ib(cs);
+         amdgpu_cs_submit_ib(cs, 0);
       }
    } else {
       amdgpu_cs_context_cleanup(cs->csc);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 354e403fa36..a7f3414269f 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -218,6 +218,6 @@ bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                        bool absolute);
 void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs);
 void amdgpu_cs_init_functions(struct amdgpu_winsys *ws);
-void amdgpu_cs_submit_ib(void *job);
+void amdgpu_cs_submit_ib(void *job, int thread_index);
 
 #endif
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index ddcdc865f1b..22a81220ace 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -493,7 +493,7 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
    pipe_mutex_init(ws->bo_fence_lock);
 
    if (sysconf(_SC_NPROCESSORS_ONLN) > 1 && debug_get_option_thread())
-      util_queue_init(&ws->cs_queue, 8, amdgpu_cs_submit_ib);
+      util_queue_init(&ws->cs_queue, 8, 1, amdgpu_cs_submit_ib);
 
    /* Create the screen at the end. The winsys must be initialized
     * completely.
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 9552bd5b950..9532a6a0f0f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -427,7 +427,7 @@ static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
     return cs->csc->crelocs;
 }
 
-void radeon_drm_cs_emit_ioctl_oneshot(void *job)
+void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index)
 {
     struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
     unsigned i;
@@ -590,7 +590,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
             if (!(flags & RADEON_FLUSH_ASYNC))
                 radeon_drm_cs_sync_flush(rcs);
         } else {
-            radeon_drm_cs_emit_ioctl_oneshot(cs);
+            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
         }
     } else {
         radeon_cs_context_cleanup(cs->cst);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index a5f243db533..53a3ae0cf2a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -122,6 +122,6 @@ radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
 
 void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs);
 void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
-void radeon_drm_cs_emit_ioctl_oneshot(void *job);
+void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index);
 
 #endif
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 453cbfc935c..32d58b9b9e5 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -783,7 +783,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
     ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
 
     if (ws->num_cpus > 1 && debug_get_option_thread())
-        util_queue_init(&ws->cs_queue, 8,
+        util_queue_init(&ws->cs_queue, 8, 1,
                         radeon_drm_cs_emit_ioctl_oneshot);
 
     /* Create the screen at the end. The winsys must be initialized
-- 
2.30.2