From 15cf7d170b3391ebde58f954cd2b90fff35b1ce5 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Fri, 10 Apr 2020 14:40:37 +0200 Subject: [PATCH] gallium/u_threaded: flush batch when hitting mapping limit MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit tc_transfer_map maps buffers directly, but the unmap operation is executed in the driver thread. When an application does a lot of map/unmap operations without flushing, this increases the RAM used (and eventually gets the app killed by the oom-killer). This commit allows tc to keep track of how many bytes were mapped during the current batch. When this estimation becomes higher than a threshold, we flush the batch. See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2735 Reviewed-by: Marek Olšák Part-of: --- .../auxiliary/util/u_threaded_context.c | 18 ++++++++++++++++++ .../auxiliary/util/u_threaded_context.h | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index 66e05fb1772..cfe88d310e8 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -108,6 +108,7 @@ tc_batch_flush(struct threaded_context *tc) tc_assert(next->num_total_call_slots != 0); tc_batch_check(next); tc_debug_check(tc); + tc->bytes_mapped_estimate = 0; p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots); if (next->token) { @@ -204,6 +205,7 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char /* .. and execute unflushed calls directly. */ if (next->num_total_call_slots) { p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots); + tc->bytes_mapped_estimate = 0; tc_batch_execute(next, 0); synced = true; } @@ -1489,6 +1491,8 @@ tc_transfer_map(struct pipe_context *_pipe, usage & PIPE_TRANSFER_DISCARD_RANGE ? " discard_range" : usage & PIPE_TRANSFER_READ ? 
" read" : " ??"); + tc->bytes_mapped_estimate += box->width; + return pipe->transfer_map(pipe, tres->latest ? tres->latest : resource, level, usage, box, transfer); } @@ -1584,6 +1588,10 @@ tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload) pipe->transfer_unmap(pipe, payload->transfer); } +static void +tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, + unsigned flags); + static void tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) { @@ -1606,6 +1614,16 @@ tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) } tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer; + + /* tc_transfer_map directly maps the buffers, but tc_transfer_unmap + * defers the unmap operation to the batch execution. + * bytes_mapped_estimate is an estimation of the map/unmap bytes delta + * and if it goes over an optional limit the current batch is flushed, + * to reclaim some RAM. */ + if (!ttrans->staging && tc->bytes_mapped_limit && + tc->bytes_mapped_estimate > tc->bytes_mapped_limit) { + tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC); + } } struct tc_buffer_subdata { diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h index a32f893592a..c54dec0b1d7 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.h +++ b/src/gallium/auxiliary/util/u_threaded_context.h @@ -361,6 +361,12 @@ struct threaded_context { unsigned num_direct_slots; unsigned num_syncs; + /* Estimation of how much vram/gtt bytes are mmap'd in + * the current tc_batch. + */ + uint64_t bytes_mapped_estimate; + uint64_t bytes_mapped_limit; + struct util_queue queue; struct util_queue_fence *fence; -- 2.30.2