From 2fea44c6361b171c9313a75a7e9ef4cbf97602f0 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 25 Feb 2020 15:40:20 -0500 Subject: [PATCH] panfrost: Simplify stack shift calculation I'm not sure why I never saw smaller values, but here you go. Signed-off-by: Alyssa Rosenzweig Reviewed-by: Tomeu Vizoso Part-of: --- src/panfrost/encoder/pan_scratch.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/panfrost/encoder/pan_scratch.c b/src/panfrost/encoder/pan_scratch.c index 04a9bb7d1f6..478a788b116 100644 --- a/src/panfrost/encoder/pan_scratch.c +++ b/src/panfrost/encoder/pan_scratch.c @@ -47,13 +47,14 @@ * https://en.wikipedia.org/wiki/Mali_%28GPU% [citation needed] * * Within a particular thread, there is stack allocated. If it is present, its - * size is a power-of-two, and it is at least 256 bytes. Stack is allocated + * size is a power-of-two, and it is at least 16 bytes. Stack is allocated * with the shared memory descriptor used for all shaders within a frame (note * that they don't execute concurrently so it's fine). So, consider the maximum * stack size used by any shader within a job, and then compute (where npot * denotes the next power of two): * - * allocated = npot(max(size, 256)) * (# of threads/core) * (# of cores) + * bytes/thread = npot(max(size, 16)) + * allocated = (# of bytes/thread) * (# of threads/core) * (# of cores) * * The size of Thread Local Storage is signaled to the GPU in a dedicated * log_stack_size field. Since stack sizes are powers of two, it follows that @@ -70,20 +71,26 @@ * stack_size <= 2^(log_stack_size + 4) * * Given the constraints about powers-of-two and the minimum of 256, we thus - * derive a formula for log_stack_size in terms of stack size (s): + * derive a formula for log_stack_size in terms of stack size (s), where s is + * positive: * - * log_stack_size = ceil(log2(max(s, 256))) - 4 + * log_stack_size = ceil(log2(max(s, 16))) - 4 * * There are other valid characterisations of this formula, of course, but this - * is computationally simple, so good enough for our purposes. + * is computationally simple, so good enough for our purposes. If s=0, since + * there is no spilling used whatsoever, we may set log_stack_size to 0 to + * disable the stack. */ -/* Computes log_stack_size = ceil(log2(max(s, 256))) - 4 */ +/* Computes log_stack_size = ceil(log2(max(s, 16))) - 4 */ unsigned panfrost_get_stack_shift(unsigned stack_size) { - return util_logbase2_ceil(MAX2(stack_size, 256)) - 4; + if (stack_size) + return util_logbase2_ceil(MAX2(stack_size, 16)) - 4; + else + return 0; } /* Computes the aligned stack size given the shift and thread count. The blob -- 2.30.2