From f37cec3275a3bf9d23dfbc43720bb6831eab0242 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 25 Feb 2020 15:34:51 -0500 Subject: [PATCH] panfrost: Default to 256 threads for TLS I'm not sure where I got the impression 1024 was the right number. From kbase: #define THREAD_MT_DEFAULT 256 (where MT = "max threads" and the number of threads to allocate for TLS is <= max threads). Let's cut our memory footprint for spilling by 75% :) Signed-off-by: Alyssa Rosenzweig Reviewed-by: Tomeu Vizoso Part-of: --- src/panfrost/encoder/pan_props.c | 14 ++++++++++---- src/panfrost/encoder/pan_scratch.c | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/panfrost/encoder/pan_props.c b/src/panfrost/encoder/pan_props.c index 9fd316295d0..fe9e5ab3e0a 100644 --- a/src/panfrost/encoder/pan_props.c +++ b/src/panfrost/encoder/pan_props.c @@ -75,11 +75,17 @@ panfrost_query_core_count(int fd) unsigned panfrost_query_thread_tls_alloc(int fd) { - /* On older kernels, we worst-case to 1024 threads, the architectural - * maximum for Midgard */ + /* On older kernels, we worst-case to 256 threads, the architectural + * maximum for Midgard. On my current kernel/hardware, I'm seeing this + * readback as 0, so we'll worst-case there too */ - return panfrost_query_raw(fd, - DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, false, 1024); + unsigned tls = panfrost_query_raw(fd, + DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, false, 256); + + if (tls) + return tls; + else + return 256; } /* Given a GPU ID like 0x860, return a prettified model name */ diff --git a/src/panfrost/encoder/pan_scratch.c b/src/panfrost/encoder/pan_scratch.c index c75c1f330ef..ccb2fd7fac3 100644 --- a/src/panfrost/encoder/pan_scratch.c +++ b/src/panfrost/encoder/pan_scratch.c @@ -41,7 +41,7 @@ * must allocate for, and DRM_PANFROST_PARAM_SHADER_PRESENT for a bitmask of * shader cores (so take a popcount of that mask for the number of shader * cores). 
On older kernels that do not support querying these values, - * following kbase, we may use the worst-case value of 1024 threads for + * following kbase, we may use the worst-case value of 256 threads for * THREAD_TLS_ALLOC, and the worst-case value of 16 cores for Midgard per the * "shader core count" column of the implementations table in * https://en.wikipedia.org/wiki/Mali_%28GPU%29 [citation needed] -- 2.30.2