panfrost: Simplify stack shift calculation

author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>

Tue, 25 Feb 2020 20:40:20 +0000 (15:40 -0500)

committer Tomeu Vizoso <tomeu.vizoso@collabora.com>

Thu, 27 Feb 2020 15:32:17 +0000 (16:32 +0100)
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tue, 25 Feb 2020 20:40:20 +0000 (15:40 -0500)
committer Tomeu Vizoso <tomeu.vizoso@collabora.com>
Thu, 27 Feb 2020 15:32:17 +0000 (16:32 +0100)
diff --git a/src/panfrost/encoder/pan_scratch.c b/src/panfrost/encoder/pan_scratch.c

index 04a9bb7d1f6430b4ed2ac436c143d0895b339094..478a788b116d54a6cf77b961c62c4bcff9615ba5 100644 (file)
--- a/src/panfrost/encoder/pan_scratch.c
+++ b/src/panfrost/encoder/pan_scratch.c
@@ -47,13 +47,14 @@
   * https://en.wikipedia.org/wiki/Mali_%28GPU% [citation needed]
   *
   * Within a particular thread, there is stack allocated. If it is present, its
- * size is a power-of-two, and it is at least 256 bytes. Stack is allocated
+ * size is a power-of-two, and it is at least 16 bytes. Stack is allocated
   * with the shared memory descriptor used for all shaders within a frame (note
   * that they don't execute concurrently so it's fine). So, consider the maximum
   * stack size used by any shader within a job, and then compute (where npot
   * denotes the next power of two):
   *
- *      allocated = npot(max(size, 256)) * (# of threads/core) * (# of cores)
+ *      bytes/thread = npot(max(size, 16))
+ *      allocated = (# of bytes/thread) * (# of threads/core) * (# of cores)
   *
   * The size of Thread Local Storage is signaled to the GPU in a dedicated
   * log_stack_size field. Since stack sizes are powers of two, it follows that
@@ -70,20 +71,26 @@
   *      stack_size <= 2^(log_stack_size + 4)
   *
   *  Given the constraints about powers-of-two and the minimum of 256, we thus
- *  derive a formula for log_stack_size in terms of stack size (s):
+ *  derive a formula for log_stack_size in terms of stack size (s), where s is
+ *  positive:
   *
- *      log_stack_size = ceil(log2(max(s, 256))) - 4
+ *      log_stack_size = ceil(log2(max(s, 16))) - 4
   *
   * There are other valid characterisations of this formula, of course, but this
- * is computationally simple, so good enough for our purposes.
+ * is computationally simple, so good enough for our purposes. If s=0, since
+ * there is no spilling used whatsoever, we may set log_stack_size to 0 to
+ * disable the stack.
   */
  
-/* Computes log_stack_size = ceil(log2(max(s, 256))) - 4 */
+/* Computes log_stack_size = ceil(log2(max(s, 16))) - 4 */
  
  unsigned
  panfrost_get_stack_shift(unsigned stack_size)
  {
-        return util_logbase2_ceil(MAX2(stack_size, 256)) - 4;
+        if (stack_size)
+                return util_logbase2_ceil(MAX2(stack_size, 16)) - 4;
+        else
+                return 0;
  }
  
  /* Computes the aligned stack size given the shift and thread count. The blob
author	Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
	Tue, 25 Feb 2020 20:40:20 +0000 (15:40 -0500)
committer	Tomeu Vizoso <tomeu.vizoso@collabora.com>
	Thu, 27 Feb 2020 15:32:17 +0000 (16:32 +0100)