nvc0: expose ARB_compute_variable_group_size
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Sat, 10 Sep 2016 14:45:32 +0000 (16:45 +0200)
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 6 Oct 2016 22:18:57 +0000 (00:18 +0200)
Expose only 512 threads/block on Fermi to avoid being limited to
32 GPRs/thread.

v4: - use 512 threads on Fermi, 1024 on Kepler+

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c

index df6c6af6037261efe07a5ae8bc4420663c7c50e9..afcb08ba6ef316003b2b9656c441e1d653921ce8 100644 (file)
@@ -448,6 +448,12 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
       RET(((uint64_t []) { 1024, 1024, 64 }));
    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
       RET((uint64_t []) { 1024 });
+   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+      if (obj_class >= NVE4_COMPUTE_CLASS) {
+         RET((uint64_t []) { 1024 });
+      } else {
+         RET((uint64_t []) { 512 });
+      }
    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
       RET((uint64_t []) { 1ULL << 40 });
    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
@@ -478,8 +484,6 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
       RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
       RET((uint32_t []) { 64 });
-   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
-      RET((uint64_t []) { 0 });
    default:
       return 0;
    }