nvc0: bump the amount of shared memory per MP on Maxwell
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 25 Apr 2016 22:15:25 +0000 (00:15 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 25 Apr 2016 22:32:25 +0000 (00:32 +0200)
According to the CUDA compute capability specifications, GM10x can expose
64KB of shared memory per MP, while GM20x can expose 96KB.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c

index 3bf98ad6a3b548e58425106bdbc8acdb83d5c136..3fe4309977230b00e71b6ce60c54a0537c2c7927 100644 (file)
@@ -444,7 +444,17 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
       RET((uint64_t []) { 1ULL << 40 });
    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
-      RET((uint64_t []) { 48 << 10 });
+      switch (obj_class) {
+      case GM200_COMPUTE_CLASS:
+         RET((uint64_t []) { 96 << 10 });
+         break;
+      case GM107_COMPUTE_CLASS:
+         RET((uint64_t []) { 64 << 10 });
+         break;
+      default:
+         RET((uint64_t []) { 48 << 10 });
+         break;
+      }
    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
       RET((uint64_t []) { 512 << 10 });
    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */