v3d: Detect the correct number of QPUs and use it to fix the spill size.
author Eric Anholt <eric@anholt.net>
Thu, 11 Apr 2019 19:28:30 +0000 (12:28 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 12 Apr 2019 22:59:31 +0000 (15:59 -0700)
We were missing a * 4 even if the particular hardware matched our
assumption.

src/broadcom/common/v3d_device_info.h
src/gallium/drivers/v3d/v3d_program.c
src/gallium/drivers/v3d/v3d_screen.c

index b0a2a02154cd8b04c4059e9f48fc016eb4e26380..608b584544405f8312174242f3ec51028688cb58 100644 (file)
@@ -35,6 +35,9 @@ struct v3d_device_info {
 
         /** Size of the VPM, in bytes. */
         int vpm_size;
+
+        /* NSLC * QUPS from the core's IDENT registers. */
+        int qpu_count;
 };
 
 #endif
index 7805b808a010edaa712e4d9892e705de6abe5019..cdacb5dbb8041972fc6a5bc50a7517c52854e762 100644 (file)
@@ -376,11 +376,13 @@ v3d_get_compiled_shader(struct v3d_context *v3d,
 
         if (shader->prog_data.base->spill_size >
             v3d->prog.spill_size_per_thread) {
-                /* Max 4 QPUs per slice, 3 slices per core. We only do single
-                 * core so far.  This overallocates memory on smaller cores.
+                /* The TIDX register we use for choosing the area to access
+                 * for scratch space is: (core << 6) | (qpu << 2) | thread.
+                 * Even at minimum threadcount in a particular shader, that
+                 * means we still multiply the QPU count by 4.
                  */
-                int total_spill_size =
-                        4 * 3 * shader->prog_data.base->spill_size;
+                int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 *
+                                        shader->prog_data.base->spill_size);
 
                 v3d_bo_unreference(&v3d->prog.spill_bo);
                 v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen,
index b77e3d9060e4f7f98f40906ffbf626afa4f0efc8..6f91e35521a16f72ae895c0abbc85e61e71e7a02 100644 (file)
@@ -594,6 +594,10 @@ v3d_get_device_info(struct v3d_screen *screen)
 
         screen->devinfo.vpm_size = (ident1.value >> 28 & 0xf) * 8192;
 
+        int nslc = (ident1.value >> 4) & 0xf;
+        int qups = (ident1.value >> 8) & 0xf;
+        screen->devinfo.qpu_count = nslc * qups;
+
         switch (screen->devinfo.ver) {
         case 33:
         case 41: