From: Eric Anholt Date: Thu, 11 Apr 2019 19:28:30 +0000 (-0700) Subject: v3d: Detect the correct number of QPUs and use it to fix the spill size. X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=8a2d91e1248e31426ff656c02d3e598f9e117422;ds=sidebyside v3d: Detect the correct number of QPUs and use it to fix the spill size. We were missing a * 4 even if the particular hardware matched our assumption. --- diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h index b0a2a02154c..608b5845444 100644 --- a/src/broadcom/common/v3d_device_info.h +++ b/src/broadcom/common/v3d_device_info.h @@ -35,6 +35,9 @@ struct v3d_device_info { /** Size of the VPM, in bytes. */ int vpm_size; + + /* NSLC * QUPS from the core's IDENT registers. */ + int qpu_count; }; #endif diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c index 7805b808a01..cdacb5dbb80 100644 --- a/src/gallium/drivers/v3d/v3d_program.c +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -376,11 +376,13 @@ v3d_get_compiled_shader(struct v3d_context *v3d, if (shader->prog_data.base->spill_size > v3d->prog.spill_size_per_thread) { - /* Max 4 QPUs per slice, 3 slices per core. We only do single - * core so far. This overallocates memory on smaller cores. + /* The TIDX register we use for choosing the area to access + * for scratch space is: (core << 6) | (qpu << 2) | thread. + * Even at minimum threadcount in a particular shader, that + * means we still multiply the QPU count by 4. 
*/ - int total_spill_size = - 4 * 3 * shader->prog_data.base->spill_size; + int total_spill_size = (v3d->screen->devinfo.qpu_count * 4 * + shader->prog_data.base->spill_size); v3d_bo_unreference(&v3d->prog.spill_bo); v3d->prog.spill_bo = v3d_bo_alloc(v3d->screen, diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index b77e3d9060e..6f91e35521a 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -594,6 +594,10 @@ v3d_get_device_info(struct v3d_screen *screen) screen->devinfo.vpm_size = (ident1.value >> 28 & 0xf) * 8192; + int nslc = (ident1.value >> 4) & 0xf; + int qups = (ident1.value >> 8) & 0xf; + screen->devinfo.qpu_count = nslc * qups; + switch (screen->devinfo.ver) { case 33: case 41: