From 25722e3454fb179933514f3a1b76e0f4662875bd Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Wed, 27 Mar 2013 23:38:29 +0100
Subject: [PATCH] nvc0: use NOUVEAU_GETPARAM_GRAPH_UNITS to get MP count

---
Review notes (below the "---" marker, so not part of the commit message):
 * nvc0_screen_create() now fails cleanly when
   nvc0_screen_resize_tls_area() returns FALSE instead of dereferencing
   screen->tls afterwards.
 * The TLS size is computed in 64 bits so the 1 MiB limit check cannot be
   bypassed by 32-bit wrap-around.
 * Indentation/alignment of this patch was reconstructed and the "index"
   blob ids were not regenerated; use "git apply --ignore-whitespace" if
   the context does not match.

 src/gallium/drivers/nvc0/nvc0_screen.c      | 73 ++++++++++++++++++-----
 src/gallium/drivers/nvc0/nvc0_screen.h      |  5 +-
 src/gallium/drivers/nvc0/nve4_compute.c     | 12 ++--
 src/gallium/drivers/nvc0/nve4_compute.xml.h | 22 ++++---
 4 files changed, 83 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index b6cf2ca8a1f..b5b4ef10d7a 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -32,6 +32,10 @@
 
 #include "nvc0_graph_macros.h"
 
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
 static boolean
 nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
                                 enum pipe_format format,
@@ -494,6 +498,44 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
    }
 }
 
+/* Allocate a new TLS buffer in VRAM and install it as screen->tls.
+ *
+ * The requested size, (lpos + lneg) * 32 + cstack, must be below 1 MiB.
+ * It is scaled by the max warp count (64 for chipset >= 0xe0, else 48)
+ * and by screen->mp_count, then rounded up to a multiple of 128 KiB.
+ *
+ * Returns TRUE on success. On failure an error is logged, FALSE is
+ * returned and screen->tls is left untouched. No methods are emitted.
+ */
+boolean
+nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
+                            uint32_t lpos, uint32_t lneg, uint32_t cstack)
+{
+   struct nouveau_bo *bo = NULL;
+   int ret;
+   uint64_t size = ((uint64_t)lpos + lneg) * 32 + cstack; /* no 32-bit wrap */
+
+   if (size >= (1 << 20)) {
+      NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
+      return FALSE;
+   }
+
+   size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
+   size *= screen->mp_count;
+
+   size = align(size, 1 << 17);
+
+   ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size,
+                        NULL, &bo);
+   if (ret) {
+      NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
+      return FALSE;
+   }
+   nouveau_bo_ref(NULL, &screen->tls);
+   screen->tls = bo;
+   return TRUE;
+}
+
 #define FAIL_SCREEN_INIT(str, err)                    \
    do {                                               \
       NOUVEAU_ERR(str, err);                          \
@@ -508,6 +550,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    struct pipe_screen *pscreen;
    struct nouveau_object *chan;
    struct nouveau_pushbuf *push;
+   uint64_t value;
    uint32_t obj_class;
    int ret;
    unsigned i;
@@ -733,18 +776,22 @@ nvc0_screen_create(struct nouveau_device *dev)
    PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
    PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
 
-   /* max MPs * max warps per MP (TODO: ask kernel) */
-   if (screen->eng3d->oclass >= NVE4_3D_CLASS)
-      screen->tls_size = 8 * 64 * 32;
-   else
-      screen->tls_size = 16 * 48 * 32;
-   screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16;
-   screen->tls_size = align(screen->tls_size, 1 << 17);
+   if (dev->drm_version >= 0x01000101) {
+      ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+      if (ret) {
+         NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n");
+         goto fail;
+      }
+   } else {
+      if (dev->chipset >= 0xe0 && dev->chipset < 0xf0)
+         value = (8 << 8) | 4;
+      else
+         value = (16 << 8) | 4;
+   }
+   screen->mp_count = value >> 8;
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
-                        screen->tls_size, NULL, &screen->tls);
-   if (ret)
-      goto fail;
+   if (!nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200))
+      goto fail;
 
    BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
    PUSH_DATAh(push, screen->text->offset);
@@ -752,8 +799,8 @@ nvc0_screen_create(struct nouveau_device *dev)
    BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4);
    PUSH_DATAh(push, screen->tls->offset);
    PUSH_DATA (push, screen->tls->offset);
-   PUSH_DATA (push, screen->tls_size >> 32);
-   PUSH_DATA (push, screen->tls_size);
+   PUSH_DATA (push, screen->tls->size >> 32);
+   PUSH_DATA (push, screen->tls->size);
    BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
    PUSH_DATA (push, 0);
    BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index 16f0febd3ea..13dc83e7e8a 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -38,7 +38,7 @@ struct nvc0_screen {
    struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
    struct nouveau_bo *poly_cache;
 
-   uint64_t tls_size;
+   uint16_t mp_count;
 
    struct nouveau_heap *text_heap;
    struct nouveau_heap *lib_code; /* allocated from text_heap */
@@ -86,6 +86,9 @@ int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
 
 int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
 
+boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+                                    uint32_t lneg, uint32_t cstack);
+
 static INLINE void
 nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
 {
diff --git a/src/gallium/drivers/nvc0/nve4_compute.c b/src/gallium/drivers/nvc0/nve4_compute.c
index c61d90cf0d1..943ae78b479 100644
--- a/src/gallium/drivers/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nvc0/nve4_compute.c
@@ -74,13 +74,13 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
     * Actually this might be per-MP TEMP size and looks like I'm only using
     * 2 MPs instead of all 8.
     */
-   BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(0)), 3);
-   PUSH_DATAh(push, screen->tls_size / 2);
-   PUSH_DATA (push, screen->tls_size / 2);
+   BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3);
+   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+   PUSH_DATA (push, screen->tls->size / screen->mp_count);
    PUSH_DATA (push, 0xff);
-   BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(1)), 3);
-   PUSH_DATAh(push, screen->tls_size / 2);
-   PUSH_DATA (push, screen->tls_size / 2);
+   BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3);
+   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+   PUSH_DATA (push, screen->tls->size / screen->mp_count);
    PUSH_DATA (push, 0xff);
 
    /* Unified address space ? Who needs that ? Certainly not OpenCL.
diff --git a/src/gallium/drivers/nvc0/nve4_compute.xml.h b/src/gallium/drivers/nvc0/nve4_compute.xml.h
index e513ae7eb86..2f110f57657 100644
--- a/src/gallium/drivers/nvc0/nve4_compute.xml.h
+++ b/src/gallium/drivers/nvc0/nve4_compute.xml.h
@@ -8,10 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- nve4_compute.xml (   6352 bytes, from 2013-03-10 14:59:45)
+- nve4_compute.xml (  11117 bytes, from 2013-03-27 19:22:20)
 - copyright.xml    (   6452 bytes, from 2011-08-11 18:25:12)
-- nvchipsets.xml   (   3870 bytes, from 2013-03-08 12:41:50)
-- nv_object.xml    (  13238 bytes, from 2013-02-07 16:35:34)
+- nvchipsets.xml   (   3954 bytes, from 2013-03-26 01:26:43)
+- nv_object.xml    (  13792 bytes, from 2013-03-26 01:26:43)
 - nv_defs.xml      (   4437 bytes, from 2011-08-11 18:25:12)
 - nv50_defs.xml    (   7783 bytes, from 2013-03-08 12:42:29)
 
@@ -110,15 +110,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define NVE4_COMPUTE_LAUNCH					0x000002bc
 
-#define NVE4_COMPUTE_TEMP_SIZE(i0)			       (0x000002e4 + 0xc*(i0))
-#define NVE4_COMPUTE_TEMP_SIZE__ESIZE				0x0000000c
-#define NVE4_COMPUTE_TEMP_SIZE__LEN				0x00000002
+#define NVE4_COMPUTE_MP_TEMP_SIZE(i0)			       (0x000002e4 + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE__ESIZE			0x0000000c
+#define NVE4_COMPUTE_MP_TEMP_SIZE__LEN				0x00000002
 
-#define NVE4_COMPUTE_TEMP_SIZE_HIGH(i0)			       (0x000002e4 + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE_HIGH(i0)		       (0x000002e4 + 0xc*(i0))
 
-#define NVE4_COMPUTE_TEMP_SIZE_LOW(i0)			       (0x000002e8 + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE_LOW(i0)		       (0x000002e8 + 0xc*(i0))
 
-#define NVE4_COMPUTE_TEMP_SIZE_MASK(i0)			       (0x000002ec + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE_MASK(i0)		       (0x000002ec + 0xc*(i0))
 
 #define NVE4_COMPUTE_UNK0310					0x00000310
 
@@ -200,6 +200,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NVE4_COMPUTE_UNK260c					0x0000260c
 
 #define NVE4_COMPUTE_LAUNCH_DESC__SIZE				0x00000100
+#define NVE4_COMPUTE_LAUNCH_DESC_6				0x00000018
+#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__MASK			0x00000c00
+#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__SHIFT		10
+
 #define NVE4_COMPUTE_LAUNCH_DESC_PROG_START			0x00000020
 
 #define NVE4_COMPUTE_LAUNCH_DESC_12				0x00000030
-- 
2.30.2