From debd9105122586ee00c0d20a73bb4e3191c50e70 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 24 Feb 2016 17:03:57 +0100 Subject: [PATCH] nvc0: bind driver cb for compute on c7[] for Kepler Instead of using the screen->parm buffer object which will be removed, upload auxiliary constants to uniform_bo to be consistent regarding what we already do for Fermi. This breaks surfaces support (for compute only) but this will be properly re-introduced later for ARB_shader_image_load_store. Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- .../drivers/nouveau/nvc0/nvc0_context.h | 6 +++ .../drivers/nouveau/nvc0/nvc0_program.c | 11 +++-- .../drivers/nouveau/nvc0/nve4_compute.c | 40 ++++++++++++------- .../drivers/nouveau/nvc0/nve4_compute.h | 25 ------------ 4 files changed, 37 insertions(+), 45 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 31e1272aeed..f4f2d0b9780 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -110,6 +110,12 @@ /* 32 textures handles, at 1 32-bits integer each */ #define NVC0_CB_AUX_TEX_INFO(i) 0x020 + (i) * 4 #define NVC0_CB_AUX_TEX_SIZE (32 * 4) +/* 8 sets of 32-bits coordinate offsets */ +#define NVC0_CB_AUX_MS_INFO 0x0a0 /* CP */ +#define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4) +/* block/grid size, at 3 32-bits integers each and gridid */ +#define NVC0_CB_AUX_GRID_INFO 0x0e0 /* CP */ +#define NVC0_CB_AUX_GRID_SIZE (7 * 4) /* 8 user clip planes, at 4 32-bits floats each */ #define NVC0_CB_AUX_UCP_INFO 0x100 #define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index a3433f4a10a..d76b48fa4c9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -540,17 +540,16 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, if (prog->type == PIPE_SHADER_COMPUTE) { if (chipset >= NVISA_GK104_CHIPSET) { - info->io.auxCBSlot = 0; - info->io.texBindBase = NVE4_CP_INPUT_TEX(0); - info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); - info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); + info->io.auxCBSlot = 7; + info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0); + info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO; info->io.bufInfoBase = 0; /* TODO */ } else { info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0); - info->io.suInfoBase = 0; /* TODO */ } info->io.msInfoCBSlot = 0; - info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; + info->io.msInfoBase = NVC0_CB_AUX_MS_INFO; + info->io.suInfoBase = 0; /* TODO */ } else { if (chipset >= NVISA_GK104_CHIPSET) { info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0); diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index b3d841461d6..cae4838be38 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -41,6 +41,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, int i; int ret; uint32_t obj_class; + uint64_t address; switch (dev->chipset & ~0xf) { case 0x100: @@ -65,7 +66,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, return ret; } - ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL, + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL, &screen->parm); if (ret) return ret; @@ -128,15 +129,17 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, } BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1); - PUSH_DATA (push, 0); /* does not interefere with 3D */ + PUSH_DATA (push, 7); /* does not interfere with 3D */ if (obj_class == NVF0_COMPUTE_CLASS) IMMED_NVC0(push, SUBC_CP(0x02c4), 1); + address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5); + /* MS sample coordinate offsets: these do not work with _ALT modes ! */ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS); - PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS); + PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO); + PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO); BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 64); PUSH_DATA (push, 1); @@ -159,7 +162,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATA (push, 3); /* 7 */ PUSH_DATA (push, 1); -#ifdef DEBUG +#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); @@ -194,6 +197,9 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0) uint32_t mask; unsigned i; const unsigned t = 1; + uint64_t address; + + address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5); mask = nvc0->surfaces_dirty[t]; while (mask) { @@ -205,8 +211,8 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0) * directly instead of via binding points, so we have to supply them. */ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); - PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); + PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i)); + PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i)); BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 64); PUSH_DATA (push, 1); @@ -271,6 +277,7 @@ static void nve4_compute_set_tex_handles(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; uint64_t address; const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE); unsigned i, n; @@ -282,11 +289,11 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0) n = util_logbase2(dirty) + 1 - i; assert(n); - address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i); + address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s); BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); - PUSH_DATAh(push, address); - PUSH_DATA (push, address); + PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i)); + PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i)); BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, n * 4); PUSH_DATA (push, 0x1); @@ -334,6 +341,9 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input, struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; + uint64_t address; + + address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5); if (cp->parm_size) { BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); @@ -347,8 +357,8 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input, PUSH_DATAp(push, input, cp->parm_size / 4); } BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); - PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); + PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO); + PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO); BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 7 * 4); PUSH_DATA (push, 0x1); @@ -408,7 +418,9 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, if (nvc0->constbuf[s][i].u.buf) nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]); } - nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE); + nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12); + nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, + NVC0_CB_AUX_INFO(5), 1 << 10); } static inline struct nve4_cp_launch_desc * @@ -495,7 +507,7 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) struct nouveau_pushbuf *push = nvc0->base.pushbuf; const unsigned s = 5; unsigned i; - uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX]; + uint32_t commands[2][32]; unsigned n[2] = { 0, 0 }; for (i = 0; i < nvc0->num_textures[s]; ++i) { diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h index 84f8593b9b6..dcafbeda397 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h @@ -4,31 +4,6 @@ #include "nvc0/nve4_compute.xml.h" -/* Input space is implemented as c0[], to which we bind the screen->parm bo. - */ -#define NVE4_CP_INPUT_USER 0x0000 -#define NVE4_CP_INPUT_USER_LIMIT 0x1000 -#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4) -#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4) -#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4) -#define NVE4_CP_INPUT_GRIDID 0x1018 -#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4) -#define NVE4_CP_INPUT_TEX_STRIDE 4 -#define NVE4_CP_INPUT_TEX_MAX 32 -#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0 -#define NVE4_CP_INPUT_SUF_STRIDE 64 -#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE) -#define NVE4_CP_INPUT_SUF_MAX 32 -#define NVE4_CP_INPUT_TRAP_INFO_PTR 0x1900 -#define NVE4_CP_INPUT_TEMP_PTR 0x1908 -#define NVE4_CP_INPUT_MP_TEMP_SIZE 0x1910 -#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914 -#define NVE4_CP_INPUT_CSTACK_SIZE 0x1918 -#define NVE4_CP_INPUT_SIZE 0x1a00 -#define NVE4_CP_PARAM_TRAP_INFO 0x2000 -#define NVE4_CP_PARAM_TRAP_INFO_SZ (1 << 16) -#define NVE4_CP_PARAM_SIZE (NVE4_CP_PARAM_TRAP_INFO + (1 << 16)) - struct nve4_cp_launch_desc { u32 unk0[8]; -- 2.30.2