nvc0: bind driver cb for compute on c7[] for Kepler
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 24 Feb 2016 16:03:57 +0000 (17:03 +0100)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 1 Apr 2016 20:26:24 +0000 (22:26 +0200)
Instead of using the screen->parm buffer object which will be removed,
upload auxiliary constants to uniform_bo to be consistent regarding
what we already do for Fermi.

This breaks surfaces support (for compute only) but this will be
properly re-introduced later for ARB_shader_image_load_store.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/nvc0/nvc0_context.h
src/gallium/drivers/nouveau/nvc0/nvc0_program.c
src/gallium/drivers/nouveau/nvc0/nve4_compute.c
src/gallium/drivers/nouveau/nvc0/nve4_compute.h

index 31e1272aeed96dd4b018153023b53772a2a166d1..f4f2d0b9780a8fb30e66eb7a1fc241c1ab035925 100644 (file)
 /* 32 textures handles, at 1 32-bits integer each */
 #define NVC0_CB_AUX_TEX_INFO(i)     0x020 + (i) * 4
 #define NVC0_CB_AUX_TEX_SIZE        (32 * 4)
+/* 8 sets of 32-bits coordinate offsets */
+#define NVC0_CB_AUX_MS_INFO         0x0a0 /* CP */
+#define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
+/* block/grid size, at 3 32-bits integers each and gridid */
+#define NVC0_CB_AUX_GRID_INFO       0x0e0 /* CP */
+#define NVC0_CB_AUX_GRID_SIZE       (7 * 4)
 /* 8 user clip planes, at 4 32-bits floats each */
 #define NVC0_CB_AUX_UCP_INFO        0x100
 #define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
index a3433f4a10a3c25ce258f7e8aab17bddcc2462e9..d76b48fa4c90d697e1cccf65b9839aef6661fba5 100644 (file)
@@ -540,17 +540,16 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
 
    if (prog->type == PIPE_SHADER_COMPUTE) {
       if (chipset >= NVISA_GK104_CHIPSET) {
-         info->io.auxCBSlot = 0;
-         info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
-         info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
-         info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
+         info->io.auxCBSlot = 7;
+         info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
+         info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
          info->io.bufInfoBase = 0; /* TODO */
       } else {
          info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
-         info->io.suInfoBase = 0; /* TODO */
       }
       info->io.msInfoCBSlot = 0;
-      info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
+      info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
+      info->io.suInfoBase = 0; /* TODO */
    } else {
       if (chipset >= NVISA_GK104_CHIPSET) {
          info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
index b3d841461d65e1de74832880b04985d2b67a829f..cae4838be384c67fd784da7eb695223c9b349b3e 100644 (file)
@@ -41,6 +41,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
    int i;
    int ret;
    uint32_t obj_class;
+   uint64_t address;
 
    switch (dev->chipset & ~0xf) {
    case 0x100:
@@ -65,7 +66,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
       return ret;
    }
 
-   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL,
+   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
                         &screen->parm);
    if (ret)
       return ret;
@@ -128,15 +129,17 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
    }
 
    BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
-   PUSH_DATA (push, 0); /* does not interefere with 3D */
+   PUSH_DATA (push, 7); /* does not interfere with 3D */
 
    if (obj_class == NVF0_COMPUTE_CLASS)
       IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
 
+   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
+
    /* MS sample coordinate offsets: these do not work with _ALT modes ! */
    BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
-   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
-   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
+   PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);
+   PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);
    BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
    PUSH_DATA (push, 64);
    PUSH_DATA (push, 1);
@@ -159,7 +162,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
    PUSH_DATA (push, 3); /* 7 */
    PUSH_DATA (push, 1);
 
-#ifdef DEBUG
+#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
    BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
    PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
    PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
@@ -194,6 +197,9 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
    uint32_t mask;
    unsigned i;
    const unsigned t = 1;
+   uint64_t address;
+
+   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
 
    mask = nvc0->surfaces_dirty[t];
    while (mask) {
@@ -205,8 +211,8 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
        * directly instead of via binding points, so we have to supply them.
        */
       BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
-      PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
-      PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
+      PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i));
+      PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i));
       BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
       PUSH_DATA (push, 64);
       PUSH_DATA (push, 1);
@@ -271,6 +277,7 @@ static void
 nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
    uint64_t address;
    const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
    unsigned i, n;
@@ -282,11 +289,11 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
    n = util_logbase2(dirty) + 1 - i;
    assert(n);
 
-   address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
+   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
 
    BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
-   PUSH_DATAh(push, address);
-   PUSH_DATA (push, address);
+   PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));
+   PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));
    BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
    PUSH_DATA (push, n * 4);
    PUSH_DATA (push, 0x1);
@@ -334,6 +341,9 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
    struct nvc0_screen *screen = nvc0->screen;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nvc0_program *cp = nvc0->compprog;
+   uint64_t address;
+
+   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);
 
    if (cp->parm_size) {
       BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
@@ -347,8 +357,8 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
       PUSH_DATAp(push, input, cp->parm_size / 4);
    }
    BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
-   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
-   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
+   PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
+   PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
    BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
    PUSH_DATA (push, 7 * 4);
    PUSH_DATA (push, 0x1);
@@ -408,7 +418,9 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
       if (nvc0->constbuf[s][i].u.buf)
          nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
    }
-   nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
+   nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12);
+   nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
+                              NVC0_CB_AUX_INFO(5), 1 << 10);
 }
 
 static inline struct nve4_cp_launch_desc *
@@ -495,7 +507,7 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    const unsigned s = 5;
    unsigned i;
-   uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
+   uint32_t commands[2][32];
    unsigned n[2] = { 0, 0 };
 
    for (i = 0; i < nvc0->num_textures[s]; ++i) {
index 84f8593b9b64a8dc9081e83b2b9fc9cccf3f8619..dcafbeda397a4d5f383c6c352f278eac14e50e25 100644 (file)
@@ -4,31 +4,6 @@
 
 #include "nvc0/nve4_compute.xml.h"
 
-/* Input space is implemented as c0[], to which we bind the screen->parm bo.
- */
-#define NVE4_CP_INPUT_USER           0x0000
-#define NVE4_CP_INPUT_USER_LIMIT     0x1000
-#define NVE4_CP_INPUT_GRID_INFO(i)  (0x1000 + (i) * 4)
-#define NVE4_CP_INPUT_NTID(i)       (0x1000 + (i) * 4)
-#define NVE4_CP_INPUT_NCTAID(i)     (0x100c + (i) * 4)
-#define NVE4_CP_INPUT_GRIDID         0x1018
-#define NVE4_CP_INPUT_TEX(i)        (0x1040 + (i) * 4)
-#define NVE4_CP_INPUT_TEX_STRIDE     4
-#define NVE4_CP_INPUT_TEX_MAX        32
-#define NVE4_CP_INPUT_MS_OFFSETS     0x10c0
-#define NVE4_CP_INPUT_SUF_STRIDE     64
-#define NVE4_CP_INPUT_SUF(i)        (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
-#define NVE4_CP_INPUT_SUF_MAX        32
-#define NVE4_CP_INPUT_TRAP_INFO_PTR  0x1900
-#define NVE4_CP_INPUT_TEMP_PTR       0x1908
-#define NVE4_CP_INPUT_MP_TEMP_SIZE   0x1910
-#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914
-#define NVE4_CP_INPUT_CSTACK_SIZE    0x1918
-#define NVE4_CP_INPUT_SIZE           0x1a00
-#define NVE4_CP_PARAM_TRAP_INFO      0x2000
-#define NVE4_CP_PARAM_TRAP_INFO_SZ  (1 << 16)
-#define NVE4_CP_PARAM_SIZE          (NVE4_CP_PARAM_TRAP_INFO + (1 << 16))
-
 struct nve4_cp_launch_desc
 {
    u32 unk0[8];