nvc0: add support for indirect compute on Fermi
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 12 Jan 2016 22:51:00 +0000 (23:51 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Sun, 21 Feb 2016 09:41:45 +0000 (10:41 +0100)
When indirect compute is used, the size of the grid (in blocks) is
stored as three integers inside a buffer. This requires a macro to
set up GRIDDIM_YX and GRIDDIM_Z.

Changes from v2:
 - do not launch the grid if the number of groups for a dimension is 0

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/nvc0/mme/Makefile
src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme [new file with mode: 0644]
src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h [new file with mode: 0644]
src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c

index 1c0f58359730da3295106215e0b38a4d5155936d..52fb0a5481202f0e38716e5dca8f0e2e979d50c1 100644 (file)
@@ -1,5 +1,5 @@
 ENVYAS?=envyas
-TARGETS=com9097.mme.h
+TARGETS=com9097.mme.h com90c0.mme.h
 
 all: $(TARGETS)
 
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme
new file mode 100644 (file)
index 0000000..a3f1bde
--- /dev/null
@@ -0,0 +1,24 @@
+/* NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT
+ *
+ * arg     = num_groups_x
+ * parm[0] = num_groups_y
+ * parm[1] = num_groups_z
+ */
+.section #mme90c0_launch_grid_indirect
+   parm $r2 maddr 0x108e /* GRIDDIM_YX */
+   braz $r1 #fail
+   parm $r3
+   braz annul $r2 #fail
+   braz annul $r3 #fail
+   send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* num_groups_y << 16 | num_groups_x */
+   send $r3
+   maddrsend 0xa7 /* COMPUTE_BEGIN */
+   maddrsend 0x282 /* UNKA08 */
+   maddr 0xda /* LAUNCH */
+   send 0x1000
+   maddrsend 0x281 /* COMPUTE_END */
+   exit maddr 0xd8 /* UNK360 */
+   send 0x1
+fail:
+   exit
+   nop
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h
new file mode 100644 (file)
index 0000000..1dc06e5
--- /dev/null
@@ -0,0 +1,19 @@
+uint32_t mme90c0_launch_grid_indirect[] = {
+       0x04238251,
+       0x00034807,
+       0x00000301,
+/* 0x000e: fail */
+       0x0002d027,
+       0x00029827,
+       0x84008842,
+       0x00001841,
+       0x0029c071,
+       0x00a08071,
+       0x00368021,
+       0x04000041,
+       0x00a04071,
+       0x003600a1,
+       0x00004041,
+       0x00000091,
+       0x00000011,
+};
index 70e76ba48b47a01ee1ffee7f15e8b4b08ce38e34..884b15f875dd610f0fb4cb7acaf76bb1570e5252 100644 (file)
@@ -360,14 +360,6 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
    PUSH_DATA (push, cp->num_gprs);
 
-   /* grid/block setup */
-   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
-   PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
-   PUSH_DATA (push, info->grid[2]);
-   BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
-   PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
-   PUSH_DATA (push, info->block[2]);
-
    /* launch preliminary setup */
    BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
    PUSH_DATA (push, 0x1);
@@ -376,17 +368,39 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
    PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
 
-   /* kernel launching */
-   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
-   PUSH_DATA (push, 0);
-   BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
-   PUSH_DATA (push, 0);
-   BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
-   PUSH_DATA (push, 0x1000);
-   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
-   PUSH_DATA (push, 0);
-   BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
-   PUSH_DATA (push, 0x1);
+   /* block setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
+   PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
+   PUSH_DATA (push, info->block[2]);
+
+   if (unlikely(info->indirect)) {
+      struct nv04_resource *res = nv04_resource(info->indirect);
+      uint32_t offset = res->offset + info->indirect_offset;
+      unsigned macro = NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT;
+
+      nouveau_pushbuf_space(push, 16, 0, 1);
+      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
+      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
+      nouveau_pushbuf_data(push, res->bo, offset,
+                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
+   } else {
+      /* grid setup */
+      BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
+      PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
+      PUSH_DATA (push, info->grid[2]);
+
+      /* kernel launching */
+      BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
+      PUSH_DATA (push, 0);
+      BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
+      PUSH_DATA (push, 0);
+      BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
+      PUSH_DATA (push, 0x1000);
+      BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
+      PUSH_DATA (push, 0);
+      BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
+      PUSH_DATA (push, 0x1);
+   }
 
    /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
    nvc0->dirty |= NVC0_NEW_CONSTBUF;
index 49e176cbd49882baf84d8501fe61b05d27876cec..57262fe0e4ab20fab6825be6ffe29baa88ddb1b3 100644 (file)
@@ -35,4 +35,6 @@
 
 #define NVC0_3D_MACRO_QUERY_BUFFER_WRITE                       0x00003858
 
+#define NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT       0x00003860
+
 #endif /* __NVC0_MACROS_H__ */
index 51cfcfe8a0692cffd2ece76c9302d2f65391de84..25e2e5cedd2199e91037a320d3753198160961dd 100644 (file)
@@ -36,6 +36,7 @@
 #include "nvc0/nvc0_screen.h"
 
 #include "nvc0/mme/com9097.mme.h"
+#include "nvc0/mme/com90c0.mme.h"
 
 static boolean
 nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
@@ -1074,6 +1075,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
    MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
    MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
+   MK_MACRO(NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
 
    BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
    PUSH_DATA (push, 1);