From: Samuel Pitoiset Date: Tue, 12 Jan 2016 22:51:00 +0000 (+0100) Subject: nvc0: add support for indirect compute on Fermi X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c6293877f0d2f69db122d351c14ab2d21b7b2964;p=mesa.git nvc0: add support for indirect compute on Fermi When indirect compute is used, the size of the grid (in blocks) is stored as three integers inside a buffer. This requires a macro to set up GRIDDIM_YX and GRIDDIM_Z. Changes from v2: - do not launch the grid if the number of groups for a dimension is 0 Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- diff --git a/src/gallium/drivers/nouveau/nvc0/mme/Makefile b/src/gallium/drivers/nouveau/nvc0/mme/Makefile index 1c0f5835973..52fb0a54812 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/Makefile +++ b/src/gallium/drivers/nouveau/nvc0/mme/Makefile @@ -1,5 +1,5 @@ ENVYAS?=envyas -TARGETS=com9097.mme.h +TARGETS=com9097.mme.h com90c0.mme.h all: $(TARGETS) diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme new file mode 100644 index 00000000000..a3f1bdeebcd --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme @@ -0,0 +1,24 @@ +/* NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT + * + * arg = num_groups_x + * parm[0] = num_groups_y + * parm[1] = num_groups_z + */ +.section #mme90c0_launch_grid_indirect + parm $r2 maddr 0x108e /* GRIDDIM_YX */ + braz $r1 #fail + parm $r3 + braz annul $r2 #fail + braz annul $r3 #fail + send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* num_groups_y << 16 | num_groups_x */ + send $r3 + maddrsend 0xa7 /* COMPUTE_BEGIN */ + maddrsend 0x282 /* UNKA08 */ + maddr 0xda /* LAUNCH */ + send 0x1000 + maddrsend 0x281 /* COMPUTE_END */ + exit maddr 0xd8 /* UNK360 */ + send 0x1 +fail: + exit + nop diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h new file mode 100644 index 00000000000..1dc06e5e690 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h @@ -0,0 +1,19 @@ +uint32_t mme90c0_launch_grid_indirect[] = { + 0x04238251, + 0x00034807, + 0x00000301, +/* 0x000e: fail */ + 0x0002d027, + 0x00029827, + 0x84008842, + 0x00001841, + 0x0029c071, + 0x00a08071, + 0x00368021, + 0x04000041, + 0x00a04071, + 0x003600a1, + 0x00004041, + 0x00000091, + 0x00000011, +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index 70e76ba48b4..884b15f875d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -360,14 +360,6 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); PUSH_DATA (push, cp->num_gprs); - /* grid/block setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); - PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]); - PUSH_DATA (push, info->grid[2]); - BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); - PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); - PUSH_DATA (push, info->block[2]); - /* launch preliminary setup */ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); PUSH_DATA (push, 0x1); @@ -376,17 +368,39 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); - /* kernel launching */ - BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); - PUSH_DATA (push, 0x1000); - BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); - PUSH_DATA (push, 0x1); + /* block setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); + PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); + PUSH_DATA (push, info->block[2]); + + if (unlikely(info->indirect)) { + struct nv04_resource *res = nv04_resource(info->indirect); + uint32_t offset = res->offset + info->indirect_offset; + unsigned macro = NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT; + + nouveau_pushbuf_space(push, 16, 0, 1); + PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); + PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); + nouveau_pushbuf_data(push, res->bo, offset, + NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); + } else { + /* grid setup */ + BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); + PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]); + PUSH_DATA (push, info->grid[2]); + + /* kernel launching */ + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); + PUSH_DATA (push, 0x1000); + BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); + PUSH_DATA (push, 0x1); + } /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */ nvc0->dirty |= NVC0_NEW_CONSTBUF; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h index 49e176cbd49..57262fe0e4a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h @@ -35,4 +35,6 @@ #define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858 +#define NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT 0x00003860 + #endif /* __NVC0_MACROS_H__ */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 51cfcfe8a06..25e2e5cedd2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -36,6 +36,7 @@ #include "nvc0/nvc0_screen.h" #include "nvc0/mme/com9097.mme.h" +#include "nvc0/mme/com90c0.mme.h" static boolean nvc0_screen_is_format_supported(struct pipe_screen *pscreen, @@ -1074,6 +1075,7 @@ nvc0_screen_create(struct nouveau_device *dev) MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); + MK_MACRO(NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); PUSH_DATA (push, 1);