From 7ca67c752bca08a38a7334cace15ce2b8429a318 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 2 Jan 2016 00:45:56 -0500 Subject: [PATCH] nvc0: add support for real ARB_multi_draw_indirect The draw groups are now split up into groups of 32 if there's a non-packed stride, or in groups of 400-500 if the draw data is packed. Signed-off-by: Ilia Mirkin --- .../drivers/nouveau/nvc0/nvc0_query_hw.c | 2 - .../drivers/nouveau/nvc0/nvc0_screen.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 60 ++++++++++++++----- .../drivers/nouveau/nvc0/nvc0_winsys.h | 1 + 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index 8021a65dc46..1bed0162baf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -470,8 +470,6 @@ nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, { struct nvc0_hw_query *hq = nvc0_hw_query(q); -#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8)) - PUSH_REFN(push, hq->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART); nouveau_pushbuf_data(push, hq->bo, hq->offset + result_offset, 4 | NVC0_IB_ENTRY_1_NO_PREFETCH); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index a184e8fdd87..86bd8632d0b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -186,6 +186,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CLEAR_TEXTURE: case PIPE_CAP_DRAW_PARAMETERS: case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_MULTI_DRAW_INDIRECT: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; @@ -208,7 +209,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: return 0; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 55aeb806288..1d889b9db0d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -807,8 +807,8 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nv04_resource *buf = nv04_resource(info->indirect); - unsigned size; - const uint32_t offset = buf->offset + info->indirect_offset; + unsigned size, macro, count = info->indirect_count, drawid = info->drawid; + uint32_t offset = buf->offset + info->indirect_offset; /* must make FIFO wait for engines idle before continuing to process */ if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) @@ -820,13 +820,11 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); - nouveau_pushbuf_space(push, 8, 0, 1); - PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain); if (info->indexed) { assert(nvc0->idxbuf.buffer); assert(nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer)); - size = 5 * 4; - BEGIN_1IC0(push, NVC0_3D(MACRO_DRAW_ELEMENTS_INDIRECT), 3 + size / 4); + size = 5; + macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT; } else { if (nvc0->state.index_bias) { /* index_bias is implied 0 if !info->indexed (really ?) */ @@ -834,15 +832,47 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) IMMED_NVC0(push, NVC0_3D(VERTEX_ID_BASE), 0); nvc0->state.index_bias = 0; } - size = 4 * 4; - BEGIN_1IC0(push, NVC0_3D(MACRO_DRAW_ARRAYS_INDIRECT), 3 + size / 4); - } - PUSH_DATA(push, nvc0_prim_gl(info->mode)); - PUSH_DATA(push, info->drawid); - PUSH_DATA(push, 1); -#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8)) - nouveau_pushbuf_data(push, - buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size); + size = 4; + macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT; + } + + /* If the stride is not the natural stride, we have to stick a separate + * push data reference for each draw. Otherwise it can all go in as one. + * Of course there is a maximum packet size, so we have to break things up + * along those borders as well. + */ + while (count) { + unsigned draws = count, pushes, i; + if (info->indirect_stride == size * 4) { + draws = MIN2(draws, (NV04_PFIFO_MAX_PACKET_LEN - 4) / size); + pushes = 1; + } else { + draws = MIN2(draws, 32); + pushes = draws; + } + + nouveau_pushbuf_space(push, 8, 0, pushes); + PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain); + PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(0, macro, 3 + draws * size)); + PUSH_DATA(push, nvc0_prim_gl(info->mode)); + PUSH_DATA(push, drawid); + PUSH_DATA(push, draws); + if (pushes == 1) { + nouveau_pushbuf_data(push, + buf->bo, offset, + NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4 * draws)); + offset += draws * info->indirect_stride; + } else { + for (i = 0; i < pushes; i++) { + nouveau_pushbuf_data(push, + buf->bo, offset, + NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4)); + offset += info->indirect_stride; + } + } + count -= draws; + drawid += draws; + } } static inline void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h index 4ea8ca3cfa2..79abe78b77a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h @@ -68,6 +68,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_SW(m) 7, (m) #define NVC0_3D_SERIALIZE NV50_GRAPH_SERIALIZE +#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8)) static inline uint32_t NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size) -- 2.30.2