From 67b31b3c59a3b950897709d6c472348c4e12951c Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 2 Jan 2016 11:38:42 -0500 Subject: [PATCH] nvc0: add ARB_indirect_parameters support I chose to make separate macros for this due to the additional complexity and extra scratch usage. Signed-off-by: Ilia Mirkin --- docs/relnotes/11.2.0.html | 1 + .../drivers/nouveau/nvc0/mme/com9097.mme | 157 ++++++++++++++++++ .../drivers/nouveau/nvc0/mme/com9097.mme.h | 125 ++++++++++++++ .../drivers/nouveau/nvc0/nvc0_macros.h | 4 + .../drivers/nouveau/nvc0/nvc0_screen.c | 4 +- src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 29 +++- 6 files changed, 314 insertions(+), 6 deletions(-) diff --git a/docs/relnotes/11.2.0.html b/docs/relnotes/11.2.0.html index d31da8ba4a6..616c134a768 100644 --- a/docs/relnotes/11.2.0.html +++ b/docs/relnotes/11.2.0.html @@ -47,6 +47,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_base_instance on freedreno/a4xx
  • GL_ARB_compute_shader on i965
  • GL_ARB_copy_image on r600
  • +GL_ARB_indirect_parameters on nvc0
  • GL_ARB_shader_draw_parameters on i965, nvc0
  • GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)
  • GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx
  • diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme index 35355edf2e7..4daa57d47bb 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme @@ -334,3 +334,160 @@ dai_end: mov $r6 (add $r6 1) exit maddr 0x50e /* VB_INSTANCE_BASE to restore */ send $r5 + +/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT + * + * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE. + * Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE. + * + * arg = mode + * parm[0] = start_drawid + * parm[1] = numparams + * parm[2] = totaldraws + * parm[3 + 5n + 0] = count + * parm[3 + 5n + 1] = instance_count + * parm[3 + 5n + 2] = start + * parm[3 + 5n + 3] = index_bias + * parm[3 + 5n + 4] = start_instance + * + * SCRATCH[0] = saved VB_ELEMENT_BASE + * SCRATCH[1] = saved VB_INSTANCE_BASE + * SCRATCH[2] = draws left + */ +.section #mme9097_draw_elts_indirect_count + read $r6 0x50d /* VB_ELEMENT_BASE */ + read $r7 0x50e /* VB_INSTANCE_BASE */ + maddr 0x1d00 + send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */ + send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */ + parm $r6 /* start_drawid */ + parm $r7 /* numparams */ + parm $r5 /* totaldraws */ + mov $r5 (sub $r5 $r6) /* draws left */ + braz $r5 #deic_runout + mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */ + branz $r3 #deic_runout + send $r5 +deic_draw_again: + parm $r3 /* count */ + parm $r2 /* instance_count */ + parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */ + parm $r4 send $r4 /* index_bias, send start */ + maddr 0x18e3 /* CB_POS */ + send 0x180 /* 256 + 128 */ + braz $r2 #deic_end + parm $r5 send $r4 /* start_instance, send index_bias */ + send $r5 /* send start_instance */ + send $r6 /* draw id */ + maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */ + send $r4 + send $r5 + maddr 0x446 + send $r4 + mov $r4 0x1 + mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */ +deic_again: + maddr 0x586 /* VERTEX_BEGIN_GL 
*/ + send $r1 /* mode */ + maddr 0x5f8 /* INDEX_BATCH_COUNT */ + send $r3 /* count */ + mov $r2 (sub $r2 $r4) + maddrsend 0x585 /* VERTEX_END_GL */ + branz $r2 #deic_again + mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */ +deic_end: + read $r5 0xd02 + mov $r5 (add $r5 -1) + braz $r5 #deic_runout_check + mov $r7 (add $r7 -1) + maddr 0xd02 + send $r5 + branz $r7 #deic_draw_again + mov $r6 (add $r6 1) +deic_restore: + read $r6 0xd00 + read $r7 0xd01 + maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */ + send $r6 + send $r7 + exit maddr 0x446 + send $r6 +deic_runout: + parm $r2 + parm $r2 + parm $r2 + parm $r2 + parm $r2 + mov $r7 (add $r7 -1) +deic_runout_check: + branz annul $r7 #deic_runout + bra annul #deic_restore + +/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT: + * + * NOTE: Saves and restores VB_INSTANCE_BASE. + * + * arg = mode + * parm[0] = start_drawid + * parm[1] = numparams + * parm[2] = totaldraws + * parm[3 + 4n + 0] = count + * parm[3 + 4n + 1] = instance_count + * parm[3 + 4n + 2] = start + * parm[3 + 4n + 3] = start_instance + * + * SCRATCH[0] = VB_INSTANCE_BASE + */ +.section #mme9097_draw_arrays_indirect_count + read $r5 0x50e /* VB_INSTANCE_BASE */ + maddr 0xd00 + parm $r6 send $r5 /* start_drawid, save VB_INSTANCE_BASE */ + parm $r7 /* numparams */ + parm $r5 /* totaldraws */ + mov $r5 (sub $r5 $r6) /* draws left */ + braz $r5 #daic_runout + mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */ + branz annul $r3 #daic_runout +daic_draw_again: + parm $r2 /* count */ + parm $r3 /* instance_count */ + parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */ + braz $r3 #daic_end + parm $r4 send $r4 /* start_instance */ + maddr 0x18e3 /* CB_POS */ + send 0x180 /* 256 + 128 */ + send 0x0 /* send 0 as base_vertex */ + send $r4 /* send start_instance */ + send $r6 /* draw id */ + maddr 0x50e /* VB_INSTANCE_BASE */ + send $r4 + mov $r4 0x1 + mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */ +daic_again: + maddr 0x586 /* VERTEX_BEGIN_GL */ + 
send $r1 /* mode */ + maddr 0x35e /* VERTEX_BUFFER_COUNT */ + send $r2 + mov $r3 (sub $r3 $r4) + maddrsend 0x585 /* VERTEX_END_GL */ + branz $r3 #daic_again + mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */ +daic_end: + mov $r5 (add $r5 -1) + braz $r5 #daic_runout_check + mov $r7 (add $r7 -1) + branz $r7 #daic_draw_again + mov $r6 (add $r6 1) +daic_restore: + read $r5 0xd00 + exit maddr 0x50e /* VB_INSTANCE_BASE to restore */ + send $r5 +daic_runout: + parm $r2 + parm $r2 + parm $r2 + parm $r2 + mov $r7 (add $r7 -1) +daic_runout_check: + branz annul $r7 #daic_runout + bra annul #daic_restore diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h index 0aebeeb6e66..bf8625e0584 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h +++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h @@ -207,3 +207,128 @@ uint32_t mme9097_draw_arrays_indirect[] = { 0x014380a1, 0x00002841, }; + +uint32_t mme9097_draw_elts_indirect_count[] = { + 0x01434615, + 0x01438715, + 0x07400021, +/* 0x000d: deic_draw_again */ + 0x00003041, + 0x00003841, + 0x00000601, + 0x00000701, +/* 0x001e: deic_again */ + 0x00000501, + 0x0005ad10, +/* 0x0026: deic_end */ + 0x000b2807, + 0x007f4312, +/* 0x002e: deic_restore */ + 0x000a9817, + 0x00002841, +/* 0x0035: deic_runout */ + 0x00000301, +/* 0x003b: deic_runout_check */ + 0x00000201, + 0x017dc451, + 0x00002431, + 0x0638c021, + 0x00600041, + 0x0004d007, + 0x00002531, + 0x00002841, + 0x00003041, + 0x05434021, + 0x00002041, + 0x00002841, + 0x01118021, + 0x00002041, + 0x00004411, + 0xd0400912, + 0x01618021, + 0x00000841, + 0x017e0021, + 0x00001841, + 0x00051210, + 0x01614071, + 0xfffe9017, + 0xd0410912, + 0x03408515, + 0xffffed11, + 0x0004e807, + 0xffffff11, + 0x03408021, + 0x00002841, + 0xfff87817, + 0x00007611, + 0x03400615, + 0x03404715, + 0x05434021, + 0x00003041, + 0x00003841, + 0x011180a1, + 0x00003041, + 0x00000201, + 0x00000201, + 0x00000201, + 0x00000201, + 
0x00000201, + 0xffffff11, + 0xfffeb837, + 0xfffc8027, +}; + +uint32_t mme9097_draw_arrays_indirect_count[] = { + 0x01438515, + 0x03400021, +/* 0x0009: daic_draw_again */ + 0x00002e31, + 0x00000701, + 0x00000501, +/* 0x0017: daic_again */ + 0x0005ad10, + 0x00086807, +/* 0x001f: daic_end */ + 0x007f4312, + 0x0007d837, +/* 0x0024: daic_restore */ +/* 0x0027: daic_runout */ + 0x00000201, + 0x00000301, +/* 0x002c: daic_runout_check */ + 0x00d74451, + 0x0004d807, + 0x00002431, + 0x0638c021, + 0x00600041, + 0x00000041, + 0x00002041, + 0x00003041, + 0x01438021, + 0x00002041, + 0x00004411, + 0xd0400912, + 0x01618021, + 0x00000841, + 0x00d78021, + 0x00001041, + 0x00051b10, + 0x01614071, + 0xfffe9817, + 0xd0410912, + 0xffffed11, + 0x00032807, + 0xffffff11, + 0xfff9f817, + 0x00007611, + 0x03400515, + 0x014380a1, + 0x00002841, + 0x00000201, + 0x00000201, + 0x00000201, + 0x00000201, + 0xffffff11, + 0xfffef837, + 0xfffdc027, +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h index bf2798a44a0..27c026b8b30 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h @@ -29,4 +29,8 @@ #define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT 0x00003840 +#define NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT 0x00003848 + +#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850 + #endif /* __NVC0_MACROS_H__ */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 86bd8632d0b..c8510b8bb5a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -187,6 +187,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_DRAW_PARAMETERS: case PIPE_CAP_TGSI_PACK_HALF_FLOAT: case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 
1 : 0; @@ -209,7 +210,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: return 0; case PIPE_CAP_VENDOR_ID: @@ -1029,6 +1029,8 @@ nvc0_screen_create(struct nouveau_device *dev) MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back); MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect); MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); + MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); + MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 1d889b9db0d..ad79d1cbb9c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -807,12 +807,16 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nv04_resource *buf = nv04_resource(info->indirect); + struct nv04_resource *buf_count = nv04_resource(info->indirect_params); unsigned size, macro, count = info->indirect_count, drawid = info->drawid; uint32_t offset = buf->offset + info->indirect_offset; /* must make FIFO wait for engines idle before continuing to process */ - if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) + if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) || + (buf_count && buf_count->fence_wr && + !nouveau_fence_signalled(buf_count->fence_wr))) { IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0); + } /* Queue things up to let the macros write params to the driver constbuf */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); @@ -824,7 +828,10 @@ 
nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) assert(nvc0->idxbuf.buffer); assert(nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer)); size = 5; - macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT; + if (buf_count) + macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT; + else + macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT; } else { if (nvc0->state.index_bias) { /* index_bias is implied 0 if !info->indexed (really ?) */ @@ -833,7 +840,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) nvc0->state.index_bias = 0; } size = 4; - macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT; + if (buf_count) + macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT; + else + macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT; } /* If the stride is not the natural stride, we have to stick a separate @@ -851,12 +861,21 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) pushes = draws; } - nouveau_pushbuf_space(push, 8, 0, pushes); + nouveau_pushbuf_space(push, 16, 0, pushes + !!buf_count); PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain); - PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(0, macro, 3 + draws * size)); + if (buf_count) + PUSH_REFN(push, buf_count->bo, NOUVEAU_BO_RD | buf_count->domain); + PUSH_DATA(push, + NVC0_FIFO_PKHDR_1I(0, macro, 3 + !!buf_count + draws * size)); PUSH_DATA(push, nvc0_prim_gl(info->mode)); PUSH_DATA(push, drawid); PUSH_DATA(push, draws); + if (buf_count) { + nouveau_pushbuf_data(push, + buf_count->bo, + buf_count->offset + info->indirect_params_offset, + NVC0_IB_ENTRY_1_NO_PREFETCH | 4); + } if (pushes == 1) { nouveau_pushbuf_data(push, buf->bo, offset, -- 2.30.2