nvc0: add ARB_indirect_parameters support
author Ilia Mirkin <imirkin@alum.mit.edu>
Sat, 2 Jan 2016 16:38:42 +0000 (11:38 -0500)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Thu, 7 Jan 2016 23:38:46 +0000 (18:38 -0500)
I chose to make separate macros for this due to the additional
complexity and extra scratch usage.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
docs/relnotes/11.2.0.html
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c

index d31da8ba4a68cab3f1c31f0c487817c6f8529802..616c134a768d092672ce00ec14b288eb9b51a08f 100644 (file)
@@ -47,6 +47,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_base_instance on freedreno/a4xx</li>
 <li>GL_ARB_compute_shader on i965</li>
 <li>GL_ARB_copy_image on r600</li>
+<li>GL_ARB_indirect_parameters on nvc0</li>
 <li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
 <li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
 <li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
index 35355edf2e7eefa88b59402cab0e4a61809914ad..4daa57d47bbf5f9651c9c27a32870b3e492a4b9c 100644 (file)
@@ -334,3 +334,160 @@ dai_end:
    mov $r6 (add $r6 1)
    exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
    send $r5
+
+/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT
+ *
+ * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
+ * Forcefully sets VERTEX_ID_BASE to the value of VB_ELEMENT_BASE.
+ *
+ * arg     = mode
+ * parm[0] = start_drawid (draw id of the first parameter set)
+ * parm[1] = numparams (number of 5-word parameter sets that follow)
+ * parm[2] = totaldraws (only totaldraws - start_drawid sets are processed)
+ * parm[3 + 5n + 0] = count
+ * parm[3 + 5n + 1] = instance_count
+ * parm[3 + 5n + 2] = start
+ * parm[3 + 5n + 3] = index_bias
+ * parm[3 + 5n + 4] = start_instance
+ *
+ * SCRATCH[0] = saved VB_ELEMENT_BASE
+ * SCRATCH[1] = saved VB_INSTANCE_BASE
+ * SCRATCH[2] = draws left (kept in scratch because $r5 is reused per draw)
+ */
+.section #mme9097_draw_elts_indirect_count
+   read $r6 0x50d /* VB_ELEMENT_BASE */
+   read $r7 0x50e /* VB_INSTANCE_BASE */
+   maddr 0x1d00 /* SCRATCH[0]; each following send goes to the next slot */
+   send $r6 /* SCRATCH[0] = VB_ELEMENT_BASE */
+   send $r7 /* SCRATCH[1] = VB_INSTANCE_BASE */
+   parm $r6 /* start_drawid */
+   parm $r7 /* numparams */
+   parm $r5 /* totaldraws */
+   mov $r5 (sub $r5 $r6) /* draws left */
+   braz $r5 #deic_runout /* zero draws left: discard every param set. NOTE(review): the insn after a non-annul branch appears to run as a delay slot - confirm against the MME docs */
+   mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
+   branz $r3 #deic_runout /* negative draws left: discard every param set */
+   send $r5 /* SCRATCH[2] = draws left */
+deic_draw_again:
+   parm $r3 /* count */
+   parm $r2 /* instance_count */
+   parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
+   parm $r4 send $r4 /* index_bias, send start */
+   maddr 0x18e3 /* CB_POS */
+   send 0x180 /* 256 + 128 */
+   braz $r2 #deic_end /* instance_count == 0: nothing to draw for this set */
+   parm $r5 send $r4 /* start_instance, send index_bias */
+   send $r5 /* send start_instance */
+   send $r6 /* draw id */
+   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+   send $r4 /* VB_ELEMENT_BASE = index_bias */
+   send $r5 /* VB_INSTANCE_BASE = start_instance */
+   maddr 0x446 /* presumably VERTEX_ID_BASE, per the NOTE in the header comment */
+   send $r4 /* index_bias */
+   mov $r4 0x1 /* constant 1: instance decrement and INSTANCE_NEXT bit value */
+   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
+deic_again:
+   maddr 0x586 /* VERTEX_BEGIN_GL */
+   send $r1 /* mode */
+   maddr 0x5f8 /* INDEX_BATCH_COUNT */
+   send $r3 /* count */
+   mov $r2 (sub $r2 $r4) /* one instance emitted */
+   maddrsend 0x585 /* VERTEX_END_GL */
+   branz $r2 #deic_again /* loop while instances remain */
+   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+deic_end:
+   read $r5 0xd02 /* SCRATCH[2] = draws left */
+   mov $r5 (add $r5 -1) /* this param set used up one draw */
+   braz $r5 #deic_runout_check /* draw limit reached: discard remaining sets */
+   mov $r7 (add $r7 -1) /* one param set consumed */
+   maddr 0xd02 /* SCRATCH[2] */
+   send $r5 /* store updated draws left */
+   branz $r7 #deic_draw_again /* more param sets to process */
+   mov $r6 (add $r6 1) /* next draw id */
+deic_restore:
+   read $r6 0xd00 /* SCRATCH[0] = saved VB_ELEMENT_BASE */
+   read $r7 0xd01 /* SCRATCH[1] = saved VB_INSTANCE_BASE */
+   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+   send $r6 /* restore VB_ELEMENT_BASE */
+   send $r7 /* restore VB_INSTANCE_BASE */
+   exit maddr 0x446 /* same method written with index_bias per draw above */
+   send $r6 /* set it to the restored VB_ELEMENT_BASE */
+deic_runout:
+   parm $r2 /* discard one unused 5-word param set */
+   parm $r2
+   parm $r2
+   parm $r2
+   parm $r2
+   mov $r7 (add $r7 -1) /* one param set consumed */
+deic_runout_check:
+   branz annul $r7 #deic_runout /* keep discarding while sets remain */
+   bra annul #deic_restore /* all params consumed: restore state and exit */
+
+/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT:
+ *
+ * NOTE: Saves and restores VB_INSTANCE_BASE.
+ *
+ * arg     = mode
+ * parm[0] = start_drawid (draw id of the first parameter set)
+ * parm[1] = numparams (number of 4-word parameter sets that follow)
+ * parm[2] = totaldraws (only totaldraws - start_drawid sets are processed)
+ * parm[3 + 4n + 0] = count
+ * parm[3 + 4n + 1] = instance_count
+ * parm[3 + 4n + 2] = start
+ * parm[3 + 4n + 3] = start_instance
+ *
+ * SCRATCH[0] = saved VB_INSTANCE_BASE
+ */
+.section #mme9097_draw_arrays_indirect_count
+   read $r5 0x50e /* VB_INSTANCE_BASE */
+   maddr 0xd00 /* SCRATCH[0] */
+   parm $r6 send $r5 /* start_drawid, save VB_INSTANCE_BASE */
+   parm $r7 /* numparams */
+   parm $r5 /* totaldraws */
+   mov $r5 (sub $r5 $r6) /* draws left */
+   braz $r5 #daic_runout /* zero draws left: discard every param set */
+   mov $r3 (extrinsrt 0x0 $r5 31 1 0) /* extract high bit */
+   branz annul $r3 #daic_runout /* negative draws left: discard every param set */
+daic_draw_again:
+   parm $r2 /* count */
+   parm $r3 /* instance_count */
+   parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */
+   braz $r3 #daic_end /* instance_count == 0: nothing to draw for this set */
+   parm $r4 send $r4 /* start_instance, send start */
+   maddr 0x18e3 /* CB_POS */
+   send 0x180 /* 256 + 128 */
+   send 0x0 /* send 0 as base_vertex */
+   send $r4 /* send start_instance */
+   send $r6 /* draw id */
+   maddr 0x50e /* VB_INSTANCE_BASE */
+   send $r4 /* VB_INSTANCE_BASE = start_instance */
+   mov $r4 0x1 /* constant 1: instance decrement and INSTANCE_NEXT bit value */
+   mov $r1 (extrinsrt $r1 0x0 0 1 26) /* clear INSTANCE_NEXT */
+daic_again:
+   maddr 0x586 /* VERTEX_BEGIN_GL */
+   send $r1 /* mode */
+   maddr 0x35e /* VERTEX_BUFFER_COUNT */
+   send $r2 /* count */
+   mov $r3 (sub $r3 $r4) /* one instance emitted */
+   maddrsend 0x585 /* VERTEX_END_GL */
+   branz $r3 #daic_again /* loop while instances remain */
+   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+daic_end:
+   mov $r5 (add $r5 -1) /* this param set used up one draw */
+   braz $r5 #daic_runout_check /* draw limit reached: discard remaining sets */
+   mov $r7 (add $r7 -1) /* one param set consumed */
+   branz $r7 #daic_draw_again /* more param sets to process */
+   mov $r6 (add $r6 1) /* next draw id */
+daic_restore:
+   read $r5 0xd00 /* SCRATCH[0] = saved VB_INSTANCE_BASE */
+   exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
+   send $r5 /* write back the saved value */
+daic_runout:
+   parm $r2 /* discard one unused 4-word param set */
+   parm $r2
+   parm $r2
+   parm $r2
+   mov $r7 (add $r7 -1) /* one param set consumed */
+daic_runout_check:
+   branz annul $r7 #daic_runout /* keep discarding while sets remain */
+   bra annul #daic_restore /* all params consumed: restore state and exit */
index 0aebeeb6e66cdd1bfb74c47cc9ffc5d01807a73f..bf8625e0584f5e87d9594e342334db5416a377de 100644 (file)
@@ -207,3 +207,128 @@ uint32_t mme9097_draw_arrays_indirect[] = {
        0x014380a1,
        0x00002841,
 };
+
+uint32_t mme9097_draw_elts_indirect_count[] = {
+       0x01434615,
+       0x01438715,
+       0x07400021,
+       0x00003041,
+       0x00003841,
+       0x00000601,
+       0x00000701,
+       0x00000501,
+       0x0005ad10,
+       0x000b2807,
+       0x007f4312,
+       0x000a9817,
+       0x00002841,
+/* 0x000d: deic_draw_again */
+       0x00000301,
+       0x00000201,
+       0x017dc451,
+       0x00002431,
+       0x0638c021,
+       0x00600041,
+       0x0004d007,
+       0x00002531,
+       0x00002841,
+       0x00003041,
+       0x05434021,
+       0x00002041,
+       0x00002841,
+       0x01118021,
+       0x00002041,
+       0x00004411,
+       0xd0400912,
+/* 0x001e: deic_again */
+       0x01618021,
+       0x00000841,
+       0x017e0021,
+       0x00001841,
+       0x00051210,
+       0x01614071,
+       0xfffe9017,
+       0xd0410912,
+/* 0x0026: deic_end */
+       0x03408515,
+       0xffffed11,
+       0x0004e807,
+       0xffffff11,
+       0x03408021,
+       0x00002841,
+       0xfff87817,
+       0x00007611,
+/* 0x002e: deic_restore */
+       0x03400615,
+       0x03404715,
+       0x05434021,
+       0x00003041,
+       0x00003841,
+       0x011180a1,
+       0x00003041,
+/* 0x0035: deic_runout */
+       0x00000201,
+       0x00000201,
+       0x00000201,
+       0x00000201,
+       0x00000201,
+       0xffffff11,
+/* 0x003b: deic_runout_check */
+       0xfffeb837,
+       0xfffc8027,
+};
+
+uint32_t mme9097_draw_arrays_indirect_count[] = {
+       0x01438515,
+       0x03400021,
+       0x00002e31,
+       0x00000701,
+       0x00000501,
+       0x0005ad10,
+       0x00086807,
+       0x007f4312,
+       0x0007d837,
+/* 0x0009: daic_draw_again */
+       0x00000201,
+       0x00000301,
+       0x00d74451,
+       0x0004d807,
+       0x00002431,
+       0x0638c021,
+       0x00600041,
+       0x00000041,
+       0x00002041,
+       0x00003041,
+       0x01438021,
+       0x00002041,
+       0x00004411,
+       0xd0400912,
+/* 0x0017: daic_again */
+       0x01618021,
+       0x00000841,
+       0x00d78021,
+       0x00001041,
+       0x00051b10,
+       0x01614071,
+       0xfffe9817,
+       0xd0410912,
+/* 0x001f: daic_end */
+       0xffffed11,
+       0x00032807,
+       0xffffff11,
+       0xfff9f817,
+       0x00007611,
+/* 0x0024: daic_restore */
+       0x03400515,
+       0x014380a1,
+       0x00002841,
+/* 0x0027: daic_runout */
+       0x00000201,
+       0x00000201,
+       0x00000201,
+       0x00000201,
+       0xffffff11,
+/* 0x002c: daic_runout_check */
+       0xfffef837,
+       0xfffdc027,
+};
index bf2798a44a06bce542f79da21d4a57de78e9d621..27c026b8b303091edbacc39d7941dc0214172d56 100644 (file)
@@ -29,4 +29,8 @@
 
 #define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT                   0x00003840
 
+#define NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT               0x00003848
+
+#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT             0x00003850
+
 #endif /* __NVC0_MACROS_H__ */
index 86bd8632d0bc6538e004e71e293beb093d6ee002..c8510b8bb5a7d1215c15211d493f9d89bff80b45 100644 (file)
@@ -187,6 +187,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_DRAW_PARAMETERS:
    case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
    case PIPE_CAP_MULTI_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -209,7 +210,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VERTEXID_NOBASE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
-   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -1029,6 +1029,8 @@ nvc0_screen_create(struct nouveau_device *dev)
    MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
    MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
    MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
+   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
+   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
 
    BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
    PUSH_DATA (push, 1);
index 1d889b9db0d9e036bbeaf17db5feef1ad2fd6d9b..ad79d1cbb9caeadfd2c3fb28dfc309a5a8fab4eb 100644 (file)
@@ -807,12 +807,16 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nv04_resource *buf = nv04_resource(info->indirect);
+   struct nv04_resource *buf_count = nv04_resource(info->indirect_params);
    unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
    uint32_t offset = buf->offset + info->indirect_offset;
 
    /* must make FIFO wait for engines idle before continuing to process */
-   if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
+   if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
+       (buf_count && buf_count->fence_wr &&
+        !nouveau_fence_signalled(buf_count->fence_wr))) {
       IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
+   }
 
    /* Queue things up to let the macros write params to the driver constbuf */
    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
@@ -824,7 +828,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
       assert(nvc0->idxbuf.buffer);
       assert(nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer));
       size = 5;
-      macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT;
+      if (buf_count)
+         macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT;
+      else
+         macro = NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT;
    } else {
       if (nvc0->state.index_bias) {
          /* index_bias is implied 0 if !info->indexed (really ?) */
@@ -833,7 +840,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
          nvc0->state.index_bias = 0;
       }
       size = 4;
-      macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT;
+      if (buf_count)
+         macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT;
+      else
+         macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT;
    }
 
    /* If the stride is not the natural stride, we have to stick a separate
@@ -851,12 +861,21 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
          pushes = draws;
       }
 
-      nouveau_pushbuf_space(push, 8, 0, pushes);
+      nouveau_pushbuf_space(push, 16, 0, pushes + !!buf_count);
       PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
-      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(0, macro, 3 + draws * size));
+      if (buf_count)
+         PUSH_REFN(push, buf_count->bo, NOUVEAU_BO_RD | buf_count->domain);
+      PUSH_DATA(push,
+                NVC0_FIFO_PKHDR_1I(0, macro, 3 + !!buf_count + draws * size));
       PUSH_DATA(push, nvc0_prim_gl(info->mode));
       PUSH_DATA(push, drawid);
       PUSH_DATA(push, draws);
+      if (buf_count) {
+         nouveau_pushbuf_data(push,
+                              buf_count->bo,
+                              buf_count->offset + info->indirect_params_offset,
+                              NVC0_IB_ENTRY_1_NO_PREFETCH | 4);
+      }
       if (pushes == 1) {
          nouveau_pushbuf_data(push,
                               buf->bo, offset,