nvc0: add support for indirect drawing
authorChristoph Bumiller <e0425955@student.tuwien.ac.at>
Thu, 4 Apr 2013 13:28:13 +0000 (15:28 +0200)
committerIlia Mirkin <imirkin@alum.mit.edu>
Tue, 15 Jul 2014 21:57:45 +0000 (17:57 -0400)
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
docs/GL3.txt
docs/relnotes/10.3.html
src/gallium/drivers/nouveau/nouveau_screen.c
src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h
src/gallium/drivers/nouveau/nv50/nv50_screen.c
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c

index a2f438b409a6d5d73cb4e9700d773ebba2e21d6e..92694cd215e7dbbe98236b409bf0a1de79e23466 100644 (file)
@@ -98,7 +98,7 @@ GL 4.0:
 
   GLSL 4.0                                             not started
   GL_ARB_draw_buffers_blend                            DONE (i965, nv50, nvc0, r600, radeonsi, softpipe)
-  GL_ARB_draw_indirect                                 DONE (i965)
+  GL_ARB_draw_indirect                                 DONE (i965, nvc0)
   GL_ARB_gpu_shader5                                   started
   - 'precise' qualifier                                DONE
   - Dynamically uniform sampler array indices          started (Chris)
@@ -165,7 +165,7 @@ GL 4.3:
   GL_ARB_framebuffer_no_attachments                    not started
   GL_ARB_internalformat_query2                         not started
   GL_ARB_invalidate_subdata                            DONE (all drivers)
-  GL_ARB_multi_draw_indirect                           DONE (i965)
+  GL_ARB_multi_draw_indirect                           DONE (i965, nvc0)
   GL_ARB_program_interface_query                       not started
   GL_ARB_robust_buffer_access_behavior                 not started
   GL_ARB_shader_image_size                             not started
index 2e718fc8a7eef8c917bf1189945196a3e6127ed2..7d4f53318b708e90bd3a3b1cc325707fe8a739a1 100644 (file)
@@ -56,6 +56,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_seamless_cubemap_per_texture on i965, llvmpipe, nvc0, r600, radeonsi, softpipe</li>
 <li>GL_ARB_fragment_layer_viewport on nv50, nvc0, llvmpipe, r600</li>
 <li>GL_AMD_vertex_shader_viewport_index on i965/gen7+, r600</li>
+<li>GL_ARB_(multi_)draw_indirect on nvc0</li>
 </ul>
 
 
index 9d71bf77dd4d029d551fe14f812cb7d4d4fd6142..517978d885884807e9e16699c4f2a0a78f7e7369 100644 (file)
@@ -194,12 +194,14 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
        screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
        screen->vidmem_bindings =
                PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
-               PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_CURSOR |
+               PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
+               PIPE_BIND_CURSOR |
                PIPE_BIND_SAMPLER_VIEW |
                PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE |
                PIPE_BIND_GLOBAL;
        screen->sysmem_bindings =
-               PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT;
+               PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
+               PIPE_BIND_COMMAND_ARGS_BUFFER;
 
        memset(&mm_config, 0, sizeof(mm_config));
 
index cc3a382b06122a0905d166a0c3f9e8aaf66b77d4..7523fdc4580da001c5a03503d1cd685c8d888c0e 100644 (file)
@@ -479,7 +479,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define NV50_3D_WATCHDOG_TIMER                                 0x00000de4
 
-#define NV50_3D_UNK0DE8                                                0x00000de8
+#define NV50_3D_PRIM_RESTART_WITH_DRAW_ARRAYS                  0x00000de8
 
 #define NV50_3D_UNK0DEC                                                0x00000dec
 
index 17d8fa3529f642c97a1ef7575f4698ed8140db8f..fd63819c397c36a93ae2fa251473b988657d9cf0 100644 (file)
@@ -437,6 +437,8 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
    PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
    BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
    PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
+   PUSH_DATA (push, 1);
    BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
    PUSH_DATA (push, 0);
    BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
index da9975b5eb15d9b66fef5e41d47affcf15431673..07e4519d794edd56b0fbed15a4bb802fc43c85e3 100644 (file)
@@ -223,3 +223,78 @@ locn_0a_ts:
 locn_0f_ts:
    exit maddr 0xbb
    send $r6
+
+/* NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT
+ *
+ * NOTE: Saves and restores VB_ELEMENT,INSTANCE_BASE.
+ *
+ * arg     = mode
+ * parm[0] = count
+ * parm[1] = instance_count
+ * parm[2] = start
+ * parm[3] = index_bias
+ * parm[4] = start_instance
+ */
+.section #mme9097_draw_elts_indirect
+   parm $r3 /* count */
+   parm $r2 /* instance_count */
+   parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
+   parm $r4 send $r4 /* index_bias, send start */
+   braz $r2 #dei_end
+   parm $r5 /* start_instance */
+   read $r6 0x50d /* VB_ELEMENT_BASE */
+   read $r7 0x50e /* VB_INSTANCE_BASE */
+   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+   send $r4
+   send $r5
+   mov $r4 0x1
+dei_again:
+   maddr 0x586 /* VERTEX_BEGIN_GL */
+   send $r1 /* mode */
+   maddr 0x5f8 /* INDEX_BATCH_COUNT */
+   send $r3 /* count */
+   mov $r2 (sub $r2 $r4)
+   maddrsend 0x585 /* VERTEX_END_GL */
+   branz $r2 #dei_again
+   mov $r1 (extrinsrt $r1 $r4 0 1 26) /* set INSTANCE_NEXT */
+   maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
+   exit send $r6
+   send $r7
+dei_end:
+   exit
+   nop
+
+/* NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT:
+ *
+ * NOTE: Saves and restores VB_INSTANCE_BASE.
+ *
+ * arg     = mode
+ * parm[0] = count
+ * parm[1] = instance_count
+ * parm[2] = start
+ * parm[3] = start_instance
+ */
+.section #mme9097_draw_arrays_indirect
+   parm $r2 /* count */
+   parm $r3 /* instance_count */
+   parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */
+   parm $r4 send $r4 /* start_instance */
+   braz $r3 #dai_end
+   read $r6 0x50e /* VB_INSTANCE_BASE */
+   maddr 0x50e /* VB_INSTANCE_BASE */
+   mov $r5 0x1
+   send $r4
+dai_again:
+   maddr 0x586 /* VERTEX_BEGIN_GL */
+   send $r1 /* mode */
+   maddr 0x35e /* VERTEX_BUFFER_COUNT */
+   send $r2
+   mov $r3 (sub $r3 $r5)
+   maddrsend 0x585 /* VERTEX_END_GL */
+   branz $r3 #dai_again
+   mov $r1 (extrinsrt $r1 $r5 0 1 26) /* set INSTANCE_NEXT */
+   exit maddr 0x50e /* VB_INSTANCE_BASE to restore */
+   send $r6
+dai_end:
+   exit
+   nop
index fd81a2f130e359be4a63759b9959d4237fb064da..654bf931563568263ff185f17e415285955a9a66 100644 (file)
@@ -123,3 +123,59 @@ uint32_t mme9097_tep_select[] = {
        0x002ec0a1,
        0x00003041,
 };
+
+uint32_t mme9097_draw_elts_indirect[] = {
+       0x00000301,
+       0x00000201,
+       0x017dc451,
+/* 0x000c: dei_again */
+       0x00002431,
+       0x0004d007,
+/* 0x0017: dei_end */
+       0x00000501,
+       0x01434615,
+       0x01438715,
+       0x05434021,
+       0x00002041,
+       0x00002841,
+       0x00004411,
+       0x01618021,
+       0x00000841,
+       0x017e0021,
+       0x00001841,
+       0x00051210,
+       0x01614071,
+       0xfffe9017,
+       0xd0410912,
+       0x05434021,
+       0x000030c1,
+       0x00003841,
+       0x00000091,
+       0x00000011,
+};
+
+uint32_t mme9097_draw_arrays_indirect[] = {
+       0x00000201,
+       0x00000301,
+/* 0x0009: dai_again */
+       0x00d74451,
+       0x00002431,
+/* 0x0013: dai_end */
+       0x0003d807,
+       0x01438615,
+       0x01438021,
+       0x00004511,
+       0x00002041,
+       0x01618021,
+       0x00000841,
+       0x00d78021,
+       0x00001041,
+       0x00055b10,
+       0x01614071,
+       0xfffe9817,
+       0xd0414912,
+       0x014380a1,
+       0x00003041,
+       0x00000091,
+       0x00000011,
+};
index 9e3c56b4418018a6e50ffa66809ac420f5cb852b..94b447b088fe748b300473332619706a780d689d 100644 (file)
@@ -338,6 +338,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define NVC0_3D_WATCHDOG_TIMER                                 0x00000de4
 
+#define NVC0_3D_PRIM_RESTART_WITH_DRAW_ARRAYS                  0x00000de8
+
 #define NVC0_3D_WINDOW_OFFSET_X                                        0x00000df8
 
 #define NVC0_3D_WINDOW_OFFSET_Y                                        0x00000dfc
@@ -1347,5 +1349,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define NVC0_3D_MACRO_TEP_SELECT                               0x00003830
 
+#define NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT                     0x00003838
+
+#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT                   0x00003840
+
 
 #endif /* NVC0_3D_XML */
index dc9b14384c445ff3e5b47fb5f091a3ba7e62c67c..3f444a4f57d79fed48cc0303e7e50e025a9b565c 100644 (file)
@@ -158,6 +158,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
    case PIPE_CAP_START_INSTANCE:
    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+   case PIPE_CAP_DRAW_INDIRECT:
    case PIPE_CAP_USER_CONSTANT_BUFFERS:
    case PIPE_CAP_USER_INDEX_BUFFERS:
    case PIPE_CAP_USER_VERTEX_BUFFERS:
@@ -183,7 +184,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
    case PIPE_CAP_FAKE_SW_MSAA:
    case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
-   case PIPE_CAP_DRAW_INDIRECT:
       return 0;
    }
 
@@ -405,6 +405,8 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
 
    size /= 4;
 
+   assert((pos + size) <= 0x800);
+
    BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
    PUSH_DATA (push, (m - 0x3800) / 8);
    PUSH_DATA (push, pos);
@@ -433,8 +435,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
    PUSH_DATA (push, (3 << 16) | 3);
    BEGIN_NVC0(push, SUBC_3D(0x1794), 1);
    PUSH_DATA (push, (2 << 16) | 2);
-   BEGIN_NVC0(push, SUBC_3D(0x0de8), 1);
-   PUSH_DATA (push, 1);
 
    if (obj_class < GM107_3D_CLASS) {
       BEGIN_NVC0(push, SUBC_3D(0x12ac), 1);
@@ -609,7 +609,8 @@ nvc0_screen_create(struct nouveau_device *dev)
    push->rsvd_kick = 5;
 
    screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
-      PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+      PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
+      PIPE_BIND_COMMAND_ARGS_BUFFER;
    screen->base.sysmem_bindings |=
       PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
 
@@ -768,6 +769,8 @@ nvc0_screen_create(struct nouveau_device *dev)
    PUSH_DATA (push, 1);
    BEGIN_NVC0(push, NVC0_3D(LINE_LAST_PIXEL), 1);
    PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
+   PUSH_DATA (push, 1);
    BEGIN_NVC0(push, NVC0_3D(BLEND_SEPARATE_ALPHA), 1);
    PUSH_DATA (push, 1);
    BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
@@ -954,6 +957,8 @@ nvc0_screen_create(struct nouveau_device *dev)
    MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
    MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
    MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
+   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
+   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
 
    BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
    PUSH_DATA (push, 1);
index c26b98fb5f03705a57b843d2da2154fc879b927e..f99d533af77bea17008a31854ce9751e2984462e 100644 (file)
@@ -573,6 +573,8 @@ nvc0_draw_arrays(struct nvc0_context *nvc0,
    unsigned prim;
 
    if (nvc0->state.index_bias) {
+      /* index_bias is implied 0 if !info->indexed (really ?) */
+      /* TODO: can we deactivate it for the VERTEX_BUFFER_FIRST command ? */
       PUSH_SPACE(push, 1);
       IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
       nvc0->state.index_bias = 0;
@@ -794,6 +796,61 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
    }
 }
 
+static void
+nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nv04_resource *buf = nv04_resource(info->indirect);
+   unsigned size;
+   const uint32_t offset = buf->offset + info->indirect_offset;
+
+   /* must make FIFO wait for engines idle before continuing to process */
+   if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
+      IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
+
+   PUSH_SPACE(push, 8);
+   if (info->indexed) {
+      assert(nvc0->idxbuf.buffer);
+      assert(nouveau_resource_mapped_by_gpu(nvc0->idxbuf.buffer));
+      size = 5 * 4;
+      BEGIN_1IC0(push, NVC0_3D(MACRO_DRAW_ELEMENTS_INDIRECT), 1 + size / 4);
+   } else {
+      if (nvc0->state.index_bias) {
+         /* index_bias is implied 0 if !info->indexed (really ?) */
+         IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
+         nvc0->state.index_bias = 0;
+      }
+      size = 4 * 4;
+      BEGIN_1IC0(push, NVC0_3D(MACRO_DRAW_ARRAYS_INDIRECT), 1 + size / 4);
+   }
+   PUSH_DATA(push, nvc0_prim_gl(info->mode));
+#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
+   nouveau_pushbuf_space(push, 0, 0, 1);
+   nouveau_pushbuf_data(push,
+                        buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size);
+}
+
+static INLINE void
+nvc0_update_prim_restart(struct nvc0_context *nvc0, boolean en, uint32_t index)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+   if (en != nvc0->state.prim_restart) {
+      if (en) {
+         BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
+         PUSH_DATA (push, 1);
+         PUSH_DATA (push, index);
+      } else {
+         IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
+      }
+      nvc0->state.prim_restart = en;
+   } else
+   if (en) {
+      BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_INDEX), 1);
+      PUSH_DATA (push, index);
+   }
+}
+
 void
 nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
@@ -885,42 +942,29 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
        nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
       nvc0->base.vbo_dirty = TRUE;
 
+   nvc0_update_prim_restart(nvc0, info->primitive_restart, info->restart_index);
+
    if (nvc0->base.vbo_dirty) {
       if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
          IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
       nvc0->base.vbo_dirty = FALSE;
    }
 
+   if (unlikely(info->indirect)) {
+      nvc0_draw_indirect(nvc0, info);
+   } else
+   if (unlikely(info->count_from_stream_output)) {
+      nvc0_draw_stream_output(nvc0, info);
+   } else
    if (info->indexed) {
       boolean shorten = info->max_index <= 65535;
 
-      if (info->primitive_restart != nvc0->state.prim_restart) {
-         if (info->primitive_restart) {
-            BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
-            PUSH_DATA (push, 1);
-            PUSH_DATA (push, info->restart_index);
-
-            if (info->restart_index > 65535)
-               shorten = FALSE;
-         } else {
-            IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
-         }
-         nvc0->state.prim_restart = info->primitive_restart;
-      } else
-      if (info->primitive_restart) {
-         BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_INDEX), 1);
-         PUSH_DATA (push, info->restart_index);
-
-         if (info->restart_index > 65535)
-            shorten = FALSE;
-      }
+      if (info->primitive_restart && info->restart_index > 65535)
+         shorten = FALSE;
 
       nvc0_draw_elements(nvc0, shorten,
                          info->mode, info->start, info->count,
                          info->instance_count, info->index_bias);
-   } else
-   if (unlikely(info->count_from_stream_output)) {
-      nvc0_draw_stream_output(nvc0, info);
    } else {
       nvc0_draw_arrays(nvc0,
                        info->mode, info->start, info->count,