unsigned scratch_offset_base = isgs ? 8 : 4;
LLVMValueRef scratch_offset_basev = isgs ? i32_8 : i32_4;
+ ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
+
/* Determine the mapping of streamout buffers to vertex streams. */
for (unsigned i = 0; i < so->num_outputs; ++i) {
unsigned buf = so->output[i].output_buffer;
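For reference, ac_llvm_add_target_dep_function_attr() is Mesa's thin wrapper over the LLVM-C string-attribute API. Paraphrased from ac_llvm_util.c (the exact value formatting may differ between Mesa versions), it amounts to:

    #include <stdio.h>
    #include <llvm-c/Core.h>

    /* Stringify the value and attach it as a target-dependent function
     * attribute. The AMDGPU backend reads "amdgpu-gds-size" to learn how
     * many bytes of GDS the shader may address, so the 256 above has to
     * match the GDS buffer allocated on the context side (see
     * si_allocate_gds below). */
    void ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
                                              const char *name, unsigned value)
    {
       char str[16];

       snprintf(str, sizeof(str), "0x%x", value);
       LLVMAddTargetDependentFunctionAttr(F, name, str);
    }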
RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
}
-void si_begin_new_gfx_cs(struct si_context *ctx)
+static void si_add_gds_to_buffer_list(struct si_context *sctx)
{
- if (ctx->is_debug)
- si_begin_gfx_cs_debug(ctx);
-
- if (ctx->gds) {
- ctx->ws->cs_add_buffer(ctx->gfx_cs, ctx->gds,
+ if (sctx->gds) {
+ sctx->ws->cs_add_buffer(sctx->gfx_cs, sctx->gds,
RADEON_USAGE_READWRITE, 0, 0);
- if (ctx->gds_oa) {
- ctx->ws->cs_add_buffer(ctx->gfx_cs, ctx->gds_oa,
+ if (sctx->gds_oa) {
+ sctx->ws->cs_add_buffer(sctx->gfx_cs, sctx->gds_oa,
RADEON_USAGE_READWRITE, 0, 0);
}
}
+}
+
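Factoring the buffer-list registration into a helper matters because the winsys buffer list belongs to the IB currently being recorded and goes away when that IB is flushed. The GDS buffers therefore have to be registered twice: once right after the lazy allocation below (the current IB is already mid-recording) and again at the start of every subsequent gfx CS (see si_begin_new_gfx_cs further down).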
+void si_allocate_gds(struct si_context *sctx)
+{
+ struct radeon_winsys *ws = sctx->ws;
+
+ if (sctx->gds)
+ return;
+
+ assert(sctx->chip_class >= GFX10); /* for gfx10 streamout */
+
+ /* 4 streamout GDS counters.
+ * We need 256B (64 dw) of GDS, otherwise streamout hangs.
+ */
+ sctx->gds = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, 0);
+ sctx->gds_oa = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, 0);
+
+ assert(sctx->gds && sctx->gds_oa);
+ si_add_gds_to_buffer_list(sctx);
+}
+
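Nothing here releases the buffers; since the allocation is one-shot per context, the matching cleanup belongs in context teardown. A hypothetical counterpart using gallium's pb_reference() (illustration only; si_free_gds is an invented name, not part of this patch):

    #include "pipebuffer/pb_buffer.h"

    /* Hypothetical teardown: dropping the last reference returns the GDS
     * and OA allocations to the winsys. Safe even if si_allocate_gds()
     * never ran, per Mesa's usual NULL-tolerant refcounting idiom. */
    static void si_free_gds(struct si_context *sctx)
    {
       pb_reference(&sctx->gds, NULL);
       pb_reference(&sctx->gds_oa, NULL);
    }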
+void si_begin_new_gfx_cs(struct si_context *ctx)
+{
+ if (ctx->is_debug)
+ si_begin_gfx_cs_debug(ctx);
+
+	si_add_gds_to_buffer_list(ctx);
+
/* Always invalidate caches at the beginning of IBs, because external
* users (e.g. BO evictions and SDMA/UVD/VCE IBs) can modify our
/* si_gfx_cs.c */
void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
struct pipe_fence_handle **fence);
+void si_allocate_gds(struct si_context *ctx);
void si_begin_new_gfx_cs(struct si_context *ctx);
void si_need_gfx_cs_space(struct si_context *ctx);
void si_unref_sdma_uploads(struct si_context *sctx);
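Because si_allocate_gds() returns early once sctx->gds is set, the new entry point is idempotent and cheap to call repeatedly. A minimal hypothetical call site (the real one this patch adds is in the streamout code, next hunk):

    /* Hypothetical caller sketch: safe on every streamout begin, since
     * the allocation only happens the first time. */
    static void gfx10_ensure_streamout_gds(struct si_context *sctx)
    {
       if (sctx->chip_class >= GFX10)
          si_allocate_gds(sctx); /* no-op after the first call */
    }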
/* All readers of the streamout targets need to be finished before we can
* start writing to the targets.
*/
- if (num_targets)
+ if (num_targets) {
+ if (sctx->chip_class >= GFX10)
+ si_allocate_gds(sctx);
+
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- SI_CONTEXT_CS_PARTIAL_FLUSH;
+ SI_CONTEXT_CS_PARTIAL_FLUSH;
+ }
/* Streamout buffers must be bound in 2 places:
* 1) in VGT by setting the VGT_STRMOUT registers
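One cross-cutting invariant: the 256 passed as "amdgpu-gds-size" in the shader hunk at the top and the 256-byte RADEON_DOMAIN_GDS allocation in si_allocate_gds() must stay in sync, since the attribute is what tells the compiler how much GDS the shader code may touch. A hypothetical shared constant would make that explicit (SI_GDS_STREAMOUT_SIZE is an invented name, not in the patch):

    #define SI_GDS_STREAMOUT_SIZE 256 /* bytes = 64 dw; holds 4 streamout counters */

    /* shader side: */
    ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size",
                                         SI_GDS_STREAMOUT_SIZE);
    /* context side: */
    sctx->gds = ws->buffer_create(ws, SI_GDS_STREAMOUT_SIZE, 4,
                                  RADEON_DOMAIN_GDS, 0);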