DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
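+/* Fallback definitions in case the installed amdgpu_drm.h is too old to
+ * have these kernel interface additions. */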
+#ifndef AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
+#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+#endif
+
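+/* A scheduled dependency is satisfied when the fence's job is scheduled
+ * to the hardware (starts executing) rather than when it completes,
+ * which is what implements start fences. */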
+#ifndef AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
+#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07
+#endif
+
static unsigned amdgpu_ib_max_submit_dwords(enum ib_type ib_type)
{
+ /* The maximum IB size in dwords, including all chained IBs. */
switch (ib_type) {
case IB_MAIN:
/* Smaller submits mean the GPU gets busy sooner and there is less
 * waiting for buffers and fences. Proof:
 *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
 */
return 20 * 1024;
+ case IB_PARALLEL_COMPUTE:
+ /* Always chain this IB. */
+ return UINT_MAX;
default:
unreachable("bad ib_type");
}
struct amdgpu_ib *ib = NULL;
struct drm_amdgpu_cs_chunk_ib *info = &cs->csc->ib[ib_type];
- unsigned ib_size = 0;
+ /* This is the minimum size of a contiguous IB, in bytes (4K dwords). */
+ unsigned ib_size = 4 * 1024 * 4;
switch (ib_type) {
+ case IB_PARALLEL_COMPUTE:
+ ib = &cs->compute_ib;
+ break;
case IB_MAIN:
ib = &cs->main;
- ib_size = 4 * 1024 * 4;
break;
default:
unreachable("unhandled IB type");
}
+ cs->ib[IB_PARALLEL_COMPUTE].ip_type = AMDGPU_HW_IP_COMPUTE;
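+ /* The end-of-IB fence writes back the L2 cache without invalidating
+  * shader caches, since the gfx IB running in parallel may still be
+  * using them. */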
+ cs->ib[IB_PARALLEL_COMPUTE].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
+
memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist));
cs->last_added_bo = NULL;
return true;
cleanup_fence_list(&cs->fence_dependencies);
cleanup_fence_list(&cs->syncobj_dependencies);
cleanup_fence_list(&cs->syncobj_to_signal);
+ cleanup_fence_list(&cs->compute_fence_dependencies);
+ cleanup_fence_list(&cs->compute_start_fence_dependencies);
cs->num_real_buffers = 0;
cs->num_slab_buffers = 0;
FREE(cs->fence_dependencies.list);
FREE(cs->syncobj_dependencies.list);
FREE(cs->syncobj_to_signal.list);
+ FREE(cs->compute_fence_dependencies.list);
+ FREE(cs->compute_start_fence_dependencies.list);
}
amdgpu_cs_chunk_fence_info_to_data(&fence_info, (void*)&cs->fence_chunk);
cs->main.ib_type = IB_MAIN;
+ cs->compute_ib.ib_type = IB_PARALLEL_COMPUTE;
if (!amdgpu_init_cs_context(ctx->ws, &cs->csc1, ring_type)) {
FREE(cs);
return NULL;
}
+static struct radeon_cmdbuf *
+amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib,
+ bool uses_gds_ordered_append)
+{
+ struct amdgpu_cs *cs = (struct amdgpu_cs*)ib;
+ struct amdgpu_winsys *ws = cs->ctx->ws;
+
+ if (cs->ring_type != RING_GFX)
+ return NULL;
+
+ /* Only one parallel compute IB can be added. */
+ if (cs->compute_ib.ib_mapped)
+ return NULL;
+
+ /* Allocate the compute IB. */
+ if (!amdgpu_get_new_ib(&ws->base, cs, IB_PARALLEL_COMPUTE))
+ return NULL;
+
+ if (uses_gds_ordered_append) {
+ cs->csc1.ib[IB_PARALLEL_COMPUTE].flags |=
+ AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID;
+ cs->csc2.ib[IB_PARALLEL_COMPUTE].flags |=
+ AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID;
+ }
+ return &cs->compute_ib.base;
+}
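/* For reference, a minimal sketch (not part of this patch) of how a driver
 * could use the new entry point through the winsys vtable; "ws" and
 * "gfx_cs" are assumed caller-side variables. */
struct radeon_cmdbuf *compute_cs =
   ws->cs_add_parallel_compute_ib(gfx_cs, /*uses_gds_ordered_append*/ false);
if (!compute_cs) {
   /* Not a gfx ring, or a compute IB was already added: record the
    * compute work in the main IB instead. */
   compute_cs = gfx_cs;
}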
+
static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
{
return true;
amdgpu_fence_reference(&fences->list[idx], (struct pipe_fence_handle*)fence);
}
+/* TODO: recognizing dependencies as no-ops doesn't take the parallel
+ * compute IB into account. The compute IB won't wait for these.
+ * Also, the scheduler can execute compute and SDMA IBs on any rings.
+ * Should we always insert dependencies?
+ */
static bool is_noop_fence_dependency(struct amdgpu_cs *acs,
struct amdgpu_fence *fence)
{
}
static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws,
- struct pipe_fence_handle *pfence)
+ struct pipe_fence_handle *pfence,
+ unsigned dependency_flags)
{
struct amdgpu_cs *acs = amdgpu_cs(rws);
struct amdgpu_cs_context *cs = acs->csc;
util_queue_fence_wait(&fence->submitted);
+ if (dependency_flags & RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY) {
+ /* Syncobjs are not needed here. */
+ assert(!amdgpu_fence_is_syncobj(fence));
+
+ if (acs->ctx->ws->info.has_scheduled_fence_dependency &&
+ dependency_flags & RADEON_DEPENDENCY_START_FENCE)
+ add_fence_to_list(&cs->compute_start_fence_dependencies, fence);
+ else
+ add_fence_to_list(&cs->compute_fence_dependencies, fence);
+ return;
+ }
+
+ /* Start fences are only supported for the parallel compute IB. */
+ assert(!(dependency_flags & RADEON_DEPENDENCY_START_FENCE));
+
if (is_noop_fence_dependency(acs, fence))
return;
}
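/* To illustrate the new dependency_flags parameter, a sketch (not part of
 * this patch) of a caller gating only the compute IB on another job's
 * scheduling; "ws", "gfx_cs", and "fence" are assumed variables. Without
 * kernel support for scheduled fences, this degrades to a full completion
 * dependency, as the else branch above shows. */
ws->cs_add_fence_dependency(gfx_cs, fence,
                            RADEON_DEPENDENCY_PARALLEL_COMPUTE_ONLY |
                            RADEON_DEPENDENCY_START_FENCE);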
struct drm_amdgpu_bo_list_entry *list =
- alloca(cs->num_real_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
+ alloca((cs->num_real_buffers + 2) * sizeof(struct drm_amdgpu_bo_list_entry));
unsigned num_handles = 0;
for (i = 0; i < cs->num_real_buffers; ++i) {
num_chunks++;
}
+ /* Submit the parallel compute IB first, as its own CS ioctl that shares
+  * the buffer list and the chunks built so far; its extra chunks are
+  * removed again below. */
+ if (cs->ib[IB_PARALLEL_COMPUTE].ib_bytes > 0) {
+ unsigned old_num_chunks = num_chunks;
+
+ /* Add compute fence dependencies. */
+ unsigned num_dependencies = cs->compute_fence_dependencies.num;
+ if (num_dependencies) {
+ struct drm_amdgpu_cs_chunk_dep *dep_chunk =
+ alloca(num_dependencies * sizeof(*dep_chunk));
+
+ for (unsigned i = 0; i < num_dependencies; i++) {
+ struct amdgpu_fence *fence =
+ (struct amdgpu_fence*)cs->compute_fence_dependencies.list[i];
+
+ assert(util_queue_fence_is_signalled(&fence->submitted));
+ amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
+ }
+
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
+ chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies;
+ chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
+ num_chunks++;
+ }
+
+ /* Add compute start fence dependencies. */
+ unsigned num_start_dependencies = cs->compute_start_fence_dependencies.num;
+ if (num_start_dependencies) {
+ struct drm_amdgpu_cs_chunk_dep *dep_chunk =
+ alloca(num_start_dependencies * sizeof(*dep_chunk));
+
+ for (unsigned i = 0; i < num_start_dependencies; i++) {
+ struct amdgpu_fence *fence =
+ (struct amdgpu_fence*)cs->compute_start_fence_dependencies.list[i];
+
+ assert(util_queue_fence_is_signalled(&fence->submitted));
+ amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
+ }
+
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES;
+ chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_start_dependencies;
+ chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
+ num_chunks++;
+ }
+
+ /* Convert from dwords to bytes. */
+ cs->ib[IB_PARALLEL_COMPUTE].ib_bytes *= 4;
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
+ chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+ chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PARALLEL_COMPUTE];
+ num_chunks++;
+
+ r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
+ num_chunks, chunks, NULL);
+ if (r)
+ goto finalize;
+
+ /* Remove the compute-only chunks again for the gfx submission below. */
+ num_chunks = old_num_chunks;
+ }
+
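/* The two dependency-chunk blocks above differ only in the fence list and
 * the chunk id. A hypothetical helper (a sketch, not part of this patch)
 * could factor them out; the caller still has to alloca the dep array so
 * that it outlives the helper until the CS ioctl: */
static void amdgpu_fill_dep_chunk(struct drm_amdgpu_cs_chunk *chunk,
                                  unsigned chunk_id,
                                  struct amdgpu_fence_list *fences,
                                  struct drm_amdgpu_cs_chunk_dep *deps)
{
   for (unsigned i = 0; i < fences->num; i++) {
      struct amdgpu_fence *fence = (struct amdgpu_fence*)fences->list[i];

      assert(util_queue_fence_is_signalled(&fence->submitted));
      amdgpu_cs_chunk_fence_to_dep(&fence->fence, &deps[i]);
   }
   chunk->chunk_id = chunk_id;
   chunk->length_dw = sizeof(deps[0]) / 4 * fences->num;
   chunk->chunk_data = (uintptr_t)deps;
}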
/* Syncobj signals. */
unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num;
if (num_syncobj_to_signal) {
r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
num_chunks, chunks, &seq_no);
}
+finalize:
if (r) {
if (r == -ENOMEM)
}
if (cs->ring_type == RING_GFX)
ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
+
+ /* Also pad the parallel compute IB to a multiple of 8 dwords. */
+ if (cs->compute_ib.ib_mapped) {
+ while (cs->compute_ib.base.current.cdw & 7)
+ radeon_emit(&cs->compute_ib.base, 0xffff1000); /* type3 nop packet */
+ }
break;
case RING_UVD:
case RING_UVD_ENC:
/* Set IB sizes. */
amdgpu_ib_finalize(ws, &cs->main);
+ if (cs->compute_ib.ib_mapped)
+ amdgpu_ib_finalize(ws, &cs->compute_ib);
+
/* Create a fence. */
amdgpu_fence_reference(&cur->fence, NULL);
if (cs->next_fence) {
}
amdgpu_get_new_ib(&ws->base, cs, IB_MAIN);
+ if (cs->compute_ib.ib_mapped)
+ amdgpu_get_new_ib(&ws->base, cs, IB_PARALLEL_COMPUTE);
cs->main.base.used_gart = 0;
cs->main.base.used_vram = 0;
p_atomic_dec(&cs->ctx->ws->num_cs);
pb_reference(&cs->main.big_ib_buffer, NULL);
FREE(cs->main.base.prev);
+ pb_reference(&cs->compute_ib.big_ib_buffer, NULL);
+ FREE(cs->compute_ib.base.prev);
amdgpu_destroy_cs_context(&cs->csc1);
amdgpu_destroy_cs_context(&cs->csc2);
amdgpu_fence_reference(&cs->next_fence, NULL);
ws->base.ctx_destroy = amdgpu_ctx_destroy;
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
ws->base.cs_create = amdgpu_cs_create;
+ ws->base.cs_add_parallel_compute_ib = amdgpu_cs_add_parallel_compute_ib;
ws->base.cs_destroy = amdgpu_cs_destroy;
ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
ws->base.cs_validate = amdgpu_cs_validate;