panfrost: Extend the panfrost_batch_add_bo() API to pass access flags

author Boris Brezillon <boris.brezillon@collabora.com>

Sun, 15 Sep 2019 07:21:13 +0000 (09:21 +0200)

committer Boris Brezillon <boris.brezillon@collabora.com>

Thu, 3 Oct 2019 20:55:38 +0000 (16:55 -0400)
author Boris Brezillon <boris.brezillon@collabora.com>
Sun, 15 Sep 2019 07:21:13 +0000 (09:21 +0200)
committer Boris Brezillon <boris.brezillon@collabora.com>
Thu, 3 Oct 2019 20:55:38 +0000 (16:55 -0400)
diff --git a/src/gallium/drivers/panfrost/pan_allocate.c b/src/gallium/drivers/panfrost/pan_allocate.c

index 3076c23ab1cc771050f44c63dd0cebb631dabf64..b16a1253ac2f5e5c2cd93fe4b28bc3e414b683ec 100644 (file)
--- a/src/gallium/drivers/panfrost/pan_allocate.c
+++ b/src/gallium/drivers/panfrost/pan_allocate.c
@@ -63,8 +63,18 @@ panfrost_allocate_transient(struct panfrost_batch *batch, size_t sz)
                  size_t bo_sz = sz < TRANSIENT_SLAB_SIZE ?
                                 TRANSIENT_SLAB_SIZE : ALIGN_POT(sz, 4096);
  
-                /* We can't reuse the current BO, but we can create a new one. */
-                bo = panfrost_batch_create_bo(batch, bo_sz, 0);
+                /* We can't reuse the current BO, but we can create a new one.
+                 * We don't know what the BO will be used for, so let's flag it
+                 * RW and attach it to both the fragment and vertex/tiler jobs.
+                 * TODO: if we want fine grained BO assignment we should pass
+                 * flags to this function and keep the read/write,
+                 * fragment/vertex+tiler pools separate.
+                 */
+                bo = panfrost_batch_create_bo(batch, bo_sz, 0,
+                                              PAN_BO_ACCESS_PRIVATE |
+                                              PAN_BO_ACCESS_RW |
+                                              PAN_BO_ACCESS_VERTEX_TILER |
+                                              PAN_BO_ACCESS_FRAGMENT);
  
                  if (sz < TRANSIENT_SLAB_SIZE) {
                          batch->transient_bo = bo;
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c

index 6bd6ff71cdc72cf961c9bc833ec6de27092a253b..48bd513ab6e5409db5c399ed89d6c26a101b65f8 100644 (file)
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -273,7 +273,11 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
  
          /* Upload the shader */
          final.shader.bo = panfrost_batch_create_bo(batch, shader->size,
-                                                   PAN_BO_EXECUTE);
+                                                   PAN_BO_EXECUTE,
+                                                   PAN_BO_ACCESS_PRIVATE |
+                                                   PAN_BO_ACCESS_READ |
+                                                   PAN_BO_ACCESS_VERTEX_TILER |
+                                                   PAN_BO_ACCESS_FRAGMENT);
          memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
  
          if (shader->patch_index) {
diff --git a/src/gallium/drivers/panfrost/pan_bo.h b/src/gallium/drivers/panfrost/pan_bo.h

index 33fbddff336946307edab19f4b07f074fd0bdf8d..73cc74a260d46a9dfe52482b8db889737ac00796 100644 (file)
--- a/src/gallium/drivers/panfrost/pan_bo.h
+++ b/src/gallium/drivers/panfrost/pan_bo.h
@@ -56,6 +56,24 @@ struct panfrost_screen;
   * let the BO logic know about this contraint. */
  #define PAN_BO_DONT_REUSE         (1 << 5)
  
+/* GPU access flags */
+
+/* BO is either shared (can be accessed by more than one GPU batch) or private
+ * (reserved by a specific GPU job). */
+#define PAN_BO_ACCESS_PRIVATE         (0 << 0)
+#define PAN_BO_ACCESS_SHARED          (1 << 0)
+
+/* BO is being read/written by the GPU */
+#define PAN_BO_ACCESS_READ            (1 << 1)
+#define PAN_BO_ACCESS_WRITE           (1 << 2)
+#define PAN_BO_ACCESS_RW              (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE)
+
+/* BO is accessed by the vertex/tiler job. */
+#define PAN_BO_ACCESS_VERTEX_TILER    (1 << 3)
+
+/* BO is accessed by the fragment job. */
+#define PAN_BO_ACCESS_FRAGMENT        (1 << 4)
+
  struct panfrost_bo {
          /* Must be first for casting */
          struct list_head link;
@@ -78,6 +96,17 @@ struct panfrost_bo {
          uint32_t flags;
  };
  
+static inline uint32_t
+panfrost_bo_access_for_stage(enum pipe_shader_type stage)
+{
+        assert(stage == PIPE_SHADER_FRAGMENT ||
+               stage == PIPE_SHADER_VERTEX);
+
+        return stage == PIPE_SHADER_FRAGMENT ?
+               PAN_BO_ACCESS_FRAGMENT :
+               PAN_BO_ACCESS_VERTEX_TILER;
+}
+
  void
  panfrost_bo_reference(struct panfrost_bo *bo);
  void
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c

index ba32564b6e013496d58e8f1b9b5cf20ecfa60def..b9945bdac870516e69098bcc478ab74265e21198 100644 (file)
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -583,6 +583,7 @@ panfrost_layout_for_texture(struct panfrost_resource *rsrc)
  static mali_ptr
  panfrost_upload_tex(
          struct panfrost_context *ctx,
+        enum pipe_shader_type st,
          struct panfrost_sampler_view *view)
  {
          if (!view)
@@ -610,7 +611,9 @@ panfrost_upload_tex(
  
          /* Add the BO to the job so it's retained until the job is done. */
          struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
-        panfrost_batch_add_bo(batch, rsrc->bo);
+        panfrost_batch_add_bo(batch, rsrc->bo,
+                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
+                              panfrost_bo_access_for_stage(st));
  
          /* Add the usage flags in, since they can change across the CSO
           * lifetime due to layout switches */
@@ -653,7 +656,7 @@ panfrost_upload_texture_descriptors(struct panfrost_context *ctx)
  
                          for (int i = 0; i < ctx->sampler_view_count[t]; ++i)
                                  trampolines[i] =
-                                        panfrost_upload_tex(ctx, ctx->sampler_views[t][i]);
+                                        panfrost_upload_tex(ctx, t, ctx->sampler_views[t][i]);
  
                          trampoline = panfrost_upload_transient(batch, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
                  }
@@ -729,7 +732,9 @@ static void panfrost_upload_ssbo_sysval(
          struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
          struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;
  
-        panfrost_batch_add_bo(batch, bo);
+        panfrost_batch_add_bo(batch, bo,
+                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
+                              panfrost_bo_access_for_stage(st));
  
          /* Upload address and size as sysval */
          uniform->du[0] = bo->gpu + sb.buffer_offset;
@@ -795,6 +800,7 @@ panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf, unsigned
  static mali_ptr
  panfrost_map_constant_buffer_gpu(
          struct panfrost_context *ctx,
+        enum pipe_shader_type st,
          struct panfrost_constant_buffer *buf,
          unsigned index)
  {
@@ -803,7 +809,10 @@ panfrost_map_constant_buffer_gpu(
          struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
  
          if (rsrc) {
-                panfrost_batch_add_bo(batch, rsrc->bo);
+                panfrost_batch_add_bo(batch, rsrc->bo,
+                                      PAN_BO_ACCESS_SHARED |
+                                      PAN_BO_ACCESS_READ |
+                                      panfrost_bo_access_for_stage(st));
                  return rsrc->bo->gpu;
         } else if (cb->user_buffer) {
                  return panfrost_upload_transient(batch, cb->user_buffer, cb->buffer_size);
@@ -845,7 +854,10 @@ panfrost_patch_shader_state(
          struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
  
          /* Add the shader BO to the batch. */
-        panfrost_batch_add_bo(batch, ss->bo);
+        panfrost_batch_add_bo(batch, ss->bo,
+                              PAN_BO_ACCESS_PRIVATE |
+                              PAN_BO_ACCESS_READ |
+                             panfrost_bo_access_for_stage(stage));
  
          /* We can't reuse over frames; that's not safe. The descriptor must be
           * transient uploaded */
@@ -1175,7 +1187,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                                  continue;
                          }
  
-                        mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, buf, ubo);
+                        mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, i, buf, ubo);
  
                          unsigned bytes_per_field = 16;
                          unsigned aligned = ALIGN_POT(usz, bytes_per_field);
@@ -1397,7 +1409,10 @@ panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe
  
          if (!info->has_user_indices) {
                  /* Only resources can be directly mapped */
-                panfrost_batch_add_bo(batch, rsrc->bo);
+                panfrost_batch_add_bo(batch, rsrc->bo,
+                                      PAN_BO_ACCESS_SHARED |
+                                      PAN_BO_ACCESS_READ |
+                                      PAN_BO_ACCESS_VERTEX_TILER);
                  return rsrc->bo->gpu + offset;
          } else {
                  /* Otherwise, we need to upload to transient memory */
diff --git a/src/gallium/drivers/panfrost/pan_instancing.c b/src/gallium/drivers/panfrost/pan_instancing.c

index 806823f0da0d38e9f0d7d77e1a786770ee5470ec..473026cb8a0607ff6749698f3bddeaaa584bc728 100644 (file)
--- a/src/gallium/drivers/panfrost/pan_instancing.c
+++ b/src/gallium/drivers/panfrost/pan_instancing.c
@@ -298,7 +298,10 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch)
                  unsigned chopped_addr = raw_addr - addr;
  
                  /* Add a dependency of the batch on the vertex buffer */
-                panfrost_batch_add_bo(batch, rsrc->bo);
+                panfrost_batch_add_bo(batch, rsrc->bo,
+                                      PAN_BO_ACCESS_SHARED |
+                                      PAN_BO_ACCESS_READ |
+                                      PAN_BO_ACCESS_VERTEX_TILER);
  
                  /* Set common fields */
                  attrs[k].elements = addr;
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c

index 69047ca62a6c66f507e9fb14c5bda324ae0a4658..8e2703ae168cb891a871ded4eac2e2de6c4eb0ea 100644 (file)
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -132,7 +132,8 @@ panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
  }
  
  void
-panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo)
+panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
+                      uint32_t flags)
  {
          if (!bo)
                  return;
@@ -146,26 +147,30 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo)
  
  void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
  {
+        uint32_t flags = PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_WRITE |
+                         PAN_BO_ACCESS_VERTEX_TILER |
+                         PAN_BO_ACCESS_FRAGMENT;
+
          for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
                  struct panfrost_resource *rsrc = pan_resource(batch->key.cbufs[i]->texture);
-                panfrost_batch_add_bo(batch, rsrc->bo);
+                panfrost_batch_add_bo(batch, rsrc->bo, flags);
          }
  
          if (batch->key.zsbuf) {
                  struct panfrost_resource *rsrc = pan_resource(batch->key.zsbuf->texture);
-                panfrost_batch_add_bo(batch, rsrc->bo);
+                panfrost_batch_add_bo(batch, rsrc->bo, flags);
          }
  }
  
  struct panfrost_bo *
  panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
-                         uint32_t create_flags)
+                         uint32_t create_flags, uint32_t access_flags)
  {
          struct panfrost_bo *bo;
  
          bo = panfrost_bo_create(pan_screen(batch->ctx->base.screen), size,
                                  create_flags);
-        panfrost_batch_add_bo(batch, bo);
+        panfrost_batch_add_bo(batch, bo, access_flags);
  
          /* panfrost_batch_add_bo() has retained a reference and
           * panfrost_bo_create() initialize the refcnt to 1, so let's
@@ -189,7 +194,11 @@ panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size)
                  /* Create the BO as invisible, as there's no reason to map */
  
                  batch->polygon_list = panfrost_batch_create_bo(batch, size,
-                                                              PAN_BO_INVISIBLE);
+                                                               PAN_BO_INVISIBLE,
+                                                               PAN_BO_ACCESS_PRIVATE |
+                                                               PAN_BO_ACCESS_RW |
+                                                               PAN_BO_ACCESS_VERTEX_TILER |
+                                                               PAN_BO_ACCESS_FRAGMENT);
          }
  
          return batch->polygon_list->gpu;
@@ -202,7 +211,11 @@ panfrost_batch_get_scratchpad(struct panfrost_batch *batch)
                  return batch->scratchpad;
  
          batch->scratchpad = panfrost_batch_create_bo(batch, 64 * 4 * 4096,
-                                                     PAN_BO_INVISIBLE);
+                                                     PAN_BO_INVISIBLE,
+                                                     PAN_BO_ACCESS_PRIVATE |
+                                                     PAN_BO_ACCESS_RW |
+                                                     PAN_BO_ACCESS_VERTEX_TILER |
+                                                     PAN_BO_ACCESS_FRAGMENT);
          assert(batch->scratchpad);
          return batch->scratchpad;
  }
@@ -215,7 +228,11 @@ panfrost_batch_get_tiler_heap(struct panfrost_batch *batch)
  
          batch->tiler_heap = panfrost_batch_create_bo(batch, 4096 * 4096,
                                                       PAN_BO_INVISIBLE |
-                                                     PAN_BO_GROWABLE);
+                                                     PAN_BO_GROWABLE,
+                                                     PAN_BO_ACCESS_PRIVATE |
+                                                     PAN_BO_ACCESS_RW |
+                                                     PAN_BO_ACCESS_VERTEX_TILER |
+                                                     PAN_BO_ACCESS_FRAGMENT);
          assert(batch->tiler_heap);
          return batch->tiler_heap;
  }
@@ -227,7 +244,11 @@ panfrost_batch_get_tiler_dummy(struct panfrost_batch *batch)
                  return batch->tiler_dummy;
  
          batch->tiler_dummy = panfrost_batch_create_bo(batch, 4096,
-                                                      PAN_BO_INVISIBLE);
+                                                      PAN_BO_INVISIBLE,
+                                                      PAN_BO_ACCESS_PRIVATE |
+                                                      PAN_BO_ACCESS_RW |
+                                                      PAN_BO_ACCESS_VERTEX_TILER |
+                                                      PAN_BO_ACCESS_FRAGMENT);
          assert(batch->tiler_dummy);
          return batch->tiler_dummy;
  }
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h

index e5e0e323f2fd0d1c975ebd71a27e0677e5a80c25..0b37a3131e86800a609852c610ba4fb1e4c86835 100644 (file)
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -131,13 +131,14 @@ void
  panfrost_batch_init(struct panfrost_context *ctx);
  
  void
-panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo);
+panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
+                      uint32_t flags);
  
  void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch);
  
  struct panfrost_bo *
  panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
-                         uint32_t create_flags);
+                         uint32_t create_flags, uint32_t access_flags);
  
  void
  panfrost_batch_submit(struct panfrost_batch *batch);
diff --git a/src/gallium/drivers/panfrost/pan_varyings.c b/src/gallium/drivers/panfrost/pan_varyings.c

index 9dbd1e2ebacfe592a46a1fa25a1b4eaf8ec2c4db..8383c3efd283770a4dc17a9fc7ff2cd17e610f47 100644 (file)
--- a/src/gallium/drivers/panfrost/pan_varyings.c
+++ b/src/gallium/drivers/panfrost/pan_varyings.c
@@ -69,7 +69,15 @@ panfrost_emit_streamout(
          /* Grab the BO and bind it to the batch */
          struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
          struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
-        panfrost_batch_add_bo(batch, bo);
+
+        /* Varyings are WRITE from the perspective of the VERTEX but READ from
+         * the perspective of the TILER and FRAGMENT.
+         */
+        panfrost_batch_add_bo(batch, bo,
+                              PAN_BO_ACCESS_SHARED |
+                              PAN_BO_ACCESS_RW |
+                              PAN_BO_ACCESS_VERTEX_TILER |
+                              PAN_BO_ACCESS_FRAGMENT);
  
          mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride);
          slot->elements = addr;
author	Boris Brezillon <boris.brezillon@collabora.com>
	Sun, 15 Sep 2019 07:21:13 +0000 (09:21 +0200)
committer	Boris Brezillon <boris.brezillon@collabora.com>
	Thu, 3 Oct 2019 20:55:38 +0000 (16:55 -0400)
src/gallium/drivers/panfrost/pan_allocate.c		patch \| blob \| history
src/gallium/drivers/panfrost/pan_blend_cso.c		patch \| blob \| history
src/gallium/drivers/panfrost/pan_bo.h		patch \| blob \| history
src/gallium/drivers/panfrost/pan_context.c		patch \| blob \| history
src/gallium/drivers/panfrost/pan_instancing.c		patch \| blob \| history
src/gallium/drivers/panfrost/pan_job.c		patch \| blob \| history
src/gallium/drivers/panfrost/pan_job.h		patch \| blob \| history
src/gallium/drivers/panfrost/pan_varyings.c		patch \| blob \| history