const struct pipe_compute_state *cso)
{
struct fd_context *ctx = fd_context(pctx);
+
+ /* req_input_mem will only be non-zero for cl kernels (ie. clover).
+ * This isn't a perfect test because I guess it is possible (but
+ * uncommon) for none for the kernel parameters to be a global,
+ * but ctx->set_global_bindings() can't fail, so this is the next
+ * best place to fail if we need a newer version of kernel driver:
+ */
+ if ((cso->req_input_mem > 0) &&
+ fd_device_version(ctx->dev) < FD_VERSION_BO_IOVA) {
+ return NULL;
+ }
+
struct ir3_compiler *compiler = ctx->screen->compiler;
struct fd5_compute_stateobj *so = CALLOC_STRUCT(fd5_compute_stateobj);
so->shader = ir3_shader_create_compute(compiler, cso, &ctx->debug);
struct ir3_shader_key key = {0};
struct ir3_shader_variant *v;
struct fd_ringbuffer *ring = ctx->batch->draw;
+ unsigned i, nglobal = 0;
emit_setup(ctx);
fd5_emit_cs_state(ctx, ring, v);
ir3_emit_cs_consts(v, ring, ctx, info);
+ foreach_bit(i, ctx->global_bindings.enabled_mask)
+ nglobal++;
+
+ if (nglobal > 0) {
+ /* global resources don't otherwise get an OUT_RELOC(), since
+	 * the raw ptr address is emitted in ir3_emit_cs_consts().
+ * So to make the kernel aware that these buffers are referenced
+ * by the batch, emit dummy reloc's as part of a no-op packet
+ * payload:
+ */
+ OUT_PKT7(ring, CP_NOP, 2 * nglobal);
+ foreach_bit(i, ctx->global_bindings.enabled_mask) {
+ struct pipe_resource *prsc = ctx->global_bindings.buf[i];
+ OUT_RELOCW(ring, fd_resource(prsc)->bo, 0, 0, 0);
+ }
+ }
+
const unsigned *local_size = info->block; // v->shader->nir->info->cs.local_size;
const unsigned *num_groups = info->grid;
/* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
unsigned offsets[PIPE_MAX_SO_BUFFERS];
};
+/* State for pipe_context::set_global_binding() -- global buffer objects
+ * bound for compute (used by clover for OpenCL kernels):
+ */
+#define MAX_GLOBAL_BUFFERS 16
+struct fd_global_bindings_stateobj {
+	/* Bound resources; a slot with a non-NULL buf has the matching
+	 * bit set in enabled_mask:
+	 */
+	struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
+	uint32_t enabled_mask;
+};
+
/* group together the vertex and vertexbuf state.. for ease of passing
* around, and because various internal operations (gmem<->mem, etc)
* need their own vertex state:
struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES];
struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES];
struct fd_streamout_stateobj streamout;
+ struct fd_global_bindings_stateobj global_bindings;
struct pipe_clip_state ucp;
struct pipe_query *cond_query;
foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);
+ /* For global buffers, we don't really know if read or written, so assume
+ * the worst:
+ */
+ foreach_bit(i, ctx->global_bindings.enabled_mask)
+ resource_written(batch, ctx->global_bindings.buf[i]);
+
if (info->indirect)
resource_read(batch, info->indirect);
// TODO
}
+/* used by clover to bind global objects, returning the bo address
+ * via handles[n]
+ */
static void
fd_set_global_binding(struct pipe_context *pctx,
		unsigned first, unsigned count, struct pipe_resource **prscs,
		uint32_t **handles)
{
-	/* TODO only used by clover.. seems to need us to return the actual
-	 * gpuaddr of the buffer.. which isn't really exposed to mesa atm.
-	 * How is this used?
-	 */
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_global_bindings_stateobj *so = &ctx->global_bindings;
+	unsigned mask = 0;
+
+	if (prscs) {
+		/* Bind path: take a reference on each resource and report its
+		 * GPU address back to the state tracker through handles[i].
+		 */
+		for (unsigned i = 0; i < count; i++) {
+			unsigned n = i + first;
+
+			/* NOTE(review): mask is accumulated here but never read in
+			 * this branch (enabled_mask is updated per-slot below) --
+			 * looks like dead code; confirm before removing.
+			 */
+			mask |= BIT(n);
+
+			pipe_resource_reference(&so->buf[n], prscs[i]);
+
+			if (so->buf[n]) {
+				struct fd_resource *rsc = fd_resource(so->buf[n]);
+				/* fd_bo_get_iova() pins the bo's GPU address; the
+				 * matching fd_bo_put_iova() happens on the unbind
+				 * path below.
+				 * NOTE(review): rebinding over an already-enabled
+				 * slot appears to skip the put for the previously
+				 * bound bo -- verify for an iova pin leak.
+				 */
+				uint64_t iova = fd_bo_get_iova(rsc->bo);
+				// TODO need to scream if iova > 32b or fix gallium API..
+				/* Add the bo's base address into the caller-provided
+				 * value (presumably an offset within the buffer --
+				 * TODO confirm against the clover caller):
+				 */
+				*handles[i] += iova;
+			}
+
+			if (prscs[i])
+				so->enabled_mask |= BIT(n);
+			else
+				so->enabled_mask &= ~BIT(n);
+		}
+	} else {
+		/* Unbind path: drop the iova pin and the resource reference
+		 * for every slot in [first, first+count):
+		 */
+		mask = (BIT(count) - 1) << first;
+
+		for (unsigned i = 0; i < count; i++) {
+			unsigned n = i + first;
+			if (so->buf[n]) {
+				struct fd_resource *rsc = fd_resource(so->buf[n]);
+				fd_bo_put_iova(rsc->bo);
+			}
+			pipe_resource_reference(&so->buf[n], NULL);
+		}
+
+		so->enabled_mask &= ~mask;
+	}
+
}
void