freedreno: add global_bindings state
author Rob Clark <robdclark@gmail.com>
Sun, 25 Feb 2018 21:11:06 +0000 (16:11 -0500)
committer Rob Clark <robdclark@gmail.com>
Mon, 5 Mar 2018 13:05:33 +0000 (08:05 -0500)
Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a5xx/fd5_compute.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_draw.c
src/gallium/drivers/freedreno/freedreno_state.c

index b7f596f336a8e010fde9a5c5958f46f4a7f4ea57..9d3039c3805c65c04be9651eb4d26e56cb8514c2 100644 (file)
@@ -42,6 +42,18 @@ fd5_create_compute_state(struct pipe_context *pctx,
                const struct pipe_compute_state *cso)
 {
        struct fd_context *ctx = fd_context(pctx);
+
+       /* req_input_mem will only be non-zero for cl kernels (ie. clover).
+        * This isn't a perfect test because I guess it is possible (but
+        * uncommon) for none of the kernel parameters to be a global,
+        * but ctx->set_global_bindings() can't fail, so this is the next
+        * best place to fail if we need a newer version of kernel driver:
+        */
+       if ((cso->req_input_mem > 0) &&
+                       fd_device_version(ctx->dev) < FD_VERSION_BO_IOVA) {
+               return NULL;
+       }
+
        struct ir3_compiler *compiler = ctx->screen->compiler;
        struct fd5_compute_stateobj *so = CALLOC_STRUCT(fd5_compute_stateobj);
        so->shader = ir3_shader_create_compute(compiler, cso, &ctx->debug);
@@ -156,6 +168,7 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
        struct ir3_shader_key key = {0};
        struct ir3_shader_variant *v;
        struct fd_ringbuffer *ring = ctx->batch->draw;
+       unsigned i, nglobal = 0;
 
        emit_setup(ctx);
 
@@ -167,6 +180,23 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
        fd5_emit_cs_state(ctx, ring, v);
        ir3_emit_cs_consts(v, ring, ctx, info);
 
+       foreach_bit(i, ctx->global_bindings.enabled_mask)
+               nglobal++;
+
+       if (nglobal > 0) {
+               /* global resources don't otherwise get an OUT_RELOC(), since
+                * the raw ptr address is emitted in ir3_emit_cs_consts().
+                * So to make the kernel aware that these buffers are referenced
+                * by the batch, emit dummy relocs as part of a no-op packet
+                * payload:
+                */
+               OUT_PKT7(ring, CP_NOP, 2 * nglobal);
+               foreach_bit(i, ctx->global_bindings.enabled_mask) {
+                       struct pipe_resource *prsc = ctx->global_bindings.buf[i];
+                       OUT_RELOCW(ring, fd_resource(prsc)->bo, 0, 0, 0);
+               }
+       }
+
        const unsigned *local_size = info->block; // v->shader->nir->info->cs.local_size;
        const unsigned *num_groups = info->grid;
        /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */
index 1653e8a3b85b4f76c6655ac39858fc5b57a76ccd..af564bd87609fab46eab96051cbfc88458cd65bd 100644 (file)
@@ -110,6 +110,12 @@ struct fd_streamout_stateobj {
        unsigned offsets[PIPE_MAX_SO_BUFFERS];
 };
 
+#define MAX_GLOBAL_BUFFERS 16
+struct fd_global_bindings_stateobj {
+       struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
+       uint32_t enabled_mask;
+};
+
 /* group together the vertex and vertexbuf state.. for ease of passing
  * around, and because various internal operations (gmem<->mem, etc)
  * need their own vertex state:
@@ -282,6 +288,7 @@ struct fd_context {
        struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES];
        struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES];
        struct fd_streamout_stateobj streamout;
+       struct fd_global_bindings_stateobj global_bindings;
        struct pipe_clip_state ucp;
 
        struct pipe_query *cond_query;
index 933481e742d0416c36c61845a3b7c9c32c9076ae..eb36a930751432a86827ad980c0e7bc2459c8255 100644 (file)
@@ -488,6 +488,12 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
        foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
                resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);
 
+       /* For global buffers, we don't really know if read or written, so assume
+        * the worst:
+        */
+       foreach_bit(i, ctx->global_bindings.enabled_mask)
+               resource_written(batch, ctx->global_bindings.buf[i]);
+
        if (info->indirect)
                resource_read(batch, info->indirect);
 
index dd42aa02e7597a792a65c34b48086f2fd853235d..05717da95995e5cec6a50a2194e440c45d8ca234 100644 (file)
@@ -495,15 +495,53 @@ fd_set_compute_resources(struct pipe_context *pctx,
        // TODO
 }
 
+/* used by clover to bind global objects; the bo's iova is added to
+ * the value that handles[i] points at
+ */
 static void
 fd_set_global_binding(struct pipe_context *pctx,
                unsigned first, unsigned count, struct pipe_resource **prscs,
                uint32_t **handles)
 {
-       /* TODO only used by clover.. seems to need us to return the actual
-        * gpuaddr of the buffer.. which isn't really exposed to mesa atm.
-        * How is this used?
-        */
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd_global_bindings_stateobj *so = &ctx->global_bindings;
+       unsigned mask = 0;
+
+       if (prscs) {
+               for (unsigned i = 0; i < count; i++) {
+                       unsigned n = i + first;
+
+                       mask |= BIT(n);
+
+                       pipe_resource_reference(&so->buf[n], prscs[i]);
+
+                       if (so->buf[n]) {
+                               struct fd_resource *rsc = fd_resource(so->buf[n]);
+                               uint64_t iova = fd_bo_get_iova(rsc->bo);
+                               // TODO need to scream if iova > 32b or fix gallium API..
+                               *handles[i] += iova;
+                       }
+
+                       if (prscs[i])
+                               so->enabled_mask |= BIT(n);
+                       else
+                               so->enabled_mask &= ~BIT(n);
+               }
+       } else {
+               mask = (BIT(count) - 1) << first;
+
+               for (unsigned i = 0; i < count; i++) {
+                       unsigned n = i + first;
+                       if (so->buf[n]) {
+                               struct fd_resource *rsc = fd_resource(so->buf[n]);
+                               fd_bo_put_iova(rsc->bo);
+                       }
+                       pipe_resource_reference(&so->buf[n], NULL);
+               }
+
+               so->enabled_mask &= ~mask;
+       }
+
 }
 
 void