From 26a9321d0ae819f2a49d73735e6aa7408ef5f629 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 25 Feb 2018 16:11:06 -0500 Subject: [PATCH] freedreno: add global_bindings state Signed-off-by: Rob Clark --- .../drivers/freedreno/a5xx/fd5_compute.c | 30 ++++++++++++ .../drivers/freedreno/freedreno_context.h | 7 +++ .../drivers/freedreno/freedreno_draw.c | 6 +++ .../drivers/freedreno/freedreno_state.c | 46 +++++++++++++++++-- 4 files changed, 85 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c index b7f596f336a..9d3039c3805 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c @@ -42,6 +42,18 @@ fd5_create_compute_state(struct pipe_context *pctx, const struct pipe_compute_state *cso) { struct fd_context *ctx = fd_context(pctx); + + /* req_input_mem will only be non-zero for cl kernels (ie. clover). + * This isn't a perfect test because I guess it is possible (but + * uncommon) for none of the kernel parameters to be a global, + * but ctx->set_global_bindings() can't fail, so this is the next + * best place to fail if we need a newer version of kernel driver: + */ + if ((cso->req_input_mem > 0) && + fd_device_version(ctx->dev) < FD_VERSION_BO_IOVA) { + return NULL; + } + struct ir3_compiler *compiler = ctx->screen->compiler; struct fd5_compute_stateobj *so = CALLOC_STRUCT(fd5_compute_stateobj); so->shader = ir3_shader_create_compute(compiler, cso, &ctx->debug); @@ -156,6 +168,7 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) struct ir3_shader_key key = {0}; struct ir3_shader_variant *v; struct fd_ringbuffer *ring = ctx->batch->draw; + unsigned i, nglobal = 0; emit_setup(ctx); @@ -167,6 +180,23 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) fd5_emit_cs_state(ctx, ring, v); ir3_emit_cs_consts(v, ring, ctx, info); + foreach_bit(i, 
ctx->global_bindings.enabled_mask) + nglobal++; + + if (nglobal > 0) { + /* global resources don't otherwise get an OUT_RELOC(), since + * the raw ptr address is emitted in ir3_emit_cs_consts(). + * So to make the kernel aware that these buffers are referenced + * by the batch, emit dummy reloc's as part of a no-op packet + * payload: + */ + OUT_PKT7(ring, CP_NOP, 2 * nglobal); + foreach_bit(i, ctx->global_bindings.enabled_mask) { + struct pipe_resource *prsc = ctx->global_bindings.buf[i]; + OUT_RELOCW(ring, fd_resource(prsc)->bo, 0, 0, 0); + } + } + const unsigned *local_size = info->block; // v->shader->nir->info->cs.local_size; const unsigned *num_groups = info->grid; /* for some reason, mesa/st doesn't set info->work_dim, so just assume 3: */ diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 1653e8a3b85..af564bd8760 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -110,6 +110,12 @@ struct fd_streamout_stateobj { unsigned offsets[PIPE_MAX_SO_BUFFERS]; }; +#define MAX_GLOBAL_BUFFERS 16 +struct fd_global_bindings_stateobj { + struct pipe_resource *buf[MAX_GLOBAL_BUFFERS]; + uint32_t enabled_mask; +}; + /* group together the vertex and vertexbuf state.. 
for ease of passing * around, and because various internal operations (gmem<->mem, etc) * need their own vertex state: @@ -282,6 +288,7 @@ struct fd_context { struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES]; struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES]; struct fd_streamout_stateobj streamout; + struct fd_global_bindings_stateobj global_bindings; struct pipe_clip_state ucp; struct pipe_query *cond_query; diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index 933481e742d..eb36a930751 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -488,6 +488,12 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures) resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture); + /* For global buffers, we don't really know if read or written, so assume + * the worst: + */ + foreach_bit(i, ctx->global_bindings.enabled_mask) + resource_written(batch, ctx->global_bindings.buf[i]); + if (info->indirect) resource_read(batch, info->indirect); diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index dd42aa02e75..05717da9599 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -495,15 +495,53 @@ fd_set_compute_resources(struct pipe_context *pctx, // TODO } +/* used by clover to bind global objects, returning the bo address + * via handles[n] + */ static void fd_set_global_binding(struct pipe_context *pctx, unsigned first, unsigned count, struct pipe_resource **prscs, uint32_t **handles) { - /* TODO only used by clover.. seems to need us to return the actual - * gpuaddr of the buffer.. which isn't really exposed to mesa atm. - * How is this used? 
- */ + struct fd_context *ctx = fd_context(pctx); + struct fd_global_bindings_stateobj *so = &ctx->global_bindings; + unsigned mask = 0; + + if (prscs) { + for (unsigned i = 0; i < count; i++) { + unsigned n = i + first; + + mask |= BIT(n); + + pipe_resource_reference(&so->buf[n], prscs[i]); + + if (so->buf[n]) { + struct fd_resource *rsc = fd_resource(so->buf[n]); + uint64_t iova = fd_bo_get_iova(rsc->bo); + // TODO need to scream if iova > 32b or fix gallium API.. + *handles[i] += iova; + } + + if (prscs[i]) + so->enabled_mask |= BIT(n); + else + so->enabled_mask &= ~BIT(n); + } + } else { + mask = (BIT(count) - 1) << first; + + for (unsigned i = 0; i < count; i++) { + unsigned n = i + first; + if (so->buf[n]) { + struct fd_resource *rsc = fd_resource(so->buf[n]); + fd_bo_put_iova(rsc->bo); + } + pipe_resource_reference(&so->buf[n], NULL); + } + + so->enabled_mask &= ~mask; + } + } void -- 2.30.2