From 12aa047c98e597a109b387e9b71cd87bff0dea0a Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 11 Jan 2016 21:22:58 +0100 Subject: [PATCH] nvc0: bind user uniforms for compute on Kepler Uniform buffer objects will be stuck to the driver constant buffer, like buffers, because the launch descriptor only allows 8 CBs. Input kernel parameters for OpenCL are still uploaded to screen->parm which is bound on c0, but this will be changed later with a new series. Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- .../drivers/nouveau/nvc0/nve4_compute.c | 63 ++++++++++++++++--- .../drivers/nouveau/nvc0/nve4_compute.h | 19 +----- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index ccf5aef5c64..5f340926971 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -308,6 +308,42 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0) nvc0->samplers_dirty[s] = 0; } +static void +nve4_compute_validate_constbufs(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const int s = 5; + + while (nvc0->constbuf_dirty[s]) { + int i = ffs(nvc0->constbuf_dirty[s]) - 1; + nvc0->constbuf_dirty[s] &= ~(1 << i); + + if (nvc0->constbuf[s][i].user) { + struct nouveau_bo *bo = nvc0->screen->uniform_bo; + const unsigned base = NVC0_CB_USR_INFO(s); + const unsigned size = nvc0->constbuf[s][0].size; + assert(i == 0); /* we really only want OpenGL uniforms here */ + assert(nvc0->constbuf[s][0].u.data); + + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, size); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (size / 4)); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + 
PUSH_DATAp(push, nvc0->constbuf[s][0].u.data, size / 4); + } + else { + /* TODO: will be updated in the next commit */ + } + } + + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); +} + static void nve4_compute_validate_buffers(struct nvc0_context *nvc0) { @@ -355,6 +391,7 @@ validate_list_cp[] = { { nve4_compute_validate_surfaces, NVC0_NEW_CP_SURFACES }, { nvc0_compute_validate_globals, NVC0_NEW_CP_GLOBALS }, { nve4_compute_validate_buffers, NVC0_NEW_CP_BUFFERS }, + { nve4_compute_validate_constbufs, NVC0_NEW_CP_CONSTBUF }, }; static bool @@ -372,7 +409,9 @@ nve4_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask) } static void -nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input, +nve4_compute_upload_input(struct nvc0_context *nvc0, + struct nve4_cp_launch_desc *desc, + const void *input, const uint *block_layout, const uint *grid_layout) { @@ -393,6 +432,11 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input, BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (cp->parm_size / 4)); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, input, cp->parm_size / 4); + + /* Bind user parameters coming from clover. */ + /* TODO: This should be harmonized with uniform_bo. 
*/ + assert(!(desc->cb_mask & (1 << 0))); + nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12); } BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO); @@ -429,7 +473,6 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, { const struct nvc0_screen *screen = nvc0->screen; const struct nvc0_program *cp = nvc0->compprog; - unsigned i; nve4_cp_launch_desc_init_default(desc); @@ -451,12 +494,13 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, desc->gpr_alloc = cp->num_gprs; desc->bar_alloc = cp->num_barriers; - for (i = 0; i < 7; ++i) { - const unsigned s = 5; - if (nvc0->constbuf[s][i].u.buf) - nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]); + // Only bind OpenGL uniforms and the driver constant buffer through the + // launch descriptor because UBOs are sticked to the driver cb to avoid the + // limitation of 8 CBs. + if (nvc0->constbuf[5][0].user) { + nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, + NVC0_CB_USR_INFO(5), 1 << 16); } - nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12); nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, NVC0_CB_AUX_INFO(5), 1 << 10); } @@ -500,13 +544,14 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) nve4_compute_setup_launch_desc(nvc0, desc, info->pc, info->block, info->grid); + + nve4_compute_upload_input(nvc0, desc, info->input, info->block, info->grid); + #ifdef DEBUG if (debug_get_num_option("NV50_PROG_DEBUG", 0)) nve4_compute_dump_launch_desc(desc); #endif - nve4_compute_upload_input(nvc0, info->input, info->block, info->grid); - /* upload descriptor and flush */ #if 0 BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h index dcafbeda397..b98c65d4a09 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h +++ 
b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h @@ -56,7 +56,7 @@ static inline void nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc, unsigned index, struct nouveau_bo *bo, - uint32_t base, uint16_t size) + uint32_t base, uint32_t size) { uint64_t address = bo->offset + base; @@ -70,23 +70,6 @@ nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc, desc->cb_mask |= 1 << index; } -static inline void -nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc, - unsigned index, - const struct nvc0_constbuf *cb) -{ - assert(index < 8); - - if (!cb->u.buf) { - desc->cb_mask &= ~(1 << index); - } else { - const struct nv04_resource *buf = nv04_resource(cb->u.buf); - assert(!cb->user); - nve4_cp_launch_desc_set_cb(desc, index, - buf->bo, buf->offset + cb->offset, cb->size); - } -} - struct nve4_mp_trap_info { u32 lock; u32 pc; -- 2.30.2