From 3f48548a6f65fe90b97956c7be73268917c6f2f9 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 5 Jun 2016 19:02:29 -0400 Subject: [PATCH] nv50: reinstate dedicated constbuf push path This was disabled due to occasionally incorrect behavior when trying to upload data. It later became apparent that nvc0 also had a similar but slightly different issue, which was resolved in commit e50c01d5. This takes the same logic as nvc0 and applies it to nv50 (which has somewhat different interfaces). Unfortunately I did not note down precisely what was broken with UBOs when removing the support from nv50, but I've tested a bunch of local traces, and none of them appear to regress. This should hopefully improve performance when UBOs are used, but this was not directly verified. Signed-off-by: Ilia Mirkin Reviewed-by: Samuel Pitoiset --- .../drivers/nouveau/nv50/nv50_context.c | 7 --- .../drivers/nouveau/nv50/nv50_context.h | 3 +- .../drivers/nouveau/nv50/nv50_shader_state.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_state.c | 5 +- .../drivers/nouveau/nv50/nv50_transfer.c | 63 +++++++++++++------ 5 files changed, 50 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 5af0e9b3a27..fc852d7068c 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -310,14 +310,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) nv50->base.screen = &screen->base; nv50->base.copy_data = nv50_m2mf_copy_linear; nv50->base.push_data = nv50_sifc_linear_u8; - /* FIXME: Make it possible to use this again. The problem is that there is - * some clever logic in the card that allows for multiple renders to happen - * when there are only constbuf changes. However that relies on the - * constbuf updates happening to the right constbuf slots. Currently - * implementation just makes it go through a separate slot which doesn't - * properly update the right constbuf data. nv50->base.push_cb = nv50_cb_push; - */ nv50->screen = screen; pipe->screen = pscreen; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 2317fa2ccf8..d5de255b332 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -281,8 +281,7 @@ nv50_m2mf_copy_linear(struct nouveau_context *pipe, unsigned size); void nv50_cb_push(struct nouveau_context *nv, - struct nouveau_bo *bo, unsigned domain, - unsigned base, unsigned size, + struct nv04_resource *res, unsigned offset, unsigned words, const uint32_t *data); /* nv50_vbo.c */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index f838d151161..23263945bdc 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -99,6 +99,7 @@ nv50_constbufs_validate(struct nv50_context *nv50) BCTX_REFN(nv50->bufctx_3d, 3D_CB(s, i), res, RD); nv50->cb_dirty = 1; /* Force cache flush for UBO. */ + res->cb_bindings[s] |= 1 << i; } else { BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (i << 8) | p | 0); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 86e74d68b11..3231aba20c1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -856,9 +856,10 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (nv50->constbuf[s][i].user) nv50->constbuf[s][i].u.buf = NULL; else - if (nv50->constbuf[s][i].u.buf) + if (nv50->constbuf[s][i].u.buf) { nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i)); - + nv04_resource(nv50->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i); + } pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res); nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c index 86a8c159469..8209f1f1e82 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c @@ -371,32 +371,24 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx, FREE(tx); } -void -nv50_cb_push(struct nouveau_context *nv, - struct nouveau_bo *bo, unsigned domain, - unsigned base, unsigned size, - unsigned offset, unsigned words, const uint32_t *data) +static void +nv50_cb_bo_push(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned domain, + unsigned bufid, + unsigned offset, unsigned words, + const uint32_t *data) { struct nouveau_pushbuf *push = nv->pushbuf; - struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx; assert(!(offset & 3)); - size = align(size, 0x100); - - nouveau_bufctx_refn(bctx, 0, bo, NOUVEAU_BO_WR | domain); - nouveau_pushbuf_bufctx(push, bctx); - nouveau_pushbuf_validate(push); while (words) { unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); - PUSH_SPACE(push, nr + 7); - BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); - PUSH_DATAh(push, bo->offset + base); - PUSH_DATA (push, bo->offset + base); - PUSH_DATA (push, (NV50_CB_TMP << 16) | (size & 0xffff)); + PUSH_SPACE(push, nr + 3); + PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain); BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); - PUSH_DATA (push, (offset << 6) | NV50_CB_TMP); + PUSH_DATA (push, (offset << 6) | bufid); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); PUSH_DATAp(push, data, nr); @@ -404,6 +396,41 @@ nv50_cb_push(struct nouveau_context *nv, data += nr; offset += nr * 4; } +} - nouveau_bufctx_reset(bctx, 0); +void +nv50_cb_push(struct nouveau_context *nv, + struct nv04_resource *res, + unsigned offset, unsigned words, const uint32_t *data) +{ + struct nv50_context *nv50 = nv50_context(&nv->pipe); + struct nv50_constbuf *cb = NULL; + int s, bufid; + /* Go through all the constbuf binding points of this buffer and try to + * find one which contains the region to be updated. + */ + /* XXX compute? */ + for (s = 0; s < 3 && !cb; s++) { + uint16_t bindings = res->cb_bindings[s]; + while (bindings) { + int i = ffs(bindings) - 1; + uint32_t cb_offset = nv50->constbuf[s][i].offset; + + bindings &= ~(1 << i); + if (cb_offset <= offset && + cb_offset + nv50->constbuf[s][i].size >= offset + words * 4) { + cb = &nv50->constbuf[s][i]; + bufid = s * 16 + i; + break; + } + } + } + + if (cb) { + nv50_cb_bo_push(nv, res->bo, res->domain, + bufid, offset - cb->offset, words, data); + } else { + nv->push_data(nv, res->bo, res->offset + offset, res->domain, + words * 4, data); + } } -- 2.30.2