From f310e86a4200d8ecc74f9ae682386b6ecd1d09d2 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Sun, 15 Jul 2018 23:14:41 +0100 Subject: [PATCH] nvc0: serialize before updating some constant buffer bindings on Maxwell+ To avoid serializing, this has the user constant buffer always be 65536 bytes and enabled unless it's required that something else is used for constant buffer 0. Fixes artifacts with at least XCOM: Enemy Within, 0 A.D. and Unigine Valley, Heaven and Superposition. v2: changed uniform_buffer_bound to be bool instead of a uint32_t v3: remove magic constants v3: remove pointless code in nvc0_validate_driverconst Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100177 Signed-off-by: Rhys Perry Reviewed-by: Ilia Mirkin --- .../drivers/nouveau/nvc0/nvc0_compute.c | 13 +++-- .../drivers/nouveau/nvc0/nvc0_screen.c | 48 +++++++++++++++--- .../drivers/nouveau/nvc0/nvc0_screen.h | 17 ++++++- .../nouveau/nvc0/nvc0_state_validate.c | 50 +++++++------------ 4 files changed, 81 insertions(+), 47 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index 11635c94658..4963493877b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -181,7 +181,7 @@ nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0) /* Invalidate all 3D constbufs because they are aliased with COMPUTE. 
*/ for (s = 0; s < 5; s++) { nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s]; - nvc0->state.uniform_buffer_bound[s] = 0; + nvc0->state.uniform_buffer_bound[s] = false; } nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; } @@ -203,19 +203,18 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) assert(i == 0); /* we really only want OpenGL uniforms here */ assert(nvc0->constbuf[s][0].u.data); - if (nvc0->state.uniform_buffer_bound[s] < size) { - nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); + if (!nvc0->state.uniform_buffer_bound[s]) { + nvc0->state.uniform_buffer_bound[s] = true; BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); - PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); + PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (0 << 8) | 1); } nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), - base, nvc0->state.uniform_buffer_bound[s], - 0, (size + 3) / 4, + base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4, nvc0->constbuf[s][0].u.data); } else { struct nv04_resource *res = @@ -236,7 +235,7 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) PUSH_DATA (push, (i << 8) | 0); } if (i == 0) - nvc0->state.uniform_buffer_bound[s] = 0; + nvc0->state.uniform_buffer_bound[s] = false; } } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 37e10dcd07f..bcd30b560bc 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -382,7 +382,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_OUTPUTS: return 32; case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: - return 65536; + return NVC0_MAX_CONSTBUF_SIZE; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return NVC0_MAX_PIPE_CONSTBUFS; case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: @@ -829,6 +829,40 @@ 
nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size) return 0; } +void +nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize, + int stage, int index, int size, uint64_t addr) +{ + assert(stage != 5); + + struct nouveau_pushbuf *push = screen->base.pushbuf; + + if (screen->base.class_3d >= GM107_3D_CLASS) { + struct nvc0_cb_binding *binding = &screen->cb_bindings[stage][index]; + + // TODO: Better figure out the conditions in which this is needed + bool serialize = binding->addr == addr && binding->size != size; + if (can_serialize) + serialize = serialize && *can_serialize; + if (serialize) { + IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); + if (can_serialize) + *can_serialize = false; + } + + binding->addr = addr; + binding->size = size; + } + + if (size >= 0) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, size); + PUSH_DATAh(push, addr); + PUSH_DATA (push, addr); + } + IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0)); +} + #define FAIL_SCREEN_INIT(str, err) \ do { \ NOUVEAU_ERR(str, err); \ @@ -1272,14 +1306,14 @@ nvc0_screen_create(struct nouveau_device *dev) /* XXX: Compute and 3D are somehow aliased on Fermi. 
*/ for (i = 0; i < 5; ++i) { + unsigned j = 0; + for (j = 0; j < NVC0_MAX_CONST_BUFFERS; j++) + screen->cb_bindings[i][j].size = -1; + /* TIC and TSC entries for each unit (nve4+ only) */ /* auxiliary constants (6 user clip planes, base instance id) */ - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, NVC0_CB_AUX_SIZE); - PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); - PUSH_DATA (push, (15 << 4) | 1); + nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE, + screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); if (screen->eng3d->oclass >= NVE4_3D_CLASS) { unsigned j; BEGIN_1IC0(push, NVC0_3D(CB_POS), 9); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index efd62a8a412..d8223ba9d54 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -16,7 +16,9 @@ #define NVE4_IMG_MAX_HANDLES 512 /* doesn't count driver-reserved slot */ -#define NVC0_MAX_PIPE_CONSTBUFS 15 +#define NVC0_MAX_PIPE_CONSTBUFS 15 +#define NVC0_MAX_CONST_BUFFERS 16 +#define NVC0_MAX_CONSTBUF_SIZE 65536 #define NVC0_MAX_SURFACE_SLOTS 16 @@ -53,12 +55,17 @@ struct nvc0_graph_state { uint8_t tls_required; /* bitmask of shader types using l[] */ uint8_t clip_enable; uint32_t clip_mode; - uint32_t uniform_buffer_bound[6]; + bool uniform_buffer_bound[6]; struct nvc0_transform_feedback_state *tfb; bool seamless_cube_map; bool post_depth_coverage; }; +struct nvc0_cb_binding { + uint64_t addr; + int size; +}; + struct nvc0_screen { struct nouveau_screen base; @@ -114,6 +121,9 @@ struct nvc0_screen { bool mp_counters_enabled; } pm; + /* only maintained on Maxwell+ */ + struct nvc0_cb_binding cb_bindings[5][NVC0_MAX_CONST_BUFFERS]; + struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */ struct nouveau_object *eng2d; struct nouveau_object *m2mf; 
@@ -146,6 +156,9 @@ int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *); int nvc0_screen_resize_text_area(struct nvc0_screen *, uint64_t); +// 3D Only +void nvc0_screen_bind_cb_3d(struct nvc0_screen *, bool *, int, int, int, uint64_t); + static inline void nvc0_resource_fence(struct nv04_resource *res, uint32_t flags) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index cc18f41c4bb..4f004a4f705 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -565,9 +565,10 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0) static void nvc0_constbufs_validate(struct nvc0_context *nvc0) { - struct nouveau_pushbuf *push = nvc0->base.pushbuf; unsigned s; + bool can_serialize = true; + for (s = 0; s < 5; ++s) { while (nvc0->constbuf_dirty[s]) { int i = ffs(nvc0->constbuf_dirty[s]) - 1; @@ -580,41 +581,34 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) assert(i == 0); /* we really only want OpenGL uniforms here */ assert(nvc0->constbuf[s][0].u.data); - if (nvc0->state.uniform_buffer_bound[s] < size) { - nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); + if (!nvc0->state.uniform_buffer_bound[s]) { + nvc0->state.uniform_buffer_bound[s] = true; - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); - PUSH_DATAh(push, bo->offset + base); - PUSH_DATA (push, bo->offset + base); - BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); - PUSH_DATA (push, (0 << 4) | 1); + nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, + NVC0_MAX_CONSTBUF_SIZE, bo->offset + base); } nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), - base, nvc0->state.uniform_buffer_bound[s], + base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4, nvc0->constbuf[s][0].u.data); } else { struct nv04_resource *res = nv04_resource(nvc0->constbuf[s][i].u.buf); if (res) { - 
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, nvc0->constbuf[s][i].size); - PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); - PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); - BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); - PUSH_DATA (push, (i << 4) | 1); + nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, + nvc0->constbuf[s][i].size, + res->address + nvc0->constbuf[s][i].offset); BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD); nvc0->cb_dirty = 1; /* Force cache flush for UBO. */ res->cb_bindings[s] |= 1 << i; - } else { - BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); - PUSH_DATA (push, (i << 4) | 0); + + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = false; + } else if (i != 0) { + nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0); } - if (i == 0) - nvc0->state.uniform_buffer_bound[s] = 0; } } } @@ -623,7 +617,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */ nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF; nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5]; - nvc0->state.uniform_buffer_bound[5] = 0; + nvc0->state.uniform_buffer_bound[5] = false; } } @@ -710,18 +704,12 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0) static void nvc0_validate_driverconst(struct nvc0_context *nvc0) { - struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; int i; - for (i = 0; i < 5; ++i) { - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, NVC0_CB_AUX_SIZE); - PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); - PUSH_DATA (push, (15 << 4) | 1); - } + for (i = 0; i < 5; ++i) + nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE, + screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST; } -- 2.30.2