nvc0: serialize before updating some constant buffer bindings on Maxwell+
authorRhys Perry <pendingchaos02@gmail.com>
Sun, 15 Jul 2018 22:14:41 +0000 (23:14 +0100)
committerRhys Perry <pendingchaos02@gmail.com>
Mon, 30 Jul 2018 14:04:26 +0000 (15:04 +0100)
To avoid serializing, this has the user constant buffer always be 65536
bytes and enabled unless it's required that something else is used for
constant buffer 0.

Fixes artifacts with at least XCOM: Enemy Within, 0 A.D. and Unigine
Valley, Heaven and Superposition.

v2: changed uniform_buffer_bound to be bool instead of a uint32_t
v3: remove magic constants
v3: remove pointless code in nvc0_validate_driverconst

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100177
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c

index 11635c94658b01b08c3621e9a78e9cbe098c5431..4963493877b0caebc8f1870551d74f05a14fad70 100644 (file)
@@ -181,7 +181,7 @@ nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)
    /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
    for (s = 0; s < 5; s++) {
       nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
-      nvc0->state.uniform_buffer_bound[s] = 0;
+      nvc0->state.uniform_buffer_bound[s] = false;
    }
    nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
 }
@@ -203,19 +203,18 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
          assert(i == 0); /* we really only want OpenGL uniforms here */
          assert(nvc0->constbuf[s][0].u.data);
 
-         if (nvc0->state.uniform_buffer_bound[s] < size) {
-            nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
+         if (!nvc0->state.uniform_buffer_bound[s]) {
+            nvc0->state.uniform_buffer_bound[s] = true;
 
             BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
-            PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
+            PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE);
             PUSH_DATAh(push, bo->offset + base);
             PUSH_DATA (push, bo->offset + base);
             BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
             PUSH_DATA (push, (0 << 8) | 1);
          }
          nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
-                         base, nvc0->state.uniform_buffer_bound[s],
-                         0, (size + 3) / 4,
+                         base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4,
                          nvc0->constbuf[s][0].u.data);
       } else {
          struct nv04_resource *res =
@@ -236,7 +235,7 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
             PUSH_DATA (push, (i << 8) | 0);
          }
          if (i == 0)
-            nvc0->state.uniform_buffer_bound[s] = 0;
+            nvc0->state.uniform_buffer_bound[s] = false;
       }
    }
 
index 37e10dcd07f9835619e9234339606b36927a2b90..bcd30b560bc27134cda15d2fa4b64232a1a2c72c 100644 (file)
@@ -382,7 +382,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
    case PIPE_SHADER_CAP_MAX_OUTPUTS:
       return 32;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
-      return 65536;
+      return NVC0_MAX_CONSTBUF_SIZE;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
       return NVC0_MAX_PIPE_CONSTBUFS;
    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
@@ -829,6 +829,40 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
    return 0;
 }
 
+void
+nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize,
+                       int stage, int index, int size, uint64_t addr)
+{
+   assert(stage != 5);
+
+   struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+   if (screen->base.class_3d >= GM107_3D_CLASS) {
+      struct nvc0_cb_binding *binding = &screen->cb_bindings[stage][index];
+
+      // TODO: Better figure out the conditions in which this is needed
+      bool serialize = binding->addr == addr && binding->size != size;
+      if (can_serialize)
+         serialize = serialize && *can_serialize;
+      if (serialize) {
+         IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+         if (can_serialize)
+            *can_serialize = false;
+      }
+
+      binding->addr = addr;
+      binding->size = size;
+   }
+
+   if (size >= 0) {
+      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+      PUSH_DATA (push, size);
+      PUSH_DATAh(push, addr);
+      PUSH_DATA (push, addr);
+   }
+   IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0));
+}
+
 #define FAIL_SCREEN_INIT(str, err)                    \
    do {                                               \
       NOUVEAU_ERR(str, err);                          \
@@ -1272,14 +1306,14 @@ nvc0_screen_create(struct nouveau_device *dev)
 
    /* XXX: Compute and 3D are somehow aliased on Fermi. */
    for (i = 0; i < 5; ++i) {
+      unsigned j = 0;
+      for (j = 0; j < 16; j++)
+         screen->cb_bindings[i][j].size = -1;
+
       /* TIC and TSC entries for each unit (nve4+ only) */
       /* auxiliary constants (6 user clip planes, base instance id) */
-      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
-      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
-      PUSH_DATA (push, (15 << 4) | 1);
+      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
+                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
       if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
          unsigned j;
          BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
index efd62a8a4129f535cbcbb15e3d33e3e982eee025..d8223ba9d542a03e404112832597f9b0dc85eba7 100644 (file)
@@ -16,7 +16,9 @@
 #define NVE4_IMG_MAX_HANDLES 512
 
 /* doesn't count driver-reserved slot */
-#define NVC0_MAX_PIPE_CONSTBUFS         15
+#define NVC0_MAX_PIPE_CONSTBUFS 15
+#define NVC0_MAX_CONST_BUFFERS  16
+#define NVC0_MAX_CONSTBUF_SIZE  65536
 
 #define NVC0_MAX_SURFACE_SLOTS 16
 
@@ -53,12 +55,17 @@ struct nvc0_graph_state {
    uint8_t tls_required; /* bitmask of shader types using l[] */
    uint8_t clip_enable;
    uint32_t clip_mode;
-   uint32_t uniform_buffer_bound[6];
+   bool uniform_buffer_bound[6];
    struct nvc0_transform_feedback_state *tfb;
    bool seamless_cube_map;
    bool post_depth_coverage;
 };
 
+struct nvc0_cb_binding {
+   uint64_t addr;
+   int size;
+};
+
 struct nvc0_screen {
    struct nouveau_screen base;
 
@@ -114,6 +121,9 @@ struct nvc0_screen {
       bool mp_counters_enabled;
    } pm;
 
+   /* only maintained on Maxwell+ */
+   struct nvc0_cb_binding cb_bindings[5][NVC0_MAX_CONST_BUFFERS];
+
    struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
    struct nouveau_object *eng2d;
    struct nouveau_object *m2mf;
@@ -146,6 +156,9 @@ int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
 
 int nvc0_screen_resize_text_area(struct nvc0_screen *, uint64_t);
 
+// 3D Only
+void nvc0_screen_bind_cb_3d(struct nvc0_screen *, bool *, int, int, int, uint64_t);
+
 static inline void
 nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
 {
index cc18f41c4bb5454ff59c67dac45880404ab801e5..4f004a4f70577907be66629a31c9afdbe1a5582e 100644 (file)
@@ -565,9 +565,10 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0)
 static void
 nvc0_constbufs_validate(struct nvc0_context *nvc0)
 {
-   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    unsigned s;
 
+   bool can_serialize = true;
+
    for (s = 0; s < 5; ++s) {
       while (nvc0->constbuf_dirty[s]) {
          int i = ffs(nvc0->constbuf_dirty[s]) - 1;
@@ -580,41 +581,34 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
             assert(i == 0); /* we really only want OpenGL uniforms here */
             assert(nvc0->constbuf[s][0].u.data);
 
-            if (nvc0->state.uniform_buffer_bound[s] < size) {
-               nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
+            if (!nvc0->state.uniform_buffer_bound[s]) {
+               nvc0->state.uniform_buffer_bound[s] = true;
 
-               BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-               PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
-               PUSH_DATAh(push, bo->offset + base);
-               PUSH_DATA (push, bo->offset + base);
-               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
-               PUSH_DATA (push, (0 << 4) | 1);
+               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
+                                      NVC0_MAX_CONSTBUF_SIZE, bo->offset + base);
             }
             nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
-                         base, nvc0->state.uniform_buffer_bound[s],
+                         base, NVC0_MAX_CONSTBUF_SIZE,
                          0, (size + 3) / 4,
                          nvc0->constbuf[s][0].u.data);
          } else {
             struct nv04_resource *res =
                nv04_resource(nvc0->constbuf[s][i].u.buf);
             if (res) {
-               BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-               PUSH_DATA (push, nvc0->constbuf[s][i].size);
-               PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
-               PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
-               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
-               PUSH_DATA (push, (i << 4) | 1);
+               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i,
+                                      nvc0->constbuf[s][i].size,
+                                      res->address + nvc0->constbuf[s][i].offset);
 
                BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);
 
                nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
                res->cb_bindings[s] |= 1 << i;
-            } else {
-               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
-               PUSH_DATA (push, (i << 4) | 0);
+
+               if (i == 0)
+                  nvc0->state.uniform_buffer_bound[s] = false;
+            } else if (i != 0) {
+               nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0);
             }
-            if (i == 0)
-               nvc0->state.uniform_buffer_bound[s] = 0;
          }
       }
    }
@@ -623,7 +617,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
       /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
       nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
       nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
-      nvc0->state.uniform_buffer_bound[5] = 0;
+      nvc0->state.uniform_buffer_bound[5] = false;
    }
 }
 
@@ -710,18 +704,12 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
 static void
 nvc0_validate_driverconst(struct nvc0_context *nvc0)
 {
-   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nvc0_screen *screen = nvc0->screen;
    int i;
 
-   for (i = 0; i < 5; ++i) {
-      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
-      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
-      BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
-      PUSH_DATA (push, (15 << 4) | 1);
-   }
+   for (i = 0; i < 5; ++i)
+      nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE,
+                             screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
 
    nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
 }