freedreno: reduce resource dependency tracking overhead
[mesa.git] / src / gallium / drivers / freedreno / freedreno_batch_cache.c
index df11eab254c927aeb6a15d1a497a1e66ffcbc13f..a8b32d9bd08406bfbd6fcba54478c117876c38e6 100644 (file)
@@ -124,34 +124,64 @@ fd_bc_fini(struct fd_batch_cache *cache)
        _mesa_hash_table_destroy(cache->ht, NULL);
 }
 
-uint32_t
-fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
+static void
+bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx, bool deferred)
 {
-       struct hash_entry *entry;
-       struct fd_batch *last_batch = NULL;
+       /* fd_batch_flush() (and fd_batch_add_dep(), which calls it indirectly)
+        * can cause batches to be unref'd and freed under our feet, so grab
+        * references up-front to all the batches we need.
+        */
+       struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
+       struct fd_batch *batch;
+       unsigned n = 0;
 
-       pipe_mutex_lock(ctx->screen->lock);
+       fd_context_lock(ctx);
 
-       hash_table_foreach(cache->ht, entry) {
-               struct fd_batch *batch = NULL;
-               fd_batch_reference_locked(&batch, (struct fd_batch *)entry->data);
+       foreach_batch(batch, cache, cache->batch_mask) {
                if (batch->ctx == ctx) {
-                       pipe_mutex_unlock(ctx->screen->lock);
-                       fd_batch_reference(&last_batch, batch);
-                       fd_batch_flush(batch, false);
-                       pipe_mutex_lock(ctx->screen->lock);
+                       fd_batch_reference_locked(&batches[n++], batch);
                }
-               fd_batch_reference_locked(&batch, NULL);
        }
 
-       pipe_mutex_unlock(ctx->screen->lock);
+       if (deferred) {
+               struct fd_batch *current_batch = fd_context_batch(ctx);
+
+               for (unsigned i = 0; i < n; i++) {
+                       if (batches[i] && (batches[i]->ctx == ctx) &&
+                                       (batches[i] != current_batch)) {
+                               fd_batch_add_dep(current_batch, batches[i]);
+                       }
+               }
+
+               fd_context_unlock(ctx);
+       } else {
+               fd_context_unlock(ctx);
 
-       if (last_batch) {
-               fd_batch_sync(last_batch);
-               fd_batch_reference(&last_batch, NULL);
+               for (unsigned i = 0; i < n; i++) {
+                       fd_batch_flush(batches[i], false, false);
+               }
        }
 
-       return ctx->last_fence;
+       for (unsigned i = 0; i < n; i++) {
+               fd_batch_reference(&batches[i], NULL);
+       }
+}
+
+void
+fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
+{
+       bc_flush(cache, ctx, false);
+}
+
+/* A deferred flush doesn't actually flush anything; instead it marks
+ * every other batch associated with the context as a dependency of the
+ * current batch.  So when the current batch eventually gets flushed, all
+ * the batches that came before it get flushed as well.
+ */
+void
+fd_bc_flush_deferred(struct fd_batch_cache *cache, struct fd_context *ctx)
+{
+       bc_flush(cache, ctx, true);
 }
 
 void
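
For context, the deferred variant is presumably selected from the context's
flush hook based on whether the state tracker passed PIPE_FLUSH_DEFERRED; a
minimal sketch (not part of this patch; function shape assumed from the rest
of the driver, fence handling omitted):

    static void
    fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fencep,
                     unsigned flags)
    {
            struct fd_context *ctx = fd_context(pctx);

            if (flags & PIPE_FLUSH_DEFERRED)
                    /* only record dependencies; nothing is submitted yet */
                    fd_bc_flush_deferred(&ctx->screen->batch_cache, ctx);
            else
                    /* flush every batch owned by this context now */
                    fd_bc_flush(&ctx->screen->batch_cache, ctx);
    }
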
@@ -160,18 +190,28 @@ fd_bc_invalidate_context(struct fd_context *ctx)
        struct fd_batch_cache *cache = &ctx->screen->batch_cache;
        struct fd_batch *batch;
 
-       pipe_mutex_lock(ctx->screen->lock);
+       mtx_lock(&ctx->screen->lock);
 
        foreach_batch(batch, cache, cache->batch_mask) {
                if (batch->ctx == ctx)
-                       fd_batch_reference_locked(&batch, NULL);
+                       fd_bc_invalidate_batch(batch, true);
        }
 
-       pipe_mutex_unlock(ctx->screen->lock);
+       mtx_unlock(&ctx->screen->lock);
 }
 
+/**
+ * Note that when a batch is flushed, it needs to remain in the cache so
+ * that fd_bc_invalidate_resource() can work; otherwise we can end up in
+ * a situation where a rsc is destroyed while a batch still holds a
+ * dangling reference to it.
+ *
+ * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
+ * holds a reference to the underlying bo, so it is ok for the rsc to
+ * be destroyed before the batch.
+ */
 void
-fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy)
+fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
 {
        if (!batch)
                return;
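
The loops above rely on foreach_batch() to walk the set bits of a mask and map
each bit to a slot in cache->batches[].  An approximation of that helper
(assumed definition, built on the existing u_bit_scan() utility):

    /* Hypothetical sketch: pop the lowest set bit of 'mask' on each
     * iteration and yield the batch stored in the corresponding slot.
     */
    #define foreach_batch(batch, cache, mask) \
            for (uint32_t _m = (mask); \
                 _m && ((batch) = (cache)->batches[u_bit_scan(&_m)]); )
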
@@ -179,9 +219,9 @@ fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy)
        struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
        struct key *key = (struct key *)batch->key;
 
-       pipe_mutex_assert_locked(batch->ctx->screen->lock);
+       fd_context_assert_locked(batch->ctx);
 
-       if (destroy) {
+       if (remove) {
                cache->batches[batch->idx] = NULL;
                cache->batch_mask &= ~(1 << batch->idx);
        }
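
The destroy -> remove rename reflects the two ways this function is reached
(assumed call sites, consistent with the comment above): only destruction
reclaims the slot, while a flushed batch merely loses its framebuffer key so
batch_from_key() can no longer hand it out, but it stays in batches[] for
fd_bc_invalidate_resource() to find:

    fd_bc_invalidate_batch(batch, true);   /* batch destruction: free the slot */
    fd_bc_invalidate_batch(batch, false);  /* batch flushed: drop only the key */
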
@@ -206,10 +246,10 @@ fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy)
 void
 fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
 {
-       struct fd_screen *screen = fd_screen(rsc->base.b.screen);
+       struct fd_screen *screen = fd_screen(rsc->base.screen);
        struct fd_batch *batch;
 
-       pipe_mutex_lock(screen->lock);
+       mtx_lock(&screen->lock);
 
        if (destroy) {
                foreach_batch(batch, &screen->batch_cache, rsc->batch_mask) {
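
The rsc->batch_mask walked here is presumably filled in on the producer side,
when a batch first starts reading or writing the resource; a sketch of that
bookkeeping (assumed, mirroring how the mask is consumed above):

    /* Hypothetical producer side: remember which cache slot uses the
     * resource, so invalidation only needs to visit those batches.
     */
    rsc->batch_mask |= (1 << batch->idx);
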
@@ -226,16 +266,16 @@ fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
 
        rsc->bc_batch_mask = 0;
 
-       pipe_mutex_unlock(screen->lock);
+       mtx_unlock(&screen->lock);
 }
 
 struct fd_batch *
-fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx)
+fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx, bool nondraw)
 {
        struct fd_batch *batch;
        uint32_t idx;
 
-       pipe_mutex_lock(ctx->screen->lock);
+       mtx_lock(&ctx->screen->lock);
 
        while ((idx = ffs(~cache->batch_mask)) == 0) {
 #if 0
@@ -265,10 +305,10 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx)
                /* we can drop lock temporarily here, since we hold a ref,
                 * flush_batch won't disappear under us.
                 */
-               pipe_mutex_unlock(ctx->screen->lock);
+               mtx_unlock(&ctx->screen->lock);
                DBG("%p: too many batches!  flush forced!", flush_batch);
-               fd_batch_flush(flush_batch, true);
-               pipe_mutex_lock(ctx->screen->lock);
+               fd_batch_flush(flush_batch, true, false);
+               mtx_lock(&ctx->screen->lock);
 
                /* While the resources get cleaned up automatically, the flush_batch
                 * doesn't get removed from the dependencies of other batches, so
@@ -293,7 +333,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx)
 
        idx--;              /* bit zero returns 1 for ffs() */
 
-       batch = fd_batch_create(ctx);
+       batch = fd_batch_create(ctx, nondraw);
        if (!batch)
                goto out;
 
@@ -305,7 +345,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx)
        cache->batches[idx] = batch;
 
 out:
-       pipe_mutex_unlock(ctx->screen->lock);
+       mtx_unlock(&ctx->screen->lock);
 
        return batch;
 }
@@ -325,7 +365,7 @@ batch_from_key(struct fd_batch_cache *cache, struct key *key,
                return batch;
        }
 
-       batch = fd_bc_alloc_batch(cache, ctx);
+       batch = fd_bc_alloc_batch(cache, ctx, false);
 #ifdef DEBUG
        DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash,
                        key->width, key->height, key->layers, key->samples);
@@ -340,7 +380,7 @@ batch_from_key(struct fd_batch_cache *cache, struct key *key,
        if (!batch)
                return NULL;
 
-       pipe_mutex_lock(ctx->screen->lock);
+       mtx_lock(&ctx->screen->lock);
 
        _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
        batch->key = key;
@@ -351,7 +391,7 @@ batch_from_key(struct fd_batch_cache *cache, struct key *key,
                rsc->bc_batch_mask = (1 << batch->idx);
        }
 
-       pipe_mutex_unlock(ctx->screen->lock);
+       mtx_unlock(&ctx->screen->lock);
 
        return batch;
 }
@@ -375,7 +415,7 @@ fd_batch_from_fb(struct fd_batch_cache *cache, struct fd_context *ctx,
        key->width = pfb->width;
        key->height = pfb->height;
        key->layers = pfb->layers;
-       key->samples = pfb->samples;
+       key->samples = util_framebuffer_get_num_samples(pfb);
        key->ctx = ctx;
 
        if (pfb->zsbuf)
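
Switching from pfb->samples to the u_framebuffer helper presumably normalizes
the sample count so that an unset value of 0 and an explicit 1 produce the
same batch key; a rough sketch of what such a helper does (assumed shape, not
the verbatim implementation):

    static inline unsigned
    util_framebuffer_get_num_samples(const struct pipe_framebuffer_state *fb)
    {
            /* derive the sample count from the first bound attachment,
             * never returning zero
             */
            for (unsigned i = 0; i < fb->nr_cbufs; i++)
                    if (fb->cbufs[i])
                            return MAX2(1, fb->cbufs[i]->texture->nr_samples);
            if (fb->zsbuf)
                    return MAX2(1, fb->zsbuf->texture->nr_samples);

            return 1;
    }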