src/gallium/drivers/freedreno/freedreno_batch_cache.c

   1 /*
   2  * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #include "util/hash_table.h"
  28 #include "util/set.h"
  29 #include "util/list.h"
  30 #include "util/u_string.h"
  31 #define XXH_INLINE_ALL
  32 #include "util/xxhash.h"
  33
  34 #include "freedreno_batch.h"
  35 #include "freedreno_batch_cache.h"
  36 #include "freedreno_context.h"
  37 #include "freedreno_resource.h"
  38
  39 /* Overview:
  40  *
  41  *   The batch cache provides lookup for mapping pipe_framebuffer_state
  42  *   to a batch.
  43  *
  44  *   It does this via hashtable, with key that roughly matches the
  45  *   pipe_framebuffer_state, as described below.
  46  *
  47  * Batch Cache hashtable key:
  48  *
 *   To serialize the key, and to avoid dealing with holding a reference to
 *   pipe_surfaces (which hold a reference to pipe_resource and complicate
 *   the whole refcnting thing), the key is variable length and inlines the
 *   pertinent details of the pipe_surface.
  53  *
  54  * Batch:
  55  *
  56  *   Each batch needs to hold a reference to each resource it depends on (ie.
  57  *   anything that needs a mem2gmem).  And a weak reference to resources it
  58  *   renders to.  (If both src[n] and dst[n] are not NULL then they are the
  59  *   same.)
  60  *
 *   When a resource is destroyed, we need to remove entries in the batch
 *   cache that reference the resource, to avoid dangling pointer issues.
 *   So each resource tracks the set of batches (see bc_batch_mask) which
 *   reference it in their hashtable key.
  65  *
  66  *   When a batch has weak reference to no more resources (ie. all the
  67  *   surfaces it rendered to are destroyed) the batch can be destroyed.
  68  *   Could happen in an app that renders and never uses the result.  More
  69  *   common scenario, I think, will be that some, but not all, of the
  70  *   surfaces are destroyed before the batch is submitted.
  71  *
 *   If (for example), batch writes to zsbuf but that surface is destroyed
 *   before batch is submitted, we can skip gmem2mem (but still need to
 *   alloc gmem space as before).  If the batch depended on previous contents
 *   of that surface, it would be holding a reference so the surface would
 *   not have been destroyed.
  77  */
  78
  79 struct key {
  80         uint32_t width, height, layers;
  81         uint16_t samples, num_surfs;
  82         struct fd_context *ctx;
  83         struct {
  84                 struct pipe_resource *texture;
  85                 union pipe_surface_desc u;
  86                 uint8_t pos, samples;
  87                 uint16_t format;
  88         } surf[0];
  89 };
  90
  91 static struct key *
  92 key_alloc(unsigned num_surfs)
  93 {
  94         struct key *key =
  95                 CALLOC_VARIANT_LENGTH_STRUCT(key, sizeof(key->surf[0]) * num_surfs);
  96         return key;
  97 }
  98
  99 static uint32_t
 100 key_hash(const void *_key)
 101 {
 102         const struct key *key = _key;
 103         uint32_t hash = 0;
 104         hash = XXH32(key, offsetof(struct key, surf[0]), hash);
 105         hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs , hash);
 106         return hash;
 107 }
 108
 109 static bool
 110 key_equals(const void *_a, const void *_b)
 111 {
 112         const struct key *a = _a;
 113         const struct key *b = _b;
 114         return (memcmp(a, b, offsetof(struct key, surf[0])) == 0) &&
 115                 (memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
 116 }
 117
 118 void
 119 fd_bc_init(struct fd_batch_cache *cache)
 120 {
 121         cache->ht = _mesa_hash_table_create(NULL, key_hash, key_equals);
 122 }
 123
 124 void
 125 fd_bc_fini(struct fd_batch_cache *cache)
 126 {
 127         _mesa_hash_table_destroy(cache->ht, NULL);
 128 }
 129
 130 static void
 131 bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx, bool deferred)
 132 {
 133         /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
 134          * can cause batches to be unref'd and freed under our feet, so grab
 135          * a reference to all the batches we need up-front.
 136          */
 137         struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
 138         struct fd_batch *batch;
 139         unsigned n = 0;
 140
 141         fd_context_lock(ctx);
 142
 143         foreach_batch(batch, cache, cache->batch_mask) {
 144                 if (batch->ctx == ctx) {
 145                         fd_batch_reference_locked(&batches[n++], batch);
 146                 }
 147         }
 148
 149         if (deferred) {
 150                 struct fd_batch *current_batch = fd_context_batch(ctx);
 151
 152                 for (unsigned i = 0; i < n; i++) {
 153                         if (batches[i] && (batches[i]->ctx == ctx) &&
 154                                         (batches[i] != current_batch)) {
 155                                 fd_batch_add_dep(current_batch, batches[i]);
 156                         }
 157                 }
 158
 159                 fd_context_unlock(ctx);
 160         } else {
 161                 fd_context_unlock(ctx);
 162
 163                 for (unsigned i = 0; i < n; i++) {
 164                         fd_batch_flush(batches[i]);
 165                 }
 166         }
 167
 168         for (unsigned i = 0; i < n; i++) {
 169                 fd_batch_reference(&batches[i], NULL);
 170         }
 171 }
 172
 173 void
 174 fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
 175 {
 176         bc_flush(cache, ctx, false);
 177 }
 178
 179 /* deferred flush doesn't actually flush, but it marks every other
 180  * batch associated with the context as dependent on the current
 181  * batch.  So when the current batch gets flushed, all other batches
 182  * that came before also get flushed.
 183  */
 184 void
 185 fd_bc_flush_deferred(struct fd_batch_cache *cache, struct fd_context *ctx)
 186 {
 187         bc_flush(cache, ctx, true);
 188 }
 189
 190 void
 191 fd_bc_invalidate_context(struct fd_context *ctx)
 192 {
 193         struct fd_batch_cache *cache = &ctx->screen->batch_cache;
 194         struct fd_batch *batch;
 195
 196         fd_screen_lock(ctx->screen);
 197
 198         foreach_batch(batch, cache, cache->batch_mask) {
 199                 if (batch->ctx == ctx)
 200                         fd_bc_invalidate_batch(batch, true);
 201         }
 202
 203         fd_screen_unlock(ctx->screen);
 204 }
 205
 206 /**
 207  * Note that when batch is flushed, it needs to remain in the cache so
 208  * that fd_bc_invalidate_resource() can work.. otherwise we can have
 209  * the case where a rsc is destroyed while a batch still has a dangling
 210  * reference to it.
 211  *
 212  * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
 213  * would have a reference to the underlying bo, so it is ok for the
 214  * rsc to be destroyed before the batch.
 215  */
 216 void
 217 fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
 218 {
 219         if (!batch)
 220                 return;
 221
 222         struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
 223         struct key *key = (struct key *)batch->key;
 224
 225         fd_context_assert_locked(batch->ctx);
 226
 227         if (remove) {
 228                 cache->batches[batch->idx] = NULL;
 229                 cache->batch_mask &= ~(1 << batch->idx);
 230         }
 231
 232         if (!key)
 233                 return;
 234
 235         DBG("%p: key=%p", batch, batch->key);
 236         for (unsigned idx = 0; idx < key->num_surfs; idx++) {
 237                 struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
 238                 rsc->bc_batch_mask &= ~(1 << batch->idx);
 239         }
 240
 241         struct hash_entry *entry =
 242                 _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
 243         _mesa_hash_table_remove(cache->ht, entry);
 244
 245         batch->key = NULL;
 246         free(key);
 247 }
 248
 249 void
 250 fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
 251 {
 252         struct fd_screen *screen = fd_screen(rsc->base.screen);
 253         struct fd_batch *batch;
 254
 255         fd_screen_lock(screen);
 256
 257         if (destroy) {
 258                 foreach_batch(batch, &screen->batch_cache, rsc->batch_mask) {
 259                         struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
 260                         _mesa_set_remove(batch->resources, entry);
 261                 }
 262                 rsc->batch_mask = 0;
 263
 264                 fd_batch_reference_locked(&rsc->write_batch, NULL);
 265         }
 266
 267         foreach_batch(batch, &screen->batch_cache, rsc->bc_batch_mask)
 268                 fd_bc_invalidate_batch(batch, false);
 269
 270         rsc->bc_batch_mask = 0;
 271
 272         fd_screen_unlock(screen);
 273 }
 274
 275 struct fd_batch *
 276 fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx, bool nondraw)
 277 {
 278         struct fd_batch *batch;
 279         uint32_t idx;
 280
 281         fd_screen_lock(ctx->screen);
 282
 283         while ((idx = ffs(~cache->batch_mask)) == 0) {
 284 #if 0
 285                 for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
 286                         batch = cache->batches[i];
 287                         debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush);
 288                         set_foreach(batch->dependencies, entry) {
 289                                 struct fd_batch *dep = (struct fd_batch *)entry->key;
 290                                 debug_printf(" %d", dep->idx);
 291                         }
 292                         debug_printf("\n");
 293                 }
 294 #endif
 295                 /* TODO: is LRU the better policy?  Or perhaps the batch that
 296                  * depends on the fewest other batches?
 297                  */
 298                 struct fd_batch *flush_batch = NULL;
 299                 for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
 300                         if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
 301                                 fd_batch_reference_locked(&flush_batch, cache->batches[i]);
 302                 }
 303
 304                 /* we can drop lock temporarily here, since we hold a ref,
 305                  * flush_batch won't disappear under us.
 306                  */
 307                 fd_screen_unlock(ctx->screen);
 308                 DBG("%p: too many batches!  flush forced!", flush_batch);
 309                 fd_batch_flush(flush_batch);
 310                 fd_screen_lock(ctx->screen);
 311
 312                 /* While the resources get cleaned up automatically, the flush_batch
 313                  * doesn't get removed from the dependencies of other batches, so
 314                  * it won't be unref'd and will remain in the table.
 315                  *
 316                  * TODO maybe keep a bitmask of batches that depend on me, to make
 317                  * this easier:
 318                  */
 319                 for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
 320                         struct fd_batch *other = cache->batches[i];
 321                         if (!other)
 322                                 continue;
 323                         if (other->dependents_mask & (1 << flush_batch->idx)) {
 324                                 other->dependents_mask &= ~(1 << flush_batch->idx);
 325                                 struct fd_batch *ref = flush_batch;
 326                                 fd_batch_reference_locked(&ref, NULL);
 327                         }
 328                 }
 329
 330                 fd_batch_reference_locked(&flush_batch, NULL);
 331         }
 332
 333         idx--;              /* bit zero returns 1 for ffs() */
 334
 335         batch = fd_batch_create(ctx, nondraw);
 336         if (!batch)
 337                 goto out;
 338
 339         batch->seqno = cache->cnt++;
 340         batch->idx = idx;
 341         cache->batch_mask |= (1 << idx);
 342
 343         debug_assert(cache->batches[idx] == NULL);
 344         cache->batches[idx] = batch;
 345
 346 out:
 347         fd_screen_unlock(ctx->screen);
 348
 349         return batch;
 350 }
 351
 352 static struct fd_batch *
 353 batch_from_key(struct fd_batch_cache *cache, struct key *key,
 354                 struct fd_context *ctx)
 355 {
 356         struct fd_batch *batch = NULL;
 357         uint32_t hash = key_hash(key);
 358         struct hash_entry *entry =
 359                 _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);
 360
 361         if (entry) {
 362                 free(key);
 363                 fd_batch_reference(&batch, (struct fd_batch *)entry->data);
 364                 return batch;
 365         }
 366
 367         batch = fd_bc_alloc_batch(cache, ctx, false);
 368 #ifdef DEBUG
 369         DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash,
 370                         key->width, key->height, key->layers, key->samples);
 371         for (unsigned idx = 0; idx < key->num_surfs; idx++) {
 372                 DBG("%p:  surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch, key->surf[idx].pos,
 373                         key->surf[idx].texture, util_format_name(key->surf[idx].format),
 374                         key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
 375                         key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
 376                         key->surf[idx].u.tex.level);
 377         }
 378 #endif
 379         if (!batch)
 380                 return NULL;
 381
 382         /* reset max_scissor, which will be adjusted on draws
 383          * according to the actual scissor.
 384          */
 385         batch->max_scissor.minx = ~0;
 386         batch->max_scissor.miny = ~0;
 387         batch->max_scissor.maxx = 0;
 388         batch->max_scissor.maxy = 0;
 389
 390         fd_screen_lock(ctx->screen);
 391
 392         _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
 393         batch->key = key;
 394         batch->hash = hash;
 395
 396         for (unsigned idx = 0; idx < key->num_surfs; idx++) {
 397                 struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
 398                 rsc->bc_batch_mask = (1 << batch->idx);
 399         }
 400
 401         fd_screen_unlock(ctx->screen);
 402
 403         return batch;
 404 }
 405
 406 static void
 407 key_surf(struct key *key, unsigned idx, unsigned pos, struct pipe_surface *psurf)
 408 {
 409         key->surf[idx].texture = psurf->texture;
 410         key->surf[idx].u = psurf->u;
 411         key->surf[idx].pos = pos;
 412         key->surf[idx].samples = MAX2(1, psurf->nr_samples);
 413         key->surf[idx].format = psurf->format;
 414 }
 415
 416 struct fd_batch *
 417 fd_batch_from_fb(struct fd_batch_cache *cache, struct fd_context *ctx,
 418                 const struct pipe_framebuffer_state *pfb)
 419 {
 420         unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
 421         struct key *key = key_alloc(n);
 422
 423         key->width = pfb->width;
 424         key->height = pfb->height;
 425         key->layers = pfb->layers;
 426         key->samples = util_framebuffer_get_num_samples(pfb);
 427         key->ctx = ctx;
 428
 429         if (pfb->zsbuf)
 430                 key_surf(key, idx++, 0, pfb->zsbuf);
 431
 432         for (unsigned i = 0; i < pfb->nr_cbufs; i++)
 433                 if (pfb->cbufs[i])
 434                         key_surf(key, idx++, i + 1, pfb->cbufs[i]);
 435
 436         key->num_surfs = idx;
 437
 438         return batch_from_key(cache, key, ctx);
 439 }