src/gallium/drivers/freedreno/freedreno_batch_cache.c
/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/hash_table.h"
#include "util/set.h"
#include "util/list.h"
#include "util/u_string.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

#include "freedreno_batch.h"
#include "freedreno_batch_cache.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"

/* Overview:
 *
 *   The batch cache provides lookup for mapping pipe_framebuffer_state
 *   to a batch.
 *
 *   It does this via a hashtable, with a key that roughly matches the
 *   pipe_framebuffer_state, as described below.
 *
 * Batch Cache hashtable key:
 *
 *   To serialize the key, and to avoid dealing with holding a reference to
 *   pipe_surface's (which hold a reference to pipe_resource and complicate
 *   the whole refcnting thing), the key is variable length and inlines the
 *   pertinent details of each pipe_surface.
 *
 * Batch:
 *
 *   Each batch needs to hold a reference to each resource it depends on (ie.
 *   anything that needs a mem2gmem), and a weak reference to each resource it
 *   renders to.  (If both src[n] and dst[n] are not NULL then they are the
 *   same.)
 *
 *   When a resource is destroyed, we need to remove entries in the batch
 *   cache that reference the resource, to avoid dangling pointer issues.
 *   So each resource holds a hashset of batches which reference it in
 *   their hashtable key.
 *
 *   When a batch no longer holds a weak reference to any resource (ie. all
 *   the surfaces it rendered to are destroyed), the batch can be destroyed.
 *   This could happen in an app that renders and never uses the result.  The
 *   more common scenario, I think, is that some, but not all, of the
 *   surfaces are destroyed before the batch is submitted.
 *
 *   If (for example) a batch writes to zsbuf but that surface is destroyed
 *   before the batch is submitted, we can skip gmem2mem (but still need to
 *   alloc gmem space as before).  If the batch depended on previous contents
 *   of that surface, it would be holding a reference, so the surface would
 *   not have been destroyed.
 */

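/* Hashtable key for batch-cache lookup.  The fixed-size header below is
 * followed by a variable-length array of surface entries (zsbuf first at
 * pos 0, then cbufs at pos i+1), inlining just enough of each pipe_surface
 * to identify it without holding a reference.
 */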
struct key {
    uint32_t width, height, layers;
    uint16_t samples, num_surfs;
    struct fd_context *ctx;
    struct {
        struct pipe_resource *texture;
        union pipe_surface_desc u;
        uint8_t pos, samples;
        uint16_t format;
    } surf[0];
};

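/* Allocate a key with room for 'num_surfs' surface entries appended after
 * the fixed-size header.
 */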
static struct key *
key_alloc(unsigned num_surfs)
{
    struct key *key =
        CALLOC_VARIANT_LENGTH_STRUCT(key, sizeof(key->surf[0]) * num_surfs);
    return key;
}

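/* Hash the fixed-size portion of the key, then the variable-length surf[]
 * array, so that keys describing the same framebuffer state hash identically.
 */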
static uint32_t
key_hash(const void *_key)
{
    const struct key *key = _key;
    uint32_t hash = 0;
    hash = XXH32(key, offsetof(struct key, surf[0]), hash);
    hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs, hash);
    return hash;
}

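/* Compare the fixed-size portion and the surf[] array separately, since the
 * key is variable length.
 */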
static bool
key_equals(const void *_a, const void *_b)
{
    const struct key *a = _a;
    const struct key *b = _b;
    return (memcmp(a, b, offsetof(struct key, surf[0])) == 0) &&
        (memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
}

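/* Create and destroy the hashtable backing the batch cache. */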
void
fd_bc_init(struct fd_batch_cache *cache)
{
    cache->ht = _mesa_hash_table_create(NULL, key_hash, key_equals);
}

void
fd_bc_fini(struct fd_batch_cache *cache)
{
    _mesa_hash_table_destroy(cache->ht, NULL);
}

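/* Flush (or, if 'deferred', mark as dependencies of the current batch) all
 * of the batches in the cache that belong to 'ctx'.
 */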
static void
bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx, bool deferred)
{
    /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
     * can cause batches to be unref'd and freed under our feet, so grab
     * a reference to all the batches we need up-front.
     */
    struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
    struct fd_batch *batch;
    unsigned n = 0;

    fd_context_lock(ctx);

    foreach_batch(batch, cache, cache->batch_mask) {
        if (batch->ctx == ctx) {
            fd_batch_reference_locked(&batches[n++], batch);
        }
    }

    if (deferred) {
        struct fd_batch *current_batch = fd_context_batch(ctx);

        for (unsigned i = 0; i < n; i++) {
            if (batches[i] && (batches[i]->ctx == ctx) &&
                    (batches[i] != current_batch)) {
                fd_batch_add_dep(current_batch, batches[i]);
            }
        }

        fd_context_unlock(ctx);
    } else {
        fd_context_unlock(ctx);

        for (unsigned i = 0; i < n; i++) {
            fd_batch_flush(batches[i]);
        }
    }

    for (unsigned i = 0; i < n; i++) {
        fd_batch_reference(&batches[i], NULL);
    }
}

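/* Immediately flush every batch in the cache that belongs to the context. */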
void
fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
{
    bc_flush(cache, ctx, false);
}

/* A deferred flush doesn't actually flush, but it marks every other
 * batch associated with the context as dependent on the current
 * batch.  So when the current batch gets flushed, all the other
 * batches that came before it also get flushed.
 */
void
fd_bc_flush_deferred(struct fd_batch_cache *cache, struct fd_context *ctx)
{
    bc_flush(cache, ctx, true);
}

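/* Check whether 'batch' is still present in the cache's batch table. */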
static bool
batch_in_cache(struct fd_batch_cache *cache, struct fd_batch *batch)
{
    struct fd_batch *b;

    foreach_batch (b, cache, cache->batch_mask)
        if (b == batch)
            return true;

    return false;
}

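/**
 * Debug helper: print the given message followed by the screen's live
 * batches, flagging any that still need a flush or are no longer in the
 * cache (orphans).  No-op unless BATCH_DEBUG is enabled.
 */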
void
fd_bc_dump(struct fd_screen *screen, const char *fmt, ...)
{
    struct fd_batch_cache *cache = &screen->batch_cache;

    if (!BATCH_DEBUG)
        return;

    fd_screen_lock(screen);

    va_list ap;
    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);

    set_foreach (screen->live_batches, entry) {
        struct fd_batch *batch = (struct fd_batch *)entry->key;
        printf(" %p<%u>%s%s\n", batch, batch->seqno,
                batch->needs_flush ? ", NEEDS FLUSH" : "",
                batch_in_cache(cache, batch) ? "" : ", ORPHAN");
    }

    printf("----\n");

    fd_screen_unlock(screen);
}

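/**
 * Invalidate and remove from the cache every batch belonging to the given
 * context.
 */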
void
fd_bc_invalidate_context(struct fd_context *ctx)
{
    struct fd_batch_cache *cache = &ctx->screen->batch_cache;
    struct fd_batch *batch;

    fd_screen_lock(ctx->screen);

    foreach_batch(batch, cache, cache->batch_mask) {
        if (batch->ctx == ctx)
            fd_bc_invalidate_batch(batch, true);
    }

    fd_screen_unlock(ctx->screen);
}

/**
 * Note that when a batch is flushed, it needs to remain in the cache so
 * that fd_bc_invalidate_resource() can work; otherwise we can have the
 * case where a rsc is destroyed while a batch still has a dangling
 * reference to it.
 *
 * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
 * would have a reference to the underlying bo, so it is ok for the
 * rsc to be destroyed before the batch.
 */
void
fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
{
    if (!batch)
        return;

    struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
    struct key *key = (struct key *)batch->key;

    fd_context_assert_locked(batch->ctx);

    if (remove) {
        cache->batches[batch->idx] = NULL;
        cache->batch_mask &= ~(1 << batch->idx);
    }

    if (!key)
        return;

    DBG("%p: key=%p", batch, batch->key);
    for (unsigned idx = 0; idx < key->num_surfs; idx++) {
        struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
        rsc->bc_batch_mask &= ~(1 << batch->idx);
    }

    struct hash_entry *entry =
        _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
    _mesa_hash_table_remove(cache->ht, entry);

    batch->key = NULL;
    free(key);
}

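/**
 * Called when a resource is invalidated or destroyed: invalidate the batches
 * that reference it via their cache key, and (on destroy) also remove the
 * resource from each batch's resource set and drop the write_batch reference.
 */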
void
fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
{
    struct fd_screen *screen = fd_screen(rsc->base.screen);
    struct fd_batch *batch;

    fd_screen_lock(screen);

    if (destroy) {
        foreach_batch(batch, &screen->batch_cache, rsc->batch_mask) {
            struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
            _mesa_set_remove(batch->resources, entry);
        }
        rsc->batch_mask = 0;

        fd_batch_reference_locked(&rsc->write_batch, NULL);
    }

    foreach_batch(batch, &screen->batch_cache, rsc->bc_batch_mask)
        fd_bc_invalidate_batch(batch, false);

    rsc->bc_batch_mask = 0;

    fd_screen_unlock(screen);
}

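/**
 * Allocate a new batch plus a slot for it in the cache's batch table.  If
 * every slot is in use, force a flush of the oldest batch (lowest seqno)
 * to free one up.
 */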
struct fd_batch *
fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx, bool nondraw)
{
    struct fd_batch *batch;
    uint32_t idx;

    fd_screen_lock(ctx->screen);

    while ((idx = ffs(~cache->batch_mask)) == 0) {
#if 0
        for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
            batch = cache->batches[i];
            debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush);
            set_foreach(batch->dependencies, entry) {
                struct fd_batch *dep = (struct fd_batch *)entry->key;
                debug_printf(" %d", dep->idx);
            }
            debug_printf("\n");
        }
#endif
        /* TODO: is LRU the better policy? Or perhaps the batch that
         * depends on the fewest other batches?
         */
        struct fd_batch *flush_batch = NULL;
        for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
            if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
                fd_batch_reference_locked(&flush_batch, cache->batches[i]);
        }

        /* we can drop the lock temporarily here, since we hold a ref,
         * so flush_batch won't disappear under us.
         */
        fd_screen_unlock(ctx->screen);
        DBG("%p: too many batches! flush forced!", flush_batch);
        fd_batch_flush(flush_batch);
        fd_screen_lock(ctx->screen);

        /* While the resources get cleaned up automatically, the flush_batch
         * doesn't get removed from the dependencies of other batches, so
         * it won't be unref'd and will remain in the table.
         *
         * TODO maybe keep a bitmask of batches that depend on me, to make
         * this easier:
         */
        for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
            struct fd_batch *other = cache->batches[i];
            if (!other)
                continue;
            if (other->dependents_mask & (1 << flush_batch->idx)) {
                other->dependents_mask &= ~(1 << flush_batch->idx);
                struct fd_batch *ref = flush_batch;
                fd_batch_reference_locked(&ref, NULL);
            }
        }

        fd_batch_reference_locked(&flush_batch, NULL);
    }

    idx--;  /* bit zero returns 1 for ffs() */

    batch = fd_batch_create(ctx, nondraw);
    if (!batch)
        goto out;

    batch->seqno = cache->cnt++;
    batch->idx = idx;
    cache->batch_mask |= (1 << idx);

    debug_assert(cache->batches[idx] == NULL);
    cache->batches[idx] = batch;

out:
    fd_screen_unlock(ctx->screen);

    return batch;
}

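/* Look up (or create) the batch for the given key.  On a cache hit the key
 * is freed and the existing batch is returned; on a miss a new batch is
 * allocated, inserted into the hashtable, and each key surface's resource
 * is pointed back at the batch via bc_batch_mask.
 */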
static struct fd_batch *
batch_from_key(struct fd_batch_cache *cache, struct key *key,
        struct fd_context *ctx)
{
    struct fd_batch *batch = NULL;
    uint32_t hash = key_hash(key);
    struct hash_entry *entry =
        _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);

    if (entry) {
        free(key);
        fd_batch_reference(&batch, (struct fd_batch *)entry->data);
        return batch;
    }

    batch = fd_bc_alloc_batch(cache, ctx, false);
#ifdef DEBUG
    DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash,
            key->width, key->height, key->layers, key->samples);
    for (unsigned idx = 0; idx < key->num_surfs; idx++) {
        DBG("%p: surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch, key->surf[idx].pos,
                key->surf[idx].texture, util_format_name(key->surf[idx].format),
                key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
                key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
                key->surf[idx].u.tex.level);
    }
#endif
    if (!batch)
        return NULL;

    /* reset max_scissor, which will be adjusted on draws
     * according to the actual scissor.
     */
    batch->max_scissor.minx = ~0;
    batch->max_scissor.miny = ~0;
    batch->max_scissor.maxx = 0;
    batch->max_scissor.maxy = 0;

    fd_screen_lock(ctx->screen);

    _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
    batch->key = key;
    batch->hash = hash;

    for (unsigned idx = 0; idx < key->num_surfs; idx++) {
        struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
        rsc->bc_batch_mask = (1 << batch->idx);
    }

    fd_screen_unlock(ctx->screen);

    return batch;
}

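/* Fill in one surface entry of the key from a pipe_surface. */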
static void
key_surf(struct key *key, unsigned idx, unsigned pos, struct pipe_surface *psurf)
{
    key->surf[idx].texture = psurf->texture;
    key->surf[idx].u = psurf->u;
    key->surf[idx].pos = pos;
    key->surf[idx].samples = MAX2(1, psurf->nr_samples);
    key->surf[idx].format = psurf->format;
}

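/**
 * Build a cache key from the framebuffer state (zsbuf at pos 0, cbufs at
 * pos i+1) and return the batch associated with it, creating one if needed.
 */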
struct fd_batch *
fd_batch_from_fb(struct fd_batch_cache *cache, struct fd_context *ctx,
        const struct pipe_framebuffer_state *pfb)
{
    unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
    struct key *key = key_alloc(n);

    key->width = pfb->width;
    key->height = pfb->height;
    key->layers = pfb->layers;
    key->samples = util_framebuffer_get_num_samples(pfb);
    key->ctx = ctx;

    if (pfb->zsbuf)
        key_surf(key, idx++, 0, pfb->zsbuf);

    for (unsigned i = 0; i < pfb->nr_cbufs; i++)
        if (pfb->cbufs[i])
            key_surf(key, idx++, i + 1, pfb->cbufs[i]);

    key->num_surfs = idx;

    return batch_from_key(cache, key, ctx);
}