src/gallium/drivers/freedreno/freedreno_resource.c

   1 /*
   2  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #include "util/u_format.h"
  28 #include "util/u_format_rgtc.h"
  29 #include "util/u_format_zs.h"
  30 #include "util/u_inlines.h"
  31 #include "util/u_transfer.h"
  32 #include "util/u_string.h"
  33 #include "util/u_surface.h"
  34 #include "util/set.h"
  35 #include "util/u_drm.h"
  36
  37 #include "freedreno_resource.h"
  38 #include "freedreno_batch_cache.h"
  39 #include "freedreno_blitter.h"
  40 #include "freedreno_fence.h"
  41 #include "freedreno_screen.h"
  42 #include "freedreno_surface.h"
  43 #include "freedreno_context.h"
  44 #include "freedreno_query_hw.h"
  45 #include "freedreno_util.h"
  46
  47 #include "drm-uapi/drm_fourcc.h"
  48 #include <errno.h>
  49
  50 /* XXX this should go away, needed for 'struct winsys_handle' */
  51 #include "state_tracker/drm_driver.h"
  52
  53 /* A private modifier for now, so we have a way to request tiled but not
  54  * compressed.  It would perhaps be good to get real modifiers for the
  55  * tiled formats, but would probably need to do some work to figure out
  56  * the layout(s) of the tiled modes, and whether they are the same
  57  * across generations.
  58  */
  59 #define FD_FORMAT_MOD_QCOM_TILED        fourcc_mod_code(QCOM, 0xffffffff)
  60
  61 /**
  62  * Go through the entire state and see if the resource is bound
  63  * anywhere. If it is, mark the relevant state as dirty. This is
  64  * called on realloc_bo to ensure the neccessary state is re-
  65  * emitted so the GPU looks at the new backing bo.
  66  */
  67 static void
  68 rebind_resource(struct fd_context *ctx, struct pipe_resource *prsc)
  69 {
  70         /* VBOs */
  71         for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
  72                 if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc)
  73                         ctx->dirty |= FD_DIRTY_VTXBUF;
  74         }
  75
  76         /* per-shader-stage resources: */
  77         for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
  78                 /* Constbufs.. note that constbuf[0] is normal uniforms emitted in
  79                  * cmdstream rather than by pointer..
  80                  */
  81                 const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
  82                 for (unsigned i = 1; i < num_ubos; i++) {
  83                         if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
  84                                 break;
  85                         if (ctx->constbuf[stage].cb[i].buffer == prsc)
  86                                 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
  87                 }
  88
  89                 /* Textures */
  90                 for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
  91                         if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
  92                                 break;
  93                         if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
  94                                 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
  95                 }
  96
  97                 /* Images */
  98                 const unsigned num_images = util_last_bit(ctx->shaderimg[stage].enabled_mask);
  99                 for (unsigned i = 0; i < num_images; i++) {
 100                         if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_IMAGE)
 101                                 break;
 102                         if (ctx->shaderimg[stage].si[i].resource == prsc)
 103                                 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_IMAGE;
 104                 }
 105
 106                 /* SSBOs */
 107                 const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask);
 108                 for (unsigned i = 0; i < num_ssbos; i++) {
 109                         if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)
 110                                 break;
 111                         if (ctx->shaderbuf[stage].sb[i].buffer == prsc)
 112                                 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO;
 113                 }
 114         }
 115 }
 116
 117 static void
 118 realloc_bo(struct fd_resource *rsc, uint32_t size)
 119 {
 120         struct pipe_resource *prsc = &rsc->base;
 121         struct fd_screen *screen = fd_screen(rsc->base.screen);
 122         uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
 123                         DRM_FREEDRENO_GEM_TYPE_KMEM |
 124                         COND(prsc->bind & PIPE_BIND_SCANOUT, DRM_FREEDRENO_GEM_SCANOUT);
 125                         /* TODO other flags? */
 126
 127         /* if we start using things other than write-combine,
 128          * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
 129          */
 130
 131         if (rsc->bo)
 132                 fd_bo_del(rsc->bo);
 133
 134         rsc->bo = fd_bo_new(screen->dev, size, flags, "%ux%ux%u@%u:%x",
 135                         prsc->width0, prsc->height0, prsc->depth0, rsc->cpp, prsc->bind);
 136         rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno);
 137         util_range_set_empty(&rsc->valid_buffer_range);
 138         fd_bc_invalidate_resource(rsc, true);
 139 }
 140
 141 static void
 142 do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
 143 {
 144         struct pipe_context *pctx = &ctx->base;
 145
 146         /* TODO size threshold too?? */
 147         if (fallback || !fd_blit(pctx, blit)) {
 148                 /* do blit on cpu: */
 149                 util_resource_copy_region(pctx,
 150                                 blit->dst.resource, blit->dst.level, blit->dst.box.x,
 151                                 blit->dst.box.y, blit->dst.box.z,
 152                                 blit->src.resource, blit->src.level, &blit->src.box);
 153         }
 154 }
 155
 156 /**
 157  * @rsc: the resource to shadow
 158  * @level: the level to discard (if box != NULL, otherwise ignored)
 159  * @box: the box to discard (or NULL if none)
 160  * @modifier: the modifier for the new buffer state
 161  */
 162 static bool
 163 fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
 164                 unsigned level, const struct pipe_box *box, uint64_t modifier)
 165 {
 166         struct pipe_context *pctx = &ctx->base;
 167         struct pipe_resource *prsc = &rsc->base;
 168         bool fallback = false;
 169
 170         if (prsc->next)
 171                 return false;
 172
 173         /* TODO: somehow munge dimensions and format to copy unsupported
 174          * render target format to something that is supported?
 175          */
 176         if (!pctx->screen->is_format_supported(pctx->screen,
 177                         prsc->format, prsc->target, prsc->nr_samples,
 178                         prsc->nr_storage_samples,
 179                         PIPE_BIND_RENDER_TARGET))
 180                 fallback = true;
 181
 182         /* do shadowing back-blits on the cpu for buffers: */
 183         if (prsc->target == PIPE_BUFFER)
 184                 fallback = true;
 185
 186         bool discard_whole_level = box && util_texrange_covers_whole_level(prsc, level,
 187                 box->x, box->y, box->z, box->width, box->height, box->depth);
 188
 189         /* TODO need to be more clever about current level */
 190         if ((prsc->target >= PIPE_TEXTURE_2D) && box && !discard_whole_level)
 191                 return false;
 192
 193         struct pipe_resource *pshadow =
 194                 pctx->screen->resource_create_with_modifiers(pctx->screen,
 195                                 prsc, &modifier, 1);
 196
 197         if (!pshadow)
 198                 return false;
 199
 200         assert(!ctx->in_shadow);
 201         ctx->in_shadow = true;
 202
 203         /* get rid of any references that batch-cache might have to us (which
 204          * should empty/destroy rsc->batches hashset)
 205          */
 206         fd_bc_invalidate_resource(rsc, false);
 207
 208         mtx_lock(&ctx->screen->lock);
 209
 210         /* Swap the backing bo's, so shadow becomes the old buffer,
 211          * blit from shadow to new buffer.  From here on out, we
 212          * cannot fail.
 213          *
 214          * Note that we need to do it in this order, otherwise if
 215          * we go down cpu blit path, the recursive transfer_map()
 216          * sees the wrong status..
 217          */
 218         struct fd_resource *shadow = fd_resource(pshadow);
 219
 220         DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.reference.count,
 221                         shadow, shadow->base.reference.count);
 222
 223         /* TODO valid_buffer_range?? */
 224         swap(rsc->bo,        shadow->bo);
 225         swap(rsc->write_batch,   shadow->write_batch);
 226         swap(rsc->offset, shadow->offset);
 227         swap(rsc->ubwc_offset, shadow->ubwc_offset);
 228         swap(rsc->ubwc_pitch, shadow->ubwc_pitch);
 229         swap(rsc->ubwc_size, shadow->ubwc_size);
 230         rsc->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);
 231
 232         /* at this point, the newly created shadow buffer is not referenced
 233          * by any batches, but the existing rsc (probably) is.  We need to
 234          * transfer those references over:
 235          */
 236         debug_assert(shadow->batch_mask == 0);
 237         struct fd_batch *batch;
 238         foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
 239                 struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
 240                 _mesa_set_remove(batch->resources, entry);
 241                 _mesa_set_add(batch->resources, shadow);
 242         }
 243         swap(rsc->batch_mask, shadow->batch_mask);
 244
 245         mtx_unlock(&ctx->screen->lock);
 246
 247         struct pipe_blit_info blit = {};
 248         blit.dst.resource = prsc;
 249         blit.dst.format   = prsc->format;
 250         blit.src.resource = pshadow;
 251         blit.src.format   = pshadow->format;
 252         blit.mask = util_format_get_mask(prsc->format);
 253         blit.filter = PIPE_TEX_FILTER_NEAREST;
 254
 255 #define set_box(field, val) do {     \
 256                 blit.dst.field = (val);      \
 257                 blit.src.field = (val);      \
 258         } while (0)
 259
 260         /* blit the other levels in their entirety: */
 261         for (unsigned l = 0; l <= prsc->last_level; l++) {
 262                 if (box && l == level)
 263                         continue;
 264
 265                 /* just blit whole level: */
 266                 set_box(level, l);
 267                 set_box(box.width,  u_minify(prsc->width0, l));
 268                 set_box(box.height, u_minify(prsc->height0, l));
 269                 set_box(box.depth,  u_minify(prsc->depth0, l));
 270
 271                 do_blit(ctx, &blit, fallback);
 272         }
 273
 274         /* deal w/ current level specially, since we might need to split
 275          * it up into a couple blits:
 276          */
 277         if (box && !discard_whole_level) {
 278                 set_box(level, level);
 279
 280                 switch (prsc->target) {
 281                 case PIPE_BUFFER:
 282                 case PIPE_TEXTURE_1D:
 283                         set_box(box.y, 0);
 284                         set_box(box.z, 0);
 285                         set_box(box.height, 1);
 286                         set_box(box.depth, 1);
 287
 288                         if (box->x > 0) {
 289                                 set_box(box.x, 0);
 290                                 set_box(box.width, box->x);
 291
 292                                 do_blit(ctx, &blit, fallback);
 293                         }
 294                         if ((box->x + box->width) < u_minify(prsc->width0, level)) {
 295                                 set_box(box.x, box->x + box->width);
 296                                 set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));
 297
 298                                 do_blit(ctx, &blit, fallback);
 299                         }
 300                         break;
 301                 case PIPE_TEXTURE_2D:
 302                         /* TODO */
 303                 default:
 304                         unreachable("TODO");
 305                 }
 306         }
 307
 308         ctx->in_shadow = false;
 309
 310         pipe_resource_reference(&pshadow, NULL);
 311
 312         return true;
 313 }
 314
 315 /**
 316  * Uncompress an UBWC compressed buffer "in place".  This works basically
 317  * like resource shadowing, creating a new resource, and doing an uncompress
 318  * blit, and swapping the state between shadow and original resource so it
 319  * appears to the state tracker as if nothing changed.
 320  */
 321 void
 322 fd_resource_uncompress(struct fd_context *ctx, struct fd_resource *rsc)
 323 {
 324         bool success =
 325                 fd_try_shadow_resource(ctx, rsc, 0, NULL, FD_FORMAT_MOD_QCOM_TILED);
 326
 327         /* shadow should not fail in any cases where we need to uncompress: */
 328         debug_assert(success);
 329
 330         /*
 331          * TODO what if rsc is used in other contexts, we don't currently
 332          * have a good way to rebind_resource() in other contexts.  And an
 333          * app that is reading one resource in multiple contexts, isn't
 334          * going to expect that the resource is modified.
 335          *
 336          * Hopefully the edge cases where we need to uncompress are rare
 337          * enough that they mostly only show up in deqp.
 338          */
 339
 340         rebind_resource(ctx, &rsc->base);
 341 }
 342
 343 static struct fd_resource *
 344 fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc,
 345                 unsigned level, const struct pipe_box *box)
 346 {
 347         struct pipe_context *pctx = &ctx->base;
 348         struct pipe_resource tmpl = rsc->base;
 349
 350         tmpl.width0  = box->width;
 351         tmpl.height0 = box->height;
 352         /* for array textures, box->depth is the array_size, otherwise
 353          * for 3d textures, it is the depth:
 354          */
 355         if (tmpl.array_size > 1) {
 356                 if (tmpl.target == PIPE_TEXTURE_CUBE)
 357                         tmpl.target = PIPE_TEXTURE_2D_ARRAY;
 358                 tmpl.array_size = box->depth;
 359                 tmpl.depth0 = 1;
 360         } else {
 361                 tmpl.array_size = 1;
 362                 tmpl.depth0 = box->depth;
 363         }
 364         tmpl.last_level = 0;
 365         tmpl.bind |= PIPE_BIND_LINEAR;
 366
 367         struct pipe_resource *pstaging =
 368                 pctx->screen->resource_create(pctx->screen, &tmpl);
 369         if (!pstaging)
 370                 return NULL;
 371
 372         return fd_resource(pstaging);
 373 }
 374
 375 static void
 376 fd_blit_from_staging(struct fd_context *ctx, struct fd_transfer *trans)
 377 {
 378         struct pipe_resource *dst = trans->base.resource;
 379         struct pipe_blit_info blit = {};
 380
 381         blit.dst.resource = dst;
 382         blit.dst.format   = dst->format;
 383         blit.dst.level    = trans->base.level;
 384         blit.dst.box      = trans->base.box;
 385         blit.src.resource = trans->staging_prsc;
 386         blit.src.format   = trans->staging_prsc->format;
 387         blit.src.level    = 0;
 388         blit.src.box      = trans->staging_box;
 389         blit.mask = util_format_get_mask(trans->staging_prsc->format);
 390         blit.filter = PIPE_TEX_FILTER_NEAREST;
 391
 392         do_blit(ctx, &blit, false);
 393 }
 394
 395 static void
 396 fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans)
 397 {
 398         struct pipe_resource *src = trans->base.resource;
 399         struct pipe_blit_info blit = {};
 400
 401         blit.src.resource = src;
 402         blit.src.format   = src->format;
 403         blit.src.level    = trans->base.level;
 404         blit.src.box      = trans->base.box;
 405         blit.dst.resource = trans->staging_prsc;
 406         blit.dst.format   = trans->staging_prsc->format;
 407         blit.dst.level    = 0;
 408         blit.dst.box      = trans->staging_box;
 409         blit.mask = util_format_get_mask(trans->staging_prsc->format);
 410         blit.filter = PIPE_TEX_FILTER_NEAREST;
 411
 412         do_blit(ctx, &blit, false);
 413 }
 414
 415 static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
 416                 struct pipe_transfer *ptrans,
 417                 const struct pipe_box *box)
 418 {
 419         struct fd_resource *rsc = fd_resource(ptrans->resource);
 420
 421         if (ptrans->resource->target == PIPE_BUFFER)
 422                 util_range_add(&rsc->valid_buffer_range,
 423                                            ptrans->box.x + box->x,
 424                                            ptrans->box.x + box->x + box->width);
 425 }
 426
 427 static void
 428 flush_resource(struct fd_context *ctx, struct fd_resource *rsc, unsigned usage)
 429 {
 430         struct fd_batch *write_batch = NULL;
 431
 432         mtx_lock(&ctx->screen->lock);
 433         fd_batch_reference_locked(&write_batch, rsc->write_batch);
 434         mtx_unlock(&ctx->screen->lock);
 435
 436         if (usage & PIPE_TRANSFER_WRITE) {
 437                 struct fd_batch *batch, *batches[32] = {};
 438                 uint32_t batch_mask;
 439
 440                 /* This is a bit awkward, probably a fd_batch_flush_locked()
 441                  * would make things simpler.. but we need to hold the lock
 442                  * to iterate the batches which reference this resource.  So
 443                  * we must first grab references under a lock, then flush.
 444                  */
 445                 mtx_lock(&ctx->screen->lock);
 446                 batch_mask = rsc->batch_mask;
 447                 foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
 448                         fd_batch_reference_locked(&batches[batch->idx], batch);
 449                 mtx_unlock(&ctx->screen->lock);
 450
 451                 foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
 452                         fd_batch_flush(batch, false);
 453
 454                 foreach_batch(batch, &ctx->screen->batch_cache, batch_mask) {
 455                         fd_batch_sync(batch);
 456                         fd_batch_reference(&batches[batch->idx], NULL);
 457                 }
 458                 assert(rsc->batch_mask == 0);
 459         } else if (write_batch) {
 460                 fd_batch_flush(write_batch, true);
 461         }
 462
 463         fd_batch_reference(&write_batch, NULL);
 464
 465         assert(!rsc->write_batch);
 466 }
 467
 468 static void
 469 fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
 470 {
 471         flush_resource(fd_context(pctx), fd_resource(prsc), PIPE_TRANSFER_READ);
 472 }
 473
 474 static void
 475 fd_resource_transfer_unmap(struct pipe_context *pctx,
 476                 struct pipe_transfer *ptrans)
 477 {
 478         struct fd_context *ctx = fd_context(pctx);
 479         struct fd_resource *rsc = fd_resource(ptrans->resource);
 480         struct fd_transfer *trans = fd_transfer(ptrans);
 481
 482         if (trans->staging_prsc) {
 483                 if (ptrans->usage & PIPE_TRANSFER_WRITE)
 484                         fd_blit_from_staging(ctx, trans);
 485                 pipe_resource_reference(&trans->staging_prsc, NULL);
 486         }
 487
 488         if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 489                 fd_bo_cpu_fini(rsc->bo);
 490         }
 491
 492         util_range_add(&rsc->valid_buffer_range,
 493                                    ptrans->box.x,
 494                                    ptrans->box.x + ptrans->box.width);
 495
 496         pipe_resource_reference(&ptrans->resource, NULL);
 497         slab_free(&ctx->transfer_pool, ptrans);
 498 }
 499
 500 static void *
 501 fd_resource_transfer_map(struct pipe_context *pctx,
 502                 struct pipe_resource *prsc,
 503                 unsigned level, unsigned usage,
 504                 const struct pipe_box *box,
 505                 struct pipe_transfer **pptrans)
 506 {
 507         struct fd_context *ctx = fd_context(pctx);
 508         struct fd_resource *rsc = fd_resource(prsc);
 509         struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
 510         struct fd_transfer *trans;
 511         struct pipe_transfer *ptrans;
 512         enum pipe_format format = prsc->format;
 513         uint32_t op = 0;
 514         uint32_t offset;
 515         char *buf;
 516         int ret = 0;
 517
 518         DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
 519                 box->width, box->height, box->x, box->y);
 520
 521         ptrans = slab_alloc(&ctx->transfer_pool);
 522         if (!ptrans)
 523                 return NULL;
 524
 525         /* slab_alloc_st() doesn't zero: */
 526         trans = fd_transfer(ptrans);
 527         memset(trans, 0, sizeof(*trans));
 528
 529         pipe_resource_reference(&ptrans->resource, prsc);
 530         ptrans->level = level;
 531         ptrans->usage = usage;
 532         ptrans->box = *box;
 533         ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
 534         ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;
 535
 536         /* we always need a staging texture for tiled buffers:
 537          *
 538          * TODO we might sometimes want to *also* shadow the resource to avoid
 539          * splitting a batch.. for ex, mid-frame texture uploads to a tiled
 540          * texture.
 541          */
 542         if (rsc->tile_mode) {
 543                 struct fd_resource *staging_rsc;
 544
 545                 staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
 546                 if (staging_rsc) {
 547                         // TODO for PIPE_TRANSFER_READ, need to do untiling blit..
 548                         trans->staging_prsc = &staging_rsc->base;
 549                         trans->base.stride = util_format_get_nblocksx(format,
 550                                 staging_rsc->slices[0].pitch) * staging_rsc->cpp;
 551                         trans->base.layer_stride = staging_rsc->layer_first ?
 552                                 staging_rsc->layer_size : staging_rsc->slices[0].size0;
 553                         trans->staging_box = *box;
 554                         trans->staging_box.x = 0;
 555                         trans->staging_box.y = 0;
 556                         trans->staging_box.z = 0;
 557
 558                         if (usage & PIPE_TRANSFER_READ) {
 559                                 fd_blit_to_staging(ctx, trans);
 560
 561                                 struct fd_batch *batch = NULL;
 562
 563                                 fd_context_lock(ctx);
 564                                 fd_batch_reference_locked(&batch, staging_rsc->write_batch);
 565                                 fd_context_unlock(ctx);
 566
 567                                 /* we can't fd_bo_cpu_prep() until the blit to staging
 568                                  * is submitted to kernel.. in that case write_batch
 569                                  * wouldn't be NULL yet:
 570                                  */
 571                                 if (batch) {
 572                                         fd_batch_sync(batch);
 573                                         fd_batch_reference(&batch, NULL);
 574                                 }
 575
 576                                 fd_bo_cpu_prep(staging_rsc->bo, ctx->pipe,
 577                                                 DRM_FREEDRENO_PREP_READ);
 578                         }
 579
 580                         buf = fd_bo_map(staging_rsc->bo);
 581                         offset = 0;
 582
 583                         *pptrans = ptrans;
 584
 585                         ctx->stats.staging_uploads++;
 586
 587                         return buf;
 588                 }
 589         }
 590
 591         if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
 592                 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 593
 594         if (usage & PIPE_TRANSFER_READ)
 595                 op |= DRM_FREEDRENO_PREP_READ;
 596
 597         if (usage & PIPE_TRANSFER_WRITE)
 598                 op |= DRM_FREEDRENO_PREP_WRITE;
 599
 600         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
 601                 realloc_bo(rsc, fd_bo_size(rsc->bo));
 602                 rebind_resource(ctx, prsc);
 603         } else if ((usage & PIPE_TRANSFER_WRITE) &&
 604                            prsc->target == PIPE_BUFFER &&
 605                            !util_ranges_intersect(&rsc->valid_buffer_range,
 606                                                                           box->x, box->x + box->width)) {
 607                 /* We are trying to write to a previously uninitialized range. No need
 608                  * to wait.
 609                  */
 610         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 611                 struct fd_batch *write_batch = NULL;
 612
 613                 /* hold a reference, so it doesn't disappear under us: */
 614                 fd_context_lock(ctx);
 615                 fd_batch_reference_locked(&write_batch, rsc->write_batch);
 616                 fd_context_unlock(ctx);
 617
 618                 if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
 619                                 write_batch->back_blit) {
 620                         /* if only thing pending is a back-blit, we can discard it: */
 621                         fd_batch_reset(write_batch);
 622                 }
 623
 624                 /* If the GPU is writing to the resource, or if it is reading from the
 625                  * resource and we're trying to write to it, flush the renders.
 626                  */
 627                 bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
 628                 bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
 629                                 ctx->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));
 630
 631                 /* if we need to flush/stall, see if we can make a shadow buffer
 632                  * to avoid this:
 633                  *
 634                  * TODO we could go down this path !reorder && !busy_for_read
 635                  * ie. we only *don't* want to go down this path if the blit
 636                  * will trigger a flush!
 637                  */
 638                 if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ) &&
 639                                 (usage & PIPE_TRANSFER_DISCARD_RANGE)) {
 640                         /* try shadowing only if it avoids a flush, otherwise staging would
 641                          * be better:
 642                          */
 643                         if (needs_flush && fd_try_shadow_resource(ctx, rsc, level,
 644                                                         box, DRM_FORMAT_MOD_LINEAR)) {
 645                                 needs_flush = busy = false;
 646                                 rebind_resource(ctx, prsc);
 647                                 ctx->stats.shadow_uploads++;
 648                         } else {
 649                                 struct fd_resource *staging_rsc;
 650
 651                                 if (needs_flush) {
 652                                         flush_resource(ctx, rsc, usage);
 653                                         needs_flush = false;
 654                                 }
 655
 656                                 /* in this case, we don't need to shadow the whole resource,
 657                                  * since any draw that references the previous contents has
 658                                  * already had rendering flushed for all tiles.  So we can
 659                                  * use a staging buffer to do the upload.
 660                                  */
 661                                 staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
 662                                 if (staging_rsc) {
 663                                         trans->staging_prsc = &staging_rsc->base;
 664                                         trans->base.stride = util_format_get_nblocksx(format,
 665                                                 staging_rsc->slices[0].pitch) * staging_rsc->cpp;
 666                                         trans->base.layer_stride = staging_rsc->layer_first ?
 667                                                 staging_rsc->layer_size : staging_rsc->slices[0].size0;
 668                                         trans->staging_box = *box;
 669                                         trans->staging_box.x = 0;
 670                                         trans->staging_box.y = 0;
 671                                         trans->staging_box.z = 0;
 672                                         buf = fd_bo_map(staging_rsc->bo);
 673                                         offset = 0;
 674
 675                                         *pptrans = ptrans;
 676
 677                                         fd_batch_reference(&write_batch, NULL);
 678
 679                                         ctx->stats.staging_uploads++;
 680
 681                                         return buf;
 682                                 }
 683                         }
 684                 }
 685
 686                 if (needs_flush) {
 687                         flush_resource(ctx, rsc, usage);
 688                         needs_flush = false;
 689                 }
 690
 691                 fd_batch_reference(&write_batch, NULL);
 692
 693                 /* The GPU keeps track of how the various bo's are being used, and
 694                  * will wait if necessary for the proper operation to have
 695                  * completed.
 696                  */
 697                 if (busy) {
 698                         ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
 699                         if (ret)
 700                                 goto fail;
 701                 }
 702         }
 703
 704         buf = fd_bo_map(rsc->bo);
 705         offset =
 706                 box->y / util_format_get_blockheight(format) * ptrans->stride +
 707                 box->x / util_format_get_blockwidth(format) * rsc->cpp +
 708                 fd_resource_offset(rsc, level, box->z);
 709
 710         if (usage & PIPE_TRANSFER_WRITE)
 711                 rsc->valid = true;
 712
 713         *pptrans = ptrans;
 714
 715         return buf + offset;
 716
 717 fail:
 718         fd_resource_transfer_unmap(pctx, ptrans);
 719         return NULL;
 720 }
 721
 722 static void
 723 fd_resource_destroy(struct pipe_screen *pscreen,
 724                 struct pipe_resource *prsc)
 725 {
 726         struct fd_resource *rsc = fd_resource(prsc);
 727         fd_bc_invalidate_resource(rsc, true);
 728         if (rsc->bo)
 729                 fd_bo_del(rsc->bo);
 730         if (rsc->scanout)
 731                 renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro);
 732
 733         util_range_destroy(&rsc->valid_buffer_range);
 734         FREE(rsc);
 735 }
 736
 737 static uint64_t
 738 fd_resource_modifier(struct fd_resource *rsc)
 739 {
 740         if (!rsc->tile_mode)
 741                 return DRM_FORMAT_MOD_LINEAR;
 742
 743         if (rsc->ubwc_size)
 744                 return DRM_FORMAT_MOD_QCOM_COMPRESSED;
 745
 746         /* TODO invent a modifier for tiled but not UBWC buffers: */
 747         return DRM_FORMAT_MOD_INVALID;
 748 }
 749
 750 static bool
 751 fd_resource_get_handle(struct pipe_screen *pscreen,
 752                 struct pipe_context *pctx,
 753                 struct pipe_resource *prsc,
 754                 struct winsys_handle *handle,
 755                 unsigned usage)
 756 {
 757         struct fd_resource *rsc = fd_resource(prsc);
 758
 759         handle->modifier = fd_resource_modifier(rsc);
 760
 761         return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->scanout,
 762                         rsc->slices[0].pitch * rsc->cpp, handle);
 763 }
 764
 765 static uint32_t
 766 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
 767 {
 768         struct pipe_resource *prsc = &rsc->base;
 769         struct fd_screen *screen = fd_screen(prsc->screen);
 770         enum util_format_layout layout = util_format_description(format)->layout;
 771         uint32_t pitchalign = screen->gmem_alignw;
 772         uint32_t level, size = 0;
 773         uint32_t width = prsc->width0;
 774         uint32_t height = prsc->height0;
 775         uint32_t depth = prsc->depth0;
 776         /* in layer_first layout, the level (slice) contains just one
 777          * layer (since in fact the layer contains the slices)
 778          */
 779         uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
 780
 781         for (level = 0; level <= prsc->last_level; level++) {
 782                 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
 783                 uint32_t blocks;
 784
 785                 if (layout == UTIL_FORMAT_LAYOUT_ASTC)
 786                         slice->pitch = width =
 787                                 util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
 788                 else
 789                         slice->pitch = width = align(width, pitchalign);
 790                 slice->offset = size;
 791                 blocks = util_format_get_nblocks(format, width, height);
 792                 /* 1d array and 2d array textures must all have the same layer size
 793                  * for each miplevel on a3xx. 3d textures can have different layer
 794                  * sizes for high levels, but the hw auto-sizer is buggy (or at least
 795                  * different than what this code does), so as soon as the layer size
 796                  * range gets into range, we stop reducing it.
 797                  */
 798                 if (prsc->target == PIPE_TEXTURE_3D && (
 799                                         level == 1 ||
 800                                         (level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
 801                         slice->size0 = align(blocks * rsc->cpp, alignment);
 802                 else if (level == 0 || rsc->layer_first || alignment == 1)
 803                         slice->size0 = align(blocks * rsc->cpp, alignment);
 804                 else
 805                         slice->size0 = rsc->slices[level - 1].size0;
 806
 807                 size += slice->size0 * depth * layers_in_level;
 808
 809                 width = u_minify(width, 1);
 810                 height = u_minify(height, 1);
 811                 depth = u_minify(depth, 1);
 812         }
 813
 814         return size;
 815 }
 816
 817 static uint32_t
 818 slice_alignment(enum pipe_texture_target target)
 819 {
 820         /* on a3xx, 2d array and 3d textures seem to want their
 821          * layers aligned to page boundaries:
 822          */
 823         switch (target) {
 824         case PIPE_TEXTURE_3D:
 825         case PIPE_TEXTURE_1D_ARRAY:
 826         case PIPE_TEXTURE_2D_ARRAY:
 827                 return 4096;
 828         default:
 829                 return 1;
 830         }
 831 }
 832
 833 /* cross generation texture layout to plug in to screen->setup_slices()..
 834  * replace with generation specific one as-needed.
 835  *
 836  * TODO for a4xx probably can extract out the a4xx specific logic int
 837  * a small fd4_setup_slices() wrapper that sets up layer_first, and then
 838  * calls this.
 839  */
 840 uint32_t
 841 fd_setup_slices(struct fd_resource *rsc)
 842 {
 843         uint32_t alignment;
 844
 845         alignment = slice_alignment(rsc->base.target);
 846
 847         struct fd_screen *screen = fd_screen(rsc->base.screen);
 848         if (is_a4xx(screen)) {
 849                 switch (rsc->base.target) {
 850                 case PIPE_TEXTURE_3D:
 851                         rsc->layer_first = false;
 852                         break;
 853                 default:
 854                         rsc->layer_first = true;
 855                         alignment = 1;
 856                         break;
 857                 }
 858         }
 859
 860         return setup_slices(rsc, alignment, rsc->base.format);
 861 }
 862
 863 /* special case to resize query buf after allocated.. */
 864 void
 865 fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
 866 {
 867         struct fd_resource *rsc = fd_resource(prsc);
 868
 869         debug_assert(prsc->width0 == 0);
 870         debug_assert(prsc->target == PIPE_BUFFER);
 871         debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
 872
 873         prsc->width0 = sz;
 874         realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc));
 875 }
 876
 877 // TODO common helper?
 878 static bool
 879 has_depth(enum pipe_format format)
 880 {
 881         switch (format) {
 882         case PIPE_FORMAT_Z16_UNORM:
 883         case PIPE_FORMAT_Z32_UNORM:
 884         case PIPE_FORMAT_Z32_FLOAT:
 885         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
 886         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
 887         case PIPE_FORMAT_S8_UINT_Z24_UNORM:
 888         case PIPE_FORMAT_Z24X8_UNORM:
 889         case PIPE_FORMAT_X8Z24_UNORM:
 890                 return true;
 891         default:
 892                 return false;
 893         }
 894 }
 895
 896 /**
 897  * Create a new texture object, using the given template info.
 898  */
 899 static struct pipe_resource *
 900 fd_resource_create_with_modifiers(struct pipe_screen *pscreen,
 901                 const struct pipe_resource *tmpl,
 902                 const uint64_t *modifiers, int count)
 903 {
 904         struct fd_screen *screen = fd_screen(pscreen);
 905         struct fd_resource *rsc;
 906         struct pipe_resource *prsc;
 907         enum pipe_format format = tmpl->format;
 908         uint32_t size;
 909
 910         /* when using kmsro, scanout buffers are allocated on the display device
 911          * create_with_modifiers() doesn't give us usage flags, so we have to
 912          * assume that all calls with modifiers are scanout-possible
 913          */
 914         if (screen->ro &&
 915                 ((tmpl->bind & PIPE_BIND_SCANOUT) ||
 916                  !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) {
 917                 struct pipe_resource scanout_templat = *tmpl;
 918                 struct renderonly_scanout *scanout;
 919                 struct winsys_handle handle;
 920
 921                 scanout = renderonly_scanout_for_resource(&scanout_templat,
 922                                                                                                   screen->ro, &handle);
 923                 if (!scanout)
 924                         return NULL;
 925
 926                 renderonly_scanout_destroy(scanout, screen->ro);
 927
 928                 assert(handle.type == WINSYS_HANDLE_TYPE_FD);
 929                 rsc = fd_resource(pscreen->resource_from_handle(pscreen, tmpl,
 930                                                                                                                 &handle,
 931                                                                                                                 PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE));
 932                 close(handle.handle);
 933                 if (!rsc)
 934                         return NULL;
 935
 936                 return &rsc->base;
 937         }
 938
 939         rsc = CALLOC_STRUCT(fd_resource);
 940         prsc = &rsc->base;
 941
 942         DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
 943                         "nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
 944                         tmpl->target, util_format_name(format),
 945                         tmpl->width0, tmpl->height0, tmpl->depth0,
 946                         tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
 947                         tmpl->usage, tmpl->bind, tmpl->flags);
 948
 949         if (!rsc)
 950                 return NULL;
 951
 952         *prsc = *tmpl;
 953
 954 #define LINEAR \
 955         (PIPE_BIND_SCANOUT | \
 956          PIPE_BIND_LINEAR  | \
 957          PIPE_BIND_DISPLAY_TARGET)
 958
 959         bool linear = drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
 960         if (tmpl->bind & LINEAR)
 961                 linear = true;
 962
 963         /* Normally, for non-shared buffers, allow buffer compression if
 964          * not shared, otherwise only allow if QCOM_COMPRESSED modifier
 965          * is requested:
 966          *
 967          * TODO we should probably also limit tiled in a similar way,
 968          * except we don't have a format modifier for tiled.  (We probably
 969          * should.)
 970          */
 971         bool allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count);
 972         if (tmpl->bind & PIPE_BIND_SHARED)
 973                 allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count);
 974
 975         allow_ubwc &= !(fd_mesa_debug & FD_DBG_NOUBWC);
 976
 977         if (screen->tile_mode &&
 978                         (tmpl->target != PIPE_BUFFER) &&
 979                         !linear) {
 980                 rsc->tile_mode = screen->tile_mode(tmpl);
 981         }
 982
 983         pipe_reference_init(&prsc->reference, 1);
 984
 985         prsc->screen = pscreen;
 986
 987         util_range_init(&rsc->valid_buffer_range);
 988
 989         rsc->internal_format = format;
 990         rsc->cpp = util_format_get_blocksize(format);
 991         rsc->cpp *= fd_resource_nr_samples(prsc);
 992
 993         assert(rsc->cpp);
 994
 995         // XXX probably need some extra work if we hit rsc shadowing path w/ lrz..
 996         if ((is_a5xx(screen) || is_a6xx(screen)) &&
 997                  (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) {
 998                 const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
 999                                 DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
1000                 unsigned lrz_pitch  = align(DIV_ROUND_UP(tmpl->width0, 8), 64);
1001                 unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8);
1002
1003                 /* LRZ buffer is super-sampled: */
1004                 switch (prsc->nr_samples) {
1005                 case 4:
1006                         lrz_pitch *= 2;
1007                 case 2:
1008                         lrz_height *= 2;
1009                 }
1010
1011                 unsigned size = lrz_pitch * lrz_height * 2;
1012
1013                 size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
1014
1015                 rsc->lrz_height = lrz_height;
1016                 rsc->lrz_width = lrz_pitch;
1017                 rsc->lrz_pitch = lrz_pitch;
1018                 rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
1019         }
1020
1021         size = screen->setup_slices(rsc);
1022
1023         if (allow_ubwc && screen->fill_ubwc_buffer_sizes && rsc->tile_mode)
1024                 size += screen->fill_ubwc_buffer_sizes(rsc);
1025
1026         /* special case for hw-query buffer, which we need to allocate before we
1027          * know the size:
1028          */
1029         if (size == 0) {
1030                 /* note, semi-intention == instead of & */
1031                 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
1032                 return prsc;
1033         }
1034
1035         if (rsc->layer_first) {
1036                 rsc->layer_size = align(size, 4096);
1037                 size = rsc->layer_size * prsc->array_size;
1038         }
1039
1040         realloc_bo(rsc, size);
1041         if (!rsc->bo)
1042                 goto fail;
1043
1044         return prsc;
1045 fail:
1046         fd_resource_destroy(pscreen, prsc);
1047         return NULL;
1048 }
1049
1050 static struct pipe_resource *
1051 fd_resource_create(struct pipe_screen *pscreen,
1052                 const struct pipe_resource *tmpl)
1053 {
1054         const uint64_t mod = DRM_FORMAT_MOD_INVALID;
1055         return fd_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
1056 }
1057
1058 static bool
1059 is_supported_modifier(struct pipe_screen *pscreen, enum pipe_format pfmt,
1060                 uint64_t mod)
1061 {
1062         int count;
1063
1064         /* Get the count of supported modifiers: */
1065         pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, NULL, NULL, &count);
1066
1067         /* Get the supported modifiers: */
1068         uint64_t modifiers[count];
1069         pscreen->query_dmabuf_modifiers(pscreen, pfmt, count, modifiers, NULL, &count);
1070
1071         for (int i = 0; i < count; i++)
1072                 if (modifiers[i] == mod)
1073                         return true;
1074
1075         return false;
1076 }
1077
1078 /**
1079  * Create a texture from a winsys_handle. The handle is often created in
1080  * another process by first creating a pipe texture and then calling
1081  * resource_get_handle.
1082  */
1083 static struct pipe_resource *
1084 fd_resource_from_handle(struct pipe_screen *pscreen,
1085                 const struct pipe_resource *tmpl,
1086                 struct winsys_handle *handle, unsigned usage)
1087 {
1088         struct fd_screen *screen = fd_screen(pscreen);
1089         struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
1090         struct fd_resource_slice *slice = &rsc->slices[0];
1091         struct pipe_resource *prsc = &rsc->base;
1092         uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;
1093
1094         DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
1095                         "nr_samples=%u, usage=%u, bind=%x, flags=%x",
1096                         tmpl->target, util_format_name(tmpl->format),
1097                         tmpl->width0, tmpl->height0, tmpl->depth0,
1098                         tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
1099                         tmpl->usage, tmpl->bind, tmpl->flags);
1100
1101         if (!rsc)
1102                 return NULL;
1103
1104         *prsc = *tmpl;
1105
1106         pipe_reference_init(&prsc->reference, 1);
1107
1108         prsc->screen = pscreen;
1109
1110         util_range_init(&rsc->valid_buffer_range);
1111
1112         rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
1113         if (!rsc->bo)
1114                 goto fail;
1115
1116         rsc->internal_format = tmpl->format;
1117         rsc->cpp = util_format_get_blocksize(tmpl->format);
1118         rsc->cpp *= fd_resource_nr_samples(prsc);
1119         slice->pitch = handle->stride / rsc->cpp;
1120         slice->offset = handle->offset;
1121         slice->size0 = handle->stride * prsc->height0;
1122
1123         if ((slice->pitch < align(prsc->width0, pitchalign)) ||
1124                         (slice->pitch & (pitchalign - 1)))
1125                 goto fail;
1126
1127         if (handle->modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) {
1128                 if (!is_supported_modifier(pscreen, tmpl->format,
1129                                 DRM_FORMAT_MOD_QCOM_COMPRESSED)) {
1130                         DBG("bad modifier: %"PRIx64, handle->modifier);
1131                         goto fail;
1132                 }
1133                 debug_assert(screen->fill_ubwc_buffer_sizes);
1134                 screen->fill_ubwc_buffer_sizes(rsc);
1135         } else if (handle->modifier &&
1136                         (handle->modifier != DRM_FORMAT_MOD_INVALID)) {
1137                 goto fail;
1138         }
1139
1140         assert(rsc->cpp);
1141
1142         if (screen->ro) {
1143                 rsc->scanout =
1144                         renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL);
1145                 /* failure is expected in some cases.. */
1146         }
1147
1148         rsc->valid = true;
1149
1150         return prsc;
1151
1152 fail:
1153         fd_resource_destroy(pscreen, prsc);
1154         return NULL;
1155 }
1156
1157 bool
1158 fd_render_condition_check(struct pipe_context *pctx)
1159 {
1160         struct fd_context *ctx = fd_context(pctx);
1161
1162         if (!ctx->cond_query)
1163                 return true;
1164
1165         union pipe_query_result res = { 0 };
1166         bool wait =
1167                 ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
1168                 ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
1169
1170         if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
1171                         return (bool)res.u64 != ctx->cond_cond;
1172
1173         return true;
1174 }
1175
1176 static void
1177 fd_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
1178 {
1179         struct fd_context *ctx = fd_context(pctx);
1180         struct fd_resource *rsc = fd_resource(prsc);
1181
1182         /*
1183          * TODO I guess we could track that the resource is invalidated and
1184          * use that as a hint to realloc rather than stall in _transfer_map(),
1185          * even in the non-DISCARD_WHOLE_RESOURCE case?
1186          *
1187          * Note: we set dirty bits to trigger invalidate logic fd_draw_vbo
1188          */
1189
1190         if (rsc->write_batch) {
1191                 struct fd_batch *batch = rsc->write_batch;
1192                 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1193
1194                 if (pfb->zsbuf && pfb->zsbuf->texture == prsc) {
1195                         batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
1196                         ctx->dirty |= FD_DIRTY_ZSA;
1197                 }
1198
1199                 for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
1200                         if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
1201                                 batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
1202                                 ctx->dirty |= FD_DIRTY_FRAMEBUFFER;
1203                         }
1204                 }
1205         }
1206
1207         rsc->valid = false;
1208 }
1209
1210 static enum pipe_format
1211 fd_resource_get_internal_format(struct pipe_resource *prsc)
1212 {
1213         return fd_resource(prsc)->internal_format;
1214 }
1215
1216 static void
1217 fd_resource_set_stencil(struct pipe_resource *prsc,
1218                 struct pipe_resource *stencil)
1219 {
1220         fd_resource(prsc)->stencil = fd_resource(stencil);
1221 }
1222
1223 static struct pipe_resource *
1224 fd_resource_get_stencil(struct pipe_resource *prsc)
1225 {
1226         struct fd_resource *rsc = fd_resource(prsc);
1227         if (rsc->stencil)
1228                 return &rsc->stencil->base;
1229         return NULL;
1230 }
1231
1232 static const struct u_transfer_vtbl transfer_vtbl = {
1233                 .resource_create          = fd_resource_create,
1234                 .resource_destroy         = fd_resource_destroy,
1235                 .transfer_map             = fd_resource_transfer_map,
1236                 .transfer_flush_region    = fd_resource_transfer_flush_region,
1237                 .transfer_unmap           = fd_resource_transfer_unmap,
1238                 .get_internal_format      = fd_resource_get_internal_format,
1239                 .set_stencil              = fd_resource_set_stencil,
1240                 .get_stencil              = fd_resource_get_stencil,
1241 };
1242
1243 void
1244 fd_resource_screen_init(struct pipe_screen *pscreen)
1245 {
1246         struct fd_screen *screen = fd_screen(pscreen);
1247         bool fake_rgtc = screen->gpu_id < 400;
1248
1249         pscreen->resource_create = u_transfer_helper_resource_create;
1250         /* NOTE: u_transfer_helper does not yet support the _with_modifiers()
1251          * variant:
1252          */
1253         pscreen->resource_create_with_modifiers = fd_resource_create_with_modifiers;
1254         pscreen->resource_from_handle = fd_resource_from_handle;
1255         pscreen->resource_get_handle = fd_resource_get_handle;
1256         pscreen->resource_destroy = u_transfer_helper_resource_destroy;
1257
1258         pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
1259                         true, false, fake_rgtc, true);
1260
1261         if (!screen->setup_slices)
1262                 screen->setup_slices = fd_setup_slices;
1263 }
1264
1265 static void
1266 fd_get_sample_position(struct pipe_context *context,
1267                          unsigned sample_count, unsigned sample_index,
1268                          float *pos_out)
1269 {
1270         /* The following is copied from nouveau/nv50 except for position
1271          * values, which are taken from blob driver */
1272         static const uint8_t pos1[1][2] = { { 0x8, 0x8 } };
1273         static const uint8_t pos2[2][2] = {
1274                 { 0xc, 0xc }, { 0x4, 0x4 } };
1275         static const uint8_t pos4[4][2] = {
1276                 { 0x6, 0x2 }, { 0xe, 0x6 },
1277                 { 0x2, 0xa }, { 0xa, 0xe } };
1278         /* TODO needs to be verified on supported hw */
1279         static const uint8_t pos8[8][2] = {
1280                 { 0x9, 0x5 }, { 0x7, 0xb },
1281                 { 0xd, 0x9 }, { 0x5, 0x3 },
1282                 { 0x3, 0xd }, { 0x1, 0x7 },
1283                 { 0xb, 0xf }, { 0xf, 0x1 } };
1284
1285         const uint8_t (*ptr)[2];
1286
1287         switch (sample_count) {
1288         case 1:
1289                 ptr = pos1;
1290                 break;
1291         case 2:
1292                 ptr = pos2;
1293                 break;
1294         case 4:
1295                 ptr = pos4;
1296                 break;
1297         case 8:
1298                 ptr = pos8;
1299                 break;
1300         default:
1301                 assert(0);
1302                 return;
1303         }
1304
1305         pos_out[0] = ptr[sample_index][0] / 16.0f;
1306         pos_out[1] = ptr[sample_index][1] / 16.0f;
1307 }
1308
1309 static void
1310 fd_blit_pipe(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
1311 {
1312         /* wrap fd_blit to return void */
1313         fd_blit(pctx, blit_info);
1314 }
1315
1316 void
1317 fd_resource_context_init(struct pipe_context *pctx)
1318 {
1319         pctx->transfer_map = u_transfer_helper_transfer_map;
1320         pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
1321         pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
1322         pctx->buffer_subdata = u_default_buffer_subdata;
1323         pctx->texture_subdata = u_default_texture_subdata;
1324         pctx->create_surface = fd_create_surface;
1325         pctx->surface_destroy = fd_surface_destroy;
1326         pctx->resource_copy_region = fd_resource_copy_region;
1327         pctx->blit = fd_blit_pipe;
1328         pctx->flush_resource = fd_flush_resource;
1329         pctx->invalidate_resource = fd_invalidate_resource;
1330         pctx->get_sample_position = fd_get_sample_position;
1331 }