src/gallium/drivers/freedreno/freedreno_resource.c

   1 /*
   2  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #include "util/u_format.h"
  28 #include "util/u_format_rgtc.h"
  29 #include "util/u_format_zs.h"
  30 #include "util/u_inlines.h"
  31 #include "util/u_transfer.h"
  32 #include "util/u_string.h"
  33 #include "util/u_surface.h"
  34 #include "util/set.h"
  35 #include "util/u_drm.h"
  36
  37 #include "freedreno_resource.h"
  38 #include "freedreno_batch_cache.h"
  39 #include "freedreno_blitter.h"
  40 #include "freedreno_fence.h"
  41 #include "freedreno_screen.h"
  42 #include "freedreno_surface.h"
  43 #include "freedreno_context.h"
  44 #include "freedreno_query_hw.h"
  45 #include "freedreno_util.h"
  46
  47 #include "drm-uapi/drm_fourcc.h"
  48 #include <errno.h>
  49
  50 /* XXX this should go away, needed for 'struct winsys_handle' */
  51 #include "state_tracker/drm_driver.h"
  52
  53 /**
  54  * Go through the entire state and see if the resource is bound
  55  * anywhere. If it is, mark the relevant state as dirty. This is
  56  * called on realloc_bo to ensure the neccessary state is re-
  57  * emitted so the GPU looks at the new backing bo.
  58  */
  59 static void
  60 rebind_resource(struct fd_context *ctx, struct pipe_resource *prsc)
  61 {
  62         /* VBOs */
  63         for (unsigned i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
  64                 if (ctx->vtx.vertexbuf.vb[i].buffer.resource == prsc)
  65                         ctx->dirty |= FD_DIRTY_VTXBUF;
  66         }
  67
  68         /* per-shader-stage resources: */
  69         for (unsigned stage = 0; stage < PIPE_SHADER_TYPES; stage++) {
  70                 /* Constbufs.. note that constbuf[0] is normal uniforms emitted in
  71                  * cmdstream rather than by pointer..
  72                  */
  73                 const unsigned num_ubos = util_last_bit(ctx->constbuf[stage].enabled_mask);
  74                 for (unsigned i = 1; i < num_ubos; i++) {
  75                         if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_CONST)
  76                                 break;
  77                         if (ctx->constbuf[stage].cb[i].buffer == prsc)
  78                                 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_CONST;
  79                 }
  80
  81                 /* Textures */
  82                 for (unsigned i = 0; i < ctx->tex[stage].num_textures; i++) {
  83                         if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_TEX)
  84                                 break;
  85                         if (ctx->tex[stage].textures[i] && (ctx->tex[stage].textures[i]->texture == prsc))
  86                                 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_TEX;
  87                 }
  88
  89                 /* SSBOs */
  90                 const unsigned num_ssbos = util_last_bit(ctx->shaderbuf[stage].enabled_mask);
  91                 for (unsigned i = 0; i < num_ssbos; i++) {
  92                         if (ctx->dirty_shader[stage] & FD_DIRTY_SHADER_SSBO)
  93                                 break;
  94                         if (ctx->shaderbuf[stage].sb[i].buffer == prsc)
  95                                 ctx->dirty_shader[stage] |= FD_DIRTY_SHADER_SSBO;
  96                 }
  97         }
  98 }
  99
 100 static void
 101 realloc_bo(struct fd_resource *rsc, uint32_t size)
 102 {
 103         struct pipe_resource *prsc = &rsc->base;
 104         struct fd_screen *screen = fd_screen(rsc->base.screen);
 105         uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
 106                         DRM_FREEDRENO_GEM_TYPE_KMEM |
 107                         COND(prsc->bind & PIPE_BIND_SCANOUT, DRM_FREEDRENO_GEM_SCANOUT);
 108                         /* TODO other flags? */
 109
 110         /* if we start using things other than write-combine,
 111          * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
 112          */
 113
 114         if (rsc->bo)
 115                 fd_bo_del(rsc->bo);
 116
 117         rsc->bo = fd_bo_new(screen->dev, size, flags, "%ux%ux%u@%u:%x",
 118                         prsc->width0, prsc->height0, prsc->depth0, rsc->cpp, prsc->bind);
 119         rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno);
 120         util_range_set_empty(&rsc->valid_buffer_range);
 121         fd_bc_invalidate_resource(rsc, true);
 122 }
 123
 124 static void
 125 do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
 126 {
 127         struct pipe_context *pctx = &ctx->base;
 128
 129         /* TODO size threshold too?? */
 130         if (fallback || !fd_blit(pctx, blit)) {
 131                 /* do blit on cpu: */
 132                 util_resource_copy_region(pctx,
 133                                 blit->dst.resource, blit->dst.level, blit->dst.box.x,
 134                                 blit->dst.box.y, blit->dst.box.z,
 135                                 blit->src.resource, blit->src.level, &blit->src.box);
 136         }
 137 }
 138
 139 static bool
 140 fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
 141                 unsigned level, const struct pipe_box *box)
 142 {
 143         struct pipe_context *pctx = &ctx->base;
 144         struct pipe_resource *prsc = &rsc->base;
 145         bool fallback = false;
 146
 147         if (prsc->next)
 148                 return false;
 149
 150         /* TODO: somehow munge dimensions and format to copy unsupported
 151          * render target format to something that is supported?
 152          */
 153         if (!pctx->screen->is_format_supported(pctx->screen,
 154                         prsc->format, prsc->target, prsc->nr_samples,
 155                         prsc->nr_storage_samples,
 156                         PIPE_BIND_RENDER_TARGET))
 157                 fallback = true;
 158
 159         /* do shadowing back-blits on the cpu for buffers: */
 160         if (prsc->target == PIPE_BUFFER)
 161                 fallback = true;
 162
 163         bool whole_level = util_texrange_covers_whole_level(prsc, level,
 164                 box->x, box->y, box->z, box->width, box->height, box->depth);
 165
 166         /* TODO need to be more clever about current level */
 167         if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
 168                 return false;
 169
 170         struct pipe_resource *pshadow =
 171                 pctx->screen->resource_create(pctx->screen, prsc);
 172
 173         if (!pshadow)
 174                 return false;
 175
 176         assert(!ctx->in_shadow);
 177         ctx->in_shadow = true;
 178
 179         /* get rid of any references that batch-cache might have to us (which
 180          * should empty/destroy rsc->batches hashset)
 181          */
 182         fd_bc_invalidate_resource(rsc, false);
 183
 184         mtx_lock(&ctx->screen->lock);
 185
 186         /* Swap the backing bo's, so shadow becomes the old buffer,
 187          * blit from shadow to new buffer.  From here on out, we
 188          * cannot fail.
 189          *
 190          * Note that we need to do it in this order, otherwise if
 191          * we go down cpu blit path, the recursive transfer_map()
 192          * sees the wrong status..
 193          */
 194         struct fd_resource *shadow = fd_resource(pshadow);
 195
 196         DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.reference.count,
 197                         shadow, shadow->base.reference.count);
 198
 199         /* TODO valid_buffer_range?? */
 200         swap(rsc->bo,        shadow->bo);
 201         swap(rsc->write_batch,   shadow->write_batch);
 202         rsc->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);
 203
 204         /* at this point, the newly created shadow buffer is not referenced
 205          * by any batches, but the existing rsc (probably) is.  We need to
 206          * transfer those references over:
 207          */
 208         debug_assert(shadow->batch_mask == 0);
 209         struct fd_batch *batch;
 210         foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
 211                 struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
 212                 _mesa_set_remove(batch->resources, entry);
 213                 _mesa_set_add(batch->resources, shadow);
 214         }
 215         swap(rsc->batch_mask, shadow->batch_mask);
 216
 217         mtx_unlock(&ctx->screen->lock);
 218
 219         struct pipe_blit_info blit = {};
 220         blit.dst.resource = prsc;
 221         blit.dst.format   = prsc->format;
 222         blit.src.resource = pshadow;
 223         blit.src.format   = pshadow->format;
 224         blit.mask = util_format_get_mask(prsc->format);
 225         blit.filter = PIPE_TEX_FILTER_NEAREST;
 226
 227 #define set_box(field, val) do {     \
 228                 blit.dst.field = (val);      \
 229                 blit.src.field = (val);      \
 230         } while (0)
 231
 232         /* blit the other levels in their entirety: */
 233         for (unsigned l = 0; l <= prsc->last_level; l++) {
 234                 if (l == level)
 235                         continue;
 236
 237                 /* just blit whole level: */
 238                 set_box(level, l);
 239                 set_box(box.width,  u_minify(prsc->width0, l));
 240                 set_box(box.height, u_minify(prsc->height0, l));
 241                 set_box(box.depth,  u_minify(prsc->depth0, l));
 242
 243                 do_blit(ctx, &blit, fallback);
 244         }
 245
 246         /* deal w/ current level specially, since we might need to split
 247          * it up into a couple blits:
 248          */
 249         if (!whole_level) {
 250                 set_box(level, level);
 251
 252                 switch (prsc->target) {
 253                 case PIPE_BUFFER:
 254                 case PIPE_TEXTURE_1D:
 255                         set_box(box.y, 0);
 256                         set_box(box.z, 0);
 257                         set_box(box.height, 1);
 258                         set_box(box.depth, 1);
 259
 260                         if (box->x > 0) {
 261                                 set_box(box.x, 0);
 262                                 set_box(box.width, box->x);
 263
 264                                 do_blit(ctx, &blit, fallback);
 265                         }
 266                         if ((box->x + box->width) < u_minify(prsc->width0, level)) {
 267                                 set_box(box.x, box->x + box->width);
 268                                 set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));
 269
 270                                 do_blit(ctx, &blit, fallback);
 271                         }
 272                         break;
 273                 case PIPE_TEXTURE_2D:
 274                         /* TODO */
 275                 default:
 276                         unreachable("TODO");
 277                 }
 278         }
 279
 280         ctx->in_shadow = false;
 281
 282         pipe_resource_reference(&pshadow, NULL);
 283
 284         return true;
 285 }
 286
 287 static struct fd_resource *
 288 fd_alloc_staging(struct fd_context *ctx, struct fd_resource *rsc,
 289                 unsigned level, const struct pipe_box *box)
 290 {
 291         struct pipe_context *pctx = &ctx->base;
 292         struct pipe_resource tmpl = rsc->base;
 293
 294         tmpl.width0  = box->width;
 295         tmpl.height0 = box->height;
 296         /* for array textures, box->depth is the array_size, otherwise
 297          * for 3d textures, it is the depth:
 298          */
 299         if (tmpl.array_size > 1) {
 300                 tmpl.array_size = box->depth;
 301                 tmpl.depth0 = 1;
 302         } else {
 303                 tmpl.array_size = 1;
 304                 tmpl.depth0 = box->depth;
 305         }
 306         tmpl.last_level = 0;
 307         tmpl.bind |= PIPE_BIND_LINEAR;
 308
 309         struct pipe_resource *pstaging =
 310                 pctx->screen->resource_create(pctx->screen, &tmpl);
 311         if (!pstaging)
 312                 return NULL;
 313
 314         return fd_resource(pstaging);
 315 }
 316
 317 static void
 318 fd_blit_from_staging(struct fd_context *ctx, struct fd_transfer *trans)
 319 {
 320         struct pipe_resource *dst = trans->base.resource;
 321         struct pipe_blit_info blit = {};
 322
 323         blit.dst.resource = dst;
 324         blit.dst.format   = dst->format;
 325         blit.dst.level    = trans->base.level;
 326         blit.dst.box      = trans->base.box;
 327         blit.src.resource = trans->staging_prsc;
 328         blit.src.format   = trans->staging_prsc->format;
 329         blit.src.level    = 0;
 330         blit.src.box      = trans->staging_box;
 331         blit.mask = util_format_get_mask(trans->staging_prsc->format);
 332         blit.filter = PIPE_TEX_FILTER_NEAREST;
 333
 334         do_blit(ctx, &blit, false);
 335 }
 336
 337 static void
 338 fd_blit_to_staging(struct fd_context *ctx, struct fd_transfer *trans)
 339 {
 340         struct pipe_resource *src = trans->base.resource;
 341         struct pipe_blit_info blit = {};
 342
 343         blit.src.resource = src;
 344         blit.src.format   = src->format;
 345         blit.src.level    = trans->base.level;
 346         blit.src.box      = trans->base.box;
 347         blit.dst.resource = trans->staging_prsc;
 348         blit.dst.format   = trans->staging_prsc->format;
 349         blit.dst.level    = 0;
 350         blit.dst.box      = trans->staging_box;
 351         blit.mask = util_format_get_mask(trans->staging_prsc->format);
 352         blit.filter = PIPE_TEX_FILTER_NEAREST;
 353
 354         do_blit(ctx, &blit, false);
 355 }
 356
 357 static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
 358                 struct pipe_transfer *ptrans,
 359                 const struct pipe_box *box)
 360 {
 361         struct fd_resource *rsc = fd_resource(ptrans->resource);
 362
 363         if (ptrans->resource->target == PIPE_BUFFER)
 364                 util_range_add(&rsc->valid_buffer_range,
 365                                            ptrans->box.x + box->x,
 366                                            ptrans->box.x + box->x + box->width);
 367 }
 368
 369 static void
 370 flush_resource(struct fd_context *ctx, struct fd_resource *rsc, unsigned usage)
 371 {
 372         struct fd_batch *write_batch = NULL;
 373
 374         mtx_lock(&ctx->screen->lock);
 375         fd_batch_reference_locked(&write_batch, rsc->write_batch);
 376         mtx_unlock(&ctx->screen->lock);
 377
 378         if (usage & PIPE_TRANSFER_WRITE) {
 379                 struct fd_batch *batch, *batches[32] = {};
 380                 uint32_t batch_mask;
 381
 382                 /* This is a bit awkward, probably a fd_batch_flush_locked()
 383                  * would make things simpler.. but we need to hold the lock
 384                  * to iterate the batches which reference this resource.  So
 385                  * we must first grab references under a lock, then flush.
 386                  */
 387                 mtx_lock(&ctx->screen->lock);
 388                 batch_mask = rsc->batch_mask;
 389                 foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
 390                         fd_batch_reference_locked(&batches[batch->idx], batch);
 391                 mtx_unlock(&ctx->screen->lock);
 392
 393                 foreach_batch(batch, &ctx->screen->batch_cache, batch_mask)
 394                         fd_batch_flush(batch, false, false);
 395
 396                 foreach_batch(batch, &ctx->screen->batch_cache, batch_mask) {
 397                         fd_batch_sync(batch);
 398                         fd_batch_reference(&batches[batch->idx], NULL);
 399                 }
 400                 assert(rsc->batch_mask == 0);
 401         } else if (write_batch) {
 402                 fd_batch_flush(write_batch, true, false);
 403         }
 404
 405         fd_batch_reference(&write_batch, NULL);
 406
 407         assert(!rsc->write_batch);
 408 }
 409
 410 static void
 411 fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
 412 {
 413         flush_resource(fd_context(pctx), fd_resource(prsc), PIPE_TRANSFER_READ);
 414 }
 415
 416 static void
 417 fd_resource_transfer_unmap(struct pipe_context *pctx,
 418                 struct pipe_transfer *ptrans)
 419 {
 420         struct fd_context *ctx = fd_context(pctx);
 421         struct fd_resource *rsc = fd_resource(ptrans->resource);
 422         struct fd_transfer *trans = fd_transfer(ptrans);
 423
 424         if (trans->staging_prsc) {
 425                 if (ptrans->usage & PIPE_TRANSFER_WRITE)
 426                         fd_blit_from_staging(ctx, trans);
 427                 pipe_resource_reference(&trans->staging_prsc, NULL);
 428         }
 429
 430         if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 431                 fd_bo_cpu_fini(rsc->bo);
 432         }
 433
 434         util_range_add(&rsc->valid_buffer_range,
 435                                    ptrans->box.x,
 436                                    ptrans->box.x + ptrans->box.width);
 437
 438         pipe_resource_reference(&ptrans->resource, NULL);
 439         slab_free(&ctx->transfer_pool, ptrans);
 440 }
 441
 442 static void *
 443 fd_resource_transfer_map(struct pipe_context *pctx,
 444                 struct pipe_resource *prsc,
 445                 unsigned level, unsigned usage,
 446                 const struct pipe_box *box,
 447                 struct pipe_transfer **pptrans)
 448 {
 449         struct fd_context *ctx = fd_context(pctx);
 450         struct fd_resource *rsc = fd_resource(prsc);
 451         struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
 452         struct fd_transfer *trans;
 453         struct pipe_transfer *ptrans;
 454         enum pipe_format format = prsc->format;
 455         uint32_t op = 0;
 456         uint32_t offset;
 457         char *buf;
 458         int ret = 0;
 459
 460         DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
 461                 box->width, box->height, box->x, box->y);
 462
 463         ptrans = slab_alloc(&ctx->transfer_pool);
 464         if (!ptrans)
 465                 return NULL;
 466
 467         /* slab_alloc_st() doesn't zero: */
 468         trans = fd_transfer(ptrans);
 469         memset(trans, 0, sizeof(*trans));
 470
 471         pipe_resource_reference(&ptrans->resource, prsc);
 472         ptrans->level = level;
 473         ptrans->usage = usage;
 474         ptrans->box = *box;
 475         ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
 476         ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;
 477
 478         /* we always need a staging texture for tiled buffers:
 479          *
 480          * TODO we might sometimes want to *also* shadow the resource to avoid
 481          * splitting a batch.. for ex, mid-frame texture uploads to a tiled
 482          * texture.
 483          */
 484         if (rsc->tile_mode) {
 485                 struct fd_resource *staging_rsc;
 486
 487                 staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
 488                 if (staging_rsc) {
 489                         // TODO for PIPE_TRANSFER_READ, need to do untiling blit..
 490                         trans->staging_prsc = &staging_rsc->base;
 491                         trans->base.stride = util_format_get_nblocksx(format,
 492                                 staging_rsc->slices[0].pitch) * staging_rsc->cpp;
 493                         trans->base.layer_stride = staging_rsc->layer_first ?
 494                                 staging_rsc->layer_size : staging_rsc->slices[0].size0;
 495                         trans->staging_box = *box;
 496                         trans->staging_box.x = 0;
 497                         trans->staging_box.y = 0;
 498                         trans->staging_box.z = 0;
 499
 500                         if (usage & PIPE_TRANSFER_READ) {
 501                                 fd_blit_to_staging(ctx, trans);
 502
 503                                 struct fd_batch *batch = NULL;
 504
 505                                 fd_context_lock(ctx);
 506                                 fd_batch_reference_locked(&batch, staging_rsc->write_batch);
 507                                 fd_context_unlock(ctx);
 508
 509                                 /* we can't fd_bo_cpu_prep() until the blit to staging
 510                                  * is submitted to kernel.. in that case write_batch
 511                                  * wouldn't be NULL yet:
 512                                  */
 513                                 if (batch) {
 514                                         fd_batch_sync(batch);
 515                                         fd_batch_reference(&batch, NULL);
 516                                 }
 517
 518                                 fd_bo_cpu_prep(staging_rsc->bo, ctx->pipe,
 519                                                 DRM_FREEDRENO_PREP_READ);
 520                         }
 521
 522                         buf = fd_bo_map(staging_rsc->bo);
 523                         offset = 0;
 524
 525                         *pptrans = ptrans;
 526
 527                         ctx->stats.staging_uploads++;
 528
 529                         return buf;
 530                 }
 531         }
 532
 533         if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
 534                 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 535
 536         if (usage & PIPE_TRANSFER_READ)
 537                 op |= DRM_FREEDRENO_PREP_READ;
 538
 539         if (usage & PIPE_TRANSFER_WRITE)
 540                 op |= DRM_FREEDRENO_PREP_WRITE;
 541
 542         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
 543                 realloc_bo(rsc, fd_bo_size(rsc->bo));
 544                 rebind_resource(ctx, prsc);
 545         } else if ((usage & PIPE_TRANSFER_WRITE) &&
 546                            prsc->target == PIPE_BUFFER &&
 547                            !util_ranges_intersect(&rsc->valid_buffer_range,
 548                                                                           box->x, box->x + box->width)) {
 549                 /* We are trying to write to a previously uninitialized range. No need
 550                  * to wait.
 551                  */
 552         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 553                 struct fd_batch *write_batch = NULL;
 554
 555                 /* hold a reference, so it doesn't disappear under us: */
 556                 fd_context_lock(ctx);
 557                 fd_batch_reference_locked(&write_batch, rsc->write_batch);
 558                 fd_context_unlock(ctx);
 559
 560                 if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
 561                                 write_batch->back_blit) {
 562                         /* if only thing pending is a back-blit, we can discard it: */
 563                         fd_batch_reset(write_batch);
 564                 }
 565
 566                 /* If the GPU is writing to the resource, or if it is reading from the
 567                  * resource and we're trying to write to it, flush the renders.
 568                  */
 569                 bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
 570                 bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
 571                                 ctx->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));
 572
 573                 /* if we need to flush/stall, see if we can make a shadow buffer
 574                  * to avoid this:
 575                  *
 576                  * TODO we could go down this path !reorder && !busy_for_read
 577                  * ie. we only *don't* want to go down this path if the blit
 578                  * will trigger a flush!
 579                  */
 580                 if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ) &&
 581                                 (usage & PIPE_TRANSFER_DISCARD_RANGE)) {
 582                         /* try shadowing only if it avoids a flush, otherwise staging would
 583                          * be better:
 584                          */
 585                         if (needs_flush && fd_try_shadow_resource(ctx, rsc, level, box)) {
 586                                 needs_flush = busy = false;
 587                                 rebind_resource(ctx, prsc);
 588                                 ctx->stats.shadow_uploads++;
 589                         } else {
 590                                 struct fd_resource *staging_rsc;
 591
 592                                 if (needs_flush) {
 593                                         flush_resource(ctx, rsc, usage);
 594                                         needs_flush = false;
 595                                 }
 596
 597                                 /* in this case, we don't need to shadow the whole resource,
 598                                  * since any draw that references the previous contents has
 599                                  * already had rendering flushed for all tiles.  So we can
 600                                  * use a staging buffer to do the upload.
 601                                  */
 602                                 staging_rsc = fd_alloc_staging(ctx, rsc, level, box);
 603                                 if (staging_rsc) {
 604                                         trans->staging_prsc = &staging_rsc->base;
 605                                         trans->base.stride = util_format_get_nblocksx(format,
 606                                                 staging_rsc->slices[0].pitch) * staging_rsc->cpp;
 607                                         trans->base.layer_stride = staging_rsc->layer_first ?
 608                                                 staging_rsc->layer_size : staging_rsc->slices[0].size0;
 609                                         trans->staging_box = *box;
 610                                         trans->staging_box.x = 0;
 611                                         trans->staging_box.y = 0;
 612                                         trans->staging_box.z = 0;
 613                                         buf = fd_bo_map(staging_rsc->bo);
 614                                         offset = 0;
 615
 616                                         *pptrans = ptrans;
 617
 618                                         fd_batch_reference(&write_batch, NULL);
 619
 620                                         ctx->stats.staging_uploads++;
 621
 622                                         return buf;
 623                                 }
 624                         }
 625                 }
 626
 627                 if (needs_flush) {
 628                         flush_resource(ctx, rsc, usage);
 629                         needs_flush = false;
 630                 }
 631
 632                 fd_batch_reference(&write_batch, NULL);
 633
 634                 /* The GPU keeps track of how the various bo's are being used, and
 635                  * will wait if necessary for the proper operation to have
 636                  * completed.
 637                  */
 638                 if (busy) {
 639                         ret = fd_bo_cpu_prep(rsc->bo, ctx->pipe, op);
 640                         if (ret)
 641                                 goto fail;
 642                 }
 643         }
 644
 645         buf = fd_bo_map(rsc->bo);
 646         offset =
 647                 box->y / util_format_get_blockheight(format) * ptrans->stride +
 648                 box->x / util_format_get_blockwidth(format) * rsc->cpp +
 649                 fd_resource_offset(rsc, level, box->z);
 650
 651         if (usage & PIPE_TRANSFER_WRITE)
 652                 rsc->valid = true;
 653
 654         *pptrans = ptrans;
 655
 656         return buf + offset;
 657
 658 fail:
 659         fd_resource_transfer_unmap(pctx, ptrans);
 660         return NULL;
 661 }
 662
 663 static void
 664 fd_resource_destroy(struct pipe_screen *pscreen,
 665                 struct pipe_resource *prsc)
 666 {
 667         struct fd_resource *rsc = fd_resource(prsc);
 668         fd_bc_invalidate_resource(rsc, true);
 669         if (rsc->bo)
 670                 fd_bo_del(rsc->bo);
 671         if (rsc->scanout)
 672                 renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro);
 673
 674         util_range_destroy(&rsc->valid_buffer_range);
 675         FREE(rsc);
 676 }
 677
 678 static uint64_t
 679 fd_resource_modifier(struct fd_resource *rsc)
 680 {
 681         if (!rsc->tile_mode)
 682                 return DRM_FORMAT_MOD_LINEAR;
 683
 684         if (rsc->ubwc_size)
 685                 return DRM_FORMAT_MOD_QCOM_COMPRESSED;
 686
 687         /* TODO invent a modifier for tiled but not UBWC buffers: */
 688         return DRM_FORMAT_MOD_INVALID;
 689 }
 690
 691 static boolean
 692 fd_resource_get_handle(struct pipe_screen *pscreen,
 693                 struct pipe_context *pctx,
 694                 struct pipe_resource *prsc,
 695                 struct winsys_handle *handle,
 696                 unsigned usage)
 697 {
 698         struct fd_resource *rsc = fd_resource(prsc);
 699
 700         handle->modifier = fd_resource_modifier(rsc);
 701
 702         return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->scanout,
 703                         rsc->slices[0].pitch * rsc->cpp, handle);
 704 }
 705
 706 static uint32_t
 707 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
 708 {
 709         struct pipe_resource *prsc = &rsc->base;
 710         struct fd_screen *screen = fd_screen(prsc->screen);
 711         enum util_format_layout layout = util_format_description(format)->layout;
 712         uint32_t pitchalign = screen->gmem_alignw;
 713         uint32_t level, size = 0;
 714         uint32_t width = prsc->width0;
 715         uint32_t height = prsc->height0;
 716         uint32_t depth = prsc->depth0;
 717         /* in layer_first layout, the level (slice) contains just one
 718          * layer (since in fact the layer contains the slices)
 719          */
 720         uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
 721
 722         for (level = 0; level <= prsc->last_level; level++) {
 723                 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
 724                 uint32_t blocks;
 725
 726                 if (layout == UTIL_FORMAT_LAYOUT_ASTC)
 727                         slice->pitch = width =
 728                                 util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
 729                 else
 730                         slice->pitch = width = align(width, pitchalign);
 731                 slice->offset = size;
 732                 blocks = util_format_get_nblocks(format, width, height);
 733                 /* 1d array and 2d array textures must all have the same layer size
 734                  * for each miplevel on a3xx. 3d textures can have different layer
 735                  * sizes for high levels, but the hw auto-sizer is buggy (or at least
 736                  * different than what this code does), so as soon as the layer size
 737                  * range gets into range, we stop reducing it.
 738                  */
 739                 if (prsc->target == PIPE_TEXTURE_3D && (
 740                                         level == 1 ||
 741                                         (level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
 742                         slice->size0 = align(blocks * rsc->cpp, alignment);
 743                 else if (level == 0 || rsc->layer_first || alignment == 1)
 744                         slice->size0 = align(blocks * rsc->cpp, alignment);
 745                 else
 746                         slice->size0 = rsc->slices[level - 1].size0;
 747
 748                 size += slice->size0 * depth * layers_in_level;
 749
 750                 width = u_minify(width, 1);
 751                 height = u_minify(height, 1);
 752                 depth = u_minify(depth, 1);
 753         }
 754
 755         return size;
 756 }
 757
 758 static uint32_t
 759 slice_alignment(enum pipe_texture_target target)
 760 {
 761         /* on a3xx, 2d array and 3d textures seem to want their
 762          * layers aligned to page boundaries:
 763          */
 764         switch (target) {
 765         case PIPE_TEXTURE_3D:
 766         case PIPE_TEXTURE_1D_ARRAY:
 767         case PIPE_TEXTURE_2D_ARRAY:
 768                 return 4096;
 769         default:
 770                 return 1;
 771         }
 772 }
 773
 774 /* cross generation texture layout to plug in to screen->setup_slices()..
 775  * replace with generation specific one as-needed.
 776  *
 777  * TODO for a4xx probably can extract out the a4xx specific logic int
 778  * a small fd4_setup_slices() wrapper that sets up layer_first, and then
 779  * calls this.
 780  */
 781 uint32_t
 782 fd_setup_slices(struct fd_resource *rsc)
 783 {
 784         uint32_t alignment;
 785
 786         alignment = slice_alignment(rsc->base.target);
 787
 788         struct fd_screen *screen = fd_screen(rsc->base.screen);
 789         if (is_a4xx(screen)) {
 790                 switch (rsc->base.target) {
 791                 case PIPE_TEXTURE_3D:
 792                         rsc->layer_first = false;
 793                         break;
 794                 default:
 795                         rsc->layer_first = true;
 796                         alignment = 1;
 797                         break;
 798                 }
 799         }
 800
 801         return setup_slices(rsc, alignment, rsc->base.format);
 802 }
 803
 804 /* special case to resize query buf after allocated.. */
 805 void
 806 fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
 807 {
 808         struct fd_resource *rsc = fd_resource(prsc);
 809
 810         debug_assert(prsc->width0 == 0);
 811         debug_assert(prsc->target == PIPE_BUFFER);
 812         debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
 813
 814         prsc->width0 = sz;
 815         realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc));
 816 }
 817
 818 // TODO common helper?
 819 static bool
 820 has_depth(enum pipe_format format)
 821 {
 822         switch (format) {
 823         case PIPE_FORMAT_Z16_UNORM:
 824         case PIPE_FORMAT_Z32_UNORM:
 825         case PIPE_FORMAT_Z32_FLOAT:
 826         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
 827         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
 828         case PIPE_FORMAT_S8_UINT_Z24_UNORM:
 829         case PIPE_FORMAT_Z24X8_UNORM:
 830         case PIPE_FORMAT_X8Z24_UNORM:
 831                 return true;
 832         default:
 833                 return false;
 834         }
 835 }
 836
 837 /**
 838  * Create a new texture object, using the given template info.
 839  */
 840 static struct pipe_resource *
 841 fd_resource_create_with_modifiers(struct pipe_screen *pscreen,
 842                 const struct pipe_resource *tmpl,
 843                 const uint64_t *modifiers, int count)
 844 {
 845         struct fd_screen *screen = fd_screen(pscreen);
 846         struct fd_resource *rsc;
 847         struct pipe_resource *prsc;
 848         enum pipe_format format = tmpl->format;
 849         uint32_t size;
 850
 851         /* when using kmsro, scanout buffers are allocated on the display device
 852          * create_with_modifiers() doesn't give us usage flags, so we have to
 853          * assume that all calls with modifiers are scanout-possible
 854          */
 855         if (screen->ro &&
 856                 ((tmpl->bind & PIPE_BIND_SCANOUT) ||
 857                  !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) {
 858                 struct pipe_resource scanout_templat = *tmpl;
 859                 struct renderonly_scanout *scanout;
 860                 struct winsys_handle handle;
 861
 862                 scanout = renderonly_scanout_for_resource(&scanout_templat,
 863                                                                                                   screen->ro, &handle);
 864                 if (!scanout)
 865                         return NULL;
 866
 867                 renderonly_scanout_destroy(scanout, screen->ro);
 868
 869                 assert(handle.type == WINSYS_HANDLE_TYPE_FD);
 870                 rsc = fd_resource(pscreen->resource_from_handle(pscreen, tmpl,
 871                                                                                                                 &handle,
 872                                                                                                                 PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE));
 873                 close(handle.handle);
 874                 if (!rsc)
 875                         return NULL;
 876
 877                 return &rsc->base;
 878         }
 879
 880         rsc = CALLOC_STRUCT(fd_resource);
 881         prsc = &rsc->base;
 882
 883         DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
 884                         "nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
 885                         tmpl->target, util_format_name(format),
 886                         tmpl->width0, tmpl->height0, tmpl->depth0,
 887                         tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
 888                         tmpl->usage, tmpl->bind, tmpl->flags);
 889
 890         if (!rsc)
 891                 return NULL;
 892
 893         *prsc = *tmpl;
 894
 895 #define LINEAR \
 896         (PIPE_BIND_SCANOUT | \
 897          PIPE_BIND_LINEAR  | \
 898          PIPE_BIND_DISPLAY_TARGET)
 899
 900         bool linear = drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count);
 901         if (tmpl->bind & LINEAR)
 902                 linear = true;
 903
 904         /* Normally, for non-shared buffers, allow buffer compression if
 905          * not shared, otherwise only allow if QCOM_COMPRESSED modifier
 906          * is requested:
 907          *
 908          * TODO we should probably also limit tiled in a similar way,
 909          * except we don't have a format modifier for tiled.  (We probably
 910          * should.)
 911          */
 912         bool allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count);
 913         if (tmpl->bind & PIPE_BIND_SHARED)
 914                 allow_ubwc = drm_find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count);
 915
 916         /* TODO turn on UBWC for all internal buffers
 917          *
 918          * There are still some regressions in deqp with UBWC enabled.  I
 919          * think it is mostly related to sampler/image views using a format
 920          * that doesn't support compression with a resource created with
 921          * a format that does.  We need to track the compression state of
 922          * a buffer and do an (in-place, hopefully?) resolve if it is re-
 923          * interpreted with a format that does not support compression.
 924          *
 925          * It is possible (likely?) that we can't do atomic ops on a
 926          * compressed buffer as well, so this would also require transition
 927          * to a compressed state.
 928          */
 929         allow_ubwc &= !!(fd_mesa_debug & FD_DBG_UBWC);
 930
 931         if (screen->tile_mode &&
 932                         (tmpl->target != PIPE_BUFFER) &&
 933                         !linear) {
 934                 rsc->tile_mode = screen->tile_mode(tmpl);
 935         }
 936
 937         pipe_reference_init(&prsc->reference, 1);
 938
 939         prsc->screen = pscreen;
 940
 941         util_range_init(&rsc->valid_buffer_range);
 942
 943         rsc->internal_format = format;
 944         rsc->cpp = util_format_get_blocksize(format);
 945         rsc->cpp *= fd_resource_nr_samples(prsc);
 946
 947         assert(rsc->cpp);
 948
 949         // XXX probably need some extra work if we hit rsc shadowing path w/ lrz..
 950         if ((is_a5xx(screen) || is_a6xx(screen)) &&
 951                  (fd_mesa_debug & FD_DBG_LRZ) && has_depth(format)) {
 952                 const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
 953                                 DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
 954                 unsigned lrz_pitch  = align(DIV_ROUND_UP(tmpl->width0, 8), 64);
 955                 unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8);
 956
 957                 /* LRZ buffer is super-sampled: */
 958                 switch (prsc->nr_samples) {
 959                 case 4:
 960                         lrz_pitch *= 2;
 961                 case 2:
 962                         lrz_height *= 2;
 963                 }
 964
 965                 unsigned size = lrz_pitch * lrz_height * 2;
 966
 967                 size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */
 968
 969                 rsc->lrz_height = lrz_height;
 970                 rsc->lrz_width = lrz_pitch;
 971                 rsc->lrz_pitch = lrz_pitch;
 972                 rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
 973         }
 974
 975         size = screen->setup_slices(rsc);
 976
 977         if (allow_ubwc && screen->fill_ubwc_buffer_sizes && rsc->tile_mode)
 978                 size += screen->fill_ubwc_buffer_sizes(rsc);
 979
 980         /* special case for hw-query buffer, which we need to allocate before we
 981          * know the size:
 982          */
 983         if (size == 0) {
 984                 /* note, semi-intention == instead of & */
 985                 debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
 986                 return prsc;
 987         }
 988
 989         if (rsc->layer_first) {
 990                 rsc->layer_size = align(size, 4096);
 991                 size = rsc->layer_size * prsc->array_size;
 992         }
 993
 994         realloc_bo(rsc, size);
 995         if (!rsc->bo)
 996                 goto fail;
 997
 998         return prsc;
 999 fail:
1000         fd_resource_destroy(pscreen, prsc);
1001         return NULL;
1002 }
1003
1004 static struct pipe_resource *
1005 fd_resource_create(struct pipe_screen *pscreen,
1006                 const struct pipe_resource *tmpl)
1007 {
1008         const uint64_t mod = DRM_FORMAT_MOD_INVALID;
1009         return fd_resource_create_with_modifiers(pscreen, tmpl, &mod, 1);
1010 }
1011
1012 static bool
1013 is_supported_modifier(struct pipe_screen *pscreen, enum pipe_format pfmt,
1014                 uint64_t mod)
1015 {
1016         int count;
1017
1018         /* Get the count of supported modifiers: */
1019         pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, NULL, NULL, &count);
1020
1021         /* Get the supported modifiers: */
1022         uint64_t modifiers[count];
1023         pscreen->query_dmabuf_modifiers(pscreen, pfmt, count, modifiers, NULL, &count);
1024
1025         for (int i = 0; i < count; i++)
1026                 if (modifiers[i] == mod)
1027                         return true;
1028
1029         return false;
1030 }
1031
1032 /**
1033  * Create a texture from a winsys_handle. The handle is often created in
1034  * another process by first creating a pipe texture and then calling
1035  * resource_get_handle.
1036  */
1037 static struct pipe_resource *
1038 fd_resource_from_handle(struct pipe_screen *pscreen,
1039                 const struct pipe_resource *tmpl,
1040                 struct winsys_handle *handle, unsigned usage)
1041 {
1042         struct fd_screen *screen = fd_screen(pscreen);
1043         struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
1044         struct fd_resource_slice *slice = &rsc->slices[0];
1045         struct pipe_resource *prsc = &rsc->base;
1046         uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw;
1047
1048         DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
1049                         "nr_samples=%u, usage=%u, bind=%x, flags=%x",
1050                         tmpl->target, util_format_name(tmpl->format),
1051                         tmpl->width0, tmpl->height0, tmpl->depth0,
1052                         tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
1053                         tmpl->usage, tmpl->bind, tmpl->flags);
1054
1055         if (!rsc)
1056                 return NULL;
1057
1058         *prsc = *tmpl;
1059
1060         pipe_reference_init(&prsc->reference, 1);
1061
1062         prsc->screen = pscreen;
1063
1064         util_range_init(&rsc->valid_buffer_range);
1065
1066         rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
1067         if (!rsc->bo)
1068                 goto fail;
1069
1070         rsc->internal_format = tmpl->format;
1071         rsc->cpp = util_format_get_blocksize(tmpl->format);
1072         rsc->cpp *= fd_resource_nr_samples(prsc);
1073         slice->pitch = handle->stride / rsc->cpp;
1074         slice->offset = handle->offset;
1075         slice->size0 = handle->stride * prsc->height0;
1076
1077         if ((slice->pitch < align(prsc->width0, pitchalign)) ||
1078                         (slice->pitch & (pitchalign - 1)))
1079                 goto fail;
1080
1081         if (handle->modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) {
1082                 if (!is_supported_modifier(pscreen, tmpl->format,
1083                                 DRM_FORMAT_MOD_QCOM_COMPRESSED)) {
1084                         DBG("bad modifier: %"PRIx64, handle->modifier);
1085                         goto fail;
1086                 }
1087                 debug_assert(screen->fill_ubwc_buffer_sizes);
1088                 screen->fill_ubwc_buffer_sizes(rsc);
1089         } else if (handle->modifier &&
1090                         (handle->modifier != DRM_FORMAT_MOD_INVALID)) {
1091                 goto fail;
1092         }
1093
1094         assert(rsc->cpp);
1095
1096         if (screen->ro) {
1097                 rsc->scanout =
1098                         renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL);
1099                 /* failure is expected in some cases.. */
1100         }
1101
1102         rsc->valid = true;
1103
1104         return prsc;
1105
1106 fail:
1107         fd_resource_destroy(pscreen, prsc);
1108         return NULL;
1109 }
1110
1111 bool
1112 fd_render_condition_check(struct pipe_context *pctx)
1113 {
1114         struct fd_context *ctx = fd_context(pctx);
1115
1116         if (!ctx->cond_query)
1117                 return true;
1118
1119         union pipe_query_result res = { 0 };
1120         bool wait =
1121                 ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
1122                 ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
1123
1124         if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
1125                         return (bool)res.u64 != ctx->cond_cond;
1126
1127         return true;
1128 }
1129
1130 static void
1131 fd_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
1132 {
1133         struct fd_context *ctx = fd_context(pctx);
1134         struct fd_resource *rsc = fd_resource(prsc);
1135
1136         /*
1137          * TODO I guess we could track that the resource is invalidated and
1138          * use that as a hint to realloc rather than stall in _transfer_map(),
1139          * even in the non-DISCARD_WHOLE_RESOURCE case?
1140          *
1141          * Note: we set dirty bits to trigger invalidate logic fd_draw_vbo
1142          */
1143
1144         if (rsc->write_batch) {
1145                 struct fd_batch *batch = rsc->write_batch;
1146                 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1147
1148                 if (pfb->zsbuf && pfb->zsbuf->texture == prsc) {
1149                         batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
1150                         ctx->dirty |= FD_DIRTY_ZSA;
1151                 }
1152
1153                 for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
1154                         if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
1155                                 batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
1156                                 ctx->dirty |= FD_DIRTY_FRAMEBUFFER;
1157                         }
1158                 }
1159         }
1160
1161         rsc->valid = false;
1162 }
1163
1164 static enum pipe_format
1165 fd_resource_get_internal_format(struct pipe_resource *prsc)
1166 {
1167         return fd_resource(prsc)->internal_format;
1168 }
1169
1170 static void
1171 fd_resource_set_stencil(struct pipe_resource *prsc,
1172                 struct pipe_resource *stencil)
1173 {
1174         fd_resource(prsc)->stencil = fd_resource(stencil);
1175 }
1176
1177 static struct pipe_resource *
1178 fd_resource_get_stencil(struct pipe_resource *prsc)
1179 {
1180         struct fd_resource *rsc = fd_resource(prsc);
1181         if (rsc->stencil)
1182                 return &rsc->stencil->base;
1183         return NULL;
1184 }
1185
1186 static const struct u_transfer_vtbl transfer_vtbl = {
1187                 .resource_create          = fd_resource_create,
1188                 .resource_destroy         = fd_resource_destroy,
1189                 .transfer_map             = fd_resource_transfer_map,
1190                 .transfer_flush_region    = fd_resource_transfer_flush_region,
1191                 .transfer_unmap           = fd_resource_transfer_unmap,
1192                 .get_internal_format      = fd_resource_get_internal_format,
1193                 .set_stencil              = fd_resource_set_stencil,
1194                 .get_stencil              = fd_resource_get_stencil,
1195 };
1196
1197 void
1198 fd_resource_screen_init(struct pipe_screen *pscreen)
1199 {
1200         struct fd_screen *screen = fd_screen(pscreen);
1201         bool fake_rgtc = screen->gpu_id < 400;
1202
1203         pscreen->resource_create = u_transfer_helper_resource_create;
1204         /* NOTE: u_transfer_helper does not yet support the _with_modifiers()
1205          * variant:
1206          */
1207         pscreen->resource_create_with_modifiers = fd_resource_create_with_modifiers;
1208         pscreen->resource_from_handle = fd_resource_from_handle;
1209         pscreen->resource_get_handle = fd_resource_get_handle;
1210         pscreen->resource_destroy = u_transfer_helper_resource_destroy;
1211
1212         pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl,
1213                         true, false, fake_rgtc, true);
1214
1215         if (!screen->setup_slices)
1216                 screen->setup_slices = fd_setup_slices;
1217 }
1218
/**
 * pipe_context::get_sample_position implementation.
 *
 * Writes the (x, y) position of one sample into @pos_out, each coordinate
 * being a table entry divided by 16.
 *
 * @param context       not referenced
 * @param sample_count  number of samples; 1, 2, 4 and 8 are handled
 * @param sample_index  which sample to look up; must be < @sample_count
 * @param pos_out       receives two floats (x, y)
 *
 * An unsupported @sample_count asserts and returns.  A @sample_index that
 * is out of range for the selected table returns without touching
 * @pos_out instead of reading past the end of the table.
 */
static void
fd_get_sample_position(struct pipe_context *context,
		unsigned sample_count, unsigned sample_index,
		float *pos_out)
{
	/* The following is copied from nouveau/nv50 except for position
	 * values, which are taken from blob driver */
	static const uint8_t pos1[1][2] = { { 0x8, 0x8 } };
	static const uint8_t pos2[2][2] = {
		{ 0xc, 0xc }, { 0x4, 0x4 } };
	static const uint8_t pos4[4][2] = {
		{ 0x6, 0x2 }, { 0xe, 0x6 },
		{ 0x2, 0xa }, { 0xa, 0xe } };
	/* TODO needs to be verified on supported hw */
	static const uint8_t pos8[8][2] = {
		{ 0x9, 0x5 }, { 0x7, 0xb },
		{ 0xd, 0x9 }, { 0x5, 0x3 },
		{ 0x3, 0xd }, { 0x1, 0x7 },
		{ 0xb, 0xf }, { 0xf, 0x1 } };

	const uint8_t (*ptr)[2];

	switch (sample_count) {
	case 1:
		ptr = pos1;
		break;
	case 2:
		ptr = pos2;
		break;
	case 4:
		ptr = pos4;
		break;
	case 8:
		ptr = pos8;
		break;
	default:
		assert(0);
		return;
	}

	/* each table has exactly sample_count rows; refuse to index past it: */
	if (sample_index >= sample_count)
		return;

	pos_out[0] = ptr[sample_index][0] / 16.0f;
	pos_out[1] = ptr[sample_index][1] / 16.0f;
}
1262
/**
 * Adapter with the void signature needed for pipe_context::blit: calls
 * fd_blit() and discards its return value.
 */
static void
fd_blit_pipe(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
	(void)fd_blit(pctx, blit_info);
}
1269
1270 void
1271 fd_resource_context_init(struct pipe_context *pctx)
1272 {
1273         pctx->transfer_map = u_transfer_helper_transfer_map;
1274         pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
1275         pctx->transfer_unmap = u_transfer_helper_transfer_unmap;
1276         pctx->buffer_subdata = u_default_buffer_subdata;
1277         pctx->texture_subdata = u_default_texture_subdata;
1278         pctx->create_surface = fd_create_surface;
1279         pctx->surface_destroy = fd_surface_destroy;
1280         pctx->resource_copy_region = fd_resource_copy_region;
1281         pctx->blit = fd_blit_pipe;
1282         pctx->flush_resource = fd_flush_resource;
1283         pctx->invalidate_resource = fd_invalidate_resource;
1284         pctx->get_sample_position = fd_get_sample_position;
1285 }