mesa.git: src/gallium/drivers/ilo/ilo_transfer.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
31
32 #include "ilo_blit.h"
33 #include "ilo_blitter.h"
34 #include "ilo_cp.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
39
40 /*
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
43  * for the GPU to finish, but there are cases where we can avoid waiting.
44 *
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50  *   mapping, and do a pipelined copy to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
54 *
55 * However, there are other flags to consider.
56 *
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61  * - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer while
62  *   it is mapped.  Synchronization is done with memory barriers, issued
63  *   explicitly via memory_barrier() or implicitly via transfer_flush_region(),
64  *   together with GPU fences.
65  * - When PIPE_TRANSFER_COHERENT is set, updates by either the CPU or the GPU
66  *   should be made visible to the other side immediately.  Since the kernel
67  *   flushes GPU caches at the end of each batch buffer, the CPU always sees
68  *   GPU updates.  We could use a coherent mapping to make all persistent
69  *   mappings coherent.
70 *
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
73 */
74
75 /**
76 * Return a transfer method suitable for the usage. The returned method will
77 * correctly block when the resource is busy.
78 */
79 static bool
80 resource_get_transfer_method(struct pipe_resource *res,
81 const struct pipe_transfer *transfer,
82 enum ilo_transfer_map_method *method)
83 {
84 const struct ilo_screen *is = ilo_screen(res->screen);
85 const unsigned usage = transfer->usage;
86 enum ilo_transfer_map_method m;
87 bool tiled;
88
89 if (res->target == PIPE_BUFFER) {
90 tiled = false;
91 } else {
92 struct ilo_texture *tex = ilo_texture(res);
93 bool need_convert = false;
94
95 /* we may need to convert on the fly */
96 if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
99 ilo_image_can_enable_aux(&tex->image, transfer->level)) {
100 m = ILO_TRANSFER_MAP_SW_ZS;
101 need_convert = true;
102 }
103 } else if (tex->image_format != tex->base.format) {
104 m = ILO_TRANSFER_MAP_SW_CONVERT;
105 need_convert = true;
106 }
107
108 if (need_convert) {
109 if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
110 return false;
111
112 *method = m;
113 return true;
114 }
115
116 tiled = (tex->image.tiling != GEN6_TILING_NONE);
117 }
118
119 if (tiled)
120 m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
121 else if (is->dev.has_llc)
122 m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
123 else if (usage & PIPE_TRANSFER_PERSISTENT)
124 m = ILO_TRANSFER_MAP_GTT; /* for coherency */
125 else if (usage & PIPE_TRANSFER_READ)
126 m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
127 else
128 m = ILO_TRANSFER_MAP_GTT;
129
130 *method = m;
131
132 return true;
133 }
134
135 /**
136  * Return true if the usage allows using a staging bo to avoid blocking.
137 */
138 static bool
139 usage_allows_staging_bo(unsigned usage)
140 {
141 /* do we know how to write the data back to the resource? */
142 const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
143 PIPE_TRANSFER_DISCARD_RANGE |
144 PIPE_TRANSFER_FLUSH_EXPLICIT);
145 const unsigned reasons_against = (PIPE_TRANSFER_READ |
146 PIPE_TRANSFER_MAP_DIRECTLY |
147 PIPE_TRANSFER_PERSISTENT);
148
149 return (usage & can_writeback) && !(usage & reasons_against);
150 }
151
152 /**
153 * Allocate the staging resource. It is always linear and its size matches
154  * the transfer box, with proper padding.
155 */
156 static bool
157 xfer_alloc_staging_res(struct ilo_transfer *xfer)
158 {
159 const struct pipe_resource *res = xfer->base.resource;
160 const struct pipe_box *box = &xfer->base.box;
161 struct pipe_resource templ;
162
163 memset(&templ, 0, sizeof(templ));
164
165 templ.format = res->format;
166
167 if (res->target == PIPE_BUFFER) {
168 templ.target = PIPE_BUFFER;
169 templ.width0 =
170 (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
171 }
172 else {
173 /* use 2D array for any texture target */
174 templ.target = PIPE_TEXTURE_2D_ARRAY;
175 templ.width0 = box->width;
176 }
177
178 templ.height0 = box->height;
179 templ.depth0 = 1;
180 templ.array_size = box->depth;
181 templ.nr_samples = 1;
182 templ.usage = PIPE_USAGE_STAGING;
183
184 if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
185 templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
186 PIPE_RESOURCE_FLAG_MAP_COHERENT;
187 }
188
189 xfer->staging.res = res->screen->resource_create(res->screen, &templ);
190
191 if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
192 assert(ilo_texture(xfer->staging.res)->image.tiling ==
193 GEN6_TILING_NONE);
194 }
195
196 return (xfer->staging.res != NULL);
197 }
198
199 /**
200 * Use an alternative transfer method or rename the resource to unblock an
201 * otherwise blocking transfer.
202 */
203 static bool
204 xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
205 {
206 struct pipe_resource *res = xfer->base.resource;
207 bool unblocked = false, renamed = false;
208
209 switch (xfer->method) {
210 case ILO_TRANSFER_MAP_CPU:
211 case ILO_TRANSFER_MAP_GTT:
212 if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
213 xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
214 unblocked = true;
215 }
216 else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
217 ilo_resource_rename_bo(res)) {
218 renamed = true;
219 unblocked = true;
220 }
221 else if (usage_allows_staging_bo(xfer->base.usage) &&
222 xfer_alloc_staging_res(xfer)) {
223 xfer->method = ILO_TRANSFER_MAP_STAGING;
224 unblocked = true;
225 }
226 break;
227 case ILO_TRANSFER_MAP_GTT_ASYNC:
228 case ILO_TRANSFER_MAP_STAGING:
229 unblocked = true;
230 break;
231 default:
232 break;
233 }
234
235 *resource_renamed = renamed;
236
237 return unblocked;
238 }
239
240 /**
241 * Allocate the staging system buffer based on the resource format and the
242 * transfer box.
243 */
244 static bool
245 xfer_alloc_staging_sys(struct ilo_transfer *xfer)
246 {
247 const enum pipe_format format = xfer->base.resource->format;
248 const struct pipe_box *box = &xfer->base.box;
249 const unsigned alignment = 64;
250
251 /* need to tell the world the layout */
252 xfer->base.stride =
253 align(util_format_get_stride(format, box->width), alignment);
254 xfer->base.layer_stride =
255 util_format_get_2d_size(format, xfer->base.stride, box->height);
256
257 xfer->staging.sys =
258 align_malloc(xfer->base.layer_stride * box->depth, alignment);
259
260 return (xfer->staging.sys != NULL);
261 }
262
263 /**
264 * Map according to the method. The staging system buffer should have been
265 * allocated if the method requires it.
266 */
267 static void *
268 xfer_map(struct ilo_transfer *xfer)
269 {
270 const struct ilo_vma *vma;
271 void *ptr;
272
273 switch (xfer->method) {
274 case ILO_TRANSFER_MAP_CPU:
275 vma = ilo_resource_get_vma(xfer->base.resource);
276 ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE);
277 break;
278 case ILO_TRANSFER_MAP_GTT:
279 vma = ilo_resource_get_vma(xfer->base.resource);
280 ptr = intel_bo_map_gtt(vma->bo);
281 break;
282 case ILO_TRANSFER_MAP_GTT_ASYNC:
283 vma = ilo_resource_get_vma(xfer->base.resource);
284 ptr = intel_bo_map_gtt_async(vma->bo);
285 break;
286 case ILO_TRANSFER_MAP_STAGING:
287 {
288 const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
289
290 vma = ilo_resource_get_vma(xfer->staging.res);
291
292 /*
293          * We want a writable, optionally persistent and coherent, mapping of a
294          * linear bo.  We could go through resource_get_transfer_method(), but the
295          * decision here is simple enough to make directly.
296 */
297 if (is->dev.has_llc)
298 ptr = intel_bo_map(vma->bo, true);
299 else
300 ptr = intel_bo_map_gtt(vma->bo);
301
302 if (ptr && xfer->staging.res->target == PIPE_BUFFER)
303 ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
304 }
305 break;
306 case ILO_TRANSFER_MAP_SW_CONVERT:
307 case ILO_TRANSFER_MAP_SW_ZS:
308 vma = NULL;
309 ptr = xfer->staging.sys;
310 break;
311 default:
312 assert(!"unknown mapping method");
313 vma = NULL;
314 ptr = NULL;
315 break;
316 }
317
318 if (ptr && vma)
319 ptr = (void *) ((char *) ptr + vma->bo_offset);
320
321 return ptr;
322 }
323
324 /**
325 * Unmap a transfer.
326 */
327 static void
328 xfer_unmap(struct ilo_transfer *xfer)
329 {
330 switch (xfer->method) {
331 case ILO_TRANSFER_MAP_CPU:
332 case ILO_TRANSFER_MAP_GTT:
333 case ILO_TRANSFER_MAP_GTT_ASYNC:
334 intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo);
335 break;
336 case ILO_TRANSFER_MAP_STAGING:
337 intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo);
338 break;
339 default:
340 break;
341 }
342 }
343
344 static void
345 tex_get_box_origin(const struct ilo_texture *tex,
346 unsigned level, unsigned slice,
347 const struct pipe_box *box,
348 unsigned *mem_x, unsigned *mem_y)
349 {
350 unsigned x, y;
351
352 ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
353 x += box->x;
354 y += box->y;
355
356 ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
357 }
358
359 static unsigned
360 tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
361 const struct pipe_box *box)
362 {
363 unsigned mem_x, mem_y;
364
365 tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);
366
367 return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
368 }
369
370 static unsigned
371 tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
372 {
373 return ilo_image_get_slice_stride(&tex->image, level);
374 }
375
376 static unsigned
377 tex_tile_x_swizzle(unsigned addr)
378 {
379 /*
380 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
381 *
382 * "As shown in the tiling algorithm, the new address bit[6] should be:
383 *
384 * Address bit[6] <= TiledAddr bit[6] XOR
385 * TiledAddr bit[9] XOR
386 * TiledAddr bit[10]"
387 */
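   /*
    * For example (illustrative values): with addr = 0x240, bits 9 and 6 are
    * set and bit 10 is clear, so the new bit 6 is 1 ^ 1 ^ 0 = 0 and the
    * swizzled address is 0x200.
    */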
388 return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
389 }
390
391 static unsigned
392 tex_tile_y_swizzle(unsigned addr)
393 {
394 /*
395 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
396 *
397 * "As shown in the tiling algorithm, The new address bit[6] becomes:
398 *
399 * Address bit[6] <= TiledAddr bit[6] XOR
400 * TiledAddr bit[9]"
401 */
402 return addr ^ ((addr >> 3) & 0x40);
403 }
404
405 static unsigned
406 tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
407 unsigned tiles_per_row, bool swizzle)
408 {
409 /*
410     * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that an
411 * X-major tile has 8 rows and 32 OWord columns (512 bytes). Tiles in the
412 * tiled region are numbered in row-major order, starting from zero. The
413 * tile number can thus be calculated as follows:
414 *
415 * tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
416 *
417 * OWords in that tile are also numbered in row-major order, starting from
418 * zero. The OWord number can thus be calculated as follows:
419 *
420 * oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
421 *
422 * and the tiled offset is
423 *
424 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
425 * = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
426 */
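   /*
    * A worked example with assumed values: if bo_stride is 1024 bytes,
    * tiles_per_row is 1024 / 512 = 2.  For (mem_x, mem_y) = (520, 10),
    *
    *   tile   = (10 / 8) * 2 + (520 / 512) = 3
    *   offset = 3 * 4096 + (10 % 8) * 512 + (520 % 512) = 13320
    */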
427 unsigned tile, offset;
428
429 tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
430 offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);
431
432 return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
433 }
434
435 static unsigned
436 tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
437 unsigned tiles_per_row, bool swizzle)
438 {
439 /*
440 * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
441 * Y-major tile has 32 rows and 8 OWord columns (128 bytes). Tiles in the
442 * tiled region are numbered in row-major order, starting from zero. The
443 * tile number can thus be calculated as follows:
444 *
445 * tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
446 *
447 * OWords in that tile are numbered in column-major order, starting from
448 * zero. The OWord number can thus be calculated as follows:
449 *
450 * oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
451 *
452 * and the tiled offset is
453 *
454 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
455 */
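   /*
    * A worked example with assumed values: if bo_stride is 1024 bytes,
    * tiles_per_row is 1024 / 128 = 8.  For (mem_x, mem_y) = (200, 70),
    *
    *   tile   = (70 / 32) * 8 + (200 / 128) = 17
    *   oword  = ((200 % 128) / 16) * 32 + (70 % 32) = 134
    *   offset = 17 * 4096 + 134 * 16 + (200 % 16) = 71784
    */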
456 unsigned tile, oword, offset;
457
458 tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
459 oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
460 offset = tile << 12 | oword << 4 | (mem_x & 0xf);
461
462 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
463 }
464
465 static unsigned
466 tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
467 unsigned tiles_per_row, bool swizzle)
468 {
469 /*
470 * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
471 * W-major tile has 8 8x8-block rows and 8 8x8-block columns. Tiles in the
472 * tiled region are numbered in row-major order, starting from zero. The
473 * tile number can thus be calculated as follows:
474 *
475 * tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
476 *
477 * 8x8-blocks in that tile are numbered in column-major order, starting
478 * from zero. The 8x8-block number can thus be calculated as follows:
479 *
480 * blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
481 *
482 * Each 8x8-block is divided into 4 4x4-blocks, in row-major order. Each
483 * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
484 * We have
485 *
486 * blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
487 * blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
488 * blk1 = (((mem_y % 64) ) & 1) * 2 + (((mem_x % 64) ) & 1)
489 *
490 * and the tiled offset is
491 *
492 * offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
493 */
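   /*
    * A worked example with assumed values: if bo_stride is 256 bytes,
    * tiles_per_row is 256 / 64 = 4.  For (mem_x, mem_y) = (90, 140),
    *
    *   tile   = (140 / 64) * 4 + (90 / 64) = 9
    *   blk8   = ((90 % 64) / 8) * 8 + ((140 % 64) / 8) = 25
    *   blk4   = 2, blk2 = 1, blk1 = 0
    *   offset = 9 * 4096 + 25 * 64 + 2 * 16 + 1 * 4 + 0 = 38500
    */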
494 unsigned tile, blk8, blk4, blk2, blk1, offset;
495
496 tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
497 blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
498 blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
499 blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
500 blk1 = ((mem_y ) & 0x1) << 1 | ((mem_x ) & 0x1);
501 offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;
502
503 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
504 }
505
506 static unsigned
507 tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
508 unsigned tiles_per_row, bool swizzle)
509 {
510 return mem_y * tiles_per_row + mem_x;
511 }
512
513 typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
514 unsigned tiles_per_row,
515 bool swizzle);
516
517 static tex_tile_offset_func
518 tex_tile_choose_offset_func(const struct ilo_texture *tex,
519 unsigned *tiles_per_row)
520 {
521 switch (tex->image.tiling) {
522 default:
523 assert(!"unknown tiling");
524 /* fall through */
525 case GEN6_TILING_NONE:
526 *tiles_per_row = tex->image.bo_stride;
527 return tex_tile_none_offset;
528 case GEN6_TILING_X:
529 *tiles_per_row = tex->image.bo_stride / 512;
530 return tex_tile_x_offset;
531 case GEN6_TILING_Y:
532 *tiles_per_row = tex->image.bo_stride / 128;
533 return tex_tile_y_offset;
534 case GEN8_TILING_W:
535 *tiles_per_row = tex->image.bo_stride / 64;
536 return tex_tile_w_offset;
537 }
538 }
539
540 static void *
541 tex_staging_sys_map_bo(struct ilo_texture *tex,
542 bool for_read_back,
543 bool linear_view)
544 {
545 const struct ilo_screen *is = ilo_screen(tex->base.screen);
546 const bool prefer_cpu = (is->dev.has_llc || for_read_back);
547 void *ptr;
548
549 if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
550 !linear_view))
551 ptr = intel_bo_map(tex->vma.bo, !for_read_back);
552 else
553 ptr = intel_bo_map_gtt(tex->vma.bo);
554
555 if (ptr)
556 ptr = (void *) ((char *) ptr + tex->vma.bo_offset);
557
558 return ptr;
559 }
560
561 static void
562 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
563 {
564 intel_bo_unmap(tex->vma.bo);
565 }
566
567 static bool
568 tex_staging_sys_zs_read(struct ilo_texture *tex,
569 const struct ilo_transfer *xfer)
570 {
571 const struct ilo_screen *is = ilo_screen(tex->base.screen);
572 const bool swizzle = is->dev.has_address_swizzling;
573 const struct pipe_box *box = &xfer->base.box;
574 const uint8_t *src;
575 tex_tile_offset_func tile_offset;
576 unsigned tiles_per_row;
577 int slice;
578
579 src = tex_staging_sys_map_bo(tex, true, false);
580 if (!src)
581 return false;
582
583 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
584
585 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
586
587 if (tex->separate_s8) {
588 struct ilo_texture *s8_tex = tex->separate_s8;
589 const uint8_t *s8_src;
590 tex_tile_offset_func s8_tile_offset;
591 unsigned s8_tiles_per_row;
592 int dst_cpp, dst_s8_pos, src_cpp_used;
593
594 s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
595 if (!s8_src) {
596 tex_staging_sys_unmap_bo(tex);
597 return false;
598 }
599
600 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
601
602 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
603 assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
604
605 dst_cpp = 4;
606 dst_s8_pos = 3;
607 src_cpp_used = 3;
608 }
609 else {
610 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
611 assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
612
613 dst_cpp = 8;
614 dst_s8_pos = 4;
615 src_cpp_used = 4;
616 }
617
618 for (slice = 0; slice < box->depth; slice++) {
619 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
620 uint8_t *dst;
621 int i, j;
622
623 tex_get_box_origin(tex, xfer->base.level, slice,
624 box, &mem_x, &mem_y);
625 tex_get_box_origin(s8_tex, xfer->base.level, slice,
626 box, &s8_mem_x, &s8_mem_y);
627
628 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
629
630 for (i = 0; i < box->height; i++) {
631 unsigned x = mem_x, s8_x = s8_mem_x;
632 uint8_t *d = dst;
633
634 for (j = 0; j < box->width; j++) {
635 const unsigned offset =
636 tile_offset(x, mem_y, tiles_per_row, swizzle);
637 const unsigned s8_offset =
638 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
639
640 memcpy(d, src + offset, src_cpp_used);
641 d[dst_s8_pos] = s8_src[s8_offset];
642
643 d += dst_cpp;
644 x += tex->image.block_size;
645 s8_x++;
646 }
647
648 dst += xfer->base.stride;
649 mem_y++;
650 s8_mem_y++;
651 }
652 }
653
654 tex_staging_sys_unmap_bo(s8_tex);
655 }
656 else {
657 assert(tex->image_format == PIPE_FORMAT_S8_UINT);
658
659 for (slice = 0; slice < box->depth; slice++) {
660 unsigned mem_x, mem_y;
661 uint8_t *dst;
662 int i, j;
663
664 tex_get_box_origin(tex, xfer->base.level, slice,
665 box, &mem_x, &mem_y);
666
667 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
668
669 for (i = 0; i < box->height; i++) {
670 unsigned x = mem_x;
671 uint8_t *d = dst;
672
673 for (j = 0; j < box->width; j++) {
674 const unsigned offset =
675 tile_offset(x, mem_y, tiles_per_row, swizzle);
676
677 *d = src[offset];
678
679 d++;
680 x++;
681 }
682
683 dst += xfer->base.stride;
684 mem_y++;
685 }
686 }
687 }
688
689 tex_staging_sys_unmap_bo(tex);
690
691 return true;
692 }
693
694 static bool
695 tex_staging_sys_zs_write(struct ilo_texture *tex,
696 const struct ilo_transfer *xfer)
697 {
698 const struct ilo_screen *is = ilo_screen(tex->base.screen);
699 const bool swizzle = is->dev.has_address_swizzling;
700 const struct pipe_box *box = &xfer->base.box;
701 uint8_t *dst;
702 tex_tile_offset_func tile_offset;
703 unsigned tiles_per_row;
704 int slice;
705
706 dst = tex_staging_sys_map_bo(tex, false, false);
707 if (!dst)
708 return false;
709
710 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
711
712 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
713
714 if (tex->separate_s8) {
715 struct ilo_texture *s8_tex = tex->separate_s8;
716 uint8_t *s8_dst;
717 tex_tile_offset_func s8_tile_offset;
718 unsigned s8_tiles_per_row;
719 int src_cpp, src_s8_pos, dst_cpp_used;
720
721 s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
722 if (!s8_dst) {
723          tex_staging_sys_unmap_bo(tex);
724 return false;
725 }
726
727 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
728
729 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
730 assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
731
732 src_cpp = 4;
733 src_s8_pos = 3;
734 dst_cpp_used = 3;
735 }
736 else {
737 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
738 assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
739
740 src_cpp = 8;
741 src_s8_pos = 4;
742 dst_cpp_used = 4;
743 }
744
745 for (slice = 0; slice < box->depth; slice++) {
746 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
747 const uint8_t *src;
748 int i, j;
749
750 tex_get_box_origin(tex, xfer->base.level, slice,
751 box, &mem_x, &mem_y);
752 tex_get_box_origin(s8_tex, xfer->base.level, slice,
753 box, &s8_mem_x, &s8_mem_y);
754
755 src = xfer->staging.sys + xfer->base.layer_stride * slice;
756
757 for (i = 0; i < box->height; i++) {
758 unsigned x = mem_x, s8_x = s8_mem_x;
759 const uint8_t *s = src;
760
761 for (j = 0; j < box->width; j++) {
762 const unsigned offset =
763 tile_offset(x, mem_y, tiles_per_row, swizzle);
764 const unsigned s8_offset =
765 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
766
767 memcpy(dst + offset, s, dst_cpp_used);
768 s8_dst[s8_offset] = s[src_s8_pos];
769
770 s += src_cpp;
771 x += tex->image.block_size;
772 s8_x++;
773 }
774
775 src += xfer->base.stride;
776 mem_y++;
777 s8_mem_y++;
778 }
779 }
780
781 tex_staging_sys_unmap_bo(s8_tex);
782 }
783 else {
784 assert(tex->image_format == PIPE_FORMAT_S8_UINT);
785
786 for (slice = 0; slice < box->depth; slice++) {
787 unsigned mem_x, mem_y;
788 const uint8_t *src;
789 int i, j;
790
791 tex_get_box_origin(tex, xfer->base.level, slice,
792 box, &mem_x, &mem_y);
793
794 src = xfer->staging.sys + xfer->base.layer_stride * slice;
795
796 for (i = 0; i < box->height; i++) {
797 unsigned x = mem_x;
798 const uint8_t *s = src;
799
800 for (j = 0; j < box->width; j++) {
801 const unsigned offset =
802 tile_offset(x, mem_y, tiles_per_row, swizzle);
803
804 dst[offset] = *s;
805
806 s++;
807 x++;
808 }
809
810 src += xfer->base.stride;
811 mem_y++;
812 }
813 }
814 }
815
816 tex_staging_sys_unmap_bo(tex);
817
818 return true;
819 }
820
821 static bool
822 tex_staging_sys_convert_write(struct ilo_texture *tex,
823 const struct ilo_transfer *xfer)
824 {
825 const struct pipe_box *box = &xfer->base.box;
826 unsigned dst_slice_stride;
827 void *dst;
828 int slice;
829
830 dst = tex_staging_sys_map_bo(tex, false, true);
831 if (!dst)
832 return false;
833
834 dst += tex_get_box_offset(tex, xfer->base.level, box);
835
836 /* slice stride is not always available */
837 if (box->depth > 1)
838 dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
839 else
840 dst_slice_stride = 0;
841
842 if (unlikely(tex->image_format == tex->base.format)) {
843 util_copy_box(dst, tex->image_format, tex->image.bo_stride,
844 dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
845 xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
846 0, 0, 0);
847
848 tex_staging_sys_unmap_bo(tex);
849
850 return true;
851 }
852
853 switch (tex->base.format) {
854 case PIPE_FORMAT_ETC1_RGB8:
855 assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM);
856
857 for (slice = 0; slice < box->depth; slice++) {
858 const void *src =
859 xfer->staging.sys + xfer->base.layer_stride * slice;
860
861 util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
862 tex->image.bo_stride, src, xfer->base.stride,
863 box->width, box->height);
864
865 dst += dst_slice_stride;
866 }
867 break;
868 default:
869 assert(!"unable to convert the staging data");
870 break;
871 }
872
873 tex_staging_sys_unmap_bo(tex);
874
875 return true;
876 }
877
878 static void
879 tex_staging_sys_writeback(struct ilo_transfer *xfer)
880 {
881 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
882 bool success;
883
884 if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
885 return;
886
887 switch (xfer->method) {
888 case ILO_TRANSFER_MAP_SW_CONVERT:
889 success = tex_staging_sys_convert_write(tex, xfer);
890 break;
891 case ILO_TRANSFER_MAP_SW_ZS:
892 success = tex_staging_sys_zs_write(tex, xfer);
893 break;
894 default:
895 assert(!"unknown mapping method");
896 success = false;
897 break;
898 }
899
900 if (!success)
901 ilo_err("failed to map resource for moving staging data\n");
902 }
903
904 static bool
905 tex_staging_sys_readback(struct ilo_transfer *xfer)
906 {
907 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
908 bool read_back = false, success;
909
910 /* see if we need to read the resource back */
911 if (xfer->base.usage & PIPE_TRANSFER_READ) {
912 read_back = true;
913 }
914 else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
915 const unsigned discard_flags =
916 (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);
917
918 if (!(xfer->base.usage & discard_flags))
919 read_back = true;
920 }
921
922 if (!read_back)
923 return true;
924
925 switch (xfer->method) {
926 case ILO_TRANSFER_MAP_SW_CONVERT:
927 assert(!"no on-the-fly format conversion for mapping");
928 success = false;
929 break;
930 case ILO_TRANSFER_MAP_SW_ZS:
931 success = tex_staging_sys_zs_read(tex, xfer);
932 break;
933 default:
934 assert(!"unknown mapping method");
935 success = false;
936 break;
937 }
938
939 return success;
940 }
941
942 static void *
943 tex_map(struct ilo_transfer *xfer)
944 {
945 void *ptr;
946
947 switch (xfer->method) {
948 case ILO_TRANSFER_MAP_CPU:
949 case ILO_TRANSFER_MAP_GTT:
950 case ILO_TRANSFER_MAP_GTT_ASYNC:
951 ptr = xfer_map(xfer);
952 if (ptr) {
953 const struct ilo_texture *tex = ilo_texture(xfer->base.resource);
954
955 ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);
956
957 /* stride is for a block row, not a texel row */
958 xfer->base.stride = tex->image.bo_stride;
959 /* note that slice stride is not always available */
960 xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
961 tex_get_slice_stride(tex, xfer->base.level) : 0;
962 }
963 break;
964 case ILO_TRANSFER_MAP_STAGING:
965 ptr = xfer_map(xfer);
966 if (ptr) {
967 const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
968 xfer->base.stride = staging->image.bo_stride;
969 xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
970 }
971 break;
972 case ILO_TRANSFER_MAP_SW_CONVERT:
973 case ILO_TRANSFER_MAP_SW_ZS:
974 if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
975 ptr = xfer_map(xfer);
976 else
977 ptr = NULL;
978 break;
979 default:
980 assert(!"unknown mapping method");
981 ptr = NULL;
982 break;
983 }
984
985 return ptr;
986 }
987
988 static void *
989 buf_map(struct ilo_transfer *xfer)
990 {
991 void *ptr;
992
993 ptr = xfer_map(xfer);
994 if (!ptr)
995 return NULL;
996
997 if (xfer->method != ILO_TRANSFER_MAP_STAGING)
998 ptr += xfer->base.box.x;
999
1000 xfer->base.stride = 0;
1001 xfer->base.layer_stride = 0;
1002
1003 assert(xfer->base.level == 0);
1004 assert(xfer->base.box.y == 0);
1005 assert(xfer->base.box.z == 0);
1006 assert(xfer->base.box.height == 1);
1007 assert(xfer->base.box.depth == 1);
1008
1009 return ptr;
1010 }
1011
1012 static void
1013 copy_staging_resource(struct ilo_context *ilo,
1014 struct ilo_transfer *xfer,
1015 const struct pipe_box *box)
1016 {
1017 const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
1018 xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
1019 struct pipe_box modified_box;
1020
1021 assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
1022
1023 if (!box) {
1024 u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
1025 xfer->base.box.depth, &modified_box);
1026 box = &modified_box;
1027 }
1028 else if (pad_x) {
1029 modified_box = *box;
1030 modified_box.x += pad_x;
1031 box = &modified_box;
1032 }
1033
1034 ilo_blitter_blt_copy_resource(ilo->blitter,
1035 xfer->base.resource, xfer->base.level,
1036 xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
1037 xfer->staging.res, 0, box);
1038 }
1039
1040 static bool
1041 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
1042 {
1043 const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);
1044
1045 if (need_submit)
1046 *need_submit = referenced;
1047
1048 if (referenced)
1049 return true;
1050
1051 return intel_bo_is_busy(bo);
1052 }
1053
1054 /**
1055 * Choose the best mapping method, depending on the transfer usage and whether
1056 * the bo is busy.
1057 */
1058 static bool
1059 choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
1060 {
1061 struct pipe_resource *res = xfer->base.resource;
1062 bool need_submit;
1063
1064 if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
1065 return false;
1066
1067 /* see if we can avoid blocking */
1068 if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) {
1069 bool resource_renamed;
1070
1071 if (!xfer_unblock(xfer, &resource_renamed)) {
1072 if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
1073 return false;
1074
1075          /* submit to make the bo really busy so that map() correctly blocks */
1076 if (need_submit)
1077 ilo_cp_submit(ilo->cp, "syncing for transfers");
1078 }
1079
1080 if (resource_renamed)
1081 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1082 }
1083
1084 return true;
1085 }
1086
1087 static void
1088 buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
1089 unsigned usage, int offset, int size, const void *data)
1090 {
1091 struct ilo_buffer_resource *buf = ilo_buffer_resource(res);
1092 bool need_submit;
1093
1094 /* see if we can avoid blocking */
1095 if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) {
1096 bool unblocked = false;
1097
1098 if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1099 ilo_resource_rename_bo(res)) {
1100 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1101 unblocked = true;
1102 }
1103 else {
1104 struct pipe_resource templ, *staging;
1105
1106 /*
1107           * allocate a staging buffer to hold the data and copy it over with a
1108           * pipelined blit
1109 */
1110 templ = *res;
1111 templ.width0 = size;
1112 templ.usage = PIPE_USAGE_STAGING;
1113 templ.bind = 0;
1114 staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
1115 if (staging) {
1116 const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging);
1117 struct pipe_box staging_box;
1118
1119 /* offset by staging_vma->bo_offset for pwrite */
1120 intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset,
1121 size, data);
1122
1123 u_box_1d(0, size, &staging_box);
1124 ilo_blitter_blt_copy_resource(ilo->blitter,
1125 res, 0, offset, 0, 0,
1126 staging, 0, &staging_box);
1127
1128 pipe_resource_reference(&staging, NULL);
1129
1130 return;
1131 }
1132 }
1133
1134        /* submit to make the bo really busy so that pwrite() correctly blocks */
1135 if (!unblocked && need_submit)
1136 ilo_cp_submit(ilo->cp, "syncing for pwrites");
1137 }
1138
1139 /* offset by buf->vma.bo_offset for pwrite */
1140 intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data);
1141 }
1142
1143 static void
1144 ilo_transfer_flush_region(struct pipe_context *pipe,
1145 struct pipe_transfer *transfer,
1146 const struct pipe_box *box)
1147 {
1148 struct ilo_context *ilo = ilo_context(pipe);
1149 struct ilo_transfer *xfer = ilo_transfer(transfer);
1150
1151 /*
1152 * The staging resource is mapped persistently and coherently. We can copy
1153 * without unmapping.
1154 */
1155 if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
1156 (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1157 copy_staging_resource(ilo, xfer, box);
1158 }
1159
1160 static void
1161 ilo_transfer_unmap(struct pipe_context *pipe,
1162 struct pipe_transfer *transfer)
1163 {
1164 struct ilo_context *ilo = ilo_context(pipe);
1165 struct ilo_transfer *xfer = ilo_transfer(transfer);
1166
1167 xfer_unmap(xfer);
1168
1169 switch (xfer->method) {
1170 case ILO_TRANSFER_MAP_STAGING:
1171 if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1172 copy_staging_resource(ilo, xfer, NULL);
1173 pipe_resource_reference(&xfer->staging.res, NULL);
1174 break;
1175 case ILO_TRANSFER_MAP_SW_CONVERT:
1176 case ILO_TRANSFER_MAP_SW_ZS:
1177 tex_staging_sys_writeback(xfer);
1178 align_free(xfer->staging.sys);
1179 break;
1180 default:
1181 break;
1182 }
1183
1184 pipe_resource_reference(&xfer->base.resource, NULL);
1185
1186 slab_free_st(&ilo->transfer_mempool, xfer);
1187 }
1188
1189 static void *
1190 ilo_transfer_map(struct pipe_context *pipe,
1191 struct pipe_resource *res,
1192 unsigned level,
1193 unsigned usage,
1194 const struct pipe_box *box,
1195 struct pipe_transfer **transfer)
1196 {
1197 struct ilo_context *ilo = ilo_context(pipe);
1198 struct ilo_transfer *xfer;
1199 void *ptr;
1200
1201 /* note that xfer is not zero'd */
1202 xfer = slab_alloc_st(&ilo->transfer_mempool);
1203 if (!xfer) {
1204 *transfer = NULL;
1205 return NULL;
1206 }
1207
1208 xfer->base.resource = NULL;
1209 pipe_resource_reference(&xfer->base.resource, res);
1210 xfer->base.level = level;
1211 xfer->base.usage = usage;
1212 xfer->base.box = *box;
1213
1214 ilo_blit_resolve_transfer(ilo, &xfer->base);
1215
1216 if (choose_transfer_method(ilo, xfer)) {
1217 if (res->target == PIPE_BUFFER)
1218 ptr = buf_map(xfer);
1219 else
1220 ptr = tex_map(xfer);
1221 }
1222 else {
1223 ptr = NULL;
1224 }
1225
1226 if (!ptr) {
1227 pipe_resource_reference(&xfer->base.resource, NULL);
1228 slab_free_st(&ilo->transfer_mempool, xfer);
1229 *transfer = NULL;
1230 return NULL;
1231 }
1232
1233 *transfer = &xfer->base;
1234
1235 return ptr;
1236 }
1237
1238 static void ilo_buffer_subdata(struct pipe_context *pipe,
1239 struct pipe_resource *resource,
1240 unsigned usage, unsigned offset,
1241 unsigned size, const void *data)
1242 {
1243 if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
1244 u_default_buffer_subdata(pipe, resource, usage, offset, size, data);
1245 else
1246 buf_pwrite(ilo_context(pipe), resource, usage, offset, size, data);
1247 }
1248
1249 /**
1250 * Initialize transfer-related functions.
1251 */
1252 void
1253 ilo_init_transfer_functions(struct ilo_context *ilo)
1254 {
1255 ilo->base.transfer_map = ilo_transfer_map;
1256 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1257 ilo->base.transfer_unmap = ilo_transfer_unmap;
1258 ilo->base.buffer_subdata = ilo_buffer_subdata;
1259 ilo->base.texture_subdata = u_default_texture_subdata;
1260 }