ilo: add ilo_image_can_enable_aux()
src/gallium/drivers/ilo/ilo_transfer.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
31
32 #include "ilo_blit.h"
33 #include "ilo_blitter.h"
34 #include "ilo_cp.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
39
40 /*
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
  43  * for the GPU to finish, but there are cases where we can avoid waiting.
44 *
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50 * mapping, and (pipelined-)copy it over to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
54 *
55 * However, there are other flags to consider.
56 *
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
  61  * - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer
  62  *   while it is mapped.  Synchronization is done by defining memory
  63  *   barriers, explicitly via memory_barrier() or implicitly via
  64  *   transfer_flush_region(), as well as GPU fences.
  65  * - When PIPE_TRANSFER_COHERENT is set, updates by either the CPU or the
  66  *   GPU should be made visible to the other side immediately.  Since the
  67  *   kernel flushes GPU caches at the end of each batch buffer, the CPU
  68  *   always sees GPU updates.  We could use a coherent mapping to make all
  69  *   persistent mappings coherent.
70 *
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
73 */
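/*
 * As a concrete illustration of how these flags combine (a hypothetical
 * caller; pipe_buffer_map_range() is the generic Gallium helper and the
 * buffer and sizes are made up, not taken from this driver):
 *
 *    struct pipe_transfer *xfer;
 *    void *ptr = pipe_buffer_map_range(pipe, buf, offset, size,
 *                                      PIPE_TRANSFER_WRITE |
 *                                      PIPE_TRANSFER_DISCARD_RANGE,
 *                                      &xfer);
 *    memcpy(ptr, data, size);
 *    pipe_buffer_unmap(pipe, xfer);
 *
 * If buf is busy, DISCARD_RANGE lets us map a staging bo here and copy it
 * over to the real bo on unmap.  Adding PIPE_TRANSFER_UNSYNCHRONIZED would
 * instead map the real bo asynchronously, and adding PIPE_TRANSFER_DONTBLOCK
 * would make the map fail rather than stall when neither trick applies.
 */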
74
75 /**
76 * Return a transfer method suitable for the usage. The returned method will
77 * correctly block when the resource is busy.
78 */
79 static bool
80 resource_get_transfer_method(struct pipe_resource *res,
81 const struct pipe_transfer *transfer,
82 enum ilo_transfer_map_method *method)
83 {
84 const struct ilo_screen *is = ilo_screen(res->screen);
85 const unsigned usage = transfer->usage;
86 enum ilo_transfer_map_method m;
87 bool tiled;
88
89 if (res->target == PIPE_BUFFER) {
90 tiled = false;
91 } else {
92 struct ilo_texture *tex = ilo_texture(res);
93 bool need_convert = false;
94
95 /* we may need to convert on the fly */
96 if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
99 ilo_image_can_enable_aux(&tex->image, transfer->level)) {
100 m = ILO_TRANSFER_MAP_SW_ZS;
101 need_convert = true;
102 }
103 } else if (tex->image.format != tex->base.format) {
104 m = ILO_TRANSFER_MAP_SW_CONVERT;
105 need_convert = true;
106 }
107
108 if (need_convert) {
109 if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
110 return false;
111
112 *method = m;
113 return true;
114 }
115
116 tiled = (tex->image.tiling != GEN6_TILING_NONE);
117 }
118
119 if (tiled)
120 m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
121 else if (is->dev.has_llc)
122 m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
123 else if (usage & PIPE_TRANSFER_PERSISTENT)
124 m = ILO_TRANSFER_MAP_GTT; /* for coherency */
125 else if (usage & PIPE_TRANSFER_READ)
126 m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
127 else
128 m = ILO_TRANSFER_MAP_GTT;
129
130 *method = m;
131
132 return true;
133 }
134
135 /**
136 * Return true if usage allows the use of staging bo to avoid blocking.
137 */
138 static bool
139 usage_allows_staging_bo(unsigned usage)
140 {
141 /* do we know how to write the data back to the resource? */
142 const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
143 PIPE_TRANSFER_DISCARD_RANGE |
144 PIPE_TRANSFER_FLUSH_EXPLICIT);
145 const unsigned reasons_against = (PIPE_TRANSFER_READ |
146 PIPE_TRANSFER_MAP_DIRECTLY |
147 PIPE_TRANSFER_PERSISTENT);
148
149 return (usage & can_writeback) && !(usage & reasons_against);
150 }
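/*
 * For example (purely illustrative): WRITE | DISCARD_RANGE qualifies for a
 * staging bo, while WRITE alone or WRITE | READ does not, because the
 * existing contents of the resource would have to be brought into the
 * staging bo first, which we do not do here.
 */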
151
152 /**
153 * Allocate the staging resource. It is always linear and its size matches
 154  * the transfer box, with proper padding.
155 */
156 static bool
157 xfer_alloc_staging_res(struct ilo_transfer *xfer)
158 {
159 const struct pipe_resource *res = xfer->base.resource;
160 const struct pipe_box *box = &xfer->base.box;
161 struct pipe_resource templ;
162
163 memset(&templ, 0, sizeof(templ));
164
165 templ.format = res->format;
166
167 if (res->target == PIPE_BUFFER) {
168 templ.target = PIPE_BUFFER;
169 templ.width0 =
170 (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
171 }
172 else {
173 /* use 2D array for any texture target */
174 templ.target = PIPE_TEXTURE_2D_ARRAY;
175 templ.width0 = box->width;
176 }
177
178 templ.height0 = box->height;
179 templ.depth0 = 1;
180 templ.array_size = box->depth;
181 templ.nr_samples = 1;
182 templ.usage = PIPE_USAGE_STAGING;
183 templ.bind = PIPE_BIND_TRANSFER_WRITE;
184
185 if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
186 templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
187 PIPE_RESOURCE_FLAG_MAP_COHERENT;
188 }
189
190 xfer->staging.res = res->screen->resource_create(res->screen, &templ);
191
192 if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
193 assert(ilo_texture(xfer->staging.res)->image.tiling ==
194 GEN6_TILING_NONE);
195 }
196
197 return (xfer->staging.res != NULL);
198 }
199
200 /**
201 * Use an alternative transfer method or rename the resource to unblock an
202 * otherwise blocking transfer.
203 */
204 static bool
205 xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
206 {
207 struct pipe_resource *res = xfer->base.resource;
208 bool unblocked = false, renamed = false;
209
210 switch (xfer->method) {
211 case ILO_TRANSFER_MAP_CPU:
212 case ILO_TRANSFER_MAP_GTT:
213 if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
214 xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
215 unblocked = true;
216 }
217 else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
218 ilo_resource_rename_bo(res)) {
219 renamed = true;
220 unblocked = true;
221 }
222 else if (usage_allows_staging_bo(xfer->base.usage) &&
223 xfer_alloc_staging_res(xfer)) {
224 xfer->method = ILO_TRANSFER_MAP_STAGING;
225 unblocked = true;
226 }
227 break;
228 case ILO_TRANSFER_MAP_GTT_ASYNC:
229 case ILO_TRANSFER_MAP_STAGING:
230 unblocked = true;
231 break;
232 default:
233 break;
234 }
235
236 *resource_renamed = renamed;
237
238 return unblocked;
239 }
240
241 /**
242 * Allocate the staging system buffer based on the resource format and the
243 * transfer box.
244 */
245 static bool
246 xfer_alloc_staging_sys(struct ilo_transfer *xfer)
247 {
248 const enum pipe_format format = xfer->base.resource->format;
249 const struct pipe_box *box = &xfer->base.box;
250 const unsigned alignment = 64;
251
252 /* need to tell the world the layout */
253 xfer->base.stride =
254 align(util_format_get_stride(format, box->width), alignment);
255 xfer->base.layer_stride =
256 util_format_get_2d_size(format, xfer->base.stride, box->height);
257
258 xfer->staging.sys =
259 align_malloc(xfer->base.layer_stride * box->depth, alignment);
260
261 return (xfer->staging.sys != NULL);
262 }
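/*
 * A worked example of the sizing above (the numbers are made up): for a
 * 100x60x1 box of PIPE_FORMAT_B8G8R8A8_UNORM, util_format_get_stride()
 * returns 100 * 4 = 400 bytes, which is aligned up to a stride of 448.
 * The layer stride is then 448 * 60 = 26880 bytes, and 26880 * 1 bytes are
 * allocated for the single layer.
 */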
263
264 /**
265 * Map according to the method. The staging system buffer should have been
266 * allocated if the method requires it.
267 */
268 static void *
269 xfer_map(struct ilo_transfer *xfer)
270 {
271 void *ptr;
272
273 switch (xfer->method) {
274 case ILO_TRANSFER_MAP_CPU:
275 ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
276 xfer->base.usage & PIPE_TRANSFER_WRITE);
277 break;
278 case ILO_TRANSFER_MAP_GTT:
279 ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
280 break;
281 case ILO_TRANSFER_MAP_GTT_ASYNC:
282 ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
283 break;
284 case ILO_TRANSFER_MAP_STAGING:
285 {
286 const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
287 struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
288
289 /*
290 * We want a writable, optionally persistent and coherent, mapping
 291           * for a linear bo.  We could call resource_get_transfer_method(),
 292           * but the choice here turns out to be fairly simple.
293 */
294 if (is->dev.has_llc)
295 ptr = intel_bo_map(bo, true);
296 else
297 ptr = intel_bo_map_gtt(bo);
298
299 if (ptr && xfer->staging.res->target == PIPE_BUFFER)
300 ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
301
302 }
303 break;
304 case ILO_TRANSFER_MAP_SW_CONVERT:
305 case ILO_TRANSFER_MAP_SW_ZS:
306 ptr = xfer->staging.sys;
307 break;
308 default:
309 assert(!"unknown mapping method");
310 ptr = NULL;
311 break;
312 }
313
314 return ptr;
315 }
316
317 /**
318 * Unmap a transfer.
319 */
320 static void
321 xfer_unmap(struct ilo_transfer *xfer)
322 {
323 switch (xfer->method) {
324 case ILO_TRANSFER_MAP_CPU:
325 case ILO_TRANSFER_MAP_GTT:
326 case ILO_TRANSFER_MAP_GTT_ASYNC:
327 intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
328 break;
329 case ILO_TRANSFER_MAP_STAGING:
330 intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
331 break;
332 default:
333 break;
334 }
335 }
336
337 static void
338 tex_get_box_origin(const struct ilo_texture *tex,
339 unsigned level, unsigned slice,
340 const struct pipe_box *box,
341 unsigned *mem_x, unsigned *mem_y)
342 {
343 unsigned x, y;
344
345 ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
346 x += box->x;
347 y += box->y;
348
349 ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
350 }
351
352 static unsigned
353 tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
354 const struct pipe_box *box)
355 {
356 unsigned mem_x, mem_y;
357
358 tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);
359
360 return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
361 }
362
363 static unsigned
364 tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
365 {
366 return ilo_image_get_slice_stride(&tex->image, level);
367 }
368
369 static unsigned
370 tex_tile_x_swizzle(unsigned addr)
371 {
372 /*
373 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
374 *
375 * "As shown in the tiling algorithm, the new address bit[6] should be:
376 *
377 * Address bit[6] <= TiledAddr bit[6] XOR
378 * TiledAddr bit[9] XOR
379 * TiledAddr bit[10]"
380 */
381 return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
382 }
383
384 static unsigned
385 tex_tile_y_swizzle(unsigned addr)
386 {
387 /*
388 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
389 *
390 * "As shown in the tiling algorithm, The new address bit[6] becomes:
391 *
392 * Address bit[6] <= TiledAddr bit[6] XOR
393 * TiledAddr bit[9]"
394 */
395 return addr ^ ((addr >> 3) & 0x40);
396 }
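/*
 * A quick sanity check of the swizzles (illustrative values): for the tiled
 * offset 0x0240, bits 6 and 9 are set and bit 10 is clear, so both swizzles
 * flip bit 6 and return 0x0200.  For 0x0640, bits 9 and 10 cancel out under
 * the X swizzle (0x0640 is returned unchanged), while the Y swizzle still
 * flips bit 6 and returns 0x0600.
 */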
397
398 static unsigned
399 tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
400 unsigned tiles_per_row, bool swizzle)
401 {
402 /*
403 * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that a
404 * X-major tile has 8 rows and 32 OWord columns (512 bytes). Tiles in the
405 * tiled region are numbered in row-major order, starting from zero. The
406 * tile number can thus be calculated as follows:
407 *
408 * tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
409 *
410 * OWords in that tile are also numbered in row-major order, starting from
411 * zero. The OWord number can thus be calculated as follows:
412 *
413 * oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
414 *
415 * and the tiled offset is
416 *
417 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
418 * = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
419 */
420 unsigned tile, offset;
421
422 tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
423 offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);
424
425 return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
426 }
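/*
 * A worked example (the values are made up): with bo_stride = 2048,
 * tiles_per_row is 2048 / 512 = 4, and the byte at (mem_x, mem_y) =
 * (1000, 10) lands at
 *
 *   tile   = (10 / 8) * 4 + (1000 / 512) = 5
 *   offset = 5 * 4096 + (10 % 8) * 512 + (1000 % 512) = 21992
 *
 * before any address swizzling is applied.
 */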
427
428 static unsigned
429 tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
430 unsigned tiles_per_row, bool swizzle)
431 {
432 /*
433 * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
434 * Y-major tile has 32 rows and 8 OWord columns (128 bytes). Tiles in the
435 * tiled region are numbered in row-major order, starting from zero. The
436 * tile number can thus be calculated as follows:
437 *
438 * tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
439 *
440 * OWords in that tile are numbered in column-major order, starting from
441 * zero. The OWord number can thus be calculated as follows:
442 *
443 * oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
444 *
445 * and the tiled offset is
446 *
447 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
448 */
449 unsigned tile, oword, offset;
450
451 tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
452 oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
453 offset = tile << 12 | oword << 4 | (mem_x & 0xf);
454
455 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
456 }
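/*
 * A worked example (the values are made up): with bo_stride = 1024,
 * tiles_per_row is 1024 / 128 = 8, and the byte at (mem_x, mem_y) =
 * (300, 40) lands at
 *
 *   tile   = (40 / 32) * 8 + (300 / 128) = 10
 *   oword  = ((300 % 128) / 16) * 32 + (40 % 32) = 72
 *   offset = 10 * 4096 + 72 * 16 + (300 % 16) = 42124
 *
 * before any address swizzling is applied.
 */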
457
458 static unsigned
459 tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
460 unsigned tiles_per_row, bool swizzle)
461 {
462 /*
463 * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
464 * W-major tile has 8 8x8-block rows and 8 8x8-block columns. Tiles in the
465 * tiled region are numbered in row-major order, starting from zero. The
466 * tile number can thus be calculated as follows:
467 *
468 * tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
469 *
470 * 8x8-blocks in that tile are numbered in column-major order, starting
471 * from zero. The 8x8-block number can thus be calculated as follows:
472 *
473 * blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
474 *
475 * Each 8x8-block is divided into 4 4x4-blocks, in row-major order. Each
476 * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
477 * We have
478 *
479 * blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
480 * blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
481 * blk1 = (((mem_y % 64) ) & 1) * 2 + (((mem_x % 64) ) & 1)
482 *
483 * and the tiled offset is
484 *
485 * offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
486 */
487 unsigned tile, blk8, blk4, blk2, blk1, offset;
488
489 tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
490 blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
491 blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
492 blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
493 blk1 = ((mem_y ) & 0x1) << 1 | ((mem_x ) & 0x1);
494 offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;
495
496 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
497 }
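/*
 * A worked example (the values are made up): with bo_stride = 512,
 * tiles_per_row is 512 / 64 = 8, and the stencil byte at (mem_x, mem_y) =
 * (70, 10) lands at
 *
 *   tile   = (10 / 64) * 8 + (70 / 64) = 1
 *   blk8   = ((70 % 64) / 8) * 8 + ((10 % 64) / 8) = 1
 *   blk4   = (((10 % 64) / 4) & 1) * 2 + (((70 % 64) / 4) & 1) = 1
 *   blk2   = (((10 % 64) / 2) & 1) * 2 + (((70 % 64) / 2) & 1) = 3
 *   blk1   = ((10 % 64) & 1) * 2 + ((70 % 64) & 1) = 0
 *   offset = 1 * 4096 + 1 * 64 + 1 * 16 + 3 * 4 + 0 = 4188
 *
 * before any address swizzling is applied.
 */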
498
499 static unsigned
500 tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
501 unsigned tiles_per_row, bool swizzle)
502 {
503 return mem_y * tiles_per_row + mem_x;
504 }
505
506 typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
507 unsigned tiles_per_row,
508 bool swizzle);
509
510 static tex_tile_offset_func
511 tex_tile_choose_offset_func(const struct ilo_texture *tex,
512 unsigned *tiles_per_row)
513 {
514 switch (tex->image.tiling) {
515 default:
516 assert(!"unknown tiling");
517 /* fall through */
518 case GEN6_TILING_NONE:
519 *tiles_per_row = tex->image.bo_stride;
520 return tex_tile_none_offset;
521 case GEN6_TILING_X:
522 *tiles_per_row = tex->image.bo_stride / 512;
523 return tex_tile_x_offset;
524 case GEN6_TILING_Y:
525 *tiles_per_row = tex->image.bo_stride / 128;
526 return tex_tile_y_offset;
527 case GEN8_TILING_W:
528 *tiles_per_row = tex->image.bo_stride / 64;
529 return tex_tile_w_offset;
530 }
531 }
532
533 static void *
534 tex_staging_sys_map_bo(struct ilo_texture *tex,
535 bool for_read_back,
536 bool linear_view)
537 {
538 const struct ilo_screen *is = ilo_screen(tex->base.screen);
539 const bool prefer_cpu = (is->dev.has_llc || for_read_back);
540 void *ptr;
541
542 if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
543 !linear_view))
544 ptr = intel_bo_map(tex->image.bo, !for_read_back);
545 else
546 ptr = intel_bo_map_gtt(tex->image.bo);
547
548 return ptr;
549 }
550
551 static void
552 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
553 {
554 intel_bo_unmap(tex->image.bo);
555 }
556
557 static bool
558 tex_staging_sys_zs_read(struct ilo_texture *tex,
559 const struct ilo_transfer *xfer)
560 {
561 const struct ilo_screen *is = ilo_screen(tex->base.screen);
562 const bool swizzle = is->dev.has_address_swizzling;
563 const struct pipe_box *box = &xfer->base.box;
564 const uint8_t *src;
565 tex_tile_offset_func tile_offset;
566 unsigned tiles_per_row;
567 int slice;
568
569 src = tex_staging_sys_map_bo(tex, true, false);
570 if (!src)
571 return false;
572
573 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
574
575 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
576
577 if (tex->separate_s8) {
578 struct ilo_texture *s8_tex = tex->separate_s8;
579 const uint8_t *s8_src;
580 tex_tile_offset_func s8_tile_offset;
581 unsigned s8_tiles_per_row;
582 int dst_cpp, dst_s8_pos, src_cpp_used;
583
584 s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
585 if (!s8_src) {
586 tex_staging_sys_unmap_bo(tex);
587 return false;
588 }
589
590 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
591
592 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
593 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
594
595 dst_cpp = 4;
596 dst_s8_pos = 3;
597 src_cpp_used = 3;
598 }
599 else {
600 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
601 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
602
603 dst_cpp = 8;
604 dst_s8_pos = 4;
605 src_cpp_used = 4;
606 }
607
608 for (slice = 0; slice < box->depth; slice++) {
609 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
610 uint8_t *dst;
611 int i, j;
612
613 tex_get_box_origin(tex, xfer->base.level, slice,
614 box, &mem_x, &mem_y);
615 tex_get_box_origin(s8_tex, xfer->base.level, slice,
616 box, &s8_mem_x, &s8_mem_y);
617
618 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
619
620 for (i = 0; i < box->height; i++) {
621 unsigned x = mem_x, s8_x = s8_mem_x;
622 uint8_t *d = dst;
623
624 for (j = 0; j < box->width; j++) {
625 const unsigned offset =
626 tile_offset(x, mem_y, tiles_per_row, swizzle);
627 const unsigned s8_offset =
628 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
629
630 memcpy(d, src + offset, src_cpp_used);
631 d[dst_s8_pos] = s8_src[s8_offset];
632
633 d += dst_cpp;
634 x += tex->image.block_size;
635 s8_x++;
636 }
637
638 dst += xfer->base.stride;
639 mem_y++;
640 s8_mem_y++;
641 }
642 }
643
644 tex_staging_sys_unmap_bo(s8_tex);
645 }
646 else {
647 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
648
649 for (slice = 0; slice < box->depth; slice++) {
650 unsigned mem_x, mem_y;
651 uint8_t *dst;
652 int i, j;
653
654 tex_get_box_origin(tex, xfer->base.level, slice,
655 box, &mem_x, &mem_y);
656
657 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
658
659 for (i = 0; i < box->height; i++) {
660 unsigned x = mem_x;
661 uint8_t *d = dst;
662
663 for (j = 0; j < box->width; j++) {
664 const unsigned offset =
665 tile_offset(x, mem_y, tiles_per_row, swizzle);
666
667 *d = src[offset];
668
669 d++;
670 x++;
671 }
672
673 dst += xfer->base.stride;
674 mem_y++;
675 }
676 }
677 }
678
679 tex_staging_sys_unmap_bo(tex);
680
681 return true;
682 }
683
684 static bool
685 tex_staging_sys_zs_write(struct ilo_texture *tex,
686 const struct ilo_transfer *xfer)
687 {
688 const struct ilo_screen *is = ilo_screen(tex->base.screen);
689 const bool swizzle = is->dev.has_address_swizzling;
690 const struct pipe_box *box = &xfer->base.box;
691 uint8_t *dst;
692 tex_tile_offset_func tile_offset;
693 unsigned tiles_per_row;
694 int slice;
695
696 dst = tex_staging_sys_map_bo(tex, false, false);
697 if (!dst)
698 return false;
699
700 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
701
702 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
703
704 if (tex->separate_s8) {
705 struct ilo_texture *s8_tex = tex->separate_s8;
706 uint8_t *s8_dst;
707 tex_tile_offset_func s8_tile_offset;
708 unsigned s8_tiles_per_row;
709 int src_cpp, src_s8_pos, dst_cpp_used;
710
711 s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
712 if (!s8_dst) {
 713          tex_staging_sys_unmap_bo(tex);
714 return false;
715 }
716
717 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
718
719 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
720 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
721
722 src_cpp = 4;
723 src_s8_pos = 3;
724 dst_cpp_used = 3;
725 }
726 else {
727 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
728 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
729
730 src_cpp = 8;
731 src_s8_pos = 4;
732 dst_cpp_used = 4;
733 }
734
735 for (slice = 0; slice < box->depth; slice++) {
736 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
737 const uint8_t *src;
738 int i, j;
739
740 tex_get_box_origin(tex, xfer->base.level, slice,
741 box, &mem_x, &mem_y);
742 tex_get_box_origin(s8_tex, xfer->base.level, slice,
743 box, &s8_mem_x, &s8_mem_y);
744
745 src = xfer->staging.sys + xfer->base.layer_stride * slice;
746
747 for (i = 0; i < box->height; i++) {
748 unsigned x = mem_x, s8_x = s8_mem_x;
749 const uint8_t *s = src;
750
751 for (j = 0; j < box->width; j++) {
752 const unsigned offset =
753 tile_offset(x, mem_y, tiles_per_row, swizzle);
754 const unsigned s8_offset =
755 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
756
757 memcpy(dst + offset, s, dst_cpp_used);
758 s8_dst[s8_offset] = s[src_s8_pos];
759
760 s += src_cpp;
761 x += tex->image.block_size;
762 s8_x++;
763 }
764
765 src += xfer->base.stride;
766 mem_y++;
767 s8_mem_y++;
768 }
769 }
770
771 tex_staging_sys_unmap_bo(s8_tex);
772 }
773 else {
774 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
775
776 for (slice = 0; slice < box->depth; slice++) {
777 unsigned mem_x, mem_y;
778 const uint8_t *src;
779 int i, j;
780
781 tex_get_box_origin(tex, xfer->base.level, slice,
782 box, &mem_x, &mem_y);
783
784 src = xfer->staging.sys + xfer->base.layer_stride * slice;
785
786 for (i = 0; i < box->height; i++) {
787 unsigned x = mem_x;
788 const uint8_t *s = src;
789
790 for (j = 0; j < box->width; j++) {
791 const unsigned offset =
792 tile_offset(x, mem_y, tiles_per_row, swizzle);
793
794 dst[offset] = *s;
795
796 s++;
797 x++;
798 }
799
800 src += xfer->base.stride;
801 mem_y++;
802 }
803 }
804 }
805
806 tex_staging_sys_unmap_bo(tex);
807
808 return true;
809 }
810
811 static bool
812 tex_staging_sys_convert_write(struct ilo_texture *tex,
813 const struct ilo_transfer *xfer)
814 {
815 const struct pipe_box *box = &xfer->base.box;
816 unsigned dst_slice_stride;
817 void *dst;
818 int slice;
819
820 dst = tex_staging_sys_map_bo(tex, false, true);
821 if (!dst)
822 return false;
823
824 dst += tex_get_box_offset(tex, xfer->base.level, box);
825
826 /* slice stride is not always available */
827 if (box->depth > 1)
828 dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
829 else
830 dst_slice_stride = 0;
831
832 if (unlikely(tex->image.format == tex->base.format)) {
833 util_copy_box(dst, tex->image.format, tex->image.bo_stride,
834 dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
835 xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
836 0, 0, 0);
837
838 tex_staging_sys_unmap_bo(tex);
839
840 return true;
841 }
842
843 switch (tex->base.format) {
844 case PIPE_FORMAT_ETC1_RGB8:
845 assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
846
847 for (slice = 0; slice < box->depth; slice++) {
848 const void *src =
849 xfer->staging.sys + xfer->base.layer_stride * slice;
850
851 util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
852 tex->image.bo_stride, src, xfer->base.stride,
853 box->width, box->height);
854
855 dst += dst_slice_stride;
856 }
857 break;
858 default:
859 assert(!"unable to convert the staging data");
860 break;
861 }
862
863 tex_staging_sys_unmap_bo(tex);
864
865 return true;
866 }
867
868 static void
869 tex_staging_sys_writeback(struct ilo_transfer *xfer)
870 {
871 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
872 bool success;
873
874 if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
875 return;
876
877 switch (xfer->method) {
878 case ILO_TRANSFER_MAP_SW_CONVERT:
879 success = tex_staging_sys_convert_write(tex, xfer);
880 break;
881 case ILO_TRANSFER_MAP_SW_ZS:
882 success = tex_staging_sys_zs_write(tex, xfer);
883 break;
884 default:
885 assert(!"unknown mapping method");
886 success = false;
887 break;
888 }
889
890 if (!success)
891 ilo_err("failed to map resource for moving staging data\n");
892 }
893
894 static bool
895 tex_staging_sys_readback(struct ilo_transfer *xfer)
896 {
897 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
898 bool read_back = false, success;
899
900 /* see if we need to read the resource back */
901 if (xfer->base.usage & PIPE_TRANSFER_READ) {
902 read_back = true;
903 }
904 else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
905 const unsigned discard_flags =
906 (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);
907
908 if (!(xfer->base.usage & discard_flags))
909 read_back = true;
910 }
911
912 if (!read_back)
913 return true;
914
915 switch (xfer->method) {
916 case ILO_TRANSFER_MAP_SW_CONVERT:
917 assert(!"no on-the-fly format conversion for mapping");
918 success = false;
919 break;
920 case ILO_TRANSFER_MAP_SW_ZS:
921 success = tex_staging_sys_zs_read(tex, xfer);
922 break;
923 default:
924 assert(!"unknown mapping method");
925 success = false;
926 break;
927 }
928
929 return success;
930 }
931
932 static void *
933 tex_map(struct ilo_transfer *xfer)
934 {
935 void *ptr;
936
937 switch (xfer->method) {
938 case ILO_TRANSFER_MAP_CPU:
939 case ILO_TRANSFER_MAP_GTT:
940 case ILO_TRANSFER_MAP_GTT_ASYNC:
941 ptr = xfer_map(xfer);
942 if (ptr) {
943 const struct ilo_texture *tex = ilo_texture(xfer->base.resource);
944
945 ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);
946
947 /* stride is for a block row, not a texel row */
948 xfer->base.stride = tex->image.bo_stride;
949 /* note that slice stride is not always available */
950 xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
951 tex_get_slice_stride(tex, xfer->base.level) : 0;
952 }
953 break;
954 case ILO_TRANSFER_MAP_STAGING:
955 ptr = xfer_map(xfer);
956 if (ptr) {
957 const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
958 xfer->base.stride = staging->image.bo_stride;
959 xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
960 }
961 break;
962 case ILO_TRANSFER_MAP_SW_CONVERT:
963 case ILO_TRANSFER_MAP_SW_ZS:
964 if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
965 ptr = xfer_map(xfer);
966 else
967 ptr = NULL;
968 break;
969 default:
970 assert(!"unknown mapping method");
971 ptr = NULL;
972 break;
973 }
974
975 return ptr;
976 }
977
978 static void *
979 buf_map(struct ilo_transfer *xfer)
980 {
981 void *ptr;
982
983 ptr = xfer_map(xfer);
984 if (!ptr)
985 return NULL;
986
987 if (xfer->method != ILO_TRANSFER_MAP_STAGING)
988 ptr += xfer->base.box.x;
989
990 xfer->base.stride = 0;
991 xfer->base.layer_stride = 0;
992
993 assert(xfer->base.level == 0);
994 assert(xfer->base.box.y == 0);
995 assert(xfer->base.box.z == 0);
996 assert(xfer->base.box.height == 1);
997 assert(xfer->base.box.depth == 1);
998
999 return ptr;
1000 }
1001
1002 static void
1003 copy_staging_resource(struct ilo_context *ilo,
1004 struct ilo_transfer *xfer,
1005 const struct pipe_box *box)
1006 {
1007 const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
1008 xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
1009 struct pipe_box modified_box;
1010
1011 assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
1012
1013 if (!box) {
1014 u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
1015 xfer->base.box.depth, &modified_box);
1016 box = &modified_box;
1017 }
1018 else if (pad_x) {
1019 modified_box = *box;
1020 modified_box.x += pad_x;
1021 box = &modified_box;
1022 }
1023
1024 ilo_blitter_blt_copy_resource(ilo->blitter,
1025 xfer->base.resource, xfer->base.level,
1026 xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
1027 xfer->staging.res, 0, box);
1028 }
1029
1030 static bool
1031 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
1032 {
1033 const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);
1034
1035 if (need_submit)
1036 *need_submit = referenced;
1037
1038 if (referenced)
1039 return true;
1040
1041 return intel_bo_is_busy(bo);
1042 }
1043
1044 /**
1045 * Choose the best mapping method, depending on the transfer usage and whether
1046 * the bo is busy.
1047 */
1048 static bool
1049 choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
1050 {
1051 struct pipe_resource *res = xfer->base.resource;
1052 bool need_submit;
1053
1054 if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
1055 return false;
1056
1057 /* see if we can avoid blocking */
1058 if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
1059 bool resource_renamed;
1060
1061 if (!xfer_unblock(xfer, &resource_renamed)) {
1062 if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
1063 return false;
1064
1065          /* submit so that the bo is really busy and map() correctly blocks */
1066 if (need_submit)
1067 ilo_cp_submit(ilo->cp, "syncing for transfers");
1068 }
1069
1070 if (resource_renamed)
1071 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1072 }
1073
1074 return true;
1075 }
1076
1077 static void
1078 buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
1079 unsigned usage, int offset, int size, const void *data)
1080 {
1081 struct ilo_buffer *buf = ilo_buffer(res);
1082 bool need_submit;
1083
1084 /* see if we can avoid blocking */
1085 if (is_bo_busy(ilo, buf->bo, &need_submit)) {
1086 bool unblocked = false;
1087
1088 if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1089 ilo_resource_rename_bo(res)) {
1090 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1091 unblocked = true;
1092 }
1093 else {
1094 struct pipe_resource templ, *staging;
1095
1096 /*
1097           * allocate a staging buffer to hold the data and do a pipelined
1098           * copy of it to the real buffer
1099 */
1100 templ = *res;
1101 templ.width0 = size;
1102 templ.usage = PIPE_USAGE_STAGING;
1103 templ.bind = PIPE_BIND_TRANSFER_WRITE;
1104 staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
1105 if (staging) {
1106 struct pipe_box staging_box;
1107
1108 intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
1109
1110 u_box_1d(0, size, &staging_box);
1111 ilo_blitter_blt_copy_resource(ilo->blitter,
1112 res, 0, offset, 0, 0,
1113 staging, 0, &staging_box);
1114
1115 pipe_resource_reference(&staging, NULL);
1116
1117 return;
1118 }
1119 }
1120
1121       /* submit so that the bo is really busy and pwrite() correctly blocks */
1122 if (!unblocked && need_submit)
1123 ilo_cp_submit(ilo->cp, "syncing for pwrites");
1124 }
1125
1126 intel_bo_pwrite(buf->bo, offset, size, data);
1127 }
1128
1129 static void
1130 ilo_transfer_flush_region(struct pipe_context *pipe,
1131 struct pipe_transfer *transfer,
1132 const struct pipe_box *box)
1133 {
1134 struct ilo_context *ilo = ilo_context(pipe);
1135 struct ilo_transfer *xfer = ilo_transfer(transfer);
1136
1137 /*
1138 * The staging resource is mapped persistently and coherently. We can copy
1139 * without unmapping.
1140 */
1141 if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
1142 (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1143 copy_staging_resource(ilo, xfer, box);
1144 }
1145
1146 static void
1147 ilo_transfer_unmap(struct pipe_context *pipe,
1148 struct pipe_transfer *transfer)
1149 {
1150 struct ilo_context *ilo = ilo_context(pipe);
1151 struct ilo_transfer *xfer = ilo_transfer(transfer);
1152
1153 xfer_unmap(xfer);
1154
1155 switch (xfer->method) {
1156 case ILO_TRANSFER_MAP_STAGING:
1157 if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1158 copy_staging_resource(ilo, xfer, NULL);
1159 pipe_resource_reference(&xfer->staging.res, NULL);
1160 break;
1161 case ILO_TRANSFER_MAP_SW_CONVERT:
1162 case ILO_TRANSFER_MAP_SW_ZS:
1163 tex_staging_sys_writeback(xfer);
1164 align_free(xfer->staging.sys);
1165 break;
1166 default:
1167 break;
1168 }
1169
1170 pipe_resource_reference(&xfer->base.resource, NULL);
1171
1172 util_slab_free(&ilo->transfer_mempool, xfer);
1173 }
1174
1175 static void *
1176 ilo_transfer_map(struct pipe_context *pipe,
1177 struct pipe_resource *res,
1178 unsigned level,
1179 unsigned usage,
1180 const struct pipe_box *box,
1181 struct pipe_transfer **transfer)
1182 {
1183 struct ilo_context *ilo = ilo_context(pipe);
1184 struct ilo_transfer *xfer;
1185 void *ptr;
1186
1187 /* note that xfer is not zero'd */
1188 xfer = util_slab_alloc(&ilo->transfer_mempool);
1189 if (!xfer) {
1190 *transfer = NULL;
1191 return NULL;
1192 }
1193
1194 xfer->base.resource = NULL;
1195 pipe_resource_reference(&xfer->base.resource, res);
1196 xfer->base.level = level;
1197 xfer->base.usage = usage;
1198 xfer->base.box = *box;
1199
1200 ilo_blit_resolve_transfer(ilo, &xfer->base);
1201
1202 if (choose_transfer_method(ilo, xfer)) {
1203 if (res->target == PIPE_BUFFER)
1204 ptr = buf_map(xfer);
1205 else
1206 ptr = tex_map(xfer);
1207 }
1208 else {
1209 ptr = NULL;
1210 }
1211
1212 if (!ptr) {
1213 pipe_resource_reference(&xfer->base.resource, NULL);
1214 util_slab_free(&ilo->transfer_mempool, xfer);
1215 *transfer = NULL;
1216 return NULL;
1217 }
1218
1219 *transfer = &xfer->base;
1220
1221 return ptr;
1222 }
1223
1224 static void
1225 ilo_transfer_inline_write(struct pipe_context *pipe,
1226 struct pipe_resource *res,
1227 unsigned level,
1228 unsigned usage,
1229 const struct pipe_box *box,
1230 const void *data,
1231 unsigned stride,
1232 unsigned layer_stride)
1233 {
1234 if (likely(res->target == PIPE_BUFFER) &&
1235 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1236 /* they should specify just an offset and a size */
1237 assert(level == 0);
1238 assert(box->y == 0);
1239 assert(box->z == 0);
1240 assert(box->height == 1);
1241 assert(box->depth == 1);
1242
1243 buf_pwrite(ilo_context(pipe), res,
1244 usage, box->x, box->width, data);
1245 }
1246 else {
1247 u_default_transfer_inline_write(pipe, res,
1248 level, usage, box, data, stride, layer_stride);
1249 }
1250 }
1251
1252 /**
1253 * Initialize transfer-related functions.
1254 */
1255 void
1256 ilo_init_transfer_functions(struct ilo_context *ilo)
1257 {
1258 ilo->base.transfer_map = ilo_transfer_map;
1259 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1260 ilo->base.transfer_unmap = ilo_transfer_unmap;
1261 ilo->base.transfer_inline_write = ilo_transfer_inline_write;
1262 }