[mesa.git] / src / gallium / drivers / ilo / ilo_transfer.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
31
32 #include "ilo_blit.h"
33 #include "ilo_blitter.h"
34 #include "ilo_cp.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
39
40 /*
41 * For buffers that are not busy, we want to map/unmap them directly. For
42  * those that are busy, we have to worry about synchronization.  We could wait
43  * for the GPU to finish, but there are cases where we can avoid waiting.
44 *
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo by a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50  *    mapping, and do a pipelined copy to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
54 *
55 * However, there are other flags to consider.
56 *
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61  *  - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer
62  *    while it is mapped.  Synchronization is done by inserting memory barriers,
63 * explicitly via memory_barrier() or implicitly via
64 * transfer_flush_region(), as well as GPU fences.
65 * - When PIPE_TRANSFER_COHERENT is set, updates by either CPU or GPU should
66 * be made visible to the other side immediately. Since the kernel flushes
67  *    GPU caches at the end of each batch buffer, the CPU always sees GPU
68  *    updates.  We could use a coherent mapping to make all persistent
69  *    mappings coherent.
70 *
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
73 */
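/*
 * A minimal caller-side sketch (not part of this driver) of how the flags
 * above come into play; "ctx", "vbuf", "data", "offset", and "size" are
 * hypothetical names:
 *
 *    struct pipe_transfer *xfer;
 *    struct pipe_box box;
 *    void *ptr;
 *
 *    u_box_1d(offset, size, &box);
 *
 *    // WRITE | DISCARD_RANGE: if vbuf is busy, the scheme above lets the
 *    // driver map a staging bo here and do a pipelined copy to the real
 *    // bo when the transfer is unmapped.
 *    ptr = ctx->transfer_map(ctx, vbuf, 0,
 *                            PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
 *                            &box, &xfer);
 *    memcpy(ptr, data, size);
 *    ctx->transfer_unmap(ctx, xfer);
 */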
74
75 /**
76  * Return a transfer method suitable for the usage.  Mapping with the
77  * returned method blocks correctly when the resource is busy.
78 */
79 static bool
80 resource_get_transfer_method(struct pipe_resource *res,
81 const struct pipe_transfer *transfer,
82 enum ilo_transfer_map_method *method)
83 {
84 const struct ilo_screen *is = ilo_screen(res->screen);
85 const unsigned usage = transfer->usage;
86 enum ilo_transfer_map_method m;
87 bool tiled;
88
89 if (res->target == PIPE_BUFFER) {
90 tiled = false;
91 } else {
92 struct ilo_texture *tex = ilo_texture(res);
93 bool need_convert = false;
94
95 /* we may need to convert on the fly */
96 if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
99 ilo_texture_can_enable_hiz(tex, transfer->level,
100 transfer->box.z, transfer->box.depth)) {
101 m = ILO_TRANSFER_MAP_SW_ZS;
102 need_convert = true;
103 }
104 } else if (tex->image.format != tex->base.format) {
105 m = ILO_TRANSFER_MAP_SW_CONVERT;
106 need_convert = true;
107 }
108
109 if (need_convert) {
110 if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
111 return false;
112
113 *method = m;
114 return true;
115 }
116
117 tiled = (tex->image.tiling != GEN6_TILING_NONE);
118 }
119
120 if (tiled)
121 m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
122 else if (is->dev.has_llc)
123 m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
124 else if (usage & PIPE_TRANSFER_PERSISTENT)
125 m = ILO_TRANSFER_MAP_GTT; /* for coherency */
126 else if (usage & PIPE_TRANSFER_READ)
127 m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
128 else
129 m = ILO_TRANSFER_MAP_GTT;
130
131 *method = m;
132
133 return true;
134 }
135
136 /**
137 * Rename the bo of the resource.
138 */
139 static bool
140 resource_rename_bo(struct pipe_resource *res)
141 {
142 return (res->target == PIPE_BUFFER) ?
143 ilo_buffer_rename_bo(ilo_buffer(res)) :
144 ilo_texture_rename_bo(ilo_texture(res));
145 }
146
147 /**
148 * Return true if usage allows the use of staging bo to avoid blocking.
149 */
150 static bool
151 usage_allows_staging_bo(unsigned usage)
152 {
153 /* do we know how to write the data back to the resource? */
154 const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
155 PIPE_TRANSFER_DISCARD_RANGE |
156 PIPE_TRANSFER_FLUSH_EXPLICIT);
157 const unsigned reasons_against = (PIPE_TRANSFER_READ |
158 PIPE_TRANSFER_MAP_DIRECTLY |
159 PIPE_TRANSFER_PERSISTENT);
160
161 return (usage & can_writeback) && !(usage & reasons_against);
162 }
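/*
 * A few illustrative usage combinations and what the check above decides
 * (flag prefixes shortened for brevity):
 *
 *    WRITE | DISCARD_RANGE                -> staging bo allowed
 *    WRITE | FLUSH_EXPLICIT               -> staging bo allowed
 *    READ | WRITE                         -> not allowed (data must be read back)
 *    WRITE | DISCARD_RANGE | MAP_DIRECTLY -> not allowed (staging forbidden)
 */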
163
164 /**
165 * Allocate the staging resource. It is always linear and its size matches
166  * the transfer box, with proper padding.
167 */
168 static bool
169 xfer_alloc_staging_res(struct ilo_transfer *xfer)
170 {
171 const struct pipe_resource *res = xfer->base.resource;
172 const struct pipe_box *box = &xfer->base.box;
173 struct pipe_resource templ;
174
175 memset(&templ, 0, sizeof(templ));
176
177 templ.format = res->format;
178
179 if (res->target == PIPE_BUFFER) {
180 templ.target = PIPE_BUFFER;
181 templ.width0 =
182 (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
183 }
184 else {
185 /* use 2D array for any texture target */
186 templ.target = PIPE_TEXTURE_2D_ARRAY;
187 templ.width0 = box->width;
188 }
189
190 templ.height0 = box->height;
191 templ.depth0 = 1;
192 templ.array_size = box->depth;
193 templ.nr_samples = 1;
194 templ.usage = PIPE_USAGE_STAGING;
195 templ.bind = PIPE_BIND_TRANSFER_WRITE;
196
197 if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
198 templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
199 PIPE_RESOURCE_FLAG_MAP_COHERENT;
200 }
201
202 xfer->staging.res = res->screen->resource_create(res->screen, &templ);
203
204 if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
205 assert(ilo_texture(xfer->staging.res)->image.tiling ==
206 GEN6_TILING_NONE);
207 }
208
209 return (xfer->staging.res != NULL);
210 }
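/*
 * Worked example of the buffer padding above, assuming
 * ILO_TRANSFER_MAP_BUFFER_ALIGNMENT is 64 (the actual value is defined in
 * ilo_transfer.h): for box->x = 70 and box->width = 100, width0 becomes
 * 6 + 100 = 106.  xfer_map() later advances the CPU pointer by the same 6
 * bytes, and copy_staging_resource() starts the copy at x = 6, so the data
 * keeps the same offset modulo the alignment as it has in the real buffer.
 */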
211
212 /**
213 * Use an alternative transfer method or rename the resource to unblock an
214 * otherwise blocking transfer.
215 */
216 static bool
217 xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
218 {
219 struct pipe_resource *res = xfer->base.resource;
220 bool unblocked = false, renamed = false;
221
222 switch (xfer->method) {
223 case ILO_TRANSFER_MAP_CPU:
224 case ILO_TRANSFER_MAP_GTT:
225 if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
226 xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
227 unblocked = true;
228 }
229 else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
230 resource_rename_bo(res)) {
231 renamed = true;
232 unblocked = true;
233 }
234 else if (usage_allows_staging_bo(xfer->base.usage) &&
235 xfer_alloc_staging_res(xfer)) {
236 xfer->method = ILO_TRANSFER_MAP_STAGING;
237 unblocked = true;
238 }
239 break;
240 case ILO_TRANSFER_MAP_GTT_ASYNC:
241 case ILO_TRANSFER_MAP_STAGING:
242 unblocked = true;
243 break;
244 default:
245 break;
246 }
247
248 *resource_renamed = renamed;
249
250 return unblocked;
251 }
252
253 /**
254 * Allocate the staging system buffer based on the resource format and the
255 * transfer box.
256 */
257 static bool
258 xfer_alloc_staging_sys(struct ilo_transfer *xfer)
259 {
260 const enum pipe_format format = xfer->base.resource->format;
261 const struct pipe_box *box = &xfer->base.box;
262 const unsigned alignment = 64;
263
264    /* the caller needs to know the layout of the staging buffer */
265 xfer->base.stride =
266 align(util_format_get_stride(format, box->width), alignment);
267 xfer->base.layer_stride =
268 util_format_get_2d_size(format, xfer->base.stride, box->height);
269
270 xfer->staging.sys =
271 align_malloc(xfer->base.layer_stride * box->depth, alignment);
272
273 return (xfer->staging.sys != NULL);
274 }
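/*
 * Worked example of the layout above (values are illustrative): for
 * PIPE_FORMAT_B8G8R8A8_UNORM (4 bytes per texel) and a 100x64x2 transfer box,
 *
 *    stride       = align(100 * 4, 64) = 448
 *    layer_stride = 448 * 64           = 28672
 *
 * and the staging system buffer is 28672 * 2 = 57344 bytes.  For compressed
 * formats, the two helpers operate on block rows rather than texel rows.
 */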
275
276 /**
277 * Map according to the method. The staging system buffer should have been
278 * allocated if the method requires it.
279 */
280 static void *
281 xfer_map(struct ilo_transfer *xfer)
282 {
283 void *ptr;
284
285 switch (xfer->method) {
286 case ILO_TRANSFER_MAP_CPU:
287 ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
288 xfer->base.usage & PIPE_TRANSFER_WRITE);
289 break;
290 case ILO_TRANSFER_MAP_GTT:
291 ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
292 break;
293 case ILO_TRANSFER_MAP_GTT_ASYNC:
294 ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
295 break;
296 case ILO_TRANSFER_MAP_STAGING:
297 {
298 const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
299 struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
300
301 /*
302 * We want a writable, optionally persistent and coherent, mapping
303          * for a linear bo.  We could call resource_get_transfer_method(), but
304          * the decision here is simple enough to make inline.
305 */
306 if (is->dev.has_llc)
307 ptr = intel_bo_map(bo, true);
308 else
309 ptr = intel_bo_map_gtt(bo);
310
311 if (ptr && xfer->staging.res->target == PIPE_BUFFER)
312 ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
313
314 }
315 break;
316 case ILO_TRANSFER_MAP_SW_CONVERT:
317 case ILO_TRANSFER_MAP_SW_ZS:
318 ptr = xfer->staging.sys;
319 break;
320 default:
321 assert(!"unknown mapping method");
322 ptr = NULL;
323 break;
324 }
325
326 return ptr;
327 }
328
329 /**
330 * Unmap a transfer.
331 */
332 static void
333 xfer_unmap(struct ilo_transfer *xfer)
334 {
335 switch (xfer->method) {
336 case ILO_TRANSFER_MAP_CPU:
337 case ILO_TRANSFER_MAP_GTT:
338 case ILO_TRANSFER_MAP_GTT_ASYNC:
339 intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
340 break;
341 case ILO_TRANSFER_MAP_STAGING:
342 intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
343 break;
344 default:
345 break;
346 }
347 }
348
349 static void
350 tex_get_box_origin(const struct ilo_texture *tex,
351 unsigned level, unsigned slice,
352 const struct pipe_box *box,
353 unsigned *mem_x, unsigned *mem_y)
354 {
355 unsigned x, y;
356
357 ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
358 x += box->x;
359 y += box->y;
360
361 ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
362 }
363
364 static unsigned
365 tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
366 const struct pipe_box *box)
367 {
368 unsigned mem_x, mem_y;
369
370 tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);
371
372 return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
373 }
374
375 static unsigned
376 tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
377 {
378 return ilo_image_get_slice_stride(&tex->image, level);
379 }
380
381 static unsigned
382 tex_tile_x_swizzle(unsigned addr)
383 {
384 /*
385 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
386 *
387 * "As shown in the tiling algorithm, the new address bit[6] should be:
388 *
389 * Address bit[6] <= TiledAddr bit[6] XOR
390 * TiledAddr bit[9] XOR
391 * TiledAddr bit[10]"
392 */
393 return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
394 }
395
396 static unsigned
397 tex_tile_y_swizzle(unsigned addr)
398 {
399 /*
400 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
401 *
402 * "As shown in the tiling algorithm, The new address bit[6] becomes:
403 *
404 * Address bit[6] <= TiledAddr bit[6] XOR
405 * TiledAddr bit[9]"
406 */
407 return addr ^ ((addr >> 3) & 0x40);
408 }
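/*
 * In the two swizzle helpers above, (addr >> 3) & 0x40 isolates bit[9] of
 * the tiled address and, for X-major tiles, (addr >> 4) & 0x40 additionally
 * isolates bit[10]; the XOR folds those bits into bit[6].  For example,
 * addr = 0x200 (only bit[9] set) becomes 0x240 under either swizzle, since
 * bit[6] is flipped.
 */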
409
410 static unsigned
411 tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
412 unsigned tiles_per_row, bool swizzle)
413 {
414 /*
415 * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that a
416 * X-major tile has 8 rows and 32 OWord columns (512 bytes). Tiles in the
417 * tiled region are numbered in row-major order, starting from zero. The
418 * tile number can thus be calculated as follows:
419 *
420 * tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
421 *
422 * OWords in that tile are also numbered in row-major order, starting from
423 * zero. The OWord number can thus be calculated as follows:
424 *
425 * oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
426 *
427 * and the tiled offset is
428 *
429 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
430 * = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
431 */
432 unsigned tile, offset;
433
434 tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
435 offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);
436
437 return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
438 }
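/*
 * Worked example (illustrative values): with a bo_stride of 2048 bytes,
 * tiles_per_row = 2048 / 512 = 4.  For mem_x = 520, mem_y = 10:
 *
 *    tile   = (10 / 8) * 4 + (520 / 512)           = 5
 *    offset = 5 * 4096 + (10 % 8) * 512 + (520 % 512)
 *           = 20480 + 1024 + 8                     = 21512
 *
 * before any address swizzling is applied.
 */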
439
440 static unsigned
441 tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
442 unsigned tiles_per_row, bool swizzle)
443 {
444 /*
445 * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
446 * Y-major tile has 32 rows and 8 OWord columns (128 bytes). Tiles in the
447 * tiled region are numbered in row-major order, starting from zero. The
448 * tile number can thus be calculated as follows:
449 *
450 * tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
451 *
452 * OWords in that tile are numbered in column-major order, starting from
453 * zero. The OWord number can thus be calculated as follows:
454 *
455 * oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
456 *
457 * and the tiled offset is
458 *
459 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
460 */
461 unsigned tile, oword, offset;
462
463 tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
464 oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
465 offset = tile << 12 | oword << 4 | (mem_x & 0xf);
466
467 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
468 }
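/*
 * Worked example (illustrative values): with a bo_stride of 1024 bytes,
 * tiles_per_row = 1024 / 128 = 8.  For mem_x = 130, mem_y = 40:
 *
 *    tile   = (40 / 32) * 8 + (130 / 128)          = 9
 *    oword  = ((130 % 128) / 16) * 32 + (40 % 32)  = 8
 *    offset = 9 * 4096 + 8 * 16 + (130 % 16)       = 36994
 *
 * before any address swizzling is applied.
 */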
469
470 static unsigned
471 tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
472 unsigned tiles_per_row, bool swizzle)
473 {
474 /*
475 * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
476 * W-major tile has 8 8x8-block rows and 8 8x8-block columns. Tiles in the
477 * tiled region are numbered in row-major order, starting from zero. The
478 * tile number can thus be calculated as follows:
479 *
480 * tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
481 *
482 * 8x8-blocks in that tile are numbered in column-major order, starting
483 * from zero. The 8x8-block number can thus be calculated as follows:
484 *
485 * blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
486 *
487 * Each 8x8-block is divided into 4 4x4-blocks, in row-major order. Each
488 * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
489 * We have
490 *
491 * blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
492 * blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
493 * blk1 = (((mem_y % 64) ) & 1) * 2 + (((mem_x % 64) ) & 1)
494 *
495 * and the tiled offset is
496 *
497 * offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
498 */
499 unsigned tile, blk8, blk4, blk2, blk1, offset;
500
501 tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
502 blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
503 blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
504 blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
505 blk1 = ((mem_y ) & 0x1) << 1 | ((mem_x ) & 0x1);
506 offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;
507
508 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
509 }
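/*
 * Worked example (illustrative values): for mem_x = 9, mem_y = 2 in the
 * first tile (tile = 0):
 *
 *    blk8   = ((9 % 64) / 8) * 8 + ((2 % 64) / 8)     = 8
 *    blk4   = 0, blk2 = 2, blk1 = 1
 *    offset = 0 * 4096 + 8 * 64 + 0 * 16 + 2 * 4 + 1  = 521
 *
 * before any address swizzling is applied.
 */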
510
511 static unsigned
512 tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
513 unsigned tiles_per_row, bool swizzle)
514 {
515 return mem_y * tiles_per_row + mem_x;
516 }
517
518 typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
519 unsigned tiles_per_row,
520 bool swizzle);
521
522 static tex_tile_offset_func
523 tex_tile_choose_offset_func(const struct ilo_texture *tex,
524 unsigned *tiles_per_row)
525 {
526 switch (tex->image.tiling) {
527 default:
528 assert(!"unknown tiling");
529 /* fall through */
530 case GEN6_TILING_NONE:
531 *tiles_per_row = tex->image.bo_stride;
532 return tex_tile_none_offset;
533 case GEN6_TILING_X:
534 *tiles_per_row = tex->image.bo_stride / 512;
535 return tex_tile_x_offset;
536 case GEN6_TILING_Y:
537 *tiles_per_row = tex->image.bo_stride / 128;
538 return tex_tile_y_offset;
539 case GEN8_TILING_W:
540 *tiles_per_row = tex->image.bo_stride / 64;
541 return tex_tile_w_offset;
542 }
543 }
544
545 static void *
546 tex_staging_sys_map_bo(struct ilo_texture *tex,
547 bool for_read_back,
548 bool linear_view)
549 {
550 const struct ilo_screen *is = ilo_screen(tex->base.screen);
551 const bool prefer_cpu = (is->dev.has_llc || for_read_back);
552 void *ptr;
553
554 if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
555 !linear_view))
556 ptr = intel_bo_map(tex->bo, !for_read_back);
557 else
558 ptr = intel_bo_map_gtt(tex->bo);
559
560 return ptr;
561 }
562
563 static void
564 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
565 {
566 intel_bo_unmap(tex->bo);
567 }
568
569 static bool
570 tex_staging_sys_zs_read(struct ilo_texture *tex,
571 const struct ilo_transfer *xfer)
572 {
573 const struct ilo_screen *is = ilo_screen(tex->base.screen);
574 const bool swizzle = is->dev.has_address_swizzling;
575 const struct pipe_box *box = &xfer->base.box;
576 const uint8_t *src;
577 tex_tile_offset_func tile_offset;
578 unsigned tiles_per_row;
579 int slice;
580
581 src = tex_staging_sys_map_bo(tex, true, false);
582 if (!src)
583 return false;
584
585 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
586
587 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
588
589 if (tex->separate_s8) {
590 struct ilo_texture *s8_tex = tex->separate_s8;
591 const uint8_t *s8_src;
592 tex_tile_offset_func s8_tile_offset;
593 unsigned s8_tiles_per_row;
594 int dst_cpp, dst_s8_pos, src_cpp_used;
595
596 s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
597 if (!s8_src) {
598 tex_staging_sys_unmap_bo(tex);
599 return false;
600 }
601
602 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
603
604 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
605 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
606
607 dst_cpp = 4;
608 dst_s8_pos = 3;
609 src_cpp_used = 3;
610 }
611 else {
612 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
613 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
614
615 dst_cpp = 8;
616 dst_s8_pos = 4;
617 src_cpp_used = 4;
618 }
619
620 for (slice = 0; slice < box->depth; slice++) {
621 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
622 uint8_t *dst;
623 int i, j;
624
625 tex_get_box_origin(tex, xfer->base.level, slice,
626 box, &mem_x, &mem_y);
627 tex_get_box_origin(s8_tex, xfer->base.level, slice,
628 box, &s8_mem_x, &s8_mem_y);
629
630 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
631
632 for (i = 0; i < box->height; i++) {
633 unsigned x = mem_x, s8_x = s8_mem_x;
634 uint8_t *d = dst;
635
636 for (j = 0; j < box->width; j++) {
637 const unsigned offset =
638 tile_offset(x, mem_y, tiles_per_row, swizzle);
639 const unsigned s8_offset =
640 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
641
642 memcpy(d, src + offset, src_cpp_used);
643 d[dst_s8_pos] = s8_src[s8_offset];
644
645 d += dst_cpp;
646 x += tex->image.block_size;
647 s8_x++;
648 }
649
650 dst += xfer->base.stride;
651 mem_y++;
652 s8_mem_y++;
653 }
654 }
655
656 tex_staging_sys_unmap_bo(s8_tex);
657 }
658 else {
659 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
660
661 for (slice = 0; slice < box->depth; slice++) {
662 unsigned mem_x, mem_y;
663 uint8_t *dst;
664 int i, j;
665
666 tex_get_box_origin(tex, xfer->base.level, slice,
667 box, &mem_x, &mem_y);
668
669 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
670
671 for (i = 0; i < box->height; i++) {
672 unsigned x = mem_x;
673 uint8_t *d = dst;
674
675 for (j = 0; j < box->width; j++) {
676 const unsigned offset =
677 tile_offset(x, mem_y, tiles_per_row, swizzle);
678
679 *d = src[offset];
680
681 d++;
682 x++;
683 }
684
685 dst += xfer->base.stride;
686 mem_y++;
687 }
688 }
689 }
690
691 tex_staging_sys_unmap_bo(tex);
692
693 return true;
694 }
695
696 static bool
697 tex_staging_sys_zs_write(struct ilo_texture *tex,
698 const struct ilo_transfer *xfer)
699 {
700 const struct ilo_screen *is = ilo_screen(tex->base.screen);
701 const bool swizzle = is->dev.has_address_swizzling;
702 const struct pipe_box *box = &xfer->base.box;
703 uint8_t *dst;
704 tex_tile_offset_func tile_offset;
705 unsigned tiles_per_row;
706 int slice;
707
708 dst = tex_staging_sys_map_bo(tex, false, false);
709 if (!dst)
710 return false;
711
712 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
713
714 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
715
716 if (tex->separate_s8) {
717 struct ilo_texture *s8_tex = tex->separate_s8;
718 uint8_t *s8_dst;
719 tex_tile_offset_func s8_tile_offset;
720 unsigned s8_tiles_per_row;
721 int src_cpp, src_s8_pos, dst_cpp_used;
722
723 s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
724 if (!s8_dst) {
725          tex_staging_sys_unmap_bo(tex);
726 return false;
727 }
728
729 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
730
731 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
732 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
733
734 src_cpp = 4;
735 src_s8_pos = 3;
736 dst_cpp_used = 3;
737 }
738 else {
739 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
740 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
741
742 src_cpp = 8;
743 src_s8_pos = 4;
744 dst_cpp_used = 4;
745 }
746
747 for (slice = 0; slice < box->depth; slice++) {
748 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
749 const uint8_t *src;
750 int i, j;
751
752 tex_get_box_origin(tex, xfer->base.level, slice,
753 box, &mem_x, &mem_y);
754 tex_get_box_origin(s8_tex, xfer->base.level, slice,
755 box, &s8_mem_x, &s8_mem_y);
756
757 src = xfer->staging.sys + xfer->base.layer_stride * slice;
758
759 for (i = 0; i < box->height; i++) {
760 unsigned x = mem_x, s8_x = s8_mem_x;
761 const uint8_t *s = src;
762
763 for (j = 0; j < box->width; j++) {
764 const unsigned offset =
765 tile_offset(x, mem_y, tiles_per_row, swizzle);
766 const unsigned s8_offset =
767 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
768
769 memcpy(dst + offset, s, dst_cpp_used);
770 s8_dst[s8_offset] = s[src_s8_pos];
771
772 s += src_cpp;
773 x += tex->image.block_size;
774 s8_x++;
775 }
776
777 src += xfer->base.stride;
778 mem_y++;
779 s8_mem_y++;
780 }
781 }
782
783 tex_staging_sys_unmap_bo(s8_tex);
784 }
785 else {
786 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
787
788 for (slice = 0; slice < box->depth; slice++) {
789 unsigned mem_x, mem_y;
790 const uint8_t *src;
791 int i, j;
792
793 tex_get_box_origin(tex, xfer->base.level, slice,
794 box, &mem_x, &mem_y);
795
796 src = xfer->staging.sys + xfer->base.layer_stride * slice;
797
798 for (i = 0; i < box->height; i++) {
799 unsigned x = mem_x;
800 const uint8_t *s = src;
801
802 for (j = 0; j < box->width; j++) {
803 const unsigned offset =
804 tile_offset(x, mem_y, tiles_per_row, swizzle);
805
806 dst[offset] = *s;
807
808 s++;
809 x++;
810 }
811
812 src += xfer->base.stride;
813 mem_y++;
814 }
815 }
816 }
817
818 tex_staging_sys_unmap_bo(tex);
819
820 return true;
821 }
822
823 static bool
824 tex_staging_sys_convert_write(struct ilo_texture *tex,
825 const struct ilo_transfer *xfer)
826 {
827 const struct pipe_box *box = &xfer->base.box;
828 unsigned dst_slice_stride;
829 void *dst;
830 int slice;
831
832 dst = tex_staging_sys_map_bo(tex, false, true);
833 if (!dst)
834 return false;
835
836 dst += tex_get_box_offset(tex, xfer->base.level, box);
837
838 /* slice stride is not always available */
839 if (box->depth > 1)
840 dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
841 else
842 dst_slice_stride = 0;
843
844 if (unlikely(tex->image.format == tex->base.format)) {
845 util_copy_box(dst, tex->image.format, tex->image.bo_stride,
846 dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
847 xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
848 0, 0, 0);
849
850 tex_staging_sys_unmap_bo(tex);
851
852 return true;
853 }
854
855 switch (tex->base.format) {
856 case PIPE_FORMAT_ETC1_RGB8:
857 assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
858
859 for (slice = 0; slice < box->depth; slice++) {
860 const void *src =
861 xfer->staging.sys + xfer->base.layer_stride * slice;
862
863 util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
864 tex->image.bo_stride, src, xfer->base.stride,
865 box->width, box->height);
866
867 dst += dst_slice_stride;
868 }
869 break;
870 default:
871 assert(!"unable to convert the staging data");
872 break;
873 }
874
875 tex_staging_sys_unmap_bo(tex);
876
877 return true;
878 }
879
880 static void
881 tex_staging_sys_writeback(struct ilo_transfer *xfer)
882 {
883 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
884 bool success;
885
886 if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
887 return;
888
889 switch (xfer->method) {
890 case ILO_TRANSFER_MAP_SW_CONVERT:
891 success = tex_staging_sys_convert_write(tex, xfer);
892 break;
893 case ILO_TRANSFER_MAP_SW_ZS:
894 success = tex_staging_sys_zs_write(tex, xfer);
895 break;
896 default:
897 assert(!"unknown mapping method");
898 success = false;
899 break;
900 }
901
902 if (!success)
903 ilo_err("failed to map resource for moving staging data\n");
904 }
905
906 static bool
907 tex_staging_sys_readback(struct ilo_transfer *xfer)
908 {
909 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
910 bool read_back = false, success;
911
912 /* see if we need to read the resource back */
913 if (xfer->base.usage & PIPE_TRANSFER_READ) {
914 read_back = true;
915 }
916 else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
917 const unsigned discard_flags =
918 (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);
919
920 if (!(xfer->base.usage & discard_flags))
921 read_back = true;
922 }
923
924 if (!read_back)
925 return true;
926
927 switch (xfer->method) {
928 case ILO_TRANSFER_MAP_SW_CONVERT:
929 assert(!"no on-the-fly format conversion for mapping");
930 success = false;
931 break;
932 case ILO_TRANSFER_MAP_SW_ZS:
933 success = tex_staging_sys_zs_read(tex, xfer);
934 break;
935 default:
936 assert(!"unknown mapping method");
937 success = false;
938 break;
939 }
940
941 return success;
942 }
943
944 static void *
945 tex_map(struct ilo_transfer *xfer)
946 {
947 void *ptr;
948
949 switch (xfer->method) {
950 case ILO_TRANSFER_MAP_CPU:
951 case ILO_TRANSFER_MAP_GTT:
952 case ILO_TRANSFER_MAP_GTT_ASYNC:
953 ptr = xfer_map(xfer);
954 if (ptr) {
955 const struct ilo_texture *tex = ilo_texture(xfer->base.resource);
956
957 ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);
958
959 /* stride is for a block row, not a texel row */
960 xfer->base.stride = tex->image.bo_stride;
961 /* note that slice stride is not always available */
962 xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
963 tex_get_slice_stride(tex, xfer->base.level) : 0;
964 }
965 break;
966 case ILO_TRANSFER_MAP_STAGING:
967 ptr = xfer_map(xfer);
968 if (ptr) {
969 const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
970 xfer->base.stride = staging->image.bo_stride;
971 xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
972 }
973 break;
974 case ILO_TRANSFER_MAP_SW_CONVERT:
975 case ILO_TRANSFER_MAP_SW_ZS:
976 if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
977 ptr = xfer_map(xfer);
978 else
979 ptr = NULL;
980 break;
981 default:
982 assert(!"unknown mapping method");
983 ptr = NULL;
984 break;
985 }
986
987 return ptr;
988 }
989
990 static void *
991 buf_map(struct ilo_transfer *xfer)
992 {
993 void *ptr;
994
995 ptr = xfer_map(xfer);
996 if (!ptr)
997 return NULL;
998
999 if (xfer->method != ILO_TRANSFER_MAP_STAGING)
1000 ptr += xfer->base.box.x;
1001
1002 xfer->base.stride = 0;
1003 xfer->base.layer_stride = 0;
1004
1005 assert(xfer->base.level == 0);
1006 assert(xfer->base.box.y == 0);
1007 assert(xfer->base.box.z == 0);
1008 assert(xfer->base.box.height == 1);
1009 assert(xfer->base.box.depth == 1);
1010
1011 return ptr;
1012 }
1013
1014 static void
1015 copy_staging_resource(struct ilo_context *ilo,
1016 struct ilo_transfer *xfer,
1017 const struct pipe_box *box)
1018 {
1019 const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
1020 xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
1021 struct pipe_box modified_box;
1022
1023 assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
1024
1025 if (!box) {
1026 u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
1027 xfer->base.box.depth, &modified_box);
1028 box = &modified_box;
1029 }
1030 else if (pad_x) {
1031 modified_box = *box;
1032 modified_box.x += pad_x;
1033 box = &modified_box;
1034 }
1035
1036 ilo_blitter_blt_copy_resource(ilo->blitter,
1037 xfer->base.resource, xfer->base.level,
1038 xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
1039 xfer->staging.res, 0, box);
1040 }
1041
1042 static bool
1043 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
1044 {
1045 const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);
1046
1047 if (need_submit)
1048 *need_submit = referenced;
1049
1050 if (referenced)
1051 return true;
1052
1053 return intel_bo_is_busy(bo);
1054 }
1055
1056 /**
1057 * Choose the best mapping method, depending on the transfer usage and whether
1058 * the bo is busy.
1059 */
1060 static bool
1061 choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
1062 {
1063 struct pipe_resource *res = xfer->base.resource;
1064 bool need_submit;
1065
1066 if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
1067 return false;
1068
1069 /* see if we can avoid blocking */
1070 if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
1071 bool resource_renamed;
1072
1073 if (!xfer_unblock(xfer, &resource_renamed)) {
1074 if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
1075 return false;
1076
1077          /* submit so that the bo is really busy and map() blocks correctly */
1078 if (need_submit)
1079 ilo_cp_submit(ilo->cp, "syncing for transfers");
1080 }
1081
1082 if (resource_renamed)
1083 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1084 }
1085
1086 return true;
1087 }
1088
1089 static void
1090 buf_pwrite(struct ilo_context *ilo, struct ilo_buffer *buf,
1091 unsigned usage, int offset, int size, const void *data)
1092 {
1093 bool need_submit;
1094
1095 /* see if we can avoid blocking */
1096 if (is_bo_busy(ilo, buf->bo, &need_submit)) {
1097 bool unblocked = false;
1098
1099 if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1100 ilo_buffer_rename_bo(buf)) {
1101 ilo_state_vector_resource_renamed(&ilo->state_vector, &buf->base);
1102 unblocked = true;
1103 }
1104 else {
1105 struct pipe_resource templ, *staging;
1106
1107 /*
1108           * allocate a staging buffer to hold the data and do a pipelined copy
1109           * of it to the real bo
1110 */
1111 templ = buf->base;
1112 templ.width0 = size;
1113 templ.usage = PIPE_USAGE_STAGING;
1114 templ.bind = PIPE_BIND_TRANSFER_WRITE;
1115 staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
1116 if (staging) {
1117 struct pipe_box staging_box;
1118
1119 intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
1120
1121 u_box_1d(0, size, &staging_box);
1122 ilo_blitter_blt_copy_resource(ilo->blitter,
1123 &buf->base, 0, offset, 0, 0,
1124 staging, 0, &staging_box);
1125
1126 pipe_resource_reference(&staging, NULL);
1127
1128 return;
1129 }
1130 }
1131
1132       /* submit so that the bo is really busy and pwrite() blocks correctly */
1133 if (!unblocked && need_submit)
1134 ilo_cp_submit(ilo->cp, "syncing for pwrites");
1135 }
1136
1137 intel_bo_pwrite(buf->bo, offset, size, data);
1138 }
1139
1140 static void
1141 ilo_transfer_flush_region(struct pipe_context *pipe,
1142 struct pipe_transfer *transfer,
1143 const struct pipe_box *box)
1144 {
1145 struct ilo_context *ilo = ilo_context(pipe);
1146 struct ilo_transfer *xfer = ilo_transfer(transfer);
1147
1148 /*
1149 * The staging resource is mapped persistently and coherently. We can copy
1150 * without unmapping.
1151 */
1152 if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
1153 (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1154 copy_staging_resource(ilo, xfer, box);
1155 }
1156
1157 static void
1158 ilo_transfer_unmap(struct pipe_context *pipe,
1159 struct pipe_transfer *transfer)
1160 {
1161 struct ilo_context *ilo = ilo_context(pipe);
1162 struct ilo_transfer *xfer = ilo_transfer(transfer);
1163
1164 xfer_unmap(xfer);
1165
1166 switch (xfer->method) {
1167 case ILO_TRANSFER_MAP_STAGING:
1168 if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1169 copy_staging_resource(ilo, xfer, NULL);
1170 pipe_resource_reference(&xfer->staging.res, NULL);
1171 break;
1172 case ILO_TRANSFER_MAP_SW_CONVERT:
1173 case ILO_TRANSFER_MAP_SW_ZS:
1174 tex_staging_sys_writeback(xfer);
1175 align_free(xfer->staging.sys);
1176 break;
1177 default:
1178 break;
1179 }
1180
1181 pipe_resource_reference(&xfer->base.resource, NULL);
1182
1183 util_slab_free(&ilo->transfer_mempool, xfer);
1184 }
1185
1186 static void *
1187 ilo_transfer_map(struct pipe_context *pipe,
1188 struct pipe_resource *res,
1189 unsigned level,
1190 unsigned usage,
1191 const struct pipe_box *box,
1192 struct pipe_transfer **transfer)
1193 {
1194 struct ilo_context *ilo = ilo_context(pipe);
1195 struct ilo_transfer *xfer;
1196 void *ptr;
1197
1198    /* note that xfer is not zeroed */
1199 xfer = util_slab_alloc(&ilo->transfer_mempool);
1200 if (!xfer) {
1201 *transfer = NULL;
1202 return NULL;
1203 }
1204
1205 xfer->base.resource = NULL;
1206 pipe_resource_reference(&xfer->base.resource, res);
1207 xfer->base.level = level;
1208 xfer->base.usage = usage;
1209 xfer->base.box = *box;
1210
1211 ilo_blit_resolve_transfer(ilo, &xfer->base);
1212
1213 if (choose_transfer_method(ilo, xfer)) {
1214 if (res->target == PIPE_BUFFER)
1215 ptr = buf_map(xfer);
1216 else
1217 ptr = tex_map(xfer);
1218 }
1219 else {
1220 ptr = NULL;
1221 }
1222
1223 if (!ptr) {
1224 pipe_resource_reference(&xfer->base.resource, NULL);
1225 util_slab_free(&ilo->transfer_mempool, xfer);
1226 *transfer = NULL;
1227 return NULL;
1228 }
1229
1230 *transfer = &xfer->base;
1231
1232 return ptr;
1233 }
1234
1235 static void
1236 ilo_transfer_inline_write(struct pipe_context *pipe,
1237 struct pipe_resource *res,
1238 unsigned level,
1239 unsigned usage,
1240 const struct pipe_box *box,
1241 const void *data,
1242 unsigned stride,
1243 unsigned layer_stride)
1244 {
1245 if (likely(res->target == PIPE_BUFFER) &&
1246 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1247       /* the caller should specify just an offset and a size */
1248 assert(level == 0);
1249 assert(box->y == 0);
1250 assert(box->z == 0);
1251 assert(box->height == 1);
1252 assert(box->depth == 1);
1253
1254 buf_pwrite(ilo_context(pipe), ilo_buffer(res),
1255 usage, box->x, box->width, data);
1256 }
1257 else {
1258 u_default_transfer_inline_write(pipe, res,
1259 level, usage, box, data, stride, layer_stride);
1260 }
1261 }
1262
1263 /**
1264 * Initialize transfer-related functions.
1265 */
1266 void
1267 ilo_init_transfer_functions(struct ilo_context *ilo)
1268 {
1269 ilo->base.transfer_map = ilo_transfer_map;
1270 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1271 ilo->base.transfer_unmap = ilo_transfer_unmap;
1272 ilo->base.transfer_inline_write = ilo_transfer_inline_write;
1273 }