[mesa.git] / src / gallium / drivers / ilo / ilo_transfer.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_surface.h"
29 #include "util/u_transfer.h"
30 #include "util/u_format_etc.h"
31
32 #include "ilo_blit.h"
33 #include "ilo_blitter.h"
34 #include "ilo_cp.h"
35 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_state.h"
38 #include "ilo_transfer.h"
39
40 /*
41 * For buffers that are not busy, we want to map/unmap them directly. For
42 * those that are busy, we have to worry about synchronization. We could wait
43 * for the GPU to finish, but there are cases where we can avoid waiting.
44 *
45 * - When PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set, the contents of the
46 * buffer can be discarded. We can replace the backing bo with a new one of
47 * the same size (renaming).
48 * - When PIPE_TRANSFER_DISCARD_RANGE is set, the contents of the mapped
49 * range can be discarded. We can allocate and map a staging bo on
50 * mapping, and (pipelined-)copy it over to the real bo on unmapping.
51 * - When PIPE_TRANSFER_FLUSH_EXPLICIT is set, there is no reading and only
52 * flushed regions need to be written. We can still allocate and map a
53 * staging bo, but should copy only the flushed regions over.
54 *
55 * However, there are other flags to consider.
56 *
57 * - When PIPE_TRANSFER_UNSYNCHRONIZED is set, we do not need to worry about
58 * synchronization at all on mapping.
59 * - When PIPE_TRANSFER_MAP_DIRECTLY is set, no staging area is allowed.
60 * - When PIPE_TRANSFER_DONTBLOCK is set, we should fail if we have to block.
61 * - When PIPE_TRANSFER_PERSISTENT is set, the GPU may access the buffer while it
62 * is mapped. Synchronization is done by defining memory barriers,
63 * explicitly via memory_barrier() or implicitly via
64 * transfer_flush_region(), as well as GPU fences.
65 * - When PIPE_TRANSFER_COHERENT is set, updates by either CPU or GPU should
66 * be made visible to the other side immediately. Since the kernel flushes
67 * GPU caches at the end of each batch buffer, the CPU always sees GPU updates.
68 * We could use a coherent mapping to make all persistent mappings
69 * coherent.
70 *
71 * These also apply to textures, except that we may additionally need to do
72 * format conversion or tiling/untiling.
73 */
74
75 /**
76 * Return a transfer method suitable for the usage. Mapping with the
77 * returned method will correctly block when the resource is busy.
78 */
79 static bool
80 resource_get_transfer_method(struct pipe_resource *res,
81 const struct pipe_transfer *transfer,
82 enum ilo_transfer_map_method *method)
83 {
84 const struct ilo_screen *is = ilo_screen(res->screen);
85 const unsigned usage = transfer->usage;
86 enum ilo_transfer_map_method m;
87 bool tiled;
88
89 if (res->target == PIPE_BUFFER) {
90 tiled = false;
91 } else {
92 struct ilo_texture *tex = ilo_texture(res);
93 bool need_convert = false;
94
95 /* we may need to convert on the fly */
96 if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) {
97 /* on GEN6, separate stencil is enabled only when HiZ is */
98 if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) ||
99 ilo_texture_can_enable_hiz(tex, transfer->level,
100 transfer->box.z, transfer->box.depth)) {
101 m = ILO_TRANSFER_MAP_SW_ZS;
102 need_convert = true;
103 }
104 } else if (tex->image.format != tex->base.format) {
105 m = ILO_TRANSFER_MAP_SW_CONVERT;
106 need_convert = true;
107 }
108
109 if (need_convert) {
110 if (usage & (PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_PERSISTENT))
111 return false;
112
113 *method = m;
114 return true;
115 }
116
117 tiled = (tex->image.tiling != GEN6_TILING_NONE);
118 }
119
120 if (tiled)
121 m = ILO_TRANSFER_MAP_GTT; /* to have a linear view */
122 else if (is->dev.has_llc)
123 m = ILO_TRANSFER_MAP_CPU; /* fast and mostly coherent */
124 else if (usage & PIPE_TRANSFER_PERSISTENT)
125 m = ILO_TRANSFER_MAP_GTT; /* for coherency */
126 else if (usage & PIPE_TRANSFER_READ)
127 m = ILO_TRANSFER_MAP_CPU; /* gtt read is too slow */
128 else
129 m = ILO_TRANSFER_MAP_GTT;
130
131 *method = m;
132
133 return true;
134 }
135
136 /**
137 * Return true if the usage allows the use of a staging bo to avoid blocking.
138 */
139 static bool
140 usage_allows_staging_bo(unsigned usage)
141 {
142 /* do we know how to write the data back to the resource? */
143 const unsigned can_writeback = (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
144 PIPE_TRANSFER_DISCARD_RANGE |
145 PIPE_TRANSFER_FLUSH_EXPLICIT);
146 const unsigned reasons_against = (PIPE_TRANSFER_READ |
147 PIPE_TRANSFER_MAP_DIRECTLY |
148 PIPE_TRANSFER_PERSISTENT);
149
150 return (usage & can_writeback) && !(usage & reasons_against);
151 }
152
153 /**
154 * Allocate the staging resource. It is always linear and its size matches
155 * the transfer box, with proper padding.
156 */
157 static bool
158 xfer_alloc_staging_res(struct ilo_transfer *xfer)
159 {
160 const struct pipe_resource *res = xfer->base.resource;
161 const struct pipe_box *box = &xfer->base.box;
162 struct pipe_resource templ;
163
164 memset(&templ, 0, sizeof(templ));
165
166 templ.format = res->format;
167
168 if (res->target == PIPE_BUFFER) {
169 templ.target = PIPE_BUFFER;
170 templ.width0 =
171 (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) + box->width;
172 }
173 else {
174 /* use 2D array for any texture target */
175 templ.target = PIPE_TEXTURE_2D_ARRAY;
176 templ.width0 = box->width;
177 }
178
179 templ.height0 = box->height;
180 templ.depth0 = 1;
181 templ.array_size = box->depth;
182 templ.nr_samples = 1;
183 templ.usage = PIPE_USAGE_STAGING;
184 templ.bind = PIPE_BIND_TRANSFER_WRITE;
185
186 if (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
187 templ.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
188 PIPE_RESOURCE_FLAG_MAP_COHERENT;
189 }
190
191 xfer->staging.res = res->screen->resource_create(res->screen, &templ);
192
193 if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) {
194 assert(ilo_texture(xfer->staging.res)->image.tiling ==
195 GEN6_TILING_NONE);
196 }
197
198 return (xfer->staging.res != NULL);
199 }
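/*
 * A note on the (box->x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT) padding above:
 * the staging buffer is over-allocated so that the data starts at the same
 * offset modulo the alignment as it does in the real buffer.  xfer_map()
 * advances the returned pointer by the same amount and
 * copy_staging_resource() begins the copy at that offset, presumably so
 * that the BLT copy sees matching alignment on both sides.  For
 * illustration only, with an alignment of 64 and box->x == 100, the pad is
 * 100 % 64 == 36 bytes.
 */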
200
201 /**
202 * Use an alternative transfer method or rename the resource to unblock an
203 * otherwise blocking transfer.
204 */
205 static bool
206 xfer_unblock(struct ilo_transfer *xfer, bool *resource_renamed)
207 {
208 struct pipe_resource *res = xfer->base.resource;
209 bool unblocked = false, renamed = false;
210
211 switch (xfer->method) {
212 case ILO_TRANSFER_MAP_CPU:
213 case ILO_TRANSFER_MAP_GTT:
214 if (xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
215 xfer->method = ILO_TRANSFER_MAP_GTT_ASYNC;
216 unblocked = true;
217 }
218 else if ((xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
219 ilo_resource_rename_bo(res)) {
220 renamed = true;
221 unblocked = true;
222 }
223 else if (usage_allows_staging_bo(xfer->base.usage) &&
224 xfer_alloc_staging_res(xfer)) {
225 xfer->method = ILO_TRANSFER_MAP_STAGING;
226 unblocked = true;
227 }
228 break;
229 case ILO_TRANSFER_MAP_GTT_ASYNC:
230 case ILO_TRANSFER_MAP_STAGING:
231 unblocked = true;
232 break;
233 default:
234 break;
235 }
236
237 *resource_renamed = renamed;
238
239 return unblocked;
240 }
241
242 /**
243 * Allocate the staging system buffer based on the resource format and the
244 * transfer box.
245 */
246 static bool
247 xfer_alloc_staging_sys(struct ilo_transfer *xfer)
248 {
249 const enum pipe_format format = xfer->base.resource->format;
250 const struct pipe_box *box = &xfer->base.box;
251 const unsigned alignment = 64;
252
253 /* need to tell the world the layout */
254 xfer->base.stride =
255 align(util_format_get_stride(format, box->width), alignment);
256 xfer->base.layer_stride =
257 util_format_get_2d_size(format, xfer->base.stride, box->height);
258
259 xfer->staging.sys =
260 align_malloc(xfer->base.layer_stride * box->depth, alignment);
261
262 return (xfer->staging.sys != NULL);
263 }
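/*
 * As an illustration of the layout computed above (format and box chosen
 * arbitrarily), mapping a PIPE_FORMAT_B8G8R8A8_UNORM texture with a
 * 100x50x2 box gives
 *
 *   stride       = align(100 * 4, 64) = 448 bytes per row
 *   layer_stride = 448 * 50           = 22400 bytes per slice
 *   allocation   = 22400 * 2          = 44800 bytes
 *
 * For compressed formats, util_format_get_stride() returns the size of a
 * block row, so the same math applies per block row rather than per texel
 * row.
 */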
264
265 /**
266 * Map according to the method. The staging system buffer should have been
267 * allocated if the method requires it.
268 */
269 static void *
270 xfer_map(struct ilo_transfer *xfer)
271 {
272 void *ptr;
273
274 switch (xfer->method) {
275 case ILO_TRANSFER_MAP_CPU:
276 ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
277 xfer->base.usage & PIPE_TRANSFER_WRITE);
278 break;
279 case ILO_TRANSFER_MAP_GTT:
280 ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
281 break;
282 case ILO_TRANSFER_MAP_GTT_ASYNC:
283 ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
284 break;
285 case ILO_TRANSFER_MAP_STAGING:
286 {
287 const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
288 struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
289
290 /*
291 * We want a writable (and optionally persistent and coherent) mapping of
292 * a linear bo. We could call resource_get_transfer_method(), but the
293 * choice here is simple enough to make directly.
294 */
295 if (is->dev.has_llc)
296 ptr = intel_bo_map(bo, true);
297 else
298 ptr = intel_bo_map_gtt(bo);
299
300 if (ptr && xfer->staging.res->target == PIPE_BUFFER)
301 ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
302
303 }
304 break;
305 case ILO_TRANSFER_MAP_SW_CONVERT:
306 case ILO_TRANSFER_MAP_SW_ZS:
307 ptr = xfer->staging.sys;
308 break;
309 default:
310 assert(!"unknown mapping method");
311 ptr = NULL;
312 break;
313 }
314
315 return ptr;
316 }
317
318 /**
319 * Unmap a transfer.
320 */
321 static void
322 xfer_unmap(struct ilo_transfer *xfer)
323 {
324 switch (xfer->method) {
325 case ILO_TRANSFER_MAP_CPU:
326 case ILO_TRANSFER_MAP_GTT:
327 case ILO_TRANSFER_MAP_GTT_ASYNC:
328 intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
329 break;
330 case ILO_TRANSFER_MAP_STAGING:
331 intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
332 break;
333 default:
334 break;
335 }
336 }
337
338 static void
339 tex_get_box_origin(const struct ilo_texture *tex,
340 unsigned level, unsigned slice,
341 const struct pipe_box *box,
342 unsigned *mem_x, unsigned *mem_y)
343 {
344 unsigned x, y;
345
346 ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y);
347 x += box->x;
348 y += box->y;
349
350 ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y);
351 }
352
353 static unsigned
354 tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
355 const struct pipe_box *box)
356 {
357 unsigned mem_x, mem_y;
358
359 tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);
360
361 return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y);
362 }
363
364 static unsigned
365 tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
366 {
367 return ilo_image_get_slice_stride(&tex->image, level);
368 }
369
370 static unsigned
371 tex_tile_x_swizzle(unsigned addr)
372 {
373 /*
374 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
375 *
376 * "As shown in the tiling algorithm, the new address bit[6] should be:
377 *
378 * Address bit[6] <= TiledAddr bit[6] XOR
379 * TiledAddr bit[9] XOR
380 * TiledAddr bit[10]"
381 */
382 return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
383 }
384
385 static unsigned
386 tex_tile_y_swizzle(unsigned addr)
387 {
388 /*
389 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
390 *
391 * "As shown in the tiling algorithm, The new address bit[6] becomes:
392 *
393 * Address bit[6] <= TiledAddr bit[6] XOR
394 * TiledAddr bit[9]"
395 */
396 return addr ^ ((addr >> 3) & 0x40);
397 }
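/*
 * Both swizzle functions above implement the PRM formulas with shifts:
 * address bit 9 lands on bit 6 of (addr >> 3) and address bit 10 lands on
 * bit 6 of (addr >> 4), so masking with 0x40 isolates the XOR term and the
 * final XOR flips only address bit 6.  For example (address chosen
 * arbitrarily), addr = 0x5458 has bit 9 clear and bit 10 set, so X-tile
 * swizzling flips bit 6 and yields 0x5418.
 */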
398
399 static unsigned
400 tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
401 unsigned tiles_per_row, bool swizzle)
402 {
403 /*
404 * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that an
405 * X-major tile has 8 rows and 32 OWord columns (512 bytes). Tiles in the
406 * tiled region are numbered in row-major order, starting from zero. The
407 * tile number can thus be calculated as follows:
408 *
409 * tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
410 *
411 * OWords in that tile are also numbered in row-major order, starting from
412 * zero. The OWord number can thus be calculated as follows:
413 *
414 * oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
415 *
416 * and the tiled offset is
417 *
418 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
419 * = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
420 */
421 unsigned tile, offset;
422
423 tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
424 offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);
425
426 return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
427 }
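/*
 * Worked example for the X-tiling offset above (numbers chosen
 * arbitrarily): with a bo stride of 2048 bytes there are 2048 / 512 = 4
 * tiles per row.  For mem_x = 600 and mem_y = 10,
 *
 *   tile   = (10 / 8) * 4 + (600 / 512)              = 5
 *   offset = 5 * 4096 + (10 % 8) * 512 + (600 % 512) = 21592
 *
 * before any address swizzling is applied.
 */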
428
429 static unsigned
430 tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
431 unsigned tiles_per_row, bool swizzle)
432 {
433 /*
434 * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
435 * Y-major tile has 32 rows and 8 OWord columns (128 bytes). Tiles in the
436 * tiled region are numbered in row-major order, starting from zero. The
437 * tile number can thus be calculated as follows:
438 *
439 * tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
440 *
441 * OWords in that tile are numbered in column-major order, starting from
442 * zero. The OWord number can thus be calculated as follows:
443 *
444 * oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
445 *
446 * and the tiled offset is
447 *
448 * offset = tile * 4096 + oword * 16 + (mem_x % 16)
449 */
450 unsigned tile, oword, offset;
451
452 tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
453 oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
454 offset = tile << 12 | oword << 4 | (mem_x & 0xf);
455
456 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
457 }
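/*
 * Worked example for the Y-tiling offset above (numbers chosen
 * arbitrarily): with a bo stride of 512 bytes there are 512 / 128 = 4
 * tiles per row.  For mem_x = 200 and mem_y = 40,
 *
 *   tile   = (40 / 32) * 4 + (200 / 128)         = 5
 *   oword  = ((200 % 128) / 16) * 32 + (40 % 32) = 136
 *   offset = 5 * 4096 + 136 * 16 + (200 % 16)    = 22664
 *
 * before any address swizzling is applied.
 */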
458
459 static unsigned
460 tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
461 unsigned tiles_per_row, bool swizzle)
462 {
463 /*
464 * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
465 * W-major tile has 8 8x8-block rows and 8 8x8-block columns. Tiles in the
466 * tiled region are numbered in row-major order, starting from zero. The
467 * tile number can thus be calculated as follows:
468 *
469 * tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
470 *
471 * 8x8-blocks in that tile are numbered in column-major order, starting
472 * from zero. The 8x8-block number can thus be calculated as follows:
473 *
474 * blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
475 *
476 * Each 8x8-block is divided into 4 4x4-blocks, in row-major order. Each
477 * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
478 * We have
479 *
480 * blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
481 * blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
482 * blk1 = (((mem_y % 64) ) & 1) * 2 + (((mem_x % 64) ) & 1)
483 *
484 * and the tiled offset is
485 *
486 * offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
487 */
488 unsigned tile, blk8, blk4, blk2, blk1, offset;
489
490 tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
491 blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
492 blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
493 blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
494 blk1 = ((mem_y ) & 0x1) << 1 | ((mem_x ) & 0x1);
495 offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;
496
497 return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
498 }
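/*
 * Worked example for the W-tiling offset above (numbers chosen
 * arbitrarily): with a bo stride of 128 bytes there are 128 / 64 = 2 tiles
 * per row.  For mem_x = 70 and mem_y = 5, the tile-local coordinates are
 * (6, 5) and
 *
 *   tile   = (5 / 64) * 2 + (70 / 64)          = 1
 *   blk8   = (6 / 8) * 8 + (5 / 8)             = 0
 *   blk4   = ((5 / 4) & 1) * 2 + ((6 / 4) & 1) = 3
 *   blk2   = ((5 / 2) & 1) * 2 + ((6 / 2) & 1) = 1
 *   blk1   = (5 & 1) * 2 + (6 & 1)             = 2
 *   offset = 1 * 4096 + 0 * 64 + 3 * 16 + 1 * 4 + 2 = 4150
 */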
499
500 static unsigned
501 tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
502 unsigned tiles_per_row, bool swizzle)
503 {
504 return mem_y * tiles_per_row + mem_x;
505 }
506
507 typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
508 unsigned tiles_per_row,
509 bool swizzle);
510
511 static tex_tile_offset_func
512 tex_tile_choose_offset_func(const struct ilo_texture *tex,
513 unsigned *tiles_per_row)
514 {
515 switch (tex->image.tiling) {
516 default:
517 assert(!"unknown tiling");
518 /* fall through */
519 case GEN6_TILING_NONE:
520 *tiles_per_row = tex->image.bo_stride;
521 return tex_tile_none_offset;
522 case GEN6_TILING_X:
523 *tiles_per_row = tex->image.bo_stride / 512;
524 return tex_tile_x_offset;
525 case GEN6_TILING_Y:
526 *tiles_per_row = tex->image.bo_stride / 128;
527 return tex_tile_y_offset;
528 case GEN8_TILING_W:
529 *tiles_per_row = tex->image.bo_stride / 64;
530 return tex_tile_w_offset;
531 }
532 }
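/*
 * Note that for GEN6_TILING_NONE the value returned in tiles_per_row is
 * really the bo stride in bytes, which is exactly what
 * tex_tile_none_offset() expects.
 */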
533
534 static void *
535 tex_staging_sys_map_bo(struct ilo_texture *tex,
536 bool for_read_back,
537 bool linear_view)
538 {
539 const struct ilo_screen *is = ilo_screen(tex->base.screen);
540 const bool prefer_cpu = (is->dev.has_llc || for_read_back);
541 void *ptr;
542
543 if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
544 !linear_view))
545 ptr = intel_bo_map(tex->image.bo, !for_read_back);
546 else
547 ptr = intel_bo_map_gtt(tex->image.bo);
548
549 return ptr;
550 }
551
552 static void
553 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
554 {
555 intel_bo_unmap(tex->image.bo);
556 }
557
558 static bool
559 tex_staging_sys_zs_read(struct ilo_texture *tex,
560 const struct ilo_transfer *xfer)
561 {
562 const struct ilo_screen *is = ilo_screen(tex->base.screen);
563 const bool swizzle = is->dev.has_address_swizzling;
564 const struct pipe_box *box = &xfer->base.box;
565 const uint8_t *src;
566 tex_tile_offset_func tile_offset;
567 unsigned tiles_per_row;
568 int slice;
569
570 src = tex_staging_sys_map_bo(tex, true, false);
571 if (!src)
572 return false;
573
574 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
575
576 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
577
578 if (tex->separate_s8) {
579 struct ilo_texture *s8_tex = tex->separate_s8;
580 const uint8_t *s8_src;
581 tex_tile_offset_func s8_tile_offset;
582 unsigned s8_tiles_per_row;
583 int dst_cpp, dst_s8_pos, src_cpp_used;
584
585 s8_src = tex_staging_sys_map_bo(s8_tex, true, false);
586 if (!s8_src) {
587 tex_staging_sys_unmap_bo(tex);
588 return false;
589 }
590
591 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
592
593 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
594 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
595
596 dst_cpp = 4;
597 dst_s8_pos = 3;
598 src_cpp_used = 3;
599 }
600 else {
601 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
602 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
603
604 dst_cpp = 8;
605 dst_s8_pos = 4;
606 src_cpp_used = 4;
607 }
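/*
 * In other words, each destination pixel is dst_cpp bytes: for
 * PIPE_FORMAT_Z24_UNORM_S8_UINT, bytes 0-2 come from the Z24X8 depth bo
 * and byte 3 from the separate S8 bo; for PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
 * bytes 0-3 hold the float depth value, byte 4 the stencil value, and
 * bytes 5-7 the undefined X24 padding.
 */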
608
609 for (slice = 0; slice < box->depth; slice++) {
610 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
611 uint8_t *dst;
612 int i, j;
613
614 tex_get_box_origin(tex, xfer->base.level, slice,
615 box, &mem_x, &mem_y);
616 tex_get_box_origin(s8_tex, xfer->base.level, slice,
617 box, &s8_mem_x, &s8_mem_y);
618
619 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
620
621 for (i = 0; i < box->height; i++) {
622 unsigned x = mem_x, s8_x = s8_mem_x;
623 uint8_t *d = dst;
624
625 for (j = 0; j < box->width; j++) {
626 const unsigned offset =
627 tile_offset(x, mem_y, tiles_per_row, swizzle);
628 const unsigned s8_offset =
629 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
630
631 memcpy(d, src + offset, src_cpp_used);
632 d[dst_s8_pos] = s8_src[s8_offset];
633
634 d += dst_cpp;
635 x += tex->image.block_size;
636 s8_x++;
637 }
638
639 dst += xfer->base.stride;
640 mem_y++;
641 s8_mem_y++;
642 }
643 }
644
645 tex_staging_sys_unmap_bo(s8_tex);
646 }
647 else {
648 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
649
650 for (slice = 0; slice < box->depth; slice++) {
651 unsigned mem_x, mem_y;
652 uint8_t *dst;
653 int i, j;
654
655 tex_get_box_origin(tex, xfer->base.level, slice,
656 box, &mem_x, &mem_y);
657
658 dst = xfer->staging.sys + xfer->base.layer_stride * slice;
659
660 for (i = 0; i < box->height; i++) {
661 unsigned x = mem_x;
662 uint8_t *d = dst;
663
664 for (j = 0; j < box->width; j++) {
665 const unsigned offset =
666 tile_offset(x, mem_y, tiles_per_row, swizzle);
667
668 *d = src[offset];
669
670 d++;
671 x++;
672 }
673
674 dst += xfer->base.stride;
675 mem_y++;
676 }
677 }
678 }
679
680 tex_staging_sys_unmap_bo(tex);
681
682 return true;
683 }
684
685 static bool
686 tex_staging_sys_zs_write(struct ilo_texture *tex,
687 const struct ilo_transfer *xfer)
688 {
689 const struct ilo_screen *is = ilo_screen(tex->base.screen);
690 const bool swizzle = is->dev.has_address_swizzling;
691 const struct pipe_box *box = &xfer->base.box;
692 uint8_t *dst;
693 tex_tile_offset_func tile_offset;
694 unsigned tiles_per_row;
695 int slice;
696
697 dst = tex_staging_sys_map_bo(tex, false, false);
698 if (!dst)
699 return false;
700
701 tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);
702
703 assert(tex->image.block_width == 1 && tex->image.block_height == 1);
704
705 if (tex->separate_s8) {
706 struct ilo_texture *s8_tex = tex->separate_s8;
707 uint8_t *s8_dst;
708 tex_tile_offset_func s8_tile_offset;
709 unsigned s8_tiles_per_row;
710 int src_cpp, src_s8_pos, dst_cpp_used;
711
712 s8_dst = tex_staging_sys_map_bo(s8_tex, false, false);
713 if (!s8_dst) {
714 tex_staging_sys_unmap_bo(tex);
715 return false;
716 }
717
718 s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
719
720 if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
721 assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
722
723 src_cpp = 4;
724 src_s8_pos = 3;
725 dst_cpp_used = 3;
726 }
727 else {
728 assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
729 assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
730
731 src_cpp = 8;
732 src_s8_pos = 4;
733 dst_cpp_used = 4;
734 }
735
736 for (slice = 0; slice < box->depth; slice++) {
737 unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
738 const uint8_t *src;
739 int i, j;
740
741 tex_get_box_origin(tex, xfer->base.level, slice,
742 box, &mem_x, &mem_y);
743 tex_get_box_origin(s8_tex, xfer->base.level, slice,
744 box, &s8_mem_x, &s8_mem_y);
745
746 src = xfer->staging.sys + xfer->base.layer_stride * slice;
747
748 for (i = 0; i < box->height; i++) {
749 unsigned x = mem_x, s8_x = s8_mem_x;
750 const uint8_t *s = src;
751
752 for (j = 0; j < box->width; j++) {
753 const unsigned offset =
754 tile_offset(x, mem_y, tiles_per_row, swizzle);
755 const unsigned s8_offset =
756 s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);
757
758 memcpy(dst + offset, s, dst_cpp_used);
759 s8_dst[s8_offset] = s[src_s8_pos];
760
761 s += src_cpp;
762 x += tex->image.block_size;
763 s8_x++;
764 }
765
766 src += xfer->base.stride;
767 mem_y++;
768 s8_mem_y++;
769 }
770 }
771
772 tex_staging_sys_unmap_bo(s8_tex);
773 }
774 else {
775 assert(tex->image.format == PIPE_FORMAT_S8_UINT);
776
777 for (slice = 0; slice < box->depth; slice++) {
778 unsigned mem_x, mem_y;
779 const uint8_t *src;
780 int i, j;
781
782 tex_get_box_origin(tex, xfer->base.level, slice,
783 box, &mem_x, &mem_y);
784
785 src = xfer->staging.sys + xfer->base.layer_stride * slice;
786
787 for (i = 0; i < box->height; i++) {
788 unsigned x = mem_x;
789 const uint8_t *s = src;
790
791 for (j = 0; j < box->width; j++) {
792 const unsigned offset =
793 tile_offset(x, mem_y, tiles_per_row, swizzle);
794
795 dst[offset] = *s;
796
797 s++;
798 x++;
799 }
800
801 src += xfer->base.stride;
802 mem_y++;
803 }
804 }
805 }
806
807 tex_staging_sys_unmap_bo(tex);
808
809 return true;
810 }
811
812 static bool
813 tex_staging_sys_convert_write(struct ilo_texture *tex,
814 const struct ilo_transfer *xfer)
815 {
816 const struct pipe_box *box = &xfer->base.box;
817 unsigned dst_slice_stride;
818 void *dst;
819 int slice;
820
821 dst = tex_staging_sys_map_bo(tex, false, true);
822 if (!dst)
823 return false;
824
825 dst += tex_get_box_offset(tex, xfer->base.level, box);
826
827 /* slice stride is not always available */
828 if (box->depth > 1)
829 dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
830 else
831 dst_slice_stride = 0;
832
833 if (unlikely(tex->image.format == tex->base.format)) {
834 util_copy_box(dst, tex->image.format, tex->image.bo_stride,
835 dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
836 xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
837 0, 0, 0);
838
839 tex_staging_sys_unmap_bo(tex);
840
841 return true;
842 }
843
844 switch (tex->base.format) {
845 case PIPE_FORMAT_ETC1_RGB8:
846 assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
847
848 for (slice = 0; slice < box->depth; slice++) {
849 const void *src =
850 xfer->staging.sys + xfer->base.layer_stride * slice;
851
852 util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
853 tex->image.bo_stride, src, xfer->base.stride,
854 box->width, box->height);
855
856 dst += dst_slice_stride;
857 }
858 break;
859 default:
860 assert(!"unable to convert the staging data");
861 break;
862 }
863
864 tex_staging_sys_unmap_bo(tex);
865
866 return true;
867 }
868
869 static void
870 tex_staging_sys_writeback(struct ilo_transfer *xfer)
871 {
872 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
873 bool success;
874
875 if (!(xfer->base.usage & PIPE_TRANSFER_WRITE))
876 return;
877
878 switch (xfer->method) {
879 case ILO_TRANSFER_MAP_SW_CONVERT:
880 success = tex_staging_sys_convert_write(tex, xfer);
881 break;
882 case ILO_TRANSFER_MAP_SW_ZS:
883 success = tex_staging_sys_zs_write(tex, xfer);
884 break;
885 default:
886 assert(!"unknown mapping method");
887 success = false;
888 break;
889 }
890
891 if (!success)
892 ilo_err("failed to map resource for moving staging data\n");
893 }
894
895 static bool
896 tex_staging_sys_readback(struct ilo_transfer *xfer)
897 {
898 struct ilo_texture *tex = ilo_texture(xfer->base.resource);
899 bool read_back = false, success;
900
901 /* see if we need to read the resource back */
902 if (xfer->base.usage & PIPE_TRANSFER_READ) {
903 read_back = true;
904 }
905 else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
906 const unsigned discard_flags =
907 (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);
908
909 if (!(xfer->base.usage & discard_flags))
910 read_back = true;
911 }
912
913 if (!read_back)
914 return true;
915
916 switch (xfer->method) {
917 case ILO_TRANSFER_MAP_SW_CONVERT:
918 assert(!"no on-the-fly format conversion for mapping");
919 success = false;
920 break;
921 case ILO_TRANSFER_MAP_SW_ZS:
922 success = tex_staging_sys_zs_read(tex, xfer);
923 break;
924 default:
925 assert(!"unknown mapping method");
926 success = false;
927 break;
928 }
929
930 return success;
931 }
932
933 static void *
934 tex_map(struct ilo_transfer *xfer)
935 {
936 void *ptr;
937
938 switch (xfer->method) {
939 case ILO_TRANSFER_MAP_CPU:
940 case ILO_TRANSFER_MAP_GTT:
941 case ILO_TRANSFER_MAP_GTT_ASYNC:
942 ptr = xfer_map(xfer);
943 if (ptr) {
944 const struct ilo_texture *tex = ilo_texture(xfer->base.resource);
945
946 ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);
947
948 /* stride is for a block row, not a texel row */
949 xfer->base.stride = tex->image.bo_stride;
950 /* note that slice stride is not always available */
951 xfer->base.layer_stride = (xfer->base.box.depth > 1) ?
952 tex_get_slice_stride(tex, xfer->base.level) : 0;
953 }
954 break;
955 case ILO_TRANSFER_MAP_STAGING:
956 ptr = xfer_map(xfer);
957 if (ptr) {
958 const struct ilo_texture *staging = ilo_texture(xfer->staging.res);
959 xfer->base.stride = staging->image.bo_stride;
960 xfer->base.layer_stride = tex_get_slice_stride(staging, 0);
961 }
962 break;
963 case ILO_TRANSFER_MAP_SW_CONVERT:
964 case ILO_TRANSFER_MAP_SW_ZS:
965 if (xfer_alloc_staging_sys(xfer) && tex_staging_sys_readback(xfer))
966 ptr = xfer_map(xfer);
967 else
968 ptr = NULL;
969 break;
970 default:
971 assert(!"unknown mapping method");
972 ptr = NULL;
973 break;
974 }
975
976 return ptr;
977 }
978
979 static void *
980 buf_map(struct ilo_transfer *xfer)
981 {
982 void *ptr;
983
984 ptr = xfer_map(xfer);
985 if (!ptr)
986 return NULL;
987
988 if (xfer->method != ILO_TRANSFER_MAP_STAGING)
989 ptr += xfer->base.box.x;
990
991 xfer->base.stride = 0;
992 xfer->base.layer_stride = 0;
993
994 assert(xfer->base.level == 0);
995 assert(xfer->base.box.y == 0);
996 assert(xfer->base.box.z == 0);
997 assert(xfer->base.box.height == 1);
998 assert(xfer->base.box.depth == 1);
999
1000 return ptr;
1001 }
1002
1003 static void
1004 copy_staging_resource(struct ilo_context *ilo,
1005 struct ilo_transfer *xfer,
1006 const struct pipe_box *box)
1007 {
1008 const unsigned pad_x = (xfer->staging.res->target == PIPE_BUFFER) ?
1009 xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT : 0;
1010 struct pipe_box modified_box;
1011
1012 assert(xfer->method == ILO_TRANSFER_MAP_STAGING && xfer->staging.res);
1013
1014 if (!box) {
1015 u_box_3d(pad_x, 0, 0, xfer->base.box.width, xfer->base.box.height,
1016 xfer->base.box.depth, &modified_box);
1017 box = &modified_box;
1018 }
1019 else if (pad_x) {
1020 modified_box = *box;
1021 modified_box.x += pad_x;
1022 box = &modified_box;
1023 }
1024
1025 ilo_blitter_blt_copy_resource(ilo->blitter,
1026 xfer->base.resource, xfer->base.level,
1027 xfer->base.box.x, xfer->base.box.y, xfer->base.box.z,
1028 xfer->staging.res, 0, box);
1029 }
1030
1031 static bool
1032 is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_submit)
1033 {
1034 const bool referenced = ilo_builder_has_reloc(&ilo->cp->builder, bo);
1035
1036 if (need_submit)
1037 *need_submit = referenced;
1038
1039 if (referenced)
1040 return true;
1041
1042 return intel_bo_is_busy(bo);
1043 }
1044
1045 /**
1046 * Choose the best mapping method, depending on the transfer usage and whether
1047 * the bo is busy.
1048 */
1049 static bool
1050 choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
1051 {
1052 struct pipe_resource *res = xfer->base.resource;
1053 bool need_submit;
1054
1055 if (!resource_get_transfer_method(res, &xfer->base, &xfer->method))
1056 return false;
1057
1058 /* see if we can avoid blocking */
1059 if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
1060 bool resource_renamed;
1061
1062 if (!xfer_unblock(xfer, &resource_renamed)) {
1063 if (xfer->base.usage & PIPE_TRANSFER_DONTBLOCK)
1064 return false;
1065
1066 /* submit so that the bo is really busy and map() correctly blocks */
1067 if (need_submit)
1068 ilo_cp_submit(ilo->cp, "syncing for transfers");
1069 }
1070
1071 if (resource_renamed)
1072 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1073 }
1074
1075 return true;
1076 }
1077
1078 static void
1079 buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
1080 unsigned usage, int offset, int size, const void *data)
1081 {
1082 struct ilo_buffer *buf = ilo_buffer(res);
1083 bool need_submit;
1084
1085 /* see if we can avoid blocking */
1086 if (is_bo_busy(ilo, buf->bo, &need_submit)) {
1087 bool unblocked = false;
1088
1089 if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1090 ilo_resource_rename_bo(res)) {
1091 ilo_state_vector_resource_renamed(&ilo->state_vector, res);
1092 unblocked = true;
1093 }
1094 else {
1095 struct pipe_resource templ, *staging;
1096
1097 /*
1098 * allocate a staging buffer to hold the data and pipelined-copy it
1099 * over to the real bo
1100 */
1101 templ = *res;
1102 templ.width0 = size;
1103 templ.usage = PIPE_USAGE_STAGING;
1104 templ.bind = PIPE_BIND_TRANSFER_WRITE;
1105 staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
1106 if (staging) {
1107 struct pipe_box staging_box;
1108
1109 intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
1110
1111 u_box_1d(0, size, &staging_box);
1112 ilo_blitter_blt_copy_resource(ilo->blitter,
1113 res, 0, offset, 0, 0,
1114 staging, 0, &staging_box);
1115
1116 pipe_resource_reference(&staging, NULL);
1117
1118 return;
1119 }
1120 }
1121
1122 /* submit so that the bo is really busy and pwrite() correctly blocks */
1123 if (!unblocked && need_submit)
1124 ilo_cp_submit(ilo->cp, "syncing for pwrites");
1125 }
1126
1127 intel_bo_pwrite(buf->bo, offset, size, data);
1128 }
1129
1130 static void
1131 ilo_transfer_flush_region(struct pipe_context *pipe,
1132 struct pipe_transfer *transfer,
1133 const struct pipe_box *box)
1134 {
1135 struct ilo_context *ilo = ilo_context(pipe);
1136 struct ilo_transfer *xfer = ilo_transfer(transfer);
1137
1138 /*
1139 * The staging resource is mapped persistently and coherently. We can copy
1140 * without unmapping.
1141 */
1142 if (xfer->method == ILO_TRANSFER_MAP_STAGING &&
1143 (xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1144 copy_staging_resource(ilo, xfer, box);
1145 }
1146
1147 static void
1148 ilo_transfer_unmap(struct pipe_context *pipe,
1149 struct pipe_transfer *transfer)
1150 {
1151 struct ilo_context *ilo = ilo_context(pipe);
1152 struct ilo_transfer *xfer = ilo_transfer(transfer);
1153
1154 xfer_unmap(xfer);
1155
1156 switch (xfer->method) {
1157 case ILO_TRANSFER_MAP_STAGING:
1158 if (!(xfer->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1159 copy_staging_resource(ilo, xfer, NULL);
1160 pipe_resource_reference(&xfer->staging.res, NULL);
1161 break;
1162 case ILO_TRANSFER_MAP_SW_CONVERT:
1163 case ILO_TRANSFER_MAP_SW_ZS:
1164 tex_staging_sys_writeback(xfer);
1165 align_free(xfer->staging.sys);
1166 break;
1167 default:
1168 break;
1169 }
1170
1171 pipe_resource_reference(&xfer->base.resource, NULL);
1172
1173 util_slab_free(&ilo->transfer_mempool, xfer);
1174 }
1175
1176 static void *
1177 ilo_transfer_map(struct pipe_context *pipe,
1178 struct pipe_resource *res,
1179 unsigned level,
1180 unsigned usage,
1181 const struct pipe_box *box,
1182 struct pipe_transfer **transfer)
1183 {
1184 struct ilo_context *ilo = ilo_context(pipe);
1185 struct ilo_transfer *xfer;
1186 void *ptr;
1187
1188 /* note that xfer is not zero'd */
1189 xfer = util_slab_alloc(&ilo->transfer_mempool);
1190 if (!xfer) {
1191 *transfer = NULL;
1192 return NULL;
1193 }
1194
1195 xfer->base.resource = NULL;
1196 pipe_resource_reference(&xfer->base.resource, res);
1197 xfer->base.level = level;
1198 xfer->base.usage = usage;
1199 xfer->base.box = *box;
1200
1201 ilo_blit_resolve_transfer(ilo, &xfer->base);
1202
1203 if (choose_transfer_method(ilo, xfer)) {
1204 if (res->target == PIPE_BUFFER)
1205 ptr = buf_map(xfer);
1206 else
1207 ptr = tex_map(xfer);
1208 }
1209 else {
1210 ptr = NULL;
1211 }
1212
1213 if (!ptr) {
1214 pipe_resource_reference(&xfer->base.resource, NULL);
1215 util_slab_free(&ilo->transfer_mempool, xfer);
1216 *transfer = NULL;
1217 return NULL;
1218 }
1219
1220 *transfer = &xfer->base;
1221
1222 return ptr;
1223 }
1224
1225 static void
1226 ilo_transfer_inline_write(struct pipe_context *pipe,
1227 struct pipe_resource *res,
1228 unsigned level,
1229 unsigned usage,
1230 const struct pipe_box *box,
1231 const void *data,
1232 unsigned stride,
1233 unsigned layer_stride)
1234 {
1235 if (likely(res->target == PIPE_BUFFER) &&
1236 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1237 /* the caller should specify just an offset and a size */
1238 assert(level == 0);
1239 assert(box->y == 0);
1240 assert(box->z == 0);
1241 assert(box->height == 1);
1242 assert(box->depth == 1);
1243
1244 buf_pwrite(ilo_context(pipe), res,
1245 usage, box->x, box->width, data);
1246 }
1247 else {
1248 u_default_transfer_inline_write(pipe, res,
1249 level, usage, box, data, stride, layer_stride);
1250 }
1251 }
1252
1253 /**
1254 * Initialize transfer-related functions.
1255 */
1256 void
1257 ilo_init_transfer_functions(struct ilo_context *ilo)
1258 {
1259 ilo->base.transfer_map = ilo_transfer_map;
1260 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1261 ilo->base.transfer_unmap = ilo_transfer_unmap;
1262 ilo->base.transfer_inline_write = ilo_transfer_inline_write;
1263 }