/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include "util/u_surface.h"
#include "util/u_transfer.h"
#include "util/u_format_etc.h"

#include "ilo_blit.h"
#include "ilo_cp.h"
#include "ilo_context.h"
#include "ilo_resource.h"
#include "ilo_state.h"
#include "ilo_transfer.h"

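/**
 * Return true when the bo is busy, that is, when it is referenced by the
 * current batch or reported busy by the kernel.  need_flush, when not NULL,
 * is set to whether the batch references the bo and thus needs to be flushed
 * before stalling on the bo.
 */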
static bool
is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_flush)
{
   const bool referenced = intel_bo_references(ilo->cp->bo, bo);

   if (need_flush)
      *need_flush = referenced;

   if (referenced)
      return true;

   return intel_bo_is_busy(bo);
}

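/**
 * Map the bo with the mapping method chosen by choose_transfer_method().
 * Return true on success.
 */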
static bool
map_bo_for_transfer(struct ilo_context *ilo, struct intel_bo *bo,
                    const struct ilo_transfer *xfer)
{
   int err;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
      err = intel_bo_map(bo, (xfer->base.usage & PIPE_TRANSFER_WRITE));
      break;
   case ILO_TRANSFER_MAP_GTT:
      err = intel_bo_map_gtt(bo);
      break;
   case ILO_TRANSFER_MAP_UNSYNC:
      err = intel_bo_map_unsynchronized(bo);
      break;
   default:
      assert(!"unknown mapping method");
      err = -1;
      break;
   }

   return !err;
}

/**
 * Choose the best mapping method, depending on the transfer usage and whether
 * the bo is busy.
 */
static bool
choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct pipe_resource *res = xfer->base.resource;
   const unsigned usage = xfer->base.usage;
   /* prefer map() when there is a last-level cache */
   const bool prefer_cpu =
      (ilo->dev->has_llc || (usage & PIPE_TRANSFER_READ));
   struct ilo_texture *tex;
   struct ilo_buffer *buf;
   struct intel_bo *bo;
   bool tiled, need_flush;

   if (res->target == PIPE_BUFFER) {
      tex = NULL;

      buf = ilo_buffer(res);
      bo = buf->bo;
      tiled = false;
   }
   else {
      buf = NULL;

      tex = ilo_texture(res);
      bo = tex->bo;
      tiled = (tex->tiling != INTEL_TILING_NONE);
   }

   /* choose between mapping through CPU or GTT */
   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      /* we do not want fencing */
      if (tiled || prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }
   else {
      if (!tiled && prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
         /* nothing we can do */
      }
      else if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         /* unsynchronized gtt mapping does not stall */
         xfer->method = ILO_TRANSFER_MAP_UNSYNC;
         will_stall = false;
      }
      else if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* discard the old bo and allocate a new one for mapping */
         if ((tex && ilo_texture_alloc_bo(tex)) ||
             (buf && ilo_buffer_alloc_bo(buf))) {
            ilo_mark_states_with_resource_dirty(ilo, res);
            will_stall = false;
         }
      }
      else if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
         /*
          * We could allocate and return a system buffer here.  When a region of
          * the buffer is explicitly flushed, we pwrite() the region to a
          * temporary bo and emit a pipelined copy blit.
          *
          * For now, do nothing.
          */
      }
      else if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /*
          * We could allocate a temporary bo for mapping, and emit a pipelined
          * copy blit upon unmapping.
          *
          * For now, do nothing.
          */
      }

      if (will_stall) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* flush to make the bo busy (so that map() stalls as it should) */
         if (need_flush)
            ilo_cp_flush(ilo->cp, "syncing for transfers");
      }
   }

   if (tex && !(usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
      if (tex->separate_s8 || tex->bo_format == PIPE_FORMAT_S8_UINT)
         xfer->method = ILO_TRANSFER_MAP_SW_ZS;
      /* need to convert on-the-fly */
      else if (tex->bo_format != tex->base.format)
         xfer->method = ILO_TRANSFER_MAP_SW_CONVERT;
   }

   return true;
}

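/**
 * Compute the origin of the transfer box within the given level and slice:
 * mem_x is in bytes and mem_y is in block rows.
 */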
static void
tex_get_box_origin(const struct ilo_texture *tex,
                   unsigned level, unsigned slice,
                   const struct pipe_box *box,
                   unsigned *mem_x, unsigned *mem_y)
{
   const struct ilo_texture_slice *s =
      ilo_texture_get_slice(tex, level, slice + box->z);
   unsigned x, y;

   x = s->x + box->x;
   y = s->y + box->y;

   assert(x % tex->block_width == 0 && y % tex->block_height == 0);

   *mem_x = x / tex->block_width * tex->bo_cpp;
   *mem_y = y / tex->block_height;
}

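/**
 * Return the byte offset of the transfer box within the bo for the given
 * level.
 */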
static unsigned
tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
                   const struct pipe_box *box)
{
   unsigned mem_x, mem_y;

   tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);

   return mem_y * tex->bo_stride + mem_x;
}

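/**
 * Return the distance in bytes between two consecutive slices (array layers
 * or 3D slices) of the given level, or 0 when there is none.
 */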
static unsigned
tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
{
   const struct ilo_texture_slice *s0, *s1;
   unsigned qpitch;

   /* there is no 3D array texture */
   assert(tex->base.array_size == 1 || tex->base.depth0 == 1);

   if (tex->base.array_size == 1) {
      /* non-array, non-3D */
      if (tex->base.depth0 == 1)
         return 0;

      /* only the first level has a fixed slice stride */
      if (level > 0) {
         assert(!"no slice stride for 3D texture with level > 0");
         return 0;
      }
   }

   s0 = ilo_texture_get_slice(tex, level, 0);
   s1 = ilo_texture_get_slice(tex, level, 1);
   qpitch = s1->y - s0->y;
   assert(qpitch % tex->block_height == 0);

   return (qpitch / tex->block_height) * tex->bo_stride;
}

static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *   "As shown in the tiling algorithm, the new address bit[6] should be:
    *
    *      Address bit[6] <= TiledAddr bit[6] XOR
    *                        TiledAddr bit[9] XOR
    *                        TiledAddr bit[10]"
    */
   return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
}

static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *   "As shown in the tiling algorithm, The new address bit[6] becomes:
    *
    *      Address bit[6] <= TiledAddr bit[6] XOR
    *                        TiledAddr bit[9]"
    */
   return addr ^ ((addr >> 3) & 0x40);
}

static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that an
    * X-major tile has 8 rows and 32 OWord columns (512 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
    *
    * OWords in that tile are also numbered in row-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    *           = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
    */
   unsigned tile, offset;

   tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
   offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);

   return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
}

static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
    * Y-major tile has 32 rows and 8 OWord columns (128 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
    *
    * OWords in that tile are numbered in column-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    */
   unsigned tile, oword, offset;

   tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
   oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
   offset = tile << 12 | oword << 4 | (mem_x & 0xf);

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}

static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
    * W-major tile has 8 8x8-block rows and 8 8x8-block columns.  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
    *
    * 8x8-blocks in that tile are numbered in column-major order, starting
    * from zero.  The 8x8-block number can thus be calculated as follows:
    *
    *    blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
    *
    * Each 8x8-block is divided into 4 4x4-blocks, in row-major order.  Each
    * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
    * We have
    *
    *    blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
    *    blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
    *    blk1 = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
    */
   unsigned tile, blk8, blk4, blk2, blk1, offset;

   tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
   blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
   blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
   blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
   blk1 = ((mem_y     ) & 0x1) << 1 | ((mem_x     ) & 0x1);
   offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}

static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   return mem_y * tiles_per_row + mem_x;
}

typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
                                         unsigned tiles_per_row,
                                         bool swizzle);

static tex_tile_offset_func
tex_tile_choose_offset_func(const struct ilo_texture *tex,
                            unsigned *tiles_per_row)
{
   switch (tex->tiling) {
   case INTEL_TILING_X:
      *tiles_per_row = tex->bo_stride / 512;
      return tex_tile_x_offset;
   case INTEL_TILING_Y:
      *tiles_per_row = tex->bo_stride / 128;
      return tex_tile_y_offset;
   case INTEL_TILING_NONE:
   default:
      /* W-tiling */
      if (tex->bo_format == PIPE_FORMAT_S8_UINT) {
         *tiles_per_row = tex->bo_stride / 64;
         return tex_tile_w_offset;
      }
      else {
         *tiles_per_row = tex->bo_stride;
         return tex_tile_none_offset;
      }
   }
}

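/**
 * Read depth/stencil texels from the tiled bo(s) into the system staging
 * buffer, detiling them and interleaving the separate stencil data when
 * there is a separate S8 texture.
 */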
static void
tex_staging_sys_zs_read(struct ilo_context *ilo,
                        struct ilo_texture *tex,
                        const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   const uint8_t *src = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      const uint8_t *s8_src = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int dst_cpp, dst_s8_pos, src_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         dst_cpp = 4;
         dst_s8_pos = 3;
         src_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         dst_cpp = 8;
         dst_s8_pos = 4;
         src_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(d, src + offset, src_cpp_used);
               d[dst_s8_pos] = s8_src[s8_offset];

               d += dst_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            dst += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               *d = src[offset];

               d++;
               x++;
            }

            dst += xfer->base.stride;
            mem_y++;
         }
      }
   }
}

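/**
 * Write depth/stencil texels from the system staging buffer back to the
 * tiled bo(s), tiling them and splitting out the stencil data to the
 * separate S8 texture when there is one.
 */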
static void
tex_staging_sys_zs_write(struct ilo_context *ilo,
                         struct ilo_texture *tex,
                         const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   uint8_t *dst = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      uint8_t *s8_dst = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int src_cpp, src_s8_pos, dst_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         src_cpp = 4;
         src_s8_pos = 3;
         dst_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         src_cpp = 8;
         src_s8_pos = 4;
         dst_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(dst + offset, s, dst_cpp_used);
               s8_dst[s8_offset] = s[src_s8_pos];

               s += src_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            src += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               dst[offset] = *s;

               s++;
               x++;
            }

            src += xfer->base.stride;
            mem_y++;
         }
      }
   }
}

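/**
 * Write the system staging buffer to the bo, converting from the resource
 * format to the bo format when the two differ (currently only ETC1 unpacked
 * to RGBX), or copying directly when they match.
 */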
static void
tex_staging_sys_convert_write(struct ilo_context *ilo,
                              struct ilo_texture *tex,
                              const struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   unsigned dst_slice_stride;
   void *dst;
   int slice;

   dst = intel_bo_get_virtual(tex->bo);
   dst += tex_get_box_offset(tex, xfer->base.level, box);

   /* slice stride is not always available */
   if (box->depth > 1)
      dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      dst_slice_stride = 0;

   if (unlikely(tex->bo_format == tex->base.format)) {
      util_copy_box(dst, tex->bo_format, tex->bo_stride, dst_slice_stride,
            0, 0, 0, box->width, box->height, box->depth,
            xfer->staging_sys, xfer->base.stride, xfer->base.layer_stride,
            0, 0, 0);
      return;
   }

   switch (tex->base.format) {
   case PIPE_FORMAT_ETC1_RGB8:
      assert(tex->bo_format == PIPE_FORMAT_R8G8B8X8_UNORM);

      for (slice = 0; slice < box->depth; slice++) {
         const void *src =
            xfer->staging_sys + xfer->base.layer_stride * slice;

         util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
               tex->bo_stride, src, xfer->base.stride,
               box->width, box->height);

         dst += dst_slice_stride;
      }
      break;
   default:
      assert(!"unable to convert the staging data");
      break;
   }
}

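/**
 * Map the texture bo (and the separate S8 bo, if any) so that the staging
 * data can be read back or written out.
 */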
static bool
tex_staging_sys_map_bo(const struct ilo_context *ilo,
                       const struct ilo_texture *tex,
                       bool for_read_back, bool linear_view)
{
   const bool prefer_cpu = (ilo->dev->has_llc || for_read_back);
   int err;

   if (prefer_cpu && (tex->tiling == INTEL_TILING_NONE || !linear_view))
      err = intel_bo_map(tex->bo, !for_read_back);
   else
      err = intel_bo_map_gtt(tex->bo);

   if (!tex->separate_s8)
      return !err;

   err = intel_bo_map(tex->separate_s8->bo, !for_read_back);
   if (err)
      intel_bo_unmap(tex->bo);

   return !err;
}

static void
tex_staging_sys_unmap_bo(const struct ilo_context *ilo,
                         const struct ilo_texture *tex)
{
   if (tex->separate_s8)
      intel_bo_unmap(tex->separate_s8->bo);

   intel_bo_unmap(tex->bo);
}

static void
tex_staging_sys_unmap(struct ilo_context *ilo,
                      struct ilo_texture *tex,
                      struct ilo_transfer *xfer)
{
   bool success;

   if (!(xfer->base.usage & PIPE_TRANSFER_WRITE)) {
      FREE(xfer->staging_sys);
      return;
   }

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      success = tex_staging_sys_map_bo(ilo, tex, false, true);
      if (success) {
         tex_staging_sys_convert_write(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map_bo(ilo, tex, false, false);
      if (success) {
         tex_staging_sys_zs_write(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   if (!success)
      ilo_err("failed to map resource for moving staging data\n");

   FREE(xfer->staging_sys);
}

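/**
 * Allocate the system staging buffer for the transfer and read the resource
 * back into it when the transfer usage requires the current contents.
 */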
static bool
tex_staging_sys_map(struct ilo_context *ilo,
                    struct ilo_texture *tex,
                    struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   const size_t stride = util_format_get_stride(tex->base.format, box->width);
   const size_t size =
      util_format_get_2d_size(tex->base.format, stride, box->height);
   bool read_back = false, success;

   xfer->staging_sys = MALLOC(size * box->depth);
   if (!xfer->staging_sys)
      return false;

   xfer->base.stride = stride;
   xfer->base.layer_stride = size;
   xfer->ptr = xfer->staging_sys;

   /* see if we need to read the resource back */
   if (xfer->base.usage & PIPE_TRANSFER_READ) {
      read_back = true;
   }
   else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
      const unsigned discard_flags =
         (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);

      if (!(xfer->base.usage & discard_flags))
         read_back = true;
   }

   if (!read_back)
      return true;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      assert(!"no on-the-fly format conversion for mapping");
      success = false;
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map_bo(ilo, tex, true, false);
      if (success) {
         tex_staging_sys_zs_read(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   return success;
}

static void
tex_direct_unmap(struct ilo_context *ilo,
                 struct ilo_texture *tex,
                 struct ilo_transfer *xfer)
{
   intel_bo_unmap(tex->bo);
}

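/**
 * Map the texture bo directly and set up the transfer stride, layer stride,
 * and pointer for the requested box.
 */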
static bool
tex_direct_map(struct ilo_context *ilo,
               struct ilo_texture *tex,
               struct ilo_transfer *xfer)
{
   if (!map_bo_for_transfer(ilo, tex->bo, xfer))
      return false;

   /* note that stride is for a block row, not a texel row */
   xfer->base.stride = tex->bo_stride;

   /* slice stride is not always available */
   if (xfer->base.box.depth > 1)
      xfer->base.layer_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      xfer->base.layer_stride = 0;

   xfer->ptr = intel_bo_get_virtual(tex->bo);
   xfer->ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);

   return true;
}

static bool
tex_map(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);
   bool success;

   if (!choose_transfer_method(ilo, xfer))
      return false;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_UNSYNC:
      success = tex_direct_map(ilo, tex, xfer);
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map(ilo, tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   return success;
}

static void
tex_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_UNSYNC:
      tex_direct_unmap(ilo, tex, xfer);
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      tex_staging_sys_unmap(ilo, tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      break;
   }
}

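/**
 * Map a buffer resource for the transfer; only box.x and box.width are
 * meaningful for buffers.
 */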
static bool
buf_map(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_buffer *buf = ilo_buffer(xfer->base.resource);

   if (!choose_transfer_method(ilo, xfer))
      return false;

   if (!map_bo_for_transfer(ilo, buf->bo, xfer))
      return false;

   assert(xfer->base.level == 0);
   assert(xfer->base.box.y == 0);
   assert(xfer->base.box.z == 0);
   assert(xfer->base.box.height == 1);
   assert(xfer->base.box.depth == 1);

   xfer->base.stride = 0;
   xfer->base.layer_stride = 0;

   xfer->ptr = intel_bo_get_virtual(buf->bo);
   xfer->ptr += xfer->base.box.x;

   return true;
}

static void
buf_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_buffer *buf = ilo_buffer(xfer->base.resource);

   intel_bo_unmap(buf->bo);
}

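/**
 * Write data into a buffer with pwrite(), discarding the old bo or flushing
 * the batch first when that avoids or correctly orders a stall.
 */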
static void
buf_pwrite(struct ilo_context *ilo, struct ilo_buffer *buf,
           unsigned usage, int offset, int size, const void *data)
{
   bool need_flush;

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, buf->bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* the old data is not needed, so discard the old bo to avoid stalling */
         if (ilo_buffer_alloc_bo(buf)) {
            ilo_mark_states_with_resource_dirty(ilo, &buf->base);
            will_stall = false;
         }
      }
      else {
         /*
          * We could allocate a temporary bo to hold the data and emit a
          * pipelined copy blit to move them to buf->bo.  But for now, do
          * nothing.
          */
      }

      /* flush to make the bo busy (so that pwrite() stalls as it should) */
      if (will_stall && need_flush)
         ilo_cp_flush(ilo->cp, "syncing for pwrites");
   }

   intel_bo_pwrite(buf->bo, offset, size, data);
}

static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}

static void
ilo_transfer_unmap(struct pipe_context *pipe,
                   struct pipe_transfer *transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);

   if (xfer->base.resource->target == PIPE_BUFFER)
      buf_unmap(ilo, xfer);
   else
      tex_unmap(ilo, xfer);

   pipe_resource_reference(&xfer->base.resource, NULL);

   util_slab_free(&ilo->transfer_mempool, xfer);
}

static void *
ilo_transfer_map(struct pipe_context *pipe,
                 struct pipe_resource *res,
                 unsigned level,
                 unsigned usage,
                 const struct pipe_box *box,
                 struct pipe_transfer **transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer;
   bool success;

   xfer = util_slab_alloc(&ilo->transfer_mempool);
   if (!xfer) {
      *transfer = NULL;
      return NULL;
   }

   xfer->base.resource = NULL;
   pipe_resource_reference(&xfer->base.resource, res);
   xfer->base.level = level;
   xfer->base.usage = usage;
   xfer->base.box = *box;

   ilo_blit_resolve_transfer(ilo, &xfer->base);

   if (res->target == PIPE_BUFFER)
      success = buf_map(ilo, xfer);
   else
      success = tex_map(ilo, xfer);

   if (!success) {
      pipe_resource_reference(&xfer->base.resource, NULL);
      /* return the slab-allocated transfer to the mempool, not FREE() */
      util_slab_free(&ilo->transfer_mempool, xfer);
      *transfer = NULL;
      return NULL;
   }

   *transfer = &xfer->base;

   return xfer->ptr;
}

static void
ilo_transfer_inline_write(struct pipe_context *pipe,
                          struct pipe_resource *res,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          const void *data,
                          unsigned stride,
                          unsigned layer_stride)
{
   if (likely(res->target == PIPE_BUFFER) &&
       !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* they should specify just an offset and a size */
      assert(level == 0);
      assert(box->y == 0);
      assert(box->z == 0);
      assert(box->height == 1);
      assert(box->depth == 1);

      buf_pwrite(ilo_context(pipe), ilo_buffer(res),
                 usage, box->x, box->width, data);
   }
   else {
      u_default_transfer_inline_write(pipe, res,
            level, usage, box, data, stride, layer_stride);
   }
}

/**
 * Initialize transfer-related functions.
 */
void
ilo_init_transfer_functions(struct ilo_context *ilo)
{
   ilo->base.transfer_map = ilo_transfer_map;
   ilo->base.transfer_flush_region = ilo_transfer_flush_region;
   ilo->base.transfer_unmap = ilo_transfer_unmap;
   ilo->base.transfer_inline_write = ilo_transfer_inline_write;
}