/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include "util/u_surface.h"
#include "util/u_transfer.h"
#include "util/u_format_etc.h"

#include "ilo_cp.h"
#include "ilo_context.h"
#include "ilo_resource.h"
#include "ilo_state.h"
#include "ilo_transfer.h"

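/**
 * Return true if the bo is still referenced by the unflushed batch buffer or
 * is busy on the GPU.  When need_flush is not NULL, it is set to whether the
 * batch buffer must be flushed before the bo becomes truly busy.
 */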
static bool
is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_flush)
{
   const bool referenced = intel_bo_references(ilo->cp->bo, bo);

   if (need_flush)
      *need_flush = referenced;

   if (referenced)
      return true;

   return intel_bo_is_busy(bo);
}

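/**
 * Map the bo according to the mapping method chosen for the transfer.
 * Return true on success.
 */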
static bool
map_bo_for_transfer(struct ilo_context *ilo, struct intel_bo *bo,
                    const struct ilo_transfer *xfer)
{
   int err;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
      err = intel_bo_map(bo, (xfer->base.usage & PIPE_TRANSFER_WRITE));
      break;
   case ILO_TRANSFER_MAP_GTT:
      err = intel_bo_map_gtt(bo);
      break;
   case ILO_TRANSFER_MAP_UNSYNC:
      err = intel_bo_map_unsynchronized(bo);
      break;
   default:
      assert(!"unknown mapping method");
      err = -1;
      break;
   }

   return !err;
}

/**
 * Choose the best mapping method, depending on the transfer usage and
 * whether the bo is busy.
 */
static bool
choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct pipe_resource *res = xfer->base.resource;
   const unsigned usage = xfer->base.usage;
   /* prefer map() when the last-level cache (LLC) is available */
   const bool prefer_cpu =
      (ilo->dev->has_llc || (usage & PIPE_TRANSFER_READ));
   struct ilo_texture *tex;
   struct ilo_buffer *buf;
   struct intel_bo *bo;
   bool tiled, need_flush;

   if (res->target == PIPE_BUFFER) {
      tex = NULL;

      buf = ilo_buffer(res);
      bo = buf->bo;
      tiled = false;
   }
   else {
      buf = NULL;

      tex = ilo_texture(res);
      bo = tex->bo;
      tiled = (tex->tiling != INTEL_TILING_NONE);
   }

   /* choose between mapping through CPU or GTT */
   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      /* we do not want fencing */
      if (tiled || prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }
   else {
      if (!tiled && prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
         /* nothing we can do */
      }
      else if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         /* unsynchronized gtt mapping does not stall */
         xfer->method = ILO_TRANSFER_MAP_UNSYNC;
         will_stall = false;
      }
      else if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* discard the old bo and allocate a new one for mapping */
         if ((tex && ilo_texture_alloc_bo(tex)) ||
             (buf && ilo_buffer_alloc_bo(buf))) {
            ilo_mark_states_with_resource_dirty(ilo, res);
            will_stall = false;
         }
      }
      else if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
         /*
          * We could allocate and return a system buffer here.  When a region
          * of the buffer is explicitly flushed, we pwrite() the region to a
          * temporary bo and emit a pipelined copy blit.
          *
          * For now, do nothing.
          */
      }
      else if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /*
          * We could allocate a temporary bo for mapping, and emit a
          * pipelined copy blit upon unmapping.
          *
          * For now, do nothing.
          */
      }

      if (will_stall) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* flush to make the bo busy (so that map() stalls as it should) */
         if (need_flush)
            ilo_cp_flush(ilo->cp, "syncing for transfers");
      }
   }

   if (tex && !(usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
      if (tex->separate_s8 || tex->bo_format == PIPE_FORMAT_S8_UINT)
         xfer->method = ILO_TRANSFER_MAP_SW_ZS;
      /* need to convert on-the-fly */
      else if (tex->bo_format != tex->base.format)
         xfer->method = ILO_TRANSFER_MAP_SW_CONVERT;
   }

   return true;
}

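/**
 * Convert the origin of the transfer box within the given level and slice
 * into memory coordinates: mem_x in bytes and mem_y in block rows.
 */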
static void
tex_get_box_origin(const struct ilo_texture *tex,
                   unsigned level, unsigned slice,
                   const struct pipe_box *box,
                   unsigned *mem_x, unsigned *mem_y)
{
   const struct ilo_texture_slice *s =
      ilo_texture_get_slice(tex, level, slice + box->z);
   unsigned x, y;

   x = s->x + box->x;
   y = s->y + box->y;

   assert(x % tex->block_width == 0 && y % tex->block_height == 0);

   *mem_x = x / tex->block_width * tex->bo_cpp;
   *mem_y = y / tex->block_height;
}

static unsigned
tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
                   const struct pipe_box *box)
{
   unsigned mem_x, mem_y;

   tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);

   return mem_y * tex->bo_stride + mem_x;
}

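/**
 * Return the distance, in bytes, between two adjacent slices (array layers
 * or 3D slices) of the given level.
 */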
static unsigned
tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
{
   const struct ilo_texture_slice *s0, *s1;
   unsigned qpitch;

   /* there is no 3D array texture */
   assert(tex->base.array_size == 1 || tex->base.depth0 == 1);

   if (tex->base.array_size == 1) {
      /* non-array, non-3D */
      if (tex->base.depth0 == 1)
         return 0;

      /* only the first level has a fixed slice stride */
      if (level > 0) {
         assert(!"no slice stride for 3D texture with level > 0");
         return 0;
      }
   }

   s0 = ilo_texture_get_slice(tex, level, 0);
   s1 = ilo_texture_get_slice(tex, level, 1);
   qpitch = s1->y - s0->y;
   assert(qpitch % tex->block_height == 0);

   return (qpitch / tex->block_height) * tex->bo_stride;
}

static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *   "As shown in the tiling algorithm, the new address bit[6] should be:
    *
    *      Address bit[6] <= TiledAddr bit[6] XOR
    *                        TiledAddr bit[9] XOR
    *                        TiledAddr bit[10]"
    */
   return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
}

static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *   "As shown in the tiling algorithm, The new address bit[6] becomes:
    *
    *      Address bit[6] <= TiledAddr bit[6] XOR
    *                        TiledAddr bit[9]"
    */
   return addr ^ ((addr >> 3) & 0x40);
}

static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that an
    * X-major tile has 8 rows and 32 OWord columns (512 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
    *
    * OWords in that tile are also numbered in row-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    *           = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
    */
   unsigned tile, offset;

   tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
   offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);

   return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
}

static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
    * Y-major tile has 32 rows and 8 OWord columns (128 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
    *
    * OWords in that tile are numbered in column-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    */
   unsigned tile, oword, offset;

   tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
   oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
   offset = tile << 12 | oword << 4 | (mem_x & 0xf);

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}

static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
    * W-major tile has 8 8x8-block rows and 8 8x8-block columns.  Tiles in
    * the tiled region are numbered in row-major order, starting from zero.
    * The tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
    *
    * 8x8-blocks in that tile are numbered in column-major order, starting
    * from zero.  The 8x8-block number can thus be calculated as follows:
    *
    *    blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
    *
    * Each 8x8-block is divided into 4 4x4-blocks, in row-major order.  Each
    * 4x4-block is further divided into 4 2x2-blocks, also in row-major
    * order.  We have
    *
    *    blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
    *    blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
    *    blk1 = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
    */
   unsigned tile, blk8, blk4, blk2, blk1, offset;

   tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
   blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
   blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
   blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
   blk1 = ((mem_y     ) & 0x1) << 1 | ((mem_x     ) & 0x1);
   offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}

static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   return mem_y * tiles_per_row + mem_x;
}

typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
                                         unsigned tiles_per_row,
                                         bool swizzle);

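/**
 * Return the function that converts memory coordinates into a tiled bo
 * offset, and set tiles_per_row accordingly.
 */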
static tex_tile_offset_func
tex_tile_choose_offset_func(const struct ilo_texture *tex,
                            unsigned *tiles_per_row)
{
   switch (tex->tiling) {
   case INTEL_TILING_X:
      *tiles_per_row = tex->bo_stride / 512;
      return tex_tile_x_offset;
   case INTEL_TILING_Y:
      *tiles_per_row = tex->bo_stride / 128;
      return tex_tile_y_offset;
   case INTEL_TILING_NONE:
   default:
      /* W-tiling */
      if (tex->bo_format == PIPE_FORMAT_S8_UINT) {
         *tiles_per_row = tex->bo_stride / 64;
         return tex_tile_w_offset;
      }
      else {
         *tiles_per_row = tex->bo_stride;
         return tex_tile_none_offset;
      }
   }
}

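/**
 * Read a depth/stencil resource back into the system staging buffer of the
 * transfer, untiling it and interleaving the separate stencil buffer when
 * there is one.
 */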
static void
tex_staging_sys_zs_read(struct ilo_context *ilo,
                        struct ilo_texture *tex,
                        const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   const uint8_t *src = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      const uint8_t *s8_src = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int dst_cpp, dst_s8_pos, src_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         dst_cpp = 4;
         dst_s8_pos = 3;
         src_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         dst_cpp = 8;
         dst_s8_pos = 4;
         src_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(d, src + offset, src_cpp_used);
               d[dst_s8_pos] = s8_src[s8_offset];

               d += dst_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            dst += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               *d = src[offset];

               d++;
               x++;
            }

            dst += xfer->base.stride;
            mem_y++;
         }
      }
   }
}

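/**
 * Write the system staging buffer of the transfer back to a depth/stencil
 * resource, tiling the data and splitting out the stencil bytes into the
 * separate stencil buffer when there is one.
 */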
static void
tex_staging_sys_zs_write(struct ilo_context *ilo,
                         struct ilo_texture *tex,
                         const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   uint8_t *dst = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      uint8_t *s8_dst = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int src_cpp, src_s8_pos, dst_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         src_cpp = 4;
         src_s8_pos = 3;
         dst_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         src_cpp = 8;
         src_s8_pos = 4;
         dst_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(dst + offset, s, dst_cpp_used);
               s8_dst[s8_offset] = s[src_s8_pos];

               s += src_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            src += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               dst[offset] = *s;

               s++;
               x++;
            }

            src += xfer->base.stride;
            mem_y++;
         }
      }
   }
}

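/**
 * Convert the system staging buffer of the transfer to the bo format and
 * write the result into the texture bo.
 */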
static void
tex_staging_sys_convert_write(struct ilo_context *ilo,
                              struct ilo_texture *tex,
                              const struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   unsigned dst_slice_stride;
   void *dst;
   int slice;

   dst = intel_bo_get_virtual(tex->bo);
   dst += tex_get_box_offset(tex, xfer->base.level, box);

   /* slice stride is not always available */
   if (box->depth > 1)
      dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      dst_slice_stride = 0;

   if (unlikely(tex->bo_format == tex->base.format)) {
      util_copy_box(dst, tex->bo_format, tex->bo_stride, dst_slice_stride,
                    0, 0, 0, box->width, box->height, box->depth,
                    xfer->staging_sys, xfer->base.stride,
                    xfer->base.layer_stride, 0, 0, 0);
      return;
   }

   switch (tex->base.format) {
   case PIPE_FORMAT_ETC1_RGB8:
      assert(tex->bo_format == PIPE_FORMAT_R8G8B8X8_UNORM);

      for (slice = 0; slice < box->depth; slice++) {
         const void *src =
            xfer->staging_sys + xfer->base.layer_stride * slice;

         util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
               tex->bo_stride, src, xfer->base.stride,
               box->width, box->height);

         dst += dst_slice_stride;
      }
      break;
   default:
      assert(!"unable to convert the staging data");
      break;
   }
}

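/**
 * Map the texture bo (and the separate stencil bo, if any) so that staging
 * data can be read back or written out.  Return true on success.
 */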
static bool
tex_staging_sys_map_bo(const struct ilo_context *ilo,
                       const struct ilo_texture *tex,
                       bool for_read_back, bool linear_view)
{
   const bool prefer_cpu = (ilo->dev->has_llc || for_read_back);
   int err;

   if (prefer_cpu && (tex->tiling == INTEL_TILING_NONE || !linear_view))
      err = intel_bo_map(tex->bo, !for_read_back);
   else
      err = intel_bo_map_gtt(tex->bo);

   if (!tex->separate_s8)
      return !err;

   err = intel_bo_map(tex->separate_s8->bo, !for_read_back);
   if (err)
      intel_bo_unmap(tex->bo);

   return !err;
}

static void
tex_staging_sys_unmap_bo(const struct ilo_context *ilo,
                         const struct ilo_texture *tex)
{
   if (tex->separate_s8)
      intel_bo_unmap(tex->separate_s8->bo);

   intel_bo_unmap(tex->bo);
}

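/**
 * Unmap a transfer that uses a system staging buffer.  When the transfer
 * was for writing, the staging data is converted or tiled into the bo
 * before the staging buffer is freed.
 */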
static void
tex_staging_sys_unmap(struct ilo_context *ilo,
                      struct ilo_texture *tex,
                      struct ilo_transfer *xfer)
{
   bool success;

   if (!(xfer->base.usage & PIPE_TRANSFER_WRITE)) {
      FREE(xfer->staging_sys);
      return;
   }

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      success = tex_staging_sys_map_bo(ilo, tex, false, true);
      if (success) {
         tex_staging_sys_convert_write(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map_bo(ilo, tex, false, false);
      if (success) {
         tex_staging_sys_zs_write(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   if (!success)
      ilo_err("failed to map resource for moving staging data\n");

   FREE(xfer->staging_sys);
}

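/**
 * Allocate a system staging buffer for the transfer and, when the usage
 * requires it, read the resource back into the staging buffer.  Return true
 * on success.
 */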
static bool
tex_staging_sys_map(struct ilo_context *ilo,
                    struct ilo_texture *tex,
                    struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   const size_t stride = util_format_get_stride(tex->base.format, box->width);
   const size_t size =
      util_format_get_2d_size(tex->base.format, stride, box->height);
   bool read_back = false, success;

   xfer->staging_sys = MALLOC(size * box->depth);
   if (!xfer->staging_sys)
      return false;

   xfer->base.stride = stride;
   xfer->base.layer_stride = size;
   xfer->ptr = xfer->staging_sys;

   /* see if we need to read the resource back */
   if (xfer->base.usage & PIPE_TRANSFER_READ) {
      read_back = true;
   }
   else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
      const unsigned discard_flags =
         (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);

      if (!(xfer->base.usage & discard_flags))
         read_back = true;
   }

   if (!read_back)
      return true;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      assert(!"no on-the-fly format conversion for mapping");
      success = false;
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map_bo(ilo, tex, true, false);
      if (success) {
         tex_staging_sys_zs_read(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   return success;
}

static void
tex_direct_unmap(struct ilo_context *ilo,
                 struct ilo_texture *tex,
                 struct ilo_transfer *xfer)
{
   intel_bo_unmap(tex->bo);
}

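/**
 * Map the texture bo directly and set up the transfer strides and pointer.
 * Return true on success.
 */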
static bool
tex_direct_map(struct ilo_context *ilo,
               struct ilo_texture *tex,
               struct ilo_transfer *xfer)
{
   if (!map_bo_for_transfer(ilo, tex->bo, xfer))
      return false;

   /* note that stride is for a block row, not a texel row */
   xfer->base.stride = tex->bo_stride;

   /* slice stride is not always available */
   if (xfer->base.box.depth > 1)
      xfer->base.layer_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      xfer->base.layer_stride = 0;

   xfer->ptr = intel_bo_get_virtual(tex->bo);
   xfer->ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);

   return true;
}

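/**
 * Map a texture transfer, either directly or through a system staging
 * buffer, depending on the chosen mapping method.  Return true on success.
 */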
static bool
tex_map(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);
   bool success;

   if (!choose_transfer_method(ilo, xfer))
      return false;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_UNSYNC:
      success = tex_direct_map(ilo, tex, xfer);
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map(ilo, tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   return success;
}

static void
tex_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_UNSYNC:
      tex_direct_unmap(ilo, tex, xfer);
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      tex_staging_sys_unmap(ilo, tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      break;
   }
}

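/**
 * Map a buffer transfer.  Buffers are always mapped directly.  Return true
 * on success.
 */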
static bool
buf_map(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_buffer *buf = ilo_buffer(xfer->base.resource);

   if (!choose_transfer_method(ilo, xfer))
      return false;

   if (!map_bo_for_transfer(ilo, buf->bo, xfer))
      return false;

   assert(xfer->base.level == 0);
   assert(xfer->base.box.y == 0);
   assert(xfer->base.box.z == 0);
   assert(xfer->base.box.height == 1);
   assert(xfer->base.box.depth == 1);

   xfer->base.stride = 0;
   xfer->base.layer_stride = 0;

   xfer->ptr = intel_bo_get_virtual(buf->bo);
   xfer->ptr += xfer->base.box.x;

   return true;
}

static void
buf_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_buffer *buf = ilo_buffer(xfer->base.resource);

   intel_bo_unmap(buf->bo);
}

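/**
 * Write data to a buffer bo with pwrite(), discarding the old bo when that
 * avoids a stall.
 */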
static void
buf_pwrite(struct ilo_context *ilo, struct ilo_buffer *buf,
           unsigned usage, int offset, int size, const void *data)
{
   bool need_flush;

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, buf->bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* the old data is not needed, so discard the old bo to avoid stalling */
         if (ilo_buffer_alloc_bo(buf)) {
            ilo_mark_states_with_resource_dirty(ilo, &buf->base);
            will_stall = false;
         }
      }
      else {
         /*
          * We could allocate a temporary bo to hold the data and emit a
          * pipelined copy blit to move them to buf->bo.  But for now, do
          * nothing.
          */
      }

      /* flush to make the bo busy (so that pwrite() stalls as it should) */
      if (will_stall && need_flush)
         ilo_cp_flush(ilo->cp, "syncing for pwrites");
   }

   intel_bo_pwrite(buf->bo, offset, size, data);
}

static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}

static void
ilo_transfer_unmap(struct pipe_context *pipe,
                   struct pipe_transfer *transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);

   if (xfer->base.resource->target == PIPE_BUFFER)
      buf_unmap(ilo, xfer);
   else
      tex_unmap(ilo, xfer);

   pipe_resource_reference(&xfer->base.resource, NULL);

   util_slab_free(&ilo->transfer_mempool, xfer);
}

static void *
ilo_transfer_map(struct pipe_context *pipe,
                 struct pipe_resource *res,
                 unsigned level,
                 unsigned usage,
                 const struct pipe_box *box,
                 struct pipe_transfer **transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer;
   bool success;

   xfer = util_slab_alloc(&ilo->transfer_mempool);
   if (!xfer) {
      *transfer = NULL;
      return NULL;
   }

   xfer->base.resource = NULL;
   pipe_resource_reference(&xfer->base.resource, res);
   xfer->base.level = level;
   xfer->base.usage = usage;
   xfer->base.box = *box;

   if (res->target == PIPE_BUFFER)
      success = buf_map(ilo, xfer);
   else
      success = tex_map(ilo, xfer);

   if (!success) {
      pipe_resource_reference(&xfer->base.resource, NULL);
      /* the transfer came from the slab mempool, so return it there */
      util_slab_free(&ilo->transfer_mempool, xfer);
      *transfer = NULL;
      return NULL;
   }

   *transfer = &xfer->base;

   return xfer->ptr;
}

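/**
 * Write data directly to a resource.  Synchronized buffer writes go through
 * buf_pwrite(); everything else falls back to the default transfer-based
 * implementation.
 */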
static void
ilo_transfer_inline_write(struct pipe_context *pipe,
                          struct pipe_resource *res,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          const void *data,
                          unsigned stride,
                          unsigned layer_stride)
{
   if (likely(res->target == PIPE_BUFFER) &&
       !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* they should specify just an offset and a size */
      assert(level == 0);
      assert(box->y == 0);
      assert(box->z == 0);
      assert(box->height == 1);
      assert(box->depth == 1);

      buf_pwrite(ilo_context(pipe), ilo_buffer(res),
                 usage, box->x, box->width, data);
   }
   else {
      u_default_transfer_inline_write(pipe, res,
            level, usage, box, data, stride, layer_stride);
   }
}

/**
 * Initialize transfer-related functions.
 */
void
ilo_init_transfer_functions(struct ilo_context *ilo)
{
   ilo->base.transfer_map = ilo_transfer_map;
   ilo->base.transfer_flush_region = ilo_transfer_flush_region;
   ilo->base.transfer_unmap = ilo_transfer_unmap;
   ilo->base.transfer_inline_write = ilo_transfer_inline_write;
}