ilo: add ilo_dev_info shared by the screen and contexts
[mesa.git] / src / gallium / drivers / ilo / ilo_resource.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_transfer.h"
29
30 #include "ilo_cp.h"
31 #include "ilo_context.h"
32 #include "ilo_screen.h"
33 #include "ilo_resource.h"
34
35 static struct intel_bo *
36 alloc_buf_bo(const struct ilo_resource *res)
37 {
38 struct ilo_screen *is = ilo_screen(res->base.screen);
39 struct intel_bo *bo;
40 const char *name;
41 const unsigned size = res->bo_width;
42
43 switch (res->base.bind) {
44 case PIPE_BIND_VERTEX_BUFFER:
45 name = "vertex buffer";
46 break;
47 case PIPE_BIND_INDEX_BUFFER:
48 name = "index buffer";
49 break;
50 case PIPE_BIND_CONSTANT_BUFFER:
51 name = "constant buffer";
52 break;
53 case PIPE_BIND_STREAM_OUTPUT:
54 name = "stream output";
55 break;
56 default:
57 name = "unknown buffer";
58 break;
59 }
60
61 /* this is what a buffer supposed to be like */
62 assert(res->bo_width * res->bo_height * res->bo_cpp == size);
63 assert(res->tiling == INTEL_TILING_NONE);
64 assert(res->bo_stride == 0);
65
66 if (res->handle) {
67 bo = is->winsys->import_handle(is->winsys, name,
68 res->bo_width, res->bo_height, res->bo_cpp, res->handle);
69
70 /* since the bo is shared to us, make sure it meets the expectations */
71 if (bo) {
72 assert(bo->get_size(res->bo) == size);
73 assert(bo->get_tiling(res->bo) == res->tiling);
74 assert(bo->get_pitch(res->bo) == res->bo_stride);
75 }
76 }
77 else {
78 bo = is->winsys->alloc_buffer(is->winsys, name, size, 0);
79 }
80
81 return bo;
82 }
83
84 static struct intel_bo *
85 alloc_tex_bo(const struct ilo_resource *res)
86 {
87 struct ilo_screen *is = ilo_screen(res->base.screen);
88 struct intel_bo *bo;
89 const char *name;
90
91 switch (res->base.target) {
92 case PIPE_TEXTURE_1D:
93 name = "1D texture";
94 break;
95 case PIPE_TEXTURE_2D:
96 name = "2D texture";
97 break;
98 case PIPE_TEXTURE_3D:
99 name = "3D texture";
100 break;
101 case PIPE_TEXTURE_CUBE:
102 name = "cube texture";
103 break;
104 case PIPE_TEXTURE_RECT:
105 name = "rectangle texture";
106 break;
107 case PIPE_TEXTURE_1D_ARRAY:
108 name = "1D array texture";
109 break;
110 case PIPE_TEXTURE_2D_ARRAY:
111 name = "2D array texture";
112 break;
113 case PIPE_TEXTURE_CUBE_ARRAY:
114 name = "cube array texture";
115 break;
116 default:
117 name ="unknown texture";
118 break;
119 }
120
121 if (res->handle) {
122 bo = is->winsys->import_handle(is->winsys, name,
123 res->bo_width, res->bo_height, res->bo_cpp, res->handle);
124 }
125 else {
126 const bool for_render =
127 (res->base.bind & (PIPE_BIND_DEPTH_STENCIL |
128 PIPE_BIND_RENDER_TARGET));
129 const unsigned long flags =
130 (for_render) ? INTEL_ALLOC_FOR_RENDER : 0;
131
132 bo = is->winsys->alloc(is->winsys, name,
133 res->bo_width, res->bo_height, res->bo_cpp,
134 res->tiling, flags);
135 }
136
137 return bo;
138 }
139
140 static bool
141 realloc_bo(struct ilo_resource *res)
142 {
143 struct intel_bo *old_bo = res->bo;
144
145 /* a shared bo cannot be reallocated */
146 if (old_bo && res->handle)
147 return false;
148
149 if (res->base.target == PIPE_BUFFER)
150 res->bo = alloc_buf_bo(res);
151 else
152 res->bo = alloc_tex_bo(res);
153
154 if (!res->bo) {
155 res->bo = old_bo;
156 return false;
157 }
158
159 /* winsys may decide to use a different tiling */
160 res->tiling = res->bo->get_tiling(res->bo);
161 res->bo_stride = res->bo->get_pitch(res->bo);
162
163 if (old_bo)
164 old_bo->unreference(old_bo);
165
166 return true;
167 }
168
/**
 * pipe_context::transfer_inline_write for ilo.
 *
 * For synchronized buffer writes, pwrite() the data directly into the bo,
 * trying to avoid a GPU stall first.  Textures and unsynchronized writes
 * fall back to the generic map/memcpy/unmap helper.
 */
static void
ilo_transfer_inline_write(struct pipe_context *pipe,
                          struct pipe_resource *r,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          const void *data,
                          unsigned stride,
                          unsigned layer_stride)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_resource *res = ilo_resource(r);
   int offset, size;
   bool will_be_busy;

   /*
    * Fall back to map(), memcpy(), and unmap().  We use this path for
    * unsynchronized write, as the buffer is likely to be busy and pwrite()
    * will stall.
    */
   if (unlikely(res->base.target != PIPE_BUFFER) ||
       (usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      u_default_transfer_inline_write(pipe, r,
            level, usage, box, data, stride, layer_stride);

      return;
   }

   /*
    * XXX With hardware context support, the bo may be needed by GPU without
    * being referenced by ilo->cp->bo.  We have to flush unconditionally, and
    * that is bad.
    */
   if (ilo->cp->hw_ctx)
      ilo_cp_flush(ilo->cp);

   /* true when the not-yet-flushed batch buffer references this bo */
   will_be_busy = ilo->cp->bo->references(ilo->cp->bo, res->bo);

   /* see if we can avoid stalling */
   if (will_be_busy || intel_bo_is_busy(res->bo)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* old data not needed so discard the old bo to avoid stalling */
         if (realloc_bo(res))
            will_stall = false;
      }
      else {
         /*
          * We could allocate a temporary bo to hold the data and emit
          * pipelined copy blit to move them to res->bo.  But for now, do
          * nothing.
          */
      }

      /* flush to make bo busy (so that pwrite() stalls as it should be) */
      if (will_stall && will_be_busy)
         ilo_cp_flush(ilo->cp);
   }

   /* they should specify just an offset and a size */
   assert(level == 0);
   assert(box->y == 0);
   assert(box->z == 0);
   assert(box->height == 1);
   assert(box->depth == 1);
   offset = box->x;
   size = box->width;

   res->bo->pwrite(res->bo, offset, size, data);
}
240
241 static void
242 ilo_transfer_unmap(struct pipe_context *pipe,
243 struct pipe_transfer *transfer)
244 {
245 struct ilo_resource *res = ilo_resource(transfer->resource);
246
247 res->bo->unmap(res->bo);
248
249 pipe_resource_reference(&transfer->resource, NULL);
250 FREE(transfer);
251 }
252
/**
 * pipe_context::transfer_flush_region for ilo.
 *
 * Intentionally a no-op: nothing in this driver needs per-region flushing
 * of a mapped bo (see also the PIPE_TRANSFER_FLUSH_EXPLICIT case in
 * map_resource(), which currently does nothing either).
 */
static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}
259
/**
 * Map res->bo for CPU access according to the transfer usage flags, trying
 * to avoid stalling on the GPU where possible.
 *
 * Returns false when the map fails, or when PIPE_TRANSFER_DONTBLOCK is set
 * and the map would have to stall.
 */
static bool
map_resource(struct ilo_context *ilo, struct ilo_resource *res,
             unsigned usage)
{
   struct ilo_screen *is = ilo_screen(res->base.screen);
   bool will_be_busy;
   int err;

   /* simply map unsynchronized */
   if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
      err = res->bo->map_unsynchronized(res->bo);
      return !err;
   }

   /*
    * XXX With hardware context support, the bo may be needed by GPU without
    * being referenced by ilo->cp->bo.  We have to flush unconditionally, and
    * that is bad.
    */
   if (ilo->cp->hw_ctx)
      ilo_cp_flush(ilo->cp);

   /* true when the not-yet-flushed batch buffer references this bo */
   will_be_busy = ilo->cp->bo->references(ilo->cp->bo, res->bo);

   /* see if we can avoid stalling */
   if (will_be_busy || intel_bo_is_busy(res->bo)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* discard old bo and allocate a new one for mapping */
         if (realloc_bo(res))
            will_stall = false;
      }
      else if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
         /* nothing we can do */
      }
      else if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
         /*
          * We could allocate and return a system buffer here.  When a region
          * of the buffer is explicitly flushed, we pwrite() the region to a
          * temporary bo and emit pipelined copy blit.
          *
          * For now, do nothing.
          */
      }
      else if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /*
          * We could allocate a temporary bo for mapping, and emit pipelined
          * copy blit upon unmapping.
          *
          * For now, do nothing.
          */
      }

      if (will_stall) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* flush to make bo busy (so that map() stalls as it should be) */
         if (will_be_busy)
            ilo_cp_flush(ilo->cp);
      }
   }

   /* prefer map() when there is the last-level cache */
   if (res->tiling == INTEL_TILING_NONE &&
       (is->dev.has_llc || (usage & PIPE_TRANSFER_READ)))
      err = res->bo->map(res->bo, (usage & PIPE_TRANSFER_WRITE));
   else
      err = res->bo->map_gtt(res->bo);

   return !err;
}
333
/**
 * pipe_context::transfer_map for ilo.
 *
 * Maps the resource and returns a pointer to the first block of the
 * requested box; allocates and fills *transfer.  Returns NULL (with no
 * transfer allocated) on failure.
 */
static void *
ilo_transfer_map(struct pipe_context *pipe,
                 struct pipe_resource *r,
                 unsigned level,
                 unsigned usage,
                 const struct pipe_box *box,
                 struct pipe_transfer **transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_resource *res = ilo_resource(r);
   struct pipe_transfer *xfer;
   void *ptr;
   int x, y;

   xfer = MALLOC_STRUCT(pipe_transfer);
   if (!xfer)
      return NULL;

   if (!map_resource(ilo, res, usage)) {
      FREE(xfer);
      return NULL;
   }

   /* init transfer */
   xfer->resource = NULL;
   pipe_resource_reference(&xfer->resource, &res->base);
   xfer->level = level;
   xfer->usage = usage;
   xfer->box = *box;
   /* stride for a block row, not a texel row */
   xfer->stride = res->bo_stride;

   /*
    * we can walk through layers when the resource is a texture array or
    * when this is the first level of a 3D texture being mapped
    */
   if (res->base.array_size > 1 ||
       (res->base.target == PIPE_TEXTURE_3D && level == 0)) {
      /* qpitch in texel rows: Y distance between slice 0 and slice 1 */
      const unsigned qpitch =
         res->slice_offsets[level][1].y - res->slice_offsets[level][0].y;

      assert(qpitch % res->block_height == 0);
      xfer->layer_stride = (qpitch / res->block_height) * xfer->stride;
   }
   else {
      xfer->layer_stride = 0;
   }

   /* texel position of the slice origin, plus the offset into the slice */
   x = res->slice_offsets[level][box->z].x;
   y = res->slice_offsets[level][box->z].y;

   x += box->x;
   y += box->y;

   /* in blocks */
   assert(x % res->block_width == 0 && y % res->block_height == 0);
   x /= res->block_width;
   y /= res->block_height;

   ptr = res->bo->get_virtual(res->bo);
   ptr += y * res->bo_stride + x * res->bo_cpp;

   *transfer = xfer;

   return ptr;
}
400
401 static bool
402 alloc_slice_offsets(struct ilo_resource *res)
403 {
404 int depth, lv;
405
406 /* sum the depths of all levels */
407 depth = 0;
408 for (lv = 0; lv <= res->base.last_level; lv++)
409 depth += u_minify(res->base.depth0, lv);
410
411 /*
412 * There are (depth * res->base.array_size) slices. Either depth is one
413 * (non-3D) or res->base.array_size is one (non-array), but it does not
414 * matter.
415 */
416 res->slice_offsets[0] =
417 CALLOC(depth * res->base.array_size, sizeof(res->slice_offsets[0][0]));
418 if (!res->slice_offsets[0])
419 return false;
420
421 /* point to the respective positions in the buffer */
422 for (lv = 1; lv <= res->base.last_level; lv++) {
423 res->slice_offsets[lv] = res->slice_offsets[lv - 1] +
424 u_minify(res->base.depth0, lv - 1) * res->base.array_size;
425 }
426
427 return true;
428 }
429
430 static void
431 free_slice_offsets(struct ilo_resource *res)
432 {
433 int lv;
434
435 FREE(res->slice_offsets[0]);
436 for (lv = 0; lv <= res->base.last_level; lv++)
437 res->slice_offsets[lv] = NULL;
438 }
439
/**
 * Scratch data computed by layout_tex_init() and consumed by the
 * layout_tex_2d()/layout_tex_3d() functions.
 */
struct layout_tex_info {
   bool compressed;                /* format is a compressed format */
   int block_width, block_height;  /* format block size, in texels */
   int align_i, align_j;           /* surface horizontal/vertical alignment */
   int qpitch;                     /* Y distance between slices, in texel rows */

   /* padded size of each mip level, in texels */
   struct {
      int w, h, d;
   } sizes[PIPE_MAX_TEXTURE_LEVELS];
};
450
/**
 * Prepare for texture layout.
 *
 * Fills \p info with the compression block size, the horizontal/vertical
 * alignment units (align_i/align_j), the padded size of every mip level,
 * and qpitch (the Y distance between array slices, in texel rows).
 */
static void
layout_tex_init(const struct ilo_resource *res, struct layout_tex_info *info)
{
   struct ilo_screen *is = ilo_screen(res->base.screen);
   const enum intel_tiling_mode tiling = res->tiling;
   const struct pipe_resource *templ = &res->base;
   int last_level, lv;

   memset(info, 0, sizeof(*info));

   info->compressed = util_format_is_compressed(templ->format);
   info->block_width = util_format_get_blockwidth(templ->format);
   info->block_height = util_format_get_blockheight(templ->format);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    *
    *     "surface format         align_i   align_j
    *      YUV 4:2:2 formats      4         *see below
    *      BC1-5                  4         4
    *      FXT1                   8         4
    *      all other formats      4         *see below"
    *
    *     "- align_j = 4 for any depth buffer
    *      - align_j = 2 for separate stencil buffer
    *      - align_j = 4 for any render target surface is multisampled (4x)
    *      - align_j = 4 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_4
    *      - align_j = 2 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 2 for all other render target surface
    *      - align_j = 2 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 4 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_4"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    *      the Surface Format is 96 bits per element (BPE)."
    *
    * They can be rephrased as
    *
    *                                 align_i      align_j
    *   compressed formats            block width  block height
    *   PIPE_FORMAT_S8_UINT           4            2
    *   other depth/stencil formats   4            4
    *   4x multisampled               4            4
    *   bpp 96                        4            2
    *   others                        4            2 or 4
    */

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    *
    *     "surface defined by      surface format    align_i  align_j
    *      3DSTATE_DEPTH_BUFFER    D16_UNORM         8        4
    *                              not D16_UNORM     4        4
    *      3DSTATE_STENCIL_BUFFER  N/A               8        8
    *      SURFACE_STATE           BC*, ETC*, EAC*   4        4
    *                              FXT1              8        4
    *                              all others        (set by SURFACE_STATE)"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "- This field (Surface Vertical Aligment) is intended to be set to
    *        VALIGN_4 if the surface was rendered as a depth buffer, for a
    *        multisampled (4x) render target, or for a multisampled (8x)
    *        render target, since these surfaces support only alignment of 4.
    *      - Use of VALIGN_4 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to VALIGN_4 for all tiled Y Render Target
    *        surfaces.
    *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *        must be set to VALIGN_4."
    *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    *
    *     "- This field (Surface Horizontal Aligment) is intended to be set to
    *        HALIGN_8 only if the surface was rendered as a depth buffer with
    *        Z16 format or a stencil buffer, since these surfaces support only
    *        alignment of 8.
    *      - Use of HALIGN_8 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
    *      - This field must be set to HALIGN_8 if the Surface Format is
    *        FXT1."
    *
    * They can be rephrased as
    *
    *                                 align_i      align_j
    *   compressed formats            block width  block height
    *   PIPE_FORMAT_Z16_UNORM         8            4
    *   PIPE_FORMAT_S8_UINT           8            8
    *   other depth/stencil formats   4 or 8       4
    *   2x or 4x multisampled         4 or 8       4
    *   tiled Y                       4 or 8       4 (if rt)
    *   PIPE_FORMAT_R32G32B32_FLOAT   4 or 8       2
    *   others                        4 or 8       2 or 4
    */

   if (info->compressed) {
      /* this happens to be the case */
      info->align_i = info->block_width;
      info->align_j = info->block_height;
   }
   else if (util_format_is_depth_or_stencil(templ->format)) {
      if (is->dev.gen >= ILO_GEN(7)) {
         switch (templ->format) {
         case PIPE_FORMAT_Z16_UNORM:
            info->align_i = 8;
            info->align_j = 4;
            break;
         case PIPE_FORMAT_S8_UINT:
            info->align_i = 8;
            info->align_j = 8;
            break;
         default:
            /*
             * From the Ivy Bridge PRM, volume 2 part 1, page 319:
             *
             *     "The 3 LSBs of both offsets (Depth Coordinate Offset Y and
             *      Depth Coordinate Offset X) must be zero to ensure correct
             *      alignment"
             *
             * We will make use of them and setting align_i to 8 help us meet
             * the requirement.
             */
            info->align_i = (templ->last_level > 0) ? 8 : 4;
            info->align_j = 4;
            break;
         }
      }
      else {
         switch (templ->format) {
         case PIPE_FORMAT_S8_UINT:
            info->align_i = 4;
            info->align_j = 2;
            break;
         default:
            info->align_i = 4;
            info->align_j = 4;
            break;
         }
      }
   }
   else {
      /* see the GEN7 rules quoted above for when VALIGN_4 is required */
      const bool valign_4 = (templ->nr_samples > 1) ||
         (is->dev.gen >= ILO_GEN(7) &&
          (templ->bind & PIPE_BIND_RENDER_TARGET) &&
          tiling == INTEL_TILING_Y);

      if (valign_4)
         assert(util_format_get_blocksizebits(templ->format) != 96);

      info->align_i = 4;
      info->align_j = (valign_4) ? 4 : 2;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(info->align_i % info->block_width == 0);
   assert(info->align_j % info->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(info->align_i) &&
          util_is_power_of_two(info->align_j));
   assert(util_is_power_of_two(info->block_width) &&
          util_is_power_of_two(info->block_height));

   last_level = templ->last_level;
   /* need at least 2 levels to compute qpitch below */
   if (templ->array_size > 1 && last_level == 0 &&
       templ->format != PIPE_FORMAT_S8_UINT)
      last_level++;

   /* compute mip level sizes */
   for (lv = 0; lv <= last_level; lv++) {
      int w, h, d;

      w = u_minify(templ->width0, lv);
      h = u_minify(templ->height0, lv);
      d = u_minify(templ->depth0, lv);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 114:
       *
       *     "The dimensions of the mip maps are first determined by applying
       *      the sizing algorithm presented in Non-Power-of-Two Mipmaps
       *      above.  Then, if necessary, they are padded out to compression
       *      block boundaries."
       */
      w = align(w, info->block_width);
      h = align(h, info->block_height);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 111:
       *
       *     "If the surface is multisampled (4x), these values must be
       *      adjusted as follows before proceeding:
       *
       *        W_L = ceiling(W_L / 2) * 4
       *        H_L = ceiling(H_L / 2) * 4"
       */
      if (templ->nr_samples > 1) {
         w = align(w, 2) * 2;
         h = align(h, 2) * 2;
      }

      info->sizes[lv].w = w;
      info->sizes[lv].h = h;
      info->sizes[lv].d = d;
   }

   if (templ->array_size > 1) {
      const int h0 = align(info->sizes[0].h, info->align_j);

      if (templ->format == PIPE_FORMAT_S8_UINT) {
         info->qpitch = h0;
      }
      else {
         const int h1 = align(info->sizes[1].h, info->align_j);

         /*
          * From the Sandy Bridge PRM, volume 1 part 1, page 115:
          *
          *     "The following equation is used for surface formats other than
          *      compressed textures:
          *
          *        QPitch = (h0 + h1 + 11j)"
          *
          *     "The equation for compressed textures (BC* and FXT1 surface
          *      formats) follows:
          *
          *        QPitch = (h0 + h1 + 11j) / 4"
          *
          *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than
          *      the value calculated in the equation above, for every other
          *      odd Surface Height starting from 1 i.e. 1,5,9,13"
          *
          * To access the N-th slice, an offset of (Stride * QPitch * N) is
          * added to the base address.  The PRM divides QPitch by 4 for
          * compressed formats because the block height for those formats are
          * 4, and it wants QPitch to mean the number of memory rows, as
          * opposed to texel rows, between slices.  Since we use texel rows in
          * res->slice_offsets, we do not need to divide QPitch by 4.
          */
         info->qpitch = h0 + h1 +
            ((is->dev.gen >= ILO_GEN(7)) ? 12 : 11) * info->align_j;

         /* the DevSNB sampler MSAA qpitch errata quoted above */
         if (is->dev.gen == ILO_GEN(6) && templ->nr_samples > 1 &&
             templ->height0 % 4 == 1)
            info->qpitch += 4;
      }
   }
}
715
/**
 * Layout a 2D texture.
 *
 * Fills res->slice_offsets with the (x, y) texel position of every slice of
 * every mip level, and sets res->bo_width/res->bo_height (in texels) to the
 * bounding size of the whole mipmap chain.
 */
static void
layout_tex_2d(struct ilo_resource *res, const struct layout_tex_info *info)
{
   const struct pipe_resource *templ = &res->base;
   unsigned int level_x, level_y;
   int lv;

   res->bo_width = 0;
   res->bo_height = 0;

   /* (level_x, level_y) is the origin of the current mip level */
   level_x = 0;
   level_y = 0;
   for (lv = 0; lv <= templ->last_level; lv++) {
      const unsigned int level_w = info->sizes[lv].w;
      const unsigned int level_h = info->sizes[lv].h;
      int slice;

      for (slice = 0; slice < templ->array_size; slice++) {
         res->slice_offsets[lv][slice].x = level_x;
         /* slices are qpitch apart in Y-direction */
         res->slice_offsets[lv][slice].y = level_y + info->qpitch * slice;
      }

      /* extend the size of the monolithic bo to cover this mip level */
      if (res->bo_width < level_x + level_w)
         res->bo_width = level_x + level_w;
      if (res->bo_height < level_y + level_h)
         res->bo_height = level_y + level_h;

      /*
       * MIPLAYOUT_BELOW: level 1 sits below level 0; levels 2 and up stack
       * downward to the right of level 1, so only level 1 advances X.
       */
      if (lv == 1)
         level_x += align(level_w, info->align_i);
      else
         level_y += align(level_h, info->align_j);
   }

   /* we did not take slices into consideration in the computation above */
   res->bo_height += info->qpitch * (templ->array_size - 1);
}
758
/**
 * Layout a 3D texture.
 *
 * Mip levels are stacked in the Y direction.  Within a level, the slices
 * form a 2D grid with (1 << lv) slices per row.  Fills res->slice_offsets
 * and sets res->bo_width/res->bo_height (in texels).
 */
static void
layout_tex_3d(struct ilo_resource *res, const struct layout_tex_info *info)
{
   const struct pipe_resource *templ = &res->base;
   unsigned int level_y;
   int lv;

   res->bo_width = 0;
   res->bo_height = 0;

   level_y = 0;
   for (lv = 0; lv <= templ->last_level; lv++) {
      const unsigned int level_w = info->sizes[lv].w;
      const unsigned int level_h = info->sizes[lv].h;
      const unsigned int level_d = info->sizes[lv].d;
      /* aligned slice dimensions within the slice grid */
      const unsigned int slice_pitch = align(level_w, info->align_i);
      const unsigned int slice_qpitch = align(level_h, info->align_j);
      const unsigned int num_slices_per_row = 1 << lv;
      int slice;

      for (slice = 0; slice < level_d; slice += num_slices_per_row) {
         int i;

         for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
            res->slice_offsets[lv][slice + i].x = slice_pitch * i;
            res->slice_offsets[lv][slice + i].y = level_y;
         }

         /* move on to the next slice row */
         level_y += slice_qpitch;
      }

      /* rightmost slice */
      slice = MIN2(num_slices_per_row, level_d) - 1;

      /* extend the size of the monolithic bo to cover this slice */
      if (res->bo_width < slice_pitch * slice + level_w)
         res->bo_width = slice_pitch * slice + level_w;
      /* level_y already advanced past the last row; subtract one row back */
      if (lv == templ->last_level)
         res->bo_height = (level_y - slice_qpitch) + level_h;
   }
}
804
805 /**
806 * Guess the texture size. For large textures, the errors are relative small.
807 */
808 static size_t
809 guess_tex_size(const struct pipe_resource *templ,
810 enum intel_tiling_mode tiling)
811 {
812 int bo_width, bo_height, bo_stride;
813
814 /* HALIGN_8 and VALIGN_4 */
815 bo_width = align(templ->width0, 8);
816 bo_height = align(templ->height0, 4);
817
818 if (templ->target == PIPE_TEXTURE_3D) {
819 const int num_rows = util_next_power_of_two(templ->depth0);
820 int lv, sum;
821
822 sum = bo_height * templ->depth0;
823 for (lv = 1; lv <= templ->last_level; lv++)
824 sum += u_minify(bo_height, lv) * u_minify(num_rows, lv);
825
826 bo_height = sum;
827 }
828 else if (templ->last_level > 0) {
829 /* MIPLAYOUT_BELOW, ignore qpich */
830 bo_height = (bo_height + u_minify(bo_height, 1)) * templ->array_size;
831 }
832
833 bo_stride = util_format_get_stride(templ->format, bo_width);
834
835 switch (tiling) {
836 case INTEL_TILING_X:
837 bo_stride = align(bo_stride, 512);
838 bo_height = align(bo_height, 8);
839 break;
840 case INTEL_TILING_Y:
841 bo_stride = align(bo_stride, 128);
842 bo_height = align(bo_height, 32);
843 break;
844 default:
845 bo_height = align(bo_height, 2);
846 break;
847 }
848
849 return util_format_get_2d_size(templ->format, bo_stride, bo_height);
850 }
851
/**
 * Choose the tiling mode for a texture.
 *
 * Applies the hardware restrictions for scanout, cursor, and depth/stencil
 * surfaces first; otherwise picks a tiling heuristically for render targets
 * and sampler views, falling back to linear for everything else.
 */
static enum intel_tiling_mode
get_tex_tiling(const struct ilo_resource *res)
{
   const struct pipe_resource *templ = &res->base;

   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 32:
    *
    *     "Display/Overlay   Y-Major not supported.
    *                        X-Major required for Async Flips"
    */
   if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
      return INTEL_TILING_X;

   /*
    * From the Sandy Bridge PRM, volume 3 part 2, page 158:
    *
    *     "The cursor surface address must be 4K byte aligned.  The cursor must
    *      be in linear memory, it cannot be tiled."
    */
   if (unlikely(templ->bind & PIPE_BIND_CURSOR))
      return INTEL_TILING_NONE;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
    *
    *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE.  Linear
    *      Depth Buffer is not supported."
    *
    *     "The Depth Buffer, if tiled, must use Y-Major tiling."
    */
   if (templ->bind & PIPE_BIND_DEPTH_STENCIL)
      return INTEL_TILING_Y;

   if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
      enum intel_tiling_mode tiling = INTEL_TILING_NONE;

      /*
       * From the Sandy Bridge PRM, volume 1 part 2, page 32:
       *
       *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
       *      either TileX or Linear."
       *
       * Also, heuristically set a minimum width/height for enabling tiling.
       */
      if (util_format_get_blocksizebits(templ->format) == 128 &&
          (templ->bind & PIPE_BIND_RENDER_TARGET) && templ->width0 >= 64)
         tiling = INTEL_TILING_X;
      else if ((templ->width0 >= 32 && templ->height0 >= 16) ||
               (templ->width0 >= 16 && templ->height0 >= 32))
         tiling = INTEL_TILING_Y;

      /* make sure the bo can be mapped through GTT if tiled */
      if (tiling != INTEL_TILING_NONE) {
         /*
          * Usually only the first 256MB of the GTT is mappable.
          *
          * See also how intel_context::max_gtt_map_object_size is calculated.
          */
         const size_t mappable_gtt_size = 256 * 1024 * 1024;
         const size_t size = guess_tex_size(templ, tiling);

         /* be conservative */
         if (size > mappable_gtt_size / 4)
            tiling = INTEL_TILING_NONE;
      }

      return tiling;
   }

   return INTEL_TILING_NONE;
}
924
925 static void
926 init_texture(struct ilo_resource *res)
927 {
928 const enum pipe_format format = res->base.format;
929 struct layout_tex_info info;
930
931 /* determine tiling first as it may affect the layout */
932 res->tiling = get_tex_tiling(res);
933
934 layout_tex_init(res, &info);
935
936 res->compressed = info.compressed;
937 res->block_width = info.block_width;
938 res->block_height = info.block_height;
939 res->halign_8 = (info.align_i == 8);
940 res->valign_4 = (info.align_j == 4);
941
942 switch (res->base.target) {
943 case PIPE_TEXTURE_1D:
944 case PIPE_TEXTURE_2D:
945 case PIPE_TEXTURE_CUBE:
946 case PIPE_TEXTURE_RECT:
947 case PIPE_TEXTURE_1D_ARRAY:
948 case PIPE_TEXTURE_2D_ARRAY:
949 case PIPE_TEXTURE_CUBE_ARRAY:
950 layout_tex_2d(res, &info);
951 break;
952 case PIPE_TEXTURE_3D:
953 layout_tex_3d(res, &info);
954 break;
955 default:
956 assert(!"unknown resource target");
957 break;
958 }
959
960 /* in blocks */
961 assert(res->bo_width % info.block_width == 0);
962 assert(res->bo_height % info.block_height == 0);
963 res->bo_width /= info.block_width;
964 res->bo_height /= info.block_height;
965 res->bo_cpp = util_format_get_blocksize(format);
966 }
967
968 static void
969 init_buffer(struct ilo_resource *res)
970 {
971 res->compressed = false;
972 res->block_width = 1;
973 res->block_height = 1;
974 res->halign_8 = false;
975 res->valign_4 = false;
976
977 res->bo_width = res->base.width0;
978 res->bo_height = 1;
979 res->bo_cpp = 1;
980 res->bo_stride = 0;
981 res->tiling = INTEL_TILING_NONE;
982 }
983
984 static struct pipe_resource *
985 create_resource(struct pipe_screen *screen,
986 const struct pipe_resource *templ,
987 struct winsys_handle *handle)
988 {
989 struct ilo_resource *res;
990
991 res = CALLOC_STRUCT(ilo_resource);
992 if (!res)
993 return NULL;
994
995 res->base = *templ;
996 res->base.screen = screen;
997 pipe_reference_init(&res->base.reference, 1);
998 res->handle = handle;
999
1000 if (!alloc_slice_offsets(res)) {
1001 FREE(res);
1002 return NULL;
1003 }
1004
1005 if (templ->target == PIPE_BUFFER)
1006 init_buffer(res);
1007 else
1008 init_texture(res);
1009
1010 if (!realloc_bo(res)) {
1011 free_slice_offsets(res);
1012 FREE(res);
1013 return NULL;
1014 }
1015
1016 return &res->base;
1017 }
1018
1019 static boolean
1020 ilo_can_create_resource(struct pipe_screen *screen,
1021 const struct pipe_resource *templ)
1022 {
1023 /*
1024 * We do not know if we will fail until we try to allocate the bo.
1025 * So just set a limit on the texture size.
1026 */
1027 const size_t max_size = 1 * 1024 * 1024 * 1024;
1028 const size_t size = guess_tex_size(templ, INTEL_TILING_Y);
1029
1030 return (size <= max_size);
1031 }
1032
1033 static struct pipe_resource *
1034 ilo_resource_create(struct pipe_screen *screen,
1035 const struct pipe_resource *templ)
1036 {
1037 return create_resource(screen, templ, NULL);
1038 }
1039
/**
 * pipe_screen::resource_from_handle for ilo.
 */
static struct pipe_resource *
ilo_resource_from_handle(struct pipe_screen *screen,
                         const struct pipe_resource *templ,
                         struct winsys_handle *handle)
{
   /* wrap a bo shared by another process */
   return create_resource(screen, templ, handle);
}
1047
1048 static boolean
1049 ilo_resource_get_handle(struct pipe_screen *screen,
1050 struct pipe_resource *r,
1051 struct winsys_handle *handle)
1052 {
1053 struct ilo_resource *res = ilo_resource(r);
1054 int err;
1055
1056 err = res->bo->export_handle(res->bo, handle);
1057
1058 return !err;
1059 }
1060
1061 static void
1062 ilo_resource_destroy(struct pipe_screen *screen,
1063 struct pipe_resource *r)
1064 {
1065 struct ilo_resource *res = ilo_resource(r);
1066
1067 free_slice_offsets(res);
1068 res->bo->unreference(res->bo);
1069 FREE(res);
1070 }
1071
1072 /**
1073 * Initialize resource-related functions.
1074 */
1075 void
1076 ilo_init_resource_functions(struct ilo_screen *is)
1077 {
1078 is->base.can_create_resource = ilo_can_create_resource;
1079 is->base.resource_create = ilo_resource_create;
1080 is->base.resource_from_handle = ilo_resource_from_handle;
1081 is->base.resource_get_handle = ilo_resource_get_handle;
1082 is->base.resource_destroy = ilo_resource_destroy;
1083 }
1084
1085 /**
1086 * Initialize transfer-related functions.
1087 */
1088 void
1089 ilo_init_transfer_functions(struct ilo_context *ilo)
1090 {
1091 ilo->base.transfer_map = ilo_transfer_map;
1092 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1093 ilo->base.transfer_unmap = ilo_transfer_unmap;
1094 ilo->base.transfer_inline_write = ilo_transfer_inline_write;
1095 }
1096
/**
 * Return the offset (in bytes) to a slice within the bo.
 *
 * When tile_aligned is true, the offset is to the tile containing the start
 * address of the slice.  x_offset and y_offset are offsets (in pixels) from
 * the tile start to slice start.  x_offset is always a multiple of 4 and
 * y_offset is always a multiple of 2.
 */
unsigned
ilo_resource_get_slice_offset(const struct ilo_resource *res,
                              int level, int slice, bool tile_aligned,
                              unsigned *x_offset, unsigned *y_offset)
{
   /* slice position in blocks */
   const unsigned x = res->slice_offsets[level][slice].x / res->block_width;
   const unsigned y = res->slice_offsets[level][slice].y / res->block_height;
   unsigned tile_w, tile_h, tile_size, row_size;
   unsigned slice_offset;

   /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */

   switch (res->tiling) {
   case INTEL_TILING_NONE:
      /* model a linear surface as one-block-wide, one-row-tall "tiles" */
      tile_w = res->bo_cpp;
      tile_h = 1;
      break;
   case INTEL_TILING_X:
      /* an X tile is 512 bytes x 8 rows */
      tile_w = 512;
      tile_h = 8;
      break;
   case INTEL_TILING_Y:
      /* a Y tile is 128 bytes x 32 rows */
      tile_w = 128;
      tile_h = 32;
      break;
   default:
      assert(!"unknown tiling");
      tile_w = res->bo_cpp;
      tile_h = 1;
      break;
   }

   tile_size = tile_w * tile_h;
   row_size = res->bo_stride * tile_h;

   /*
    * for non-tiled resources, this is equivalent to
    *
    *   slice_offset = y * res->bo_stride + x * res->bo_cpp;
    */
   slice_offset =
      row_size * (y / tile_h) + tile_size * (x * res->bo_cpp / tile_w);

   /*
    * Since res->bo_stride is a multiple of tile_w, slice_offset should be
    * aligned at this point.
    */
   assert(slice_offset % tile_size == 0);

   if (tile_aligned) {
      /*
       * because of the possible values of align_i and align_j in
       * layout_tex_init(), x_offset must be a multiple of 4 and y_offset must
       * be a multiple of 2.
       */
      if (x_offset) {
         assert(tile_w % res->bo_cpp == 0);
         *x_offset = (x % (tile_w / res->bo_cpp)) * res->block_width;
         assert(*x_offset % 4 == 0);
      }
      if (y_offset) {
         *y_offset = (y % tile_h) * res->block_height;
         assert(*y_offset % 2 == 0);
      }
   }
   else {
      /* byte offset of the slice within its tile */
      const unsigned tx = (x * res->bo_cpp) % tile_w;
      const unsigned ty = y % tile_h;

      switch (res->tiling) {
      case INTEL_TILING_NONE:
         assert(tx == 0 && ty == 0);
         break;
      case INTEL_TILING_X:
         /* an X tile stores rows of tile_w bytes contiguously */
         slice_offset += tile_w * ty + tx;
         break;
      case INTEL_TILING_Y:
         /* a Y tile stores columns of 16-byte OWords contiguously */
         slice_offset += tile_h * 16 * (tx / 16) + ty * 16 + (tx % 16);
         break;
      }

      if (x_offset)
         *x_offset = 0;
      if (y_offset)
         *y_offset = 0;
   }

   return slice_offset;
}