nv50: properly set the PRIMITIVE_ID enable flag when it is a gp input.
[mesa.git] / src / gallium / drivers / nouveau / nouveau_buffer.c

#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_surface.h"

#include "nouveau_screen.h"
#include "nouveau_context.h"
#include "nouveau_winsys.h"
#include "nouveau_fence.h"
#include "nouveau_buffer.h"
#include "nouveau_mm.h"

#define NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD 192

struct nouveau_transfer {
   struct pipe_transfer base;

   uint8_t *map;
   struct nouveau_bo *bo;
   struct nouveau_mm_allocation *mm;
   uint32_t offset;
};

static INLINE struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
   return (struct nouveau_transfer *)transfer;
}

static INLINE boolean
nouveau_buffer_malloc(struct nv04_resource *buf)
{
   if (!buf->data)
      buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
   return !!buf->data;
}

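/* Allocate backing storage for the buffer in the requested domain: VRAM
 * (with a transparent fallback to GART if the VRAM allocation fails), GART,
 * or, for domain == 0, plain malloc'd system memory. GPU storage is
 * sub-allocated from the screen's mm slab allocators.
 */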
static INLINE boolean
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = buf->base.width0;

   if (buf->base.bind & (PIPE_BIND_CONSTANT_BUFFER |
                         PIPE_BIND_COMPUTE_RESOURCE |
                         PIPE_BIND_SHADER_RESOURCE))
      size = align(size, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return FALSE;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return FALSE;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   return TRUE;
}

static INLINE void
release_allocation(struct nouveau_mm_allocation **mm,
                   struct nouveau_fence *fence)
{
   nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
   (*mm) = NULL;
}

INLINE void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
   nouveau_bo_ref(NULL, &buf->bo);

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid, -(uint64_t)buf->base.width0);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_sys, -(uint64_t)buf->base.width0);

   buf->domain = 0;
}

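/* Drop the current backing storage and fences and allocate fresh storage in
 * the given domain. Status bits not covered by REALLOC_MASK are cleared.
 */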
static INLINE boolean
nouveau_buffer_reallocate(struct nouveau_screen *screen,
                          struct nv04_resource *buf, unsigned domain)
{
   nouveau_buffer_release_gpu_storage(buf);

   nouveau_fence_ref(NULL, &buf->fence);
   nouveau_fence_ref(NULL, &buf->fence_wr);

   buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;

   return nouveau_buffer_allocate(screen, buf, domain);
}

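/* Releases everything associated with the buffer: its GPU storage, the
 * system memory copy (unless it is user-owned memory), and the fences.
 */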
static void
nouveau_buffer_destroy(struct pipe_screen *pscreen,
                       struct pipe_resource *presource)
{
   struct nv04_resource *res = nv04_resource(presource);

   nouveau_buffer_release_gpu_storage(res);

   if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      align_free(res->data);

   nouveau_fence_ref(NULL, &res->fence);
   nouveau_fence_ref(NULL, &res->fence_wr);

   FREE(res);

   NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
}

/* Set up a staging area for the transfer. This is either done in "regular"
 * system memory if the driver supports push_data (nv50+) and the data is
 * small enough (and permit_pb == true), or in GART memory.
 */
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
                         struct nouveau_transfer *tx, boolean permit_pb)
{
   const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
   const unsigned size = align(tx->base.box.width, 4) + adj;

   if (!nv->push_data)
      permit_pb = FALSE;

   if ((size <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD) && permit_pb) {
      tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
      if (tx->map)
         tx->map += adj;
   } else {
      tx->mm =
         nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
      if (tx->bo) {
         tx->offset += adj;
         if (!nouveau_bo_map(tx->bo, 0, NULL))
            tx->map = (uint8_t *)tx->bo->map + tx->offset;
      }
   }
   return tx->map;
}

/* Copies data from the resource into the transfer's temporary GART
 * buffer. Also updates buf->data if present.
 *
 * Maybe just migrate to GART right away if we actually need to do this. */
static boolean
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
      return FALSE;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return TRUE;
}

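/* Writes the transferred range back to the buffer. If a cached system memory
 * copy exists, the staging data is refreshed from it first (the map handed
 * to the user may have pointed at buf->data); otherwise the buffer is marked
 * dirty. The upload itself is done with a GPU copy from the staging bo, or,
 * when the data was staged in malloc'd memory, by pushing it through the
 * command stream (push_cb for dword-aligned constant buffer updates,
 * push_data otherwise). Both fences are updated to the current fence.
 */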
static void
nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
                       unsigned offset, unsigned size)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   uint8_t *data = tx->map + offset;
   const unsigned base = tx->base.box.x + offset;
   const boolean can_cb = !((base | size) & 3);

   if (buf->data)
      memcpy(data, buf->data + base, size);
   else
      buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);

   if (tx->bo)
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
      nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);

   nouveau_fence_ref(nv->screen->fence.current, &buf->fence);
   nouveau_fence_ref(nv->screen->fence.current, &buf->fence_wr);
}

/* Does a CPU wait for the buffer's backing data to become reliably accessible
 * for write/read by waiting on the buffer's relevant fences.
 */
static INLINE boolean
nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_TRANSFER_READ) {
      if (!buf->fence_wr)
         return TRUE;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence_wr));
      if (!nouveau_fence_wait(buf->fence_wr))
         return FALSE;
   } else {
      if (!buf->fence)
         return TRUE;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence));
      if (!nouveau_fence_wait(buf->fence))
         return FALSE;

      nouveau_fence_ref(NULL, &buf->fence);
   }
   nouveau_fence_ref(NULL, &buf->fence_wr);

   return TRUE;
}

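/* Checks, without blocking, whether the GPU still has conflicting work
 * pending: reads only conflict with an unsignalled write fence, writes
 * conflict with any unsignalled fence.
 */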
static INLINE boolean
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_TRANSFER_READ)
      return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
   else
      return (buf->fence && !nouveau_fence_signalled(buf->fence));
}

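/* Initializes the pipe_transfer part of a buffer transfer for the given box
 * and usage. Buffers are treated as 1D, so height/depth are 1 and the
 * strides are 0; the staging bo/map are set up later if needed.
 */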
static INLINE void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
                             struct pipe_resource *resource,
                             const struct pipe_box *box,
                             unsigned usage)
{
   tx->base.resource = resource;
   tx->base.level = 0;
   tx->base.usage = usage;
   tx->base.box.x = box->x;
   tx->base.box.y = 0;
   tx->base.box.z = 0;
   tx->base.box.width = box->width;
   tx->base.box.height = 1;
   tx->base.box.depth = 1;
   tx->base.stride = 0;
   tx->base.layer_stride = 0;

   tx->bo = NULL;
   tx->map = NULL;
}

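/* Releases the staging area of a transfer: bo-backed staging is unreferenced
 * and its sub-allocation is freed once the current fence signals, while
 * malloc'd staging is freed immediately.
 */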
static INLINE void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
                            struct nouveau_transfer *tx)
{
   if (tx->map) {
      if (likely(tx->bo)) {
         nouveau_bo_ref(NULL, &tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->screen->fence.current);
      } else {
         align_free(tx->map -
                    (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
      }
   }
}

/* Creates a cache in system memory of the buffer data. */
static boolean
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
   struct nouveau_transfer tx;
   boolean ret;
   tx.base.resource = &buf->base;
   tx.base.box.x = 0;
   tx.base.box.width = buf->base.width0;
   tx.bo = NULL;
   tx.map = NULL;

   if (!buf->data)
      if (!nouveau_buffer_malloc(buf))
         return FALSE;
   if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
      return TRUE;
   nv->stats.buf_cache_count++;

   if (!nouveau_transfer_staging(nv, &tx, FALSE))
      return FALSE;

   ret = nouveau_transfer_read(nv, &tx);
   if (ret) {
      buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
      memcpy(buf->data, tx.map, buf->base.width0);
   }
   nouveau_buffer_transfer_del(nv, &tx);
   return ret;
}


#define NOUVEAU_TRANSFER_DISCARD \
   (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)

/* Checks whether it is possible to completely discard the memory backing this
 * resource. This can be useful if we would otherwise have to wait for a read
 * operation to complete on this data.
 */
static INLINE boolean
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
   if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
      return FALSE;
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return FALSE;
   return buf->mm && nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE);
}

/* Returns a pointer to a memory area representing a window into the
 * resource's data.
 *
 * This may or may not be the _actual_ memory area of the resource. However
 * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
 * area, the contents of the returned map are copied over to the resource.
 *
 * The usage indicates what the caller plans to do with the map:
 *
 *   WRITE means that the user plans to write to it
 *
 *   READ means that the user plans on reading from it
 *
 *   DISCARD_WHOLE_RESOURCE means that the whole resource is going to be
 *   potentially overwritten, and even if it isn't, the parts that aren't
 *   overwritten don't need to be preserved.
 *
 *   DISCARD_RANGE means that all the data in the specified range is going to
 *   be overwritten.
 *
 * The strategy for determining what kind of memory area to return is complex,
 * see comments inside of the function.
 */
static void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_TRANSFER_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_TRANSFER_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         /* Set up a staging area for the user to write to. It will be copied
          * back into VRAM on unmap. */
         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, TRUE);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            /* The GPU is currently writing to this buffer. Copy its current
             * contents to a staging area in the GART. This is necessary since
             * not the whole area being mapped is being discarded.
             */
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, FALSE);
            nouveau_transfer_read(nv, tx);
         } else {
            /* The buffer is currently idle. Create a staging area for writes,
             * and make sure that the cached data is up-to-date. */
            if (usage & PIPE_TRANSFER_WRITE)
               nouveau_transfer_staging(nv, tx, TRUE);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   /* At this point, buf->domain == GART */

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    * relevant flags. If buf->mm is set, that means this resource is part of a
    * larger slab bo that holds multiple resources. So in that case, don't
    * wait on the whole slab and instead use the logic below to return a
    * reasonable buffer for that case.
    */
   ret = nouveau_bo_map(buf->bo,
                        buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                        nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_TRANSFER_UNSYNCHRONIZED) || !buf->mm)
      return map;

   /* If the GPU is currently reading/writing this buffer, we shouldn't
    * interfere with its progress. So instead we either wait for the GPU to
    * complete its operation, or set up a staging area to perform our work in.
    */
   if (nouveau_buffer_busy(buf, usage & PIPE_TRANSFER_READ_WRITE)) {
      if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else
      if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /* The whole range is being discarded, so it doesn't matter what was
          * there before. No need to copy anything over. */
         nouveau_transfer_staging(nv, tx, TRUE);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
      } else {
         /* It is expected that the returned buffer be a representation of the
          * data in question, so we must copy it over from the buffer. */
         nouveau_transfer_staging(nv, tx, TRUE);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}


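/* Writes back the given subrange of a mapped transfer; used for maps done
 * with PIPE_TRANSFER_FLUSH_EXPLICIT, where only explicitly flushed ranges
 * need to reach the buffer.
 */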
static void
nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
                                     struct pipe_transfer *transfer,
                                     const struct pipe_box *box)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   if (tx->map)
      nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);
}

/* Unmap stage of the transfer. If it was a WRITE transfer and the map that
 * was returned was not the real resource's data, this needs to transfer the
 * data back to the resource.
 *
 * Also marks vbo/cb dirty based on the buffer's bindings.
 */
static void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
                              struct pipe_transfer *transfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
         nouveau_transfer_write(nv, tx, 0, tx->base.box.width);

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
         /* make sure we invalidate dedicated caches */
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = TRUE;
         if (bind & (PIPE_BIND_CONSTANT_BUFFER))
            nv->cb_dirty = TRUE;
      }
   }

   if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);

   nouveau_buffer_transfer_del(nv, tx);
   FREE(tx);
}


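/* Copies a range between two buffers. If both have GPU storage the copy is
 * done on the GPU and the buffers are fenced accordingly; otherwise it falls
 * back to a CPU copy through util_resource_copy_region.
 */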
void
nouveau_copy_buffer(struct nouveau_context *nv,
                    struct nv04_resource *dst, unsigned dstx,
                    struct nv04_resource *src, unsigned srcx, unsigned size)
{
   assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER);

   if (likely(dst->domain) && likely(src->domain)) {
      nv->copy_data(nv,
                    dst->bo, dst->offset + dstx, dst->domain,
                    src->bo, src->offset + srcx, src->domain, size);

      dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence);
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence_wr);

      src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
      nouveau_fence_ref(nv->screen->fence.current, &src->fence);
   } else {
      struct pipe_box src_box;
      src_box.x = srcx;
      src_box.y = 0;
      src_box.z = 0;
      src_box.width = size;
      src_box.height = 1;
      src_box.depth = 1;
      util_resource_copy_region(&nv->pipe,
                                &dst->base, 0, dstx, 0, 0,
                                &src->base, 0, &src_box);
   }
}


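/* Returns a CPU pointer to the resource data at the given offset, for use by
 * fallback paths. VRAM resources are read back into the system memory cache
 * and accessed there; GART resources are mapped directly, syncing against
 * our own fences for sub-allocated (slab) buffers.
 */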
void *
nouveau_resource_map_offset(struct nouveau_context *nv,
                            struct nv04_resource *res, uint32_t offset,
                            uint32_t flags)
{
   if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      return res->data + offset;

   if (res->domain == NOUVEAU_BO_VRAM) {
      if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
         nouveau_buffer_cache(nv, res);
   }
   if (res->domain != NOUVEAU_BO_GART)
      return res->data + offset;

   if (res->mm) {
      unsigned rw;
      rw = (flags & NOUVEAU_BO_WR) ? PIPE_TRANSFER_WRITE : PIPE_TRANSFER_READ;
      nouveau_buffer_sync(res, rw);
      if (nouveau_bo_map(res->bo, 0, NULL))
         return NULL;
   } else {
      if (nouveau_bo_map(res->bo, flags, nv->client))
         return NULL;
   }
   return (uint8_t *)res->bo->map + res->offset + offset;
}


const struct u_resource_vtbl nouveau_buffer_vtbl =
{
   u_default_resource_get_handle,        /* get_handle */
   nouveau_buffer_destroy,               /* resource_destroy */
   nouveau_buffer_transfer_map,          /* transfer_map */
   nouveau_buffer_transfer_flush_region, /* transfer_flush_region */
   nouveau_buffer_transfer_unmap,        /* transfer_unmap */
   u_default_transfer_inline_write       /* transfer_inline_write */
};

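/* Creates a new buffer resource. The placement (VRAM vs. GART) is chosen
 * from the bind flags and the expected usage; when the screen requests it, a
 * system memory copy of VRAM buffers is kept around as well.
 */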
struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   boolean ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   buffer->vtbl = &nouveau_buffer_vtbl;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.bind &
       (screen->vidmem_bindings & screen->sysmem_bindings)) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
      case PIPE_USAGE_STATIC:
         buffer->domain = NOUVEAU_BO_VRAM;
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NOUVEAU_BO_VRAM;
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NOUVEAU_BO_VRAM;
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }
   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);

   if (ret == FALSE)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}


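/* Wraps a user-provided memory pointer in a pipe_resource. No GPU storage is
 * allocated here; the data is uploaded later, e.g. by
 * nouveau_user_buffer_upload or nouveau_buffer_migrate.
 */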
struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
                           unsigned bytes, unsigned bind)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   pipe_reference_init(&buffer->base.reference, 1);
   buffer->vtbl = &nouveau_buffer_vtbl;
   buffer->base.screen = pscreen;
   buffer->base.format = PIPE_FORMAT_R8_UNORM;
   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
   buffer->base.bind = bind;
   buffer->base.width0 = bytes;
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;

   return &buffer->base;
}

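/* Reads the contents of a bo back into the buffer's system memory copy,
 * allocating that copy if necessary.
 */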
static INLINE boolean
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
                          struct nouveau_bo *bo, unsigned offset, unsigned size)
{
   if (!nouveau_buffer_malloc(buf))
      return FALSE;
   if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
      return FALSE;
   memcpy(buf->data, (uint8_t *)bo->map + offset, size);
   return TRUE;
}

/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
boolean
nouveau_buffer_migrate(struct nouveau_context *nv,
                       struct nv04_resource *buf, const unsigned new_domain)
{
   struct nouveau_screen *screen = nv->screen;
   struct nouveau_bo *bo;
   const unsigned old_domain = buf->domain;
   unsigned size = buf->base.width0;
   unsigned offset;
   int ret;

   assert(new_domain != old_domain);

   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
      if (!nouveau_buffer_allocate(screen, buf, new_domain))
         return FALSE;
      ret = nouveau_bo_map(buf->bo, 0, nv->client);
      if (ret)
         return FALSE;
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      align_free(buf->data);
   } else
   if (old_domain != 0 && new_domain != 0) {
      struct nouveau_mm_allocation *mm = buf->mm;

      if (new_domain == NOUVEAU_BO_VRAM) {
         /* keep a system memory copy of our data in case we hit a fallback */
         if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
            return FALSE;
         if (nouveau_mesa_debug)
            debug_printf("migrating %u KiB to VRAM\n", size / 1024);
      }

      offset = buf->offset;
      bo = buf->bo;
      buf->bo = NULL;
      buf->mm = NULL;
      nouveau_buffer_allocate(screen, buf, new_domain);

      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

      nouveau_bo_ref(NULL, &bo);
      if (mm)
         release_allocation(&mm, screen->fence.current);
   } else
   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
      struct nouveau_transfer tx;
      if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
         return FALSE;
      tx.base.resource = &buf->base;
      tx.base.box.x = 0;
      tx.base.box.width = buf->base.width0;
      tx.bo = NULL;
      tx.map = NULL;
      if (!nouveau_transfer_staging(nv, &tx, FALSE))
         return FALSE;
      nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
      nouveau_buffer_transfer_del(nv, &tx);
   } else
      return FALSE;

   assert(buf->domain == new_domain);
   return TRUE;
}

/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * We'd like to only allocate @size bytes here, but then we'd have to rebase
 * the vertex indices ...
 */
boolean
nouveau_user_buffer_upload(struct nouveau_context *nv,
                           struct nv04_resource *buf,
                           unsigned base, unsigned size)
{
   struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
   int ret;

   assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);

   buf->base.width0 = base + size;
   if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
      return FALSE;

   ret = nouveau_bo_map(buf->bo, 0, nv->client);
   if (ret)
      return FALSE;
   memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);

   return TRUE;
}


/* Scratch data allocation. */

static INLINE int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                         unsigned size)
{
   return nouveau_bo_new(nv->screen->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
                         4096, size, NULL, pbo);
}

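/* Releases all extra "runout" scratch bos that were allocated when the
 * regular scratch buffers ran out of space.
 */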
void
nouveau_scratch_runout_release(struct nouveau_context *nv)
{
   if (!nv->scratch.nr_runout)
      return;
   do {
      --nv->scratch.nr_runout;
      nouveau_bo_ref(NULL, &nv->scratch.runout[nv->scratch.nr_runout]);
   } while (nv->scratch.nr_runout);

   FREE(nv->scratch.runout);
   nv->scratch.end = 0;
   nv->scratch.runout = NULL;
}

/* Allocate an extra bo if we can't fit everything we need simultaneously.
 * (Could happen for very large user arrays.)
 */
static INLINE boolean
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
   int ret;
   const unsigned n = nv->scratch.nr_runout++;

   nv->scratch.runout = REALLOC(nv->scratch.runout,
                                (n + 0) * sizeof(*nv->scratch.runout),
                                (n + 1) * sizeof(*nv->scratch.runout));
   nv->scratch.runout[n] = NULL;

   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout[n], size);
   if (!ret) {
      ret = nouveau_bo_map(nv->scratch.runout[n], 0, NULL);
      if (ret)
         nouveau_bo_ref(NULL, &nv->scratch.runout[--nv->scratch.nr_runout]);
   }
   if (!ret) {
      nv->scratch.current = nv->scratch.runout[n];
      nv->scratch.offset = 0;
      nv->scratch.end = size;
      nv->scratch.map = nv->scratch.current->map;
   }
   return !ret;
}

/* Continue to next scratch buffer, if available (no wrapping, large enough).
 * Allocate it if it has not yet been created.
 */
static INLINE boolean
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
   struct nouveau_bo *bo;
   int ret;
   const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;

   if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
      return FALSE;
   nv->scratch.id = i;

   bo = nv->scratch.bo[i];
   if (!bo) {
      ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
      if (ret)
         return FALSE;
      nv->scratch.bo[i] = bo;
   }
   nv->scratch.current = bo;
   nv->scratch.offset = 0;
   nv->scratch.end = nv->scratch.bo_size;

   ret = nouveau_bo_map(bo, NOUVEAU_BO_WR, nv->client);
   if (!ret)
      nv->scratch.map = bo->map;
   return !ret;
}

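/* Makes room for at least min_size bytes of scratch space, preferring the
 * next regular scratch buffer and falling back to a dedicated runout bo.
 */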
static boolean
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
   boolean ret;

   ret = nouveau_scratch_next(nv, min_size);
   if (!ret)
      ret = nouveau_scratch_runout(nv, min_size);
   return ret;
}


/* Copy data to a scratch buffer and return the GPU address & bo the data
 * resides in. */
uint64_t
nouveau_scratch_data(struct nouveau_context *nv,
                     const void *data, unsigned base, unsigned size,
                     struct nouveau_bo **bo)
{
   unsigned bgn = MAX2(base, nv->scratch.offset);
   unsigned end = bgn + size;

   if (end >= nv->scratch.end) {
      end = base + size;
      if (!nouveau_scratch_more(nv, end))
         return 0;
      bgn = base;
   }
   nv->scratch.offset = align(end, 4);

   memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size);

   *bo = nv->scratch.current;
   return (*bo)->offset + (bgn - base);
}

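/* Reserves size bytes of scratch space without copying anything into it;
 * returns a CPU pointer to the reserved area and its bo / GPU address.
 */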
void *
nouveau_scratch_get(struct nouveau_context *nv,
                    unsigned size, uint64_t *gpu_addr, struct nouveau_bo **pbo)
{
   unsigned bgn = nv->scratch.offset;
   unsigned end = nv->scratch.offset + size;

   if (end >= nv->scratch.end) {
      end = size;
      if (!nouveau_scratch_more(nv, end))
         return NULL;
      bgn = 0;
   }
   nv->scratch.offset = align(end, 4);

   *pbo = nv->scratch.current;
   *gpu_addr = nv->scratch.current->offset + bgn;
   return nv->scratch.map + bgn;
}