src/gallium/drivers/radeon/r600_buffer_common.c

   1 /*
   2  * Copyright 2013 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *      Marek Olšák
  25  */
  26
  27 #include "r600_cs.h"
  28 #include "util/u_memory.h"
  29 #include "util/u_upload_mgr.h"
  30 #include <inttypes.h>
  31
  32 boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
  33                                         struct radeon_winsys_cs_handle *buf,
  34                                         enum radeon_bo_usage usage)
  35 {
  36         if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
  37                 return TRUE;
  38         }
  39         if (ctx->rings.dma.cs &&
  40             ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
  41                 return TRUE;
  42         }
  43         return FALSE;
  44 }
  45
  46 void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
  47                                       struct r600_resource *resource,
  48                                       unsigned usage)
  49 {
  50         enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
  51
  52         if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
  53                 return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
  54         }
  55
  56         if (!(usage & PIPE_TRANSFER_WRITE)) {
  57                 /* have to wait for the last write */
  58                 rusage = RADEON_USAGE_WRITE;
  59         }
  60
  61         if (ctx->rings.gfx.cs->cdw &&
  62             ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
  63                                              resource->cs_buf, rusage)) {
  64                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
  65                         ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
  66                         return NULL;
  67                 } else {
  68                         ctx->rings.gfx.flush(ctx, 0);
  69                 }
  70         }
  71         if (ctx->rings.dma.cs &&
  72             ctx->rings.dma.cs->cdw &&
  73             ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
  74                                              resource->cs_buf, rusage)) {
  75                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
  76                         ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
  77                         return NULL;
  78                 } else {
  79                         ctx->rings.dma.flush(ctx, 0);
  80                 }
  81         }
  82
  83         if (ctx->ws->buffer_is_busy(resource->buf, rusage)) {
  84                 if (usage & PIPE_TRANSFER_DONTBLOCK) {
  85                         return NULL;
  86                 } else {
  87                         /* We will be wait for the GPU. Wait for any offloaded
  88                          * CS flush to complete to avoid busy-waiting in the winsys. */
  89                         ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
  90                         if (ctx->rings.dma.cs)
  91                                 ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
  92                 }
  93         }
  94
  95         return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
  96 }
  97
  98 bool r600_init_resource(struct r600_common_screen *rscreen,
  99                         struct r600_resource *res,
 100                         unsigned size, unsigned alignment,
 101                         bool use_reusable_pool, unsigned usage)
 102 {
 103         uint32_t initial_domain, domains;
 104
 105         switch(usage) {
 106         case PIPE_USAGE_STAGING:
 107                 /* Staging resources participate in transfers, i.e. are used
 108                  * for uploads and downloads from regular resources.
 109                  * We generate them internally for some transfers.
 110                  */
 111                 initial_domain = RADEON_DOMAIN_GTT;
 112                 domains = RADEON_DOMAIN_GTT;
 113                 break;
 114         case PIPE_USAGE_DYNAMIC:
 115         case PIPE_USAGE_STREAM:
 116                 /* Default to GTT, but allow the memory manager to move it to VRAM. */
 117                 initial_domain = RADEON_DOMAIN_GTT;
 118                 domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 119                 break;
 120         case PIPE_USAGE_DEFAULT:
 121         case PIPE_USAGE_STATIC:
 122         case PIPE_USAGE_IMMUTABLE:
 123         default:
 124                 /* Don't list GTT here, because the memory manager would put some
 125                  * resources to GTT no matter what the initial domain is.
 126                  * Not listing GTT in the domains improves performance a lot. */
 127                 initial_domain = RADEON_DOMAIN_VRAM;
 128                 domains = RADEON_DOMAIN_VRAM;
 129                 break;
 130         }
 131
 132         res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
 133                                               use_reusable_pool,
 134                                               initial_domain);
 135         if (!res->buf) {
 136                 return false;
 137         }
 138
 139         res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
 140         res->domains = domains;
 141         util_range_set_empty(&res->valid_buffer_range);
 142
 143         if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
 144                 fprintf(stderr, "VM start=0x%"PRIu64"  end=0x%"PRIu64" | Buffer %u bytes\n",
 145                         r600_resource_va(&rscreen->b, &res->b.b),
 146                         r600_resource_va(&rscreen->b, &res->b.b) + res->buf->size,
 147                         res->buf->size);
 148         }
 149         return true;
 150 }
 151
 152 static void r600_buffer_destroy(struct pipe_screen *screen,
 153                                 struct pipe_resource *buf)
 154 {
 155         struct r600_resource *rbuffer = r600_resource(buf);
 156
 157         util_range_destroy(&rbuffer->valid_buffer_range);
 158         pb_reference(&rbuffer->buf, NULL);
 159         FREE(rbuffer);
 160 }
 161
 162 static void *r600_buffer_get_transfer(struct pipe_context *ctx,
 163                                       struct pipe_resource *resource,
 164                                       unsigned level,
 165                                       unsigned usage,
 166                                       const struct pipe_box *box,
 167                                       struct pipe_transfer **ptransfer,
 168                                       void *data, struct r600_resource *staging,
 169                                       unsigned offset)
 170 {
 171         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
 172         struct r600_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);
 173
 174         transfer->transfer.resource = resource;
 175         transfer->transfer.level = level;
 176         transfer->transfer.usage = usage;
 177         transfer->transfer.box = *box;
 178         transfer->transfer.stride = 0;
 179         transfer->transfer.layer_stride = 0;
 180         transfer->offset = offset;
 181         transfer->staging = staging;
 182         *ptransfer = &transfer->transfer;
 183         return data;
 184 }
 185
 186 static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 187                                       struct pipe_resource *resource,
 188                                       unsigned level,
 189                                       unsigned usage,
 190                                       const struct pipe_box *box,
 191                                       struct pipe_transfer **ptransfer)
 192 {
 193         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
 194         struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
 195         struct r600_resource *rbuffer = r600_resource(resource);
 196         uint8_t *data;
 197
 198         assert(box->x + box->width <= resource->width0);
 199
 200         /* See if the buffer range being mapped has never been initialized,
 201          * in which case it can be mapped unsynchronized. */
 202         if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
 203             usage & PIPE_TRANSFER_WRITE &&
 204             !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
 205                 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 206         }
 207
 208         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
 209             !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 210                 assert(usage & PIPE_TRANSFER_WRITE);
 211
 212                 /* Check if mapping this buffer would cause waiting for the GPU. */
 213                 if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
 214                     rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
 215                         rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
 216                 }
 217         }
 218         else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
 219                  !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
 220                  !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
 221                  (rscreen->has_cp_dma ||
 222                   (rscreen->has_streamout &&
 223                    /* The buffer range must be aligned to 4 with streamout. */
 224                    box->x % 4 == 0 && box->width % 4 == 0))) {
 225                 assert(usage & PIPE_TRANSFER_WRITE);
 226
 227                 /* Check if mapping this buffer would cause waiting for the GPU. */
 228                 if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
 229                     rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
 230                         /* Do a wait-free write-only transfer using a temporary buffer. */
 231                         unsigned offset;
 232                         struct r600_resource *staging = NULL;
 233
 234                         u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
 235                                        &offset, (struct pipe_resource**)&staging, (void**)&data);
 236
 237                         if (staging) {
 238                                 data += box->x % R600_MAP_BUFFER_ALIGNMENT;
 239                                 return r600_buffer_get_transfer(ctx, resource, level, usage, box,
 240                                                                 ptransfer, data, staging, offset);
 241                         }
 242                 }
 243         }
 244
 245         data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
 246         if (!data) {
 247                 return NULL;
 248         }
 249         data += box->x;
 250
 251         return r600_buffer_get_transfer(ctx, resource, level, usage, box,
 252                                         ptransfer, data, NULL, 0);
 253 }
 254
 255 static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
 256                                        struct pipe_transfer *transfer)
 257 {
 258         struct r600_common_context *rctx = (struct r600_common_context*)ctx;
 259         struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
 260         struct r600_resource *rbuffer = r600_resource(transfer->resource);
 261
 262         if (rtransfer->staging) {
 263                 struct pipe_resource *dst, *src;
 264                 unsigned soffset, doffset, size;
 265                 struct pipe_box box;
 266
 267                 dst = transfer->resource;
 268                 src = &rtransfer->staging->b.b;
 269                 size = transfer->box.width;
 270                 doffset = transfer->box.x;
 271                 soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
 272
 273                 u_box_1d(soffset, size, &box);
 274
 275                 /* Copy the staging buffer into the original one. */
 276                 if (!(size % 4) && !(doffset % 4) && !(soffset % 4) &&
 277                     rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) {
 278                         /* DONE. */
 279                 } else {
 280                         ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
 281                 }
 282                 pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
 283         }
 284
 285         if (transfer->usage & PIPE_TRANSFER_WRITE) {
 286                 util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
 287                                transfer->box.x + transfer->box.width);
 288         }
 289         util_slab_free(&rctx->pool_transfers, transfer);
 290 }
 291
 292 static const struct u_resource_vtbl r600_buffer_vtbl =
 293 {
 294         NULL,                           /* get_handle */
 295         r600_buffer_destroy,            /* resource_destroy */
 296         r600_buffer_transfer_map,       /* transfer_map */
 297         NULL,                           /* transfer_flush_region */
 298         r600_buffer_transfer_unmap,     /* transfer_unmap */
 299         NULL                            /* transfer_inline_write */
 300 };
 301
 302 struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
 303                                          const struct pipe_resource *templ,
 304                                          unsigned alignment)
 305 {
 306         struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 307         struct r600_resource *rbuffer;
 308
 309         rbuffer = MALLOC_STRUCT(r600_resource);
 310
 311         rbuffer->b.b = *templ;
 312         pipe_reference_init(&rbuffer->b.b.reference, 1);
 313         rbuffer->b.b.screen = screen;
 314         rbuffer->b.vtbl = &r600_buffer_vtbl;
 315         util_range_init(&rbuffer->valid_buffer_range);
 316
 317         if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE, templ->usage)) {
 318                 FREE(rbuffer);
 319                 return NULL;
 320         }
 321         return &rbuffer->b.b;
 322 }