*/
#include "r600_pipe.h"
#include "util/u_upload_mgr.h"
+#include "util/u_memory.h"
+#include "util/u_surface.h"
static void r600_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
- struct r600_screen *rscreen = (struct r600_screen*)screen;
struct r600_resource *rbuffer = r600_resource(buf);
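+ /* Release the valid-range tracking set up in r600_buffer_create(). */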
+ util_range_destroy(&rbuffer->valid_buffer_range);
pb_reference(&rbuffer->buf, NULL);
- util_slab_free(&rscreen->pool_buffers, rbuffer);
-}
-
-static struct pipe_transfer *r600_get_transfer(struct pipe_context *ctx,
- struct pipe_resource *resource,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box)
-{
- struct r600_context *rctx = (struct r600_context*)ctx;
- struct pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);
-
- transfer->resource = resource;
- transfer->level = level;
- transfer->usage = usage;
- transfer->box = *box;
- transfer->stride = 0;
- transfer->layer_stride = 0;
- transfer->data = NULL;
-
- /* Note strides are zero, this is ok for buffers, but not for
- * textures 2d & higher at least.
- */
- return transfer;
+ FREE(rbuffer);
}
static void r600_set_constants_dirty_if_bound(struct r600_context *rctx,
- struct r600_constbuf_state *state,
struct r600_resource *rbuffer)
{
- bool found = false;
- uint32_t mask = state->enabled_mask;
-
- while (mask) {
- unsigned i = u_bit_scan(&mask);
- if (state->cb[i].buffer == &rbuffer->b.b) {
- found = true;
- state->dirty_mask |= 1 << i;
+ unsigned shader;
+
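+ /* The constant-buffer state is kept per shader stage; scan them all. */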
+ for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
+ struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
+ bool found = false;
+ uint32_t mask = state->enabled_mask;
+
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+ if (state->cb[i].buffer == &rbuffer->b.b) {
+ found = true;
+ state->dirty_mask |= 1 << i;
+ }
+ }
+ if (found) {
+ r600_constant_buffers_dirty(rctx, state);
}
- }
- if (found) {
- r600_constant_buffers_dirty(rctx, state);
}
}
-static void *r600_buffer_transfer_map(struct pipe_context *pipe,
- struct pipe_transfer *transfer)
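+/* Wrap a successful map in a pipe_transfer: allocate the r600_transfer,
+ * record the mapped box plus any staging buffer, and pass the CPU
+ * pointer straight back to the caller. */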
+static void *r600_buffer_get_transfer(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer,
+ void *data, struct r600_resource *staging,
+ unsigned offset)
{
- struct r600_resource *rbuffer = r600_resource(transfer->resource);
- struct r600_context *rctx = (struct r600_context*)pipe;
+ struct r600_context *rctx = (struct r600_context*)ctx;
+ struct r600_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);
+
+ transfer->transfer.resource = resource;
+ transfer->transfer.level = level;
+ transfer->transfer.usage = usage;
+ transfer->transfer.box = *box;
+ transfer->transfer.stride = 0;
+ transfer->transfer.layer_stride = 0;
+ transfer->offset = offset;
+ transfer->staging = staging;
+ *ptransfer = &transfer->transfer;
+ return data;
+}
+
+static void *r600_buffer_transfer_map(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer)
+{
+ struct r600_context *rctx = (struct r600_context*)ctx;
+ struct r600_resource *rbuffer = r600_resource(resource);
uint8_t *data;
- if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
- /* When mapping for read, we only need to check if the GPU is writing to it. */
- enum radeon_bo_usage rusage = transfer->usage & PIPE_TRANSFER_WRITE ?
- RADEON_USAGE_READWRITE : RADEON_USAGE_WRITE;
+ assert(box->x + box->width <= resource->width0);
+
+ /* See if the buffer range being mapped has never been initialized,
+ * in which case it can be mapped unsynchronized. */
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+ usage & PIPE_TRANSFER_WRITE &&
+ !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ }
+
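+ /* On DISCARD_WHOLE_RESOURCE, if the GPU is still busy with the buffer,
+ * swap in fresh storage instead of waiting, then rebind it wherever
+ * the old buffer was bound. */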
+ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
+ !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+ assert(usage & PIPE_TRANSFER_WRITE);
/* Check if mapping this buffer would cause waiting for the GPU. */
- if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, rusage) ||
- rctx->ws->buffer_is_busy(rbuffer->buf, rusage)) {
- unsigned i;
+ if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
+ rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
+ unsigned i, mask;
/* Discard the buffer. */
pb_reference(&rbuffer->buf, NULL);
/* Create a new one in the same pipe_resource. */
/* XXX We probably want a different alignment for buffers and textures. */
r600_init_resource(rctx->screen, rbuffer, rbuffer->b.b.width0, 4096,
- rbuffer->b.b.bind, rbuffer->b.b.usage);
+ TRUE, rbuffer->b.b.usage);
/* We changed the buffer, now we need to bind it where the old one was bound. */
/* Vertex buffers. */
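+ /* Flag every enabled vertex-buffer slot that points at this buffer
+ * so the binding is re-emitted with the new storage. */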
- for (i = 0; i < rctx->nr_vertex_buffers; i++) {
- if (rctx->vertex_buffer[i].buffer == &rbuffer->b.b) {
- r600_inval_vertex_cache(rctx);
- r600_atom_dirty(rctx, &rctx->vertex_buffer_state);
+ mask = rctx->vertex_buffer_state.enabled_mask;
+ while (mask) {
+ i = u_bit_scan(&mask);
+ if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
+ rctx->vertex_buffer_state.dirty_mask |= 1 << i;
+ r600_vertex_buffers_dirty(rctx);
}
}
/* Streamout buffers. */
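+ /* Stop any in-flight streamout to this buffer; marking the targets
+ * dirty restarts it against the new storage in append mode. */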
- for (i = 0; i < rctx->num_so_targets; i++) {
- if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) {
- r600_context_streamout_end(rctx);
- rctx->streamout_start = TRUE;
- rctx->streamout_append_bitmask = ~0;
+ for (i = 0; i < rctx->b.streamout.num_targets; i++) {
+ if (rctx->b.streamout.targets[i]->b.buffer == &rbuffer->b.b) {
+ if (rctx->b.streamout.begin_emitted) {
+ r600_emit_streamout_end(&rctx->b);
+ }
+ rctx->b.streamout.append_bitmask = rctx->b.streamout.enabled_mask;
+ r600_streamout_buffers_dirty(&rctx->b);
}
}
/* Constant buffers. */
- r600_set_constants_dirty_if_bound(rctx, &rctx->vs_constbuf_state, rbuffer);
- r600_set_constants_dirty_if_bound(rctx, &rctx->ps_constbuf_state, rbuffer);
+ r600_set_constants_dirty_if_bound(rctx, rbuffer);
}
}
+ else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+ !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
+ !(rctx->screen->debug_flags & DBG_NO_DISCARD_RANGE) &&
+ (rctx->screen->has_cp_dma ||
+ (rctx->screen->has_streamout &&
+ /* The buffer range must be aligned to 4 with streamout. */
+ box->x % 4 == 0 && box->width % 4 == 0))) {
+ assert(usage & PIPE_TRANSFER_WRITE);
- if (rbuffer->b.b.user_ptr)
- return rbuffer->b.b.user_ptr + transfer->box.x;
+ /* Check if mapping this buffer would cause waiting for the GPU. */
+ if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
+ rctx->b.ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
+ /* Do a wait-free write-only transfer using a temporary buffer. */
+ unsigned offset;
+ struct r600_resource *staging = NULL;
+
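+ /* Over-allocate so the staging data can sit at the same offset
+ * modulo R600_MAP_BUFFER_ALIGNMENT as box->x; matching alignment
+ * keeps the copy on unmap dword-aligned when the box is. */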
+ u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
+ &offset, (struct pipe_resource**)&staging, (void**)&data);
- data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, transfer->usage);
- if (!data)
+ if (staging) {
+ data += box->x % R600_MAP_BUFFER_ALIGNMENT;
+ return r600_buffer_get_transfer(ctx, resource, level, usage, box,
+ ptransfer, data, staging, offset);
+ }
+ }
+ }
+
+ /* mmap and synchronize with rings */
+ data = r600_buffer_mmap_sync_with_rings(rctx, rbuffer, usage);
+ if (!data) {
return NULL;
+ }
+ data += box->x;
- return (uint8_t*)data + transfer->box.x;
+ return r600_buffer_get_transfer(ctx, resource, level, usage, box,
+ ptransfer, data, NULL, 0);
}
static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
struct pipe_transfer *transfer)
{
- /* no-op */
-}
+ struct r600_context *rctx = (struct r600_context*)pipe;
+ struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+ struct r600_resource *rbuffer = r600_resource(transfer->resource);
-static void r600_transfer_destroy(struct pipe_context *ctx,
- struct pipe_transfer *transfer)
-{
- struct r600_context *rctx = (struct r600_context*)ctx;
+ if (rtransfer->staging) {
+ struct pipe_resource *dst, *src;
+ unsigned soffset, doffset, size;
+
+ dst = transfer->resource;
+ src = &rtransfer->staging->b.b;
+ size = transfer->box.width;
+ doffset = transfer->box.x;
+ soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
+ /* Copy the staging buffer into the original one. */
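+ /* The async DMA ring copies whole dwords; take the generic buffer
+ * copy path when the size or either offset is misaligned. */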
+ if (rctx->b.rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset % 4)) {
+ if (rctx->screen->b.chip_class >= EVERGREEN) {
+ evergreen_dma_copy(rctx, dst, src, doffset, soffset, size);
+ } else {
+ r600_dma_copy(rctx, dst, src, doffset, soffset, size);
+ }
+ } else {
+ struct pipe_box box;
+
+ u_box_1d(soffset, size, &box);
+ r600_copy_buffer(pipe, dst, doffset, src, &box);
+ }
+ pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
+ }
+
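+ /* The written range now holds valid data, so later maps of it can
+ * no longer be promoted to unsynchronized. */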
+ if (transfer->usage & PIPE_TRANSFER_WRITE) {
+ util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
+ transfer->box.x + transfer->box.width);
+ }
util_slab_free(&rctx->pool_transfers, transfer);
}
static const struct u_resource_vtbl r600_buffer_vtbl =
{
u_default_resource_get_handle, /* get_handle */
r600_buffer_destroy, /* resource_destroy */
- r600_get_transfer, /* get_transfer */
- r600_transfer_destroy, /* transfer_destroy */
r600_buffer_transfer_map, /* transfer_map */
NULL, /* transfer_flush_region */
r600_buffer_transfer_unmap, /* transfer_unmap */
bool r600_init_resource(struct r600_screen *rscreen,
struct r600_resource *res,
unsigned size, unsigned alignment,
- unsigned bind, unsigned usage)
+ bool use_reusable_pool, unsigned usage)
{
uint32_t initial_domain, domains;
- /* Staging resources particpate in transfers and blits only
- * and are used for uploads and downloads from regular
- * resources. We generate them internally for some transfers.
- */
- if (usage == PIPE_USAGE_STAGING) {
+ switch(usage) {
+ case PIPE_USAGE_STAGING:
+ /* Staging resources participate in transfers, i.e. are used
+ * for uploads and downloads from regular resources.
+ * We generate them internally for some transfers.
+ */
+ initial_domain = RADEON_DOMAIN_GTT;
domains = RADEON_DOMAIN_GTT;
- initial_domain = RADEON_DOMAIN_GTT;
+ break;
+ case PIPE_USAGE_DYNAMIC:
+ case PIPE_USAGE_STREAM:
+ /* Default to GTT, but allow the memory manager to move it to VRAM. */
initial_domain = RADEON_DOMAIN_GTT;
- } else {
domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
-
- switch(usage) {
- case PIPE_USAGE_DYNAMIC:
- case PIPE_USAGE_STREAM:
- case PIPE_USAGE_STAGING:
- initial_domain = RADEON_DOMAIN_GTT;
- break;
- case PIPE_USAGE_DEFAULT:
- case PIPE_USAGE_STATIC:
- case PIPE_USAGE_IMMUTABLE:
- default:
- initial_domain = RADEON_DOMAIN_VRAM;
- break;
- }
+ break;
+ case PIPE_USAGE_DEFAULT:
+ case PIPE_USAGE_STATIC:
+ case PIPE_USAGE_IMMUTABLE:
+ default:
+ /* Don't list GTT here, because the memory manager would move some
+ * resources to GTT no matter what the initial domain is.
+ * Not listing GTT in the domains improves performance a lot. */
+ initial_domain = RADEON_DOMAIN_VRAM;
+ domains = RADEON_DOMAIN_VRAM;
+ break;
}
- res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, bind, initial_domain);
+ res->buf = rscreen->b.ws->buffer_create(rscreen->b.ws, size, alignment,
+ use_reusable_pool,
+ initial_domain);
if (!res->buf) {
return false;
}
- res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
+ res->cs_buf = rscreen->b.ws->buffer_get_cs_handle(res->buf);
res->domains = domains;
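+ /* Fresh storage contains no valid data yet. */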
+ util_range_set_empty(&res->valid_buffer_range);
+
+ if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
+ fprintf(stderr, "VM start=0x%llX end=0x%llX | Buffer %u bytes\n",
+ r600_resource_va(&rscreen->b.b, &res->b.b),
+ r600_resource_va(&rscreen->b.b, &res->b.b) + res->buf->size,
+ res->buf->size);
+ }
return true;
}
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
- const struct pipe_resource *templ)
+ const struct pipe_resource *templ,
+ unsigned alignment)
{
struct r600_screen *rscreen = (struct r600_screen*)screen;
struct r600_resource *rbuffer;
- /* XXX We probably want a different alignment for buffers and textures. */
- unsigned alignment = 4096;
- rbuffer = util_slab_alloc(&rscreen->pool_buffers);
+ rbuffer = MALLOC_STRUCT(r600_resource);
rbuffer->b.b = *templ;
pipe_reference_init(&rbuffer->b.b.reference, 1);
rbuffer->b.b.screen = screen;
- rbuffer->b.b.user_ptr = NULL;
rbuffer->b.vtbl = &r600_buffer_vtbl;
+ util_range_init(&rbuffer->valid_buffer_range);
- if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, templ->bind, templ->usage)) {
- util_slab_free(&rscreen->pool_buffers, rbuffer);
+ if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE, templ->usage)) {
+ FREE(rbuffer);
return NULL;
}
return &rbuffer->b.b;
}
-
-struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
- void *ptr, unsigned bytes,
- unsigned bind)
-{
- struct r600_screen *rscreen = (struct r600_screen*)screen;
- struct r600_resource *rbuffer;
-
- rbuffer = util_slab_alloc(&rscreen->pool_buffers);
-
- pipe_reference_init(&rbuffer->b.b.reference, 1);
- rbuffer->b.vtbl = &r600_buffer_vtbl;
- rbuffer->b.b.screen = screen;
- rbuffer->b.b.target = PIPE_BUFFER;
- rbuffer->b.b.format = PIPE_FORMAT_R8_UNORM;
- rbuffer->b.b.usage = PIPE_USAGE_IMMUTABLE;
- rbuffer->b.b.bind = bind;
- rbuffer->b.b.width0 = bytes;
- rbuffer->b.b.height0 = 1;
- rbuffer->b.b.depth0 = 1;
- rbuffer->b.b.array_size = 1;
- rbuffer->b.b.flags = 0;
- rbuffer->b.b.user_ptr = ptr;
- rbuffer->buf = NULL;
- return &rbuffer->b.b;
-}