/* Upload code + ROdata */
shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen,
shader->bc.ndw * 4);
- p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
+ p = r600_buffer_map_sync_with_rings(
+ &rctx->b, shader->code_bo,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
//TODO: use util_memcpy_cpu_to_le32 ?
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
rctx->b.ws->buffer_unmap(shader->code_bo->buf);
return NULL;
}
- bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+ bytecode = r600_buffer_map_sync_with_rings(
+ &rctx->b, shader->buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
bytecode += shader->offset / 4;
if (R600_BIG_ENDIAN) {
if (shader->bo == NULL) {
return -ENOMEM;
}
- ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
+ ptr = r600_buffer_map_sync_with_rings(
+ &rctx->b, shader->bo,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (R600_BIG_ENDIAN) {
for (i = 0; i < shader->shader.bc.ndw; ++i) {
ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* and map it for CPU access */
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
dec->bs_size = 0;
dec->bs_ptr = dec->ws->buffer_map(
dec->bs_buffers[dec->cur_buffer].res->buf,
- dec->cs, PIPE_TRANSFER_WRITE);
+ dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
}
/**
}
dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
- PIPE_TRANSFER_WRITE);
+ PIPE_TRANSFER_WRITE |
+ RADEON_TRANSFER_TEMPORARY);
if (!dec->bs_ptr)
return;
struct rvid_buffer *fb = feedback;
if (size) {
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+ uint32_t *ptr = enc->ws->buffer_map(
+ fb->res->buf, enc->cs,
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
if (ptr[1]) {
*size = ptr[4] - ptr[9];
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
- src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
+ src = ws->buffer_map(old_buf.res->buf, cs,
+ PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY);
if (!src)
goto error;
- dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
+ dst = ws->buffer_map(new_buf->res->buf, cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!dst)
goto error;
buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* and map it for CPU access */
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
dec->bs_size = 0;
dec->bs_ptr = dec->ws->buffer_map(
dec->bs_buffers[dec->cur_buffer].res->buf,
- dec->cs, PIPE_TRANSFER_WRITE);
+ dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
}
/**
return;
}
- dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
- PIPE_TRANSFER_WRITE);
+ dec->bs_ptr = dec->ws->buffer_map(
+ buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!dec->bs_ptr)
return;
if (NULL != size) {
radeon_uvd_enc_feedback_t *fb_data =
- (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf,
- enc->cs,
- PIPE_TRANSFER_READ_WRITE);
+ (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(
+ fb->res->buf, enc->cs,
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!fb_data->status)
*size = fb_data->bitstream_size;
struct rvid_buffer *fb = feedback;
if (size) {
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+ uint32_t *ptr = enc->ws->buffer_map(
+ fb->res->buf, enc->cs,
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
if (ptr[1]) {
*size = ptr[4] - ptr[9];
si_vid_clear_buffer(dec->base.context, &dec->ctx);
/* ctx needs probs table */
- ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(
+ dec->ctx.res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
fill_probs_table(ptr);
dec->ws->buffer_unmap(dec->ctx.res->buf);
}
buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
/* and map it for CPU access */
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
/* calc buffer offsets */
dec->msg = ptr;
dec->bs_size = 0;
dec->bs_ptr = dec->ws->buffer_map(
dec->bs_buffers[dec->cur_buffer].res->buf,
- dec->cs, PIPE_TRANSFER_WRITE);
+ dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
}
/**
return;
}
- dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
- PIPE_TRANSFER_WRITE);
+ dec->bs_ptr = dec->ws->buffer_map(
+ buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!dec->bs_ptr)
return;
void *ptr;
buf = &dec->msg_fb_it_probs_buffers[i];
- ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+ ptr = dec->ws->buffer_map(
+ buf->res->buf, dec->cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
fill_probs_table(ptr);
dec->ws->buffer_unmap(buf->res->buf);
struct rvid_buffer *fb = feedback;
if (size) {
- uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+ uint32_t *ptr = enc->ws->buffer_map(
+ fb->res->buf, enc->cs,
+ PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY);
if (ptr[1])
*size = ptr[6];
else
if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
- src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
+ src = ws->buffer_map(old_buf.res->buf, cs,
+ PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY);
if (!src)
goto error;
- dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
+ dst = ws->buffer_map(new_buf->res->buf, cs,
+ PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
if (!dst)
goto error;
RADEON_USAGE_SYNCHRONIZED = 8
};
+enum radeon_transfer_flags {
+ /* Indicates that the caller will unmap the buffer.
+ *
+ * Not unmapping buffers is an important performance optimization for
+ * OpenGL (avoids kernel overhead for frequently mapped buffers).
+ */
+ RADEON_TRANSFER_TEMPORARY = (PIPE_TRANSFER_DRV_PRV << 0),
+};
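+/* Illustrative usage sketch, assuming a winsys handle `ws`, a
+ * `struct pb_buffer *buf`, a command stream `cs`, and some `data`/`size`
+ * to upload:
+ *
+ *    // Short-lived CPU access: pass RADEON_TRANSFER_TEMPORARY and unmap.
+ *    void *ptr = ws->buffer_map(buf, cs,
+ *                               PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+ *    if (ptr) {
+ *       memcpy(ptr, data, size);
+ *       ws->buffer_unmap(buf);
+ *    }
+ *
+ *    // Without the flag the winsys keeps the mapping for the lifetime of
+ *    // the buffer and the caller must not call buffer_unmap.
+ *    void *persistent = ws->buffer_map(buf, cs, PIPE_TRANSFER_WRITE);
+ */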
+
#define RADEON_SPARSE_PAGE_SIZE (64 * 1024)
enum ring_type {
* Map the entire data store of a buffer object into the client's address
* space.
*
+ * Callers are expected to unmap buffers again if and only if the
+ * RADEON_TRANSFER_TEMPORARY flag is set in \p usage.
+ *
* \param buf A winsys buffer object to map.
* \param cs A command stream to flush if the buffer is referenced by it.
- * \param usage A bitmask of the PIPE_TRANSFER_* flags.
+ * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags.
* \return The pointer at the beginning of the buffer.
*/
void *(*buffer_map)(struct pb_buffer *buf,
/* Upload. */
ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED);
+ PIPE_TRANSFER_UNSYNCHRONIZED |
+ RADEON_TRANSFER_TEMPORARY);
/* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
* endian-independent. */
* PIPE_RESOURCE_FLAG_MAP_COHERENT must be set when creating
* the resource.
*/
- PIPE_TRANSFER_COHERENT = (1 << 14)
+ PIPE_TRANSFER_COHERENT = (1 << 14),
+
+ /**
+ * This and higher bits are reserved for private use by drivers. Drivers
+ * should use this as (PIPE_TRANSFER_DRV_PRV << i).
+ */
+ PIPE_TRANSFER_DRV_PRV = (1 << 24)
};
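+/* A driver claims one of the reserved bits by shifting PIPE_TRANSFER_DRV_PRV;
+ * the radeon winsys defines RADEON_TRANSFER_TEMPORARY as
+ * (PIPE_TRANSFER_DRV_PRV << 0), and a hypothetical second driver-private flag
+ * would use (PIPE_TRANSFER_DRV_PRV << 1). */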
/**
unsigned alignment,
enum radeon_bo_domain domain,
enum radeon_bo_flag flags);
+static void amdgpu_bo_unmap(struct pb_buffer *buf);
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
enum radeon_bo_usage usage)
assert(bo->bo && "must not be called for slab entries");
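+ /* Tear down any cached permanent mapping so the mapped-memory statistics
+ * stay balanced (amdgpu_bo_unmap drops map_count and the mapped_* counters). */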
+ if (!bo->is_user_ptr && bo->cpu_ptr) {
+ bo->cpu_ptr = NULL;
+ amdgpu_bo_unmap(&bo->base);
+ }
+ assert(bo->is_user_ptr || bo->u.real.map_count == 0);
+
if (ws->debug_all_bos) {
simple_mtx_lock(&ws->global_bo_list_lock);
LIST_DEL(&bo->u.real.global_list_item);
else if (bo->initial_domain & RADEON_DOMAIN_GTT)
ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);
- if (bo->u.real.map_count >= 1) {
- if (bo->initial_domain & RADEON_DOMAIN_VRAM)
- ws->mapped_vram -= bo->base.size;
- else if (bo->initial_domain & RADEON_DOMAIN_GTT)
- ws->mapped_gtt -= bo->base.size;
- ws->num_mapped_buffers--;
- }
-
simple_mtx_destroy(&bo->lock);
FREE(bo);
}
amdgpu_bo_destroy(_buf);
}
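+/* Map a real (non-slab, non-sparse) buffer with amdgpu_bo_cpu_map(), clearing
+ * the buffer cache and retrying once on failure, and account the first mapping
+ * in the winsys mapped-memory statistics. Returns false if the kernel mapping
+ * fails. */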
+static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu)
+{
+ assert(!bo->sparse && bo->bo && !bo->is_user_ptr);
+ int r = amdgpu_bo_cpu_map(bo->bo, cpu);
+ if (r) {
+ /* Clear the cache and try again. */
+ pb_cache_release_all_buffers(&bo->ws->bo_cache);
+ r = amdgpu_bo_cpu_map(bo->bo, cpu);
+ if (r)
+ return false;
+ }
+
+ if (p_atomic_inc_return(&bo->u.real.map_count) == 1) {
+ if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+ bo->ws->mapped_vram += bo->base.size;
+ else if (bo->initial_domain & RADEON_DOMAIN_GTT)
+ bo->ws->mapped_gtt += bo->base.size;
+ bo->ws->num_mapped_buffers++;
+ }
+
+ return true;
+}
+
static void *amdgpu_bo_map(struct pb_buffer *buf,
struct radeon_cmdbuf *rcs,
enum pipe_transfer_usage usage)
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
struct amdgpu_winsys_bo *real;
struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
- int r;
- void *cpu = NULL;
- uint64_t offset = 0;
assert(!bo->sparse);
}
}
- /* If the buffer is created from user memory, return the user pointer. */
- if (bo->user_ptr)
- return bo->user_ptr;
+ /* Buffer synchronization has been checked, now actually map the buffer. */
+ void *cpu = NULL;
+ uint64_t offset = 0;
if (bo->bo) {
real = bo;
} else {
real = bo->u.slab.real;
offset = bo->va - real->va;
}
- r = amdgpu_bo_cpu_map(real->bo, &cpu);
- if (r) {
- /* Clear the cache and try again. */
- pb_cache_release_all_buffers(&real->ws->bo_cache);
- r = amdgpu_bo_cpu_map(real->bo, &cpu);
- if (r)
- return NULL;
+ if (usage & RADEON_TRANSFER_TEMPORARY) {
+ if (real->is_user_ptr) {
+ cpu = real->cpu_ptr;
+ } else {
+ if (!amdgpu_bo_do_map(real, &cpu))
+ return NULL;
+ }
+ } else {
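+ /* Permanent mapping: create it at most once and cache it in cpu_ptr;
+ * double-checked locking keeps the common (already mapped) path lock-free. */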
+ cpu = p_atomic_read(&real->cpu_ptr);
+ if (!cpu) {
+ simple_mtx_lock(&real->lock);
+ /* Must re-check due to the possibility of a race. Re-check need not
+ * be atomic thanks to the lock. */
+ cpu = real->cpu_ptr;
+ if (!cpu) {
+ if (!amdgpu_bo_do_map(real, &cpu)) {
+ simple_mtx_unlock(&real->lock);
+ return NULL;
+ }
+ p_atomic_set(&real->cpu_ptr, cpu);
+ }
+ simple_mtx_unlock(&real->lock);
+ }
}
- if (p_atomic_inc_return(&real->u.real.map_count) == 1) {
- if (real->initial_domain & RADEON_DOMAIN_VRAM)
- real->ws->mapped_vram += real->base.size;
- else if (real->initial_domain & RADEON_DOMAIN_GTT)
- real->ws->mapped_gtt += real->base.size;
- real->ws->num_mapped_buffers++;
- }
return (uint8_t*)cpu + offset;
}
assert(!bo->sparse);
- if (bo->user_ptr)
+ if (bo->is_user_ptr)
return;
real = bo->bo ? bo : bo->u.slab.real;
-
+ assert(real->u.real.map_count != 0 && "too many unmaps");
if (p_atomic_dec_zero(&real->u.real.map_count)) {
+ assert(!real->cpu_ptr &&
+ "too many unmaps or forgot RADEON_TRANSFER_TEMPORARY flag");
+
if (real->initial_domain & RADEON_DOMAIN_VRAM)
real->ws->mapped_vram -= real->base.size;
else if (real->initial_domain & RADEON_DOMAIN_GTT)
goto error_va_map;
/* Initialize it. */
+ bo->is_user_ptr = true;
pipe_reference_init(&bo->base.reference, 1);
simple_mtx_init(&bo->lock, mtx_plain);
bo->bo = buf_handle;
bo->base.size = size;
bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
bo->ws = ws;
- bo->user_ptr = pointer;
+ bo->cpu_ptr = pointer;
bo->va = va;
bo->u.real.va_handle = va_handle;
bo->initial_domain = RADEON_DOMAIN_GTT;
static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
- return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
+ return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr;
}
static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf)
} u;
struct amdgpu_winsys *ws;
- void *user_ptr; /* from buffer_from_ptr */
+ void *cpu_ptr; /* for user_ptr and permanent maps */
amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */
bool sparse;
+ bool is_user_ptr;
bool is_local;
uint32_t unique_id;
uint64_t va;