struct pb_buffer *buf;
struct radeon_winsys_cs_handle *cs_buf;
+ enum radeon_bo_domain domain;
+
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
uint32_t pitch_zmask; /* ZMASK_PITCH */
/* Winsys buffer backing this resource. */
struct pb_buffer *buf;
struct radeon_winsys_cs_handle *cs_buf;
+ enum radeon_bo_domain domain;
/* Constant buffers are in user memory. */
uint8_t *constant_buffer;
tex = r300_resource(fb->cbufs[i]->texture);
assert(tex && tex->buf && "cbuf is marked, but NULL!");
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
- RADEON_USAGE_READWRITE);
+ RADEON_USAGE_READWRITE,
+ r300_surface(fb->cbufs[i])->domain);
}
/* ...depth buffer... */
if (fb->zsbuf) {
tex = r300_resource(fb->zsbuf->texture);
assert(tex && tex->buf && "zsbuf is marked, but NULL!");
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
- RADEON_USAGE_READWRITE);
+ RADEON_USAGE_READWRITE,
+ r300_surface(fb->zsbuf)->domain);
}
}
if (r300->textures_state.dirty) {
}
tex = r300_resource(texstate->sampler_views[i]->base.texture);
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ);
+ r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
+ tex->domain);
}
}
/* ...occlusion query buffer... */
if (r300->query_current)
r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
- RADEON_USAGE_WRITE);
+ RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
/* ...vertex buffer for SWTCL path... */
if (r300->vbo)
r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf,
- RADEON_USAGE_READ);
+ RADEON_USAGE_READ,
+ r300_resource(r300->vbo)->domain);
/* ...vertex buffers for HWTCL path... */
if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->real_vertex_buffer;
continue;
r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf,
- RADEON_USAGE_READ);
+ RADEON_USAGE_READ,
+ r300_resource(buf)->domain);
}
}
/* ...and index buffer for HWTCL path. */
if (index_buffer)
r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
- RADEON_USAGE_READ);
+ RADEON_USAGE_READ,
+ r300_resource(index_buffer)->domain);
/* Now do the validation (flush is called inside cs_validate on failure). */
if (!r300->rws->cs_validate(r300->cs)) {
/* Create a fence, which is a dummy BO. */
*rfence = r300->rws->buffer_create(r300->rws, 1, 1,
PIPE_BIND_CUSTOM,
- PIPE_USAGE_IMMUTABLE);
+ RADEON_DOMAIN_GTT);
/* Add the fence as a dummy relocation. */
r300->rws->cs_add_reloc(r300->cs,
r300->rws->buffer_get_cs_handle(*rfence),
- RADEON_USAGE_READWRITE);
+ RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT);
}
if (r300->dirty_hw) {
q->num_pipes = r300screen->info.r300_num_gb_pipes;
q->buf = r300->rws->buffer_create(r300->rws, 4096, 4096,
- PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING);
+ PIPE_BIND_CUSTOM, RADEON_DOMAIN_GTT);
if (!q->buf) {
FREE(q);
return NULL;
pipe_reference_init(&rbuf->b.b.b.reference, 1);
rbuf->b.b.b.screen = screen;
rbuf->b.user_ptr = NULL;
+ rbuf->domain = RADEON_DOMAIN_GTT;
rbuf->buf = NULL;
rbuf->constant_buffer = NULL;
return &rbuf->b.b.b;
}
-#ifdef PIPE_ARCH_BIG_ENDIAN
- /* Force buffer placement to GTT on big endian machines, because
- * the vertex fetcher can't swap bytes from VRAM. */
- rbuf->b.b.b.usage = PIPE_USAGE_STAGING;
-#endif
-
rbuf->buf =
r300screen->rws->buffer_create(r300screen->rws,
rbuf->b.b.b.width0, alignment,
- rbuf->b.b.b.bind, rbuf->b.b.b.usage);
+ rbuf->b.b.b.bind, rbuf->domain);
if (!rbuf->buf) {
util_slab_free(&r300screen->pool_buffers, rbuf);
return NULL;
rbuf->b.b.b.flags = 0;
rbuf->b.b.vtbl = &r300_buffer_vtbl;
rbuf->b.user_ptr = ptr;
+ rbuf->domain = RADEON_DOMAIN_GTT;
rbuf->buf = NULL;
rbuf->constant_buffer = NULL;
return &rbuf->b.b.b;
tex->tex.microtile = microtile;
tex->tex.macrotile[0] = macrotile;
tex->tex.stride_in_bytes_override = stride_in_bytes_override;
+ tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
+ RADEON_DOMAIN_GTT :
+ RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT;
tex->buf = buffer;
r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, base);
/* Create the backing buffer if needed. */
if (!tex->buf) {
tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048,
- base->bind, base->usage);
+ base->bind, tex->domain);
if (!tex->buf) {
FREE(tex);
surface->buf = tex->buf;
surface->cs_buf = tex->cs_buf;
+ /* Prefer VRAM if there are multiple domains to choose from. */
+ surface->domain = tex->domain;
+ if (surface->domain & RADEON_DOMAIN_VRAM)
+ surface->domain &= ~RADEON_DOMAIN_GTT;
+
surface->offset = r300_texture_get_offset(tex, level,
surf_tmpl->u.tex.first_layer);
r300_texture_setup_fb_state(surface);
/* Winsys objects. */
struct pb_buffer *buf;
struct radeon_winsys_cs_handle *cs_buf;
+
+ /* Resource state. */
+ unsigned domains;
};
/* R600/R700 STATES */
unsigned size, unsigned alignment,
unsigned bind, unsigned usage)
{
- res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, bind, usage);
+ uint32_t initial_domain, domains;
+
+ /* Staging resources participate only in transfers and blits; they are
+ * used for uploads to and downloads from regular resources, and we
+ * generate them internally for some transfers.
+ */
+ if (usage == PIPE_USAGE_STAGING) {
+ domains = RADEON_DOMAIN_GTT;
+ initial_domain = RADEON_DOMAIN_GTT;
+ } else {
+ domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
+
+ switch (usage) {
+ case PIPE_USAGE_DYNAMIC:
+ case PIPE_USAGE_STREAM:
+ case PIPE_USAGE_STAGING:
+ initial_domain = RADEON_DOMAIN_GTT;
+ break;
+ case PIPE_USAGE_DEFAULT:
+ case PIPE_USAGE_STATIC:
+ case PIPE_USAGE_IMMUTABLE:
+ default:
+ initial_domain = RADEON_DOMAIN_VRAM;
+ break;
+ }
+ }
+
+ res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, bind, initial_domain);
if (!res->buf) {
return false;
}
res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
+ res->domains = domains;
return true;
}
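For reference, the placement policy that r600_init_resource now implements can be condensed into the small standalone sketch below. The pick_domains helper and the DOM_*/U_* enums are illustrative stand-ins for the RADEON_DOMAIN_* and PIPE_USAGE_* values, not definitions from the real headers.

#include <stdio.h>

enum domain { DOM_GTT = 2, DOM_VRAM = 4 };
enum usage  { U_DEFAULT, U_DYNAMIC, U_STREAM, U_STAGING };

static void pick_domains(enum usage u, unsigned *initial, unsigned *allowed)
{
    if (u == U_STAGING) {
        /* Staging buffers never leave GTT. */
        *allowed = *initial = DOM_GTT;
        return;
    }
    /* Everything else may migrate between GTT and VRAM; frequently
     * CPU-written buffers start in GTT, the rest start in VRAM. */
    *allowed = DOM_GTT | DOM_VRAM;
    *initial = (u == U_DYNAMIC || u == U_STREAM) ? DOM_GTT : DOM_VRAM;
}

int main(void)
{
    unsigned init, all;
    pick_domains(U_STREAM, &init, &all);
    printf("stream buffer: initial=0x%x, allowed=0x%x\n", init, all); /* 0x2, 0x6 */
    return 0;
}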
assert(usage);
- reloc_index = ctx->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, usage);
+ reloc_index = ctx->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, usage, rbo->domains);
if (reloc_index >= ctx->creloc)
ctx->creloc = reloc_index+1;
} else if (buf) {
resource->buf = buf;
resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
+ resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
}
if (rtex->stencil) {
pb_reference(&rtex->stencil->resource.buf, rtex->resource.buf);
rtex->stencil->resource.cs_buf = rtex->resource.cs_buf;
+ rtex->stencil->resource.domains = rtex->resource.domains;
}
return rtex;
}
memset(&args, 0, sizeof(args));
- assert(rdesc->initial_domains && rdesc->reloc_domains);
+ assert(rdesc->initial_domains);
assert((rdesc->initial_domains &
~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
- assert((rdesc->reloc_domains &
- ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);
args.size = size;
args.alignment = desc->alignment;
bo->mgr = mgr;
bo->rws = mgr->rws;
bo->handle = args.handle;
- bo->reloc_domains = rdesc->reloc_domains;
pipe_mutex_init(bo->map_mutex);
return &bo->base;
radeon_winsys_bo_create(struct radeon_winsys *rws,
unsigned size,
unsigned alignment,
- unsigned bind, unsigned usage)
+ unsigned bind,
+ enum radeon_bo_domain domain)
{
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
struct radeon_bo_desc desc;
memset(&desc, 0, sizeof(desc));
desc.base.alignment = alignment;
- /* Determine the memory domains. */
- switch (usage) {
- case PIPE_USAGE_STAGING:
- case PIPE_USAGE_STREAM:
- case PIPE_USAGE_DYNAMIC:
- desc.initial_domains = RADEON_GEM_DOMAIN_GTT;
- desc.reloc_domains = RADEON_GEM_DOMAIN_GTT;
- break;
- case PIPE_USAGE_IMMUTABLE:
- case PIPE_USAGE_STATIC:
- desc.initial_domains = RADEON_GEM_DOMAIN_VRAM;
- desc.reloc_domains = RADEON_GEM_DOMAIN_VRAM;
- break;
- default:
- if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER)) {
- desc.initial_domains = RADEON_GEM_DOMAIN_GTT;
- } else {
- desc.initial_domains = RADEON_GEM_DOMAIN_VRAM;
- }
- desc.reloc_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
- }
-
/* Additional criteria for the cache manager. */
- desc.base.usage = desc.initial_domains;
+ desc.base.usage = domain;
+ desc.initial_domains = domain;
/* Assign a buffer manager. */
if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
}
bo->handle = open_arg.handle;
bo->name = whandle->handle;
- bo->reloc_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
/* Initialize it. */
pipe_reference_init(&bo->base.reference, 1);
struct pb_desc base;
unsigned initial_domains;
- unsigned reloc_domains;
};
struct radeon_bo {
void *ptr;
pipe_mutex map_mutex;
- uint32_t reloc_domains;
uint32_t handle;
uint32_t name;
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
static INLINE void update_reloc_domains(struct drm_radeon_cs_reloc *reloc,
- enum radeon_bo_usage usage,
- unsigned domains)
+ enum radeon_bo_domain rd,
+ enum radeon_bo_domain wd,
+ enum radeon_bo_domain *added_domains)
{
- if (usage & RADEON_USAGE_READ)
- reloc->read_domains |= domains;
- if (usage & RADEON_USAGE_WRITE)
- reloc->write_domain |= domains;
+ *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
+
+ reloc->read_domains |= rd;
+ reloc->write_domain |= wd;
}
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
if (reloc->handle == bo->handle) {
/* Put this reloc in the hash list.
* This will prevent additional hash collisions if there are
- * several subsequent get_reloc calls of the same buffer.
+ * several consecutive get_reloc calls for the same buffer.
*
* Example: Assuming buffers A,B,C collide in the hash list,
* the following sequence of relocs:
static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
struct radeon_bo *bo,
enum radeon_bo_usage usage,
- unsigned *added_domains)
+ enum radeon_bo_domain domains,
+ enum radeon_bo_domain *added_domains)
{
struct drm_radeon_cs_reloc *reloc;
unsigned i;
unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
+ enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
+ enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
if (csc->is_handle_added[hash]) {
reloc = csc->relocs_hashlist[hash];
if (reloc->handle == bo->handle) {
- update_reloc_domains(reloc, usage, bo->reloc_domains);
+ update_reloc_domains(reloc, rd, wd, added_domains);
return csc->reloc_indices_hashlist[hash];
}
--i;
reloc = &csc->relocs[i];
if (reloc->handle == bo->handle) {
- update_reloc_domains(reloc, usage, bo->reloc_domains);
+ update_reloc_domains(reloc, rd, wd, added_domains);
csc->relocs_hashlist[hash] = reloc;
csc->reloc_indices_hashlist[hash] = i;
p_atomic_inc(&bo->num_cs_references);
reloc = &csc->relocs[csc->crelocs];
reloc->handle = bo->handle;
- if (usage & RADEON_USAGE_READ)
- reloc->read_domains = bo->reloc_domains;
- if (usage & RADEON_USAGE_WRITE)
- reloc->write_domain = bo->reloc_domains;
+ reloc->read_domains = rd;
+ reloc->write_domain = wd;
reloc->flags = 0;
csc->is_handle_added[hash] = TRUE;
csc->chunks[1].length_dw += RELOC_DWORDS;
- *added_domains = bo->reloc_domains;
+ *added_domains = rd | wd;
return csc->crelocs++;
}
static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf,
- enum radeon_bo_usage usage)
+ enum radeon_bo_usage usage,
+ enum radeon_bo_domain domains)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct radeon_bo *bo = (struct radeon_bo*)buf;
- unsigned added_domains = 0;
+ enum radeon_bo_domain added_domains;
- unsigned index = radeon_add_reloc(cs->csc, bo, usage, &added_domains);
+ unsigned index = radeon_add_reloc(cs->csc, bo, usage, domains, &added_domains);
- if (added_domains & RADEON_GEM_DOMAIN_GTT)
+ if (added_domains & RADEON_DOMAIN_GTT)
cs->csc->used_gart += bo->base.size;
- if (added_domains & RADEON_GEM_DOMAIN_VRAM)
+ if (added_domains & RADEON_DOMAIN_VRAM)
cs->csc->used_vram += bo->base.size;
return index;
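The reason added_domains is computed as a delta rather than the full mask is the accounting above: a buffer relocated more than once must be charged to used_gart/used_vram only once. A small standalone sketch, with stand-in types for the winsys structures, shows the behaviour.

#include <assert.h>

enum { GTT = 2, VRAM = 4 };

struct reloc { unsigned read_domains, write_domain; };

static unsigned merge_domains(struct reloc *r, unsigned rd, unsigned wd)
{
    /* Only the domains not already recorded for this reloc count as added. */
    unsigned added = (rd | wd) & ~(r->read_domains | r->write_domain);
    r->read_domains |= rd;
    r->write_domain |= wd;
    return added;
}

int main(void)
{
    struct reloc r = {0, 0};
    assert(merge_domains(&r, GTT | VRAM, 0) == (GTT | VRAM)); /* first use: count both */
    assert(merge_domains(&r, GTT, GTT) == 0);                 /* same buffer again: nothing new */
    return 0;
}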
RADEON_LAYOUT_UNKNOWN
};
+enum radeon_bo_domain { /* bitfield */
+ RADEON_DOMAIN_GTT = 2,
+ RADEON_DOMAIN_VRAM = 4
+};
+
enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_READ = 2,
RADEON_USAGE_WRITE = 4,
* \param size The size to allocate.
* \param alignment An alignment of the buffer in memory.
* \param bind A bitmask of the PIPE_BIND_* flags.
- * \param usage A bitmask of the PIPE_USAGE_* flags.
+ * \param domain A bitmask of the RADEON_DOMAIN_* flags.
* \return The created buffer object.
*/
struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,
unsigned size,
unsigned alignment,
- unsigned bind, unsigned usage);
+ unsigned bind,
+ enum radeon_bo_domain domain);
struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
struct pb_buffer *buf);
*
* \param cs A command stream to add buffer for validation against.
* \param buf A winsys buffer to validate.
- * \param usage Whether the buffer is used for read and/or write.
+ * \param usage Whether the buffer is used for read and/or write.
+ * \param domain Bitmask of the RADEON_DOMAIN_* flags.
* \return Relocation index.
*/
unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf,
- enum radeon_bo_usage usage);
+ enum radeon_bo_usage usage,
+ enum radeon_bo_domain domain);
/**
* Return TRUE if there is enough memory in VRAM and GTT for the relocs
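Taken together, the updated entry points are called in roughly the following pattern from a driver. This is a sketch that assumes an existing radeon_winsys/radeon_winsys_cs pair and the declarations from this header; it is not code lifted from r300g or r600g.

/* Sketch only: allocate a buffer that may live in GTT or VRAM and add it
 * to a CS for reading, letting the kernel pick either domain. */
static void add_read_buffer(struct radeon_winsys *ws,
                            struct radeon_winsys_cs *cs,
                            unsigned size)
{
    struct pb_buffer *buf =
        ws->buffer_create(ws, size, 4096, PIPE_BIND_VERTEX_BUFFER,
                          RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM);
    struct radeon_winsys_cs_handle *handle = ws->buffer_get_cs_handle(buf);

    ws->cs_add_reloc(cs, handle, RADEON_USAGE_READ,
                     RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM);
}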