/**
- * Writing relocations.
+ * Writing buffers.
*/
#define OUT_CS_RELOC(r) do { \
assert((r)); \
assert((r)->cs_buf); \
OUT_CS(0xc0001000); /* PKT3_NOP */ \
- OUT_CS(cs_winsys->cs_get_reloc(cs_copy, (r)->cs_buf) * 4); \
+ OUT_CS(cs_winsys->cs_lookup_buffer(cs_copy, (r)->cs_buf) * 4); \
} while (0)
assert(r300->vbo_cs);
OUT_CS(0xc0001000); /* PKT3_NOP */
- OUT_CS(r300->rws->cs_get_reloc(r300->cs, r300->vbo_cs) * 4);
+ OUT_CS(r300->rws->cs_lookup_buffer(r300->cs, r300->vbo_cs) * 4);
END_CS;
}
continue;
tex = r300_resource(fb->cbufs[i]->texture);
assert(tex && tex->buf && "cbuf is marked, but NULL!");
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
RADEON_USAGE_READWRITE,
r300_surface(fb->cbufs[i])->domain,
tex->b.b.nr_samples > 1 ?
if (fb->zsbuf) {
tex = r300_resource(fb->zsbuf->texture);
assert(tex && tex->buf && "zsbuf is marked, but NULL!");
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, tex->cs_buf,
RADEON_USAGE_READWRITE,
r300_surface(fb->zsbuf)->domain,
tex->b.b.nr_samples > 1 ?
/* The AA resolve buffer. */
if (r300->aa_state.dirty) {
if (aa->dest) {
- r300->rws->cs_add_reloc(r300->cs, aa->dest->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, aa->dest->cs_buf,
RADEON_USAGE_WRITE,
aa->dest->domain,
RADEON_PRIO_COLOR_BUFFER);
}
tex = r300_resource(texstate->sampler_views[i]->base.texture);
- r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
+ r300->rws->cs_add_buffer(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);
}
}
/* ...occlusion query buffer... */
if (r300->query_current)
- r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buf,
RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
RADEON_PRIO_QUERY);
/* ...vertex buffer for SWTCL path... */
if (r300->vbo_cs)
- r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs,
+ r300->rws->cs_add_buffer(r300->cs, r300->vbo_cs,
RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
RADEON_PRIO_VERTEX_BUFFER);
/* ...vertex buffers for HWTCL path... */
if (!buf)
continue;
- r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->cs_buf,
RADEON_USAGE_READ,
r300_resource(buf)->domain,
RADEON_PRIO_SAMPLER_BUFFER);
}
/* ...and index buffer for HWTCL path. */
if (index_buffer)
- r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
+ r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->cs_buf,
RADEON_USAGE_READ,
r300_resource(index_buffer)->domain,
RADEON_PRIO_INDEX_BUFFER);
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
}
}
- return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage,
+ return rctx->ws->cs_add_buffer(ring->cs, rbo->cs_buf, usage,
rbo->domains, priority) * 4;
}
{
int reloc_idx;
- reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
+ reloc_idx = dec->ws->cs_add_buffer(dec->cs, cs_buf, usage, domain,
RADEON_PRIO_UVD);
if (!dec->use_legacy) {
uint64_t addr;
{
int reloc_idx;
- reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_VCE);
+ reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE);
if (enc->use_vm) {
uint64_t addr;
addr = enc->ws->buffer_get_virtual_address(buf);
void (*cs_destroy)(struct radeon_winsys_cs *cs);
/**
- * Add a new buffer relocation. Every relocation must first be added
- * before it can be written.
+ * Add a buffer. Each buffer used by a CS must be added using this function.
*
- * \param cs A command stream to add buffer for validation against.
- * \param buf A winsys buffer to validate.
+ * \param cs Command stream
+ * \param buf Buffer
* \param usage Whether the buffer is used for read and/or write.
* \param domain Bitmask of the RADEON_DOMAIN_* flags.
* \param priority A higher number means a greater chance of being
* placed in the requested domain. 15 is the maximum.
- * \return Relocation index.
+ * \return Buffer index.
*/
- unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
+ unsigned (*cs_add_buffer)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domain,
* \param buf Buffer
* \return The buffer index, or -1 if the buffer has not been added.
*/
- int (*cs_get_reloc)(struct radeon_winsys_cs *cs,
- struct radeon_winsys_cs_handle *buf);
+ int (*cs_lookup_buffer)(struct radeon_winsys_cs *cs,
+ struct radeon_winsys_cs_handle *buf);
/**
- * Return TRUE if there is enough memory in VRAM and GTT for the relocs
- * added so far. If the validation fails, all the relocations which have
+ * Return TRUE if there is enough memory in VRAM and GTT for the buffers
+ * added so far. If the validation fails, all buffers which have
* been added since the last call of cs_validate will be removed and
- * the CS will be flushed (provided there are still any relocations).
+ * the CS will be flushed (provided there are still any buffers).
*
* \param cs A command stream to validate.
*/
boolean (*cs_validate)(struct radeon_winsys_cs *cs);
/**
- * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+ * Return TRUE if there is enough memory in VRAM and GTT for the buffers
* added so far.
*
* \param cs A command stream to validate.
{
uint64_t mask = views->desc.enabled_mask;
- /* Add relocations to the CS. */
+ /* Add buffers to the CS. */
while (mask) {
int i = u_bit_scan64(&mask);
struct si_sampler_view *rview =
{
uint64_t mask = buffers->desc.enabled_mask;
- /* Add relocations to the CS. */
+ /* Add buffers to the CS. */
while (mask) {
int i = u_bit_scan64(&mask);
struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
/* There are two memory usage counters in the winsys for all buffers
- * that have been added (cs_add_reloc) and two counters in the pipe
+ * that have been added (cs_add_buffer) and two counters in the pipe
* driver for those that haven't been added yet.
*/
if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs,
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
-int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
+int amdgpu_lookup_buffer(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
{
unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
int i = cs->buffer_indices_hashlist[hash];
if (i == -1 || cs->buffers[i].bo == bo)
return i;
- /* Hash collision, look for the BO in the list of relocs linearly. */
+ /* Hash collision, look for the BO in the list of buffers linearly. */
for (i = cs->num_buffers - 1; i >= 0; i--) {
if (cs->buffers[i].bo == bo) {
- /* Put this reloc in the hash list.
+ /* Put this buffer in the hash list.
* This will prevent additional hash collisions if there are
- * several consecutive get_reloc calls for the same buffer.
+ * several consecutive lookup_buffer calls for the same buffer.
*
* Example: Assuming buffers A,B,C collide in the hash list,
- * the following sequence of relocs:
+ * the following sequence of buffers:
* AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
* will collide here: ^ and here: ^,
* meaning that we should get very few collisions in the end. */
return -1;
}
-static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
+static unsigned amdgpu_add_buffer(struct amdgpu_cs *cs,
struct amdgpu_winsys_bo *bo,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
unsigned priority,
enum radeon_bo_domain *added_domains)
{
- struct amdgpu_cs_buffer *reloc;
+ struct amdgpu_cs_buffer *buffer;
unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
int i = -1;
assert(priority < 64);
*added_domains = 0;
- i = amdgpu_get_reloc(cs, bo);
+ i = amdgpu_lookup_buffer(cs, bo);
if (i >= 0) {
- reloc = &cs->buffers[i];
- reloc->usage |= usage;
- *added_domains = domains & ~reloc->domains;
- reloc->domains |= domains;
+ buffer = &cs->buffers[i];
+ buffer->usage |= usage;
+ *added_domains = domains & ~buffer->domains;
+ buffer->domains |= domains;
cs->flags[i] = MAX2(cs->flags[i], priority / 4);
return i;
}
- /* New relocation, check if the backing array is large enough. */
+ /* New buffer, check if the backing array is large enough. */
if (cs->num_buffers >= cs->max_num_buffers) {
uint32_t size;
cs->max_num_buffers += 10;
cs->flags = realloc(cs->flags, cs->max_num_buffers);
}
- /* Initialize the new relocation. */
+ /* Initialize the new buffer. */
cs->buffers[cs->num_buffers].bo = NULL;
amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
cs->handles[cs->num_buffers] = bo->bo;
cs->flags[cs->num_buffers] = priority / 4;
p_atomic_inc(&bo->num_cs_references);
- reloc = &cs->buffers[cs->num_buffers];
- reloc->bo = bo;
- reloc->usage = usage;
- reloc->domains = domains;
+ buffer = &cs->buffers[cs->num_buffers];
+ buffer->bo = bo;
+ buffer->usage = usage;
+ buffer->domains = domains;
cs->buffer_indices_hashlist[hash] = cs->num_buffers;
return cs->num_buffers++;
}
-static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
+static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
enum radeon_bo_domain added_domains;
- unsigned index = amdgpu_add_reloc(cs, bo, usage, bo->initial_domain,
+ unsigned index = amdgpu_add_buffer(cs, bo, usage, bo->initial_domain,
priority, &added_domains);
if (added_domains & RADEON_DOMAIN_GTT)
return index;
}
-static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs,
+static int amdgpu_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
- return amdgpu_get_reloc(cs, (struct amdgpu_winsys_bo*)buf);
+ return amdgpu_lookup_buffer(cs, (struct amdgpu_winsys_bo*)buf);
}
static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
fprintf(stderr, "amdgpu: command stream overflowed\n");
}
- amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer,
+ amdgpu_cs_add_buffer(rcs, (void*)cs->big_ib_winsys_buffer,
RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
/* If the CS is not empty or overflowed.... */
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
ws->base.cs_create = amdgpu_cs_create;
ws->base.cs_destroy = amdgpu_cs_destroy;
- ws->base.cs_add_reloc = amdgpu_cs_add_reloc;
- ws->base.cs_get_reloc = amdgpu_cs_get_reloc;
+ ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
+ ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer;
ws->base.cs_validate = amdgpu_cs_validate;
ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
ws->base.cs_flush = amdgpu_cs_flush;
struct amdgpu_cs_request request;
struct amdgpu_cs_ib_info ib;
- /* Relocs. */
+ /* Buffers. */
unsigned max_num_buffers;
unsigned num_buffers;
amdgpu_bo_handle *handles;
*rdst = rsrc;
}
-int amdgpu_get_reloc(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
+int amdgpu_lookup_buffer(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
static inline struct amdgpu_cs *
amdgpu_cs(struct radeon_winsys_cs *base)
{
int num_refs = bo->num_cs_references;
return num_refs == bo->rws->num_cs ||
- (num_refs && amdgpu_get_reloc(cs, bo) != -1);
+ (num_refs && amdgpu_lookup_buffer(cs, bo) != -1);
}
static inline boolean
if (!bo->num_cs_references)
return FALSE;
- index = amdgpu_get_reloc(cs, bo);
+ index = amdgpu_lookup_buffer(cs, bo);
if (index == -1)
return FALSE;
/*
This file replaces libdrm's radeon_cs_gem with our own implemention.
It's optimized specifically for Radeon DRM.
- Reloc writes and space checking are faster and simpler than their
+ Adding buffers and space checking are faster and simpler than their
counterparts in libdrm (the time complexity of all the functions
is O(1) in nearly all scenarios, thanks to hashing).
It works like this:
- cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
+ cs_add_buffer(cs, buf, read_domain, write_domain) adds a new buffer and
also adds the size of 'buf' to the used_gart and used_vram winsys variables
based on the domains, which are simply or'd for the accounting purposes.
The adding is skipped if the reloc is already present in the list, but it
(done in the pipe driver)
cs_write_reloc(cs, buf) just writes a reloc that has been added using
- cs_add_reloc. The read_domain and write_domain parameters have been removed,
- because we already specify them in cs_add_reloc.
+ cs_add_buffer. The read_domain and write_domain parameters have been removed,
+ because we already specify them in cs_add_buffer.
*/
#include "radeon_drm_cs.h"
reloc->flags = MAX2(reloc->flags, priority);
}
-int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
+int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
int i = csc->reloc_indices_hashlist[hash];
if (csc->relocs_bo[i] == bo) {
/* Put this reloc in the hash list.
* This will prevent additional hash collisions if there are
- * several consecutive get_reloc calls for the same buffer.
+ * several consecutive lookup_buffer calls for the same buffer.
*
* Example: Assuming buffers A,B,C collide in the hash list,
* the following sequence of relocs:
return -1;
}
-static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
+static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
struct radeon_bo *bo,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
assert(priority < 64);
*added_domains = 0;
- i = radeon_get_reloc(csc, bo);
+ i = radeon_lookup_buffer(csc, bo);
if (i >= 0) {
reloc = &csc->relocs[i];
update_reloc(reloc, rd, wd, priority / 4, added_domains);
- /* For async DMA, every add_reloc call must add a buffer to the list
+ /* For async DMA, every add_buffer call must add a buffer to the list
* no matter how many duplicates there are. This is due to the fact
* the DMA CS checker doesn't use NOP packets for offset patching,
* but always uses the i-th buffer from the list to patch the i-th
return csc->crelocs++;
}
-static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
+static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domains,
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct radeon_bo *bo = (struct radeon_bo*)buf;
enum radeon_bo_domain added_domains;
- unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority,
- &added_domains);
+ unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
+ &added_domains);
if (added_domains & RADEON_DOMAIN_GTT)
cs->csc->used_gart += bo->base.size;
return index;
}
-static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
+static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
struct radeon_winsys_cs_handle *buf)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf);
+ return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
if (status) {
cs->csc->validated_crelocs = cs->csc->crelocs;
} else {
- /* Remove lately-added relocations. The validation failed with them
+ /* Remove recently added buffers. The validation failed with them
* and the CS is about to be flushed because of that. Keep only
- * the already-validated relocations. */
+ * the already-validated buffers. */
unsigned i;
for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
if (!bo->num_cs_references)
return FALSE;
- index = radeon_get_reloc(cs->csc, bo);
+ index = radeon_lookup_buffer(cs->csc, bo);
if (index == -1)
return FALSE;
fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
RADEON_DOMAIN_GTT, 0);
/* Add the fence as a dummy relocation. */
- cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
+ cs->ws->base.cs_add_buffer(rcs, cs->ws->base.buffer_get_cs_handle(fence),
RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
RADEON_PRIO_FENCE);
return (struct pipe_fence_handle*)fence;
ws->base.ctx_destroy = radeon_drm_ctx_destroy;
ws->base.cs_create = radeon_drm_cs_create;
ws->base.cs_destroy = radeon_drm_cs_destroy;
- ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
- ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
+ ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
+ ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
ws->base.cs_validate = radeon_drm_cs_validate;
ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
ws->base.cs_flush = radeon_drm_cs_flush;
uint32_t cs_trace_id;
- /* Relocs. */
+ /* Buffers. */
unsigned nrelocs;
unsigned crelocs;
unsigned validated_crelocs;
struct radeon_bo *trace_buf;
};
-int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
+int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo);
static inline struct radeon_drm_cs *
radeon_drm_cs(struct radeon_winsys_cs *base)
{
int num_refs = bo->num_cs_references;
return num_refs == bo->rws->num_cs ||
- (num_refs && radeon_get_reloc(cs->csc, bo) != -1);
+ (num_refs && radeon_lookup_buffer(cs->csc, bo) != -1);
}
static inline boolean
if (!bo->num_cs_references)
return FALSE;
- index = radeon_get_reloc(cs->csc, bo);
+ index = radeon_lookup_buffer(cs->csc, bo);
if (index == -1)
return FALSE;