Use the buffer priority flags and expand them.

This information will be used for debugging.
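
Concretely: every cs_add_reloc / add_to_buffer_list call now passes a
specific priority instead of RADEON_PRIO_MIN, the priority enum is
rebuilt around groups of four values (0..63), and both winsyses store
priority / 4 so the per-buffer flag still fits in the 4 bits it used
before.
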
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
tex = r300_resource(texstate->sampler_views[i]->base.texture);
r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ,
- tex->domain, RADEON_PRIO_SHADER_TEXTURE_RO);
+ tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);
}
}
/* ...occlusion query buffer... */
if (r300->query_current)
r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf,
RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_QUERY);
/* ...vertex buffer for SWTCL path... */
if (r300->vbo_cs)
r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs,
RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_VERTEX_BUFFER);
/* ...vertex buffers for HWTCL path... */
if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf,
RADEON_USAGE_READ,
r300_resource(buf)->domain,
- RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_PRIO_SAMPLER_BUFFER);
}
}
/* ...and index buffer for HWTCL path. */
r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
RADEON_USAGE_READ,
r300_resource(index_buffer)->domain,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_INDEX_BUFFER);
/* Now do the validation (flush is called inside cs_validate on failure). */
if (!r300->rws->cs_validate(r300->cs)) {
unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
(struct r600_resource*)cb->base.texture,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_PRIO_SHADER_RW_BUFFER);
radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
code_bo, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA));
+ RADEON_PRIO_USER_SHADER));
}
static void evergreen_launch_grid(
csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
/* emit reloc before writing cs so that cs is always in consistent state */
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
cs->buf[cs->cdw++] = src_offset & 0xffffffff;
/* This must be done after r600_need_cs_space. */
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)dst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_CP_DMA);
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
radeon_emit(cs, clear_value); /* DATA [31:0] */
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
tex->cmask_buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_COLOR_META);
+ RADEON_PRIO_CMASK);
} else {
cmask_reloc = reloc;
}
radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = reloc_idx;
} else {
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
}
state->dirty_mask = 0;
}
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
dirty_mask &= ~(1 << buffer_index);
}
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
RADEON_USAGE_READ,
- rview->tex_resource->b.b.nr_samples > 1 ?
- RADEON_PRIO_SHADER_TEXTURE_MSAA :
- RADEON_PRIO_SHADER_TEXTURE_RO);
+ r600_get_sampler_view_priority(rview->tex_resource));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, reloc);
(shader->buffer->gpu_address + shader->offset) >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
+ RADEON_USAGE_READ,
+ RADEON_PRIO_INTERNAL_SHADER));
}
static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW));
+ RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
state->esgs_ring.buffer_size >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW));
+ RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
state->gsvs_ring.buffer_size >> 8);
} else {
size = (cheight * pitch) / 4;
/* emit reloc before writing cs so that cs is always in consistent state */
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
cs->buf[cs->cdw++] = base >> 8;
cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
/* This must be done after r600_need_cs_space. */
src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
radeon_emit(cs, src_offset); /* SRC_ADDR_LO [31:0] */
csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
/* emit reloc before writing cs so that cs is always in consistent state */
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
cs->buf[cs->cdw++] = src_offset & 0xfffffffc;
radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = reloc_idx;
} else {
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
}
}
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO));
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
dirty_mask &= ~(1 << buffer_index);
}
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
RADEON_USAGE_READ,
- rview->tex_resource->b.b.nr_samples > 1 ?
- RADEON_PRIO_SHADER_TEXTURE_MSAA :
- RADEON_PRIO_SHADER_TEXTURE_RO);
+ r600_get_sampler_view_priority(rview->tex_resource));
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, reloc);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
+ RADEON_USAGE_READ,
+ RADEON_PRIO_INTERNAL_SHADER));
}
static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW));
+ RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
state->esgs_ring.buffer_size >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW));
+ RADEON_PRIO_RINGS_STREAMOUT));
radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
state->gsvs_ring.buffer_size >> 8);
} else {
size = (cheight * pitch) / 4;
/* emit reloc before writing cs so that cs is always in consistent state */
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_TEXTURE);
radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_TEXTURE);
cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
cs->buf[cs->cdw++] = base >> 8;
cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) |
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)info.indirect,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ,
+ RADEON_PRIO_DRAW_INDIRECT);
}
if (info.indexed) {
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)ib.buffer,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ,
+ RADEON_PRIO_INDEX_BUFFER);
}
else {
uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
(struct r600_resource*)ib.buffer,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ,
+ RADEON_PRIO_INDEX_BUFFER);
cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing);
cs->buf[cs->cdw++] = max_size;
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
t->buf_filled_size, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SO_FILLED_SIZE);
}
if (likely(!info.indirect)) {
r600_emit_command_buffer(cs, &shader->command_buffer);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA));
+ RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER));
}
unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
va = rscreen->b.trace_bo->gpu_address;
reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
- RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
radeon_emit(cs, va & 0xFFFFFFFFUL);
radeon_emit(cs, (va >> 32UL) & 0xFFUL);
}
}
+static inline enum radeon_bo_priority
+r600_get_sampler_view_priority(struct r600_resource *res)
+{
+ if (res->b.b.target == PIPE_BUFFER)
+ return RADEON_PRIO_SAMPLER_BUFFER;
+
+ if (res->b.b.nr_samples > 1)
+ return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
+
+ return RADEON_PRIO_SAMPLER_TEXTURE;
+}
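/* Note on the new helper above: r600_get_sampler_view_priority() is
 * shared by r600g and radeonsi. It replaces both the open-coded
 * nr_samples checks removed from the r600g sampler-view hunks earlier
 * in this patch and si_get_resource_ro_priority() removed from
 * radeonsi below, e.g. (call shape as in the hunks above):
 *
 *   reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
 *                                     rview->tex_resource,
 *                                     RADEON_USAGE_READ,
 *                                     r600_get_sampler_view_priority(
 *                                           rview->tex_resource));
 */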
+
#define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
assert(0);
}
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_QUERY);
if (r600_is_timer_query(query->type))
ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
assert(0);
}
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_QUERY);
query->buffer.results_end += query->result_size;
radeon_emit(cs, va + results_base);
radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_QUERY);
results_base += query->result_size;
/* set CONTINUE bit for all packets except the first */
radeon_emit(cs, buffer->gpu_address);
radeon_emit(cs, buffer->gpu_address >> 32);
- r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ r600_emit_reloc(ctx, &ctx->rings.gfx, buffer,
+ RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
/* analyze results */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
radeon_emit(cs, va >> 8); /* BUFFER_BASE */
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
- RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
/* R7xx requires this packet after updating BUFFER_BASE.
* Without this, R7xx locks up. */
radeon_emit(cs, va >> 8);
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
- RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
}
}
radeon_emit(cs, va >> 32); /* src address hi */
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
} else {
/* Start from the beginning. */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, 0); /* unused */
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
/* Zero the buffer size. The counters (primitives generated,
* primitives emitted) may be enabled even if there is not
int reloc_idx;
reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_UVD);
if (!dec->use_legacy) {
uint64_t addr;
addr = dec->ws->buffer_get_virtual_address(cs_buf);
{
int reloc_idx;
- reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN);
+ reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_VCE);
if (enc->use_vm) {
uint64_t addr;
addr = enc->ws->buffer_get_virtual_address(buf);
RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
};
+/* Each group of four has the same priority. */
enum radeon_bo_priority {
- RADEON_PRIO_MIN,
- RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */
- RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */
- RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */
- RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */
- RADEON_PRIO_COLOR_BUFFER,
- RADEON_PRIO_DEPTH_BUFFER,
- RADEON_PRIO_SHADER_TEXTURE_MSAA,
- RADEON_PRIO_COLOR_BUFFER_MSAA,
- RADEON_PRIO_DEPTH_BUFFER_MSAA,
- RADEON_PRIO_COLOR_META,
- RADEON_PRIO_DEPTH_META,
- RADEON_PRIO_MAX /* must be <= 15 */
+ RADEON_PRIO_FENCE = 0,
+ RADEON_PRIO_TRACE,
+ RADEON_PRIO_SO_FILLED_SIZE,
+ RADEON_PRIO_QUERY,
+
+ RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
+ RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
+ RADEON_PRIO_DRAW_INDIRECT,
+ RADEON_PRIO_INDEX_BUFFER,
+
+ RADEON_PRIO_CP_DMA = 8,
+
+ RADEON_PRIO_VCE = 12,
+ RADEON_PRIO_UVD,
+ RADEON_PRIO_SDMA_BUFFER,
+ RADEON_PRIO_SDMA_TEXTURE,
+
+ RADEON_PRIO_USER_SHADER = 16,
+ RADEON_PRIO_INTERNAL_SHADER, /* fetch shader, etc. */
+
+ /* gap: 20 */
+
+ RADEON_PRIO_CONST_BUFFER = 24,
+ RADEON_PRIO_DESCRIPTORS,
+ RADEON_PRIO_BORDER_COLORS,
+
+ RADEON_PRIO_SAMPLER_BUFFER = 28,
+ RADEON_PRIO_VERTEX_BUFFER,
+
+ RADEON_PRIO_SHADER_RW_BUFFER = 32,
+ RADEON_PRIO_RINGS_STREAMOUT,
+ RADEON_PRIO_SCRATCH_BUFFER,
+ RADEON_PRIO_COMPUTE_GLOBAL,
+
+ RADEON_PRIO_SAMPLER_TEXTURE = 36,
+ RADEON_PRIO_SHADER_RW_IMAGE,
+
+ RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 40,
+
+ RADEON_PRIO_COLOR_BUFFER = 44,
+
+ RADEON_PRIO_DEPTH_BUFFER = 48,
+
+ RADEON_PRIO_COLOR_BUFFER_MSAA = 52,
+
+ RADEON_PRIO_DEPTH_BUFFER_MSAA = 56,
+
+ RADEON_PRIO_CMASK = 60,
+ RADEON_PRIO_DCC,
+ RADEON_PRIO_HTILE,
+ /* 63 is the maximum value */
};
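/* Illustrative sketch (not part of the patch; the helper name is made
 * up): how a 6-bit priority from the enum above collapses into the
 * 4-bit per-buffer class that both winsyses compute inline as
 * "priority / 4".
 */
static inline unsigned radeon_bo_priority_to_class(enum radeon_bo_priority priority)
{
   assert(priority < 64); /* 63 is the maximum value */
   return priority / 4;   /* e.g. RADEON_PRIO_QUERY (3)  -> class 0,
                           *      RADEON_PRIO_CMASK (60) -> class 15 */
}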
struct winsys_handle;
r600_need_dma_space(&ctx->b, ncopy * 7);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
for (i = 0; i < ncopy; i++) {
csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE;
r600_need_dma_space(&ctx->b, ncopy * 12);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
copy_height = size * 4 / pitch;
for (i = 0; i < ncopy; i++) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
shader->scratch_bo,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_PRIO_SCRATCH_BUFFER);
scratch_buffer_va = shader->scratch_bo->gpu_address;
}
kernel_args_va += kernel_args_offset;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
}
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer,
RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_PRIO_COMPUTE_GLOBAL);
}
/* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
shader_va += pc;
#endif
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
/* This must be done after need_cs_space. */
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)dst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_CP_DMA);
/* Flush the caches for the first copy only.
* Also wait for the previous CP DMA operations. */
/* This must be done after r600_need_cs_space. */
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
desc->list_dirty = false;
desc->pointer_dirty = true;
si_release_descriptors(&views->desc);
}
-static enum radeon_bo_priority si_get_resource_ro_priority(struct r600_resource *res)
-{
- if (res->b.b.target == PIPE_BUFFER)
- return RADEON_PRIO_SHADER_BUFFER_RO;
-
- if (res->b.b.nr_samples > 1)
- return RADEON_PRIO_SHADER_TEXTURE_MSAA;
-
- return RADEON_PRIO_SHADER_TEXTURE_RO;
-}
-
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
struct si_sampler_views *views)
{
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rview->resource, RADEON_USAGE_READ,
- si_get_resource_ro_priority(rview->resource));
+ r600_get_sampler_view_priority(rview->resource));
}
if (!views->desc.buffer)
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
}
static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
if (rview->resource)
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rview->resource, RADEON_USAGE_READ,
- si_get_resource_ro_priority(rview->resource));
+ r600_get_sampler_view_priority(rview->resource));
pipe_sampler_view_reference(&views->views[slot], view);
memcpy(views->desc.list + slot*8, view_desc, 8*4);
if (!states->desc.buffer)
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
}
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
buffers->desc.buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_DATA);
+ RADEON_PRIO_DESCRIPTORS);
}
/* VERTEX BUFFERS */
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)sctx->vertex_buffer[vb].buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
}
if (!desc->buffer)
return;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA);
+ RADEON_PRIO_DESCRIPTORS);
}
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA);
+ RADEON_PRIO_DESCRIPTORS);
assert(count <= SI_NUM_VERTEX_BUFFERS);
if (!bound[ve->vertex_buffer_index]) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource*)vb->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
bound[ve->vertex_buffer_index] = true;
}
}
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rbuffer, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_PRIO_SAMPLER_BUFFER);
}
}
}
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_init_buffer_resources(&sctx->const_buffers[i],
SI_NUM_CONST_BUFFERS, SI_SGPR_CONST,
- RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
+ RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
si_init_buffer_resources(&sctx->rw_buffers[i],
SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
- RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
si_init_descriptors(&sctx->samplers[i].views.desc,
SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS);
r600_need_dma_space(&ctx->b, ncopy * 5);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SDMA_BUFFER);
for (i = 0; i < ncopy; i++) {
csize = size < max_csize ? size : max_csize;
r600_need_dma_space(&ctx->b, ncopy * 9);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
for (i = 0; i < ncopy; i++) {
cheight = copy_height;
struct r600_resource *ib = state->indirect_buffer;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ,
+ RADEON_PRIO_IB2);
radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
radeon_emit(cs, ib->gpu_address);
if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
tex->cmask_buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_COLOR_META);
+ RADEON_PRIO_CMASK);
}
radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
if (zb->db_htile_data_base) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
rtex->htile_buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_DEPTH_META);
+ RADEON_PRIO_HTILE);
}
radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_DATA);
+ RADEON_PRIO_BORDER_COLORS);
si_pm4_upload_indirect_buffer(sctx, pm4);
sctx->init_config = pm4;
if (sctx->scratch_buffer) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
sctx->scratch_buffer, RADEON_USAGE_READWRITE,
- RADEON_PRIO_SHADER_RESOURCE_RW);
+ RADEON_PRIO_SCRATCH_BUFFER);
}
sctx->emit_scratch_reloc = false;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
t->buf_filled_size, RADEON_USAGE_READ,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_SO_FILLED_SIZE);
}
/* draw packet */
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource *)info->indirect,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
}
if (info->indexed) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
(struct r600_resource *)ib->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
if (info->indirect) {
uint64_t indirect_va = r600_resource(info->indirect)->gpu_address;
sctx->trace_id++;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
- RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
+ RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
S_370_WR_CONFIRM(1) |
return;
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
/* We need at least 2 components for LS.
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
return;
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
num_user_sgprs = SI_TCS_NUM_USER_SGPR;
num_sgprs = shader->num_sgprs;
return;
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
if (shader->selector->type == PIPE_SHADER_VERTEX) {
vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
S_028B90_ENABLE(gs_num_invocations > 0));
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
if (shader->is_gs_copy_shader) {
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
va = shader->bo->gpu_address;
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
int i = -1;
- priority = MIN2(priority, 15);
+ assert(priority < 64);
*added_domains = 0;
i = amdgpu_get_reloc(cs, bo);
reloc->usage |= usage;
*added_domains = domains & ~reloc->domains;
reloc->domains |= domains;
- cs->flags[i] = MAX2(cs->flags[i], priority);
+ cs->flags[i] = MAX2(cs->flags[i], priority / 4);
return i;
}
cs->buffers[cs->num_buffers].bo = NULL;
amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
cs->handles[cs->num_buffers] = bo->bo;
- cs->flags[cs->num_buffers] = priority;
+ cs->flags[cs->num_buffers] = priority / 4;
p_atomic_inc(&bo->num_cs_references);
reloc = &cs->buffers[cs->num_buffers];
reloc->bo = bo;
}
amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer,
- RADEON_USAGE_READ, 0, RADEON_PRIO_MIN);
+ RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
/* If the CS is not empty or overflowed.... */
if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
int i = -1;
- priority = MIN2(priority, 15);
+ assert(priority < 64);
*added_domains = 0;
i = radeon_get_reloc(csc, bo);
if (i >= 0) {
reloc = &csc->relocs[i];
- update_reloc(reloc, rd, wd, priority, added_domains);
+ update_reloc(reloc, rd, wd, priority / 4, added_domains);
/* For async DMA, every add_reloc call must add a buffer to the list
* no matter how many duplicates there are. This is due to the fact
reloc->handle = bo->handle;
reloc->read_domains = rd;
reloc->write_domain = wd;
- reloc->flags = priority;
+ reloc->flags = priority / 4;
csc->reloc_indices_hashlist[hash] = csc->crelocs;
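/* With the division by four, the value stored in the per-buffer flags
 * stays in the same 0..15 range that MIN2(priority, 15) used to clamp
 * to; the asserts above only widen the documented input domain to the
 * enum's full 6 bits. */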
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct radeon_bo *bo = (struct radeon_bo*)buf;
enum radeon_bo_domain added_domains;
- unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);
+ unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority,
+ &added_domains);
if (added_domains & RADEON_DOMAIN_GTT)
cs->csc->used_gart += bo->base.size;
/* Add the fence as a dummy relocation. */
cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
- RADEON_PRIO_MIN);
+ RADEON_PRIO_FENCE);
return (struct pipe_fence_handle*)fence;
}