From bee2b96b02161cf75cfe17f7d30d14f2b838423f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 20 Feb 2014 15:39:35 +0100 Subject: [PATCH] r600g,radeonsi: set priorities for relocations --- src/gallium/drivers/r600/evergreen_compute.c | 6 +- .../drivers/r600/evergreen_hw_context.c | 9 ++- src/gallium/drivers/r600/evergreen_state.c | 50 ++++++++++----- src/gallium/drivers/r600/r600_hw_context.c | 12 ++-- src/gallium/drivers/r600/r600_state.c | 54 +++++++++++----- src/gallium/drivers/r600/r600_state_common.c | 14 +++-- src/gallium/drivers/radeon/r600_cs.h | 10 +-- src/gallium/drivers/radeon/r600_query.c | 11 ++-- src/gallium/drivers/radeon/r600_streamout.c | 8 +-- src/gallium/drivers/radeonsi/si_compute.c | 6 +- src/gallium/drivers/radeonsi/si_descriptors.c | 63 +++++++++++++------ src/gallium/drivers/radeonsi/si_pm4.c | 6 +- src/gallium/drivers/radeonsi/si_pm4.h | 4 +- src/gallium/drivers/radeonsi/si_state.c | 14 +++-- src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 17 ++--- 16 files changed, 197 insertions(+), 88 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index a9d4079b4de..aba34085dae 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -428,7 +428,8 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i]; unsigned reloc = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, (struct r600_resource*)cb->base.texture, - RADEON_USAGE_READWRITE); + RADEON_USAGE_READWRITE, + RADEON_PRIO_SHADER_RESOURCE_RW); r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7); radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ @@ -533,7 +534,8 @@ void evergreen_emit_cs_shader( radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0)); radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, - kernel->code_bo, RADEON_USAGE_READ)); + kernel->code_bo, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_DATA)); } static void evergreen_launch_grid( diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 85fdc4e925d..083b6978c5e 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -66,8 +66,10 @@ void evergreen_dma_copy(struct r600_context *rctx, for (i = 0; i < ncopy; i++) { csize = size < 0x000fffff ? size : 0x000fffff; /* emit reloc before writting cs so that cs is always in consistent state */ - r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ); - r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE); + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, + RADEON_PRIO_MIN); + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, + RADEON_PRIO_MIN); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize); cs->buf[cs->cdw++] = dst_offset & 0xffffffff; cs->buf[cs->cdw++] = src_offset & 0xffffffff; @@ -130,7 +132,8 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, /* This must be done after r600_need_cs_space. */ reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, - (struct r600_resource*)dst, RADEON_USAGE_WRITE); + (struct r600_resource*)dst, RADEON_USAGE_WRITE, + RADEON_PRIO_MIN); radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(cs, clear_value); /* DATA [31:0] */ diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 570935b8818..d4900e6edec 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1764,11 +1764,15 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)cb->base.texture, - RADEON_USAGE_READWRITE); + RADEON_USAGE_READWRITE, + tex->surface.nsamples > 1 ? + RADEON_PRIO_COLOR_BUFFER_MSAA : + RADEON_PRIO_COLOR_BUFFER); if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { cmask_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, - tex->cmask_buffer, RADEON_USAGE_READWRITE); + tex->cmask_buffer, RADEON_USAGE_READWRITE, + RADEON_PRIO_COLOR_META); } else { cmask_reloc = reloc; } @@ -1814,7 +1818,8 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r unsigned reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)state->cbufs[0]->texture, - RADEON_USAGE_READWRITE); + RADEON_USAGE_READWRITE, + RADEON_PRIO_COLOR_BUFFER); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */ radeon_emit(cs, reloc); @@ -1836,7 +1841,10 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r unsigned reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)state->zsbuf->texture, - RADEON_USAGE_READWRITE); + RADEON_USAGE_READWRITE, + zb->base.texture->nr_samples > 1 ? + RADEON_PRIO_DEPTH_BUFFER_MSAA : + RADEON_PRIO_DEPTH_BUFFER); r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, zb->pa_su_poly_offset_db_fmt_cntl); @@ -1945,7 +1953,8 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control); r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); - reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, RADEON_USAGE_READWRITE); + reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, + RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = reloc_idx; } else { @@ -2056,7 +2065,8 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); } state->dirty_mask = 0; } @@ -2104,7 +2114,8 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, } radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); radeon_emit(cs, (buffer_id_base + buffer_index) * 8); @@ -2128,7 +2139,8 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); dirty_mask &= ~(1 << buffer_index); } @@ -2187,7 +2199,10 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, radeon_emit_array(cs, rview->tex_resource_words, 8); reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource, - RADEON_USAGE_READ); + RADEON_USAGE_READ, + rview->tex_resource->b.b.nr_samples > 1 ? + RADEON_PRIO_SHADER_TEXTURE_MSAA : + RADEON_PRIO_SHADER_TEXTURE_RO); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); @@ -2286,7 +2301,8 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_write_context_reg(cs, R_0288A4_SQ_PGM_START_FS, (r600_resource_va(rctx->b.b.screen, &shader->buffer->b.b) + shader->offset) >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer, RADEON_USAGE_READ)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); } static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) @@ -2339,7 +2355,9 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, (r600_resource_va(screen, &rbuffer->b.b)) >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READWRITE, + RADEON_PRIO_SHADER_RESOURCE_RW)); r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, state->esgs_ring.buffer_size >> 8); @@ -2347,7 +2365,9 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, (r600_resource_va(screen, &rbuffer->b.b)) >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READWRITE, + RADEON_PRIO_SHADER_RESOURCE_RW)); r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, state->gsvs_ring.buffer_size >> 8); } else { @@ -3494,8 +3514,10 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, } size = (cheight * pitch) >> 2; /* emit reloc before writting cs so that cs is always in consistent state */ - r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ); - r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE); + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, + RADEON_USAGE_READ, RADEON_PRIO_MIN); + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource, + RADEON_USAGE_WRITE, RADEON_PRIO_MIN); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size); cs->buf[cs->cdw++] = base >> 8; cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) | diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index ef077b21553..3d0da2c212f 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -412,8 +412,10 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, } /* This must be done after r600_need_cs_space. */ - src_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src, RADEON_USAGE_READ); - dst_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE); + src_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src, + RADEON_USAGE_READ, RADEON_PRIO_MIN); + dst_reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst, + RADEON_USAGE_WRITE, RADEON_PRIO_MIN); radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(cs, src_offset); /* SRC_ADDR_LO [31:0] */ @@ -477,8 +479,10 @@ void r600_dma_copy(struct r600_context *rctx, for (i = 0; i < ncopy; i++) { csize = size < 0xffff ? size : 0xffff; /* emit reloc before writting cs so that cs is always in consistent state */ - r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ); - r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE); + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, + RADEON_PRIO_MIN); + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, + RADEON_PRIO_MIN); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize); cs->buf[cs->cdw++] = dst_offset & 0xfffffffc; cs->buf[cs->cdw++] = src_offset & 0xfffffffc; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3e3b0ddd9a5..e0e75c6b28a 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1405,7 +1405,10 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)cb[i]->base.texture, - RADEON_USAGE_READWRITE); + RADEON_USAGE_READWRITE, + cb[i]->base.texture->nr_samples > 1 ? + RADEON_PRIO_COLOR_BUFFER_MSAA : + RADEON_PRIO_COLOR_BUFFER); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); @@ -1415,7 +1418,10 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, cb[i]->cb_buffer_fmask, - RADEON_USAGE_READWRITE); + RADEON_USAGE_READWRITE, + cb[i]->base.texture->nr_samples > 1 ? + RADEON_PRIO_COLOR_BUFFER_MSAA : + RADEON_PRIO_COLOR_BUFFER); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); @@ -1425,7 +1431,10 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, cb[i]->cb_buffer_cmask, - RADEON_USAGE_READWRITE); + RADEON_USAGE_READWRITE, + cb[i]->base.texture->nr_samples > 1 ? + RADEON_PRIO_COLOR_BUFFER_MSAA : + RADEON_PRIO_COLOR_BUFFER); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); } @@ -1461,7 +1470,10 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a unsigned reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)state->zsbuf->texture, - RADEON_USAGE_READWRITE); + RADEON_USAGE_READWRITE, + surf->base.texture->nr_samples > 1 ? + RADEON_PRIO_DEPTH_BUFFER_MSAA : + RADEON_PRIO_DEPTH_BUFFER); r600_write_context_reg(cs, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, surf->pa_su_poly_offset_db_fmt_cntl); @@ -1554,7 +1566,8 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); - reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, RADEON_USAGE_READWRITE); + reloc_idx = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, + RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = reloc_idx; } else { @@ -1652,7 +1665,8 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); } } @@ -1684,7 +1698,8 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, } radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); radeon_emit(cs, (buffer_id_base + buffer_index) * 7); @@ -1699,7 +1714,8 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); dirty_mask &= ~(1 << buffer_index); } @@ -1747,7 +1763,10 @@ static void r600_emit_sampler_views(struct r600_context *rctx, radeon_emit_array(cs, rview->tex_resource_words, 7); reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource, - RADEON_USAGE_READ); + RADEON_USAGE_READ, + rview->tex_resource->b.b.nr_samples > 1 ? + RADEON_PRIO_SHADER_TEXTURE_MSAA : + RADEON_PRIO_SHADER_TEXTURE_RO); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); @@ -1874,7 +1893,8 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600 r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer, RADEON_USAGE_READ)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); } static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) @@ -1923,7 +1943,9 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, (r600_resource_va(screen, &rbuffer->b.b)) >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READWRITE, + RADEON_PRIO_SHADER_RESOURCE_RW)); r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, state->esgs_ring.buffer_size >> 8); @@ -1931,7 +1953,9 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, (r600_resource_va(screen, &rbuffer->b.b)) >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, + RADEON_USAGE_READWRITE, + RADEON_PRIO_SHADER_RESOURCE_RW)); r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, state->gsvs_ring.buffer_size >> 8); } else { @@ -2846,8 +2870,10 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, cheight = cheight > copy_height ? copy_height : cheight; size = (cheight * pitch) >> 2; /* emit reloc before writting cs so that cs is always in consistent state */ - r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ); - r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE); + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ, + RADEON_PRIO_MIN); + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE, + RADEON_PRIO_MIN); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size); cs->buf[cs->cdw++] = base >> 8; cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) | diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 73faffb93d3..96c18808ea8 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1463,7 +1463,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = info.count; cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ); + cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, + (struct r600_resource*)ib.buffer, + RADEON_USAGE_READ, RADEON_PRIO_MIN); } } else { if (info.count_from_stream_output) { @@ -1480,7 +1482,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = 0; /* unused */ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, t->buf_filled_size, RADEON_USAGE_READ); + cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, + t->buf_filled_size, RADEON_USAGE_READ, + RADEON_PRIO_MIN); } cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing); @@ -1724,7 +1728,8 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) r600_emit_command_buffer(cs, &shader->command_buffer); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->bo, RADEON_USAGE_READ)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->bo, + RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); } unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, @@ -2401,7 +2406,8 @@ void r600_trace_emit(struct r600_context *rctx) uint32_t reloc; va = r600_resource_va(&rscreen->b.b, (void*)rscreen->b.trace_bo); - reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo, RADEON_USAGE_READWRITE); + reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo, + RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); radeon_emit(cs, va & 0xFFFFFFFFUL); radeon_emit(cs, (va >> 32UL) & 0xFFUL); diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h index 45dc76c433b..5588592eb02 100644 --- a/src/gallium/drivers/radeon/r600_cs.h +++ b/src/gallium/drivers/radeon/r600_cs.h @@ -45,7 +45,8 @@ static INLINE uint64_t r600_resource_va(struct pipe_screen *screen, static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx, struct r600_ring *ring, struct r600_resource *rbo, - enum radeon_bo_usage usage) + enum radeon_bo_usage usage, + enum radeon_bo_priority priority) { assert(usage); @@ -64,16 +65,17 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx, } } return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage, - rbo->domains, RADEON_PRIO_MIN) * 4; + rbo->domains, priority) * 4; } static INLINE void r600_emit_reloc(struct r600_common_context *rctx, struct r600_ring *ring, struct r600_resource *rbo, - enum radeon_bo_usage usage) + enum radeon_bo_usage usage, + enum radeon_bo_priority priority) { struct radeon_winsys_cs *cs = ring->cs; bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address; - unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage); + unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority); if (!has_vm) { radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 915d37ade9e..a5d75717797 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -206,7 +206,8 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q default: assert(0); } - r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE); + r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE, + RADEON_PRIO_MIN); if (!r600_is_timer_query(query->type)) { ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw; @@ -271,7 +272,8 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que default: assert(0); } - r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE); + r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE, + RADEON_PRIO_MIN); query->buffer.results_end += query->result_size; @@ -320,7 +322,8 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL); radeon_emit(cs, op | (((va + results_base) >> 32UL) & 0xFF)); - r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ); + r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ, + RADEON_PRIO_MIN); results_base += query->result_size; /* set CONTINUE bit for all packets except the first */ @@ -817,7 +820,7 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx) radeon_emit(cs, va); radeon_emit(cs, va >> 32); - r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE); + r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN); /* analyze results */ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ); diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c index b6cf8584e2a..a556e3ae0e0 100644 --- a/src/gallium/drivers/radeon/r600_streamout.c +++ b/src/gallium/drivers/radeon/r600_streamout.c @@ -244,7 +244,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r radeon_emit(cs, va >> 8); /* BUFFER_BASE */ r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer), - RADEON_USAGE_WRITE); + RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW); /* R7xx requires this packet after updating BUFFER_BASE. * Without this, R7xx locks up. */ @@ -254,7 +254,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r radeon_emit(cs, va >> 8); r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer), - RADEON_USAGE_WRITE); + RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW); } } @@ -273,7 +273,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r radeon_emit(cs, va >> 32); /* src address hi */ r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size, - RADEON_USAGE_READ); + RADEON_USAGE_READ, RADEON_PRIO_MIN); } else { /* Start from the beginning. */ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); @@ -318,7 +318,7 @@ void r600_emit_streamout_end(struct r600_common_context *rctx) radeon_emit(cs, 0); /* unused */ r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size, - RADEON_USAGE_WRITE); + RADEON_USAGE_WRITE, RADEON_PRIO_MIN); } if (rctx->chip_class >= EVERGREEN) { diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index ead5862df39..e16feab5234 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -169,7 +169,7 @@ static void si_launch_grid( (struct pipe_resource*)kernel_args_buffer); kernel_args_va += kernel_args_offset; - si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0)); @@ -192,7 +192,7 @@ static void si_launch_grid( if (!buffer) { continue; } - si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE); + si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW); } /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID @@ -209,7 +209,7 @@ static void si_launch_grid( } shader_va = r600_resource_va(ctx->screen, (void *)shader->bo); - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff); si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 6b0ff91c2bc..bf2206dc1bc 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -129,7 +129,8 @@ static void si_init_descriptors(struct si_context *sctx, PIPE_USAGE_DEFAULT, SI_NUM_CONTEXTS * desc->context_size); - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer, RADEON_USAGE_READWRITE); + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer, + RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); va = r600_resource_va(sctx->b.b.screen, &desc->buffer->b.b); /* We don't check for CS space here, because this should be called @@ -301,6 +302,17 @@ static void si_release_sampler_views(struct si_sampler_views *views) si_release_descriptors(&views->desc); } +static enum radeon_bo_priority si_get_resource_ro_priority(struct r600_resource *res) +{ + if (res->b.b.target == PIPE_BUFFER) + return RADEON_PRIO_SHADER_BUFFER_RO; + + if (res->b.b.nr_samples > 1) + return RADEON_PRIO_SHADER_TEXTURE_MSAA; + + return RADEON_PRIO_SHADER_TEXTURE_RO; +} + static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_sampler_views *views) { @@ -312,10 +324,13 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_pipe_sampler_view *rview = (struct si_pipe_sampler_view*)views->views[i]; - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, rview->resource, RADEON_USAGE_READ); + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, + rview->resource, RADEON_USAGE_READ, + si_get_resource_ro_priority(rview->resource)); } - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer, RADEON_USAGE_READWRITE); + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer, + RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); si_emit_shader_pointer(sctx, &views->desc); } @@ -333,7 +348,9 @@ void si_set_sampler_view(struct si_context *sctx, unsigned shader, struct si_pipe_sampler_view *rview = (struct si_pipe_sampler_view*)view; - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, rview->resource, RADEON_USAGE_READ); + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, + rview->resource, RADEON_USAGE_READ, + si_get_resource_ro_priority(rview->resource)); pipe_sampler_view_reference(&views->views[slot], view); views->desc_data[slot] = view_desc; @@ -361,12 +378,14 @@ static void si_init_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned num_buffers, unsigned shader, unsigned shader_userdata_index, - enum radeon_bo_usage shader_usage) + enum radeon_bo_usage shader_usage, + enum radeon_bo_priority priority) { int i; buffers->num_buffers = num_buffers; buffers->shader_usage = shader_usage; + buffers->priority = priority; buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); buffers->desc_storage = CALLOC(num_buffers, sizeof(uint32_t) * 4); @@ -408,11 +427,12 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)buffers->buffers[i], - buffers->shader_usage); + buffers->shader_usage, buffers->priority); } r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, - buffers->desc.buffer, RADEON_USAGE_READWRITE); + buffers->desc.buffer, RADEON_USAGE_READWRITE, + RADEON_PRIO_SHADER_DATA); si_emit_shader_pointer(sctx, &buffers->desc); } @@ -495,7 +515,8 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s buffers->buffers[slot] = buffer; r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, - (struct r600_resource*)buffer, buffers->shader_usage); + (struct r600_resource*)buffer, + buffers->shader_usage, buffers->priority); buffers->desc.enabled_mask |= 1 << slot; } else { /* Clear the descriptor. */ @@ -588,7 +609,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, pipe_resource_reference(&buffers->buffers[slot], input->buffer); r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)input->buffer, - buffers->shader_usage); + buffers->shader_usage, buffers->priority); buffers->desc.enabled_mask |= 1 << slot; } else { /* Clear the descriptor. */ @@ -643,7 +664,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, buffer); r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)buffer, - buffers->shader_usage); + buffers->shader_usage, buffers->priority); buffers->desc.enabled_mask |= 1 << bufidx; } else { /* Clear the descriptor and unset the resource. */ @@ -731,7 +752,8 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource old_va, buf); r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, - rbuffer, buffers->shader_usage); + rbuffer, buffers->shader_usage, + buffers->priority); buffers->desc.dirty_mask |= 1 << i; found = true; @@ -765,7 +787,8 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource old_va, buf); r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, - rbuffer, buffers->shader_usage); + rbuffer, buffers->shader_usage, + buffers->priority); buffers->desc.dirty_mask |= 1 << i; found = true; @@ -790,7 +813,8 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource old_va, buf); r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, - rbuffer, RADEON_USAGE_READ); + rbuffer, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_BUFFER_RO); views->desc.dirty_mask |= 1 << i; found = true; @@ -853,7 +877,8 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, /* This must be done after need_cs_space. */ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, - (struct r600_resource*)dst, RADEON_USAGE_WRITE); + (struct r600_resource*)dst, RADEON_USAGE_WRITE, + RADEON_PRIO_MIN); /* Flush the caches for the first copy only. * Also wait for the previous CP DMA operations. */ @@ -927,8 +952,10 @@ void si_copy_buffer(struct si_context *sctx, } /* This must be done after r600_need_cs_space. */ - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src, RADEON_USAGE_READ); - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE); + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src, + RADEON_USAGE_READ, RADEON_PRIO_MIN); + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst, + RADEON_USAGE_WRITE, RADEON_PRIO_MIN); si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags); @@ -954,12 +981,12 @@ void si_init_all_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { si_init_buffer_resources(sctx, &sctx->const_buffers[i], NUM_CONST_BUFFERS, i, SI_SGPR_CONST, - RADEON_USAGE_READ); + RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); si_init_buffer_resources(sctx, &sctx->rw_buffers[i], i == PIPE_SHADER_VERTEX ? SI_RW_SO + 4 : SI_RW_SO, i, SI_SGPR_RW_BUFFERS, - RADEON_USAGE_READWRITE); + RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW); si_init_sampler_views(sctx, &sctx->samplers[i].views, i); diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 0a5673ba917..082da85e03d 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -92,13 +92,15 @@ void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val) void si_pm4_add_bo(struct si_pm4_state *state, struct r600_resource *bo, - enum radeon_bo_usage usage) + enum radeon_bo_usage usage, + enum radeon_bo_priority priority) { unsigned idx = state->nbo++; assert(idx < SI_PM4_MAX_BO); r600_resource_reference(&state->bo[idx], bo); state->bo_usage[idx] = usage; + state->bo_priority[idx] = priority; } void si_pm4_sh_data_begin(struct si_pm4_state *state) @@ -215,7 +217,7 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state) struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; for (int i = 0; i < state->nbo; ++i) { r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, state->bo[i], - state->bo_usage[i]); + state->bo_usage[i], state->bo_priority[i]); } memcpy(&cs->buf[cs->cdw], state->pm4, state->ndw * 4); diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index e1d0cac3b73..a71958601aa 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -57,6 +57,7 @@ struct si_pm4_state unsigned nbo; struct r600_resource *bo[SI_PM4_MAX_BO]; enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO]; + enum radeon_bo_priority bo_priority[SI_PM4_MAX_BO]; /* relocs for shader data */ unsigned nrelocs; @@ -72,7 +73,8 @@ void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate); void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val); void si_pm4_add_bo(struct si_pm4_state *state, struct r600_resource *bo, - enum radeon_bo_usage usage); + enum radeon_bo_usage usage, + enum radeon_bo_priority priority); void si_pm4_sh_data_begin(struct si_pm4_state *state); void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 7d4f3c7409d..7734e35876e 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2064,7 +2064,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->fb_compressed_cb_mask |= 1 << i; } - si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE); + si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE, + rtex->surface.nsamples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : + RADEON_PRIO_COLOR_BUFFER); si_pm4_set_reg(pm4, R_028C60_CB_COLOR0_BASE + i * 0x3C, surf->cb_color_base); si_pm4_set_reg(pm4, R_028C64_CB_COLOR0_PITCH + i * 0x3C, surf->cb_color_pitch); si_pm4_set_reg(pm4, R_028C68_CB_COLOR0_SLICE + i * 0x3C, surf->cb_color_slice); @@ -2101,7 +2103,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, } if (surf->db_htile_data_base) { - si_pm4_add_bo(pm4, rtex->htile_buffer, RADEON_USAGE_READWRITE); + si_pm4_add_bo(pm4, rtex->htile_buffer, RADEON_USAGE_READWRITE, + RADEON_PRIO_DEPTH_META); } si_pm4_set_reg(pm4, R_028008_DB_DEPTH_VIEW, surf->db_depth_view); si_pm4_set_reg(pm4, R_028014_DB_HTILE_DATA_BASE, surf->db_htile_data_base); @@ -2109,7 +2112,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, si_pm4_set_reg(pm4, R_02803C_DB_DEPTH_INFO, surf->db_depth_info); si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, surf->db_z_info); si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, surf->db_stencil_info); - si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE); + si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE, + rtex->surface.nsamples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA : + RADEON_PRIO_DEPTH_BUFFER); si_pm4_set_reg(pm4, R_028048_DB_Z_READ_BASE, surf->db_depth_base); si_pm4_set_reg(pm4, R_02804C_DB_STENCIL_READ_BASE, surf->db_stencil_base); si_pm4_set_reg(pm4, R_028050_DB_Z_WRITE_BASE, surf->db_depth_base); @@ -2863,7 +2868,8 @@ static void si_set_sampler_states(struct si_context *sctx, if (sctx->b.chip_class >= CIK) si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40); sctx->b.ws->buffer_unmap(sctx->border_color_table->cs_buf); - si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_DATA); } memcpy(samplers->samplers, states, sizeof(void*) * count); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index fbc0d231226..0bd4e0551d2 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -162,6 +162,7 @@ struct si_buffer_resources { struct si_descriptors desc; unsigned num_buffers; enum radeon_bo_usage shader_usage; /* READ, WRITE, or READWRITE */ + enum radeon_bo_priority priority; struct pipe_resource **buffers; /* this has num_buffers elements */ uint32_t *desc_storage; /* this has num_buffers*4 elements */ uint32_t **desc_data; /* an array of pointers pointing to desc_storage */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 480e3f8a520..7321d1dd385 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -54,7 +54,7 @@ static void si_pipe_shader_es(struct pipe_context *ctx, struct si_pipe_shader *s return; va = r600_resource_va(ctx->screen, (void *)shader->bo); - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); vgpr_comp_cnt = shader->shader.uses_instanceid ? 3 : 0; @@ -129,7 +129,7 @@ static void si_pipe_shader_gs(struct pipe_context *ctx, struct si_pipe_shader *s si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize); va = r600_resource_va(ctx->screen, (void *)shader->bo); - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40); @@ -166,7 +166,7 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *s return; va = r600_resource_va(ctx->screen, (void *)shader->bo); - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); vgpr_comp_cnt = shader->shader.uses_instanceid ? 3 : 0; @@ -315,7 +315,7 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask); va = r600_resource_va(ctx->screen, (void *)shader->bo); - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40); @@ -728,7 +728,8 @@ static void si_vertex_buffer_update(struct si_context *sctx) si_pm4_sh_data_add(pm4, sctx->vertex_elements->rsrc_word3[i]); if (!bound[ve->vertex_buffer_index]) { - si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_BUFFER_RO); bound[ve->vertex_buffer_index] = true; } } @@ -784,7 +785,8 @@ static void si_state_draw(struct si_context *sctx, si_pm4_cmd_add(pm4, va >> 32UL); /* src address hi */ si_pm4_cmd_add(pm4, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); si_pm4_cmd_add(pm4, 0); /* unused */ - si_pm4_add_bo(pm4, t->buf_filled_size, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, t->buf_filled_size, RADEON_USAGE_READ, + RADEON_PRIO_MIN); si_pm4_cmd_end(pm4, true); } @@ -810,7 +812,8 @@ static void si_state_draw(struct si_context *sctx, va = r600_resource_va(&sctx->screen->b.b, ib->buffer); va += ib->offset; - si_pm4_add_bo(pm4, (struct r600_resource *)ib->buffer, RADEON_USAGE_READ); + si_pm4_add_bo(pm4, (struct r600_resource *)ib->buffer, RADEON_USAGE_READ, + RADEON_PRIO_MIN); si_cmd_draw_index_2(pm4, max_size, va, info->count, V_0287F0_DI_SRC_SEL_DMA, sctx->b.predicate_drawing); -- 2.30.2