From 2edb0606397d16fe88d7b488285df379aaae5893 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 26 Sep 2015 23:18:55 +0200 Subject: [PATCH] gallium/radeon: tell the winsys the exact resource binding types MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Use the priority flags and expand them. This information will be used for debugging. Reviewed-by: Michel Dänzer --- src/gallium/drivers/r300/r300_emit.c | 10 +-- src/gallium/drivers/r600/evergreen_compute.c | 4 +- .../drivers/r600/evergreen_hw_context.c | 6 +- src/gallium/drivers/r600/evergreen_state.c | 25 ++++--- src/gallium/drivers/r600/r600_hw_context.c | 8 +-- src/gallium/drivers/r600/r600_state.c | 23 ++++--- src/gallium/drivers/r600/r600_state_common.c | 15 +++-- src/gallium/drivers/radeon/r600_pipe_common.h | 12 ++++ src/gallium/drivers/radeon/r600_query.c | 9 +-- src/gallium/drivers/radeon/r600_streamout.c | 8 +-- src/gallium/drivers/radeon/radeon_uvd.c | 2 +- src/gallium/drivers/radeon/radeon_vce.c | 2 +- src/gallium/drivers/radeon/radeon_winsys.h | 65 +++++++++++++++---- src/gallium/drivers/radeonsi/cik_sdma.c | 8 +-- src/gallium/drivers/radeonsi/si_compute.c | 8 +-- src/gallium/drivers/radeonsi/si_cp_dma.c | 6 +- src/gallium/drivers/radeonsi/si_descriptors.c | 37 ++++------- src/gallium/drivers/radeonsi/si_dma.c | 8 +-- src/gallium/drivers/radeonsi/si_pm4.c | 3 +- src/gallium/drivers/radeonsi/si_state.c | 6 +- src/gallium/drivers/radeonsi/si_state_draw.c | 10 +-- .../drivers/radeonsi/si_state_shaders.c | 12 ++-- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 8 +-- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 ++-- 24 files changed, 175 insertions(+), 131 deletions(-) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 4c9971e5128..ecc4307a670 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1357,19 +1357,19 @@ validate: tex = r300_resource(texstate->sampler_views[i]->base.texture); r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, RADEON_USAGE_READ, - tex->domain, RADEON_PRIO_SHADER_TEXTURE_RO); + tex->domain, RADEON_PRIO_SAMPLER_TEXTURE); } } /* ...occlusion query buffer... */ if (r300->query_current) r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT, - RADEON_PRIO_MIN); + RADEON_PRIO_QUERY); /* ...vertex buffer for SWTCL path... */ if (r300->vbo_cs) r300->rws->cs_add_reloc(r300->cs, r300->vbo_cs, RADEON_USAGE_READ, RADEON_DOMAIN_GTT, - RADEON_PRIO_MIN); + RADEON_PRIO_VERTEX_BUFFER); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; @@ -1385,7 +1385,7 @@ validate: r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf, RADEON_USAGE_READ, r300_resource(buf)->domain, - RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_PRIO_SAMPLER_BUFFER); } } /* ...and index buffer for HWTCL path. */ @@ -1393,7 +1393,7 @@ validate: r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, RADEON_USAGE_READ, r300_resource(index_buffer)->domain, - RADEON_PRIO_MIN); + RADEON_PRIO_INDEX_BUFFER); /* Now do the validation (flush is called inside cs_validate on failure). */ if (!r300->rws->cs_validate(r300->cs)) { diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 33009c16f68..6f2b7ba0db3 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -442,7 +442,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx, (struct r600_resource*)cb->base.texture, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_PRIO_SHADER_RW_BUFFER); radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7); radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ @@ -566,7 +566,7 @@ void evergreen_emit_cs_shader( radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, code_bo, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA)); + RADEON_PRIO_USER_SHADER)); } static void evergreen_launch_grid( diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 29bdd9daddb..89abe92cbb4 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -65,9 +65,9 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx, csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize); cs->buf[cs->cdw++] = dst_offset & 0xffffffff; cs->buf[cs->cdw++] = src_offset & 0xffffffff; @@ -131,7 +131,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, /* This must be done after r600_need_cs_space. */ reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_CP_DMA); radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(cs, clear_value); /* DATA [31:0] */ diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 52f4dc81d9f..a5caa0dac2b 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1584,7 +1584,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, tex->cmask_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_COLOR_META); + RADEON_PRIO_CMASK); } else { cmask_reloc = reloc; } @@ -1767,7 +1767,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control); radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META); + RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = reloc_idx; } else { @@ -1881,7 +1881,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER)); } state->dirty_mask = 0; } @@ -1929,7 +1929,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); radeon_emit(cs, (buffer_id_base + buffer_index) * 8); @@ -1954,7 +1954,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); dirty_mask &= ~(1 << buffer_index); } @@ -2018,9 +2018,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource, RADEON_USAGE_READ, - rview->tex_resource->b.b.nr_samples > 1 ? - RADEON_PRIO_SHADER_TEXTURE_MSAA : - RADEON_PRIO_SHADER_TEXTURE_RO); + r600_get_sampler_view_priority(rview->tex_resource)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, reloc); @@ -2140,7 +2138,8 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct (shader->buffer->gpu_address + shader->offset) >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); + RADEON_USAGE_READ, + RADEON_PRIO_INTERNAL_SHADER)); } static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) @@ -2199,7 +2198,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW)); + RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, state->esgs_ring.buffer_size >> 8); @@ -2209,7 +2208,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW)); + RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, state->gsvs_ring.buffer_size >> 8); } else { @@ -3330,9 +3329,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, size = (cheight * pitch) / 4; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size); cs->buf[cs->cdw++] = base >> 8; cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) | diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index cf715976ab2..6f11366e606 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -419,9 +419,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, /* This must be done after r600_need_cs_space. */ src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(cs, src_offset); /* SRC_ADDR_LO [31:0] */ @@ -472,9 +472,9 @@ void r600_dma_copy_buffer(struct r600_context *rctx, csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize); cs->buf[cs->cdw++] = dst_offset & 0xfffffffc; cs->buf[cs->cdw++] = src_offset & 0xfffffffc; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7084c5f359b..4b171894f5c 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1605,7 +1605,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_DEPTH_META); + RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = reloc_idx; } else { @@ -1720,7 +1720,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER)); } } @@ -1753,7 +1753,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); radeon_emit(cs, (buffer_id_base + buffer_index) * 7); @@ -1769,7 +1769,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO)); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER)); dirty_mask &= ~(1 << buffer_index); } @@ -1821,9 +1821,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource, RADEON_USAGE_READ, - rview->tex_resource->b.b.nr_samples > 1 ? - RADEON_PRIO_SHADER_TEXTURE_MSAA : - RADEON_PRIO_SHADER_TEXTURE_RO); + r600_get_sampler_view_priority(rview->tex_resource)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, reloc); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); @@ -1945,7 +1943,8 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600 radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); + RADEON_USAGE_READ, + RADEON_PRIO_INTERNAL_SHADER)); } static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) @@ -1999,7 +1998,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW)); + RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, state->esgs_ring.buffer_size >> 8); @@ -2008,7 +2007,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW)); + RADEON_PRIO_RINGS_STREAMOUT)); radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, state->gsvs_ring.buffer_size >> 8); } else { @@ -2914,9 +2913,9 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, size = (cheight * pitch) / 4; /* emit reloc before writing cs so that cs is always in consistent state */ radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_TEXTURE); radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_TEXTURE); cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size); cs->buf[cs->cdw++] = base >> 8; cs->buf[cs->cdw++] = (detile << 31) | (array_mode << 27) | diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index efce852eafa..a16f1c25dcb 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1683,7 +1683,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)info.indirect, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, + RADEON_PRIO_DRAW_INDIRECT); } if (info.indexed) { @@ -1712,7 +1713,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)ib.buffer, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, + RADEON_PRIO_INDEX_BUFFER); } else { uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size; @@ -1724,7 +1726,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing); cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)ib.buffer, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, + RADEON_PRIO_INDEX_BUFFER); cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing); cs->buf[cs->cdw++] = max_size; @@ -1751,7 +1754,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, t->buf_filled_size, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SO_FILLED_SIZE); } if (likely(!info.indirect)) { @@ -1941,7 +1944,7 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) r600_emit_command_buffer(cs, &shader->command_buffer); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA)); + RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER)); } unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, @@ -2669,7 +2672,7 @@ void r600_trace_emit(struct r600_context *rctx) va = rscreen->b.trace_bo->gpu_address; reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo, - RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); + RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); radeon_emit(cs, va & 0xFFFFFFFFUL); radeon_emit(cs, (va >> 32UL) & 0xFFUL); diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 2df93e54559..0f9b91af315 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -623,6 +623,18 @@ static inline unsigned r600_wavefront_size(enum radeon_family family) } } +static inline enum radeon_bo_priority +r600_get_sampler_view_priority(struct r600_resource *res) +{ + if (res->b.b.target == PIPE_BUFFER) + return RADEON_PRIO_SAMPLER_BUFFER; + + if (res->b.b.nr_samples > 1) + return RADEON_PRIO_SAMPLER_TEXTURE_MSAA; + + return RADEON_PRIO_SAMPLER_TEXTURE; +} + #define COMPUTE_DBG(rscreen, fmt, args...) \ do { \ if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \ diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index deeae0a6a65..9a5402583f4 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -226,7 +226,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q assert(0); } r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_QUERY); if (r600_is_timer_query(query->type)) ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw; @@ -288,7 +288,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que assert(0); } r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_QUERY); query->buffer.results_end += query->result_size; @@ -344,7 +344,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct radeon_emit(cs, va + results_base); radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF)); r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_QUERY); results_base += query->result_size; /* set CONTINUE bit for all packets except the first */ @@ -990,7 +990,8 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx) radeon_emit(cs, buffer->gpu_address); radeon_emit(cs, buffer->gpu_address >> 32); - r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); /* analyze results */ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ); diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c index 5198f1e041d..33403b572af 100644 --- a/src/gallium/drivers/radeon/r600_streamout.c +++ b/src/gallium/drivers/radeon/r600_streamout.c @@ -217,7 +217,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r radeon_emit(cs, va >> 8); /* BUFFER_BASE */ r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer), - RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT); /* R7xx requires this packet after updating BUFFER_BASE. * Without this, R7xx locks up. */ @@ -227,7 +227,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r radeon_emit(cs, va >> 8); r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer), - RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT); } } @@ -245,7 +245,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r radeon_emit(cs, va >> 32); /* src address hi */ r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE); } else { /* Start from the beginning. */ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); @@ -289,7 +289,7 @@ void r600_emit_streamout_end(struct r600_common_context *rctx) radeon_emit(cs, 0); /* unused */ r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE); /* Zero the buffer size. The counters (primitives generated, * primitives emitted) may be enabled even if there is not diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 81f3f45db9f..4c59885eecf 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -111,7 +111,7 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, int reloc_idx; reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain, - RADEON_PRIO_MIN); + RADEON_PRIO_UVD); if (!dec->use_legacy) { uint64_t addr; addr = dec->ws->buffer_get_virtual_address(cs_buf); diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 7eab974a3df..0e01e91d02b 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -516,7 +516,7 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *b { int reloc_idx; - reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN); + reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_VCE); if (enc->use_vm) { uint64_t addr; addr = enc->ws->buffer_get_virtual_address(buf); diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 00accd5b3e6..8b17281efe7 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -178,20 +178,59 @@ enum radeon_value_id { RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */ }; +/* Each group of four has the same priority. */ enum radeon_bo_priority { - RADEON_PRIO_MIN, - RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */ - RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */ - RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */ - RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */ - RADEON_PRIO_COLOR_BUFFER, - RADEON_PRIO_DEPTH_BUFFER, - RADEON_PRIO_SHADER_TEXTURE_MSAA, - RADEON_PRIO_COLOR_BUFFER_MSAA, - RADEON_PRIO_DEPTH_BUFFER_MSAA, - RADEON_PRIO_COLOR_META, - RADEON_PRIO_DEPTH_META, - RADEON_PRIO_MAX /* must be <= 15 */ + RADEON_PRIO_FENCE = 0, + RADEON_PRIO_TRACE, + RADEON_PRIO_SO_FILLED_SIZE, + RADEON_PRIO_QUERY, + + RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */ + RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */ + RADEON_PRIO_DRAW_INDIRECT, + RADEON_PRIO_INDEX_BUFFER, + + RADEON_PRIO_CP_DMA = 8, + + RADEON_PRIO_VCE = 12, + RADEON_PRIO_UVD, + RADEON_PRIO_SDMA_BUFFER, + RADEON_PRIO_SDMA_TEXTURE, + + RADEON_PRIO_USER_SHADER = 16, + RADEON_PRIO_INTERNAL_SHADER, /* fetch shader, etc. */ + + /* gap: 20 */ + + RADEON_PRIO_CONST_BUFFER = 24, + RADEON_PRIO_DESCRIPTORS, + RADEON_PRIO_BORDER_COLORS, + + RADEON_PRIO_SAMPLER_BUFFER = 28, + RADEON_PRIO_VERTEX_BUFFER, + + RADEON_PRIO_SHADER_RW_BUFFER = 32, + RADEON_PRIO_RINGS_STREAMOUT, + RADEON_PRIO_SCRATCH_BUFFER, + RADEON_PRIO_COMPUTE_GLOBAL, + + RADEON_PRIO_SAMPLER_TEXTURE = 36, + RADEON_PRIO_SHADER_RW_IMAGE, + + RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 40, + + RADEON_PRIO_COLOR_BUFFER = 44, + + RADEON_PRIO_DEPTH_BUFFER = 48, + + RADEON_PRIO_COLOR_BUFFER_MSAA = 52, + + RADEON_PRIO_DEPTH_BUFFER_MSAA = 56, + + RADEON_PRIO_CMASK = 60, + RADEON_PRIO_DCC, + RADEON_PRIO_HTILE, + /* 63 is the maximum value */ }; struct winsys_handle; diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 8b0ce9f1bb8..691d379bccd 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -62,9 +62,9 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx, r600_need_dma_space(&ctx->b, ncopy * 7); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); for (i = 0; i < ncopy; i++) { csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE; @@ -172,9 +172,9 @@ static void cik_sdma_copy_tile(struct si_context *ctx, r600_need_dma_space(&ctx->b, ncopy * 12); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE); copy_height = size * 4 / pitch; for (i = 0; i < ncopy; i++) { diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e1849bad933..c6605346771 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -297,7 +297,7 @@ static void si_launch_grid( radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->scratch_bo, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_PRIO_SCRATCH_BUFFER); scratch_buffer_va = shader->scratch_bo->gpu_address; } @@ -311,7 +311,7 @@ static void si_launch_grid( kernel_args_va += kernel_args_offset; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va); si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0)); @@ -340,7 +340,7 @@ static void si_launch_grid( } radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_PRIO_COMPUTE_GLOBAL); } /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID @@ -362,7 +362,7 @@ static void si_launch_grid( shader_va += pc; #endif radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8); si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 32ab6a9dcbf..d4bd7b28cf3 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -160,7 +160,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, /* This must be done after need_cs_space. */ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_CP_DMA); /* Flush the caches for the first copy only. * Also wait for the previous CP DMA operations. */ @@ -240,9 +240,9 @@ void si_copy_buffer(struct si_context *sctx, /* This must be done after r600_need_cs_space. */ radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index b07ab3b94ac..74ec7cccba8 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -118,7 +118,7 @@ static bool si_upload_descriptors(struct si_context *sctx, util_memcpy_cpu_to_le32(ptr, desc->list, list_size); radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS); desc->list_dirty = false; desc->pointer_dirty = true; @@ -138,17 +138,6 @@ static void si_release_sampler_views(struct si_sampler_views *views) si_release_descriptors(&views->desc); } -static enum radeon_bo_priority si_get_resource_ro_priority(struct r600_resource *res) -{ - if (res->b.b.target == PIPE_BUFFER) - return RADEON_PRIO_SHADER_BUFFER_RO; - - if (res->b.b.nr_samples > 1) - return RADEON_PRIO_SHADER_TEXTURE_MSAA; - - return RADEON_PRIO_SHADER_TEXTURE_RO; -} - static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_sampler_views *views) { @@ -165,13 +154,13 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, rview->resource, RADEON_USAGE_READ, - si_get_resource_ro_priority(rview->resource)); + r600_get_sampler_view_priority(rview->resource)); } if (!views->desc.buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); } static void si_set_sampler_view(struct si_context *sctx, unsigned shader, @@ -190,7 +179,7 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader, if (rview->resource) radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, rview->resource, RADEON_USAGE_READ, - si_get_resource_ro_priority(rview->resource)); + r600_get_sampler_view_priority(rview->resource)); pipe_sampler_view_reference(&views->views[slot], view); memcpy(views->desc.list + slot*8, view_desc, 8*4); @@ -270,7 +259,7 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx, if (!states->desc.buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); + RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); } static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, @@ -348,7 +337,7 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffers->desc.buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_DATA); + RADEON_PRIO_DESCRIPTORS); } /* VERTEX BUFFERS */ @@ -369,14 +358,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)sctx->vertex_buffer[vb].buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } if (!desc->buffer) return; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA); + RADEON_PRIO_DESCRIPTORS); } static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) @@ -403,7 +392,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA); + RADEON_PRIO_DESCRIPTORS); assert(count <= SI_NUM_VERTEX_BUFFERS); @@ -447,7 +436,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) if (!bound[ve->vertex_buffer_index]) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)vb->buffer, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); bound[ve->vertex_buffer_index] = true; } } @@ -870,7 +859,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_PRIO_SAMPLER_BUFFER); } } } @@ -1017,10 +1006,10 @@ void si_init_all_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { si_init_buffer_resources(&sctx->const_buffers[i], SI_NUM_CONST_BUFFERS, SI_SGPR_CONST, - RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO); + RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER); si_init_buffer_resources(&sctx->rw_buffers[i], SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS, - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT); si_init_descriptors(&sctx->samplers[i].views.desc, SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS); diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 309ae04424a..3d980fb67b8 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -79,9 +79,9 @@ static void si_dma_copy_buffer(struct si_context *ctx, r600_need_dma_space(&ctx->b, ncopy * 5); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE, - RADEON_PRIO_MIN); + RADEON_PRIO_SDMA_BUFFER); for (i = 0; i < ncopy; i++) { csize = size < max_csize ? size : max_csize; @@ -178,9 +178,9 @@ static void si_dma_copy_tile(struct si_context *ctx, r600_need_dma_space(&ctx->b, ncopy * 9); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE); radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource, - RADEON_USAGE_WRITE, RADEON_PRIO_MIN); + RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE); for (i = 0; i < ncopy; i++) { cheight = copy_height; diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index b1834afa796..f16933c5f98 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -140,7 +140,8 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state) struct r600_resource *ib = state->indirect_buffer; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, + RADEON_PRIO_IB2); radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); radeon_emit(cs, ib->gpu_address); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index d74f6e896c4..5d4e579b392 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2238,7 +2238,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, tex->cmask_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_COLOR_META); + RADEON_PRIO_CMASK); } radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, @@ -2285,7 +2285,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom if (zb->db_htile_data_base) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, rtex->htile_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_DEPTH_META); + RADEON_PRIO_HTILE); } radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); @@ -3391,7 +3391,7 @@ static void si_init_config(struct si_context *sctx) if (sctx->b.chip_class >= CIK) si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, - RADEON_PRIO_SHADER_DATA); + RADEON_PRIO_BORDER_COLORS); si_pm4_upload_indirect_buffer(sctx, pm4); sctx->init_config = pm4; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 6d8e0e509bf..fb65eb3ce2d 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -353,7 +353,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx) if (sctx->scratch_buffer) { radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->scratch_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SHADER_RESOURCE_RW); + RADEON_PRIO_SCRATCH_BUFFER); } sctx->emit_scratch_reloc = false; @@ -467,7 +467,7 @@ static void si_emit_draw_packets(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, t->buf_filled_size, RADEON_USAGE_READ, - RADEON_PRIO_MIN); + RADEON_PRIO_SO_FILLED_SIZE); } /* draw packet */ @@ -521,7 +521,7 @@ static void si_emit_draw_packets(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource *)info->indirect, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); } if (info->indexed) { @@ -531,7 +531,7 @@ static void si_emit_draw_packets(struct si_context *sctx, radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource *)ib->buffer, - RADEON_USAGE_READ, RADEON_PRIO_MIN); + RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); if (info->indirect) { uint64_t indirect_va = r600_resource(info->indirect)->gpu_address; @@ -883,7 +883,7 @@ void si_trace_emit(struct si_context *sctx) sctx->trace_id++; radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf, - RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); + RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | S_370_WR_CONFIRM(1) | diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b5e14ead160..31c0ab95464 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -103,7 +103,7 @@ static void si_shader_ls(struct si_shader *shader) return; va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); /* We need at least 2 components for LS. * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */ @@ -138,7 +138,7 @@ static void si_shader_hs(struct si_shader *shader) return; va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); num_user_sgprs = SI_TCS_NUM_USER_SGPR; num_sgprs = shader->num_sgprs; @@ -173,7 +173,7 @@ static void si_shader_es(struct si_shader *shader) return; va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); if (shader->selector->type == PIPE_SHADER_VERTEX) { vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0; @@ -279,7 +279,7 @@ static void si_shader_gs(struct si_shader *shader) S_028B90_ENABLE(gs_num_invocations > 0)); va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40); @@ -327,7 +327,7 @@ static void si_shader_vs(struct si_shader *shader) si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0); va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); if (shader->is_gs_copy_shader) { vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */ @@ -458,7 +458,7 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask); va = shader->bo->gpu_address; - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 84fc40b923d..9a89ba85a29 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -409,7 +409,7 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1); int i = -1; - priority = MIN2(priority, 15); + assert(priority < 64); *added_domains = 0; i = amdgpu_get_reloc(cs, bo); @@ -419,7 +419,7 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, reloc->usage |= usage; *added_domains = domains & ~reloc->domains; reloc->domains |= domains; - cs->flags[i] = MAX2(cs->flags[i], priority); + cs->flags[i] = MAX2(cs->flags[i], priority / 4); return i; } @@ -441,7 +441,7 @@ static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs, cs->buffers[cs->num_buffers].bo = NULL; amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo); cs->handles[cs->num_buffers] = bo->bo; - cs->flags[cs->num_buffers] = priority; + cs->flags[cs->num_buffers] = priority / 4; p_atomic_inc(&bo->num_cs_references); reloc = &cs->buffers[cs->num_buffers]; reloc->bo = bo; @@ -622,7 +622,7 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs, } amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer, - RADEON_USAGE_READ, 0, RADEON_PRIO_MIN); + RADEON_USAGE_READ, 0, RADEON_PRIO_IB1); /* If the CS is not empty or overflowed.... */ if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 2c4f990944c..b277efecf61 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -263,14 +263,14 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0; int i = -1; - priority = MIN2(priority, 15); + assert(priority < 64); *added_domains = 0; i = radeon_get_reloc(csc, bo); if (i >= 0) { reloc = &csc->relocs[i]; - update_reloc(reloc, rd, wd, priority, added_domains); + update_reloc(reloc, rd, wd, priority / 4, added_domains); /* For async DMA, every add_reloc call must add a buffer to the list * no matter how many duplicates there are. This is due to the fact @@ -309,7 +309,7 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, reloc->handle = bo->handle; reloc->read_domains = rd; reloc->write_domain = wd; - reloc->flags = priority; + reloc->flags = priority / 4; csc->reloc_indices_hashlist[hash] = csc->crelocs; @@ -328,7 +328,8 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; enum radeon_bo_domain added_domains; - unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains); + unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, + &added_domains); if (added_domains & RADEON_DOMAIN_GTT) cs->csc->used_gart += bo->base.size; @@ -633,7 +634,7 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs) /* Add the fence as a dummy relocation. */ cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence), RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT, - RADEON_PRIO_MIN); + RADEON_PRIO_FENCE); return (struct pipe_fence_handle*)fence; } -- 2.30.2