From: Marek Olšák
Date: Tue, 12 Feb 2019 20:03:13 +0000 (-0500)
Subject: radeonsi: add si_cp_copy_data
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c59d238bb04a1cbbc01b31fabdb7d93306788110;p=mesa.git

radeonsi: add si_cp_copy_data

Tested-by: Dieter Nützel
Acked-by: Nicolai Hähnle
---

Review notes (added while restoring the line structure of this patch; not
part of the original commit message; ignored by patch tools because they
precede the first "diff --git" header):

* si_cp_copy_data() emits a single PKT3_COPY_DATA packet on sctx->gfx_cs.
  When dst/src are non-NULL they are added to the buffer list (WRITE/READ,
  RADEON_PRIO_CP_DMA) and their gpu_address is added to the given offset.
* When dst/src is NULL the "offset" argument is emitted as-is: the callers
  below pass a register dword index (COPY_DATA_REG destination) or the
  immediate value 1 (COPY_DATA_IMM source) through it.
* The helper always sets COPY_DATA_WR_CONFIRM. The open-coded packets it
  replaces in si_compute.c and si_perfcounter.c did not set that bit.
* The buffer priorities used by the replaced code (RADEON_PRIO_DRAW_INDIRECT,
  RADEON_PRIO_QUERY, RADEON_PRIO_SO_FILLED_SIZE) all become
  RADEON_PRIO_CP_DMA.
* Leading indentation was lost in the copy this was restored from; the
  tabs below are a reconstruction, the hunk line counts are verified.

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 729dc4e325f..f1afef2e66f 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -724,22 +724,11 @@ static void si_setup_tgsi_user_data(struct si_context *sctx,
 
 	if (info->indirect) {
 		if (program->uses_grid_size) {
-			uint64_t base_va = si_resource(info->indirect)->gpu_address;
-			uint64_t va = base_va + info->indirect_offset;
-			int i;
-
-			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
-					 si_resource(info->indirect),
-					 RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
-
-			for (i = 0; i < 3; ++i) {
-				radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-				radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
-						COPY_DATA_DST_SEL(COPY_DATA_REG));
-				radeon_emit(cs, (va + 4 * i));
-				radeon_emit(cs, (va + 4 * i) >> 32);
-				radeon_emit(cs, (grid_size_reg >> 2) + i);
-				radeon_emit(cs, 0);
+			for (unsigned i = 0; i < 3; ++i) {
+				si_cp_copy_data(sctx,
+						COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i,
+						COPY_DATA_SRC_MEM, si_resource(info->indirect),
+						info->indirect_offset + 4 * i);
 			}
 		}
 	} else {
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index f349325202c..404117d1813 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -606,3 +606,31 @@ void si_cp_write_data(struct si_context *sctx, struct si_resource *buf,
 	radeon_emit(cs, va >> 32);
 	radeon_emit_array(cs, (const uint32_t*)data, size/4);
 }
+
+void si_cp_copy_data(struct si_context *sctx,
+		     unsigned dst_sel, struct si_resource *dst, unsigned dst_offset,
+		     unsigned src_sel, struct si_resource *src, unsigned src_offset)
+{
+	struct radeon_cmdbuf *cs = sctx->gfx_cs;
+
+	if (dst) {
+		radeon_add_to_buffer_list(sctx, cs, dst,
+					  RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
+	}
+	if (src) {
+		radeon_add_to_buffer_list(sctx, cs, src,
+					  RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
+	}
+
+	uint64_t dst_va = (dst ? dst->gpu_address : 0ull) + dst_offset;
+	uint64_t src_va = (src ? src->gpu_address : 0ull) + src_offset;
+
+	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+	radeon_emit(cs, COPY_DATA_SRC_SEL(src_sel) |
+			COPY_DATA_DST_SEL(dst_sel) |
+			COPY_DATA_WR_CONFIRM);
+	radeon_emit(cs, src_va);
+	radeon_emit(cs, src_va >> 32);
+	radeon_emit(cs, dst_va);
+	radeon_emit(cs, dst_va >> 32);
+}
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 4ce71f9500d..c15c444cc40 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -671,16 +671,9 @@ static void si_pc_emit_start(struct si_context *sctx,
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
-	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
-				  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
-
-	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
-			COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
-	radeon_emit(cs, 1); /* immediate */
-	radeon_emit(cs, 0); /* unused */
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
+	si_cp_copy_data(sctx,
+			COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address,
+			COPY_DATA_IMM, NULL, 1);
 
 	radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
 			       S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET));
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index aaa95f32d20..e45ef62ecfe 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1239,6 +1239,9 @@ void si_test_gds(struct si_context *sctx);
 void si_cp_write_data(struct si_context *sctx, struct si_resource *buf,
 		      unsigned offset, unsigned size, unsigned dst_sel,
 		      unsigned engine, const void *data);
+void si_cp_copy_data(struct si_context *sctx,
+		     unsigned dst_sel, struct si_resource *dst, unsigned dst_offset,
+		     unsigned src_sel, struct si_resource *src, unsigned src_offset);
 
 /* si_debug.c */
 void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2a514f144b9..1cadc416ca3 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -678,24 +678,14 @@ static void si_emit_draw_packets(struct si_context *sctx,
 	if (info->count_from_stream_output) {
 		struct si_streamout_target *t =
 			(struct si_streamout_target*)info->count_from_stream_output;
-		uint64_t va = t->buf_filled_size->gpu_address +
-			      t->buf_filled_size_offset;
 
 		radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
 				       t->stride_in_dw);
-
-		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
-			    COPY_DATA_DST_SEL(COPY_DATA_REG) |
-			    COPY_DATA_WR_CONFIRM);
-		radeon_emit(cs, va);     /* src address lo */
-		radeon_emit(cs, va >> 32); /* src address hi */
-		radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
-		radeon_emit(cs, 0); /* unused */
-
-		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
-				      t->buf_filled_size, RADEON_USAGE_READ,
-				      RADEON_PRIO_SO_FILLED_SIZE);
+		si_cp_copy_data(sctx,
+				COPY_DATA_REG, NULL,
+				R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2,
+				COPY_DATA_SRC_MEM, t->buf_filled_size,
+				t->buf_filled_size_offset);
 	}
 
 	/* draw packet */