From 11b76369f53e064bef1bad629f957373c0e93b6c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 29 Dec 2014 13:22:00 +0100 Subject: [PATCH] radeonsi: use TC L2 for updating descriptors on CIK MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This allows not flushing TC L2 on CIK later. Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_descriptors.c | 13 ++++++++----- src/gallium/drivers/radeonsi/sid.h | 2 ++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f0e353a1db9..e2da476ab0f 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -54,6 +54,7 @@ static uint32_t null_desc[8]; /* zeros */ * packet. It's for preventing a read-after-write (RAW) hazard between two * CP DMA packets. */ #define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */ +#define CIK_CP_DMA_USE_L2 (1 << 2) /* Emit a CP DMA packet to do a copy from one buffer to another. * The size must fit in bits [20:0]. @@ -65,13 +66,15 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx, struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0; uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0; + uint32_t sel = flags & CIK_CP_DMA_USE_L2 ? + PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0; assert(size); assert((size & ((1<<21)-1)) == size); if (sctx->b.chip_class >= CIK) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); - radeon_emit(cs, sync_flag); /* CP_SYNC [31] */ + radeon_emit(cs, sync_flag | sel); /* CP_SYNC [31] */ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ @@ -95,13 +98,14 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0; uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0; + uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0; assert(size); assert((size & ((1<<21)-1)) == size); if (sctx->b.chip_class >= CIK) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); - radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */ + radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */ radeon_emit(cs, clear_value); /* DATA [31:0] */ radeon_emit(cs, 0); radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ @@ -145,7 +149,7 @@ static void si_init_descriptors(struct si_context *sctx, * only once at context initialization. */ si_emit_cp_dma_clear_buffer(sctx, desc->buffer->gpu_address, desc->buffer->b.b.width0, 0, - R600_CP_DMA_SYNC); + R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2); } static void si_release_descriptors(struct si_descriptors *desc) @@ -227,11 +231,10 @@ static void si_emit_descriptors(struct si_context *sctx, va_base = desc->buffer->gpu_address; /* Copy the descriptors to a new context slot. */ - /* XXX Consider using TC or L2 for this copy on CIK. */ si_emit_cp_dma_copy_buffer(sctx, va_base + new_context_id * desc->context_size, va_base + desc->current_context_id * desc->context_size, - desc->context_size, R600_CP_DMA_SYNC); + desc->context_size, R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2); va_base += new_context_id * desc->context_size; diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index e36244dc7d4..afe011b15c7 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -164,10 +164,12 @@ /* 0 - SRC_ADDR * 1 - GDS (program SAS to 1 as well) * 2 - DATA + * 3 - SRC_ADDR using TC L2 (DMA_DATA only) */ #define PKT3_CP_DMA_DST_SEL(x) ((x) << 20) /* 0 - DST_ADDR * 1 - GDS (program DAS to 1 as well) + * 3 - DST_ADDR using TC L2 (DMA_DATA only) */ /* COMMAND */ #define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) -- 2.30.2