X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr600%2Fevergreen_hw_context.c;h=5e0e27b0f16f9dfde4b28c259382265219dab413;hb=709905cbb683d5b0b9fdbb82c29165aba0149706;hp=0c5dcba071809e0dfd71ff786c2c2dbb038f3b90;hpb=e3ecfecadaa4210168f7c850b89992307016bc64;p=mesa.git diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 0c5dcba0718..5e0e27b0f16 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -23,303 +23,128 @@ * Authors: * Jerome Glisse */ -#include "r600_hw_context_priv.h" +#include "r600_pipe.h" #include "evergreend.h" #include "util/u_memory.h" - -static const struct r600_reg cayman_config_reg_list[] = { - {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0}, - {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0}, -}; - -static const struct r600_reg evergreen_context_reg_list[] = { - {R_028010_DB_RENDER_OVERRIDE2, 0, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0}, - {R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0}, - {R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0}, - {R_028350_SX_MISC, 0, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_02861C_SPI_VS_OUT_ID_0, 0, 0}, - {R_028620_SPI_VS_OUT_ID_1, 0, 0}, - {R_028624_SPI_VS_OUT_ID_2, 0, 0}, - {R_028628_SPI_VS_OUT_ID_3, 0, 0}, - {R_02862C_SPI_VS_OUT_ID_4, 0, 0}, - {R_028630_SPI_VS_OUT_ID_5, 0, 0}, - {R_028634_SPI_VS_OUT_ID_6, 0, 0}, - {R_028638_SPI_VS_OUT_ID_7, 0, 0}, - {R_02863C_SPI_VS_OUT_ID_8, 0, 0}, - {R_028640_SPI_VS_OUT_ID_9, 0, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0}, - {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0}, - {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0}, - {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0}, - {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0}, - {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0}, - {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0}, - {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0}, - {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0}, - {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0}, - {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0}, - {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0}, - {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0}, - {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0}, - {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0}, - {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0}, - {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0}, - {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0}, - {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0}, - {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0}, - {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0}, - {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0}, - {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0}, - {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0}, - {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0}, - {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0}, - {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0}, - {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0}, - {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0}, - {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0}, - {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0}, - {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0}, - {R_0286C8_SPI_THREAD_GROUPING, 0, 0}, - {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0}, - {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0}, - {R_0286D4_SPI_INTERP_CONTROL_0, 0, 0}, - {R_0286D8_SPI_INPUT_Z, 0, 0}, - {R_0286E0_SPI_BARYC_CNTL, 0, 0}, - {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0}, - {R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0, 0}, - {R_028780_CB_BLEND0_CONTROL, 0, 0}, - {R_028784_CB_BLEND1_CONTROL, 0, 0}, - {R_028788_CB_BLEND2_CONTROL, 0, 0}, - {R_02878C_CB_BLEND3_CONTROL, 0, 0}, - {R_028790_CB_BLEND4_CONTROL, 0, 0}, - {R_028794_CB_BLEND5_CONTROL, 0, 0}, - {R_028798_CB_BLEND6_CONTROL, 0, 0}, - {R_02879C_CB_BLEND7_CONTROL, 0, 0}, - {R_028800_DB_DEPTH_CONTROL, 0, 0}, - {R_02880C_DB_SHADER_CONTROL, 0, 0}, - {R_028808_CB_COLOR_CONTROL, 0, 0}, - {R_028814_PA_SU_SC_MODE_CNTL, 0, 0}, - {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0}, - {R_028844_SQ_PGM_RESOURCES_PS, 0, 0}, - {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0}, - {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0}, - {R_028860_SQ_PGM_RESOURCES_VS, 0, 0}, - {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0}, - {R_0288EC_SQ_LDS_ALLOC_PS, 0, 0}, - {R_028A00_PA_SU_POINT_SIZE, 0, 0}, - {R_028A04_PA_SU_POINT_MINMAX, 0, 0}, - {R_028A08_PA_SU_LINE_CNTL, 0, 0}, - {R_028A48_PA_SC_MODE_CNTL_0, 0, 0}, - {R_028ABC_DB_HTILE_SURFACE, 0, 0}, - {R_028B54_VGT_SHADER_STAGES_EN, 0, 0}, - {R_028B70_DB_ALPHA_TO_MASK, 0, 0}, - {R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0}, - {R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0, 0}, - {R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0}, - {R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0}, - {R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0}, - {R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0}, - {R_028C08_PA_SU_VTX_CNTL, 0, 0}, -}; - -static const struct r600_reg cayman_context_reg_list[] = { - {R_028010_DB_RENDER_OVERRIDE2, 0, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0}, - {R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0}, - {R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0}, - {R_028350_SX_MISC, 0, 0}, - {GROUP_FORCE_NEW_BLOCK, 0, 0}, - {R_02861C_SPI_VS_OUT_ID_0, 0, 0}, - {R_028620_SPI_VS_OUT_ID_1, 0, 0}, - {R_028624_SPI_VS_OUT_ID_2, 0, 0}, - {R_028628_SPI_VS_OUT_ID_3, 0, 0}, - {R_02862C_SPI_VS_OUT_ID_4, 0, 0}, - {R_028630_SPI_VS_OUT_ID_5, 0, 0}, - {R_028634_SPI_VS_OUT_ID_6, 0, 0}, - {R_028638_SPI_VS_OUT_ID_7, 0, 0}, - {R_02863C_SPI_VS_OUT_ID_8, 0, 0}, - {R_028640_SPI_VS_OUT_ID_9, 0, 0}, - {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0}, - {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0}, - {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0}, - {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0}, - {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0}, - {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0}, - {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0}, - {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0}, - {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0}, - {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0}, - {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0}, - {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0}, - {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0}, - {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0}, - {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0}, - {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0}, - {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0}, - {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0}, - {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0}, - {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0}, - {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0}, - {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0}, - {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0}, - {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0}, - {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0}, - {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0}, - {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0}, - {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0}, - {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0}, - {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0}, - {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0}, - {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0}, - {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0}, - {R_0286C8_SPI_THREAD_GROUPING, 0, 0}, - {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0}, - {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0}, - {R_0286D4_SPI_INTERP_CONTROL_0, 0, 0}, - {R_0286D8_SPI_INPUT_Z, 0, 0}, - {R_0286E0_SPI_BARYC_CNTL, 0, 0}, - {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0}, - {R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0, 0}, - {R_028780_CB_BLEND0_CONTROL, 0, 0}, - {R_028784_CB_BLEND1_CONTROL, 0, 0}, - {R_028788_CB_BLEND2_CONTROL, 0, 0}, - {R_02878C_CB_BLEND3_CONTROL, 0, 0}, - {R_028790_CB_BLEND4_CONTROL, 0, 0}, - {R_028794_CB_BLEND5_CONTROL, 0, 0}, - {R_028798_CB_BLEND6_CONTROL, 0, 0}, - {R_02879C_CB_BLEND7_CONTROL, 0, 0}, - {R_028800_DB_DEPTH_CONTROL, 0, 0}, - {R_028808_CB_COLOR_CONTROL, 0, 0}, - {R_02880C_DB_SHADER_CONTROL, 0, 0}, - {R_028814_PA_SU_SC_MODE_CNTL, 0, 0}, - {R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0, 0}, - {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0}, - {R_028844_SQ_PGM_RESOURCES_PS, 0, 0}, - {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0}, - {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0}, - {R_028860_SQ_PGM_RESOURCES_VS, 0, 0}, - {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0}, - {R_028900_SQ_ESGS_RING_ITEMSIZE, 0, 0}, - {R_028904_SQ_GSVS_RING_ITEMSIZE, 0, 0}, - {R_028908_SQ_ESTMP_RING_ITEMSIZE, 0, 0}, - {R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0, 0}, - {R_028910_SQ_VSTMP_RING_ITEMSIZE, 0, 0}, - {R_028914_SQ_PSTMP_RING_ITEMSIZE, 0, 0}, - {R_02891C_SQ_GS_VERT_ITEMSIZE, 0, 0}, - {R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0}, - {R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0}, - {R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0}, - {R_028A00_PA_SU_POINT_SIZE, 0, 0}, - {R_028A04_PA_SU_POINT_MINMAX, 0, 0}, - {R_028A08_PA_SU_LINE_CNTL, 0, 0}, - {R_028A48_PA_SC_MODE_CNTL_0, 0, 0}, - {R_028ABC_DB_HTILE_SURFACE, 0, 0}, - {R_028B54_VGT_SHADER_STAGES_EN, 0, 0}, - {R_028B70_DB_ALPHA_TO_MASK, 0, 0}, - {R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0}, - {R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0, 0}, - {R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0}, - {R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0}, - {R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0}, - {R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0}, - {CM_R_028BE4_PA_SU_VTX_CNTL, 0, 0}, -}; - -static int evergreen_loop_const_init(struct r600_context *ctx, uint32_t offset) +#include "util/u_math.h" + +void evergreen_dma_copy_buffer(struct r600_context *rctx, + struct pipe_resource *dst, + struct pipe_resource *src, + uint64_t dst_offset, + uint64_t src_offset, + uint64_t size) { - unsigned nreg = 32; - struct r600_reg r600_loop_consts[32]; - int i; - - for (i = 0; i < nreg; i++) { - r600_loop_consts[i].offset = EVERGREEN_LOOP_CONST_OFFSET + ((offset + i) * 4); - r600_loop_consts[i].flags = REG_FLAG_DIRTY_ALWAYS; - r600_loop_consts[i].sbu_flags = 0; + struct radeon_cmdbuf *cs = rctx->b.dma.cs; + unsigned i, ncopy, csize, sub_cmd, shift; + struct r600_resource *rdst = (struct r600_resource*)dst; + struct r600_resource *rsrc = (struct r600_resource*)src; + + /* Mark the buffer range of destination as valid (initialized), + * so that transfer_map knows it should wait for the GPU when mapping + * that range. */ + util_range_add(&rdst->valid_buffer_range, dst_offset, + dst_offset + size); + + dst_offset += rdst->gpu_address; + src_offset += rsrc->gpu_address; + + /* see if we use dword or byte copy */ + if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) { + size >>= 2; + sub_cmd = EG_DMA_COPY_DWORD_ALIGNED; + shift = 2; + } else { + sub_cmd = EG_DMA_COPY_BYTE_ALIGNED; + shift = 0; + } + ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE); + + r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc); + for (i = 0; i < ncopy; i++) { + csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE; + /* emit reloc before writing cs so that cs is always in consistent state */ + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ, 0); + radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE, 0); + radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize)); + radeon_emit(cs, dst_offset & 0xffffffff); + radeon_emit(cs, src_offset & 0xffffffff); + radeon_emit(cs, (dst_offset >> 32UL) & 0xff); + radeon_emit(cs, (src_offset >> 32UL) & 0xff); + dst_offset += csize << shift; + src_offset += csize << shift; + size -= csize; } - return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, EVERGREEN_LOOP_CONST_OFFSET); -} - -int evergreen_context_init(struct r600_context *ctx) -{ - int r = 0; - - /* add blocks */ - if (ctx->family >= CHIP_CAYMAN) - r = r600_context_add_block(ctx, cayman_config_reg_list, - Elements(cayman_config_reg_list), PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET); - if (r) - goto out_err; - if (ctx->family >= CHIP_CAYMAN) - r = r600_context_add_block(ctx, cayman_context_reg_list, - Elements(cayman_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET); - else - r = r600_context_add_block(ctx, evergreen_context_reg_list, - Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET); - if (r) - goto out_err; - - /* PS loop const */ - evergreen_loop_const_init(ctx, 0); - /* VS loop const */ - evergreen_loop_const_init(ctx, 32); - - r = r600_setup_block_table(ctx); - if (r) - goto out_err; - - ctx->max_db = 8; - return 0; -out_err: - r600_context_fini(ctx); - return r; } -void evergreen_flush_vgt_streamout(struct r600_context *ctx) -{ - struct radeon_winsys_cs *cs = ctx->cs; - - cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0); - cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - EVERGREEN_CONFIG_REG_OFFSET) >> 2; - cs->buf[cs->cdw++] = 0; - - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0); +/* The max number of bytes to copy per packet. */ +#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8) - cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0); - cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */ - cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2; /* register */ - cs->buf[cs->cdw++] = 0; - cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */ - cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */ - cs->buf[cs->cdw++] = 4; /* poll interval */ -} - -void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit) +void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, + struct pipe_resource *dst, uint64_t offset, + unsigned size, uint32_t clear_value, + enum r600_coherency coher) { - struct radeon_winsys_cs *cs = ctx->cs; - - if (buffer_enable_bit) { - cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); - cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; - cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1); - - cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); - cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; - cs->buf[cs->cdw++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit); - } else { - cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); - cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; - cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0); + struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + + assert(size); + assert(rctx->screen->b.has_cp_dma); + + /* Mark the buffer range of destination as valid (initialized), + * so that transfer_map knows it should wait for the GPU when mapping + * that range. */ + util_range_add(&r600_resource(dst)->valid_buffer_range, offset, + offset + size); + + offset += r600_resource(dst)->gpu_address; + + /* Flush the cache where the resource is bound. */ + rctx->b.flags |= r600_get_flush_flags(coher) | + R600_CONTEXT_WAIT_3D_IDLE; + + while (size) { + unsigned sync = 0; + unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); + unsigned reloc; + + r600_need_cs_space(rctx, + 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) + + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0); + + /* Flush the caches for the first copy only. */ + if (rctx->b.flags) { + r600_flush_emit(rctx); + } + + /* Do the synchronization after the last copy, so that all data is written to memory. */ + if (size == byte_count) { + sync = PKT3_CP_DMA_CP_SYNC; + } + + /* This must be done after r600_need_cs_space. */ + reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, + (struct r600_resource*)dst, RADEON_USAGE_WRITE, + RADEON_PRIO_CP_DMA); + + radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); + radeon_emit(cs, clear_value); /* DATA [31:0] */ + radeon_emit(cs, sync | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */ + radeon_emit(cs, offset); /* DST_ADDR_LO [31:0] */ + radeon_emit(cs, (offset >> 32) & 0xff); /* DST_ADDR_HI [7:0] */ + radeon_emit(cs, byte_count); /* COMMAND [29:22] | BYTE_COUNT [20:0] */ + + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, reloc); + + size -= byte_count; + offset += byte_count; } + + /* CP DMA is executed in ME, but index buffers are read by PFP. + * This ensures that ME (CP DMA) is idle before PFP starts fetching + * indices. If we wanted to execute CP DMA in PFP, this packet + * should precede it. + */ + if (coher == R600_COHERENCY_SHADER) + r600_emit_pfp_sync_me(rctx); }