+static bool cik_sdma_copy_texture(struct si_context *sctx,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct radeon_info *info = &sctx->screen->info;
+ struct si_texture *ssrc = (struct si_texture*)src;
+ struct si_texture *sdst = (struct si_texture*)dst;
+ unsigned bpp = sdst->surface.bpe;
+ uint64_t dst_address = sdst->buffer.gpu_address +
+ sdst->surface.u.legacy.level[dst_level].offset;
+ uint64_t src_address = ssrc->buffer.gpu_address +
+ ssrc->surface.u.legacy.level[src_level].offset;
+ unsigned dst_mode = sdst->surface.u.legacy.level[dst_level].mode;
+ unsigned src_mode = ssrc->surface.u.legacy.level[src_level].mode;
+ unsigned dst_tile_index = sdst->surface.u.legacy.tiling_index[dst_level];
+ unsigned src_tile_index = ssrc->surface.u.legacy.tiling_index[src_level];
+ unsigned dst_tile_mode = info->si_tile_mode_array[dst_tile_index];
+ unsigned src_tile_mode = info->si_tile_mode_array[src_tile_index];
+ unsigned dst_micro_mode = G_009910_MICRO_TILE_MODE_NEW(dst_tile_mode);
+ unsigned src_micro_mode = G_009910_MICRO_TILE_MODE_NEW(src_tile_mode);
+ unsigned dst_tile_swizzle = dst_mode == RADEON_SURF_MODE_2D ?
+ sdst->surface.tile_swizzle : 0;
+ unsigned src_tile_swizzle = src_mode == RADEON_SURF_MODE_2D ?
+ ssrc->surface.tile_swizzle : 0;
+ unsigned dst_pitch = sdst->surface.u.legacy.level[dst_level].nblk_x;
+ unsigned src_pitch = ssrc->surface.u.legacy.level[src_level].nblk_x;
+ uint64_t dst_slice_pitch = ((uint64_t)sdst->surface.u.legacy.level[dst_level].slice_size_dw * 4) / bpp;
+ uint64_t src_slice_pitch = ((uint64_t)ssrc->surface.u.legacy.level[src_level].slice_size_dw * 4) / bpp;
+ unsigned dst_width = minify_as_blocks(sdst->buffer.b.b.width0,
+ dst_level, sdst->surface.blk_w);
+ unsigned src_width = minify_as_blocks(ssrc->buffer.b.b.width0,
+ src_level, ssrc->surface.blk_w);
+ unsigned dst_height = minify_as_blocks(sdst->buffer.b.b.height0,
+ dst_level, sdst->surface.blk_h);
+ unsigned src_height = minify_as_blocks(ssrc->buffer.b.b.height0,
+ src_level, ssrc->surface.blk_h);
+ unsigned srcx = src_box->x / ssrc->surface.blk_w;
+ unsigned srcy = src_box->y / ssrc->surface.blk_h;
+ unsigned srcz = src_box->z;
+ unsigned copy_width = DIV_ROUND_UP(src_box->width, ssrc->surface.blk_w);
+ unsigned copy_height = DIV_ROUND_UP(src_box->height, ssrc->surface.blk_h);
+ unsigned copy_depth = src_box->depth;
+
+ assert(src_level <= src->last_level);
+ assert(dst_level <= dst->last_level);
+ assert(sdst->surface.u.legacy.level[dst_level].offset +
+ dst_slice_pitch * bpp * (dstz + src_box->depth) <=
+ sdst->buffer.buf->size);
+ assert(ssrc->surface.u.legacy.level[src_level].offset +
+ src_slice_pitch * bpp * (srcz + src_box->depth) <=
+ ssrc->buffer.buf->size);
+
+ if (!si_prepare_for_dma_blit(sctx, sdst, dst_level, dstx, dsty,
+ dstz, ssrc, src_level, src_box))
+ return false;
+
+ dstx /= sdst->surface.blk_w;
+ dsty /= sdst->surface.blk_h;
+
+ if (srcx >= (1 << 14) ||
+ srcy >= (1 << 14) ||
+ srcz >= (1 << 11) ||
+ dstx >= (1 << 14) ||
+ dsty >= (1 << 14) ||
+ dstz >= (1 << 11))
+ return false;
+
+ dst_address |= dst_tile_swizzle << 8;
+ src_address |= src_tile_swizzle << 8;
+
+ /* Linear -> linear sub-window copy. */
+ if (dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED &&
+ src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED &&
+ /* check if everything fits into the bitfields */
+ src_pitch <= (1 << 14) &&
+ dst_pitch <= (1 << 14) &&
+ src_slice_pitch <= (1 << 28) &&
+ dst_slice_pitch <= (1 << 28) &&
+ copy_width <= (1 << 14) &&
+ copy_height <= (1 << 14) &&
+ copy_depth <= (1 << 11) &&
+ /* HW limitation - GFX7: */
+ (sctx->chip_class != GFX7 ||
+ (copy_width < (1 << 14) &&
+ copy_height < (1 << 14) &&
+ copy_depth < (1 << 11))) &&
+ /* HW limitation - some GFX7 parts: */
+ ((sctx->family != CHIP_BONAIRE &&
+ sctx->family != CHIP_KAVERI) ||
+ (srcx + copy_width != (1 << 14) &&
+ srcy + copy_height != (1 << 14)))) {
+ struct radeon_cmdbuf *cs = sctx->dma_cs;
+
+ si_need_dma_space(sctx, 13, &sdst->buffer, &ssrc->buffer);
+
+ radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+ CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
+ (util_logbase2(bpp) << 29));
+ radeon_emit(cs, src_address);
+ radeon_emit(cs, src_address >> 32);
+ radeon_emit(cs, srcx | (srcy << 16));
+ radeon_emit(cs, srcz | ((src_pitch - 1) << 16));
+ radeon_emit(cs, src_slice_pitch - 1);
+ radeon_emit(cs, dst_address);
+ radeon_emit(cs, dst_address >> 32);
+ radeon_emit(cs, dstx | (dsty << 16));
+ radeon_emit(cs, dstz | ((dst_pitch - 1) << 16));
+ radeon_emit(cs, dst_slice_pitch - 1);
+ if (sctx->chip_class == GFX7) {
+ radeon_emit(cs, copy_width | (copy_height << 16));
+ radeon_emit(cs, copy_depth);
+ } else {
+ radeon_emit(cs, (copy_width - 1) | ((copy_height - 1) << 16));
+ radeon_emit(cs, (copy_depth - 1));
+ }
+ return true;