*/
r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd);
- ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
r600_flush_emit(ctx);
/* Emit colorbuffers. */
uint32_t i, log_samples;
if (rctx->framebuffer.state.nr_cbufs) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) {
rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
}
}
if (rctx->framebuffer.state.zsbuf) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
}
util_copy_framebuffer_state(&rctx->framebuffer.state, state);
#define R600_CONTEXT_INVAL_READ_CACHES (1 << 0)
#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 1)
-#define R600_CONTEXT_WAIT_IDLE (1 << 2)
-#define R600_CONTEXT_FLUSH_AND_INV (1 << 3)
-#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 4)
+#define R600_CONTEXT_WAIT_3D_IDLE (1 << 2)
+#define R600_CONTEXT_WAIT_CP_DMA_IDLE (1 << 3)
+#define R600_CONTEXT_FLUSH_AND_INV (1 << 4)
+#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 5)
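+/* Usage sketch (editor's illustration): callers OR these bits into
+ * ctx->flags, and the next r600_flush_emit() turns them into packets
+ * and clears them, e.g.:
+ *
+ *	ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ *	r600_flush_emit(ctx);
+ */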
struct r600_context;
struct r600_screen;
void r600_context_streamout_end(struct r600_context *ctx);
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block, unsigned pkt_flags);
+void r600_cp_dma_copy_buffer(struct r600_context *rctx,
+ struct pipe_resource *dst, unsigned dst_offset,
+ struct pipe_resource *src, unsigned src_offset,
+ unsigned size);
int evergreen_context_init(struct r600_context *ctx);
r600_blitter_end(ctx);
}
-void r600_copy_buffer(struct pipe_context *ctx, struct
- pipe_resource *dst, unsigned dstx,
+void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dstx,
struct pipe_resource *src, const struct pipe_box *src_box)
{
struct r600_context *rctx = (struct r600_context*)ctx;
- if (rctx->screen->has_streamout &&
- /* Require dword alignment. */
- dstx % 4 == 0 && src_box->x % 4 == 0 && src_box->width % 4 == 0) {
+ /* CP DMA doesn't work on R600 (flushing seems to be unreliable)
+ * and requires a kernel with DRM 2.27 or later. */
+ if (rctx->screen->info.drm_minor >= 27 && rctx->chip_class >= R700) {
+ r600_cp_dma_copy_buffer(rctx, dst, dstx, src, src_box->x, src_box->width);
+ } else if (rctx->screen->has_streamout &&
+ /* Require 4-byte alignment. */
+ dstx % 4 == 0 && src_box->x % 4 == 0 && src_box->width % 4 == 0) {
r600_blitter_begin(ctx, R600_COPY_BUFFER);
util_blitter_copy_buffer(rctx->blitter, dst, dstx, src, src_box->x, src_box->width);
r600_blitter_end(ctx);
LIST_ADDTAIL(&block->list,&ctx->dirty);
if (block->flags & REG_FLAG_FLUSH_CHANGE) {
- ctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
}
}
}
{
struct radeon_winsys_cs *cs = rctx->cs;
unsigned cp_coher_cntl = 0;
+ unsigned wait_until = 0;
unsigned emit_flush = 0;
if (!rctx->flags) {
	return;
}
- if (rctx->flags & R600_CONTEXT_WAIT_IDLE) {
+ if (rctx->flags & R600_CONTEXT_WAIT_3D_IDLE) {
+ wait_until |= S_008040_WAIT_3D_IDLE(1);
+ }
+ if (rctx->flags & R600_CONTEXT_WAIT_CP_DMA_IDLE) {
+ wait_until |= S_008040_WAIT_CP_DMA_IDLE(1);
+ }
+ if (wait_until) {
/* wait for things to settle */
- r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+ r600_write_config_reg(cs, R_008040_WAIT_UNTIL, wait_until);
}
/* everything is properly flushed */
*/
ctx->flags |= R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
- R600_CONTEXT_WAIT_IDLE;
+ R600_CONTEXT_WAIT_3D_IDLE |
+ R600_CONTEXT_WAIT_CP_DMA_IDLE;
r600_flush_emit(ctx);
}
r600_set_streamout_enable(ctx, 0);
}
- ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
ctx->num_cs_dw_streamout_end = 0;
}
+
+/* The max number of bytes to copy per packet. */
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
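+/* Worked example (editor's illustration): the loop in
+ * r600_cp_dma_copy_buffer below splits a copy into
+ * ceil(size / CP_DMA_MAX_BYTE_COUNT) packets, so a 3 MiB (3145728 byte)
+ * copy takes two packets: one moving CP_DMA_MAX_BYTE_COUNT (2097144)
+ * bytes and one moving the remaining 1048584 bytes. */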
+
+void r600_cp_dma_copy_buffer(struct r600_context *rctx,
+ struct pipe_resource *dst, unsigned dst_offset,
+ struct pipe_resource *src, unsigned src_offset,
+ unsigned size)
+{
+ struct radeon_winsys_cs *cs = rctx->cs;
+
+ assert(size);
+ assert(rctx->chip_class != R600);
+
+ /* CP DMA doesn't work on R600 (flushing seems to be unreliable);
+ * the assert above catches misuse in debug builds, and we still bail
+ * out gracefully in release builds. */
+ if (rctx->chip_class == R600) {
+ return;
+ }
+
+ /* We flush the caches, because we might read from or write
+ * to resources which are bound right now. */
+ rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES |
+ R600_CONTEXT_FLUSH_AND_INV |
+ R600_CONTEXT_FLUSH_AND_INV_CB_META |
+ R600_CONTEXT_STREAMOUT_FLUSH |
+ R600_CONTEXT_WAIT_3D_IDLE;
+
+ /* There are differences between R700 and EG in CP DMA,
+ * but we only use the common bits here. */
+ while (size) {
+ unsigned sync = 0;
+ unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+ unsigned src_reloc, dst_reloc;
+
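+ /* 6 dwords for the CP_DMA packet below, plus 2 * 2 dwords for the
+ * two relocation NOPs, plus room for a cache flush if one is still
+ * pending. */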
+ r600_need_cs_space(rctx, 10 + (rctx->flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);
+
+ /* Flush the caches for the first copy only. */
+ if (rctx->flags) {
+ r600_flush_emit(rctx);
+ }
+
+ /* Do the synchronization after the last copy, so that all data is written to memory. */
+ if (size == byte_count) {
+ sync = PKT3_CP_DMA_CP_SYNC;
+ }
+
+ /* This must be done after r600_need_cs_space. */
+ src_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)src, RADEON_USAGE_READ);
+ dst_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+
+ r600_write_value(cs, PKT3(PKT3_CP_DMA, 4, 0));
+ r600_write_value(cs, src_offset); /* SRC_ADDR_LO [31:0] */
+ r600_write_value(cs, sync); /* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
+ r600_write_value(cs, dst_offset); /* DST_ADDR_LO [31:0] */
+ r600_write_value(cs, 0); /* DST_ADDR_HI [7:0] */
+ r600_write_value(cs, byte_count); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, src_reloc);
+ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
+ r600_write_value(cs, dst_reloc);
+
+ size -= byte_count;
+ src_offset += byte_count;
+ dst_offset += byte_count;
+ }
+}
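+
+/* Example caller (editor's sketch; copy_whole_buffer is hypothetical,
+ * r600_copy_buffer in r600_blit.c is the real user):
+ *
+ *	void copy_whole_buffer(struct r600_context *rctx,
+ *	                       struct pipe_resource *dst,
+ *	                       struct pipe_resource *src)
+ *	{
+ *		// CP_SYNC on the last packet makes the following packets
+ *		// wait until the DMA transfer has finished.
+ *		r600_cp_dma_copy_buffer(rctx, dst, 0, src, 0, src->width0);
+ *	}
+ */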
void evergreen_update_db_shader_control(struct r600_context * rctx);
/* r600_blit.c */
-void r600_copy_buffer(struct pipe_context *ctx, struct
- pipe_resource *dst, unsigned dstx,
+void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dstx,
struct pipe_resource *src, const struct pipe_box *src_box);
void r600_init_blit_functions(struct r600_context *rctx);
void r600_blit_decompress_depth(struct pipe_context *ctx,
unsigned i;
if (rctx->framebuffer.state.nr_cbufs) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
if (rctx->chip_class >= R700 &&
rctx->framebuffer.state.cbufs[0]->texture->nr_samples > 1) {
}
}
if (rctx->framebuffer.state.zsbuf) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
}
/* Set the new state. */
if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp) {
rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
rctx->config_state.atom.dirty = true;
- rctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
}
return true;
}
{
struct r600_context *rctx = (struct r600_context *)ctx;
- rctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
}
{
if (state->dirty_mask) {
if (state->dirty_mask & state->has_bordercolor_mask) {
- rctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
}
state->atom.num_dw =
util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
seamless_cube_map != -1 &&
seamless_cube_map != rctx->seamless_cube_map.enabled) {
/* change in TA_CNTL_AUX need a pipeline flush */
- rctx->flags |= R600_CONTEXT_WAIT_IDLE;
+ rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE;
rctx->seamless_cube_map.enabled = seamless_cube_map;
rctx->seamless_cube_map.atom.dirty = true;
}
#define PKT3_PRED_S(x) (((x) >> 0) & 0x1)
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
+#define PKT3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO [31:0]
+ * 3. CP_SYNC [31] | SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+#define PKT3_CP_DMA_CP_SYNC (1 << 31)
+/* COMMAND */
+#define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+/* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+#define PKT3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+/* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+#define PKT3_CP_DMA_CMD_SAS (1 << 26)
+/* 0 - memory
+ * 1 - register
+ */
+#define PKT3_CP_DMA_CMD_DAS (1 << 27)
+/* 0 - memory
+ * 1 - register
+ */
+#define PKT3_CP_DMA_CMD_SAIC (1 << 28)
+#define PKT3_CP_DMA_CMD_DAIC (1 << 29)
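+/* Encoding example (editor's illustration): the COMMAND bits share a
+ * dword with BYTE_COUNT, so a hypothetical copy that byte-swaps 8-in-32
+ * on the destination would emit as its final packet dword:
+ *
+ *	PKT3_CP_DMA_CMD_DST_SWAP(2) | byte_count
+ *
+ * r600_cp_dma_copy_buffer() sets none of the COMMAND bits and emits a
+ * plain byte_count for a straight memory-to-memory copy. */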
+
/* Registers */
#define R_008490_CP_STRMOUT_CNTL 0x008490
#define S_008490_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)