From: Marek Olšák Date: Sun, 9 Sep 2012 04:08:39 +0000 (+0200) Subject: r600g: simplify flushing X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fd2e34d557c07fba5a6e344e915f73dcfb66d0b4;p=mesa.git r600g: simplify flushing Based on the patch called "simplify and fix flushing and synchronization" by Jerome Glisse. Rebased, removed unneeded code, simplified more and cleaned up. Also, SH_ACTION_ENA is not set when changing shaders (hw doesn't seem to need it). It's only used to flush constant buffers. Reviewed-by: Jerome Glisse --- diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 35333120654..1fb63d608a2 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -96,7 +96,7 @@ static void evergreen_cs_set_vertex_buffer( vb->buffer = buffer; vb->user_buffer = NULL; - r600_inval_vertex_cache(rctx); + rctx->flags |= rctx->has_vertex_cache ? R600_CONTEXT_VTX_FLUSH : R600_CONTEXT_TEX_FLUSH; state->enabled_mask |= 1 << vb_index; state->dirty_mask |= 1 << vb_index; r600_atom_dirty(rctx, &state->atom); @@ -332,8 +332,11 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, */ r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom); + ctx->flags |= R600_CONTEXT_CB_FLUSH; + r600_flush_emit(ctx); + /* Emit cb_state */ - cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER]; + cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER]; r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE); /* Set CB_TARGET_MASK XXX: Use cb_misc_state */ @@ -384,15 +387,10 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, /* Emit dispatch state and dispatch packet */ evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout); - /* r600_flush_framebuffer() updates the cb_flush_flags and then - * calls r600_emit_atom() on the ctx->surface_sync_cmd.atom, which emits - * a SURFACE_SYNC packet via 
r600_emit_surface_sync(). - * - * XXX r600_emit_surface_sync() hardcodes the CP_COHER_SIZE to - * 0xffffffff, so we will need to add a field to struct - * r600_surface_sync_cmd if we want to manually set this value. + /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */ - r600_flush_framebuffer(ctx, true /* Flush now */); + ctx->flags |= R600_CONTEXT_CB_FLUSH; + r600_flush_emit(ctx); #if 0 COMPUTE_DBG("cdw: %i\n", cs->cdw); @@ -444,7 +442,7 @@ void evergreen_emit_cs_shader( r600_write_value(cs, r600_context_bo_reloc(rctx, shader->shader_code_bo, RADEON_USAGE_READ)); - r600_inval_shader_cache(rctx); + rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; } static void evergreen_launch_grid( diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c index 50a60d361f2..dc957320685 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.c +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c @@ -562,7 +562,7 @@ void evergreen_set_tex_resource( util_format_get_blockwidth(tmp->resource.b.b.format) * view->base.texture->width0*height*depth; - r600_inval_texture_cache(pipe->ctx); + pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH; evergreen_emit_force_reloc(res); evergreen_emit_force_reloc(res); @@ -621,7 +621,7 @@ void evergreen_set_const_cache( res->usage = RADEON_USAGE_READ; res->coher_bo_size = size; - r600_inval_shader_cache(pipe->ctx); + pipe->ctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; } struct r600_resource* r600_compute_buffer_alloc_vram( diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 405d1b03eeb..7386726ca16 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1697,7 +1697,12 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; - r600_flush_framebuffer(rctx, false); + if (rctx->framebuffer.nr_cbufs) { + 
rctx->flags |= R600_CONTEXT_CB_FLUSH; + } + if (rctx->framebuffer.zsbuf) { + rctx->flags |= R600_CONTEXT_DB_FLUSH; + } /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index e4d72f5411e..18e1eb7bc66 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -46,7 +46,8 @@ #define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 -#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f +#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f +#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c #define EVENT_TYPE(x) ((x) << 0) #define EVENT_INDEX(x) ((x) << 8) @@ -2186,16 +2187,12 @@ #define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF #define S_0085F0_CB8_DEST_BASE_ENA(x) (((x) & 0x1) << 15) #define G_0085F0_CB8_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) - #define S_0085F0_CB9_DEST_BASE_ENA(x) (((x) & 0x1) << 16) #define G_0085F0_CB9_DEST_BASE_ENA(x) (((x) >> 16) & 0x1) - #define S_0085F0_CB10_DEST_BASE_ENA(x) (((x) & 0x1) << 17) #define G_0085F0_CB10_DEST_BASE_ENA(x) (((x) >> 17) & 0x1) - #define S_0085F0_CB11_DEST_BASE_ENA(x) (((x) & 0x1) << 18) #define G_0085F0_CB11_DEST_BASE_ENA(x) (((x) >> 18) & 0x1) - #define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) #define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) #define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index af7b0ca2e74..7c1d77313de 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -180,9 +180,16 @@ struct r600_so_target { unsigned so_index; }; -#define R600_CONTEXT_DRAW_PENDING (1 << 0) -#define R600_CONTEXT_DST_CACHES_DIRTY (1 << 1) -#define R600_PARTIAL_FLUSH (1 << 2) +#define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 0) +#define R600_CONTEXT_CB_FLUSH (1 << 1) +#define R600_CONTEXT_DB_FLUSH (1 << 2) 
+#define R600_CONTEXT_SHADERCONST_FLUSH (1 << 3) +#define R600_CONTEXT_TEX_FLUSH (1 << 4) +#define R600_CONTEXT_VTX_FLUSH (1 << 5) +#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 6) +#define R600_CONTEXT_WAIT_IDLE (1 << 7) +#define R600_CONTEXT_FLUSH_AND_INV (1 << 8) +#define R600_CONTEXT_HTILE_ERRATA (1 << 9) struct r600_context; struct r600_screen; @@ -196,10 +203,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags); void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence, unsigned offset, unsigned value); -void r600_inval_shader_cache(struct r600_context *ctx); -void r600_inval_texture_cache(struct r600_context *ctx); -void r600_inval_vertex_cache(struct r600_context *ctx); -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now); +void r600_flush_emit(struct r600_context *ctx); void r600_context_streamout_begin(struct r600_context *ctx); void r600_context_streamout_end(struct r600_context *ctx); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 0ec13e5bade..d40f6b60933 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -114,19 +114,6 @@ err: return; } -void r600_context_ps_partial_flush(struct r600_context *ctx) -{ - struct radeon_winsys_cs *cs = ctx->cs; - - if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) - return; - - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - - ctx->flags &= ~R600_CONTEXT_DRAW_PENDING; -} - static void r600_init_block(struct r600_context *ctx, struct r600_block *block, const struct r600_reg *reg, int index, int nreg, @@ -665,7 +652,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, } /* Count in framebuffer cache flushes at the end of CS. 
*/ - num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */ + num_dw += 44; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */ /* Save 16 dwords for the fence mechanism. */ num_dw += 16; @@ -693,7 +680,7 @@ void r600_context_dirty_block(struct r600_context *ctx, LIST_ADDTAIL(&block->list,&ctx->dirty); if (block->flags & REG_FLAG_FLUSH_CHANGE) { - r600_context_ps_partial_flush(ctx); + ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; } } } @@ -861,54 +848,138 @@ out: LIST_DELINIT(&block->list); } -void r600_inval_shader_cache(struct r600_context *ctx) +void r600_flush_emit(struct r600_context *rctx) { - ctx->surface_sync_cmd.flush_flags |= S_0085F0_SH_ACTION_ENA(1); - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); -} + struct radeon_winsys_cs *cs = rctx->cs; -void r600_inval_texture_cache(struct r600_context *ctx) -{ - ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1); - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); -} + if (!rctx->flags) { + return; + } -void r600_inval_vertex_cache(struct r600_context *ctx) -{ - if (ctx->has_vertex_cache) { - ctx->surface_sync_cmd.flush_flags |= S_0085F0_VC_ACTION_ENA(1); - } else { - /* Some GPUs don't have the vertex cache and must use the texture cache instead. 
*/ - ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1); + if (rctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) { + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); } - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); -} -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now) -{ - if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) - return; + if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); + + /* DB flushes are special due to errata with hyperz, we need to + * insert a no-op, so that the cache has time to really flush. + */ + if (rctx->chip_class <= R700 && + rctx->flags & R600_CONTEXT_HTILE_ERRATA) { + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0); + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + 
cs->buf[cs->cdw++] = 0xdeadcafe; + } + } - ctx->surface_sync_cmd.flush_flags |= - r600_get_cb_flush_flags(ctx) | - (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0); + if (rctx->flags & (R600_CONTEXT_CB_FLUSH | + R600_CONTEXT_DB_FLUSH | + R600_CONTEXT_SHADERCONST_FLUSH | + R600_CONTEXT_TEX_FLUSH | + R600_CONTEXT_VTX_FLUSH | + R600_CONTEXT_STREAMOUT_FLUSH)) { + /* anything left (cb, vtx, shader, streamout) can be flushed + * using the surface sync packet + */ + unsigned flags = 0; + + if (rctx->flags & R600_CONTEXT_CB_FLUSH) { + flags |= S_0085F0_CB_ACTION_ENA(1) | + S_0085F0_CB0_DEST_BASE_ENA(1) | + S_0085F0_CB1_DEST_BASE_ENA(1) | + S_0085F0_CB2_DEST_BASE_ENA(1) | + S_0085F0_CB3_DEST_BASE_ENA(1) | + S_0085F0_CB4_DEST_BASE_ENA(1) | + S_0085F0_CB5_DEST_BASE_ENA(1) | + S_0085F0_CB6_DEST_BASE_ENA(1) | + S_0085F0_CB7_DEST_BASE_ENA(1); + + if (rctx->chip_class >= EVERGREEN) { + flags |= S_0085F0_CB8_DEST_BASE_ENA(1) | + S_0085F0_CB9_DEST_BASE_ENA(1) | + S_0085F0_CB10_DEST_BASE_ENA(1) | + S_0085F0_CB11_DEST_BASE_ENA(1); + } - if (flush_now) { - r600_emit_atom(ctx, &ctx->surface_sync_cmd.atom); - } else { - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); - } + /* RV670 errata + * (CB1_DEST_BASE_ENA is also required, which is + * included unconditionally above). */ + if (rctx->family == CHIP_RV670 || + rctx->family == CHIP_RS780 || + rctx->family == CHIP_RS880) { + flags |= S_0085F0_DEST_BASE_0_ENA(1); + } + } - /* Also add a complete cache flush to work around broken flushing on R6xx. 
*/ - if (ctx->chip_class == R600) { - if (flush_now) { - r600_emit_atom(ctx, &ctx->r6xx_flush_and_inv_cmd); - } else { - r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd); + if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) { + flags |= S_0085F0_SO0_DEST_BASE_ENA(1) | + S_0085F0_SO1_DEST_BASE_ENA(1) | + S_0085F0_SO2_DEST_BASE_ENA(1) | + S_0085F0_SO3_DEST_BASE_ENA(1) | + S_0085F0_SMX_ACTION_ENA(1); + + /* RV670 errata */ + if (rctx->family == CHIP_RV670 || + rctx->family == CHIP_RS780 || + rctx->family == CHIP_RS880) { + flags |= S_0085F0_DEST_BASE_0_ENA(1); + } } + + flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? S_0085F0_DB_ACTION_ENA(1) | + S_0085F0_DB_DEST_BASE_ENA(1): 0; + flags |= (rctx->flags & R600_CONTEXT_SHADERCONST_FLUSH) ? S_0085F0_SH_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? S_0085F0_TC_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? S_0085F0_VC_ACTION_ENA(1) : 0; + + cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); + cs->buf[cs->cdw++] = flags; /* CP_COHER_CNTL */ + cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ + cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ + cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ + } + + if (rctx->flags & R600_CONTEXT_WAIT_IDLE) { + /* wait for things to settle */ + r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); } - ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; + /* everything is properly flushed */ + rctx->flags = 0; } void r600_context_flush(struct r600_context *ctx, unsigned flags) @@ -937,10 +1008,18 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) streamout_suspended = true; } - r600_flush_framebuffer(ctx, true); - /* partial flush is needed to avoid lockups on some chips with user fences */ - r600_context_ps_partial_flush(ctx); + ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; + + /* flush the framebuffer */ + ctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_DB_FLUSH; + + /* R6xx errata */ + if 
(ctx->chip_class == R600) { + ctx->flags |= R600_CONTEXT_FLUSH_AND_INV; + } + + r600_flush_emit(ctx); /* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */ if (ctx->chip_class <= R700) { @@ -959,10 +1038,6 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) /* Begin a new CS. */ r600_emit_atom(ctx, &ctx->start_cs_cmd.atom); - /* Invalidate caches. */ - r600_inval_texture_cache(ctx); - r600_flush_framebuffer(ctx, false); - /* Re-emit states. */ r600_atom_dirty(ctx, &ctx->alphatest_state.atom); r600_atom_dirty(ctx, &ctx->cb_misc_state.atom); @@ -1024,7 +1099,10 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); va = va + (offset << 2); - r600_context_ps_partial_flush(ctx); + ctx->flags &= ~R600_CONTEXT_PS_PARTIAL_FLUSH; + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */ @@ -1185,7 +1263,7 @@ void r600_context_streamout_end(struct r600_context *ctx) { struct radeon_winsys_cs *cs = ctx->cs; struct r600_so_target **t = ctx->so_targets; - unsigned i, flush_flags = 0; + unsigned i; uint64_t va; if (ctx->chip_class >= EVERGREEN) { @@ -1212,7 +1290,6 @@ void r600_context_streamout_end(struct r600_context *ctx) r600_context_bo_reloc(ctx, t[i]->filled_size, RADEON_USAGE_WRITE); - flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i; } } @@ -1221,22 +1298,11 @@ void r600_context_streamout_end(struct r600_context *ctx) } else { r600_set_streamout_enable(ctx, 0); } + ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH; - /* This is needed to fix cache flushes on r600. 
*/ + /* R6xx errata */ if (ctx->chip_class == R600) { - if (ctx->family == CHIP_RV670 || - ctx->family == CHIP_RS780 || - ctx->family == CHIP_RS880) { - flush_flags |= S_0085F0_DEST_BASE_0_ENA(1); - } - - r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd); + ctx->flags |= R600_CONTEXT_FLUSH_AND_INV; } - - /* Flush streamout caches. */ - ctx->surface_sync_cmd.flush_flags |= - S_0085F0_SMX_ACTION_ENA(1) | flush_flags; - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); - ctx->num_cs_dw_streamout_end = 0; } diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h index 8d0ebc9d52d..12a9750e8b7 100644 --- a/src/gallium/drivers/r600/r600_hw_context_priv.h +++ b/src/gallium/drivers/r600/r600_hw_context_priv.h @@ -28,7 +28,8 @@ #include "r600_pipe.h" -#define R600_MAX_DRAW_CS_DWORDS 16 +/* the number of CS dwords for flushing and drawing */ +#define R600_MAX_DRAW_CS_DWORDS 64 /* these flags are used in register flags and added into block flags */ #define REG_FLAG_NEED_BO 1 @@ -54,9 +55,6 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block, int dirty, int index); int r600_setup_block_table(struct r600_context *ctx); -int r600_state_sampler_init(struct r600_context *ctx, uint32_t offset); -void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset); -void r600_context_ps_partial_flush(struct r600_context *ctx); /* * evergreen_hw_context.c diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9e6c28d523b..d0dd4d54aa6 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -242,8 +242,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->context.create_video_decoder = vl_create_decoder; rctx->context.create_video_buffer = 
vl_video_buffer_create; - r600_init_common_atoms(rctx); - switch (rctx->chip_class) { case R600: case R700: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index ff720e95cfe..ee021ed8980 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -65,11 +65,6 @@ struct r600_command_buffer { unsigned pkt_flags; }; -struct r600_surface_sync_cmd { - struct r600_atom atom; - unsigned flush_flags; /* CP_COHER_CNTL */ -}; - struct r600_db_misc_state { struct r600_atom atom; bool occlusion_query_enabled; @@ -370,8 +365,6 @@ struct r600_context { /** Compute specific registers initializations. The start_cs_cmd atom * must be emitted before start_compute_cs_cmd. */ struct r600_command_buffer start_compute_cs_cmd; - struct r600_surface_sync_cmd surface_sync_cmd; - struct r600_atom r6xx_flush_and_inv_cmd; struct r600_alphatest_state alphatest_state; struct r600_cb_misc_state cb_misc_state; struct r600_db_misc_state db_misc_state; @@ -575,8 +568,6 @@ void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id, void (*emit)(struct r600_context *ctx, struct r600_atom *state), unsigned num_dw); -void r600_init_common_atoms(struct r600_context *rctx); -unsigned r600_get_cb_flush_flags(struct r600_context *rctx); void r600_texture_barrier(struct pipe_context *ctx); void r600_set_index_buffer(struct pipe_context *ctx, const struct pipe_index_buffer *ib); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b363dc1ba98..579bcee9634 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1600,13 +1600,23 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; - r600_flush_framebuffer(rctx, false); + if (rctx->framebuffer.nr_cbufs) { + rctx->flags |= R600_CONTEXT_CB_FLUSH; + } + if 
(rctx->framebuffer.zsbuf) { + rctx->flags |= R600_CONTEXT_DB_FLUSH; + } + /* R6xx errata */ + if (rctx->chip_class == R600) { + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; + } /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; util_copy_framebuffer_state(&rctx->framebuffer, state); + /* Colorbuffers. */ rctx->export_16bpc = true; rctx->nr_cbufs = state->nr_cbufs; @@ -2125,14 +2135,7 @@ void r600_adjust_gprs(struct r600_context *rctx) unsigned tmp; int diff; - /* XXX: Following call moved from r600_bind_[ps|vs]_shader, - * it seems eg+ doesn't need it, r6xx/7xx probably need it only for - * adjusting the GPR allocation? - * Do we need this if we aren't really changing config below? */ - r600_inval_shader_cache(rctx); - - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) - { + if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) { diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs; num_vs_gprs -= diff; num_ps_gprs += diff; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 079934074a5..b8a0e484e89 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -56,27 +56,6 @@ void r600_release_command_buffer(struct r600_command_buffer *cb) FREE(cb->buf); } -static void r600_emit_surface_sync(struct r600_context *rctx, struct r600_atom *atom) -{ - struct radeon_winsys_cs *cs = rctx->cs; - struct r600_surface_sync_cmd *a = (struct r600_surface_sync_cmd*)atom; - - cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); - cs->buf[cs->cdw++] = a->flush_flags; /* CP_COHER_CNTL */ - cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ - cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ - cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ - - a->flush_flags = 0; -} - -static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct r600_atom *atom) -{ - struct 
radeon_winsys_cs *cs = rctx->cs; - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); -} - void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id, @@ -108,37 +87,16 @@ void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom r600_write_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref); } -void r600_init_common_atoms(struct r600_context *rctx) -{ - r600_init_atom(rctx, &rctx->r6xx_flush_and_inv_cmd, 2, r600_emit_r6xx_flush_and_inv, 2); - r600_init_atom(rctx, &rctx->surface_sync_cmd.atom, 3, r600_emit_surface_sync, 5); -} - -unsigned r600_get_cb_flush_flags(struct r600_context *rctx) -{ - unsigned flags = 0; - - if (rctx->framebuffer.nr_cbufs) { - flags |= S_0085F0_CB_ACTION_ENA(1) | - (((1 << rctx->framebuffer.nr_cbufs) - 1) << S_0085F0_CB0_DEST_BASE_ENA_SHIFT); - } - - /* Workaround for broken flushing on some R6xx chipsets. */ - if (rctx->family == CHIP_RV670 || - rctx->family == CHIP_RS780 || - rctx->family == CHIP_RS880) { - flags |= S_0085F0_CB1_DEST_BASE_ENA(1) | - S_0085F0_DEST_BASE_0_ENA(1); - } - return flags; -} - void r600_texture_barrier(struct pipe_context *ctx) { struct r600_context *rctx = (struct r600_context *)ctx; - rctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1) | r600_get_cb_flush_flags(rctx); - r600_atom_dirty(rctx, &rctx->surface_sync_cmd.atom); + rctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_TEX_FLUSH; + + /* R6xx errata */ + if (rctx->chip_class == R600) { + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; + } } static bool r600_conv_pipe_prim(unsigned pprim, unsigned *prim) @@ -424,7 +382,7 @@ static void r600_bind_samplers(struct pipe_context *pipe, } if (sampler->border_color_use) { dst->atom_sampler.num_dw += 11; - rctx->flags |= R600_PARTIAL_FLUSH; + rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; } else { dst->atom_sampler.num_dw += 5; } @@ -432,7 +390,7 @@ static void 
r600_bind_samplers(struct pipe_context *pipe, } if (rctx->chip_class <= R700 && seamless_cube_map != -1 && seamless_cube_map != rctx->seamless_cube_map.enabled) { /* change in TA_CNTL_AUX need a pipeline flush */ - rctx->flags |= R600_PARTIAL_FLUSH; + rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; rctx->seamless_cube_map.enabled = seamless_cube_map; r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom); } @@ -477,8 +435,6 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) rctx->vertex_elements = v; if (v) { - r600_inval_shader_cache(rctx); - rctx->states[v->rstate.id] = &v->rstate; r600_context_pipe_state_set(rctx, &v->rstate); } @@ -515,7 +471,7 @@ void r600_set_index_buffer(struct pipe_context *ctx, void r600_vertex_buffers_dirty(struct r600_context *rctx) { if (rctx->vertex_buffer_state.dirty_mask) { - r600_inval_vertex_cache(rctx); + rctx->flags |= rctx->has_vertex_cache ? R600_CONTEXT_VTX_FLUSH : R600_CONTEXT_TEX_FLUSH; rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) * util_bitcount(rctx->vertex_buffer_state.dirty_mask); r600_atom_dirty(rctx, &rctx->vertex_buffer_state.atom); @@ -570,7 +526,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx, struct r600_samplerview_state *state) { if (state->dirty_mask) { - r600_inval_texture_cache(rctx); + rctx->flags |= R600_CONTEXT_TEX_FLUSH; state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 14 : 13) * util_bitcount(state->dirty_mask); r600_atom_dirty(rctx, &state->atom); @@ -898,7 +854,7 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state) { if (state->dirty_mask) { - r600_inval_shader_cache(rctx); + rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; state->atom.num_dw = rctx->chip_class >= EVERGREEN ? 
util_bitcount(state->dirty_mask)*20 : util_bitcount(state->dirty_mask)*19; r600_atom_dirty(rctx, &state->atom); @@ -1148,13 +1104,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) r600_update_derived_state(rctx); - /* partial flush triggered by border color change */ - if (rctx->flags & R600_PARTIAL_FLUSH) { - rctx->flags &= ~R600_PARTIAL_FLUSH; - r600_write_value(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - r600_write_value(cs, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); - } - if (info.indexed) { /* Initialize the index buffer struct. */ pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer); @@ -1221,6 +1170,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) /* Emit states (the function expects that we emit at most 17 dwords here). */ r600_need_cs_space(rctx, 0, TRUE); + r600_flush_emit(rctx); for (i = 0; i < R600_MAX_ATOM; i++) { if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) { @@ -1275,8 +1225,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0); } - rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING; - /* Set the depth buffer as dirty. 
*/ if (rctx->framebuffer.zsbuf) { struct pipe_surface *surf = rctx->framebuffer.zsbuf; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 28423e1f518..4bd77161ca9 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -3341,9 +3341,21 @@ #define S_0085F0_DB_DEST_BASE_ENA(x) (((x) & 0x1) << 14) #define G_0085F0_DB_DEST_BASE_ENA(x) (((x) >> 14) & 0x1) #define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF +/* r600 only start */ #define S_0085F0_CR_DEST_BASE_ENA(x) (((x) & 0x1) << 15) #define G_0085F0_CR_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) #define C_0085F0_CR_DEST_BASE_ENA 0xFFFF7FFF +/* r600 only end */ +/* evergreen only start */ +#define S_0085F0_CB8_DEST_BASE_ENA(x) (((x) & 0x1) << 15) +#define G_0085F0_CB8_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) +#define S_0085F0_CB9_DEST_BASE_ENA(x) (((x) & 0x1) << 16) +#define G_0085F0_CB9_DEST_BASE_ENA(x) (((x) >> 16) & 0x1) +#define S_0085F0_CB10_DEST_BASE_ENA(x) (((x) & 0x1) << 17) +#define G_0085F0_CB10_DEST_BASE_ENA(x) (((x) >> 17) & 0x1) +#define S_0085F0_CB11_DEST_BASE_ENA(x) (((x) & 0x1) << 18) +#define G_0085F0_CB11_DEST_BASE_ENA(x) (((x) >> 18) & 0x1) +/* evergreen only end */ #define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) #define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) #define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF