unsigned num_resident_handles;
uint64_t num_alloc_tex_transfer_bytes;
unsigned last_tex_ps_draw_ratio; /* for query */
+ unsigned context_roll_counter;
/* Queries. */
/* Maintain the list of active queries for pausing between IBs. */
(sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
cb_target_mask = 0;
- radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK,
- SI_TRACKED_CB_TARGET_MASK, cb_target_mask);
-
/* GFX9: Flush DFSM when CB_TARGET_MASK changes.
* I think we don't have to do anything between IBs.
*/
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
}
+ unsigned initial_cdw = cs->current.cdw;
+ radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK,
+ SI_TRACKED_CB_TARGET_MASK, cb_target_mask);
+
if (sctx->chip_class >= VI) {
/* DCC MSAA workaround for blending.
* Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
sx_ps_downconvert, sx_blend_opt_epsilon,
sx_blend_opt_control);
}
+ if (initial_cdw != cs->current.cdw)
+ sctx->context_roll_counter++;
}
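For reference, the pattern every emit function adopts in this patch is the same: snapshot the command-stream write pointer (cdw) before the radeon_opt_set_context_reg* calls, and if it advanced afterwards, at least one context register was really written (the *_opt_* helpers skip redundant writes), so the GPU rolled the context and context_roll_counter is bumped. Below is a minimal standalone sketch of that idea, using hypothetical stand-in types and helpers (fake_cs, fake_ctx, opt_set_reg, emit_some_state), not the real radeonsi structures or packet encoding.

#include <stdio.h>

/* Hypothetical stand-ins; only the cdw-snapshot idea from this patch is modeled. */
struct fake_cs {
	unsigned cdw;			/* command-stream write pointer, in dwords */
};

struct fake_ctx {
	struct fake_cs cs;
	unsigned context_roll_counter;
	unsigned tracked_value;		/* last value written to the one tracked register */
	int tracked_valid;
};

/* Behaves like radeon_opt_set_context_reg: writes the packet only if the
 * value actually changed, so redundant updates leave cdw untouched. */
static void opt_set_reg(struct fake_ctx *ctx, unsigned value)
{
	if (ctx->tracked_valid && ctx->tracked_value == value)
		return;
	ctx->tracked_value = value;
	ctx->tracked_valid = 1;
	ctx->cs.cdw += 3;		/* packet header + register offset + value */
}

static void emit_some_state(struct fake_ctx *ctx, unsigned value)
{
	unsigned initial_cdw = ctx->cs.cdw;

	opt_set_reg(ctx, value);

	/* cdw advanced => a register packet was written, i.e. the context rolled. */
	if (initial_cdw != ctx->cs.cdw)
		ctx->context_roll_counter++;
}

int main(void)
{
	struct fake_ctx ctx = {0};

	emit_some_state(&ctx, 0xf);	/* first write: counts as a roll */
	emit_some_state(&ctx, 0xf);	/* redundant: no packet, no roll */
	printf("context rolls: %u\n", ctx.context_roll_counter);	/* prints 1 */
	return 0;
}

The draw code later in this patch zeroes the counter before emitting dirty atoms and states and checks it afterwards, which is why most entries could be dropped from the static "states/atoms that roll context" lists.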
/*
clipdist_mask &= rs->clip_plane_enable;
culldist_mask |= clipdist_mask;
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
SI_TRACKED_PA_CL_VS_OUT_CNTL,
vs_sel->pa_cl_vs_out_cntl |
rs->pa_cl_clip_cntl |
ucp_mask |
S_028810_CLIP_DISABLE(window_space));
+
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll_counter++;
}
/*
{
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned db_shader_control, db_render_control, db_count_control;
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
/* DB_RENDER_CONTROL */
if (sctx->dbcb_depth_copy_enabled ||
radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL,
SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
+
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll_counter++;
}
/*
}
}
+ unsigned initial_cdw = cs->current.cdw;
+
/* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */
radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL,
SI_TRACKED_PA_SC_LINE_CNTL, sc_line_cntl,
radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1,
SI_TRACKED_PA_SC_MODE_CNTL_1, sc_mode_cntl_1);
- /* GFX9: Flush DFSM when the AA mode changes. */
- if (sctx->screen->dfsm_allowed) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+ if (initial_cdw != cs->current.cdw) {
+ sctx->context_roll_counter++;
+
+ /* GFX9: Flush DFSM when the AA mode changes. */
+ if (sctx->screen->dfsm_allowed) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+ }
}
}
#define SI_STATE_BIT(name) (1 << SI_STATE_IDX(name))
#define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
-static inline unsigned si_states_that_roll_context(void)
+static inline unsigned si_states_that_always_roll_context(void)
{
return (SI_STATE_BIT(blend) |
SI_STATE_BIT(rasterizer) |
SI_STATE_BIT(dsa) |
SI_STATE_BIT(poly_offset) |
- SI_STATE_BIT(es) |
- SI_STATE_BIT(gs) |
- SI_STATE_BIT(vgt_shader_config) |
- SI_STATE_BIT(vs) |
- SI_STATE_BIT(ps));
+ SI_STATE_BIT(vgt_shader_config));
}
union si_state_atoms {
sizeof(struct si_atom)))
#define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct si_atom*))
-static inline unsigned si_atoms_that_roll_context(void)
+static inline unsigned si_atoms_that_always_roll_context(void)
{
return (SI_ATOM_BIT(streamout_begin) |
SI_ATOM_BIT(streamout_enable) |
SI_ATOM_BIT(framebuffer) |
SI_ATOM_BIT(msaa_sample_locs) |
- SI_ATOM_BIT(db_render_state) |
- SI_ATOM_BIT(dpbb_state) |
- SI_ATOM_BIT(msaa_config) |
SI_ATOM_BIT(sample_mask) |
- SI_ATOM_BIT(cb_render_state) |
SI_ATOM_BIT(blend_color) |
- SI_ATOM_BIT(clip_regs) |
SI_ATOM_BIT(clip_state) |
- SI_ATOM_BIT(guardband) |
SI_ATOM_BIT(scissors) |
SI_ATOM_BIT(viewports) |
SI_ATOM_BIT(stencil_ref) |
- SI_ATOM_BIT(spi_map) |
SI_ATOM_BIT(scratch_state));
}
static void si_emit_dpbb_disable(struct si_context *sctx)
{
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
radeon_opt_set_context_reg(sctx, R_028C44_PA_SC_BINNER_CNTL_0,
SI_TRACKED_PA_SC_BINNER_CNTL_0,
S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
SI_TRACKED_DB_DFSM_CONTROL,
S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll_counter++;
}
void si_emit_dpbb_state(struct si_context *sctx)
if (bin_size.y >= 32)
bin_size_extend.y = util_logbase2(bin_size.y) - 5;
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
radeon_opt_set_context_reg(
sctx, R_028C44_PA_SC_BINNER_CNTL_0,
SI_TRACKED_PA_SC_BINNER_CNTL_0,
SI_TRACKED_DB_DFSM_CONTROL,
S_028060_PUNCHOUT_MODE(punchout_mode) |
S_028060_POPS_DRAIN_PS_ON_OVERLAP(1));
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll_counter++;
}
unsigned skip_atom_mask)
{
unsigned num_patches = 0;
+ /* Vega10/Raven scissor bug workaround. When any context register is
+ * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
+ * registers must be written too.
+ */
+ bool handle_scissor_bug = (sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
+ !si_is_atom_dirty(sctx, &sctx->atoms.s.scissors);
bool context_roll = false; /* set correctly for GFX9 only */
context_roll |= si_emit_rasterizer_prim_state(sctx);
if (sctx->tes_shader.cso)
context_roll |= si_emit_derived_tess_state(sctx, info, &num_patches);
- if (info->count_from_stream_output)
+
+ if (handle_scissor_bug &&
+ (info->count_from_stream_output ||
+ sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
+ sctx->dirty_states & si_states_that_always_roll_context() ||
+ si_prim_restart_index_changed(sctx, info)))
context_roll = true;
- /* Vega10/Raven scissor bug workaround. When any context register is
- * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
- * registers must be written too.
- */
- if ((sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
- (context_roll ||
- sctx->dirty_atoms & si_atoms_that_roll_context() ||
- sctx->dirty_states & si_states_that_roll_context() ||
- si_prim_restart_index_changed(sctx, info))) {
- sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
- si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
- }
+ sctx->context_roll_counter = 0;
/* Emit state atoms. */
unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
}
sctx->dirty_states = 0;
+ if (handle_scissor_bug &&
+ (context_roll || sctx->context_roll_counter)) {
+ sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ sctx->atoms.s.scissors.emit(sctx);
+ }
+
/* Emit draw states. */
si_emit_vs_state(sctx, info);
si_emit_draw_registers(sctx, info, num_patches);
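Condensing the hunk above: the workaround only needs attention on Vega10/Raven when the scissors atom is not already dirty; the emit functions count the rolls they cause, and if anything rolled the context, the scissors atom is emitted directly after the dirty-state loop. A standalone sketch of that control flow, with hypothetical stand-ins (draw_flow_sketch, the boolean parameters, fake_emit, fake_scissors) in place of the real family/dirty-mask checks and atoms:

#include <stdbool.h>

/* Sketch only: "always_rolls" stands in for the count_from_stream_output,
 * dirty-mask, and primitive-restart checks above. */
static void draw_flow_sketch(bool is_vega10_or_raven,
			     bool scissors_atom_already_dirty,
			     bool always_rolls,
			     unsigned *context_roll_counter,
			     void (*emit_dirty_atoms_and_states)(void),
			     void (*emit_scissors)(void))
{
	/* Only needed when the bug applies and the scissors atom would not
	 * be emitted by the normal dirty-atom loop anyway. */
	bool handle_scissor_bug = is_vega10_or_raven && !scissors_atom_already_dirty;
	bool context_roll = handle_scissor_bug && always_rolls;

	/* Let the emit functions count the rolls they actually cause. */
	*context_roll_counter = 0;
	emit_dirty_atoms_and_states();

	/* Any roll means PA_SC_VPORT_SCISSOR_* must be rewritten. */
	if (handle_scissor_bug && (context_roll || *context_roll_counter))
		emit_scissors();
}

static unsigned roll_counter;
static void fake_emit(void)     { roll_counter++; }	/* pretend a register was written */
static void fake_scissors(void) { /* would rewrite PA_SC_VPORT_SCISSOR_* here */ }

int main(void)
{
	draw_flow_sketch(true, false, false, &roll_counter, fake_emit, fake_scissors);
	return 0;
}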
static void si_emit_shader_es(struct si_context *sctx)
{
struct si_shader *shader = sctx->queued.named.es->shader;
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
if (!shader)
return;
radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl);
+
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll_counter++;
}
static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
static void si_emit_shader_gs(struct si_context *sctx)
{
struct si_shader *shader = sctx->queued.named.gs->shader;
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
if (!shader)
return;
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl);
}
+
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll_counter++;
}
static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
static void si_emit_shader_vs(struct si_context *sctx)
{
struct si_shader *shader = sctx->queued.named.vs->shader;
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
if (!shader)
return;
radeon_opt_set_context_reg(sctx, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl);
+
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll_counter++;
}
/**
static void si_emit_shader_ps(struct si_context *sctx)
{
struct si_shader *shader = sctx->queued.named.ps->shader;
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
+
if (!shader)
return;
radeon_opt_set_context_reg(sctx, R_02823C_CB_SHADER_MASK,
SI_TRACKED_CB_SHADER_MASK,
shader->ctx_reg.ps.cb_shader_mask);
+
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll_counter++;
}
static void si_shader_ps(struct si_shader *shader)
/* R_028644_SPI_PS_INPUT_CNTL_0 */
/* Dota 2: Only ~16% of SPI map updates set different values. */
/* Talos: Only ~9% of SPI map updates set different values. */
+ unsigned initial_cdw = sctx->gfx_cs->current.cdw;
radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0,
spi_ps_input_cntl,
sctx->tracked_regs.spi_ps_input_cntl, num_interp);
+
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll_counter++;
}
/**
* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ
* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ
*/
+ unsigned initial_cdw = ctx->gfx_cs->current.cdw;
radeon_opt_set_context_reg4(ctx, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
fui(guardband_y), fui(discard_y),
S_028BE4_PIX_CENTER(rs->half_pixel_center) |
S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
vp_as_scissor.quant_mode));
+ if (initial_cdw != ctx->gfx_cs->current.cdw)
+ ctx->context_roll_counter++;
}
static void si_emit_scissors(struct si_context *ctx)