From: Nicolai Hähnle Date: Wed, 6 Jul 2016 14:33:43 +0000 (+0200) Subject: radeonsi: explicitly choose center locations for 1xAA on Polaris X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d938b8c0bf32ab5f0103ac68071c4cc467846108;p=mesa.git radeonsi: explicitly choose center locations for 1xAA on Polaris Unlike SC, the small primitive filter does not automatically use center locations in 1xAA mode, so this is needed to avoid artifacts caused by the small primitive filter discarding triangles that it shouldn't. As a side effect of how the effective number of samples is now calculated, this patch also avoids submitting the sample locations for line/poly smoothing when they're not really needed. Cc: 12.0 Reviewed-by: Marek Olšák --- diff --git a/src/gallium/drivers/radeon/cayman_msaa.c b/src/gallium/drivers/radeon/cayman_msaa.c index 89c4937a2cc..33f1040185a 100644 --- a/src/gallium/drivers/radeon/cayman_msaa.c +++ b/src/gallium/drivers/radeon/cayman_msaa.c @@ -143,6 +143,13 @@ void cayman_init_msaa(struct pipe_context *ctx) void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples) { switch (nr_samples) { + default: + case 1: + radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0); + radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0); + radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0); + radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0); + break; case 2: radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]); radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 500eca7de63..f36a7a025d6 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -207,7 +207,8 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->clip_regs); si_mark_atom_dirty(ctx, &ctx->clip_state.atom); - si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs); + ctx->msaa_sample_locs.nr_samples = 0; + si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom); si_mark_atom_dirty(ctx, &ctx->msaa_config); si_mark_atom_dirty(ctx, &ctx->sample_mask.atom); si_mark_atom_dirty(ctx, &ctx->cb_render_state); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 1f63c12e5b4..326b8191475 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -181,6 +181,11 @@ struct si_clip_state { struct pipe_clip_state state; }; +struct si_sample_locs { + struct r600_atom atom; + unsigned nr_samples; +}; + struct si_sample_mask { struct r600_atom atom; uint16_t sample_mask; @@ -225,7 +230,7 @@ struct si_context { /* Atom declarations. */ struct r600_atom cache_flush; struct si_framebuffer framebuffer; - struct r600_atom msaa_sample_locs; + struct si_sample_locs msaa_sample_locs; struct r600_atom db_render_state; struct r600_atom msaa_config; struct si_sample_mask sample_mask; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 4182906a525..ee92f153db0 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -856,9 +856,13 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) return; if (sctx->framebuffer.nr_samples > 1 && - (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) + (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) { si_mark_atom_dirty(sctx, &sctx->db_render_state); + if (sctx->b.family >= CHIP_POLARIS10) + si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); + } + r600_set_scissor_enable(&sctx->b, rs->scissor_enable); si_pm4_bind_state(sctx, rasterizer, rs); @@ -2380,18 +2384,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); - /* Smoothing (only possible with nr_samples == 1) uses the same - * sample locations as the MSAA it simulates. - * - * Therefore, don't update the sample locations when - * transitioning from no AA to smoothing-equivalent AA, and - * vice versa. - */ - if ((sctx->framebuffer.nr_samples != 1 || - old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) && - (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES || - old_nr_samples != 1)) - si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs); + si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); } sctx->need_check_render_feedback = true; @@ -2570,8 +2563,28 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx, struct radeon_winsys_cs *cs = sctx->b.gfx.cs; unsigned nr_samples = sctx->framebuffer.nr_samples; - cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples : - SI_NUM_SMOOTH_AA_SAMPLES); + /* Smoothing (only possible with nr_samples == 1) uses the same + * sample locations as the MSAA it simulates. + */ + if (nr_samples <= 1 && sctx->smoothing_enabled) + nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; + + /* The small primitive filter on Polaris requires explicitly setting + * sample locations to 0 when MSAA is disabled. + */ + if (sctx->b.family >= CHIP_POLARIS10) { + struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; + + if (!sctx->smoothing_enabled && + rs && !rs->multisample_enable) + nr_samples = 1; + } + + if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) && + (nr_samples != sctx->msaa_sample_locs.nr_samples)) { + sctx->msaa_sample_locs.nr_samples = nr_samples; + cayman_emit_msaa_sample_locs(cs, nr_samples); + } } static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) @@ -3402,7 +3415,7 @@ void si_init_state_functions(struct si_context *sctx) si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush); si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state); - si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); + si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 117cf4be1bc..abbe451d8e8 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2168,6 +2168,10 @@ bool si_update_shaders(struct si_context *sctx) if (sctx->b.chip_class == SI) si_mark_atom_dirty(sctx, &sctx->db_render_state); + + if (sctx->framebuffer.nr_samples <= 1 && + sctx->b.family >= CHIP_POLARIS10) + si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); } }