/*
* Inferred framebuffer and blender state.
*
- * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state
- * is that:
- * - The blend state mask is 0xf most of the time.
- * - The COLOR1 format isn't INVALID because of possible dual-source blending,
- * so COLOR1 is enabled pretty much all the time.
- * So CB_TARGET_MASK is the only register that can disable COLOR1.
- *
- * Another reason is to avoid a hang with dual source blending.
+ * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
+ * if there are not enough PS outputs.
*/
static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_state_blend *blend = sctx->queued.named.blend;
- uint32_t cb_target_mask = 0, i;
-
- for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
- if (sctx->framebuffer.state.cbufs[i])
- cb_target_mask |= 0xf << (4*i);
+ uint32_t cb_target_mask, i;
+ /* CB_COLORn_INFO.FORMAT=INVALID disables empty colorbuffer slots. */
if (blend)
- cb_target_mask &= blend->cb_target_mask;
+ cb_target_mask = blend->cb_target_mask;
+ else
+ cb_target_mask = 0xffffffff;
/* Avoid a hang that happens when dual source blending is enabled
* but there is not enough color outputs. This is undefined behavior,
S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+ /* Only set dual source blending for MRT0 to avoid a hang. */
+ if (i >= 1 && blend->dual_src_blend)
+ continue;
+
+ /* Only addition and subtraction equations are supported with
+ * dual source blending.
+ */
+ if (blend->dual_src_blend &&
+ (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
+ eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
+ assert(!"Unsupported equation for dual source blending");
+ continue;
+ }
+
if (!state->rt[j].colormask)
continue;
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+ sctx->do_update_shaders = true;
}
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
return;
if (sctx->framebuffer.nr_samples > 1 &&
- (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
+ (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) {
si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ if (sctx->b.family >= CHIP_POLARIS10)
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
+ }
+
r600_set_scissor_enable(&sctx->b, rs->scissor_enable);
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_poly_offset_state(sctx);
si_mark_atom_dirty(sctx, &sctx->clip_regs);
+ sctx->do_update_shaders = true;
}
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
sctx->stencil_ref.dsa_part = dsa->stencil_ref;
si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
}
+ sctx->do_update_shaders = true;
}
static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
S_028C70_COMP_SWAP(swap) |
S_028C70_BLEND_CLAMP(blend_clamp) |
S_028C70_BLEND_BYPASS(blend_bypass) |
+ S_028C70_SIMPLE_FLOAT(1) |
+ S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
+ ntype != V_028C70_NUMBER_SNORM &&
+ ntype != V_028C70_NUMBER_SRGB &&
+ format != V_028C70_COLOR_8_24 &&
+ format != V_028C70_COLOR_24_8) |
S_028C70_NUMBER_TYPE(ntype) |
S_028C70_ENDIAN(endian);
vi_separate_dcc_start_query(ctx, rtex);
}
}
- /* Set the second SPI format for possible dual-src blending. */
- if (i == 1 && surf) {
- sctx->framebuffer.spi_shader_col_format |=
- surf->spi_shader_col_format << (i * 4);
- sctx->framebuffer.spi_shader_col_format_alpha |=
- surf->spi_shader_col_format_alpha << (i * 4);
- sctx->framebuffer.spi_shader_col_format_blend |=
- surf->spi_shader_col_format_blend << (i * 4);
- sctx->framebuffer.spi_shader_col_format_blend_alpha |=
- surf->spi_shader_col_format_blend_alpha << (i * 4);
- }
if (state->zsbuf) {
surf = (struct r600_surface*)state->zsbuf;
constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
- /* Smoothing (only possible with nr_samples == 1) uses the same
- * sample locations as the MSAA it simulates.
- *
- * Therefore, don't update the sample locations when
- * transitioning from no AA to smoothing-equivalent AA, and
- * vice versa.
- */
- if ((sctx->framebuffer.nr_samples != 1 ||
- old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
- (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
- old_nr_samples != 1))
- si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs);
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
}
sctx->need_check_render_feedback = true;
+ sctx->do_update_shaders = true;
}
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
tex->dcc_offset +
tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
}
- /* set CB_COLOR1_INFO for possible dual-src blending */
- if (i == 1 && state->cbufs[0] &&
- sctx->framebuffer.dirty_cbufs & (1 << 0)) {
- radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
- cb_color_info);
- i++;
- }
for (; i < 8 ; i++)
if (sctx->framebuffer.dirty_cbufs & (1 << i))
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned nr_samples = sctx->framebuffer.nr_samples;
- cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
- SI_NUM_SMOOTH_AA_SAMPLES);
+ /* Smoothing (only possible with nr_samples == 1) uses the same
+ * sample locations as the MSAA it simulates.
+ */
+ if (nr_samples <= 1 && sctx->smoothing_enabled)
+ nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
+
+ /* On Polaris, the small primitive filter uses the sample locations
+ * even when MSAA is off, so we need to make sure they're set to 0.
+ */
+ if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
+ (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
+ sctx->msaa_sample_locs.nr_samples = nr_samples;
+ cayman_emit_msaa_sample_locs(cs, nr_samples);
+ }
+
+ if (sctx->b.family >= CHIP_POLARIS10) {
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ unsigned small_prim_filter_cntl =
+ S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
+ S_028830_LINE_FILTER_DISABLE(1); /* line bug */
+
+ /* The alternative of setting sample locations to 0 would
+ * require a DB flush to avoid Z errors, see
+ * https://bugs.freedesktop.org/show_bug.cgi?id=96908
+ */
+ if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
+ small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
+
+ radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
+ small_prim_filter_cntl);
+ }
}
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
return;
sctx->ps_iter_samples = min_samples;
+ sctx->do_update_shaders = true;
if (sctx->framebuffer.nr_samples > 1)
si_mark_atom_dirty(sctx, &sctx->msaa_config);
void
si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
enum pipe_format format,
- unsigned first_element, unsigned last_element,
+ unsigned offset, unsigned size,
uint32_t *state)
{
const struct util_format_description *desc;
desc = util_format_description(format);
first_non_void = util_format_get_first_non_void_channel(format);
stride = desc->block.bits / 8;
- va = buf->gpu_address + first_element * stride;
+ va = buf->gpu_address + offset;
num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);
- num_records = last_element + 1 - first_element;
+ num_records = size / stride;
num_records = MIN2(num_records, buf->b.b.width0 / stride);
if (screen->b.chip_class >= VI)
si_make_buffer_descriptor(sctx->screen,
(struct r600_resource *)texture,
state->format,
- state->u.buf.first_element,
- state->u.buf.last_element,
+ state->u.buf.offset,
+ state->u.buf.size,
view->state);
LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
/* Texturing with separate depth and stencil. */
pipe_format = state->format;
+
+ /* Depth/stencil texturing sometimes needs a separate texture. */
+ if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
+ if (!tmp->flushed_depth_texture &&
+ !r600_init_flushed_depth_texture(ctx, texture, NULL)) {
+ pipe_resource_reference(&view->base.texture, NULL);
+ FREE(view);
+ return NULL;
+ }
+
+ assert(tmp->flushed_depth_texture);
+
+ /* Override format for the case where the flushed texture
+ * contains only Z or only S.
+ */
+ if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format)
+ pipe_format = tmp->flushed_depth_texture->resource.b.b.format;
+
+ tmp = tmp->flushed_depth_texture;
+ }
+
surflevel = tmp->surface.level;
- if (tmp->is_depth && !tmp->is_flushing_texture) {
+ if (tmp->db_compatible) {
switch (pipe_format) {
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
pipe_format = PIPE_FORMAT_Z32_FLOAT;
break;
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
- /* Z24 is always stored like this. */
+ /* Z24 is always stored like this for DB
+ * compatibility.
+ */
pipe_format = PIPE_FORMAT_Z24X8_UNORM;
break;
case PIPE_FORMAT_X24S8_UINT:
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned mask = sctx->sample_mask.sample_mask;
+ /* Needed for line and polygon smoothing as well as for the Polaris
+ * small primitive filter. We expect the state tracker to take care of
+ * this for us.
+ */
+ assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
+ (mask & 1 && sctx->blitter->running));
+
radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
radeon_emit(cs, mask | (mask << 16));
radeon_emit(cs, mask | (mask << 16));
sctx->vertex_elements = v;
sctx->vertex_buffers_dirty = true;
+ sctx->do_update_shaders = true;
}
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
- si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
+ si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
if (sctx->b.family == CHIP_STONEY)
si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
- if (sctx->b.family >= CHIP_POLARIS10)
- si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
- S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
- S_028830_LINE_FILTER_DISABLE(1)); /* line bug */
-
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);