/*
* Inferred framebuffer and blender state.
*
- * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state
- * is that:
- * - The blend state mask is 0xf most of the time.
- * - The COLOR1 format isn't INVALID because of possible dual-source blending,
- * so COLOR1 is enabled pretty much all the time.
- * So CB_TARGET_MASK is the only register that can disable COLOR1.
- *
- * Another reason is to avoid a hang with dual source blending.
+ * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
+ * if there is not enough PS outputs.
*/
static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_state_blend *blend = sctx->queued.named.blend;
- uint32_t cb_target_mask = 0, i;
-
- for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
- if (sctx->framebuffer.state.cbufs[i])
- cb_target_mask |= 0xf << (4*i);
+ uint32_t cb_target_mask, i;
+ /* CB_COLORn_INFO.FORMAT=INVALID disables empty colorbuffer slots. */
if (blend)
- cb_target_mask &= blend->cb_target_mask;
+ cb_target_mask = blend->cb_target_mask;
+ else
+ cb_target_mask = 0xffffffff;
/* Avoid a hang that happens when dual source blending is enabled
* but there is not enough color outputs. This is undefined behavior,
S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+ /* Only set dual source blending for MRT0 to avoid a hang. */
+ if (i >= 1 && blend->dual_src_blend)
+ continue;
+
+ /* Only addition and subtraction equations are supported with
+ * dual source blending.
+ */
+ if (blend->dual_src_blend &&
+ (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
+ eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
+ assert(!"Unsupported equation for dual source blending");
+ continue;
+ }
+
if (!state->rt[j].colormask)
continue;
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+ sctx->do_update_shaders = true;
}
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
si_update_poly_offset_state(sctx);
si_mark_atom_dirty(sctx, &sctx->clip_regs);
+ sctx->do_update_shaders = true;
}
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
sctx->stencil_ref.dsa_part = dsa->stencil_ref;
si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
}
+ sctx->do_update_shaders = true;
}
static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
S_028C70_COMP_SWAP(swap) |
S_028C70_BLEND_CLAMP(blend_clamp) |
S_028C70_BLEND_BYPASS(blend_bypass) |
+ S_028C70_SIMPLE_FLOAT(1) |
+ S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
+ ntype != V_028C70_NUMBER_SNORM &&
+ ntype != V_028C70_NUMBER_SRGB &&
+ format != V_028C70_COLOR_8_24 &&
+ format != V_028C70_COLOR_24_8) |
S_028C70_NUMBER_TYPE(ntype) |
S_028C70_ENDIAN(endian);
vi_separate_dcc_start_query(ctx, rtex);
}
}
- /* Set the second SPI format for possible dual-src blending. */
- if (i == 1 && surf) {
- sctx->framebuffer.spi_shader_col_format |=
- surf->spi_shader_col_format << (i * 4);
- sctx->framebuffer.spi_shader_col_format_alpha |=
- surf->spi_shader_col_format_alpha << (i * 4);
- sctx->framebuffer.spi_shader_col_format_blend |=
- surf->spi_shader_col_format_blend << (i * 4);
- sctx->framebuffer.spi_shader_col_format_blend_alpha |=
- surf->spi_shader_col_format_blend_alpha << (i * 4);
- }
if (state->zsbuf) {
surf = (struct r600_surface*)state->zsbuf;
}
sctx->need_check_render_feedback = true;
+ sctx->do_update_shaders = true;
}
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
tex->dcc_offset +
tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
}
- /* set CB_COLOR1_INFO for possible dual-src blending */
- if (i == 1 && state->cbufs[0] &&
- sctx->framebuffer.dirty_cbufs & (1 << 0)) {
- radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
- cb_color_info);
- i++;
- }
for (; i < 8 ; i++)
if (sctx->framebuffer.dirty_cbufs & (1 << i))
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
if (nr_samples <= 1 && sctx->smoothing_enabled)
nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
- /* The small primitive filter on Polaris requires explicitly setting
- * sample locations to 0 when MSAA is disabled.
+ /* On Polaris, the small primitive filter uses the sample locations
+ * even when MSAA is off, so we need to make sure they're set to 0.
*/
- if (sctx->b.family >= CHIP_POLARIS10) {
- struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
-
- if (!sctx->smoothing_enabled &&
- rs && !rs->multisample_enable)
- nr_samples = 1;
- }
-
if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
(nr_samples != sctx->msaa_sample_locs.nr_samples)) {
sctx->msaa_sample_locs.nr_samples = nr_samples;
cayman_emit_msaa_sample_locs(cs, nr_samples);
}
+
+ if (sctx->b.family >= CHIP_POLARIS10) {
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ unsigned small_prim_filter_cntl =
+ S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
+ S_028830_LINE_FILTER_DISABLE(1); /* line bug */
+
+ /* The alternative of setting sample locations to 0 would
+ * require a DB flush to avoid Z errors, see
+ * https://bugs.freedesktop.org/show_bug.cgi?id=96908
+ */
+ if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
+ small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
+
+ radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
+ small_prim_filter_cntl);
+ }
}
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
return;
sctx->ps_iter_samples = min_samples;
+ sctx->do_update_shaders = true;
if (sctx->framebuffer.nr_samples > 1)
si_mark_atom_dirty(sctx, &sctx->msaa_config);
void
si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
enum pipe_format format,
- unsigned first_element, unsigned last_element,
+ unsigned offset, unsigned size,
uint32_t *state)
{
const struct util_format_description *desc;
desc = util_format_description(format);
first_non_void = util_format_get_first_non_void_channel(format);
stride = desc->block.bits / 8;
- va = buf->gpu_address + first_element * stride;
+ va = buf->gpu_address + offset;
num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);
- num_records = last_element + 1 - first_element;
+ num_records = size / stride;
num_records = MIN2(num_records, buf->b.b.width0 / stride);
if (screen->b.chip_class >= VI)
si_make_buffer_descriptor(sctx->screen,
(struct r600_resource *)texture,
state->format,
- state->u.buf.first_element,
- state->u.buf.last_element,
+ state->u.buf.offset,
+ state->u.buf.size,
view->state);
LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
return NULL;
}
+ assert(tmp->flushed_depth_texture);
+
/* Override format for the case where the flushed texture
* contains only Z or only S.
*/
sctx->vertex_elements = v;
sctx->vertex_buffers_dirty = true;
+ sctx->do_update_shaders = true;
}
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
if (sctx->b.family == CHIP_STONEY)
si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
- if (sctx->b.family >= CHIP_POLARIS10)
- si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
- S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
- S_028830_LINE_FILTER_DISABLE(1)); /* line bug */
-
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);