gallium: change pipe_sampler_view::first_element/last_element -> offset/size
[mesa.git] / src / gallium / drivers / radeonsi / si_state.c
index bdd7ef4a0f82417bd7478d13e0a5e55d2ee8eef1..7e63d487377689433da9cdb7c2f637c90a7b817c 100644 (file)
@@ -88,27 +88,20 @@ static unsigned si_pack_float_12p4(float x)
 /*
  * Inferred framebuffer and blender state.
  *
- * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state
- * is that:
- * - The blend state mask is 0xf most of the time.
- * - The COLOR1 format isn't INVALID because of possible dual-source blending,
- *   so COLOR1 is enabled pretty much all the time.
- * So CB_TARGET_MASK is the only register that can disable COLOR1.
- *
- * Another reason is to avoid a hang with dual source blending.
+ * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
+ * if there is not enough PS outputs.
  */
 static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        struct si_state_blend *blend = sctx->queued.named.blend;
-       uint32_t cb_target_mask = 0, i;
-
-       for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
-               if (sctx->framebuffer.state.cbufs[i])
-                       cb_target_mask |= 0xf << (4*i);
+       uint32_t cb_target_mask, i;
 
+       /* CB_COLORn_INFO.FORMAT=INVALID disables empty colorbuffer slots. */
        if (blend)
-               cb_target_mask &= blend->cb_target_mask;
+               cb_target_mask = blend->cb_target_mask;
+       else
+               cb_target_mask = 0xffffffff;
 
        /* Avoid a hang that happens when dual source blending is enabled
         * but there is not enough color outputs. This is undefined behavior,
@@ -460,6 +453,20 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
                        S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
                        S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
 
+               /* Only set dual source blending for MRT0 to avoid a hang. */
+               if (i >= 1 && blend->dual_src_blend)
+                       continue;
+
+               /* Only addition and subtraction equations are supported with
+                * dual source blending.
+                */
+               if (blend->dual_src_blend &&
+                   (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
+                    eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
+                       assert(!"Unsupported equation for dual source blending");
+                       continue;
+               }
+
                if (!state->rt[j].colormask)
                        continue;
 
@@ -572,6 +579,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
        struct si_context *sctx = (struct si_context *)ctx;
        si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
        si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+       sctx->do_update_shaders = true;
 }
 
 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
@@ -869,6 +877,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
        si_update_poly_offset_state(sctx);
 
        si_mark_atom_dirty(sctx, &sctx->clip_regs);
+       sctx->do_update_shaders = true;
 }
 
 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
@@ -1018,6 +1027,7 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
                sctx->stencil_ref.dsa_part = dsa->stencil_ref;
                si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
        }
+       sctx->do_update_shaders = true;
 }
 
 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
@@ -2049,6 +2059,12 @@ static void si_initialize_color_surface(struct si_context *sctx,
                S_028C70_COMP_SWAP(swap) |
                S_028C70_BLEND_CLAMP(blend_clamp) |
                S_028C70_BLEND_BYPASS(blend_bypass) |
+               S_028C70_SIMPLE_FLOAT(1) |
+               S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
+                                   ntype != V_028C70_NUMBER_SNORM &&
+                                   ntype != V_028C70_NUMBER_SRGB &&
+                                   format != V_028C70_COLOR_8_24 &&
+                                   format != V_028C70_COLOR_24_8) |
                S_028C70_NUMBER_TYPE(ntype) |
                S_028C70_ENDIAN(endian);
 
@@ -2327,17 +2343,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                        vi_separate_dcc_start_query(ctx, rtex);
                }
        }
-       /* Set the second SPI format for possible dual-src blending. */
-       if (i == 1 && surf) {
-               sctx->framebuffer.spi_shader_col_format |=
-                       surf->spi_shader_col_format << (i * 4);
-               sctx->framebuffer.spi_shader_col_format_alpha |=
-                       surf->spi_shader_col_format_alpha << (i * 4);
-               sctx->framebuffer.spi_shader_col_format_blend |=
-                       surf->spi_shader_col_format_blend << (i * 4);
-               sctx->framebuffer.spi_shader_col_format_blend_alpha |=
-                       surf->spi_shader_col_format_blend_alpha << (i * 4);
-       }
 
        if (state->zsbuf) {
                surf = (struct r600_surface*)state->zsbuf;
@@ -2388,6 +2393,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        }
 
        sctx->need_check_render_feedback = true;
+       sctx->do_update_shaders = true;
 }
 
 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
@@ -2494,13 +2500,6 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                                         tex->dcc_offset +
                                         tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
        }
-       /* set CB_COLOR1_INFO for possible dual-src blending */
-       if (i == 1 && state->cbufs[0] &&
-           sctx->framebuffer.dirty_cbufs & (1 << 0)) {
-               radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
-                                      cb_color_info);
-               i++;
-       }
        for (; i < 8 ; i++)
                if (sctx->framebuffer.dirty_cbufs & (1 << i))
                        radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
@@ -2569,22 +2568,31 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
        if (nr_samples <= 1 && sctx->smoothing_enabled)
                nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
 
-       /* The small primitive filter on Polaris requires explicitly setting
-        * sample locations to 0 when MSAA is disabled.
+       /* On Polaris, the small primitive filter uses the sample locations
+        * even when MSAA is off, so we need to make sure they're set to 0.
         */
-       if (sctx->b.family >= CHIP_POLARIS10) {
-               struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
-
-               if (!sctx->smoothing_enabled &&
-                   rs && !rs->multisample_enable)
-                       nr_samples = 1;
-       }
-
        if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
            (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
                sctx->msaa_sample_locs.nr_samples = nr_samples;
                cayman_emit_msaa_sample_locs(cs, nr_samples);
        }
+
+       if (sctx->b.family >= CHIP_POLARIS10) {
+               struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+               unsigned small_prim_filter_cntl =
+                       S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
+                       S_028830_LINE_FILTER_DISABLE(1); /* line bug */
+
+               /* The alternative of setting sample locations to 0 would
+                * require a DB flush to avoid Z errors, see
+                * https://bugs.freedesktop.org/show_bug.cgi?id=96908
+                */
+               if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
+                       small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
+
+               radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
+                                      small_prim_filter_cntl);
+       }
 }
 
 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
@@ -2619,6 +2627,7 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
                return;
 
        sctx->ps_iter_samples = min_samples;
+       sctx->do_update_shaders = true;
 
        if (sctx->framebuffer.nr_samples > 1)
                si_mark_atom_dirty(sctx, &sctx->msaa_config);
@@ -2635,7 +2644,7 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
 void
 si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
                          enum pipe_format format,
-                         unsigned first_element, unsigned last_element,
+                         unsigned offset, unsigned size,
                          uint32_t *state)
 {
        const struct util_format_description *desc;
@@ -2648,11 +2657,11 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
        desc = util_format_description(format);
        first_non_void = util_format_get_first_non_void_channel(format);
        stride = desc->block.bits / 8;
-       va = buf->gpu_address + first_element * stride;
+       va = buf->gpu_address + offset;
        num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
        data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);
 
-       num_records = last_element + 1 - first_element;
+       num_records = size / stride;
        num_records = MIN2(num_records, buf->b.b.width0 / stride);
 
        if (screen->b.chip_class >= VI)
@@ -2951,8 +2960,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
                si_make_buffer_descriptor(sctx->screen,
                                          (struct r600_resource *)texture,
                                          state->format,
-                                         state->u.buf.first_element,
-                                         state->u.buf.last_element,
+                                         state->u.buf.offset,
+                                         state->u.buf.size,
                                          view->state);
 
                LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
@@ -3001,6 +3010,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
                        return NULL;
                }
 
+               assert(tmp->flushed_depth_texture);
+
                /* Override format for the case where the flushed texture
                 * contains only Z or only S.
                 */
@@ -3256,6 +3267,7 @@ static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
 
        sctx->vertex_elements = v;
        sctx->vertex_buffers_dirty = true;
+       sctx->do_update_shaders = true;
 }
 
 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
@@ -3955,11 +3967,6 @@ static void si_init_config(struct si_context *sctx)
        if (sctx->b.family == CHIP_STONEY)
                si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
 
-       if (sctx->b.family >= CHIP_POLARIS10)
-               si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
-                              S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
-                              S_028830_LINE_FILTER_DISABLE(1)); /* line bug */
-
        si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
        if (sctx->b.chip_class >= CIK)
                si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);