radeonsi: implement forcing per-sample interpolation using the shader key only
author: Marek Olšák <marek.olsak@amd.com>
Sun, 3 Jan 2016 18:00:29 +0000 (19:00 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 9 Feb 2016 20:19:51 +0000 (21:19 +0100)
Forcing per-sample interpolation was implemented partly as a state and partly
by emulation in shader code. Since we want to do this in a fragment shader
prolog, it has to be put into the shader key, which will be used to generate
the prolog.

This also removes the spi_ps_input state atom and moves its registers
(SPI_PS_INPUT_ENA and SPI_PS_INPUT_ADDR) to the PS state.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_hw_context.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index d60c451562567583346671441bc3af8748d59ec6..b5a4034cc12c6a47cf6cd9cafdb72d28180042c4 100644 (file)
@@ -182,7 +182,6 @@ void si_begin_new_cs(struct si_context *ctx)
        si_mark_atom_dirty(ctx, &ctx->db_render_state);
        si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
        si_mark_atom_dirty(ctx, &ctx->spi_map);
-       si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
        si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
        si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
        si_all_descriptors_begin_new_cs(ctx);
index 48947442757cc0bda8c5544ad0640b3f081ed592..3c963db5078b1188b6c988638ccbf64996c111a6 100644 (file)
@@ -202,7 +202,6 @@ struct si_context {
        struct si_viewports             viewports;
        struct si_stencil_ref           stencil_ref;
        struct r600_atom                spi_map;
-       struct r600_atom                spi_ps_input;
 
        /* Precomputed states. */
        struct si_pm4_state             *init_config;
@@ -222,7 +221,6 @@ struct si_context {
        struct si_vertex_element        *vertex_elements;
        unsigned                        sprite_coord_enable;
        bool                            flatshade;
-       bool                            force_persample_interp;
 
        /* shader descriptors */
        struct si_descriptors           vertex_buffers;
index c595f20827403c17c2cf1a0e509109ff1826352a..0a92a7b54e60ba24251caefd02b2a6b099379923 100644 (file)
@@ -833,14 +833,11 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
 }
 
 /* This shouldn't be used by explicit INTERP opcodes. */
-static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
-                                    unsigned param)
+static unsigned select_interp_param(struct si_shader_context *si_shader_ctx,
+                                   unsigned param)
 {
-       struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
-       unsigned sample_param = 0;
-       LLVMValueRef default_ij, sample_ij, force_sample;
-
-       default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);
+       if (!si_shader_ctx->shader->key.ps.force_persample_interp)
+               return param;
 
        /* If the shader doesn't use center/centroid, just return the parameter.
         *
@@ -850,36 +847,15 @@ static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx,
        switch (param) {
        case SI_PARAM_PERSP_CENTROID:
        case SI_PARAM_PERSP_CENTER:
-               if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp)
-                       return default_ij;
-
-               sample_param = SI_PARAM_PERSP_SAMPLE;
-               break;
+               return SI_PARAM_PERSP_SAMPLE;
 
        case SI_PARAM_LINEAR_CENTROID:
        case SI_PARAM_LINEAR_CENTER:
-               if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear)
-                       return default_ij;
-
-               sample_param = SI_PARAM_LINEAR_SAMPLE;
-               break;
+               return SI_PARAM_LINEAR_SAMPLE;
 
        default:
-               return default_ij;
+               return param;
        }
-
-       /* Otherwise, we have to select (i,j) based on a user data SGPR. */
-       sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param);
-
-       /* TODO: this can be done more efficiently by switching between
-        * 2 prologs.
-        */
-       force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
-                                   SI_PARAM_PS_STATE_BITS);
-       force_sample = LLVMBuildTrunc(gallivm->builder, force_sample,
-                                     LLVMInt1TypeInContext(gallivm->context), "");
-       return LLVMBuildSelect(gallivm->builder, force_sample,
-                              sample_ij, default_ij, "");
 }
 
 static void declare_input_fs(
@@ -918,8 +894,11 @@ static void declare_input_fs(
                                                     decl->Interp.Location);
        if (interp_param_idx == -1)
                return;
-       else if (interp_param_idx)
-               interp_param = get_interp_param(si_shader_ctx, interp_param_idx);
+       else if (interp_param_idx) {
+               interp_param_idx = select_interp_param(si_shader_ctx,
+                                                      interp_param_idx);
+               interp_param = LLVMGetParam(main_fn, interp_param_idx);
+       }
 
        /* fs.constant returns the param from the middle vertex, so it's not
         * really useful for flat shading. It's meant to be used for custom
@@ -3633,7 +3612,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 
        case TGSI_PROCESSOR_FRAGMENT:
                params[SI_PARAM_ALPHA_REF] = f32;
-               params[SI_PARAM_PS_STATE_BITS] = i32;
                params[SI_PARAM_PRIM_MASK] = i32;
                last_sgpr = SI_PARAM_PRIM_MASK;
                params[SI_PARAM_PERSP_SAMPLE] = v2i32;
index d3609d463344d95d1dd2bf95cf22d248f5eda4e2..0d6a45a319a083caf30d3b5362080bf3177b584b 100644 (file)
@@ -88,7 +88,6 @@ struct radeon_shader_reloc;
 #define SI_SGPR_TCS_OUT_LAYOUT 9  /* TCS & TES only */
 #define SI_SGPR_TCS_IN_LAYOUT  10 /* TCS only */
 #define SI_SGPR_ALPHA_REF      8  /* PS only */
-#define SI_SGPR_PS_STATE_BITS  9  /* PS only */
 
 #define SI_VS_NUM_USER_SGPR    13 /* API VS */
 #define SI_ES_NUM_USER_SGPR    12 /* API VS */
@@ -97,7 +96,7 @@ struct radeon_shader_reloc;
 #define SI_TES_NUM_USER_SGPR   10
 #define SI_GS_NUM_USER_SGPR    8
 #define SI_GSCOPY_NUM_USER_SGPR        4
-#define SI_PS_NUM_USER_SGPR    10
+#define SI_PS_NUM_USER_SGPR    9
 
 /* LLVM function parameter indices */
 #define SI_PARAM_RW_BUFFERS    0
@@ -152,27 +151,23 @@ struct radeon_shader_reloc;
 
 /* PS only parameters */
 #define SI_PARAM_ALPHA_REF             4
-/* Bits:
- * 0: force_persample_interp
- */
-#define SI_PARAM_PS_STATE_BITS         5
-#define SI_PARAM_PRIM_MASK             6
-#define SI_PARAM_PERSP_SAMPLE          7
-#define SI_PARAM_PERSP_CENTER          8
-#define SI_PARAM_PERSP_CENTROID                9
-#define SI_PARAM_PERSP_PULL_MODEL      10
-#define SI_PARAM_LINEAR_SAMPLE         11
-#define SI_PARAM_LINEAR_CENTER         12
-#define SI_PARAM_LINEAR_CENTROID       13
-#define SI_PARAM_LINE_STIPPLE_TEX      14
-#define SI_PARAM_POS_X_FLOAT           15
-#define SI_PARAM_POS_Y_FLOAT           16
-#define SI_PARAM_POS_Z_FLOAT           17
-#define SI_PARAM_POS_W_FLOAT           18
-#define SI_PARAM_FRONT_FACE            19
-#define SI_PARAM_ANCILLARY             20
-#define SI_PARAM_SAMPLE_COVERAGE       21
-#define SI_PARAM_POS_FIXED_PT          22
+#define SI_PARAM_PRIM_MASK             5
+#define SI_PARAM_PERSP_SAMPLE          6
+#define SI_PARAM_PERSP_CENTER          7
+#define SI_PARAM_PERSP_CENTROID                8
+#define SI_PARAM_PERSP_PULL_MODEL      9
+#define SI_PARAM_LINEAR_SAMPLE         10
+#define SI_PARAM_LINEAR_CENTER         11
+#define SI_PARAM_LINEAR_CENTROID       12
+#define SI_PARAM_LINE_STIPPLE_TEX      13
+#define SI_PARAM_POS_X_FLOAT           14
+#define SI_PARAM_POS_Y_FLOAT           15
+#define SI_PARAM_POS_Z_FLOAT           16
+#define SI_PARAM_POS_W_FLOAT           17
+#define SI_PARAM_FRONT_FACE            18
+#define SI_PARAM_ANCILLARY             19
+#define SI_PARAM_SAMPLE_COVERAGE       20
+#define SI_PARAM_POS_FIXED_PT          21
 
 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
 
@@ -193,14 +188,6 @@ struct si_shader_selector {
        /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
        unsigned        type;
 
-       /* Whether the shader has to use a conditional assignment to
-        * choose between weights when emulating
-        * pipe_rasterizer_state::force_persample_interp.
-        * If false, "si_emit_spi_ps_input" will take care of it instead.
-        */
-       bool            forces_persample_interp_for_persp;
-       bool            forces_persample_interp_for_linear;
-
        /* GS parameters. */
        unsigned        esgs_itemsize;
        unsigned        gs_input_verts_per_prim;
@@ -245,6 +232,7 @@ union si_shader_key {
                unsigned        poly_stipple:1;
                unsigned        poly_line_smoothing:1;
                unsigned        clamp_color:1;
+               unsigned        force_persample_interp:1;
        } ps;
        struct {
                unsigned        instance_divisors[SI_NUM_VERTEX_BUFFERS];
index 507f45938ce275fbdb4415798e47bddc82d9d418..e9a017534d1b08d52220516753f2aacbbcee8d69 100644 (file)
@@ -133,7 +133,6 @@ union si_state_atoms {
                struct r600_atom *viewports;
                struct r600_atom *stencil_ref;
                struct r600_atom *spi_map;
-               struct r600_atom *spi_ps_input;
        } s;
        struct r600_atom *array[0];
 };
index 6e7311807ddd7dd2c87e0c968c855b082041c5a7..59511c67ed02398eab933d87c738a1eee1da09ad 100644 (file)
@@ -472,6 +472,17 @@ static void si_shader_ps(struct si_shader *shader)
        unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
        uint64_t va;
        bool has_centroid;
+       unsigned input_ena = shader->config.spi_ps_input_ena;
+
+       /* we need to enable at least one of them, otherwise we hang the GPU */
+       assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
+              G_0286CC_PERSP_CENTER_ENA(input_ena) ||
+              G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
+              G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
+              G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
+              G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
+              G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
+              G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
 
        pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
 
@@ -515,6 +526,9 @@ static void si_shader_ps(struct si_shader *shader)
             shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
                spi_shader_col_format = V_028714_SPI_SHADER_32_R;
 
+       si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena);
+       si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, input_ena);
+
        /* Set interpolation controls. */
        has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
                       G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
@@ -706,6 +720,15 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                                                       (is_line && rs->line_smooth)) &&
                                                      sctx->framebuffer.nr_samples <= 1;
                        key->ps.clamp_color = rs->clamp_fragment_color;
+
+                       key->ps.force_persample_interp = rs->force_persample_interp &&
+                                                        rs->multisample_enable &&
+                                                        sctx->framebuffer.nr_samples > 1 &&
+                                                        sctx->ps_iter_samples > 1 &&
+                                                        (sel->info.uses_persp_center ||
+                                                         sel->info.uses_persp_centroid ||
+                                                         sel->info.uses_linear_center ||
+                                                         sel->info.uses_linear_centroid);
                }
 
                key->ps.alpha_func = si_get_alpha_test_func(sctx);
@@ -808,7 +831,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
        sel->type = util_pipe_shader_from_tgsi_processor(sel->info.processor);
        p_atomic_inc(&sscreen->b.num_shaders_created);
 
-       /* First set which opcode uses which (i,j) pair. */
+       /* Set which opcode uses which (i,j) pair. */
        if (sel->info.uses_persp_opcode_interp_centroid)
                sel->info.uses_persp_centroid = true;
 
@@ -823,19 +846,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
            sel->info.uses_linear_opcode_interp_sample)
                sel->info.uses_linear_center = true;
 
-       /* Determine if the shader has to use a conditional assignment when
-        * emulating force_persample_interp.
-        */
-       sel->forces_persample_interp_for_persp =
-               sel->info.uses_persp_center +
-               sel->info.uses_persp_centroid +
-               sel->info.uses_persp_sample >= 2;
-
-       sel->forces_persample_interp_for_linear =
-               sel->info.uses_linear_center +
-               sel->info.uses_linear_centroid +
-               sel->info.uses_linear_sample >= 2;
-
        switch (sel->type) {
        case PIPE_SHADER_GEOMETRY:
                sel->gs_output_prim =
@@ -1181,68 +1191,6 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
        assert(num_interp == num_written);
 }
 
-static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
-{
-       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-       struct si_shader *ps = sctx->ps_shader.current;
-       unsigned input_ena;
-
-       if (!ps)
-               return;
-
-       input_ena = ps->config.spi_ps_input_ena;
-
-       /* we need to enable at least one of them, otherwise we hang the GPU */
-       assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) ||
-           G_0286CC_PERSP_CENTER_ENA(input_ena) ||
-           G_0286CC_PERSP_CENTROID_ENA(input_ena) ||
-           G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) ||
-           G_0286CC_LINEAR_SAMPLE_ENA(input_ena) ||
-           G_0286CC_LINEAR_CENTER_ENA(input_ena) ||
-           G_0286CC_LINEAR_CENTROID_ENA(input_ena) ||
-           G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena));
-
-       if (sctx->force_persample_interp) {
-               unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) +
-                                    G_0286CC_PERSP_CENTER_ENA(input_ena) +
-                                    G_0286CC_PERSP_CENTROID_ENA(input_ena);
-               unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) +
-                                     G_0286CC_LINEAR_CENTER_ENA(input_ena) +
-                                     G_0286CC_LINEAR_CENTROID_ENA(input_ena);
-
-               /* If only one set of (i,j) coordinates is used, we can disable
-                * CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates
-                * where CENTER/CENTROID are expected, effectively forcing per-sample
-                * interpolation.
-                */
-               if (num_persp == 1) {
-                       input_ena &= C_0286CC_PERSP_CENTER_ENA;
-                       input_ena &= C_0286CC_PERSP_CENTROID_ENA;
-                       input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1);
-               }
-               if (num_linear == 1) {
-                       input_ena &= C_0286CC_LINEAR_CENTER_ENA;
-                       input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
-                       input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1);
-               }
-
-               /* If at least 2 sets of coordinates are used, we can't use this
-                * trick and have to select SAMPLE using a conditional assignment
-                * in the shader with "force_persample_interp" being a shader constant.
-                */
-       }
-
-       radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
-       radeon_emit(cs, input_ena);
-       radeon_emit(cs, input_ena);
-
-       if (ps->selector->forces_persample_interp_for_persp ||
-           ps->selector->forces_persample_interp_for_linear)
-               radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
-                                     SI_SGPR_PS_STATE_BITS * 4,
-                                 sctx->force_persample_interp);
-}
-
 /**
  * Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that.
  */
@@ -1774,12 +1722,6 @@ bool si_update_shaders(struct si_context *sctx)
                        si_mark_atom_dirty(sctx, &sctx->spi_map);
                }
 
-               if (si_pm4_state_changed(sctx, ps) ||
-                   sctx->force_persample_interp != rs->force_persample_interp) {
-                       sctx->force_persample_interp = rs->force_persample_interp;
-                       si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
-               }
-
                if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
                        si_mark_atom_dirty(sctx, &sctx->cb_render_state);
 
@@ -1812,7 +1754,6 @@ bool si_update_shaders(struct si_context *sctx)
 void si_init_shader_functions(struct si_context *sctx)
 {
        si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
-       si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input);
 
        sctx->b.b.create_vs_state = si_create_shader_selector;
        sctx->b.b.create_tcs_state = si_create_shader_selector;