radeonsi: add PS prolog
author Marek Olšák <marek.olsak@amd.com>
Mon, 15 Feb 2016 22:57:54 +0000 (23:57 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Sun, 21 Feb 2016 20:08:58 +0000 (21:08 +0100)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 3c87dd690f7d469e40141b0f3e58938fec8e3512..8bfaf85df5bf02cb5b4de0212561cc582b57f4e1 100644 (file)
@@ -542,6 +542,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
                sscreen->vs_prologs,
                sscreen->vs_epilogs,
                sscreen->tcs_epilogs,
+               sscreen->ps_prologs,
                sscreen->ps_epilogs
        };
        unsigned i;
index 5d204ec6462eba6865552dbd20a2bcd506fa140b..1ac7bc4bd853be8a237e82fd15d9c95c50a4185f 100644 (file)
@@ -92,6 +92,7 @@ struct si_screen {
        struct si_shader_part           *vs_prologs;
        struct si_shader_part           *vs_epilogs;
        struct si_shader_part           *tcs_epilogs;
+       struct si_shader_part           *ps_prologs;
        struct si_shader_part           *ps_epilogs;
 };
 
index 8e50d06b30fb48e82218887955d2829634b61737..bb30c0f5019685313bbf81e54277f0755136ef39 100644 (file)
@@ -879,7 +879,8 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
 static unsigned select_interp_param(struct si_shader_context *ctx,
                                    unsigned param)
 {
-       if (!ctx->shader->key.ps.prolog.force_persample_interp)
+       if (!ctx->shader->key.ps.prolog.force_persample_interp ||
+           !ctx->is_monolithic)
                return param;
 
        /* If the shader doesn't use center/centroid, just return the parameter.
@@ -1023,6 +1024,7 @@ static void declare_input_fs(
        unsigned input_index,
        const struct tgsi_full_declaration *decl)
 {
+       struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
        struct si_shader *shader = ctx->shader;
@@ -1030,6 +1032,26 @@ static void declare_input_fs(
        LLVMValueRef interp_param = NULL;
        int interp_param_idx;
 
+       /* Get colors from input VGPRs (set by the prolog). */
+       if (!ctx->is_monolithic &&
+           decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
+               unsigned i = decl->Semantic.Index;
+               unsigned colors_read = shader->selector->info.colors_read;
+               unsigned mask = colors_read >> (i * 4);
+               unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
+                                 (i ? util_bitcount(colors_read & 0xf) : 0);
+
+               radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
+                       mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
+               radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
+                       mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
+               radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
+                       mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
+               radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
+                       mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
+               return;
+       }
+
        interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
                                                     decl->Interp.Location);
        if (interp_param_idx == -1)
@@ -3970,6 +3992,16 @@ static void create_function(struct si_shader_context *ctx)
                num_params = SI_PARAM_POS_FIXED_PT+1;
 
                if (!ctx->is_monolithic) {
+                       /* Color inputs from the prolog. */
+                       if (shader->selector->info.colors_read) {
+                               unsigned num_color_elements =
+                                       util_bitcount(shader->selector->info.colors_read);
+
+                               assert(num_params + num_color_elements <= ARRAY_SIZE(params));
+                               for (i = 0; i < num_color_elements; i++)
+                                       params[num_params++] = ctx->f32;
+                       }
+
                        /* Outputs for the epilog. */
                        num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
                        num_returns =
@@ -4001,6 +4033,20 @@ static void create_function(struct si_shader_context *ctx)
        si_create_function(ctx, returns, num_returns, params,
                           num_params, last_array_pointer, last_sgpr);
 
+       /* Reserve register locations for VGPR inputs the PS prolog may need. */
+       if (ctx->type == TGSI_PROCESSOR_FRAGMENT &&
+           !ctx->is_monolithic) {
+               radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
+                                         "InitialPSInputAddr",
+                                         S_0286D0_PERSP_SAMPLE_ENA(1) |
+                                         S_0286D0_PERSP_CENTER_ENA(1) |
+                                         S_0286D0_PERSP_CENTROID_ENA(1) |
+                                         S_0286D0_LINEAR_SAMPLE_ENA(1) |
+                                         S_0286D0_LINEAR_CENTER_ENA(1) |
+                                         S_0286D0_LINEAR_CENTROID_ENA(1) |
+                                         S_0286D0_FRONT_FACE_ENA(1));
+       }
+
        shader->num_input_sgprs = 0;
        shader->num_input_vgprs = 0;
 
@@ -5304,6 +5350,157 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
        return shader->epilog != NULL;
 }
 
+/**
+ * Compile the pixel shader prolog described by out->key. This handles:
+ * - two-side color selection and interpolation
+ * - overriding interpolation parameters for the API PS
+ * - polygon stippling (not implemented yet, see the TODO in the body)
+ *
+ * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
+ * overridden by other states (e.g. per-sample interpolation). Interpolated
+ * colors are stored after the preloaded VGPRs. Returns false on compile failure.
+ */
+static bool si_compile_ps_prolog(struct si_screen *sscreen,
+                                LLVMTargetMachineRef tm,
+                                struct pipe_debug_callback *debug,
+                                struct si_shader_part *out)
+{
+       union si_shader_part_key *key = &out->key;
+       struct si_shader shader = {};
+       struct si_shader_context ctx;
+       struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
+       LLVMTypeRef *params;
+       LLVMValueRef ret, func;
+       int last_sgpr, num_params, num_returns, i, num_color_channels;
+       bool status = true;
+
+       si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
+       ctx.type = TGSI_PROCESSOR_FRAGMENT;
+       shader.key.ps.prolog = key->ps_prolog.states;
+
+       /* Number of inputs + 8 color elements (2 colors * 4 channels). */
+       params = alloca((key->ps_prolog.num_input_sgprs +
+                        key->ps_prolog.num_input_vgprs + 8) *
+                       sizeof(LLVMTypeRef));
+
+       /* Declare inputs: SGPRs as i32 first, then VGPRs as f32. */
+       num_params = 0;
+       for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
+               params[num_params++] = ctx.i32;
+       last_sgpr = num_params - 1; /* index of the last SGPR parameter */
+
+       for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
+               params[num_params++] = ctx.f32;
+
+       /* Declare outputs (same as inputs + one f32 per color channel read). */
+       num_returns = num_params;
+       num_color_channels = util_bitcount(key->ps_prolog.colors_read);
+       for (i = 0; i < num_color_channels; i++)
+               params[num_returns++] = ctx.f32;
+
+       /* Create the function; "params" is both the return and the input type list. */
+       si_create_function(&ctx, params, num_returns, params,
+                          num_params, -1, last_sgpr);
+       func = ctx.radeon_bld.main_fn;
+
+       /* Copy inputs to outputs. This should be no-op, as the registers match,
+        * but it will prevent the compiler from overwriting them unintentionally.
+        */
+       ret = ctx.return_value;
+       for (i = 0; i < num_params; i++) {
+               LLVMValueRef p = LLVMGetParam(func, i);
+               ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+       }
+
+       /* Interpolate colors (COLOR0 and COLOR1; 4 bits of colors_read each). */
+       for (i = 0; i < 2; i++) {
+               unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
+               unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
+                                    key->ps_prolog.face_vgpr_index;
+               LLVMValueRef interp[2], color[4];
+               LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;
+
+               if (!writemask)
+                       continue;
+
+               /* If the interpolation qualifier is not CONSTANT (-1). */
+               if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
+                       unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
+                                              key->ps_prolog.color_interp_vgpr_index[i];
+
+                       interp[0] = LLVMGetParam(func, interp_vgpr);
+                       interp[1] = LLVMGetParam(func, interp_vgpr + 1);
+                       interp_ij = lp_build_gather_values(gallivm, interp, 2);
+                       interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
+                                                    ctx.v2i32, "");
+               }
+
+               /* Use the absolute location of the input. */
+               prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
+
+               if (key->ps_prolog.states.color_two_side) {
+                       face = LLVMGetParam(func, face_vgpr);
+                       face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
+               }
+
+               interp_fs_input(&ctx,
+                               key->ps_prolog.color_attr_index[i],
+                               TGSI_SEMANTIC_COLOR, i,
+                               key->ps_prolog.num_interp_inputs,
+                               key->ps_prolog.colors_read, interp_ij,
+                               prim_mask, face, color);
+
+               while (writemask) { /* append each read channel after the inputs */
+                       unsigned chan = u_bit_scan(&writemask);
+                       ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
+                                                  num_params++, "");
+               }
+       }
+
+       /* Force per-sample interpolation. */
+       if (key->ps_prolog.states.force_persample_interp) {
+               unsigned i, base = key->ps_prolog.num_input_sgprs;
+               LLVMValueRef persp_sample[2], linear_sample[2];
+
+               /* Read PERSP_SAMPLE. */
+               for (i = 0; i < 2; i++)
+                       persp_sample[i] = LLVMGetParam(func, base + i);
+               /* Overwrite PERSP_CENTER. */
+               for (i = 0; i < 2; i++)
+                       ret = LLVMBuildInsertValue(gallivm->builder, ret,
+                                                  persp_sample[i], base + 2 + i, "");
+               /* Overwrite PERSP_CENTROID. */
+               for (i = 0; i < 2; i++)
+                       ret = LLVMBuildInsertValue(gallivm->builder, ret,
+                                                  persp_sample[i], base + 4 + i, "");
+               /* Read LINEAR_SAMPLE. */
+               for (i = 0; i < 2; i++)
+                       linear_sample[i] = LLVMGetParam(func, base + 6 + i);
+               /* Overwrite LINEAR_CENTER. */
+               for (i = 0; i < 2; i++)
+                       ret = LLVMBuildInsertValue(gallivm->builder, ret,
+                                                  linear_sample[i], base + 8 + i, "");
+               /* Overwrite LINEAR_CENTROID. */
+               for (i = 0; i < 2; i++)
+                       ret = LLVMBuildInsertValue(gallivm->builder, ret,
+                                                  linear_sample[i], base + 10 + i, "");
+       }
+
+       /* TODO: polygon stippling */
+
+       /* Compile. */
+       LLVMBuildRet(gallivm->builder, ret);
+       radeon_llvm_finalize_module(&ctx.radeon_bld);
+
+       if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
+                           gallivm->module, debug, ctx.type,
+                           "Fragment Shader Prolog"))
+               status = false; /* a non-zero result is treated as failure */
+
+       radeon_llvm_dispose(&ctx.radeon_bld);
+       return status;
+}
+
 /**
  * Compile the pixel shader epilog. This handles everything that must be
  * emulated for pixel shader exports. (alpha-test, format conversions, etc)
@@ -5430,7 +5627,103 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
                                      struct pipe_debug_callback *debug)
 {
        struct tgsi_shader_info *info = &shader->selector->info;
+       union si_shader_part_key prolog_key;
        union si_shader_part_key epilog_key;
+       unsigned i;
+
+       /* Get the prolog. */
+       memset(&prolog_key, 0, sizeof(prolog_key));
+       prolog_key.ps_prolog.states = shader->key.ps.prolog;
+       prolog_key.ps_prolog.colors_read = info->colors_read;
+       prolog_key.ps_prolog.num_input_sgprs = shader->num_input_sgprs;
+       prolog_key.ps_prolog.num_input_vgprs = shader->num_input_vgprs;
+
+       if (info->colors_read) {
+               unsigned *color = shader->selector->color_attr_index;
+
+               if (shader->key.ps.prolog.color_two_side) {
+                       /* BCOLORs are stored after the last input. */
+                       prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
+                       prolog_key.ps_prolog.face_vgpr_index = shader->face_vgpr_index;
+                       shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
+               }
+
+               for (i = 0; i < 2; i++) {
+                       unsigned location = info->input_interpolate_loc[color[i]];
+
+                       if (!(info->colors_read & (0xf << i*4)))
+                               continue;
+
+                       prolog_key.ps_prolog.color_attr_index[i] = color[i];
+
+                       /* Force per-sample interpolation for the colors here. */
+                       if (shader->key.ps.prolog.force_persample_interp)
+                               location = TGSI_INTERPOLATE_LOC_SAMPLE;
+
+                       switch (info->input_interpolate[color[i]]) {
+                       case TGSI_INTERPOLATE_CONSTANT:
+                               prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1;
+                               break;
+                       case TGSI_INTERPOLATE_PERSPECTIVE:
+                       case TGSI_INTERPOLATE_COLOR:
+                               switch (location) {
+                               case TGSI_INTERPOLATE_LOC_SAMPLE:
+                                       prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0;
+                                       shader->config.spi_ps_input_ena |=
+                                               S_0286CC_PERSP_SAMPLE_ENA(1);
+                                       break;
+                               case TGSI_INTERPOLATE_LOC_CENTER:
+                                       prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2;
+                                       shader->config.spi_ps_input_ena |=
+                                               S_0286CC_PERSP_CENTER_ENA(1);
+                                       break;
+                               case TGSI_INTERPOLATE_LOC_CENTROID:
+                                       prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4;
+                                       shader->config.spi_ps_input_ena |=
+                                               S_0286CC_PERSP_CENTROID_ENA(1);
+                                       break;
+                               default:
+                                       assert(0);
+                               }
+                               break;
+                       case TGSI_INTERPOLATE_LINEAR:
+                               switch (location) {
+                               case TGSI_INTERPOLATE_LOC_SAMPLE:
+                                       prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6;
+                                       shader->config.spi_ps_input_ena |=
+                                               S_0286CC_LINEAR_SAMPLE_ENA(1);
+                                       break;
+                               case TGSI_INTERPOLATE_LOC_CENTER:
+                                       prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8;
+                                       shader->config.spi_ps_input_ena |=
+                                               S_0286CC_LINEAR_CENTER_ENA(1);
+                                       break;
+                               case TGSI_INTERPOLATE_LOC_CENTROID:
+                                       prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10;
+                                       shader->config.spi_ps_input_ena |=
+                                               S_0286CC_LINEAR_CENTROID_ENA(1);
+                                       break;
+                               default:
+                                       assert(0);
+                               }
+                               break;
+                       default:
+                               assert(0);
+                       }
+               }
+       }
+
+       /* The prolog is a no-op if these aren't set. */
+       if (prolog_key.ps_prolog.colors_read ||
+           prolog_key.ps_prolog.states.force_persample_interp ||
+           prolog_key.ps_prolog.states.poly_stipple) {
+               shader->prolog =
+                       si_get_shader_part(sscreen, &sscreen->ps_prologs,
+                                          &prolog_key, tm, debug,
+                                          si_compile_ps_prolog);
+               if (!shader->prolog)
+                       return false;
+       }
 
        /* Get the epilog. */
        memset(&epilog_key, 0, sizeof(epilog_key));
@@ -5447,6 +5740,35 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
        if (!shader->epilog)
                return false;
 
+       /* Set up the enable bits for per-sample shading if needed. */
+       if (shader->key.ps.prolog.force_persample_interp) {
+               if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
+                   G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
+                       shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
+                       shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
+                       shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
+               }
+               if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
+                   G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
+                       shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
+                       shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
+                       shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
+               }
+       }
+
+       /* POS_W_FLOAT requires that one of the perspective weights is enabled. */
+       if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
+           !(shader->config.spi_ps_input_ena & 0xf)) {
+               shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
+               assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr));
+       }
+
+       /* At least one pair of interpolation weights must be enabled. */
+       if (!(shader->config.spi_ps_input_ena & 0x7f)) {
+               shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
+               assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
+       }
+
        /* The sample mask input is always enabled, because the API shader always
         * passes it through to the epilog. Disable it here if it's unused.
         */
index 928cb2e18e981c7a28ed5102b6a1544799b061e2..196fa3e9086637c082a0f2488fdbcf48ba2117f4 100644 (file)
@@ -169,7 +169,7 @@ struct radeon_shader_reloc;
 #define SI_PARAM_SAMPLE_COVERAGE       20
 #define SI_PARAM_POS_FIXED_PT          21
 
-#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
+#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
 
 struct si_shader;
 
@@ -199,6 +199,7 @@ struct si_shader_selector {
        unsigned        max_gsvs_emit_size;
 
        /* PS parameters. */
+       unsigned        color_attr_index[2];
        unsigned        db_shader_control;
        /* Set 0xf or 0x0 (4 bits) per each written output.
         * ANDed with spi_shader_col_format.
@@ -281,6 +282,17 @@ union si_shader_part_key {
        struct {
                struct si_tcs_epilog_bits states;
        } tcs_epilog;
+       struct { /* Key selecting one compiled pixel shader prolog variant. */
+               struct si_ps_prolog_bits states;
+               unsigned        num_input_sgprs:5;
+               unsigned        num_input_vgprs:5;
+               /* Color interpolation and two-side color selection. */
+               unsigned        colors_read:8; /* color input components read */
+               unsigned        num_interp_inputs:5; /* BCOLOR is at this location */
+               unsigned        face_vgpr_index:5;
+               char            color_attr_index[2];
+               signed char     color_interp_vgpr_index[2]; /* -1 == constant; must be signed, plain char may be unsigned */
+       } ps_prolog;
        struct {
                struct si_ps_epilog_bits states;
                unsigned        colors_written:8;
index 08f5d88e8dc714ae96e712d75e9516c4aad3e074..fbc377a6a4eb0cf69a308aa1d1dd70b60d61b2dd 100644 (file)
@@ -903,6 +903,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                for (i = 0; i < 8; i++)
                        if (sel->info.colors_written & (1 << i))
                                sel->colors_written_4bit |= 0xf << (4 * i);
+
+               for (i = 0; i < sel->info.num_inputs; i++) {
+                       if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+                               int index = sel->info.input_semantic_index[i];
+                               sel->color_attr_index[index] = i;
+                       }
+               }
                break;
        }