radeonsi: separate 2 pieces of code from create_function
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
index 655894146fdbfc780241fb13157e52871575037d..1a852c5cba25ffc7c6f7a8a3f87de789660ec8e1 100644 (file)
@@ -376,7 +376,7 @@ static LLVMValueRef build_indexed_load_const(
 
 static LLVMValueRef get_instance_index_for_fetch(
        struct radeon_llvm_context *radeon_bld,
-       unsigned divisor)
+       unsigned param_start_instance, unsigned divisor)
 {
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
@@ -390,8 +390,8 @@ static LLVMValueRef get_instance_index_for_fetch(
                result = LLVMBuildUDiv(gallivm->builder, result,
                                lp_build_const_int32(gallivm, divisor), "");
 
-       return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
-                       radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
+       return LLVMBuildAdd(gallivm->builder, result,
+                           LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
 }
 
 static void declare_input_vs(
@@ -403,7 +403,8 @@ static void declare_input_vs(
        struct gallivm_state *gallivm = base->gallivm;
        struct si_shader_context *ctx =
                si_shader_context(&radeon_bld->soa.bld_base);
-       unsigned divisor = ctx->shader->key.vs.instance_divisors[input_index];
+       unsigned divisor =
+               ctx->shader->key.vs.prolog.instance_divisors[input_index];
 
        unsigned chan;
 
@@ -428,7 +429,9 @@ static void declare_input_vs(
        if (divisor) {
                /* Build index from instance ID, start instance and divisor */
                ctx->shader->uses_instanceid = true;
-               buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, divisor);
+               buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld,
+                                                           SI_PARAM_START_INSTANCE,
+                                                           divisor);
        } else {
                /* Load the buffer index for vertices. */
                LLVMValueRef vertex_id = LLVMGetParam(ctx->radeon_bld.main_fn,
@@ -854,7 +857,7 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
 static unsigned select_interp_param(struct si_shader_context *ctx,
                                    unsigned param)
 {
-       if (!ctx->shader->key.ps.force_persample_interp)
+       if (!ctx->shader->key.ps.prolog.force_persample_interp)
                return param;
 
        /* If the shader doesn't use center/centroid, just return the parameter.
@@ -924,7 +927,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
        intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
 
        if (semantic_name == TGSI_SEMANTIC_COLOR &&
-           ctx->shader->key.ps.color_two_side) {
+           ctx->shader->key.ps.prolog.color_two_side) {
                LLVMValueRef args[4];
                LLVMValueRef is_face_positive;
                LLVMValueRef back_attr_number;
@@ -1331,12 +1334,12 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 
        if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
                const union si_shader_key *key = &ctx->shader->key;
-               unsigned col_formats = key->ps.spi_shader_col_format;
+               unsigned col_formats = key->ps.epilog.spi_shader_col_format;
                int cbuf = target - V_008DFC_SQ_EXP_MRT;
 
                assert(cbuf >= 0 && cbuf < 8);
                spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
-               is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1;
+               is_int8 = (key->ps.epilog.color_is_int8 >> cbuf) & 0x1;
        }
 
        args[4] = uint->zero; /* COMPR flag */
@@ -1489,13 +1492,13 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
 
-       if (ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
+       if (ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
                LLVMValueRef alpha_ref = LLVMGetParam(ctx->radeon_bld.main_fn,
                                SI_PARAM_ALPHA_REF);
 
                LLVMValueRef alpha_pass =
                        lp_build_cmp(&bld_base->base,
-                                    ctx->shader->key.ps.alpha_func,
+                                    ctx->shader->key.ps.epilog.alpha_func,
                                     alpha, alpha_ref);
                LLVMValueRef arg =
                        lp_build_select(&bld_base->base,
@@ -1512,7 +1515,8 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
 }
 
 static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
-                                                 LLVMValueRef alpha)
+                                                 LLVMValueRef alpha,
+                                                 unsigned samplemask_param)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -1520,7 +1524,7 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *
 
        /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
        coverage = LLVMGetParam(ctx->radeon_bld.main_fn,
-                               SI_PARAM_SAMPLE_COVERAGE);
+                               samplemask_param);
        coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
 
        coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
@@ -1990,7 +1994,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
                                  invocation_id, bld_base->uint_bld.zero, ""));
 
        /* Determine the layout of one tess factor element in the buffer. */
-       switch (shader->key.tcs.prim_mode) {
+       switch (shader->key.tcs.epilog.prim_mode) {
        case PIPE_PRIM_LINES:
                stride = 2; /* 2 dwords, 1 vec2 store */
                outer_comps = 2;
@@ -2285,6 +2289,7 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
 
 static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
                                LLVMValueRef *color, unsigned index,
+                               unsigned samplemask_param,
                                bool is_last)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -2292,30 +2297,31 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
        int i;
 
        /* Clamp color */
-       if (ctx->shader->key.ps.clamp_color)
+       if (ctx->shader->key.ps.epilog.clamp_color)
                for (i = 0; i < 4; i++)
                        color[i] = radeon_llvm_saturate(bld_base, color[i]);
 
        /* Alpha to one */
-       if (ctx->shader->key.ps.alpha_to_one)
+       if (ctx->shader->key.ps.epilog.alpha_to_one)
                color[3] = base->one;
 
        /* Alpha test */
        if (index == 0 &&
-           ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
+           ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
                si_alpha_test(bld_base, color[3]);
 
        /* Line & polygon smoothing */
-       if (ctx->shader->key.ps.poly_line_smoothing)
-               color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
+       if (ctx->shader->key.ps.epilog.poly_line_smoothing)
+               color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3],
+                                                        samplemask_param);
 
        /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
-       if (ctx->shader->key.ps.last_cbuf > 0) {
+       if (ctx->shader->key.ps.epilog.last_cbuf > 0) {
                LLVMValueRef args[8][9];
                int c, last = -1;
 
                /* Get the export arguments, also find out what the last one is. */
-               for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) {
+               for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
                        si_llvm_init_export_args(bld_base, color,
                                                 V_008DFC_SQ_EXP_MRT + c, args[c]);
                        if (args[c][0] != bld_base->uint_bld.zero)
@@ -2323,7 +2329,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
                }
 
                /* Emit all exports. */
-               for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) {
+               for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) {
                        if (is_last && last == c) {
                                args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
                                args[c][2] = bld_base->uint_bld.one; /* DONE bit */
@@ -2386,11 +2392,11 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
         * Otherwise, find the last color export.
         */
        if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) {
-               unsigned spi_format = shader->key.ps.spi_shader_col_format;
+               unsigned spi_format = shader->key.ps.epilog.spi_shader_col_format;
 
                /* Don't export NULL and return if alpha-test is enabled. */
-               if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS &&
-                   shader->key.ps.alpha_func != PIPE_FUNC_NEVER &&
+               if (shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS &&
+                   shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER &&
                    (spi_format & 0xf) == 0)
                        spi_format |= V_028714_SPI_SHADER_32_AR;
 
@@ -2401,10 +2407,10 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
                                continue;
 
                        /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
-                       if (shader->key.ps.last_cbuf > 0) {
+                       if (shader->key.ps.epilog.last_cbuf > 0) {
                                /* Just set this if any of the colorbuffers are enabled. */
                                if (spi_format &
-                                   ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1))
+                                   ((1llu << (4 * (shader->key.ps.epilog.last_cbuf + 1))) - 1))
                                        last_color_export = i;
                                continue;
                        }
@@ -2446,6 +2452,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
                                                         ctx->radeon_bld.soa.outputs[i][j], "");
 
                        si_export_mrt_color(bld_base, color, semantic_index,
+                                           SI_PARAM_SAMPLE_COVERAGE,
                                            last_color_export == i);
                        break;
                default:
@@ -3547,6 +3554,30 @@ static const struct lp_build_tgsi_action interp_action = {
        .emit = build_interp_intrinsic,
 };
 
+static void si_create_function(struct si_shader_context *ctx,
+                              LLVMTypeRef *returns, unsigned num_returns,
+                              LLVMTypeRef *params, unsigned num_params,
+                              int last_array_pointer, int last_sgpr)
+{
+       int i;
+
+       radeon_llvm_create_func(&ctx->radeon_bld, returns, num_returns,
+                               params, num_params);
+       radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type);
+       ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type);
+
+       for (i = 0; i <= last_sgpr; ++i) {
+               LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);
+
+               /* We tell llvm that array inputs are passed by value to allow Sinking pass
+                * to move load. Inputs are constant so this is fine. */
+               if (i <= last_array_pointer)
+                       LLVMAddAttribute(P, LLVMByValAttribute);
+               else
+                       LLVMAddAttribute(P, LLVMInRegAttribute);
+       }
+}
+
 static void create_meta_data(struct si_shader_context *ctx)
 {
        struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm;
@@ -3600,6 +3631,27 @@ static unsigned llvm_get_type_size(LLVMTypeRef type)
        }
 }
 
+static void declare_tess_lds(struct si_shader_context *ctx)
+{
+       struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+       LLVMTypeRef i32 = ctx->radeon_bld.soa.bld_base.uint_bld.elem_type;
+
+       /* This is the upper bound, maximum is 32 inputs times 32 vertices */
+       unsigned vertex_data_dw_size = 32*32*4;
+       unsigned patch_data_dw_size = 32*4;
+       /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
+       unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
+       unsigned lds_dwords = patch_dw_size;
+
+       /* The actual size is computed outside of the shader to reduce
+        * the number of shader variants. */
+       ctx->lds =
+               LLVMAddGlobalInAddressSpace(gallivm->module,
+                                           LLVMArrayType(i32, lds_dwords),
+                                           "tess_lds",
+                                           LOCAL_ADDR_SPACE);
+}
+
 static void create_function(struct si_shader_context *ctx)
 {
        struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
@@ -3732,26 +3784,15 @@ static void create_function(struct si_shader_context *ctx)
        }
 
        assert(num_params <= Elements(params));
-       radeon_llvm_create_func(&ctx->radeon_bld, NULL, 0,
-                               params, num_params);
-       radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type);
-       ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type);
+
+       si_create_function(ctx, NULL, 0, params,
+                          num_params, last_array_pointer, last_sgpr);
 
        shader->num_input_sgprs = 0;
        shader->num_input_vgprs = 0;
 
-       for (i = 0; i <= last_sgpr; ++i) {
-               LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i);
-
-               /* We tell llvm that array inputs are passed by value to allow Sinking pass
-                * to move load. Inputs are constant so this is fine. */
-               if (i <= last_array_pointer)
-                       LLVMAddAttribute(P, LLVMByValAttribute);
-               else
-                       LLVMAddAttribute(P, LLVMInRegAttribute);
-
+       for (i = 0; i <= last_sgpr; ++i)
                shader->num_input_sgprs += llvm_get_type_size(params[i]) / 4;
-       }
 
        /* Unused fragment shader inputs are eliminated by the compiler,
         * so we don't know yet how many there will be.
@@ -3775,22 +3816,8 @@ static void create_function(struct si_shader_context *ctx)
 
        if ((ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
            ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
-           ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
-               /* This is the upper bound, maximum is 32 inputs times 32 vertices */
-               unsigned vertex_data_dw_size = 32*32*4;
-               unsigned patch_data_dw_size = 32*4;
-               /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
-               unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
-               unsigned lds_dwords = patch_dw_size;
-
-               /* The actual size is computed outside of the shader to reduce
-                * the number of shader variants. */
-               ctx->lds =
-                       LLVMAddGlobalInAddressSpace(gallivm->module,
-                                                   LLVMArrayType(ctx->i32, lds_dwords),
-                                                   "tess_lds",
-                                                   LOCAL_ADDR_SPACE);
-       }
+           ctx->type == TGSI_PROCESSOR_TESS_EVAL)
+               declare_tess_lds(ctx);
 }
 
 static void preload_constants(struct si_shader_context *ctx)
@@ -4313,35 +4340,38 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
        switch (shader) {
        case PIPE_SHADER_VERTEX:
                fprintf(f, "  instance_divisors = {");
-               for (i = 0; i < Elements(key->vs.instance_divisors); i++)
+               for (i = 0; i < Elements(key->vs.prolog.instance_divisors); i++)
                        fprintf(f, !i ? "%u" : ", %u",
-                               key->vs.instance_divisors[i]);
+                               key->vs.prolog.instance_divisors[i]);
                fprintf(f, "}\n");
                fprintf(f, "  as_es = %u\n", key->vs.as_es);
                fprintf(f, "  as_ls = %u\n", key->vs.as_ls);
-               fprintf(f, "  export_prim_id = %u\n", key->vs.export_prim_id);
+               fprintf(f, "  export_prim_id = %u\n", key->vs.epilog.export_prim_id);
                break;
 
        case PIPE_SHADER_TESS_CTRL:
-               fprintf(f, "  prim_mode = %u\n", key->tcs.prim_mode);
+               fprintf(f, "  prim_mode = %u\n", key->tcs.epilog.prim_mode);
                break;
 
        case PIPE_SHADER_TESS_EVAL:
                fprintf(f, "  as_es = %u\n", key->tes.as_es);
-               fprintf(f, "  export_prim_id = %u\n", key->tes.export_prim_id);
+               fprintf(f, "  export_prim_id = %u\n", key->tes.epilog.export_prim_id);
                break;
 
        case PIPE_SHADER_GEOMETRY:
                break;
 
        case PIPE_SHADER_FRAGMENT:
-               fprintf(f, "  spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format);
-               fprintf(f, "  last_cbuf = %u\n", key->ps.last_cbuf);
-               fprintf(f, "  color_two_side = %u\n", key->ps.color_two_side);
-               fprintf(f, "  alpha_func = %u\n", key->ps.alpha_func);
-               fprintf(f, "  alpha_to_one = %u\n", key->ps.alpha_to_one);
-               fprintf(f, "  poly_stipple = %u\n", key->ps.poly_stipple);
-               fprintf(f, "  clamp_color = %u\n", key->ps.clamp_color);
+               fprintf(f, "  prolog.color_two_side = %u\n", key->ps.prolog.color_two_side);
+               fprintf(f, "  prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple);
+               fprintf(f, "  prolog.force_persample_interp = %u\n", key->ps.prolog.force_persample_interp);
+               fprintf(f, "  epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format);
+               fprintf(f, "  epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8);
+               fprintf(f, "  epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf);
+               fprintf(f, "  epilog.alpha_func = %u\n", key->ps.epilog.alpha_func);
+               fprintf(f, "  epilog.alpha_to_one = %u\n", key->ps.epilog.alpha_to_one);
+               fprintf(f, "  epilog.poly_line_smoothing = %u\n", key->ps.epilog.poly_line_smoothing);
+               fprintf(f, "  epilog.clamp_color = %u\n", key->ps.epilog.clamp_color);
                break;
 
        default:
@@ -4427,7 +4457,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
        LLVMModuleRef mod;
        int r = 0;
        bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
-                           shader->key.ps.poly_stipple;
+                           shader->key.ps.prolog.poly_stipple;
 
        if (poly_stipple) {
                tokens = util_pstipple_create_fragment_shader(tokens, NULL,