radeonsi/gfx9: add VS prolog support for merged LS-HS
author Marek Olšák <marek.olsak@amd.com>
Mon, 20 Feb 2017 21:25:17 +0000 (22:25 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 28 Apr 2017 19:47:35 +0000 (21:47 +0200)
HS input VGPRs must be reserved.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h

index 574244b484c0b18cfd5cfdb756c85bccd45d51fd..ecb8ee9283919c399e07a01052d12e581b0f5192 100644 (file)
@@ -7078,6 +7078,9 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
        key->vs_prolog.num_input_sgprs = num_input_sgprs;
        key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
 
+       if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL)
+               key->vs_prolog.num_merged_next_stage_vgprs = 2;
+
        /* Set the instanceID flag. */
        for (unsigned i = 0; i < info->num_inputs; i++)
                if (key->vs_prolog.states.instance_divisors[i])
@@ -7861,15 +7864,19 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
        LLVMTypeRef *params, *returns;
        LLVMValueRef ret, func;
        int last_sgpr, num_params, num_returns, i;
+       unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
+                                key->vs_prolog.num_merged_next_stage_vgprs;
+       unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4;
+       unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs +
+                                     num_input_vgprs;
+       unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
 
-       ctx->param_vertex_id = key->vs_prolog.num_input_sgprs;
-       ctx->param_instance_id = key->vs_prolog.num_input_sgprs + 3;
+       ctx->param_vertex_id = first_vs_vgpr;
+       ctx->param_instance_id = first_vs_vgpr + 3;
 
        /* 4 preloaded VGPRs + vertex load indices as prolog outputs */
-       params = alloca((key->vs_prolog.num_input_sgprs + 4) *
-                       sizeof(LLVMTypeRef));
-       returns = alloca((key->vs_prolog.num_input_sgprs + 4 +
-                         key->vs_prolog.last_input + 1) *
+       params = alloca(num_all_input_regs * sizeof(LLVMTypeRef));
+       returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
                         sizeof(LLVMTypeRef));
        num_params = 0;
        num_returns = 0;
@@ -7882,8 +7889,8 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
        }
        last_sgpr = num_params - 1;
 
-       /* 4 preloaded VGPRs (outputs must be floats) */
-       for (i = 0; i < 4; i++) {
+       /* Preloaded VGPRs (outputs must be floats) */
+       for (i = 0; i < num_input_vgprs; i++) {
                params[num_params++] = ctx->i32;
                returns[num_returns++] = ctx->f32;
        }
@@ -7905,7 +7912,7 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
                LLVMValueRef p = LLVMGetParam(func, i);
                ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
        }
-       for (i = num_params - 4; i < num_params; i++) {
+       for (; i < num_params; i++) {
                LLVMValueRef p = LLVMGetParam(func, i);
                p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
                ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
@@ -7919,13 +7926,15 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
                if (divisor) {
                        /* InstanceID / Divisor + StartInstance */
                        index = get_instance_index_for_fetch(ctx,
+                                                            user_sgpr_base +
                                                             SI_SGPR_START_INSTANCE,
                                                             divisor);
                } else {
                        /* VertexID + BaseVertex */
                        index = LLVMBuildAdd(gallivm->builder,
                                             LLVMGetParam(func, ctx->param_vertex_id),
-                                            LLVMGetParam(func, SI_SGPR_BASE_VERTEX), "");
+                                            LLVMGetParam(func, user_sgpr_base +
+                                                               SI_SGPR_BASE_VERTEX), "");
                }
 
                index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
index 34ba1412071f0f03adc5d433cbab75a9b271b610..2d660ac6cc1e02b4949ea37cd7125fcb8e05b2b4 100644 (file)
@@ -352,6 +352,8 @@ union si_shader_part_key {
        struct {
                struct si_vs_prolog_bits states;
                unsigned        num_input_sgprs:6;
+               /* For merged stages such as LS-HS, HS input VGPRs are first. */
+               unsigned        num_merged_next_stage_vgprs:3;
                unsigned        last_input:4;
        } vs_prolog;
        struct {