radeonsi: don't read tcs_out_lds_layout.vertex_size from an SGPR
author Marek Olšák <marek.olsak@amd.com>
Tue, 5 Sep 2017 17:52:23 +0000 (19:52 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 7 Sep 2017 11:00:07 +0000 (13:00 +0200)
TCS outputs are usually not written to LDS, so no stats here.

Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader_internal.h
src/gallium/drivers/radeonsi/si_state_draw.c

index 96ba907a0822952c6600857b345b1f58a76b5599..9117aec5b77485b292181a7c5eeedae55f346437 100644 (file)
@@ -326,6 +326,23 @@ get_tcs_out_patch_stride(struct si_shader_context *ctx)
        return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 13);
 }
 
+static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
+{
+       assert(ctx->type == PIPE_SHADER_TESS_CTRL);
+
+       if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
+               return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
+
+       return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
+}
+
+static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
+{
+       unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
+
+       return LLVMConstInt(ctx->i32, stride, 0);
+}
+
 static LLVMValueRef
 get_tcs_out_patch0_offset(struct si_shader_context *ctx)
 {
@@ -1086,7 +1103,7 @@ static LLVMValueRef fetch_output_tcs(
        LLVMValueRef dw_addr, stride;
 
        if (reg->Register.Dimension) {
-               stride = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 8);
+               stride = get_tcs_out_vertex_dw_stride(ctx);
                dw_addr = get_tcs_out_current_patch_offset(ctx);
                dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
        } else {
@@ -1139,7 +1156,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
        }
 
        if (reg->Register.Dimension) {
-               stride = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 8);
+               stride = get_tcs_out_vertex_dw_stride(ctx);
                dw_addr = get_tcs_out_current_patch_offset(ctx);
                dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
                skip_lds_store = !sh_info->reads_pervertex_outputs;
index 1231ef494657cea9e3ca5ce298b1e76d01c3e332..ad29ab7e8459304862add51262cba0ec5fb43b38 100644 (file)
@@ -161,8 +161,6 @@ struct si_shader_context {
        /* Layout of TCS outputs / TES inputs:
         *   [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
         *            max = 32*32*4 + 32*4
-        *   [13:20] = stride between output vertices in DW = num_inputs * 4
-        *             max = 32*4
         *   [26:31] = gl_PatchVerticesIn, max = 32
         */
        int param_tcs_out_lds_layout;
index 363a4aed5d510b19661b7179faf896b0ded200dc..05ed85475bf99f996c7b3dcbb42dbfec579273e3 100644 (file)
@@ -233,8 +233,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 
        tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
                        S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4);
-       tcs_out_layout = (output_patch_size / 4) |
-                        ((output_vertex_size / 4) << 13);
+       tcs_out_layout = output_patch_size / 4;
        tcs_out_offsets = (output_patch0_offset / 16) |
                          ((perpatch_output_offset / 16) << 16);
        offchip_layout = *num_patches |