radeonsi: skip LDS stores in TCS if there are no LDS output reads
author Marek Olšák <marek.olsak@amd.com>
Sat, 18 Feb 2017 14:30:25 +0000 (15:30 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 21 Feb 2017 20:27:23 +0000 (21:27 +0100)
This removes a lot of useless LDS stores.

A few games read TESSINNER/OUTER, but not any other outputs. Most games
don't read any outputs.

The only app doing LDS output reads is UE4 Lightroom Interior.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader.c

index 4075eefe62333616749eaf20a3f1e0e333cf761f..b85874ae4031801d6a1894fea09e2ca68e6edc70 100644 (file)
@@ -982,10 +982,12 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+       const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
        unsigned chan_index;
        LLVMValueRef dw_addr, stride;
        LLVMValueRef rw_buffers, buffer, base, buf_addr;
        LLVMValueRef values[4];
+       bool skip_lds_store;
 
        /* Only handle per-patch and per-vertex outputs here.
         * Vectors will be lowered to scalars and this function will be called again.
@@ -1000,9 +1002,20 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
                dw_addr = get_tcs_out_current_patch_offset(ctx);
                dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
+               skip_lds_store = !sh_info->reads_pervertex_outputs;
        } else {
                dw_addr = get_tcs_out_current_patch_data_offset(ctx);
                dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr);
+               skip_lds_store = !sh_info->reads_perpatch_outputs;
+
+               if (!reg->Register.Indirect) {
+                       int name = sh_info->output_semantic_name[reg->Register.Index];
+
+                       /* Always write tess factors into LDS for the TCS epilog. */
+                       if (name == TGSI_SEMANTIC_TESSINNER ||
+                           name == TGSI_SEMANTIC_TESSOUTER)
+                               skip_lds_store = false;
+               }
        }
 
        rw_buffers = LLVMGetParam(ctx->main_fn,
@@ -1020,7 +1033,9 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                if (inst->Instruction.Saturate)
                        value = ac_emit_clamp(&ctx->ac, value);
 
-               lds_store(bld_base, chan_index, dw_addr, value);
+               /* Skip LDS stores if there is no LDS read of this output. */
+               if (!skip_lds_store)
+                       lds_store(bld_base, chan_index, dw_addr, value);
 
                value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
                values[chan_index] = value;