radeonsi: skip LDS stores in TCS if there are no LDS output reads
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
index 1829e3ec118be3bb952140a1582dc3ec096dc256..b85874ae4031801d6a1894fea09e2ca68e6edc70 100644 (file)
@@ -982,10 +982,12 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+       const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
        unsigned chan_index;
        LLVMValueRef dw_addr, stride;
        LLVMValueRef rw_buffers, buffer, base, buf_addr;
        LLVMValueRef values[4];
+       bool skip_lds_store;
 
        /* Only handle per-patch and per-vertex outputs here.
         * Vectors will be lowered to scalars and this function will be called again.
@@ -1000,9 +1002,20 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
                dw_addr = get_tcs_out_current_patch_offset(ctx);
                dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
+               skip_lds_store = !sh_info->reads_pervertex_outputs;
        } else {
                dw_addr = get_tcs_out_current_patch_data_offset(ctx);
                dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr);
+               skip_lds_store = !sh_info->reads_perpatch_outputs;
+
+               if (!reg->Register.Indirect) {
+                       int name = sh_info->output_semantic_name[reg->Register.Index];
+
+                       /* Always write tess factors into LDS for the TCS epilog. */
+                       if (name == TGSI_SEMANTIC_TESSINNER ||
+                           name == TGSI_SEMANTIC_TESSOUTER)
+                               skip_lds_store = false;
+               }
        }
 
        rw_buffers = LLVMGetParam(ctx->main_fn,
@@ -1018,9 +1031,11 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                LLVMValueRef value = dst[chan_index];
 
                if (inst->Instruction.Saturate)
-                       value = si_llvm_saturate(bld_base, value);
+                       value = ac_emit_clamp(&ctx->ac, value);
 
-               lds_store(bld_base, chan_index, dw_addr, value);
+               /* Skip LDS stores if no output of this kind is read back. */
+               if (!skip_lds_store)
+                       lds_store(bld_base, chan_index, dw_addr, value);
 
                value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
                values[chan_index] = value;
@@ -1735,7 +1750,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
        LLVMValueRef val[4];
        unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
        unsigned chan;
-       bool is_int8;
+       bool is_int8, is_int10;
 
        /* Default is 0xf. Adjusted below depending on the format. */
        args[0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
@@ -1757,6 +1772,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                assert(cbuf >= 0 && cbuf < 8);
                spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
                is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & 0x1;
+               is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & 0x1;
        }
 
        args[4] = uint->zero; /* COMPR flag */
@@ -1810,7 +1826,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 
        case V_028714_SPI_SHADER_UNORM16_ABGR:
                for (chan = 0; chan < 4; chan++) {
-                       val[chan] = si_llvm_saturate(bld_base, values[chan]);
+                       val[chan] = ac_emit_clamp(&ctx->ac, values[chan]);
                        val[chan] = LLVMBuildFMul(builder, val[chan],
                                                  lp_build_const_float(gallivm, 65535), "");
                        val[chan] = LLVMBuildFAdd(builder, val[chan],
@@ -1856,13 +1872,17 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                break;
 
        case V_028714_SPI_SHADER_UINT16_ABGR: {
-               LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ?
-                                                       255 : 65535);
+               LLVMValueRef max_rgb = lp_build_const_int32(gallivm,
+                       is_int8 ? 255 : is_int10 ? 1023 : 65535);
+               LLVMValueRef max_alpha =
+                       !is_int10 ? max_rgb : lp_build_const_int32(gallivm, 3);
+
                /* Clamp. */
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
                        val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN,
-                                                             val[chan], max);
+                                       val[chan],
+                                       chan == 3 ? max_alpha : max_rgb);
                }
 
                args[4] = uint->one; /* COMPR flag */
@@ -1874,19 +1894,24 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
        }
 
        case V_028714_SPI_SHADER_SINT16_ABGR: {
-               LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ?
-                                                       127 : 32767);
-               LLVMValueRef min = lp_build_const_int32(gallivm, is_int8 ?
-                                                       -128 : -32768);
+               LLVMValueRef max_rgb = lp_build_const_int32(gallivm,
+                       is_int8 ? 127 : is_int10 ? 511 : 32767);
+               LLVMValueRef min_rgb = lp_build_const_int32(gallivm,
+                       is_int8 ? -128 : is_int10 ? -512 : -32768);
+               LLVMValueRef max_alpha =
+                       !is_int10 ? max_rgb : lp_build_const_int32(gallivm, 1);
+               LLVMValueRef min_alpha =
+                       !is_int10 ? min_rgb : lp_build_const_int32(gallivm, -2);
+
                /* Clamp. */
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
                        val[chan] = lp_build_emit_llvm_binary(bld_base,
-                                                             TGSI_OPCODE_IMIN,
-                                                             val[chan], max);
+                                       TGSI_OPCODE_IMIN,
+                                       val[chan], chan == 3 ? max_alpha : max_rgb);
                        val[chan] = lp_build_emit_llvm_binary(bld_base,
-                                                             TGSI_OPCODE_IMAX,
-                                                             val[chan], min);
+                                       TGSI_OPCODE_IMAX,
+                                       val[chan], chan == 3 ? min_alpha : min_rgb);
                }
 
                args[4] = uint->one; /* COMPR flag */
@@ -2688,7 +2713,7 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
                        for (j = 0; j < 4; j++) {
                                addr = ctx->outputs[i][j];
                                val = LLVMBuildLoad(gallivm->builder, addr, "");
-                               val = si_llvm_saturate(bld_base, val);
+                               val = ac_emit_clamp(&ctx->ac, val);
                                LLVMBuildStore(gallivm->builder, val, addr);
                        }
                }
@@ -2834,7 +2859,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
        /* Clamp color */
        if (ctx->shader->key.part.ps.epilog.clamp_color)
                for (i = 0; i < 4; i++)
-                       color[i] = si_llvm_saturate(bld_base, color[i]);
+                       color[i] = ac_emit_clamp(&ctx->ac, color[i]);
 
        /* Alpha to one */
        if (ctx->shader->key.part.ps.epilog.alpha_to_one)
@@ -4357,7 +4382,7 @@ static void tex_fetch_args(
                 * Z32_FLOAT, but we don't know that here.
                 */
                if (ctx->screen->b.chip_class == VI)
-                       z = si_llvm_saturate(bld_base, z);
+                       z = ac_emit_clamp(&ctx->ac, z);
 
                address[count++] = z;
        }
@@ -5266,7 +5291,7 @@ static void create_function(struct si_shader_context *ctx)
        struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        struct si_shader *shader = ctx->shader;
-       LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
+       LLVMTypeRef params[SI_NUM_PARAMS + SI_MAX_ATTRIBS], v3i32;
        LLVMTypeRef returns[16+32*4];
        unsigned i, last_sgpr, num_params, num_return_sgprs;
        unsigned num_returns = 0;
@@ -5282,7 +5307,7 @@ static void create_function(struct si_shader_context *ctx)
 
        switch (ctx->type) {
        case PIPE_SHADER_VERTEX:
-               params[SI_PARAM_VERTEX_BUFFERS] = const_array(ctx->v16i8, SI_NUM_VERTEX_BUFFERS);
+               params[SI_PARAM_VERTEX_BUFFERS] = const_array(ctx->v16i8, SI_MAX_ATTRIBS);
                params[SI_PARAM_BASE_VERTEX] = ctx->i32;
                params[SI_PARAM_START_INSTANCE] = ctx->i32;
                params[SI_PARAM_DRAWID] = ctx->i32;
@@ -6336,6 +6361,7 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
                fprintf(f, "  part.ps.prolog.bc_optimize_for_linear = %u\n", key->part.ps.prolog.bc_optimize_for_linear);
                fprintf(f, "  part.ps.epilog.spi_shader_col_format = 0x%x\n", key->part.ps.epilog.spi_shader_col_format);
                fprintf(f, "  part.ps.epilog.color_is_int8 = 0x%X\n", key->part.ps.epilog.color_is_int8);
+               fprintf(f, "  part.ps.epilog.color_is_int10 = 0x%X\n", key->part.ps.epilog.color_is_int10);
                fprintf(f, "  part.ps.epilog.last_cbuf = %u\n", key->part.ps.epilog.last_cbuf);
                fprintf(f, "  part.ps.epilog.alpha_func = %u\n", key->part.ps.epilog.alpha_func);
                fprintf(f, "  part.ps.epilog.alpha_to_one = %u\n", key->part.ps.epilog.alpha_to_one);
@@ -8240,7 +8266,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                     struct pipe_debug_callback *debug)
 {
        struct si_shader_selector *sel = shader->selector;
-       struct si_shader *mainp = sel->main_shader_part;
+       struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key);
        int r;
 
        /* LS, ES, VS are compiled on demand if the main part hasn't been