radeonsi: initialize output, temp, and address registers to "undef"
author: Marek Olšák <marek.olsak@amd.com>
Sat, 10 Oct 2015 00:40:20 +0000 (02:40 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Sat, 17 Oct 2015 19:40:03 +0000 (21:40 +0200)
This removes "v_mov v0, 0" which typically occurs before exports.

Totals:
SGPRS: 345216 -> 344552 (-0.19 %)
VGPRS: 197684 -> 197132 (-0.28 %)
Code Size: 7390408 -> 7375376 (-0.20 %) bytes
LDS: 91 -> 91 (0.00 %) blocks
Scratch: 1842176 -> 1679360 (-8.84 %) bytes per wave

Totals from affected shaders:
SGPRS: 101336 -> 100672 (-0.66 %)
VGPRS: 53920 -> 53368 (-1.02 %)
Code Size: 2170176 -> 2155144 (-0.69 %) bytes
LDS: 2 -> 2 (0.00 %) blocks
Scratch: 1015808 -> 852992 (-16.03 %) bytes per wave

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c

index 2e9a01356476aec7cdb96df7cd1f3cc9c6e35048..f548d1ac36f862a09c0f1e3962cf23fbe916ee6b 100644 (file)
@@ -272,6 +272,15 @@ static LLVMValueRef fetch_system_value(
        return bitcast(bld_base, type, cval);
 }
 
+static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
+                                         LLVMTypeRef type,
+                                         const char *name)
+{ /* Wraps lp_build_alloca(): creates a slot of 'type' called 'name', stores an undef value of that type into it, and returns the slot. */
+       LLVMValueRef ptr = lp_build_alloca(gallivm, type, name); /* slot comes from the gallivm helper */
+       LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr); /* overwrite the slot with undef; per the commit message this removes the "v_mov v0, 0" typically seen before exports */
+       return ptr; /* same value that was used as the store destination above */
+}
+
 static void emit_declaration(
        struct lp_build_tgsi_context * bld_base,
        const struct tgsi_full_declaration *decl)
@@ -285,7 +294,7 @@ static void emit_declaration(
                for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
                        unsigned chan;
                        for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                                ctx->soa.addr[idx][chan] = lp_build_alloca(
+                                ctx->soa.addr[idx][chan] = si_build_alloca_undef(
                                        &ctx->gallivm,
                                        ctx->soa.bld_base.uint_bld.elem_type, "");
                        }
@@ -315,8 +324,9 @@ static void emit_declaration(
                for (idx = first; idx <= last; idx++) {
                        for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
                                ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
-                                       lp_build_alloca(bld_base->base.gallivm, bld_base->base.vec_type,
-                                               "temp");
+                                       si_build_alloca_undef(bld_base->base.gallivm,
+                                                             bld_base->base.vec_type,
+                                                             "temp");
                        }
                }
                break;
@@ -347,7 +357,8 @@ static void emit_declaration(
                        unsigned chan;
                        assert(idx < RADEON_LLVM_MAX_OUTPUTS);
                        for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                               ctx->soa.outputs[idx][chan] = lp_build_alloca(&ctx->gallivm,
+                               ctx->soa.outputs[idx][chan] = si_build_alloca_undef(
+                                       &ctx->gallivm,
                                        ctx->soa.bld_base.base.elem_type, "");
                        }
                }