From 8916d1e2fae61c532e1e2013f0f76122ed1916b7 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Nicolai=20H=C3=A4hnle?=
Date: Mon, 8 Aug 2016 22:31:02 +0200
Subject: [PATCH] gallium/radeon: reduce alloca of temporaries based on usagemask
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

v2: take actual writemasks into account

Reviewed-by: Marek Olšák
---
 src/gallium/drivers/radeon/radeon_llvm.h      |  2 +
 .../drivers/radeon/radeon_setup_tgsi_llvm.c   | 62 ++++++++++++++++---
 2 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 4ed2c97936f..0276ef3e02a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -99,6 +99,8 @@ struct radeon_llvm_context {
 	struct tgsi_array_info *temp_arrays;
 	LLVMValueRef *temp_array_allocas;
 
+	LLVMValueRef undef_alloca;
+
 	LLVMValueRef main_fn;
 	LLVMTypeRef return_type;
 
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index dac0594cfbd..dd7d60b136c 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -185,6 +185,7 @@ get_pointer_into_array(struct radeon_llvm_context *ctx,
 		       const struct tgsi_ind_register *reg_indirect)
 {
 	unsigned array_id;
+	struct tgsi_array_info *array;
 	struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMValueRef idxs[2];
@@ -202,10 +203,23 @@ get_pointer_into_array(struct radeon_llvm_context *ctx,
 	if (!alloca)
 		return NULL;
 
+	array = &ctx->temp_arrays[array_id - 1];
+
+	if (!(array->writemask & (1 << swizzle)))
+		return ctx->undef_alloca;
+
 	index = emit_array_index(&ctx->soa, reg_indirect,
 				 reg_index - ctx->temp_arrays[array_id - 1].range.First);
-	index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, TGSI_NUM_CHANNELS), "");
-	index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, swizzle), "");
+	index = LLVMBuildMul(
+		builder, index,
+		lp_build_const_int32(gallivm, util_bitcount(array->writemask)),
+		"");
+	index = LLVMBuildAdd(
+		builder, index,
+		lp_build_const_int32(
+			gallivm,
+			util_bitcount(array->writemask & ((1 << swizzle) - 1))),
+		"");
 	idxs[0] = ctx->soa.bld_base.uint_bld.zero;
 	idxs[1] = index;
 	return LLVMBuildGEP(builder, alloca, idxs, 2, "");
@@ -479,11 +493,18 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 		char name[16] = "";
 		LLVMValueRef array_alloca = NULL;
 		unsigned decl_size;
+		unsigned writemask = decl->Declaration.UsageMask;
 		first = decl->Range.First;
 		last = decl->Range.Last;
 		decl_size = 4 * ((last - first) + 1);
+
 		if (decl->Declaration.Array) {
 			unsigned id = decl->Array.ArrayID - 1;
+			unsigned array_size;
+
+			writemask &= ctx->temp_arrays[id].writemask;
+			ctx->temp_arrays[id].writemask = writemask;
+			array_size = ((last - first) + 1) * util_bitcount(writemask);
 
 			/* If the array has more than 16 elements, store it
 			 * in memory using an alloca that spans the entire
@@ -491,7 +512,8 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 			 *
 			 * Otherwise, store each array element individually.
 			 * We will then generate vectors (per-channel, up to
-			 * <4 x float>) for indirect addressing.
+			 * <16 x float> if the usagemask is a single bit) for
+			 * indirect addressing.
 			 *
 			 * Note that 16 is the number of vector elements that
 			 * LLVM will store in a register, so theoretically an
@@ -503,10 +525,10 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 			 * code path for arrays. LLVM should be smart enough to
 			 * promote allocas into registers when profitable.
 			 */
-			if (decl_size > 16) {
+			if (array_size > 16) {
 				array_alloca = LLVMBuildAlloca(builder,
 					LLVMArrayType(bld_base->base.vec_type,
-						      decl_size), "array");
+						      array_size), "array");
 				ctx->temp_array_allocas[id] = array_alloca;
 			}
 		}
@@ -531,14 +553,34 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 				bld_base->uint_bld.zero,
 				NULL
 			};
+			unsigned j = 0;
+
+			if (writemask != TGSI_WRITEMASK_XYZW &&
+			    !ctx->undef_alloca) {
+				/* Create a dummy alloca. We use it so that we
+				 * have a pointer that is safe to load from if
+				 * a shader ever reads from a channel that
+				 * it never writes to.
+				 */
+				ctx->undef_alloca = si_build_alloca_undef(
+					bld_base->base.gallivm,
+					bld_base->base.vec_type, "undef");
+			}
+
 			for (i = 0; i < decl_size; ++i) {
+				LLVMValueRef ptr;
+				if (writemask & (1 << (i % 4))) {
 #ifdef DEBUG
-				snprintf(name, sizeof(name), "TEMP%d.%c",
-					 first + i / 4, "xyzw"[i % 4]);
+					snprintf(name, sizeof(name), "TEMP%d.%c",
+						 first + i / 4, "xyzw"[i % 4]);
 #endif
-				idxs[1] = lp_build_const_int32(bld_base->base.gallivm, i);
-				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
-					LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
+					idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j);
+					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
+					j++;
+				} else {
+					ptr = ctx->undef_alloca;
+				}
+				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
 			}
 		}
 		break;
-- 
2.30.2