From 69ee3f431f9f1bb782485ede992b95e01ad790a5 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 4 Feb 2014 19:28:58 -0500 Subject: [PATCH] gallivm: handle huge number of immediates We only supported up to 256 immediates, which isn't enough. We had code which was allocating immediates as an allocated array, but it was always used along a statically backed array for performance reasons. This commit adds code to skip that performance optimization and always use just the dynamically allocated immediates if the number of them is too great. Signed-off-by: Zack Rusin Reviewed-by: Jose Fonseca Reviewed-by: Brian Paul Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_limits.h | 10 +- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 6 +- .../auxiliary/gallivm/lp_bld_tgsi_aos.c | 2 +- .../auxiliary/gallivm/lp_bld_tgsi_soa.c | 112 ++++++++++++------ 4 files changed, 86 insertions(+), 44 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h index e03bac640df..87be3511d94 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -47,19 +47,21 @@ #define LP_MAX_TGSI_ADDRS 16 -#define LP_MAX_TGSI_IMMEDIATES 256 +#define LP_MAX_TGSI_IMMEDIATES 4096 #define LP_MAX_TGSI_PREDS 16 #define LP_MAX_TGSI_CONST_BUFFERS 16 /* - * For quick access we cache temps in a statically - * allocated array. This defines the maximum size - * of that array. + * For quick access we cache registers in statically + * allocated arrays. Here we define the maximum size + * for those arrays. 
*/ #define LP_MAX_INLINED_TEMPS 256 +#define LP_MAX_INLINED_IMMEDIATES 256 + /** * Maximum control flow nesting * diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index e0a7c5dc1ab..ffd6e874a89 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -444,7 +444,7 @@ struct lp_build_tgsi_soa_context struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS]; - LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS]; + LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES][TGSI_NUM_CHANNELS]; LLVMValueRef temps[LP_MAX_INLINED_TEMPS][TGSI_NUM_CHANNELS]; LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS]; LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS]; @@ -482,7 +482,7 @@ struct lp_build_tgsi_soa_context struct lp_exec_mask exec_mask; uint num_immediates; - + boolean use_immediates_array; }; void @@ -536,7 +536,7 @@ struct lp_build_tgsi_aos_context struct lp_build_sampler_aos *sampler; - LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES]; + LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES]; LLVMValueRef temps[LP_MAX_INLINED_TEMPS]; LLVMValueRef addr[LP_MAX_TGSI_ADDRS]; LLVMValueRef preds[LP_MAX_TGSI_PREDS]; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index fd5df0eb52f..4dee9bb4dd4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -1042,7 +1042,7 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm, const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; float imm[4]; assert(size <= 4); - assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); + assert(num_immediates < LP_MAX_INLINED_IMMEDIATES); for (chan = 0; chan < 4; ++chan) { imm[chan] = 0.0f; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 3ba20314203..d2cb0a0975f 
100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1295,33 +1295,42 @@ emit_fetch_immediate( LLVMBuilderRef builder = gallivm->builder; LLVMValueRef res = NULL; - if (reg->Register.Indirect) { - LLVMValueRef indirect_index; - LLVMValueRef index_vec; /* index into the immediate register array */ + if (bld->use_immediates_array || reg->Register.Indirect) { LLVMValueRef imms_array; LLVMTypeRef fptr_type; - indirect_index = get_indirect_index(bld, - reg->Register.File, - reg->Register.Index, - &reg->Indirect); - /* - * Unlike for other reg classes, adding pixel offsets is unnecessary - - * immediates are stored as full vectors (FIXME??? - might be better - * to store them the same as constants) but all elements are the same - * in any case. - */ - index_vec = get_soa_array_offsets(&bld_base->uint_bld, - indirect_index, - swizzle, - FALSE); - /* cast imms_array pointer to float* */ fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, ""); - /* Gather values from the immediate register array */ - res = build_gather(&bld_base->base, imms_array, index_vec, NULL); + if (reg->Register.Indirect) { + LLVMValueRef indirect_index; + LLVMValueRef index_vec; /* index into the immediate register array */ + + indirect_index = get_indirect_index(bld, + reg->Register.File, + reg->Register.Index, + &reg->Indirect); + /* + * Unlike for other reg classes, adding pixel offsets is unnecessary - + * immediates are stored as full vectors (FIXME??? - might be better + * to store them the same as constants) but all elements are the same + * in any case. 
+ */ + index_vec = get_soa_array_offsets(&bld_base->uint_bld, + indirect_index, + swizzle, + FALSE); + + /* Gather values from the immediate register array */ + res = build_gather(&bld_base->base, imms_array, index_vec, NULL); + } else { + LLVMValueRef lindex = lp_build_const_int32(gallivm, + reg->Register.Index * 4 + swizzle); + LLVMValueRef imms_ptr = LLVMBuildGEP(builder, + bld->imms_array, &lindex, 1, ""); + res = LLVMBuildLoad(builder, imms_ptr, ""); + } } else { res = bld->immediates[reg->Register.Index][swizzle]; @@ -2728,51 +2737,71 @@ void lp_emit_immediate_soa( { struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); struct gallivm_state * gallivm = bld_base->base.gallivm; - - /* simply copy the immediate values into the next immediates[] slot */ + LLVMValueRef imms[4]; unsigned i; const uint size = imm->Immediate.NrTokens - 1; assert(size <= 4); - assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES); switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: for( i = 0; i < size; ++i ) - bld->immediates[bld->num_immediates][i] = - lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); + imms[i] = + lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); break; case TGSI_IMM_UINT32: for( i = 0; i < size; ++i ) { LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint); - bld->immediates[bld->num_immediates][i] = - LLVMConstBitCast(tmp, bld_base->base.vec_type); + imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); } break; case TGSI_IMM_INT32: for( i = 0; i < size; ++i ) { LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int); - bld->immediates[bld->num_immediates][i] = - LLVMConstBitCast(tmp, bld_base->base.vec_type); + imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); } - + break; } for( i = size; i < 4; ++i ) - bld->immediates[bld->num_immediates][i] = bld_base->base.undef; + imms[i] = bld_base->base.undef; - if (bld->indirect_files & (1 << 
TGSI_FILE_IMMEDIATE)) { + if (bld->use_immediates_array) { unsigned index = bld->num_immediates; struct gallivm_state *gallivm = bld->bld_base.base.gallivm; LLVMBuilderRef builder = gallivm->builder; + + assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)); for (i = 0; i < 4; ++i ) { LLVMValueRef lindex = lp_build_const_int32( - bld->bld_base.base.gallivm, index * 4 + i); + bld->bld_base.base.gallivm, index * 4 + i); LLVMValueRef imm_ptr = LLVMBuildGEP(builder, bld->imms_array, &lindex, 1, ""); - LLVMBuildStore(builder, - bld->immediates[index][i], - imm_ptr); + LLVMBuildStore(builder, imms[i], imm_ptr); + } + } else { + /* simply copy the immediate values into the next immediates[] slot */ + unsigned i; + const uint size = imm->Immediate.NrTokens - 1; + assert(size <= 4); + assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES); + + for(i = 0; i < 4; ++i ) + bld->immediates[bld->num_immediates][i] = imms[i]; + + if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) { + unsigned index = bld->num_immediates; + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + for (i = 0; i < 4; ++i ) { + LLVMValueRef lindex = lp_build_const_int32( + bld->bld_base.base.gallivm, index * 4 + i); + LLVMValueRef imm_ptr = LLVMBuildGEP(builder, + bld->imms_array, &lindex, 1, ""); + LLVMBuildStore(builder, + bld->immediates[index][i], + imm_ptr); + } } } @@ -3629,6 +3658,17 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) { bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY); } + /* + * For performance reasons immediates are always backed by a static + * array, but if their number is too great, we have to use just + * a dynamically allocated array. 
+ */ + bld.use_immediates_array = + (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES); + if (bld.use_immediates_array) { + bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE); + } + bld.bld_base.soa = TRUE; bld.bld_base.emit_debug = emit_debug; -- 2.30.2