From 6f0d955b6d5017ad76752555c1a4214c16e92622 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 11 Jan 2017 14:50:41 +0100
Subject: [PATCH] radeonsi: allocate the array of immediates dynamically
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Currently, we can store up to 256 immediates in a static array, but
this is not always enough. Instead, allocate a dynamic array like
what we currently do for temps.

This fixes a segfault with
dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23

No regressions found with full piglit run.

Signed-off-by: Samuel Pitoiset
Reviewed-by: Nicolai Hähnle
Reviewed-by: Marek Olšák
---
Reviewer notes (free-form area after the "---" marker, not part of the
commit message):
 * The line structure and indentation of this patch were reconstructed
   from the hunk headers' line counts; apply with whitespace tolerance
   if the leading whitespace does not match the tree.
 * NOTE(review): the MALLOC() result in si_llvm_context_init() is not
   checked in the lines shown here, while emit_immediate() stores
   through ctx->imms unconditionally -- confirm whether an
   out-of-memory check is wanted.

 src/gallium/drivers/radeonsi/si_shader.c       | 16 ++++++++--------
 .../drivers/radeonsi/si_shader_internal.h      |  3 +++
 .../drivers/radeonsi/si_shader_tgsi_setup.c    | 18 +++++++++++++-----
 3 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c24d82ddaea..61c0eff1526 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4724,7 +4724,6 @@ static void tex_fetch_args(
 	/* add tex offsets */
 	if (inst->Texture.NumOffsets) {
 		struct lp_build_context *uint_bld = &bld_base->uint_bld;
-		struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
 		const struct tgsi_texture_offset *off = inst->TexOffsets;
 
 		assert(inst->Texture.NumOffsets == 1);
@@ -4732,7 +4731,7 @@ static void tex_fetch_args(
 		switch (target) {
 		case TGSI_TEXTURE_3D:
 			address[2] = lp_build_add(uint_bld, address[2],
-					bld->immediates[off->Index][off->SwizzleZ]);
+					ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]);
 			/* fall through */
 		case TGSI_TEXTURE_2D:
 		case TGSI_TEXTURE_SHADOW2D:
@@ -4742,7 +4741,7 @@ static void tex_fetch_args(
 		case TGSI_TEXTURE_SHADOW2D_ARRAY:
 			address[1] =
 				lp_build_add(uint_bld, address[1],
-					bld->immediates[off->Index][off->SwizzleY]);
+					ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]);
 			/* fall through */
 		case TGSI_TEXTURE_1D:
 		case TGSI_TEXTURE_SHADOW1D:
@@ -4750,7 +4749,7 @@ static void tex_fetch_args(
 		case TGSI_TEXTURE_SHADOW1D_ARRAY:
 			address[0] =
 				lp_build_add(uint_bld, address[0],
-					bld->immediates[off->Index][off->SwizzleX]);
+					ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]);
 			break;
 			/* texture offsets do not apply to other texture targets */
 		}
@@ -4770,13 +4769,12 @@ static void tex_fetch_args(
 
 		/* Get the component index from src1.x for Gather4. */
 		if (!tgsi_is_shadow_target(target)) {
-			LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
 			LLVMValueRef comp_imm;
 			struct tgsi_src_register src1 = inst->Src[1].Register;
 
 			assert(src1.File == TGSI_FILE_IMMEDIATE);
 
-			comp_imm = imms[src1.Index][src1.SwizzleX];
+			comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX];
 			gather_comp = LLVMConstIntGetZExtValue(comp_imm);
 			gather_comp = CLAMP(gather_comp, 0, 3);
 		}
@@ -5250,13 +5248,15 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
 				   struct lp_build_emit_data *emit_data)
 {
-	LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
+	LLVMValueRef imm;
 	unsigned stream;
 
 	assert(src0.File == TGSI_FILE_IMMEDIATE);
 
-	stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 0x3;
+	imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
+	stream = LLVMConstIntGetZExtValue(imm) & 0x3;
 	return stream;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 51d491ebdf6..430c1101d5a 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -89,6 +89,9 @@ struct si_shader_context {
 	unsigned temps_count;
 	LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
 
+	LLVMValueRef *imms;
+	unsigned imms_num;
+
 	struct si_llvm_flow *flow;
 	unsigned flow_depth;
 	unsigned flow_depth_max;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 8c8b4266e12..d86b460272b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -677,14 +677,14 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 		if (tgsi_type_is_64bit(type)) {
 			result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 			result = LLVMConstInsertElement(result,
-							bld->immediates[reg->Register.Index][swizzle],
+							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
 							bld_base->int_bld.zero);
 			result = LLVMConstInsertElement(result,
-							bld->immediates[reg->Register.Index][swizzle + 1],
+							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
 							bld_base->int_bld.one);
 			return LLVMConstBitCast(result, ctype);
 		} else {
-			return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
 		}
 	}
 
@@ -1230,11 +1230,11 @@ static void emit_immediate(struct lp_build_tgsi_context *bld_base,
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 
 	for (i = 0; i < 4; ++i) {
-		ctx->soa.immediates[ctx->soa.num_immediates][i] =
+		ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
 				LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
 	}
 
-	ctx->soa.num_immediates++;
+	ctx->imms_num++;
 }
 
 void si_llvm_context_init(struct si_shader_context *ctx,
@@ -1285,6 +1285,11 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 						ctx->temp_arrays);
 	}
 
+	if (info && info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
+		int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
+		ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
+	}
+
 	type.floating = true;
 	type.fixed = false;
 	type.sign = true;
@@ -1418,6 +1423,9 @@ void si_llvm_dispose(struct si_shader_context *ctx)
 	FREE(ctx->temps);
 	ctx->temps = NULL;
 	ctx->temps_count = 0;
+	FREE(ctx->imms);
+	ctx->imms = NULL;
+	ctx->imms_num = 0;
 	FREE(ctx->flow);
 	ctx->flow = NULL;
 	ctx->flow_depth_max = 0;
-- 
2.30.2