radeonsi: allocate the array of immediates dynamically
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 11 Jan 2017 13:50:41 +0000 (14:50 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 13 Jan 2017 09:40:57 +0000 (10:40 +0100)
Currently, we can store up to 256 immediates in a static array,
but this is not always enough. Instead, allocate a dynamic array
like what we currently do for temps.

This fixes a segfault with
dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23

No regressions found with full piglit run.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader_internal.h
src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

index c24d82ddaea8760d0333f4b81b67a16e200cbd07..61c0eff1526d8400e67d403ebf05c5fc893238ba 100644 (file)
@@ -4724,7 +4724,6 @@ static void tex_fetch_args(
                /* add tex offsets */
                if (inst->Texture.NumOffsets) {
                        struct lp_build_context *uint_bld = &bld_base->uint_bld;
-                       struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
                        const struct tgsi_texture_offset *off = inst->TexOffsets;
 
                        assert(inst->Texture.NumOffsets == 1);
@@ -4732,7 +4731,7 @@ static void tex_fetch_args(
                        switch (target) {
                        case TGSI_TEXTURE_3D:
                                address[2] = lp_build_add(uint_bld, address[2],
-                                               bld->immediates[off->Index][off->SwizzleZ]);
+                                               ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]);
                                /* fall through */
                        case TGSI_TEXTURE_2D:
                        case TGSI_TEXTURE_SHADOW2D:
@@ -4742,7 +4741,7 @@ static void tex_fetch_args(
                        case TGSI_TEXTURE_SHADOW2D_ARRAY:
                                address[1] =
                                        lp_build_add(uint_bld, address[1],
-                                               bld->immediates[off->Index][off->SwizzleY]);
+                                               ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]);
                                /* fall through */
                        case TGSI_TEXTURE_1D:
                        case TGSI_TEXTURE_SHADOW1D:
@@ -4750,7 +4749,7 @@ static void tex_fetch_args(
                        case TGSI_TEXTURE_SHADOW1D_ARRAY:
                                address[0] =
                                        lp_build_add(uint_bld, address[0],
-                                               bld->immediates[off->Index][off->SwizzleX]);
+                                               ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]);
                                break;
                                /* texture offsets do not apply to other texture targets */
                        }
@@ -4770,13 +4769,12 @@ static void tex_fetch_args(
 
                /* Get the component index from src1.x for Gather4. */
                if (!tgsi_is_shadow_target(target)) {
-                       LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
                        LLVMValueRef comp_imm;
                        struct tgsi_src_register src1 = inst->Src[1].Register;
 
                        assert(src1.File == TGSI_FILE_IMMEDIATE);
 
-                       comp_imm = imms[src1.Index][src1.SwizzleX];
+                       comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX];
                        gather_comp = LLVMConstIntGetZExtValue(comp_imm);
                        gather_comp = CLAMP(gather_comp, 0, 3);
                }
@@ -5250,13 +5248,15 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
                                       struct lp_build_emit_data *emit_data)
 {
-       LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+       struct si_shader_context *ctx = si_shader_context(bld_base);
        struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
+       LLVMValueRef imm;
        unsigned stream;
 
        assert(src0.File == TGSI_FILE_IMMEDIATE);
 
-       stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 0x3;
+       imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
+       stream = LLVMConstIntGetZExtValue(imm) & 0x3;
        return stream;
 }
 
index 51d491ebdf6beae19fbcb23be7f5003a102b5385..430c1101d5adf51e4a5ac693bcb1c74b1d52af3a 100644 (file)
@@ -89,6 +89,9 @@ struct si_shader_context {
        unsigned temps_count;
        LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
 
+       LLVMValueRef *imms;
+       unsigned imms_num;
+
        struct si_llvm_flow *flow;
        unsigned flow_depth;
        unsigned flow_depth_max;
index 8c8b4266e12358ba5296b2cd5fee87a721478bd5..d86b460272bd484b8976914d7855f4164446c0e8 100644 (file)
@@ -677,14 +677,14 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                if (tgsi_type_is_64bit(type)) {
                        result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
                        result = LLVMConstInsertElement(result,
-                                                       bld->immediates[reg->Register.Index][swizzle],
+                                                       ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
                                                        bld_base->int_bld.zero);
                        result = LLVMConstInsertElement(result,
-                                                       bld->immediates[reg->Register.Index][swizzle + 1],
+                                                       ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
                                                        bld_base->int_bld.one);
                        return LLVMConstBitCast(result, ctype);
                } else {
-                       return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+                       return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
                }
        }
 
@@ -1230,11 +1230,11 @@ static void emit_immediate(struct lp_build_tgsi_context *bld_base,
        struct si_shader_context *ctx = si_shader_context(bld_base);
 
        for (i = 0; i < 4; ++i) {
-               ctx->soa.immediates[ctx->soa.num_immediates][i] =
+               ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
                                LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false   );
        }
 
-       ctx->soa.num_immediates++;
+       ctx->imms_num++;
 }
 
 void si_llvm_context_init(struct si_shader_context *ctx,
@@ -1285,6 +1285,11 @@ void si_llvm_context_init(struct si_shader_context *ctx,
                                         ctx->temp_arrays);
        }
 
+       if (info && info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
+               int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
+               ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
+       }
+
        type.floating = true;
        type.fixed = false;
        type.sign = true;
@@ -1418,6 +1423,9 @@ void si_llvm_dispose(struct si_shader_context *ctx)
        FREE(ctx->temps);
        ctx->temps = NULL;
        ctx->temps_count = 0;
+       FREE(ctx->imms);
+       ctx->imms = NULL;
+       ctx->imms_num = 0;
        FREE(ctx->flow);
        ctx->flow = NULL;
        ctx->flow_depth_max = 0;