From: Connor Abbott Date: Tue, 4 Jun 2019 12:42:54 +0000 (+0200) Subject: ac/nir: Set speculatable for buffer loads where allowed X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=53a7649e5de22d6d30dc43a129d60efdc5f3c7be;p=mesa.git ac/nir: Set speculatable for buffer loads where allowed This brings the nir path in line with the TGSI path. Totals from affected shaders: SGPRS: 2984 -> 2984 (0.00 %) VGPRS: 2792 -> 2652 (-5.01 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 247380 -> 248072 (0.28 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 121 -> 132 (9.09 %) Wait states: 0 -> 0 (0.00 %) Most of the change came from DiRT: Showdown, and came from sinking SSBO loads. Reviewed-by: Timothy Arceri --- diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index b73a0f599f1..dffaeedfbb4 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1779,11 +1779,12 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, cache_policy & ac_glc); } else { int num_channels = util_next_power_of_two(load_bytes) / 4; + bool can_speculate = access & ACCESS_CAN_REORDER; ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, offset, immoffset, 0, cache_policy & ac_glc, 0, - false, false); + can_speculate, false); } LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret))); @@ -2489,11 +2490,11 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); - /* TODO: set "can_speculate" when OpenGL needs it. */ + bool can_speculate = access & ACCESS_CAN_REORDER; res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels, !!(args.cache_policy & ac_glc), - false); + can_speculate); res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);