ac/nir: Set speculatable for buffer loads where allowed
author Connor Abbott <cwabbott0@gmail.com>
Tue, 4 Jun 2019 12:42:54 +0000 (14:42 +0200)
committer Connor Abbott <cwabbott0@gmail.com>
Wed, 19 Jun 2019 12:08:28 +0000 (14:08 +0200)
This brings the nir path in line with the TGSI path.

Totals from affected shaders:
SGPRS: 2984 -> 2984 (0.00 %)
VGPRS: 2792 -> 2652 (-5.01 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 247380 -> 248072 (0.28 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 121 -> 132 (9.09 %)
Wait states: 0 -> 0 (0.00 %)

Most of the change came from DiRT: Showdown, as a result of sinking SSBO
loads.

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
src/amd/common/ac_nir_to_llvm.c

index b73a0f599f1cb69de655d14ed284658aea3c596a..dffaeedfbb42b5c5e81d0ca4f0f65a647a956086 100644 (file)
@@ -1779,11 +1779,12 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
                                                         cache_policy & ac_glc);
                } else {
                        int num_channels = util_next_power_of_two(load_bytes) / 4;
+                       bool can_speculate = access & ACCESS_CAN_REORDER;
 
                        ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels,
                                                   vindex, offset, immoffset, 0,
                                                   cache_policy & ac_glc, 0,
-                                                  false, false);
+                                                  can_speculate, false);
                }
 
                LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
@@ -2489,11 +2490,11 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
                vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
                                                 ctx->ac.i32_0, "");
 
-               /* TODO: set "can_speculate" when OpenGL needs it. */
+               bool can_speculate = access & ACCESS_CAN_REORDER;
                res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
                                                  ctx->ac.i32_0, num_channels,
                                                  !!(args.cache_policy & ac_glc),
-                                                 false);
+                                                 can_speculate);
                res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
 
                res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);