aco: use nir_addition_might_overflow to combine additions into SMEM
author Rhys Perry <pendingchaos02@gmail.com>
Tue, 12 Nov 2019 17:51:34 +0000 (17:51 +0000)
committer Marge Bot <eric+marge@anholt.net>
Tue, 21 Jul 2020 18:25:35 +0000 (18:25 +0000)
fossil-db (Navi):
Totals from 24656 (18.14% of 135946) affected shaders:
CodeSize: 120077160 -> 118877304 (-1.00%); split: -1.01%, +0.01%
Instrs: 23192657 -> 22979553 (-0.92%); split: -0.94%, +0.02%
VMEM: 165151115 -> 151861460 (-8.05%); split: +0.14%, -8.19%
SMEM: 18133265 -> 16709635 (-7.85%); split: +0.28%, -8.13%
VClause: 385011 -> 384447 (-0.15%); split: -0.16%, +0.02%
SClause: 954884 -> 838266 (-12.21%); split: -12.34%, +0.12%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2720>

src/amd/compiler/aco_instruction_selection_setup.cpp

index 72b4f38b826d56c890aeaa17e80f54744d44b0ff..6de954bcdb235dbb1fe440e5baccafdf81434d01 100644 (file)
@@ -426,6 +426,124 @@ void fill_desc_set_info(isel_context *ctx, nir_function_impl *impl)
    }
 }
 
+void apply_nuw_to_ssa(nir_shader *shader, struct hash_table *range_ht, nir_ssa_def *ssa,
+                      const nir_unsigned_upper_bound_config *config)
+{
+   nir_ssa_scalar scalar;
+   scalar.def = ssa;
+   scalar.comp = 0;
+
+   if (!nir_ssa_scalar_is_alu(scalar) || nir_ssa_scalar_alu_op(scalar) != nir_op_iadd)
+      return;
+
+   nir_alu_instr *add = nir_instr_as_alu(ssa->parent_instr);
+
+   if (add->no_unsigned_wrap)
+      return;
+
+   nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0);
+   nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1);
+
+   if (nir_ssa_scalar_is_const(src0)) {
+      nir_ssa_scalar tmp = src0;
+      src0 = src1;
+      src1 = tmp;
+   }
+
+   uint32_t src1_ub = nir_unsigned_upper_bound(shader, range_ht, src1, config);
+   add->no_unsigned_wrap = !nir_addition_might_overflow(shader, range_ht, src0, src1_ub, config);
+}
+
+void apply_nuw_to_offsets(isel_context *ctx, nir_function_impl *impl)
+{
+   nir_unsigned_upper_bound_config config;
+   config.min_subgroup_size = 64;
+   config.max_subgroup_size = 64;
+   if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && ctx->options->key.cs.subgroup_size) {
+      config.min_subgroup_size = ctx->options->key.cs.subgroup_size;
+      config.max_subgroup_size = ctx->options->key.cs.subgroup_size;
+   }
+   config.max_work_group_invocations = 2048;
+   config.max_work_group_count[0] = 65535;
+   config.max_work_group_count[1] = 65535;
+   config.max_work_group_count[2] = 65535;
+   config.max_work_group_size[0] = 2048;
+   config.max_work_group_size[1] = 2048;
+   config.max_work_group_size[2] = 2048;
+   for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
+      unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[i];
+      unsigned dfmt = attrib_format & 0xf;
+      unsigned nfmt = (attrib_format >> 4) & 0x7;
+
+      uint32_t max = UINT32_MAX;
+      if (nfmt == V_008F0C_BUF_NUM_FORMAT_UNORM) {
+         max = 0x3f800000u;
+      } else if (nfmt == V_008F0C_BUF_NUM_FORMAT_UINT ||
+                 nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED) {
+         bool uscaled = nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED;
+         switch (dfmt) {
+         case V_008F0C_BUF_DATA_FORMAT_8:
+         case V_008F0C_BUF_DATA_FORMAT_8_8:
+         case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
+            max = uscaled ? 0x437f0000u : UINT8_MAX;
+            break;
+         case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
+         case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
+            max = uscaled ? 0x447fc000u : 1023;
+            break;
+         case V_008F0C_BUF_DATA_FORMAT_10_11_11:
+         case V_008F0C_BUF_DATA_FORMAT_11_11_10:
+            max = uscaled ? 0x44ffe000u : 2047;
+            break;
+         case V_008F0C_BUF_DATA_FORMAT_16:
+         case V_008F0C_BUF_DATA_FORMAT_16_16:
+         case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
+            max = uscaled ? 0x477fff00u : UINT16_MAX;
+            break;
+         case V_008F0C_BUF_DATA_FORMAT_32:
+         case V_008F0C_BUF_DATA_FORMAT_32_32:
+         case V_008F0C_BUF_DATA_FORMAT_32_32_32:
+         case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
+            max = uscaled ? 0x4f800000u : UINT32_MAX;
+            break;
+         }
+      }
+      config.vertex_attrib_max[i] = max;
+   }
+
+   struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_load_constant:
+         case nir_intrinsic_load_uniform:
+         case nir_intrinsic_load_push_constant:
+            if (!nir_src_is_divergent(intrin->src[0]))
+               apply_nuw_to_ssa(ctx->shader, range_ht, intrin->src[0].ssa, &config);
+            break;
+         case nir_intrinsic_load_ubo:
+         case nir_intrinsic_load_ssbo:
+            if (!nir_src_is_divergent(intrin->src[1]))
+               apply_nuw_to_ssa(ctx->shader, range_ht, intrin->src[1].ssa, &config);
+            break;
+         case nir_intrinsic_store_ssbo:
+            if (!nir_src_is_divergent(intrin->src[2]))
+               apply_nuw_to_ssa(ctx->shader, range_ht, intrin->src[2].ssa, &config);
+            break;
+         default:
+            break;
+         }
+      }
+   }
+
+   _mesa_hash_table_destroy(range_ht, NULL);
+}
+
 RegClass get_reg_class(isel_context *ctx, RegType type, unsigned components, unsigned bitsize)
 {
    if (bitsize == 1)
@@ -444,6 +562,8 @@ void init_context(isel_context *ctx, nir_shader *shader)
 
    fill_desc_set_info(ctx, impl);
 
+   apply_nuw_to_offsets(ctx, impl);
+
    /* sanitize control flow */
    nir_metadata_require(impl, nir_metadata_dominance);
    sanitize_cf_list(impl, &impl->body);