 }
 }
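+/* Callback for nir_opt_load_store_vectorize.  Decides whether the two
+ * adjacent intrinsics "low" and "high" may be combined into a single
+ * access of num_components components of bit_size bits each, e.g. whether
+ * two consecutive 32-bit UBO loads may become one 2x32-bit load.
+ */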
+static bool
+brw_nir_should_vectorize_mem(unsigned align, unsigned bit_size,
+                             unsigned num_components, unsigned high_offset,
+                             nir_intrinsic_instr *low,
+                             nir_intrinsic_instr *high)
+{
+   /* Don't combine things to generate 64-bit loads/stores.  We have to split
+    * those back into 32-bit ones anyway and UBO loads aren't split in NIR so
+    * we don't want to make a mess for the back-end.
+    */
+   if (bit_size > 32)
+      return false;
+
+   /* We can handle at most a vec4 right now.  Anything bigger would get
+    * immediately split by brw_nir_lower_mem_access_bit_sizes anyway.
+    */
+   if (num_components > 4)
+      return false;
+
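+   /* Require at least the natural alignment of a single component. */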
+   if (align < bit_size / 8)
+      return false;
+
+   return true;
+}
+
 static bool
 combine_all_barriers(nir_intrinsic_instr *a,
                      nir_intrinsic_instr *b,
    return true;
 }
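+/* Vectorize adjacent memory accesses where possible, then lower the
+ * result to access sizes the back-end can handle.
+ */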
+static void
+brw_vectorize_lower_mem_access(nir_shader *nir,
+                               const struct brw_compiler *compiler,
+                               bool is_scalar)
+{
+   const struct gen_device_info *devinfo = compiler->devinfo;
+   bool progress = false;
+
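+   /* Only vectorize for the scalar back-end; the bit-size lowering below
+    * runs for both back-ends.
+    */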
+   if (is_scalar) {
+      OPT(nir_opt_load_store_vectorize,
+          nir_var_mem_ubo | nir_var_mem_ssbo |
+          nir_var_mem_global | nir_var_mem_shared,
+          brw_nir_should_vectorize_mem);
+   }
+
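+   /* Lower the vectorized (and pre-existing) memory accesses to bit sizes
+    * the hardware can actually do.
+    */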
+   OPT(brw_nir_lower_mem_access_bit_sizes, devinfo);
+
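+   /* OPT() latches progress from the passes above, so this loop only runs
+    * if something changed; iterate to a fixed point to clean up the
+    * pack/unpack code the bit-size lowering introduces.
+    */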
+   while (progress) {
+      progress = false;
+
+      OPT(nir_lower_pack);
+      OPT(nir_copy_prop);
+      OPT(nir_opt_dce);
+      OPT(nir_opt_cse);
+      OPT(nir_opt_algebraic);
+      OPT(nir_opt_constant_folding);
+   }
+}
+
 /* Prepare the given shader for codegen
  *
  * This function is intended to be called right before going into the actual
    brw_nir_optimize(nir, compiler, is_scalar, false);
-   if (OPT(brw_nir_lower_mem_access_bit_sizes, devinfo)) {
-      do {
-         progress = false;
-         OPT(nir_lower_pack);
-         OPT(nir_copy_prop);
-         OPT(nir_opt_dce);
-         OPT(nir_opt_cse);
-         OPT(nir_opt_algebraic);
-         OPT(nir_opt_constant_folding);
-      } while (progress);
-   }
+   brw_vectorize_lower_mem_access(nir, compiler, is_scalar);
    if (OPT(nir_lower_int64, nir->options->lower_int64_options))
       brw_nir_optimize(nir, compiler, is_scalar, false);