uint64_t offset; /* sign-extended */
int64_t offset_signed;
};
- uint32_t best_align;
+ uint32_t align_mul;
+ uint32_t align_offset;
nir_instr *instr;
nir_intrinsic_instr *intrin;
return glsl_type_is_boolean(type) ? 4u : glsl_get_bit_size(type) / 8u;
}
-static int
-get_array_stride(const struct glsl_type *type)
-{
- unsigned explicit_stride = glsl_get_explicit_stride(type);
- if ((glsl_type_is_matrix(type) &&
- glsl_matrix_type_is_row_major(type)) ||
- (glsl_type_is_vector(type) && explicit_stride == 0))
- return type_scalar_size_bytes(type);
- return explicit_stride;
-}
-
static uint64_t
mask_sign_extend(uint64_t val, unsigned bit_size)
{
case nir_deref_type_ptr_as_array: {
assert(parent);
nir_ssa_def *index = deref->arr.index.ssa;
- uint32_t stride;
- if (deref->deref_type == nir_deref_type_ptr_as_array)
- stride = nir_deref_instr_ptr_as_array_stride(deref);
- else
- stride = get_array_stride(parent->type);
+ uint32_t stride = nir_deref_instr_array_stride(deref);
nir_ssa_def *base = index;
uint64_t offset = 0, base_mul = 1;
return modes;
}
+/* Compute entry->align_mul / entry->align_offset for a load/store entry.
+ *
+ * align_mul becomes a power of two dividing every variable (SSA-def-scaled)
+ * component of the entry's offset; align_offset is the constant offset
+ * reduced modulo align_mul. If the intrinsic already carries a stricter
+ * explicit alignment, that alignment is kept instead.
+ */
+static void
+calc_alignment(struct entry *entry)
+{
+ /* Start at 31 so the shift below produces at most 1u << 30, keeping the
+ * result a power of two that fits in an int32_t. */
+ uint32_t align_mul = 31;
+ for (unsigned i = 0; i < entry->key->offset_def_count; i++) {
+ /* ffsll() returns the 1-based index of the lowest set bit, i.e. the
+ * power-of-two factor of this multiplier; take the minimum over all
+ * non-zero multipliers. */
+ if (entry->key->offset_defs_mul[i])
+ align_mul = MIN2(align_mul, ffsll(entry->key->offset_defs_mul[i]));
+ }
+
+ entry->align_mul = 1u << (align_mul - 1);
+ /* Prefer the intrinsic's declared alignment when it is stricter (larger
+ * align_mul) than what we derived from the offset structure. */
+ bool has_align = nir_intrinsic_infos[entry->intrin->intrinsic].index_map[NIR_INTRINSIC_ALIGN_MUL];
+ if (!has_align || entry->align_mul >= nir_intrinsic_align_mul(entry->intrin)) {
+ entry->align_offset = entry->offset % entry->align_mul;
+ } else {
+ entry->align_mul = nir_intrinsic_align_mul(entry->intrin);
+ entry->align_offset = nir_intrinsic_align_offset(entry->intrin);
+ }
+}
+
static struct entry *
create_entry(struct vectorize_ctx *ctx,
const struct intrinsic_info *info,
entry->intrin = intrin;
entry->instr = &intrin->instr;
entry->info = info;
- entry->best_align = UINT32_MAX;
entry->is_store = entry->info->value_src >= 0;
if (entry->info->deref_src >= 0) {
nir_ssa_def *base = entry->info->base_src >= 0 ?
intrin->src[entry->info->base_src].ssa : NULL;
uint64_t offset = 0;
- if (nir_intrinsic_infos[intrin->intrinsic].index_map[NIR_INTRINSIC_BASE])
+ if (nir_intrinsic_has_base(intrin))
offset += nir_intrinsic_base(intrin);
entry->key = create_entry_key_from_offset(entry, base, 1, &offset);
entry->offset = offset;
if (entry->info->resource_src >= 0)
entry->key->resource = intrin->src[entry->info->resource_src].ssa;
- if (nir_intrinsic_infos[intrin->intrinsic].index_map[NIR_INTRINSIC_ACCESS])
+ if (nir_intrinsic_has_access(intrin))
entry->access = nir_intrinsic_access(intrin);
else if (entry->key->var)
entry->access = entry->key->var->data.access;
if (get_variable_mode(entry) & restrict_modes)
entry->access |= ACCESS_RESTRICT;
+ calc_alignment(entry);
+
return entry;
}
return true;
}
-static uint64_t
-gcd(uint64_t a, uint64_t b)
-{
- while (b) {
- uint64_t old_b = b;
- b = a % b;
- a = old_b;
- }
- return a;
-}
-
-static uint32_t
-get_best_align(struct entry *entry)
-{
- if (entry->best_align != UINT32_MAX)
- return entry->best_align;
-
- uint64_t best_align = entry->offset;
- for (unsigned i = 0; i < entry->key->offset_def_count; i++) {
- if (!best_align)
- best_align = entry->key->offset_defs_mul[i];
- else if (entry->key->offset_defs_mul[i])
- best_align = gcd(best_align, entry->key->offset_defs_mul[i]);
- }
-
- if (nir_intrinsic_infos[entry->intrin->intrinsic].index_map[NIR_INTRINSIC_ALIGN_MUL])
- best_align = MAX2(best_align, nir_intrinsic_align(entry->intrin));
-
- /* ensure the result is a power of two that fits in a int32_t */
- entry->best_align = gcd(best_align, 1u << 30);
-
- return entry->best_align;
-}
-
/* Return true if "new_bit_size" is a usable bit size for a vectorized load/store
* of "low" and "high". */
static bool
if (new_bit_size / common_bit_size > NIR_MAX_VEC_COMPONENTS)
return false;
- if (!ctx->callback(get_best_align(low), new_bit_size, new_num_components,
+ uint32_t align = low->align_offset ? 1 << (ffs(low->align_offset) - 1) : low->align_mul;
+ if (!ctx->callback(align, new_bit_size, new_num_components,
high_offset, low->intrin, high->intrin))
return false;
/* avoid adding another deref to the path */
if (deref->deref_type == nir_deref_type_ptr_as_array &&
nir_src_is_const(deref->arr.index) &&
- offset % nir_deref_instr_ptr_as_array_stride(deref) == 0) {
- unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
+ offset % nir_deref_instr_array_stride(deref) == 0) {
+ unsigned stride = nir_deref_instr_array_stride(deref);
nir_ssa_def *index = nir_imm_intN_t(b, nir_src_as_int(deref->arr.index) - offset / stride,
deref->dest.ssa.bit_size);
return nir_build_deref_ptr_as_array(b, nir_deref_instr_parent(deref), index);
b, deref, nir_imm_intN_t(b, -offset, deref->dest.ssa.bit_size));
}
-static bool update_align(struct entry *entry)
-{
- bool has_align_index =
- nir_intrinsic_infos[entry->intrin->intrinsic].index_map[NIR_INTRINSIC_ALIGN_MUL];
- if (has_align_index) {
- unsigned align = get_best_align(entry);
- if (align != nir_intrinsic_align(entry->intrin)) {
- nir_intrinsic_set_align(entry->intrin, align, 0);
- return true;
- }
- }
- return false;
-}
-
static void
vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
struct entry *low, struct entry *high,
}
/* update base/align */
- bool has_base_index =
- nir_intrinsic_infos[first->intrin->intrinsic].index_map[NIR_INTRINSIC_BASE];
-
- if (first != low && has_base_index)
+ if (first != low && nir_intrinsic_has_base(first->intrin))
nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin));
first->key = low->key;
first->offset = low->offset;
- first->best_align = get_best_align(low);
- update_align(first);
+ first->align_mul = low->align_mul;
+ first->align_offset = low->align_offset;
nir_instr_remove(second->instr);
}
}
/* update base/align */
- bool has_base_index =
- nir_intrinsic_infos[second->intrin->intrinsic].index_map[NIR_INTRINSIC_BASE];
-
- if (second != low && has_base_index)
+ if (second != low && nir_intrinsic_has_base(second->intrin))
nir_intrinsic_set_base(second->intrin, nir_intrinsic_base(low->intrin));
second->key = low->key;
second->offset = low->offset;
- second->best_align = get_best_align(low);
- update_align(second);
+ second->align_mul = low->align_mul;
+ second->align_offset = low->align_offset;
list_del(&first->head);
nir_instr_remove(first->instr);
return true;
}
+/* Write the entry's computed align_mul/align_offset back onto the intrinsic
+ * if it carries alignment indices and the values differ.
+ * Returns true if the intrinsic was modified (i.e. the pass made progress).
+ */
+static bool
+update_align(struct entry *entry)
+{
+ if (nir_intrinsic_has_align_mul(entry->intrin) &&
+ (entry->align_mul != nir_intrinsic_align_mul(entry->intrin) ||
+ entry->align_offset != nir_intrinsic_align_offset(entry->intrin))) {
+ nir_intrinsic_set_align(entry->intrin, entry->align_mul, entry->align_offset);
+ return true;
+ }
+ return false;
+}
+
static bool
vectorize_entries(struct vectorize_ctx *ctx, nir_function_impl *impl, struct hash_table *ht)
{
struct entry *high = *util_dynarray_element(arr, struct entry *, i + 1);
uint64_t diff = high->offset_signed - low->offset_signed;
- if (diff > get_bit_size(low) / 8u * low->intrin->num_components) {
- progress |= update_align(low);
+ if (diff > get_bit_size(low) / 8u * low->intrin->num_components)
continue;
- }
struct entry *first = low->index < high->index ? low : high;
struct entry *second = low->index < high->index ? high : low;
*util_dynarray_element(arr, struct entry *, i) = NULL;
*util_dynarray_element(arr, struct entry *, i + 1) = low->is_store ? second : first;
progress = true;
- } else {
- progress |= update_align(low);
}
}
- struct entry *last = *util_dynarray_element(arr, struct entry *, i);
- progress |= update_align(last);
+ util_dynarray_foreach(arr, struct entry *, elem) {
+ if (*elem)
+ progress |= update_align(*elem);
+ }
}
_mesa_hash_table_clear(ht, delete_entry_dynarray);