STORE(0, deref, -1, -1, 0, 1)
LOAD(nir_var_mem_shared, shared, -1, 0, -1)
STORE(nir_var_mem_shared, shared, -1, 1, -1, 0)
+ LOAD(nir_var_mem_global, global, -1, 0, -1)
+ STORE(nir_var_mem_global, global, -1, 1, -1, 0)
ATOMIC(nir_var_mem_ssbo, ssbo, add, 0, 1, -1, 2)
ATOMIC(nir_var_mem_ssbo, ssbo, imin, 0, 1, -1, 2)
ATOMIC(nir_var_mem_ssbo, ssbo, umin, 0, 1, -1, 2)
ATOMIC(nir_var_mem_shared, shared, fmin, -1, 0, -1, 1)
ATOMIC(nir_var_mem_shared, shared, fmax, -1, 0, -1, 1)
ATOMIC(nir_var_mem_shared, shared, fcomp_swap, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, add, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, imin, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, umin, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, imax, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, umax, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, and, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, or, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, xor, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, exchange, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, comp_swap, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, fadd, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, fmin, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, fmax, -1, 0, -1, 1)
+ ATOMIC(nir_var_mem_global, global, fcomp_swap, -1, 0, -1, 1)
default:
break;
#undef ATOMIC
struct vectorize_ctx {
nir_variable_mode modes;
nir_should_vectorize_mem_func callback;
+ nir_variable_mode robust_modes;
struct list_head entries[nir_num_variable_modes];
struct hash_table *loads[nir_num_variable_modes];
struct hash_table *stores[nir_num_variable_modes];
* the order of the hash table walk is deterministic */
struct entry_key *key = (struct entry_key*)key_;
- uint32_t hash = _mesa_fnv32_1a_offset_bias;
+ uint32_t hash = 0;
if (key->resource)
- hash = _mesa_fnv32_1a_accumulate(hash, key->resource->index);
+ hash = XXH32(&key->resource->index, sizeof(key->resource->index), hash);
if (key->var) {
- hash = _mesa_fnv32_1a_accumulate(hash, key->var->index);
+ hash = XXH32(&key->var->index, sizeof(key->var->index), hash);
unsigned mode = key->var->data.mode;
- hash = _mesa_fnv32_1a_accumulate(hash, mode);
+ hash = XXH32(&mode, sizeof(mode), hash);
}
for (unsigned i = 0; i < key->offset_def_count; i++)
- hash = _mesa_fnv32_1a_accumulate(hash, key->offset_defs[i]->index);
+ hash = XXH32(&key->offset_defs[i]->index, sizeof(key->offset_defs[i]->index), hash);
- hash = _mesa_fnv32_1a_accumulate_block(
- hash, key->offset_defs_mul, key->offset_def_count * sizeof(uint64_t));
+ hash = XXH32(key->offset_defs_mul, key->offset_def_count * sizeof(uint64_t), hash);
return hash;
}
return entry->deref->mode;
}
+/**
+ * Convert a single-bit variable mode into a zero-based index (the position
+ * of its set bit).
+ *
+ * nir_var_mem_global is folded onto nir_var_mem_ssbo first, so both modes
+ * yield the same index and are therefore tracked together.
+ */
+static unsigned
+mode_to_index(nir_variable_mode mode)
+{
+   /* Exactly one mode bit must be set. */
+   assert(util_bitcount(mode) == 1);
+
+   /* Globals and SSBOs share one slot. */
+   const nir_variable_mode tracked =
+      (mode == nir_var_mem_global) ? nir_var_mem_ssbo : mode;
+
+   return ffs(tracked) - 1;
+}
+
+/**
+ * Widen a mode mask so that it also covers the modes that may alias it.
+ *
+ * If the mask contains either nir_var_mem_ssbo or nir_var_mem_global, the
+ * result contains both; any other mask is returned unchanged.
+ */
+static nir_variable_mode
+aliasing_modes(nir_variable_mode modes)
+{
+   /* Global and SSBO can alias, so they are always requested as a pair. */
+   const nir_variable_mode ssbo_and_global =
+      nir_var_mem_ssbo | nir_var_mem_global;
+
+   return (modes & ssbo_and_global) ? (modes | ssbo_and_global) : modes;
+}
+
static struct entry *
create_entry(struct vectorize_ctx *ctx,
const struct intrinsic_info *info,
nir_ssa_def *base = entry->info->base_src >= 0 ?
intrin->src[entry->info->base_src].ssa : NULL;
uint64_t offset = 0;
- if (nir_intrinsic_infos[intrin->intrinsic].index_map[NIR_INTRINSIC_BASE])
+ if (nir_intrinsic_has_base(intrin))
offset += nir_intrinsic_base(intrin);
entry->key = create_entry_key_from_offset(entry, base, 1, &offset);
entry->offset = offset;
if (entry->info->resource_src >= 0)
entry->key->resource = intrin->src[entry->info->resource_src].ssa;
- if (nir_intrinsic_infos[intrin->intrinsic].index_map[NIR_INTRINSIC_ACCESS])
+ if (nir_intrinsic_has_access(intrin))
entry->access = nir_intrinsic_access(intrin);
else if (entry->key->var)
entry->access = entry->key->var->data.access;
best_align = gcd(best_align, entry->key->offset_defs_mul[i]);
}
- if (nir_intrinsic_infos[entry->intrin->intrinsic].index_map[NIR_INTRINSIC_ALIGN_MUL])
+ if (nir_intrinsic_has_align_mul(entry->intrin))
best_align = MAX2(best_align, nir_intrinsic_align(entry->intrin));
/* ensure the result is a power of two that fits in a int32_t */
static bool update_align(struct entry *entry)
{
- bool has_align_index =
- nir_intrinsic_infos[entry->intrin->intrinsic].index_map[NIR_INTRINSIC_ALIGN_MUL];
- if (has_align_index) {
+ if (nir_intrinsic_has_align_mul(entry->intrin)) {
unsigned align = get_best_align(entry);
if (align != nir_intrinsic_align(entry->intrin)) {
nir_intrinsic_set_align(entry->intrin, align, 0);
}
/* update base/align */
- bool has_base_index =
- nir_intrinsic_infos[first->intrin->intrinsic].index_map[NIR_INTRINSIC_BASE];
-
- if (first != low && has_base_index)
+ if (first != low && nir_intrinsic_has_base(first->intrin))
nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin));
first->key = low->key;
}
/* update base/align */
- bool has_base_index =
- nir_intrinsic_infos[second->intrin->intrinsic].index_map[NIR_INTRINSIC_BASE];
-
- if (second != low && has_base_index)
+ if (second != low && nir_intrinsic_has_base(second->intrin))
nir_intrinsic_set_base(second->intrin, nir_intrinsic_base(low->intrin));
second->key = low->key;
static bool
may_alias(struct entry *a, struct entry *b)
{
- assert(get_variable_mode(a) == get_variable_mode(b));
+ assert(mode_to_index(get_variable_mode(a)) ==
+ mode_to_index(get_variable_mode(b)));
/* if the resources/variables are definitively different and both have
* ACCESS_RESTRICT, we can assume they do not alias. */
nir_var_mem_push_const | nir_var_mem_ubo))
return false;
- unsigned mode_index = ffs(mode) - 1;
+ unsigned mode_index = mode_to_index(mode);
if (first->is_store) {
/* find first entry that aliases "first" */
list_for_each_entry_from(struct entry, next, first, &ctx->entries[mode_index], head) {
return false;
}
+/**
+ * Robustness guard evaluated before two accesses are combined.
+ *
+ * Returns true when the mode of "low" is one of ctx->robust_modes and its
+ * constant offset is negative while offset + access size is non-negative.
+ * Returns false otherwise, including for every mode outside
+ * ctx->robust_modes.
+ */
+static bool
+check_for_robustness(struct vectorize_ctx *ctx, struct entry *low)
+{
+   /* Modes that were not requested as robust never block vectorization. */
+   if (!(get_variable_mode(low) & ctx->robust_modes))
+      return false;
+
+   unsigned bit_size = get_bit_size(low);
+   unsigned access_size = low->intrin->num_components * bit_size;
+
+   /* don't attempt to vectorize accesses if the offset can overflow. */
+   /* TODO: handle indirect accesses. */
+   /* NOTE(review): access_size is components * bit size, while the offset
+    * looks like a byte offset -- confirm the units agree. */
+   if (low->offset_signed >= 0)
+      return false;
+
+   return low->offset_signed + access_size >= 0;
+}
+
static bool
is_strided_vector(const struct glsl_type *type)
{
if (glsl_type_is_vector(type)) {
- return glsl_get_explicit_stride(type) !=
+ unsigned explicit_stride = glsl_get_explicit_stride(type);
+ return explicit_stride != 0 && explicit_stride !=
type_scalar_size_bytes(glsl_get_array_element(type));
} else {
return false;
struct entry *low, struct entry *high,
struct entry *first, struct entry *second)
{
+ if (!(get_variable_mode(first) & ctx->modes) ||
+ !(get_variable_mode(second) & ctx->modes))
+ return false;
+
if (check_for_aliasing(ctx, first, second))
return false;
+ if (check_for_robustness(ctx, low))
+ return false;
+
/* we can only vectorize non-volatile loads/stores of the same type and with
* the same access */
if (first->info != second->info || first->access != second->access ||
case nir_intrinsic_memory_barrier_shared:
modes = nir_var_mem_shared;
break;
- case nir_intrinsic_scoped_memory_barrier:
- modes = nir_intrinsic_memory_modes(intrin);
+ case nir_intrinsic_scoped_barrier:
+ if (nir_intrinsic_memory_scope(intrin) == NIR_SCOPE_NONE)
+ break;
+
+ modes = nir_intrinsic_memory_modes(intrin) & (nir_var_mem_ssbo |
+ nir_var_mem_shared |
+ nir_var_mem_global);
acquire = nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE;
release = nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE;
switch (nir_intrinsic_memory_scope(intrin)) {
while (modes) {
unsigned mode_index = u_bit_scan(&modes);
+ if ((1 << mode_index) == nir_var_mem_global) {
+ /* Global should be rolled in with SSBO */
+ assert(list_is_empty(&ctx->entries[mode_index]));
+ assert(ctx->loads[mode_index] == NULL);
+ assert(ctx->stores[mode_index] == NULL);
+ continue;
+ }
if (acquire)
*progress |= vectorize_entries(ctx, impl, ctx->loads[mode_index]);
nir_variable_mode mode = info->mode;
if (!mode)
mode = nir_src_as_deref(intrin->src[info->deref_src])->mode;
- if (!(mode & ctx->modes))
+ if (!(mode & aliasing_modes(ctx->modes)))
continue;
- unsigned mode_index = ffs(mode) - 1;
+ unsigned mode_index = mode_to_index(mode);
/* create entry */
struct entry *entry = create_entry(ctx, info, intrin);
bool
nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
- nir_should_vectorize_mem_func callback)
+ nir_should_vectorize_mem_func callback,
+ nir_variable_mode robust_modes)
{
bool progress = false;
struct vectorize_ctx *ctx = rzalloc(NULL, struct vectorize_ctx);
ctx->modes = modes;
ctx->callback = callback;
+ ctx->robust_modes = robust_modes;
- nir_index_vars(shader, NULL, modes);
+ nir_shader_index_vars(shader, modes);
nir_foreach_function(function, shader) {
if (function->impl) {
if (modes & nir_var_function_temp)
- nir_index_vars(shader, function->impl, nir_var_function_temp);
+ nir_function_impl_index_vars(function->impl);
nir_foreach_block(block, function->impl)
progress |= process_block(function->impl, ctx, block);