return progress;
}
+/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
+ *
+ * This pass is intended to be used for cross-stage shader I/O and driver-
+ * managed uniforms to turn deref-based access into a simpler model using
+ * locations or offsets. For fragment shader inputs, it can optionally turn
+ * load_deref into an explicit interpolation using barycentrics coming from
+ * one of the load_barycentric_* intrinsics. This pass requires that all
+ * deref chains are complete and contain no casts.
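+ *
+ * A typical invocation might look like this (a sketch only; the type_size
+ * callback is driver-specific and count_vec4_slots is illustrative):
+ *
+ *    static int
+ *    count_vec4_slots(const struct glsl_type *type, bool bindless)
+ *    {
+ *       return glsl_count_attribute_slots(type, false);
+ *    }
+ *
+ *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
+ *                 count_vec4_slots, (nir_lower_io_options)0);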
+ */
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
int (*type_size)(const struct glsl_type *, bool),
addr_format == nir_address_format_64bit_bounded_global;
}
+static bool
+addr_format_is_offset(nir_address_format addr_format)
+{
+ return addr_format == nir_address_format_32bit_offset;
+}
+
static nir_ssa_def *
addr_to_global(nir_builder *b, nir_ssa_def *addr,
nir_address_format addr_format)
op = nir_intrinsic_load_kernel_input;
break;
case nir_var_mem_shared:
- assert(addr_format == nir_address_format_32bit_offset);
+ assert(addr_format_is_offset(addr_format));
op = nir_intrinsic_load_shared;
break;
+ case nir_var_shader_temp:
+ case nir_var_function_temp:
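+ /* Temporaries either go to scratch, when the address format is a simple
+  * offset, or live at a real global address.
+  */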
+ if (addr_format_is_offset(addr_format)) {
+ op = nir_intrinsic_load_scratch;
+ } else {
+ assert(addr_format_is_global(addr_format));
+ op = nir_intrinsic_load_global;
+ }
+ break;
default:
unreachable("Unsupported explicit IO variable mode");
}
load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
}
- if (mode != nir_var_shader_in && mode != nir_var_mem_shared)
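+ /* index_map stores the index's location in const_index plus one, so a
+  * non-zero entry means the intrinsic actually has an ACCESS index to set.
+  */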
+ if (nir_intrinsic_infos[op].index_map[NIR_INTRINSIC_ACCESS] > 0)
nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
unsigned bit_size = intrin->dest.ssa.bit_size;
* standard encoding for booleans rather than forcing a 0/1 boolean.
* This should save an instruction or two.
*/
- if (mode == nir_var_mem_shared)
+ if (mode == nir_var_mem_shared ||
+ mode == nir_var_shader_temp ||
+ mode == nir_var_function_temp)
result = nir_b2b1(b, result);
else
result = nir_i2b(b, result);
op = nir_intrinsic_store_global;
break;
case nir_var_mem_shared:
- assert(addr_format == nir_address_format_32bit_offset);
+ assert(addr_format_is_offset(addr_format));
op = nir_intrinsic_store_shared;
break;
+ case nir_var_shader_temp:
+ case nir_var_function_temp:
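+ /* As with loads: offset-based address formats store to scratch while
+  * global formats store to global memory.
+  */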
+ if (addr_format_is_offset(addr_format)) {
+ op = nir_intrinsic_store_scratch;
+ } else {
+ assert(addr_format_is_global(addr_format));
+ op = nir_intrinsic_store_global;
+ }
+ break;
default:
unreachable("Unsupported explicit IO variable mode");
}
*
* TODO: Make the native bool bit_size an option.
*/
- if (mode == nir_var_mem_shared)
+ if (mode == nir_var_mem_shared ||
+ mode == nir_var_shader_temp ||
+ mode == nir_var_function_temp)
value = nir_b2b32(b, value);
else
value = nir_b2i(b, value, 32);
nir_intrinsic_set_write_mask(store, write_mask);
- if (mode != nir_var_mem_shared)
+ if (nir_intrinsic_infos[op].index_map[NIR_INTRINSIC_ACCESS] > 0)
nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
/* TODO: We should try and provide a better alignment. For OpenCL, we need
/* Global atomics don't have access flags because they assume that the
* address may be non-uniform.
*/
- if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared)
+ if (nir_intrinsic_infos[op].index_map[NIR_INTRINSIC_ACCESS] > 0)
nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
assert(intrin->dest.ssa.num_components == 1);
assert(deref->dest.is_ssa);
switch (deref->deref_type) {
case nir_deref_type_var:
- assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared));
- return nir_imm_intN_t(b, deref->var->data.driver_location,
- deref->dest.ssa.bit_size);
+ assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared |
+ nir_var_shader_temp | nir_var_function_temp));
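+ /* With a global address format, temporaries live in scratch memory, so
+  * build an absolute address from the scratch base pointer plus the
+  * variable's driver_location.
+  */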
+ if (addr_format_is_global(addr_format)) {
+ assert(deref->mode & (nir_var_shader_temp | nir_var_function_temp));
+ base_addr =
+ nir_load_scratch_base_ptr(b, !(deref->mode & nir_var_shader_temp),
+ nir_address_format_num_components(addr_format),
+ nir_address_format_bit_size(addr_format));
+ return build_addr_iadd_imm(b, base_addr, addr_format,
+ deref->var->data.driver_location);
+ } else {
+ return nir_imm_intN_t(b, deref->var->data.driver_location,
+ deref->dest.ssa.bit_size);
+ }
case nir_deref_type_array: {
nir_deref_instr *parent = nir_deref_instr_parent(deref);
return progress;
}
+/** Lower explicitly laid out I/O access to byte offset/address intrinsics
+ *
+ * This pass is intended to be used for any I/O which touches memory external
+ * to the shader or which is directly visible to the client. It requires that
+ * all data types in the given modes have explicit stride/offset decorations
+ * to tell it exactly how to calculate the offset/address for the given load,
+ * store, or atomic operation. If the offset/stride information does not come
+ * from the client explicitly (as with shared variables in GL or Vulkan),
+ * nir_lower_vars_to_explicit_types() can be used to add them.
+ *
+ * Unlike nir_lower_io, this pass is fully capable of handling incomplete
+ * deref chains which may contain casts. It does so by walking the
+ * deref chain backwards and simply replacing each deref, one at a time, with
+ * the appropriate address calculation. The pass takes a nir_address_format
+ * parameter which describes how the offset or address is to be represented
+ * during calculations. By ensuring that the address is always in a
+ * consistent format, pointers can safely be conjured from thin air by the
+ * driver, stored to variables, passed through phis, etc.
+ *
+ * The one exception to the simple algorithm described above is for handling
+ * row-major matrices in which case we may look down one additional level of
+ * the deref chain.
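+ *
+ * For example, GL or Vulkan shared memory could be lowered roughly like
+ * this (a sketch; shared_type_info stands in for a driver-provided
+ * glsl_type_size_align_func):
+ *
+ *    nir_lower_vars_to_explicit_types(shader, nir_var_mem_shared,
+ *                                     shared_type_info);
+ *    nir_lower_explicit_io(shader, nir_var_mem_shared,
+ *                          nir_address_format_32bit_offset);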
+ */
bool
nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
nir_address_format addr_format)
glsl_type_size_align_func type_info)
{
bool progress = false;
- unsigned offset = 0;
+ unsigned offset;
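+ /* Temporaries append to any scratch space the shader has already
+  * allocated; shared memory is laid out from offset zero.
+  */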
+ switch (mode) {
+ case nir_var_function_temp:
+ case nir_var_shader_temp:
+ offset = shader->scratch_size;
+ break;
+ case nir_var_mem_shared:
+ offset = 0;
+ break;
+ default:
+ unreachable("Unsupported mode");
+ }
nir_foreach_variable(var, vars) {
unsigned size, align;
const struct glsl_type *explicit_type =
offset = var->data.driver_location + size;
}
- if (mode == nir_var_mem_shared) {
+ switch (mode) {
+ case nir_var_shader_temp:
+ case nir_var_function_temp:
+ shader->scratch_size = offset;
+ break;
+ case nir_var_mem_shared:
shader->info.cs.shared_size = offset;
shader->num_shared = offset;
+ break;
+ default:
+ unreachable("Unsupported mode");
}
return progress;