If no options are provided, the existing intrinsics are used and
behavior is unchanged. If the options indicate that a base offset
should be applied to the global invocation ID or the work group ID,
those intrinsics are lowered to include the offset via the
corresponding base intrinsic.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5891>
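For reference, a driver that exposes these base IDs could opt in roughly
like this (a minimal sketch; "csv_options" is just a placeholder name and
which flags to set depends on what the driver actually provides):

   const nir_lower_compute_system_values_options csv_options = {
      .has_base_global_invocation_id = true,
      .has_base_work_group_id = true,
   };
   NIR_PASS_V(nir, nir_lower_compute_system_values, &csv_options);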
NIR_PASS_V(nir, nir_propagate_invariant);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
NIR_PASS_V(c->s, nir_lower_system_values);
- NIR_PASS_V(c->s, nir_lower_compute_system_values);
+ NIR_PASS_V(c->s, nir_lower_compute_system_values, NULL);
NIR_PASS_V(c->s, nir_lower_vars_to_scratch,
nir_var_function_temp,
bool nir_lower_system_values(nir_shader *shader);
-bool nir_lower_compute_system_values(nir_shader *shader);
+typedef struct nir_lower_compute_system_values_options {
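+   /* If set, lower load_global_invocation_id to
+    * load_global_invocation_id_zero_base plus
+    * load_base_global_invocation_id.
+    */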
+ bool has_base_global_invocation_id:1;
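+   /* If set, lower load_work_group_id to load_work_group_id_zero_base
+    * plus load_base_work_group_id.
+    */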
+ bool has_base_work_group_id:1;
+} nir_lower_compute_system_values_options;
+
+bool nir_lower_compute_system_values(nir_shader *shader,
+ const nir_lower_compute_system_values_options *options);
enum PACKED nir_lower_tex_packing {
nir_lower_tex_packing_none = 0,
nir_instr *instr, void *_options)
{
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ const nir_lower_compute_system_values_options *options = _options;
/* All the intrinsics we care about are loads */
if (!nir_intrinsic_infos[intrin->intrinsic].has_dest)
nir_channel(b, local_size, 1)));
return nir_u2u(b, nir_vec3(b, id_x, id_y, id_z), bit_size);
} else {
- return sanitize_32bit_sysval(b, intrin);
+ return NULL;
}
case nir_intrinsic_load_local_invocation_index:
index = nir_iadd(b, index, nir_channel(b, local_id, 0));
return nir_u2u(b, index, bit_size);
} else {
- return sanitize_32bit_sysval(b, intrin);
+ return NULL;
}
case nir_intrinsic_load_local_group_size:
* this point. We do, however, have to make sure that the intrinsic
* is only 32-bit.
*/
- return sanitize_32bit_sysval(b, intrin);
+ return NULL;
} else {
/* using a 32 bit constant is safe here as no device/driver needs more
* than 32 bits for the local size */
return nir_u2u(b, nir_build_imm(b, 3, 32, local_size_const), bit_size);
}
- case nir_intrinsic_load_global_invocation_id: {
- if (!b->shader->options->has_cs_global_id) {
+ case nir_intrinsic_load_global_invocation_id_zero_base: {
+ if ((options && options->has_base_work_group_id) ||
+ !b->shader->options->has_cs_global_id) {
nir_ssa_def *group_size = nir_load_local_group_size(b);
nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
nir_ssa_def *local_id = nir_load_local_invocation_id(b);
}
}
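+   /* When the driver provides a base, the full global invocation ID is
+    * the zero-base ID plus load_base_global_invocation_id.
+    */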
+ case nir_intrinsic_load_global_invocation_id: {
+ if (options && options->has_base_global_invocation_id)
+ return nir_iadd(b, nir_load_global_invocation_id_zero_base(b, bit_size),
+ nir_load_base_global_invocation_id(b, bit_size));
+ else if (!b->shader->options->has_cs_global_id)
+ return nir_load_global_invocation_id_zero_base(b, bit_size);
+ else
+ return NULL;
+ }
+
case nir_intrinsic_load_global_invocation_index: {
- nir_ssa_def *global_id = nir_load_global_invocation_id(b, bit_size);
+ /* OpenCL's global_linear_id explicitly removes the global offset before computing this */
+ assert(b->shader->info.stage == MESA_SHADER_KERNEL);
+ nir_ssa_def *global_base_id = nir_load_base_global_invocation_id(b, bit_size);
+ nir_ssa_def *global_id = nir_isub(b, nir_load_global_invocation_id(b, bit_size), global_base_id);
nir_ssa_def *global_size = build_global_group_size(b, bit_size);
/* index = id.x + ((id.y + (id.z * size.y)) * size.x) */
return index;
}
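+   /* When the driver provides a base, the work group ID is the zero-base
+    * ID (converted to the destination bit size) plus
+    * load_base_work_group_id.
+    */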
+ case nir_intrinsic_load_work_group_id: {
+ if (options && options->has_base_work_group_id)
+ return nir_iadd(b, nir_u2u(b, nir_load_work_group_id_zero_base(b), bit_size),
+ nir_load_base_work_group_id(b, bit_size));
+ else
+ return NULL;
+ }
+
default:
return NULL;
}
}
bool
-nir_lower_compute_system_values(nir_shader *shader)
+nir_lower_compute_system_values(nir_shader *shader,
+ const nir_lower_compute_system_values_options *options)
{
if (shader->info.stage != MESA_SHADER_COMPUTE &&
shader->info.stage != MESA_SHADER_KERNEL)
return nir_shader_lower_instructions(shader,
lower_compute_system_value_filter,
lower_compute_system_value_instr,
- NULL);
+ (void*)options);
}
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
NIR_PASS_V(nir, nir_lower_frexp);
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_lower_var_copies);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
if (c->cap_packed_uniforms)
NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
ir3_glsl_type_size);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
NIR_PASS_V(nir, nir_lower_frexp);
NIR_PASS_V(nir, nir_lower_io,
/* TODO do this somewhere else */
nir_lower_int64(nir);
nir_lower_system_values(nir);
- nir_lower_compute_system_values(nir);
+ nir_lower_compute_system_values(nir, NULL);
} else if (num_files > 0) {
nir = load_glsl(num_files, filenames, stage);
} else {
spirv_options.global_addr_format);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
if (compiler_options->lower_int64_options)
NIR_PASS_V(nir, nir_lower_int64);
if (stage == MESA_SHADER_FRAGMENT)
val_lower_input_attachments(nir, false);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
nir_remove_dead_variables(nir, nir_var_uniform, NULL);
}
OPT(nir_lower_system_values);
- OPT(nir_lower_compute_system_values);
+ OPT(nir_lower_compute_system_values, NULL);
const nir_lower_subgroups_options subgroups_options = {
.ballot_bit_size = 32,
st->pipe->screen);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_lower_var_copies);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
if (nir->options->lower_to_scalar) {
nir_variable_mode mask =
NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
/* Optimise NIR */
NIR_PASS_V(nir, nir_opt_constant_folding);