X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir_lower_system_values.c;h=b99f655c2e0d75863185f0c7bc4e2c6b4403ddae;hb=27e6117ee9f77ef40721f7757e181ddf93fc60dd;hp=a5f44e657a5f8508c49e52a2f0dc4b45ccb61fda;hpb=a331d7d1cdfdc971f707fb6b1f71edbad622c804;p=mesa.git diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index a5f44e657a5..b99f655c2e0 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -28,120 +28,106 @@ #include "nir.h" #include "nir_builder.h" -static bool -convert_block(nir_block *block, nir_builder *b) +static nir_ssa_def * +sanitize_32bit_sysval(nir_builder *b, nir_intrinsic_instr *intrin) { - bool progress = false; - - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *load_deref = nir_instr_as_intrinsic(instr); - if (load_deref->intrinsic != nir_intrinsic_load_deref) - continue; - - nir_variable *var = - nir_deref_instr_get_variable(nir_src_as_deref(load_deref->src[0])); + assert(intrin->dest.is_ssa); + const unsigned bit_size = intrin->dest.ssa.bit_size; + if (bit_size == 32) + return NULL; - if (var->data.mode != nir_var_system_value) - continue; - - b->cursor = nir_after_instr(&load_deref->instr); - - nir_ssa_def *sysval = NULL; - switch (var->data.location) { - case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: { - /* From the GLSL man page for gl_GlobalInvocationID: - * - * "The value of gl_GlobalInvocationID is equal to - * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID" - */ + intrin->dest.ssa.bit_size = 32; + return nir_u2u(b, &intrin->dest.ssa, bit_size); +} - nir_const_value local_size; - memset(&local_size, 0, sizeof(local_size)); - local_size.u32[0] = b->shader->info.cs.local_size[0]; - local_size.u32[1] = b->shader->info.cs.local_size[1]; - local_size.u32[2] = b->shader->info.cs.local_size[2]; +static nir_ssa_def* 
+build_global_group_size(nir_builder *b, unsigned bit_size) +{ + nir_ssa_def *group_size = nir_load_local_group_size(b); + nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size); + return nir_imul(b, nir_u2u(b, group_size, bit_size), + num_work_groups); +} - nir_ssa_def *group_id = nir_load_work_group_id(b); - nir_ssa_def *local_id = nir_load_local_invocation_id(b); +static bool +lower_system_value_filter(const nir_instr *instr, const void *_state) +{ + return instr->type == nir_instr_type_intrinsic; +} - sysval = nir_iadd(b, nir_imul(b, group_id, - nir_build_imm(b, 3, 32, local_size)), - local_id); - break; +static nir_ssa_def * +lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state) +{ + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + /* All the intrinsics we care about are loads */ + if (!nir_intrinsic_infos[intrin->intrinsic].has_dest) + return NULL; + + assert(intrin->dest.is_ssa); + const unsigned bit_size = intrin->dest.ssa.bit_size; + + switch (intrin->intrinsic) { + case nir_intrinsic_load_vertex_id: + if (b->shader->options->vertex_id_zero_based) { + return nir_iadd(b, nir_load_vertex_id_zero_base(b), + nir_load_first_vertex(b)); + } else { + return NULL; } - case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: { - /* If lower_cs_local_index_from_id is true, then we derive the local - * index from the local id. 
- */ - if (!b->shader->options->lower_cs_local_index_from_id) - break; - - /* From the GLSL man page for gl_LocalInvocationIndex: - * - * "The value of gl_LocalInvocationIndex is equal to - * gl_LocalInvocationID.z * gl_WorkGroupSize.x * - * gl_WorkGroupSize.y + gl_LocalInvocationID.y * - * gl_WorkGroupSize.x + gl_LocalInvocationID.x" - */ - nir_ssa_def *local_id = nir_load_local_invocation_id(b); - - nir_ssa_def *size_x = - nir_imm_int(b, b->shader->info.cs.local_size[0]); - nir_ssa_def *size_y = - nir_imm_int(b, b->shader->info.cs.local_size[1]); - - sysval = nir_imul(b, nir_channel(b, local_id, 2), - nir_imul(b, size_x, size_y)); - sysval = nir_iadd(b, sysval, - nir_imul(b, nir_channel(b, local_id, 1), size_x)); - sysval = nir_iadd(b, sysval, nir_channel(b, local_id, 0)); - break; + case nir_intrinsic_load_base_vertex: + /** + * From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification: + * + * "gl_BaseVertex holds the integer value passed to the baseVertex + * parameter to the command that resulted in the current shader + * invocation. In the case where the command has no baseVertex + * parameter, the value of gl_BaseVertex is zero." 
+ */ + if (b->shader->options->lower_base_vertex) { + return nir_iand(b, nir_load_is_indexed_draw(b), + nir_load_first_vertex(b)); + } else { + return NULL; } - case SYSTEM_VALUE_LOCAL_GROUP_SIZE: { - nir_const_value local_size; - memset(&local_size, 0, sizeof(local_size)); - local_size.u32[0] = b->shader->info.cs.local_size[0]; - local_size.u32[1] = b->shader->info.cs.local_size[1]; - local_size.u32[2] = b->shader->info.cs.local_size[2]; - sysval = nir_build_imm(b, 3, 32, local_size); - break; + case nir_intrinsic_load_helper_invocation: + if (b->shader->options->lower_helper_invocation) { + nir_ssa_def *tmp; + tmp = nir_ishl(b, nir_imm_int(b, 1), + nir_load_sample_id_no_per_sample(b)); + tmp = nir_iand(b, nir_load_sample_mask_in(b), tmp); + return nir_inot(b, nir_i2b(b, tmp)); + } else { + return NULL; } - case SYSTEM_VALUE_VERTEX_ID: - if (b->shader->options->vertex_id_zero_based) { - sysval = nir_iadd(b, - nir_load_vertex_id_zero_base(b), - nir_load_first_vertex(b)); - } else { - sysval = nir_load_vertex_id(b); - } - break; + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_local_invocation_index: + case nir_intrinsic_load_local_group_size: + return sanitize_32bit_sysval(b, intrin); - case SYSTEM_VALUE_BASE_VERTEX: - /** - * From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification: - * - * "gl_BaseVertex holds the integer value passed to the baseVertex - * parameter to the command that resulted in the current shader - * invocation. In the case where the command has no baseVertex - * parameter, the value of gl_BaseVertex is zero." + case nir_intrinsic_load_deref: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + if (deref->mode != nir_var_system_value) + return NULL; + + if (deref->deref_type != nir_deref_type_var) { + /* The only system value that is an array is gl_SampleMaskIn, + * which is always an array of one element. 
*/ - if (b->shader->options->lower_base_vertex) - sysval = nir_iand(b, - nir_load_is_indexed_draw(b), - nir_load_first_vertex(b)); - break; + assert(deref->deref_type == nir_deref_type_array); + deref = nir_deref_instr_parent(deref); + assert(deref->deref_type == nir_deref_type_var); + assert(deref->var->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN); + } + nir_variable *var = deref->var; + switch (var->data.location) { case SYSTEM_VALUE_INSTANCE_INDEX: - sysval = nir_iadd(b, - nir_load_instance_id(b), - nir_load_base_instance(b)); - break; + return nir_iadd(b, nir_load_instance_id(b), + nir_load_base_instance(b)); case SYSTEM_VALUE_SUBGROUP_EQ_MASK: case SYSTEM_VALUE_SUBGROUP_GE_MASK: @@ -155,66 +141,262 @@ convert_block(nir_block *block, nir_builder *b) var->type, NULL); load->num_components = load->dest.ssa.num_components; nir_builder_instr_insert(b, &load->instr); - sysval = &load->dest.ssa; - break; + return &load->dest.ssa; } case SYSTEM_VALUE_DEVICE_INDEX: if (b->shader->options->lower_device_index_to_zero) - sysval = nir_imm_int(b, 0); + return nir_imm_int(b, 0); break; - default: - break; - } + case SYSTEM_VALUE_GLOBAL_GROUP_SIZE: + return build_global_group_size(b, bit_size); - if (sysval == NULL) { - nir_intrinsic_op sysval_op = - nir_intrinsic_from_system_value(var->data.location); - sysval = nir_load_system_value(b, sysval_op, 0); - } + case SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL: + return nir_load_barycentric(b, nir_intrinsic_load_barycentric_pixel, + INTERP_MODE_NOPERSPECTIVE); - nir_ssa_def_rewrite_uses(&load_deref->dest.ssa, nir_src_for_ssa(sysval)); - nir_instr_remove(&load_deref->instr); + case SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID: + return nir_load_barycentric(b, nir_intrinsic_load_barycentric_centroid, + INTERP_MODE_NOPERSPECTIVE); - progress = true; - } + case SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE: + return nir_load_barycentric(b, nir_intrinsic_load_barycentric_sample, + INTERP_MODE_NOPERSPECTIVE); - return progress; -} + case 
SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL: + return nir_load_barycentric(b, nir_intrinsic_load_barycentric_pixel, + INTERP_MODE_SMOOTH); -static bool -convert_impl(nir_function_impl *impl) -{ - bool progress = false; - nir_builder builder; - nir_builder_init(&builder, impl); + case SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID: + return nir_load_barycentric(b, nir_intrinsic_load_barycentric_centroid, + INTERP_MODE_SMOOTH); + + case SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE: + return nir_load_barycentric(b, nir_intrinsic_load_barycentric_sample, + INTERP_MODE_SMOOTH); + + case SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL: + return nir_load_barycentric(b, nir_intrinsic_load_barycentric_model, + INTERP_MODE_NONE); + + default: + break; + } - nir_foreach_block(block, impl) { - progress |= convert_block(block, &builder); + nir_intrinsic_op sysval_op = + nir_intrinsic_from_system_value(var->data.location); + return nir_load_system_value(b, sysval_op, 0, + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size); } - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - return progress; + default: + return NULL; + } } bool nir_lower_system_values(nir_shader *shader) { - bool progress = false; - - nir_foreach_function(function, shader) { - if (function->impl) - progress = convert_impl(function->impl) || progress; - } + bool progress = nir_shader_lower_instructions(shader, + lower_system_value_filter, + lower_system_value_instr, + NULL); /* We're going to delete the variables so we need to clean up all those * derefs we left lying around. 
*/ - nir_remove_dead_derefs(shader); + if (progress) + nir_remove_dead_derefs(shader); - exec_list_make_empty(&shader->system_values); + nir_foreach_variable_with_modes_safe(var, shader, nir_var_system_value) + exec_node_remove(&var->node); return progress; } + +static bool +lower_compute_system_value_filter(const nir_instr *instr, const void *_options) +{ + return instr->type == nir_instr_type_intrinsic; +} + +static nir_ssa_def * +lower_compute_system_value_instr(nir_builder *b, + nir_instr *instr, void *_options) +{ + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + const nir_lower_compute_system_values_options *options = _options; + + /* All the intrinsics we care about are loads */ + if (!nir_intrinsic_infos[intrin->intrinsic].has_dest) + return NULL; + + assert(intrin->dest.is_ssa); + const unsigned bit_size = intrin->dest.ssa.bit_size; + + switch (intrin->intrinsic) { + case nir_intrinsic_load_local_invocation_id: + /* If lower_cs_local_id_from_index is true, then we derive the local + * id from the local index. + */ + if (b->shader->options->lower_cs_local_id_from_index) { + /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based + * on this formula: + * + * gl_LocalInvocationID.x = + * gl_LocalInvocationIndex % gl_WorkGroupSize.x; + * gl_LocalInvocationID.y = + * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) % + * gl_WorkGroupSize.y; + * gl_LocalInvocationID.z = + * (gl_LocalInvocationIndex / + * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) % + * gl_WorkGroupSize.z; + * + * However, the final % gl_WorkGroupSize.z does nothing unless we + * accidentally end up with a gl_LocalInvocationIndex that is too + * large so it can safely be omitted. + */ + nir_ssa_def *local_index = nir_load_local_invocation_index(b); + nir_ssa_def *local_size = nir_load_local_group_size(b); + + /* Because no hardware supports a local workgroup size greater than + * about 1K, this calculation can be done in 32-bit and can save some + * 64-bit arithmetic. 
+ */ + nir_ssa_def *id_x, *id_y, *id_z; + id_x = nir_umod(b, local_index, + nir_channel(b, local_size, 0)); + id_y = nir_umod(b, nir_udiv(b, local_index, + nir_channel(b, local_size, 0)), + nir_channel(b, local_size, 1)); + id_z = nir_udiv(b, local_index, + nir_imul(b, nir_channel(b, local_size, 0), + nir_channel(b, local_size, 1))); + return nir_u2u(b, nir_vec3(b, id_x, id_y, id_z), bit_size); + } else { + return NULL; + } + + case nir_intrinsic_load_local_invocation_index: + /* If lower_cs_local_index_from_id is true, then we derive the local + * index from the local id. + */ + if (b->shader->options->lower_cs_local_index_from_id) { + /* From the GLSL man page for gl_LocalInvocationIndex: + * + * "The value of gl_LocalInvocationIndex is equal to + * gl_LocalInvocationID.z * gl_WorkGroupSize.x * + * gl_WorkGroupSize.y + gl_LocalInvocationID.y * + * gl_WorkGroupSize.x + gl_LocalInvocationID.x" + */ + nir_ssa_def *local_id = nir_load_local_invocation_id(b); + + nir_ssa_def *size_x = + nir_imm_int(b, b->shader->info.cs.local_size[0]); + nir_ssa_def *size_y = + nir_imm_int(b, b->shader->info.cs.local_size[1]); + + /* Because no hardware supports a local workgroup size greater than + * about 1K, this calculation can be done in 32-bit and can save some + * 64-bit arithmetic. + */ + nir_ssa_def *index; + index = nir_imul(b, nir_channel(b, local_id, 2), + nir_imul(b, size_x, size_y)); + index = nir_iadd(b, index, + nir_imul(b, nir_channel(b, local_id, 1), size_x)); + index = nir_iadd(b, index, nir_channel(b, local_id, 0)); + return nir_u2u(b, index, bit_size); + } else { + return NULL; + } + + case nir_intrinsic_load_local_group_size: + if (b->shader->info.cs.local_size_variable) { + /* If the local work group size is variable it can't be lowered at + * this point. We do, however, have to make sure that the intrinsic + * is only 32-bit. 
+ */ + return NULL; + } else { + /* using a 32 bit constant is safe here as no device/driver needs more + * than 32 bits for the local size */ + nir_const_value local_size_const[3]; + memset(local_size_const, 0, sizeof(local_size_const)); + local_size_const[0].u32 = b->shader->info.cs.local_size[0]; + local_size_const[1].u32 = b->shader->info.cs.local_size[1]; + local_size_const[2].u32 = b->shader->info.cs.local_size[2]; + return nir_u2u(b, nir_build_imm(b, 3, 32, local_size_const), bit_size); + } + + case nir_intrinsic_load_global_invocation_id_zero_base: { + if ((options && options->has_base_work_group_id) || + !b->shader->options->has_cs_global_id) { + nir_ssa_def *group_size = nir_load_local_group_size(b); + nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size); + nir_ssa_def *local_id = nir_load_local_invocation_id(b); + + return nir_iadd(b, nir_imul(b, group_id, + nir_u2u(b, group_size, bit_size)), + nir_u2u(b, local_id, bit_size)); + } else { + return NULL; + } + } + + case nir_intrinsic_load_global_invocation_id: { + if (options && options->has_base_global_invocation_id) + return nir_iadd(b, nir_load_global_invocation_id_zero_base(b, bit_size), + nir_load_base_global_invocation_id(b, bit_size)); + else if (!b->shader->options->has_cs_global_id) + return nir_load_global_invocation_id_zero_base(b, bit_size); + else + return NULL; + } + + case nir_intrinsic_load_global_invocation_index: { + /* OpenCL's global_linear_id explicitly removes the global offset before computing this */ + assert(b->shader->info.stage == MESA_SHADER_KERNEL); + nir_ssa_def *global_base_id = nir_load_base_global_invocation_id(b, bit_size); + nir_ssa_def *global_id = nir_isub(b, nir_load_global_invocation_id(b, bit_size), global_base_id); + nir_ssa_def *global_size = build_global_group_size(b, bit_size); + + /* index = id.x + ((id.y + (id.z * size.y)) * size.x) */ + nir_ssa_def *index; + index = nir_imul(b, nir_channel(b, global_id, 2), + nir_channel(b, global_size, 1)); + index = 
nir_iadd(b, nir_channel(b, global_id, 1), index); + index = nir_imul(b, nir_channel(b, global_size, 0), index); + index = nir_iadd(b, nir_channel(b, global_id, 0), index); + return index; + } + + case nir_intrinsic_load_work_group_id: { + if (options && options->has_base_work_group_id) + return nir_iadd(b, nir_u2u(b, nir_load_work_group_id_zero_base(b), bit_size), + nir_load_base_work_group_id(b, bit_size)); + else + return NULL; + } + + default: + return NULL; + } +} + +bool +nir_lower_compute_system_values(nir_shader *shader, + const nir_lower_compute_system_values_options *options) +{ + if (shader->info.stage != MESA_SHADER_COMPUTE && + shader->info.stage != MESA_SHADER_KERNEL) + return false; + + return nir_shader_lower_instructions(shader, + lower_compute_system_value_filter, + lower_compute_system_value_instr, + (void*)options); +}