From: Jesse Natalie Date: Mon, 27 Jul 2020 23:56:21 +0000 (-0700) Subject: nir: Add new system values and intrinsics for dealing with CL work offsets X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=41e4eb9948d0ca62e9586b491c720a1a05904802 nir: Add new system values and intrinsics for dealing with CL work offsets New intrinsics are added for global invocation IDs and work group IDs to deal with offsets in both. The only one of these that needs a system value is global invocation offset, for CL's get_global_offset(). Note that CL requires very large work group sizes, so these intrinsics are modified to be able to use 64bit values, for 64bit SPIR-V. Reviewed-by: Karol Herbst Reviewed-by: Jason Ekstrand Part-of: --- diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c index 28343ebd83a..e27cae1e61b 100644 --- a/src/amd/vulkan/radv_meta_buffer.c +++ b/src/amd/vulkan/radv_meta_buffer.c @@ -16,7 +16,7 @@ build_buffer_fill_shader(struct radv_device *dev) b.shader->info.cs.local_size[2] = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -71,7 +71,7 @@ build_buffer_copy_shader(struct radv_device *dev) b.shader->info.cs.local_size[2] = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c index 2354ebd90f4..913d14de4a1 100644 --- a/src/amd/vulkan/radv_meta_bufimage.c +++ b/src/amd/vulkan/radv_meta_bufimage.c @@ -60,7 +60,7 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -289,7 +289,7 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -511,7 +511,7 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -719,7 +719,7 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -936,7 +936,7 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1143,7 +1143,7 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d) output_img->data.binding = 0; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1335,7 +1335,7 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev) output_img->data.binding = 0; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index 4d4dd109165..bfeda8e3fa9 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -1175,7 +1175,7 @@ build_clear_htile_mask_shader() b.shader->info.cs.local_size[2] = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index bf9ed047c35..f4596888a12 100644 --- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -58,7 +58,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev) output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c index 8f229a8188b..3a109110c95 100644 --- a/src/amd/vulkan/radv_meta_fmask_expand.c +++ b/src/amd/vulkan/radv_meta_fmask_expand.c @@ -58,7 +58,7 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples) output_img->data.access = ACCESS_NON_READABLE; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c index b57bce8ef7a..e1e30777062 100644 --- a/src/amd/vulkan/radv_meta_resolve_cs.c +++ b/src/amd/vulkan/radv_meta_resolve_cs.c @@ -93,7 +93,7 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s output_img->data.descriptor_set = 0; output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -195,7 +195,7 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, output_img->data.descriptor_set = 0; output_img->data.binding = 1; nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index feeb5d84512..66a9fe94b90 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -203,7 +203,7 @@ build_occlusion_query_shader(struct radv_device *device) { nir_builder_instr_insert(&b, &src_buf->instr); nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -395,7 +395,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device) { nir_builder_instr_insert(&b, &src_buf->instr); nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -635,7 +635,7 @@ build_tfb_query_shader(struct radv_device *device) /* Compute global ID. */ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -837,7 +837,7 @@ build_timestamp_query_shader(struct radv_device *device) /* Compute global ID. */ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); - nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 40c39490585..693bd332a66 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -2121,6 +2121,8 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_local_group_size; case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: return nir_intrinsic_load_global_invocation_id; + case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID: + return nir_intrinsic_load_base_global_invocation_id; case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX: return nir_intrinsic_load_global_invocation_index; case SYSTEM_VALUE_WORK_DIM: @@ -2220,6 +2222,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_LOCAL_GROUP_SIZE; case nir_intrinsic_load_global_invocation_id: return SYSTEM_VALUE_GLOBAL_INVOCATION_ID; + case nir_intrinsic_load_base_global_invocation_id: + return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID; case nir_intrinsic_load_global_invocation_index: return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX; case nir_intrinsic_load_work_dim: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index da4f95c5ed3..80c61650f78 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -586,9 +586,13 @@ system_value("tess_level_inner_default", 2) system_value("patch_vertices_in", 1) system_value("local_invocation_id", 3) system_value("local_invocation_index", 1) -system_value("work_group_id", 3) +# zero_base indicates it starts from 0 for the current dispatch +# non-zero_base indicates the base is included +system_value("work_group_id", 3, bit_sizes=[32, 64]) +system_value("work_group_id_zero_base", 3) +system_value("base_work_group_id", 3, bit_sizes=[32, 64]) system_value("user_clip_plane", 4, indices=[UCP_ID]) -system_value("num_work_groups", 3) +system_value("num_work_groups", 3, bit_sizes=[32, 64]) system_value("helper_invocation", 1, bit_sizes=[1, 32]) system_value("alpha_ref_float", 1) system_value("layer_id", 1) @@ -603,7 +607,13 @@ system_value("subgroup_lt_mask", 0, bit_sizes=[32, 64]) system_value("num_subgroups", 1) system_value("subgroup_id", 1) system_value("local_group_size", 3) +# note: the definition of global_invocation_id_zero_base is based on +# (work_group_id * local_group_size) + local_invocation_id. +# it is *not* based on work_group_id_zero_base, meaning the work group +# base is already accounted for, and the global base is additive on top of that system_value("global_invocation_id", 3, bit_sizes=[32, 64]) +system_value("global_invocation_id_zero_base", 3, bit_sizes=[32, 64]) +system_value("base_global_invocation_id", 3, bit_sizes=[32, 64]) system_value("global_invocation_index", 1, bit_sizes=[32, 64]) system_value("work_dim", 1) system_value("line_width", 1) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index 05cf5107d6e..92723a5bffb 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -44,9 +44,9 @@ static nir_ssa_def* build_global_group_size(nir_builder *b, unsigned bit_size) { nir_ssa_def *group_size = nir_load_local_group_size(b); - nir_ssa_def *num_work_groups = nir_load_num_work_groups(b); + nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size); return nir_imul(b, nir_u2u(b, group_size, bit_size), - nir_u2u(b, num_work_groups, bit_size)); + num_work_groups); } static bool @@ -189,10 +189,10 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state) case nir_intrinsic_load_global_invocation_id: { nir_ssa_def *group_size = nir_load_local_group_size(b); - nir_ssa_def *group_id = nir_load_work_group_id(b); + nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size); nir_ssa_def *local_id = nir_load_local_invocation_id(b); - return nir_iadd(b, nir_imul(b, nir_u2u(b, group_id, bit_size), + return nir_iadd(b, nir_imul(b, group_id, nir_u2u(b, group_size, bit_size)), nir_u2u(b, local_id, bit_size)); } @@ -222,10 +222,6 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state) return NULL; } - case nir_intrinsic_load_num_work_groups: - case nir_intrinsic_load_work_group_id: - return sanitize_32bit_sysval(b, intrin); - case nir_intrinsic_load_deref: { nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); if (deref->mode != nir_var_system_value) diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c index a89b5138372..a91c44369e1 100644 --- a/src/compiler/shader_enums.c +++ b/src/compiler/shader_enums.c @@ -246,6 +246,7 @@ gl_system_value_name(gl_system_value sysval) ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID), ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX), ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID), + ENUM(SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID), ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX), ENUM(SYSTEM_VALUE_WORK_GROUP_ID), ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS), diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index 4ef3512849f..e2b9cd6f02e 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -615,6 +615,7 @@ typedef enum SYSTEM_VALUE_LOCAL_INVOCATION_ID, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX, SYSTEM_VALUE_GLOBAL_INVOCATION_ID, + SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID, SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX, SYSTEM_VALUE_WORK_GROUP_ID, SYSTEM_VALUE_NUM_WORK_GROUPS, diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 4d5d22f6b74..3035f5460a5 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -644,7 +644,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, break; case TGSI_SEMANTIC_BLOCK_ID: op = nir_intrinsic_load_work_group_id; - load = nir_load_work_group_id(b); + load = nir_load_work_group_id(b, 32); break; case TGSI_SEMANTIC_BLOCK_SIZE: op = nir_intrinsic_load_local_group_size;