nir: Add new system values and intrinsics for dealing with CL work offsets
author: Jesse Natalie <jenatali@microsoft.com>
Mon, 27 Jul 2020 23:56:21 +0000 (16:56 -0700)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 21 Aug 2020 22:07:05 +0000 (22:07 +0000)
New intrinsics are added for global invocation IDs and work group IDs to
handle the offsets that apply to each. The only one of these that needs a
system value is the global invocation offset, for CL's get_global_offset().

Note that CL requires very large work group sizes, so these intrinsics
are modified to support 64-bit values, for 64-bit SPIR-V.

Reviewed-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5891>

13 files changed:
src/amd/vulkan/radv_meta_buffer.c
src/amd/vulkan/radv_meta_bufimage.c
src/amd/vulkan/radv_meta_clear.c
src/amd/vulkan/radv_meta_fast_clear.c
src/amd/vulkan/radv_meta_fmask_expand.c
src/amd/vulkan/radv_meta_resolve_cs.c
src/amd/vulkan/radv_query.c
src/compiler/nir/nir.c
src/compiler/nir/nir_intrinsics.py
src/compiler/nir/nir_lower_system_values.c
src/compiler/shader_enums.c
src/compiler/shader_enums.h
src/gallium/auxiliary/nir/tgsi_to_nir.c

index 28343ebd83ab9b7cba822694548c93bfd7154411..e27cae1e61b8edacc1a99024a574f7e8db0b0dd1 100644 (file)
@@ -16,7 +16,7 @@ build_buffer_fill_shader(struct radv_device *dev)
        b.shader->info.cs.local_size[2] = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -71,7 +71,7 @@ build_buffer_copy_shader(struct radv_device *dev)
        b.shader->info.cs.local_size[2] = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
index 2354ebd90f4bf9e64dbf50b68809c8ee398d809c..913d14de4a13e6ac9a880b7e9cd9d247bdd9a05f 100644 (file)
@@ -60,7 +60,7 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
        output_img->data.binding = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -289,7 +289,7 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
        output_img->data.binding = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -511,7 +511,7 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
        output_img->data.binding = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -719,7 +719,7 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
        output_img->data.binding = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -936,7 +936,7 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
        output_img->data.binding = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -1143,7 +1143,7 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
        output_img->data.binding = 0;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -1335,7 +1335,7 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
        output_img->data.binding = 0;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
index 4d4dd1091655852b1331cde7a68b27797cb57730..bfeda8e3fa9369391b638f2da761f786b6959e5b 100644 (file)
@@ -1175,7 +1175,7 @@ build_clear_htile_mask_shader()
        b.shader->info.cs.local_size[2] = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
index bf9ed047c35623d236020c214d702220e7b88356..f4596888a12ee5c0a667340a02f1c22cae9274b3 100644 (file)
@@ -58,7 +58,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
        output_img->data.binding = 1;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
index 8f229a8188b7054b6da6389234c65defaeaaa122..3a109110c951725d738aed6af3075dc6ddc536d9 100644 (file)
@@ -58,7 +58,7 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples)
        output_img->data.access = ACCESS_NON_READABLE;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
index b57bce8ef7ab08388bbb37a444fceb713f4d3f50..e1e307770626b37f05ca5f743ed18640ab9fc626 100644 (file)
@@ -93,7 +93,7 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
        output_img->data.descriptor_set = 0;
        output_img->data.binding = 1;
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -195,7 +195,7 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
        output_img->data.descriptor_set = 0;
        output_img->data.binding = 1;
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
index feeb5d845123e531029d9f075695409d1e56cd9c..66a9fe94b900a9993cbbc4d07f4574bfc24016c3 100644 (file)
@@ -203,7 +203,7 @@ build_occlusion_query_shader(struct radv_device *device) {
        nir_builder_instr_insert(&b, &src_buf->instr);
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -395,7 +395,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
        nir_builder_instr_insert(&b, &src_buf->instr);
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -635,7 +635,7 @@ build_tfb_query_shader(struct radv_device *device)
 
        /* Compute global ID. */
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -837,7 +837,7 @@ build_timestamp_query_shader(struct radv_device *device)
 
        /* Compute global ID. */
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
index 40c3949058594d52d7927c515c8b06b94acc85a1..693bd332a6678cb670a5c70be28a282f0d609931 100644 (file)
@@ -2121,6 +2121,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
       return nir_intrinsic_load_local_group_size;
    case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
       return nir_intrinsic_load_global_invocation_id;
+   case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
+      return nir_intrinsic_load_base_global_invocation_id;
    case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
       return nir_intrinsic_load_global_invocation_index;
    case SYSTEM_VALUE_WORK_DIM:
@@ -2220,6 +2222,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
       return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
    case nir_intrinsic_load_global_invocation_id:
       return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
+   case nir_intrinsic_load_base_global_invocation_id:
+      return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
    case nir_intrinsic_load_global_invocation_index:
       return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
    case nir_intrinsic_load_work_dim:
index da4f95c5ed3d63e13099ef54fa4770095cadf515..80c61650f78894159e192eff6615181949c2a9d7 100644 (file)
@@ -586,9 +586,13 @@ system_value("tess_level_inner_default", 2)
 system_value("patch_vertices_in", 1)
 system_value("local_invocation_id", 3)
 system_value("local_invocation_index", 1)
-system_value("work_group_id", 3)
+# zero_base indicates it starts from 0 for the current dispatch
+# non-zero_base indicates the base is included
+system_value("work_group_id", 3, bit_sizes=[32, 64])
+system_value("work_group_id_zero_base", 3)
+system_value("base_work_group_id", 3, bit_sizes=[32, 64])
 system_value("user_clip_plane", 4, indices=[UCP_ID])
-system_value("num_work_groups", 3)
+system_value("num_work_groups", 3, bit_sizes=[32, 64])
 system_value("helper_invocation", 1, bit_sizes=[1, 32])
 system_value("alpha_ref_float", 1)
 system_value("layer_id", 1)
@@ -603,7 +607,13 @@ system_value("subgroup_lt_mask", 0, bit_sizes=[32, 64])
 system_value("num_subgroups", 1)
 system_value("subgroup_id", 1)
 system_value("local_group_size", 3)
+# note: the definition of global_invocation_id_zero_base is based on
+# (work_group_id * local_group_size) + local_invocation_id.
+# it is *not* based on work_group_id_zero_base, meaning the work group
+# base is already accounted for, and the global base is additive on top of that
 system_value("global_invocation_id", 3, bit_sizes=[32, 64])
+system_value("global_invocation_id_zero_base", 3, bit_sizes=[32, 64])
+system_value("base_global_invocation_id", 3, bit_sizes=[32, 64])
 system_value("global_invocation_index", 1, bit_sizes=[32, 64])
 system_value("work_dim", 1)
 system_value("line_width", 1)
index 05cf5107d6ef256cd875463302e5016ccaac8890..92723a5bffbb096097135a1e4c079955e91e43dc 100644 (file)
@@ -44,9 +44,9 @@ static nir_ssa_def*
 build_global_group_size(nir_builder *b, unsigned bit_size)
 {
    nir_ssa_def *group_size = nir_load_local_group_size(b);
-   nir_ssa_def *num_work_groups = nir_load_num_work_groups(b);
+   nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size);
    return nir_imul(b, nir_u2u(b, group_size, bit_size),
-                      nir_u2u(b, num_work_groups, bit_size));
+                      num_work_groups);
 }
 
 static bool
@@ -189,10 +189,10 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state)
 
    case nir_intrinsic_load_global_invocation_id: {
       nir_ssa_def *group_size = nir_load_local_group_size(b);
-      nir_ssa_def *group_id = nir_load_work_group_id(b);
+      nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
       nir_ssa_def *local_id = nir_load_local_invocation_id(b);
 
-      return nir_iadd(b, nir_imul(b, nir_u2u(b, group_id, bit_size),
+      return nir_iadd(b, nir_imul(b, group_id,
                                      nir_u2u(b, group_size, bit_size)),
                          nir_u2u(b, local_id, bit_size));
    }
@@ -222,10 +222,6 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state)
          return NULL;
       }
 
-   case nir_intrinsic_load_num_work_groups:
-   case nir_intrinsic_load_work_group_id:
-      return sanitize_32bit_sysval(b, intrin);
-
    case nir_intrinsic_load_deref: {
       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
       if (deref->mode != nir_var_system_value)
index a89b51383723b146d8f6e1f9c961fc7bbe9fa2a7..a91c44369e1485d156648955f61aa74e4432aaf8 100644 (file)
@@ -246,6 +246,7 @@ gl_system_value_name(gl_system_value sysval)
      ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
      ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX),
      ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID),
+     ENUM(SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID),
      ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX),
      ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
      ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS),
index 4ef3512849f28e66a0e56f35dc0ff31cfb74eaee..e2b9cd6f02e8d425fb9bcb7107a98c3acfe7f8bc 100644 (file)
@@ -615,6 +615,7 @@ typedef enum
    SYSTEM_VALUE_LOCAL_INVOCATION_ID,
    SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
    SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
+   SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID,
    SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX,
    SYSTEM_VALUE_WORK_GROUP_ID,
    SYSTEM_VALUE_NUM_WORK_GROUPS,
index 4d5d22f6b7486c1ccf5ebf49ae7f48e6d7e5e789..3035f5460a578bcc2b970243dd3d19ef6bdb92c8 100644 (file)
@@ -644,7 +644,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
          break;
       case TGSI_SEMANTIC_BLOCK_ID:
          op = nir_intrinsic_load_work_group_id;
-         load = nir_load_work_group_id(b);
+         load = nir_load_work_group_id(b, 32);
          break;
       case TGSI_SEMANTIC_BLOCK_SIZE:
          op = nir_intrinsic_load_local_group_size;