From e461926ef2793d5969604b75c72d365d46a58cd0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 19 Nov 2018 13:40:35 -0600 Subject: [PATCH] nir: Add load/store/atomic global intrinsics These correspond roughly to reading/writing OpenCL global pointers. The idea is that they just take a bare address and load/store from it. Of course, exactly what this address means is driver-dependent. Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Karol Herbst --- src/compiler/nir/nir_intrinsics.py | 34 +++++++++++++++++++++ src/compiler/nir/nir_lower_io.c | 2 ++ src/compiler/nir/nir_lower_phis_to_scalar.c | 4 ++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index a5cc3f7401c..90d347f7331 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -458,6 +458,34 @@ intrinsic("shared_atomic_fmin", src_comp=[1, 1], dest_comp=1, indices=[BASE]) intrinsic("shared_atomic_fmax", src_comp=[1, 1], dest_comp=1, indices=[BASE]) intrinsic("shared_atomic_fcomp_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE]) +# Global atomic intrinsics +# +# All of the global memory atomic operations read a value from +# memory, compute a new value using one of the operations below, write the +# new value to memory, and return the original value read. +# +# All operations take 2 sources except CompSwap that takes 3. These +# sources represent: +# +# 0: The memory address that the atomic operation will operate on. +# 1: The data parameter to the atomic function (i.e. the value to add +# in global_atomic_add, etc). +# 2: For CompSwap only: the second data parameter. 
+intrinsic("global_atomic_add", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_imin", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_umin", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_imax", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_umax", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_and", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_or", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_xor", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_exchange", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_comp_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_fadd", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_fmin", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_fmax", src_comp=[1, 1], dest_comp=1, indices=[BASE]) +intrinsic("global_atomic_fcomp_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE]) + def system_value(name, dest_comp, indices=[], bit_sizes=[32]): intrinsic("load_" + name, [], dest_comp, indices, flags=[CAN_ELIMINATE, CAN_REORDER], sysval=True, @@ -590,6 +618,9 @@ load("shared", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) load("push_constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) # src[] = { offset }. const_index[] = { base, range } load("constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER]) +# src[] = { address }. +# const_index[] = { access, align_mul, align_offset } +load("global", 1, [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) # Stores work the same way as loads, except now the first source is the value # to store and the second (and possibly third) source specify where to store @@ -610,3 +641,6 @@ store("ssbo", 3, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) # src[] = { value, offset }. 
# const_index[] = { base, write_mask, align_mul, align_offset } store("shared", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET]) +# src[] = { value, address }. +# const_index[] = { write_mask, access, align_mul, align_offset } +store("global", 2, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 06598e054a6..f07be78aa99 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -944,6 +944,7 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr) case nir_intrinsic_load_output: case nir_intrinsic_load_shared: case nir_intrinsic_load_uniform: + case nir_intrinsic_load_global: return &instr->src[0]; case nir_intrinsic_load_ubo: case nir_intrinsic_load_ssbo: @@ -952,6 +953,7 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr) case nir_intrinsic_load_interpolated_input: case nir_intrinsic_store_output: case nir_intrinsic_store_shared: + case nir_intrinsic_store_global: return &instr->src[1]; case nir_intrinsic_store_ssbo: case nir_intrinsic_store_per_vertex_output: diff --git a/src/compiler/nir/nir_lower_phis_to_scalar.c b/src/compiler/nir/nir_lower_phis_to_scalar.c index 41ae19e8391..16001f73685 100644 --- a/src/compiler/nir/nir_lower_phis_to_scalar.c +++ b/src/compiler/nir/nir_lower_phis_to_scalar.c @@ -88,7 +88,8 @@ is_phi_src_scalarizable(nir_phi_src *src, return deref->mode == nir_var_shader_in || deref->mode == nir_var_uniform || deref->mode == nir_var_mem_ubo || - deref->mode == nir_var_mem_ssbo; + deref->mode == nir_var_mem_ssbo || + deref->mode == nir_var_mem_global; } case nir_intrinsic_interp_deref_at_centroid: @@ -97,6 +98,7 @@ is_phi_src_scalarizable(nir_phi_src *src, case nir_intrinsic_load_uniform: case nir_intrinsic_load_ubo: case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_global: case nir_intrinsic_load_input: return true; default: -- 2.30.2