From 73572abc2a7a15bc16d2e7e4df6aa6096fd1985b Mon Sep 17 00:00:00 2001 From: Caio Marcelo de Oliveira Filho Date: Thu, 18 Jul 2019 16:14:03 -0700 Subject: [PATCH] nir: Add scoped_memory_barrier intrinsic Add a NIR instrinsic that represent a memory barrier in SPIR-V / Vulkan Memory Model, with extra attributes that describe the barrier: - Ordering: whether is an Acquire or Release; - "Cache control": availability ("ensure this gets written in the memory") and visibility ("ensure my cache is up to date when I'm reading"); - Variable modes: which memory types this barrier applies to; - Scope: how far this barrier applies. Note that unlike in SPIR-V, the "Storage Semantics" and the "Memory Semantics" are split into two different attributes so we can use variable modes for the former. NIR passes that took barriers in consideration were also changed - nir_opt_copy_prop_vars: clean up the values for the mode of an ACQUIRE barrier. Copy propagation effect is to "pull up a load" (by not performing it), which is what ACQUIRE restricts. - nir_opt_dead_write_vars and nir_opt_combine_writes: clean up the pending writes for the modes of an RELEASE barrier. Dead writes effect is to "push down a store", which is what RELEASE restricts. - nir_opt_access: treat the ACQUIRE and RELEASE as a full barrier for the modes. This is conservative, but since this is a GL-specific pass, doesn't make a difference for now. v2: Fix the scoped barrier handling in copy propagation. (Jason) Add scoped barrier handling to nir_opt_access and nir_opt_combine_writes. (Rhys) Reviewed-by: Jason Ekstrand Reviewed-by: Bas Nieuwenhuizen --- src/compiler/nir/nir.h | 36 ++++++++++++++++++++ src/compiler/nir/nir_intrinsics.py | 12 +++++++ src/compiler/nir/nir_opt_access.c | 10 ++++++ src/compiler/nir/nir_opt_combine_stores.c | 7 ++++ src/compiler/nir/nir_opt_copy_prop_vars.c | 12 +++++++ src/compiler/nir/nir_opt_dead_write_vars.c | 8 +++++ src/compiler/nir/nir_print.c | 39 ++++++++++++++++++++++ 7 files changed, 124 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 6496ad92b11..f5f9826a27c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1380,6 +1380,24 @@ nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i) return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); } +typedef enum { + /* Memory ordering. */ + NIR_MEMORY_ACQUIRE = 1 << 0, + NIR_MEMORY_RELEASE = 1 << 1, + + /* Memory visibility operations. */ + NIR_MEMORY_MAKE_AVAILABLE = 1 << 3, + NIR_MEMORY_MAKE_VISIBLE = 1 << 4, +} nir_memory_semantics; + +typedef enum { + NIR_SCOPE_DEVICE, + NIR_SCOPE_QUEUE_FAMILY, + NIR_SCOPE_WORKGROUP, + NIR_SCOPE_SUBGROUP, + NIR_SCOPE_INVOCATION, +} nir_scope; + /** * \name NIR intrinsics semantic flags * @@ -1529,6 +1547,21 @@ typedef enum { /* Driver location for nir_load_patch_location_ir3 */ NIR_INTRINSIC_DRIVER_LOCATION, + /** + * Mask of nir_memory_semantics, includes ordering and visibility. + */ + NIR_INTRINSIC_MEMORY_SEMANTICS, + + /** + * Mask of nir_variable_modes affected by the memory operation. + */ + NIR_INTRINSIC_MEMORY_MODES, + + /** + * Value of nir_scope. + */ + NIR_INTRINSIC_MEMORY_SCOPE, + NIR_INTRINSIC_NUM_INDEX_FLAGS, } nir_intrinsic_index_flag; @@ -1638,6 +1671,9 @@ INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned) INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type) INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned) INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned) +INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics) +INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode) +INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope) static inline void nir_intrinsic_set_align(nir_intrinsic_instr *intrin, diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index a648995f7fd..02c781e7181 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -126,6 +126,12 @@ TYPE = "NIR_INTRINSIC_TYPE" SWIZZLE_MASK = "NIR_INTRINSIC_SWIZZLE_MASK" # Driver location of attribute DRIVER_LOCATION = "NIR_INTRINSIC_DRIVER_LOCATION" +# Ordering and visibility of a memory operation +MEMORY_SEMANTICS = "NIR_INTRINSIC_MEMORY_SEMANTICS" +# Modes affected by a memory operation +MEMORY_MODES = "NIR_INTRINSIC_MEMORY_MODES" +# Scope of a memory operation +MEMORY_SCOPE = "NIR_INTRINSIC_MEMORY_SCOPE" # # Possible flags: @@ -206,6 +212,12 @@ intrinsic("is_helper_invocation", dest_comp=1, flags=[CAN_ELIMINATE]) # intrinsic. barrier("memory_barrier") +# Memory barrier with explicit scope. Follows the semantics of SPIR-V +# OpMemoryBarrier, used to implement Vulkan Memory Model. Storage that the +# barrierr applies is represented using NIR variable modes. +intrinsic("scoped_memory_barrier", + indices=[MEMORY_SEMANTICS, MEMORY_MODES, MEMORY_SCOPE]) + # Shader clock intrinsic with semantics analogous to the clock2x32ARB() # GLSL intrinsic. # The latter can be used as code motion barrier, which is currently not diff --git a/src/compiler/nir/nir_opt_access.c b/src/compiler/nir/nir_opt_access.c index ec316a8947e..766f056568a 100644 --- a/src/compiler/nir/nir_opt_access.c +++ b/src/compiler/nir/nir_opt_access.c @@ -134,6 +134,16 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) state->image_barriers = true; break; + case nir_intrinsic_scoped_memory_barrier: + /* TODO: Could be more granular if we had nir_var_mem_image. */ + if (nir_intrinsic_memory_modes(instr) & (nir_var_mem_ubo | + nir_var_mem_ssbo | + nir_var_uniform)) { + state->buffer_barriers = true; + state->image_barriers = true; + } + break; + default: break; } diff --git a/src/compiler/nir/nir_opt_combine_stores.c b/src/compiler/nir/nir_opt_combine_stores.c index 48b9cfa1501..b3e5cb3947f 100644 --- a/src/compiler/nir/nir_opt_combine_stores.c +++ b/src/compiler/nir/nir_opt_combine_stores.c @@ -316,6 +316,13 @@ combine_stores_block(struct combine_stores_state *state, nir_block *block) nir_var_mem_shared); break; + case nir_intrinsic_scoped_memory_barrier: + if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) { + combine_stores_with_modes(state, + nir_intrinsic_memory_modes(intrin)); + } + break; + case nir_intrinsic_emit_vertex: case nir_intrinsic_emit_vertex_with_counter: combine_stores_with_modes(state, nir_var_shader_out); diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index c65beb22756..c4544eac0f2 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -171,6 +171,11 @@ gather_vars_written(struct copy_prop_var_state *state, nir_var_mem_shared; break; + case nir_intrinsic_scoped_memory_barrier: + if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE) + written->modes |= nir_intrinsic_memory_modes(intrin); + break; + case nir_intrinsic_emit_vertex: case nir_intrinsic_emit_vertex_with_counter: written->modes = nir_var_shader_out; @@ -802,6 +807,13 @@ copy_prop_vars_block(struct copy_prop_var_state *state, nir_var_mem_shared); break; + case nir_intrinsic_scoped_memory_barrier: + if (debug) dump_instr(instr); + + if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE) + apply_barrier_for_modes(copies, nir_intrinsic_memory_modes(intrin)); + break; + case nir_intrinsic_emit_vertex: case nir_intrinsic_emit_vertex_with_counter: if (debug) dump_instr(instr); diff --git a/src/compiler/nir/nir_opt_dead_write_vars.c b/src/compiler/nir/nir_opt_dead_write_vars.c index d2062a01ac0..201e0847ce9 100644 --- a/src/compiler/nir/nir_opt_dead_write_vars.c +++ b/src/compiler/nir/nir_opt_dead_write_vars.c @@ -139,6 +139,14 @@ remove_dead_write_vars_local(void *mem_ctx, nir_block *block) break; } + case nir_intrinsic_scoped_memory_barrier: { + if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) { + clear_unused_for_modes(&unused_writes, + nir_intrinsic_memory_modes(intrin)); + } + break; + } + case nir_intrinsic_emit_vertex: case nir_intrinsic_emit_vertex_with_counter: { clear_unused_for_modes(&unused_writes, nir_var_shader_out); diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 8408fa2ba74..ca9dab79e96 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -801,6 +801,9 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) [NIR_INTRINSIC_TYPE] = "type", [NIR_INTRINSIC_SWIZZLE_MASK] = "swizzle_mask", [NIR_INTRINSIC_DRIVER_LOCATION] = "driver_location", + [NIR_INTRINSIC_MEMORY_SEMANTICS] = "mem_semantics", + [NIR_INTRINSIC_MEMORY_MODES] = "mem_modes", + [NIR_INTRINSIC_MEMORY_SCOPE] = "mem_scope", }; for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) { if (!info->index_map[idx]) @@ -887,6 +890,42 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) break; } + case NIR_INTRINSIC_MEMORY_SEMANTICS: { + nir_memory_semantics semantics = nir_intrinsic_memory_semantics(instr); + fprintf(fp, " mem_semantics="); + switch (semantics & (NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE)) { + case 0: fprintf(fp, "NONE"); break; + case NIR_MEMORY_ACQUIRE: fprintf(fp, "ACQ"); break; + case NIR_MEMORY_RELEASE: fprintf(fp, "REL"); break; + default: fprintf(fp, "ACQ|REL"); break; + } + if (semantics & (NIR_MEMORY_MAKE_AVAILABLE)) fprintf(fp, "|AVAILABLE"); + if (semantics & (NIR_MEMORY_MAKE_VISIBLE)) fprintf(fp, "|VISIBLE"); + break; + } + + case NIR_INTRINSIC_MEMORY_MODES: { + fprintf(fp, " mem_modes="); + unsigned int modes = nir_intrinsic_memory_modes(instr); + while (modes) { + nir_variable_mode m = u_bit_scan(&modes); + fprintf(fp, "%s%s", get_variable_mode_str(1 << m, true), modes ? "|" : ""); + } + break; + } + + case NIR_INTRINSIC_MEMORY_SCOPE: { + fprintf(fp, " mem_scope="); + switch (nir_intrinsic_memory_scope(instr)) { + case NIR_SCOPE_DEVICE: fprintf(fp, "DEVICE"); break; + case NIR_SCOPE_QUEUE_FAMILY: fprintf(fp, "QUEUE_FAMILY"); break; + case NIR_SCOPE_WORKGROUP: fprintf(fp, "WORKGROUP"); break; + case NIR_SCOPE_SUBGROUP: fprintf(fp, "SUBGROUP"); break; + case NIR_SCOPE_INVOCATION: fprintf(fp, "INVOCATION"); break; + } + break; + } + default: { unsigned off = info->index_map[idx] - 1; assert(index_name[idx]); /* forgot to update index_name table? */ -- 2.30.2