nir: Add scoped_memory_barrier intrinsic
author Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Thu, 18 Jul 2019 23:14:03 +0000 (16:14 -0700)
committer Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Thu, 24 Oct 2019 18:39:55 +0000 (11:39 -0700)
Add a NIR intrinsic that represents a memory barrier in SPIR-V /
Vulkan Memory Model, with extra attributes that describe the barrier:

- Ordering: whether it is an Acquire or a Release;
- "Cache control": availability ("ensure this gets written in the memory")
  and visibility ("ensure my cache is up to date when I'm reading");
- Variable modes: which memory types this barrier applies to;
- Scope: how far this barrier applies.

Note that unlike in SPIR-V, the "Storage Semantics" and the "Memory
Semantics" are split into two different attributes so we can use
variable modes for the former.

NIR passes that take barriers into consideration were also changed:

- nir_opt_copy_prop_vars: clean up the values for the modes of an
  ACQUIRE barrier.  The effect of copy propagation is to "pull up a
  load" (by not performing it), which is what ACQUIRE restricts.

- nir_opt_dead_write_vars and nir_opt_combine_stores: clean up the
  pending writes for the modes of a RELEASE barrier.  The effect of
  dead write removal is to "push down a store", which is what RELEASE
  restricts.

- nir_opt_access: treat both ACQUIRE and RELEASE as a full barrier for
  the modes.  This is conservative, but since this is a GL-specific
  pass, it doesn't make a difference for now.

v2: Fix the scoped barrier handling in copy propagation.  (Jason)
    Add scoped barrier handling to nir_opt_access and
    nir_opt_combine_stores.  (Rhys)

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/compiler/nir/nir.h
src/compiler/nir/nir_intrinsics.py
src/compiler/nir/nir_opt_access.c
src/compiler/nir/nir_opt_combine_stores.c
src/compiler/nir/nir_opt_copy_prop_vars.c
src/compiler/nir/nir_opt_dead_write_vars.c
src/compiler/nir/nir_print.c

index 6496ad92b11b7ab1052e8851c03df8097820c165..f5f9826a27c290feddc5538b7280173d9474c036 100644 (file)
@@ -1380,6 +1380,24 @@ nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
    return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
 }
 
+typedef enum {   /* Bit flags describing a memory barrier; combined into a mask. */
+   /* Memory ordering.  Both bits may be set together (acquire + release). */
+   NIR_MEMORY_ACQUIRE        = 1 << 0,   /* restricts "pulling up" loads */
+   NIR_MEMORY_RELEASE        = 1 << 1,   /* restricts "pushing down" stores */
+
+   /* Memory visibility operations.  NOTE(review): bit 2 is unused -- confirm this is intentional. */
+   NIR_MEMORY_MAKE_AVAILABLE = 1 << 3,   /* ensure writes get written to memory */
+   NIR_MEMORY_MAKE_VISIBLE   = 1 << 4,   /* ensure reads see up-to-date memory */
+} nir_memory_semantics;
+
+typedef enum {   /* How far a memory barrier applies; a single value, not a mask. */
+   NIR_SCOPE_DEVICE,
+   NIR_SCOPE_QUEUE_FAMILY,
+   NIR_SCOPE_WORKGROUP,
+   NIR_SCOPE_SUBGROUP,
+   NIR_SCOPE_INVOCATION,
+} nir_scope;
+
 /**
  * \name NIR intrinsics semantic flags
  *
@@ -1529,6 +1547,21 @@ typedef enum {
    /* Driver location for nir_load_patch_location_ir3 */
    NIR_INTRINSIC_DRIVER_LOCATION,
 
+   /**
+    * Mask of nir_memory_semantics, includes ordering and visibility.
+    */
+   NIR_INTRINSIC_MEMORY_SEMANTICS,
+
+   /**
+    * Mask of nir_variable_modes affected by the memory operation.
+    */
+   NIR_INTRINSIC_MEMORY_MODES,
+
+   /**
+    * Value of nir_scope.
+    */
+   NIR_INTRINSIC_MEMORY_SCOPE,
+
    NIR_INTRINSIC_NUM_INDEX_FLAGS,
 
 } nir_intrinsic_index_flag;
@@ -1638,6 +1671,9 @@ INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned)
 INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type)
 INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned)
 INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned)
+INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics)
+INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode)
+INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope)
 
 static inline void
 nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
index a648995f7fda471c56c1b0b106798f4e8e69fdd6..02c781e7181a0171ed47996e3196761d27acd715 100644 (file)
@@ -126,6 +126,12 @@ TYPE = "NIR_INTRINSIC_TYPE"
 SWIZZLE_MASK = "NIR_INTRINSIC_SWIZZLE_MASK"
 # Driver location of attribute
 DRIVER_LOCATION = "NIR_INTRINSIC_DRIVER_LOCATION"
+# Ordering and visibility of a memory operation
+MEMORY_SEMANTICS = "NIR_INTRINSIC_MEMORY_SEMANTICS"
+# Modes affected by a memory operation
+MEMORY_MODES = "NIR_INTRINSIC_MEMORY_MODES"
+# Scope of a memory operation
+MEMORY_SCOPE = "NIR_INTRINSIC_MEMORY_SCOPE"
 
 #
 # Possible flags:
@@ -206,6 +212,12 @@ intrinsic("is_helper_invocation", dest_comp=1, flags=[CAN_ELIMINATE])
 # intrinsic.
 barrier("memory_barrier")
 
+# Memory barrier with explicit scope.  Follows the semantics of SPIR-V
+# OpMemoryBarrier, used to implement Vulkan Memory Model.  Storage that the
+# barrier applies to is represented using NIR variable modes.
+intrinsic("scoped_memory_barrier",
+          indices=[MEMORY_SEMANTICS, MEMORY_MODES, MEMORY_SCOPE])
+
 # Shader clock intrinsic with semantics analogous to the clock2x32ARB()
 # GLSL intrinsic.
 # The latter can be used as code motion barrier, which is currently not
index ec316a8947ecb06d1f6e6e7d28bf62f2dd880d2d..766f056568ab83b7ea9df0e870ab7ea2b52e6f36 100644 (file)
@@ -134,6 +134,16 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
       state->image_barriers = true;
       break;
 
+   case nir_intrinsic_scoped_memory_barrier:
+      /* TODO: Could be more granular if we had nir_var_mem_image. */
+      if (nir_intrinsic_memory_modes(instr) & (nir_var_mem_ubo |
+                                               nir_var_mem_ssbo |
+                                               nir_var_uniform)) {
+         state->buffer_barriers = true;
+         state->image_barriers = true;
+      }
+      break;
+
    default:
       break;
    }
index 48b9cfa15012bad50a68856b747ddb8f4b8234c5..b3e5cb3947fc8810912b173bc10f8158455fa6e7 100644 (file)
@@ -316,6 +316,13 @@ combine_stores_block(struct combine_stores_state *state, nir_block *block)
                                               nir_var_mem_shared);
          break;
 
+      case nir_intrinsic_scoped_memory_barrier:
+         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) {
+            combine_stores_with_modes(state,
+                                      nir_intrinsic_memory_modes(intrin));
+         }
+         break;
+
       case nir_intrinsic_emit_vertex:
       case nir_intrinsic_emit_vertex_with_counter:
          combine_stores_with_modes(state, nir_var_shader_out);
index c65beb227562137be5a868cd883ecd07af00ecf3..c4544eac0f2bee7ec4a60c6e2ee7ccb640e58e43 100644 (file)
@@ -171,6 +171,11 @@ gather_vars_written(struct copy_prop_var_state *state,
                               nir_var_mem_shared;
             break;
 
+         case nir_intrinsic_scoped_memory_barrier:
+            if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
+               written->modes |= nir_intrinsic_memory_modes(intrin);
+            break;
+
          case nir_intrinsic_emit_vertex:
          case nir_intrinsic_emit_vertex_with_counter:
             written->modes = nir_var_shader_out;
@@ -802,6 +807,13 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
                                          nir_var_mem_shared);
          break;
 
+      case nir_intrinsic_scoped_memory_barrier:
+         if (debug) dump_instr(instr);
+
+         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
+            apply_barrier_for_modes(copies, nir_intrinsic_memory_modes(intrin));
+         break;
+
       case nir_intrinsic_emit_vertex:
       case nir_intrinsic_emit_vertex_with_counter:
          if (debug) dump_instr(instr);
index d2062a01ac038e49331159b760851b325c0193b9..201e0847ce945252f5c64c3baa6d2b042d7c0a1f 100644 (file)
@@ -139,6 +139,14 @@ remove_dead_write_vars_local(void *mem_ctx, nir_block *block)
          break;
       }
 
+      case nir_intrinsic_scoped_memory_barrier: {
+         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) {
+            clear_unused_for_modes(&unused_writes,
+                                   nir_intrinsic_memory_modes(intrin));
+         }
+         break;
+      }
+
       case nir_intrinsic_emit_vertex:
       case nir_intrinsic_emit_vertex_with_counter: {
          clear_unused_for_modes(&unused_writes, nir_var_shader_out);
index 8408fa2ba741013f29f03d9f2f5336c60bad0e46..ca9dab79e9670df01fc930bdb196ed652ba07768 100644 (file)
@@ -801,6 +801,9 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       [NIR_INTRINSIC_TYPE] = "type",
       [NIR_INTRINSIC_SWIZZLE_MASK] = "swizzle_mask",
       [NIR_INTRINSIC_DRIVER_LOCATION] = "driver_location",
+      [NIR_INTRINSIC_MEMORY_SEMANTICS] = "mem_semantics",
+      [NIR_INTRINSIC_MEMORY_MODES] = "mem_modes",
+      [NIR_INTRINSIC_MEMORY_SCOPE] = "mem_scope",
    };
    for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
       if (!info->index_map[idx])
@@ -887,6 +890,42 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
          break;
       }
 
+      case NIR_INTRINSIC_MEMORY_SEMANTICS: {
+         nir_memory_semantics semantics = nir_intrinsic_memory_semantics(instr);
+         fprintf(fp, " mem_semantics=");
+         switch (semantics & (NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE)) {
+         case 0:                  fprintf(fp, "NONE");    break;
+         case NIR_MEMORY_ACQUIRE: fprintf(fp, "ACQ");     break;
+         case NIR_MEMORY_RELEASE: fprintf(fp, "REL");     break;
+         default:                 fprintf(fp, "ACQ|REL"); break;
+         }
+         if (semantics & (NIR_MEMORY_MAKE_AVAILABLE)) fprintf(fp, "|AVAILABLE");
+         if (semantics & (NIR_MEMORY_MAKE_VISIBLE))   fprintf(fp, "|VISIBLE");
+         break;
+      }
+
+      case NIR_INTRINSIC_MEMORY_MODES: {
+         fprintf(fp, " mem_modes=");
+         unsigned int modes = nir_intrinsic_memory_modes(instr);
+         while (modes) {
+            nir_variable_mode m = u_bit_scan(&modes);
+            fprintf(fp, "%s%s", get_variable_mode_str(1 << m, true), modes ? "|" : "");
+         }
+         break;
+      }
+
+      case NIR_INTRINSIC_MEMORY_SCOPE: {
+         fprintf(fp, " mem_scope=");
+         switch (nir_intrinsic_memory_scope(instr)) {
+         case NIR_SCOPE_DEVICE:       fprintf(fp, "DEVICE");       break;
+         case NIR_SCOPE_QUEUE_FAMILY: fprintf(fp, "QUEUE_FAMILY"); break;
+         case NIR_SCOPE_WORKGROUP:    fprintf(fp, "WORKGROUP");    break;
+         case NIR_SCOPE_SUBGROUP:     fprintf(fp, "SUBGROUP");     break;
+         case NIR_SCOPE_INVOCATION:   fprintf(fp, "INVOCATION");   break;
+         }
+         break;
+      }
+
       default: {
          unsigned off = info->index_map[idx] - 1;
          assert(index_name[idx]);  /* forgot to update index_name table? */