nir: Add a pass for selectively lowering variables to scratch space
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 2 Dec 2016 19:36:42 +0000 (11:36 -0800)
committer Eric Anholt <eric@anholt.net>
Fri, 12 Apr 2019 22:59:31 +0000 (15:59 -0700)
This commit adds new nir_load/store_scratch opcodes which read and write
a virtual scratch space.  It's up to the back-end to figure out what to
do with it and where to put the actual scratch data.

v2: Drop const_index comments (by anholt)

Reviewed-by: Eric Anholt <eric@anholt.net>
src/compiler/Makefile.sources
src/compiler/nir/meson.build
src/compiler/nir/nir.h
src/compiler/nir/nir_clone.c
src/compiler/nir/nir_intrinsics.py
src/compiler/nir/nir_lower_io.c
src/compiler/nir/nir_lower_scratch.c [new file with mode: 0644]
src/compiler/nir/nir_print.c
src/compiler/nir/nir_serialize.c

index d201ea5855c7d614052629b7630b05addd0e550a..5737a827daa843592f85536ae10fd6fde5e95482 100644 (file)
@@ -263,6 +263,7 @@ NIR_FILES = \
        nir/nir_lower_phis_to_scalar.c \
        nir/nir_lower_regs_to_ssa.c \
        nir/nir_lower_returns.c \
+       nir/nir_lower_scratch.c \
        nir/nir_lower_subgroups.c \
        nir/nir_lower_system_values.c \
        nir/nir_lower_tex.c \
index 54655f7cd7c18f526b12d463d004f46c28cb721d..4e5039e28e001021e98d0e16178757088741ca44 100644 (file)
@@ -143,6 +143,7 @@ files_libnir = files(
   'nir_lower_phis_to_scalar.c',
   'nir_lower_regs_to_ssa.c',
   'nir_lower_returns.c',
+  'nir_lower_scratch.c',
   'nir_lower_subgroups.c',
   'nir_lower_system_values.c',
   'nir_lower_tex.c',
index 0f110dd959fe6634782ad9bd4f17f85bae999faf..91cad8256536e6ff24658e0bf1bd1361185f7b9e 100644 (file)
@@ -2359,6 +2359,9 @@ typedef struct nir_shader {
     */
    unsigned num_inputs, num_uniforms, num_outputs, num_shared;
 
+   /** Size in bytes of required scratch space */
+   unsigned scratch_size;
+
    /** Constant data associated with this shader.
     *
     * Constant data is loaded through load_constant intrinsics.  See also
@@ -3012,6 +3015,11 @@ void nir_lower_io_to_temporaries(nir_shader *shader,
                                  nir_function_impl *entrypoint,
                                  bool outputs, bool inputs);
 
+bool nir_lower_vars_to_scratch(nir_shader *shader,
+                               nir_variable_mode modes,
+                               int size_threshold,
+                               glsl_type_size_align_func size_align);
+
 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
 
 void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
index a45a581bd05fcc8e352fda5ea5f99d3460c490f8..1baa60b2fe5b2021729e0cd20785e769fbe811d8 100644 (file)
@@ -735,6 +735,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s)
    ns->num_uniforms = s->num_uniforms;
    ns->num_outputs = s->num_outputs;
    ns->num_shared = s->num_shared;
+   ns->scratch_size = s->scratch_size;
 
    ns->constant_data_size = s->constant_data_size;
    if (s->constant_data_size > 0) {
index 9b3f480f7af150e8fbf1b8508911b224a1d1ad5c..bf06f8385a59b5d234bf94b81087ffca653f6922 100644 (file)
@@ -654,6 +654,8 @@ load("constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
 load("global", 1, [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
 # src[] = { address }.
 load("kernel_input", 1, [BASE, RANGE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE, CAN_REORDER])
+# src[] = { offset }.
+load("scratch", 1, [ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
 
 # Stores work the same way as loads, except now the first source is the value
 # to store and the second (and possibly third) source specify where to store
@@ -673,7 +675,8 @@ store("ssbo", 3, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
 store("shared", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
 # src[] = { value, address }.
 store("global", 2, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
-
+# src[] = { value, offset }.
+store("scratch", 2, [ALIGN_MUL, ALIGN_OFFSET, WRMASK])
 
 # IR3-specific version of most SSBO intrinsics. The only different
 # compare to the originals is that they add an extra source to hold
index 5f18f1df4455469b00b85b26db3e7b8ea645e5f9..331ecc08324ed88e52d5fdc3f6cecbd2c12e5ede 100644 (file)
@@ -1178,6 +1178,7 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_shared:
    case nir_intrinsic_load_uniform:
    case nir_intrinsic_load_global:
+   case nir_intrinsic_load_scratch:
       return &instr->src[0];
    case nir_intrinsic_load_ubo:
    case nir_intrinsic_load_ssbo:
@@ -1187,6 +1188,7 @@ nir_get_io_offset_src(nir_intrinsic_instr *instr)
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_shared:
    case nir_intrinsic_store_global:
+   case nir_intrinsic_store_scratch:
       return &instr->src[1];
    case nir_intrinsic_store_ssbo:
    case nir_intrinsic_store_per_vertex_output:
diff --git a/src/compiler/nir/nir_lower_scratch.c b/src/compiler/nir/nir_lower_scratch.c
new file mode 100644 (file)
index 0000000..df0d3f4
--- /dev/null
@@ -0,0 +1,195 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass converts references to variables with loads/stores to
+ * scratch space based on a few configurable parameters.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+
+/* Walks the deref chain from @deref up to its variable deref and reports
+ * whether any array deref along the way uses an index that is not a
+ * constant value.
+ */
+static bool
+deref_has_indirect(nir_deref_instr *deref)
+{
+   for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
+        d = nir_deref_instr_parent(d)) {
+      /* Only array derefs carry an index that can be non-constant. */
+      if (d->deref_type != nir_deref_type_array)
+         continue;
+
+      if (!nir_src_as_const_value(d->arr.index))
+         return true;
+   }
+
+   return false;
+}
+
+/*
+ * Replaces one load_deref/store_deref intrinsic with the matching
+ * load_scratch/store_scratch intrinsic and removes the original.
+ *
+ * The scratch offset is the offset of the deref within its variable, as
+ * built by nir_build_deref_offset() using @size_align, plus
+ * var->data.location (the variable's base offset in scratch space, assigned
+ * by nir_lower_vars_to_scratch()).  The alignment reported by @size_align for
+ * the deref's type is recorded on the new intrinsic.
+ *
+ * 1-bit booleans are kept in scratch as 32-bit integers: a load fetches a
+ * 32-bit value and converts it back to a 1-bit boolean, a store widens the
+ * boolean to 32 bits first.  The conversion is keyed on the SSA bit size, so
+ * values that are already wider than one bit pass through untouched.
+ * NOTE(review): this assumes @size_align reports a 32-bit footprint for
+ * boolean types -- confirm against the callbacks passed by callers.
+ */
+static void
+lower_load_store(nir_builder *b,
+                 nir_intrinsic_instr *intrin,
+                 glsl_type_size_align_func size_align)
+{
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   nir_ssa_def *offset =
+      nir_iadd_imm(b, nir_build_deref_offset(b, deref, size_align),
+                      var->data.location);
+
+   unsigned align, UNUSED size;
+   size_align(deref->type, &size, &align);
+
+   if (intrin->intrinsic == nir_intrinsic_load_deref) {
+      const unsigned bit_size = intrin->dest.ssa.bit_size;
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_scratch);
+      load->num_components = intrin->num_components;
+      load->src[0] = nir_src_for_ssa(offset);
+      nir_intrinsic_set_align(load, align, 0);
+      /* 1-bit booleans live in scratch as 32-bit integers. */
+      nir_ssa_dest_init(&load->instr, &load->dest,
+                        intrin->dest.ssa.num_components,
+                        bit_size == 1 ? 32 : bit_size, NULL);
+      nir_builder_instr_insert(b, &load->instr);
+
+      nir_ssa_def *value = &load->dest.ssa;
+      if (bit_size == 1)
+         value = nir_i2b(b, value);
+
+      /* Users must see the converted value, not the raw scratch load. */
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
+   } else {
+      assert(intrin->intrinsic == nir_intrinsic_store_deref);
+
+      assert(intrin->src[1].is_ssa);
+      nir_ssa_def *value = intrin->src[1].ssa;
+      /* Widen 1-bit booleans to the 32-bit form kept in scratch. */
+      if (value->bit_size == 1)
+         value = nir_b2i32(b, value);
+
+      nir_intrinsic_instr *store =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_scratch);
+      store->num_components = intrin->num_components;
+      store->src[0] = nir_src_for_ssa(value);
+      store->src[1] = nir_src_for_ssa(offset);
+      nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
+      nir_intrinsic_set_align(store, align, 0);
+      nir_builder_instr_insert(b, &store->instr);
+   }
+
+   nir_instr_remove(&intrin->instr);
+   nir_deref_instr_remove_if_unused(deref);
+}
+
+/**
+ * Moves selected variables to scratch space and rewrites their load_deref /
+ * store_deref intrinsics to load_scratch / store_scratch.
+ *
+ * A variable is selected when some load/store deref of it has a mode in
+ * @modes, the deref chain contains a non-constant array index, and the
+ * variable's size as reported by @size_align is larger than @size_threshold.
+ * Each selected variable is removed from its variable list, gets
+ * var->data.mode set to 0, and is assigned an aligned offset in
+ * var->data.location; shader->scratch_size grows to cover it.
+ *
+ * \param shader          shader to process
+ * \param modes           mask tested against the mode of each deref
+ * \param size_threshold  variables of this size or smaller are left alone
+ * \param size_align      callback giving the size and alignment of a type
+ *
+ * \return true if any load/store was rewritten.
+ */
+bool
+nir_lower_vars_to_scratch(nir_shader *shader,
+                          nir_variable_mode modes,
+                          int size_threshold,
+                          glsl_type_size_align_func size_align)
+{
+   /* First, we walk the instructions and flag any variables we want to lower
+    * by removing them from their respective list and setting the mode to 0.
+    */
+   nir_foreach_function(function, shader) {
+      /* Functions without an implementation have no blocks to walk. */
+      if (!function->impl)
+         continue;
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_deref &&
+                intrin->intrinsic != nir_intrinsic_store_deref)
+               continue;
+
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            if (!(deref->mode & modes))
+               continue;
+
+            /* Resolve the variable before walking the chain, matching the
+             * NULL check done by the rewrite walk below.
+             */
+            nir_variable *var = nir_deref_instr_get_variable(deref);
+            if (!var)
+               continue;
+
+            if (!deref_has_indirect(deref))
+               continue;
+
+            /* We set var->mode to 0 to indicate that a variable will be moved
+             * to scratch.  Don't assign a scratch location twice.
+             */
+            if (var->data.mode == 0)
+               continue;
+
+            unsigned var_size, var_align;
+            size_align(var->type, &var_size, &var_align);
+            /* NOTE(review): size_threshold is a signed int compared against
+             * an unsigned size; a negative threshold converts to a huge
+             * unsigned value and nothing gets lowered -- confirm intent.
+             */
+            if (var_size <= size_threshold)
+               continue;
+
+            /* Remove it from its list */
+            exec_node_remove(&var->node);
+            /* Invalid mode used to flag "moving to scratch" */
+            var->data.mode = 0;
+
+            var->data.location = ALIGN_POT(shader->scratch_size, var_align);
+            shader->scratch_size = var->data.location + var_size;
+         }
+      }
+   }
+
+   /* Second, rewrite every load/store whose variable was flagged above. */
+   bool progress = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      nir_builder build;
+      nir_builder_init(&build, function->impl);
+
+      bool impl_progress = false;
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_deref &&
+                intrin->intrinsic != nir_intrinsic_store_deref)
+               continue;
+
+            nir_variable *var = nir_intrinsic_get_var(intrin, 0);
+            /* Variables flagged for lowering above have mode == 0 */
+            if (!var || var->data.mode)
+               continue;
+
+            lower_load_store(&build, intrin, size_align);
+            impl_progress = true;
+         }
+      }
+
+      if (impl_progress) {
+         progress = true;
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+
+   return progress;
+}
index bab422329920cd63fc87a4441e1e170581ef5c84..42053dc2d8cf1f38d2e8b89e7b6634414bfce578 100644 (file)
@@ -1376,6 +1376,8 @@ nir_print_shader_annotated(nir_shader *shader, FILE *fp,
    fprintf(fp, "outputs: %u\n", shader->num_outputs);
    fprintf(fp, "uniforms: %u\n", shader->num_uniforms);
    fprintf(fp, "shared: %u\n", shader->num_shared);
+   if (shader->scratch_size)
+      fprintf(fp, "scratch: %u\n", shader->scratch_size);
 
    nir_foreach_variable(var, &shader->uniforms) {
       print_var_decl(var, &state);
index 324c0a154b3b10f020c328d0d831ff8f1db35166..fe74603115a0b27431e3a330e05fef2161337a17 100644 (file)
@@ -1122,6 +1122,7 @@ nir_serialize(struct blob *blob, const nir_shader *nir)
    blob_write_uint32(blob, nir->num_uniforms);
    blob_write_uint32(blob, nir->num_outputs);
    blob_write_uint32(blob, nir->num_shared);
+   blob_write_uint32(blob, nir->scratch_size);
 
    blob_write_uint32(blob, exec_list_length(&nir->functions));
    nir_foreach_function(fxn, nir) {
@@ -1179,6 +1180,7 @@ nir_deserialize(void *mem_ctx,
    ctx.nir->num_uniforms = blob_read_uint32(blob);
    ctx.nir->num_outputs = blob_read_uint32(blob);
    ctx.nir->num_shared = blob_read_uint32(blob);
+   ctx.nir->scratch_size = blob_read_uint32(blob);
 
    unsigned num_functions = blob_read_uint32(blob);
    for (unsigned i = 0; i < num_functions; i++)