nir/lower_io: Add support for global scratch addressing
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 4c5479622b27fad670623ad179532668cd3c9ee5..7e328e7ae552e8592ce2e1ee2bb32a3586f39a42 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -659,6 +659,15 @@ nir_lower_io_impl(nir_function_impl *impl,
    return progress;
 }
 
+/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
+ *
+ * This pass is intended to be used for cross-stage shader I/O and driver-
+ * managed uniforms to turn deref-based access into a simpler model using
+ * locations or offsets.  For fragment shader inputs, it can optionally turn
+ * load_deref into an explicit interpolation using barycentrics coming from
+ * one of the load_barycentric_* intrinsics.  This pass requires that all
+ * deref chains are complete and contain no casts.
+ */
 bool
 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
              int (*type_size)(const struct glsl_type *, bool),
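For reference, a minimal sketch of how a driver might invoke this pass for cross-stage varyings.  The type_size_vec4 callback is hypothetical (drivers usually wrap glsl_count_attribute_slots() or a driver-specific size function), and the trailing nir_lower_io_options argument is assumed to be zero for the plain case; none of this is part of the patch itself.

   /* Hypothetical slot-counting callback: one vec4 slot per location. */
   static int
   type_size_vec4(const struct glsl_type *type, bool bindless)
   {
      return glsl_count_attribute_slots(type, false);
   }

   /* Somewhere in the driver's compile path: */
   nir_lower_io(nir, nir_var_shader_in | nir_var_shader_out,
                type_size_vec4, (nir_lower_io_options)0);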
@@ -766,6 +775,12 @@ addr_format_is_global(nir_address_format addr_format)
           addr_format == nir_address_format_64bit_bounded_global;
 }
 
+static bool
+addr_format_is_offset(nir_address_format addr_format)
+{
+   return addr_format == nir_address_format_32bit_offset;
+}
+
 static nir_ssa_def *
 addr_to_global(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format)
@@ -834,9 +849,18 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
       op = nir_intrinsic_load_kernel_input;
       break;
    case nir_var_mem_shared:
-      assert(addr_format == nir_address_format_32bit_offset);
+      assert(addr_format_is_offset(addr_format));
       op = nir_intrinsic_load_shared;
       break;
+   case nir_var_shader_temp:
+   case nir_var_function_temp:
+      if (addr_format_is_offset(addr_format)) {
+         op = nir_intrinsic_load_scratch;
+      } else {
+         assert(addr_format_is_global(addr_format));
+         op = nir_intrinsic_load_global;
+      }
+      break;
    default:
       unreachable("Unsupported explicit IO variable mode");
    }
@@ -853,7 +877,7 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
       load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
    }
 
-   if (mode != nir_var_shader_in && mode != nir_var_mem_shared)
+   if (nir_intrinsic_infos[op].index_map[NIR_INTRINSIC_ACCESS] > 0)
       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
 
    unsigned bit_size = intrin->dest.ssa.bit_size;
@@ -900,7 +924,9 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
        * standard encoding for booleans rather than forcing a 0/1 boolean.
        * This should save an instruction or two.
        */
-      if (mode == nir_var_mem_shared)
+      if (mode == nir_var_mem_shared ||
+          mode == nir_var_shader_temp ||
+          mode == nir_var_function_temp)
          result = nir_b2b1(b, result);
       else
          result = nir_i2b(b, result);
@@ -929,9 +955,18 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
       op = nir_intrinsic_store_global;
       break;
    case nir_var_mem_shared:
-      assert(addr_format == nir_address_format_32bit_offset);
+      assert(addr_format_is_offset(addr_format));
       op = nir_intrinsic_store_shared;
       break;
+   case nir_var_shader_temp:
+   case nir_var_function_temp:
+      if (addr_format_is_offset(addr_format)) {
+         op = nir_intrinsic_store_scratch;
+      } else {
+         assert(addr_format_is_global(addr_format));
+         op = nir_intrinsic_store_global;
+      }
+      break;
    default:
       unreachable("Unsupported explicit IO variable mode");
    }
@@ -945,7 +980,9 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
        *
        * TODO: Make the native bool bit_size an option.
        */
-      if (mode == nir_var_mem_shared)
+      if (mode == nir_var_mem_shared ||
+          mode == nir_var_shader_temp ||
+          mode == nir_var_function_temp)
          value = nir_b2b32(b, value);
       else
          value = nir_b2i(b, value, 32);
@@ -964,7 +1001,7 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
 
    nir_intrinsic_set_write_mask(store, write_mask);
 
-   if (mode != nir_var_mem_shared)
+   if (nir_intrinsic_infos[op].index_map[NIR_INTRINSIC_ACCESS] > 0)
       nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
 
    /* TODO: We should try and provide a better alignment.  For OpenCL, we need
@@ -1037,7 +1074,7 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
    /* Global atomics don't have access flags because they assume that the
     * address may be non-uniform.
     */
-   if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared)
+   if (nir_intrinsic_infos[op].index_map[NIR_INTRINSIC_ACCESS] > 0)
       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
 
    assert(intrin->dest.ssa.num_components == 1);
@@ -1069,9 +1106,20 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
    assert(deref->dest.is_ssa);
    switch (deref->deref_type) {
    case nir_deref_type_var:
-      assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared));
-      return nir_imm_intN_t(b, deref->var->data.driver_location,
-                            deref->dest.ssa.bit_size);
+      assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared |
+                            nir_var_shader_temp | nir_var_function_temp));
+      if (addr_format_is_global(addr_format)) {
+         assert(deref->mode & (nir_var_shader_temp | nir_var_function_temp));
+         base_addr =
+            nir_load_scratch_base_ptr(b, !(deref->mode & nir_var_shader_temp),
+                                      nir_address_format_num_components(addr_format),
+                                      nir_address_format_bit_size(addr_format));
+         return build_addr_iadd_imm(b, base_addr, addr_format,
+                                    deref->var->data.driver_location);
+      } else {
+         return nir_imm_intN_t(b, deref->var->data.driver_location,
+                               deref->dest.ssa.bit_size);
+      }
 
    case nir_deref_type_array: {
       nir_deref_instr *parent = nir_deref_instr_parent(deref);
@@ -1328,6 +1376,29 @@ nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
    return progress;
 }
 
+/** Lower explicitly laid out I/O access to byte offset/address intrinsics
+ *
+ * This pass is intended to be used for any I/O which touches memory external
+ * to the shader or which is directly visible to the client.  It requires that
+ * all data types in the given modes have explicit stride/offset decorations
+ * to tell it exactly how to calculate the offset/address for the given load,
+ * store, or atomic operation.  If the offset/stride information does not come
+ * from the client explicitly (as with shared variables in GL or Vulkan),
+ * nir_lower_vars_to_explicit_types() can be used to add them.
+ *
+ * Unlike nir_lower_io, this pass is fully capable of handling incomplete
+ * pointer chains which may contain cast derefs.  It does so by walking the
+ * deref chain backwards and simply replacing each deref, one at a time, with
+ * the appropriate address calculation.  The pass takes a nir_address_format
+ * parameter which describes how the offset or address is to be represented
+ * during calculations.  By ensuring that the address is always in a
+ * consistent format, pointers can safely be conjured from thin air by the
+ * driver, stored to variables, passed through phis, etc.
+ *
+ * The one exception to the simple algorithm described above is for handling
+ * row-major matrices in which case we may look down one additional level of
+ * the deref chain.
+ */
 bool
 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
                       nir_address_format addr_format)
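As a usage sketch of what this commit enables: a driver that wants shader and function temporaries addressed through a global pointer (rather than a dedicated scratch offset) could run the passes roughly as below.  The pass names and address format are existing NIR API; the surrounding driver code is assumed for illustration only.

   /* Give every temporary an explicit size/offset layout first... */
   nir_lower_vars_to_explicit_types(nir,
                                    nir_var_shader_temp | nir_var_function_temp,
                                    glsl_get_natural_size_align_bytes);

   /* ...then lower the derefs.  With a global address format, variable
    * derefs resolve to load_scratch_base_ptr plus an offset and the
    * loads/stores become load/store_global; with the 32-bit offset format
    * they become load/store_scratch as before. */
   nir_lower_explicit_io(nir,
                         nir_var_shader_temp | nir_var_function_temp,
                         nir_address_format_64bit_global);

The backend then has to implement the load_scratch_base_ptr intrinsic used on the global path.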
@@ -1393,7 +1464,18 @@ lower_vars_to_explicit(nir_shader *shader,
                        glsl_type_size_align_func type_info)
 {
    bool progress = false;
-   unsigned offset = 0;
+   unsigned offset;
+   switch (mode) {
+   case nir_var_function_temp:
+   case nir_var_shader_temp:
+      offset = shader->scratch_size;
+      break;
+   case nir_var_mem_shared:
+      offset = 0;
+      break;
+   default:
+      unreachable("Unsupported mode");
+   }
    nir_foreach_variable(var, vars) {
       unsigned size, align;
       const struct glsl_type *explicit_type =
@@ -1408,9 +1490,17 @@ lower_vars_to_explicit(nir_shader *shader,
       offset = var->data.driver_location + size;
    }
 
-   if (mode == nir_var_mem_shared) {
+   switch (mode) {
+   case nir_var_shader_temp:
+   case nir_var_function_temp:
+      shader->scratch_size = offset;
+      break;
+   case nir_var_mem_shared:
       shader->info.cs.shared_size = offset;
       shader->num_shared = offset;
+      break;
+   default:
+      unreachable("Unsupported mode");
    }
 
    return progress;
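Once the lowering above has run, nir_shader::scratch_size holds the byte size accumulated here.  A rough sketch of how a backend might turn that into an allocation; the 64-byte padding and max_hw_invocations are stand-ins for driver-specific policy, not anything defined by this patch.

   /* scratch_size is the per-invocation requirement; backends typically pad
    * it to their scratch-slot granularity and scale by how many invocations
    * can be in flight at once. */
   unsigned per_invocation = ALIGN(nir->scratch_size, 64);
   unsigned total_bytes = per_invocation * max_hw_invocations;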