nir/lower_explicit_io: add nir_var_mem_shared support
authorRhys Perry <pendingchaos02@gmail.com>
Tue, 9 Apr 2019 16:28:13 +0000 (17:28 +0100)
committerJason Ekstrand <jason@jlekstrand.net>
Thu, 8 Aug 2019 17:10:39 +0000 (12:10 -0500)
v2: require nir_address_format_32bit_offset instead
v3: don't call nir_intrinsic_set_access() for shared atomics

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/compiler/nir/nir_lower_io.c

index 36877fc0346656bad24ec827165f9d6f3ae5650d..f8bcf4420e052967ac759e12065d65b0a1fbbbcc 100644 (file)
@@ -93,6 +93,31 @@ global_atomic_for_deref(nir_intrinsic_op deref_op)
    }
 }
 
+static nir_intrinsic_op
+shared_atomic_for_deref(nir_intrinsic_op deref_op)
+{
+   switch (deref_op) {
+#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
+   OP(atomic_exchange)
+   OP(atomic_comp_swap)
+   OP(atomic_add)
+   OP(atomic_imin)
+   OP(atomic_umin)
+   OP(atomic_imax)
+   OP(atomic_umax)
+   OP(atomic_and)
+   OP(atomic_or)
+   OP(atomic_xor)
+   OP(atomic_fadd)
+   OP(atomic_fmin)
+   OP(atomic_fmax)
+   OP(atomic_fcomp_swap)
+#undef OP
+   default:
+      unreachable("Invalid shared atomic");
+   }
+}
+
 void
 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                          int (*type_size)(const struct glsl_type *, bool))
@@ -427,27 +452,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
    nir_builder *b = &state->builder;
    assert(var->data.mode == nir_var_mem_shared);
 
-   nir_intrinsic_op op;
-   switch (intrin->intrinsic) {
-#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
-   OP(atomic_exchange)
-   OP(atomic_comp_swap)
-   OP(atomic_add)
-   OP(atomic_imin)
-   OP(atomic_umin)
-   OP(atomic_imax)
-   OP(atomic_umax)
-   OP(atomic_and)
-   OP(atomic_or)
-   OP(atomic_xor)
-   OP(atomic_fadd)
-   OP(atomic_fmin)
-   OP(atomic_fmax)
-   OP(atomic_fcomp_swap)
-#undef OP
-   default:
-      unreachable("Invalid atomic");
-   }
+   nir_intrinsic_op op = shared_atomic_for_deref(intrin->intrinsic);
 
    nir_intrinsic_instr *atomic =
       nir_intrinsic_instr_create(state->builder.shader, op);
@@ -849,6 +854,10 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
       assert(addr_format_is_global(addr_format));
       op = nir_intrinsic_load_kernel_input;
       break;
+   case nir_var_mem_shared:
+      assert(addr_format == nir_address_format_32bit_offset);
+      op = nir_intrinsic_load_shared;
+      break;
    default:
       unreachable("Unsupported explicit IO variable mode");
    }
@@ -857,12 +866,15 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
 
    if (addr_format_is_global(addr_format)) {
       load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
+   } else if (addr_format == nir_address_format_32bit_offset) {
+      assert(addr->num_components == 1);
+      load->src[0] = nir_src_for_ssa(addr);
    } else {
       load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
       load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
    }
 
-   if (mode != nir_var_mem_ubo && mode != nir_var_shader_in)
+   if (mode != nir_var_mem_ubo && mode != nir_var_shader_in && mode != nir_var_mem_shared)
       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
 
    /* TODO: We should try and provide a better alignment.  For OpenCL, we need
@@ -919,6 +931,10 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
       assert(addr_format_is_global(addr_format));
       op = nir_intrinsic_store_global;
       break;
+   case nir_var_mem_shared:
+      assert(addr_format == nir_address_format_32bit_offset);
+      op = nir_intrinsic_store_shared;
+      break;
    default:
       unreachable("Unsupported explicit IO variable mode");
    }
@@ -928,6 +944,9 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
    store->src[0] = nir_src_for_ssa(value);
    if (addr_format_is_global(addr_format)) {
       store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
+   } else if (addr_format == nir_address_format_32bit_offset) {
+      assert(addr->num_components == 1);
+      store->src[1] = nir_src_for_ssa(addr);
    } else {
       store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
       store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
@@ -935,7 +954,8 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
 
    nir_intrinsic_set_write_mask(store, write_mask);
 
-   nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
+   if (mode != nir_var_mem_shared)
+      nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
 
    /* TODO: We should try and provide a better alignment.  For OpenCL, we need
     * to plumb the alignment through from SPIR-V when we have one.
@@ -980,6 +1000,10 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
       assert(addr_format_is_global(addr_format));
       op = global_atomic_for_deref(intrin->intrinsic);
       break;
+   case nir_var_mem_shared:
+      assert(addr_format == nir_address_format_32bit_offset);
+      op = shared_atomic_for_deref(intrin->intrinsic);
+      break;
    default:
       unreachable("Unsupported explicit IO variable mode");
    }
@@ -989,6 +1013,9 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
    unsigned src = 0;
    if (addr_format_is_global(addr_format)) {
       atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
+   } else if (addr_format == nir_address_format_32bit_offset) {
+      assert(addr->num_components == 1);
+      atomic->src[src++] = nir_src_for_ssa(addr);
    } else {
       atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
@@ -1000,7 +1027,7 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
    /* Global atomics don't have access flags because they assume that the
     * address may be non-uniform.
     */
-   if (!addr_format_is_global(addr_format))
+   if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared)
       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
 
    assert(intrin->dest.ssa.num_components == 1);
@@ -1032,7 +1059,7 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
    assert(deref->dest.is_ssa);
    switch (deref->deref_type) {
    case nir_deref_type_var:
-      assert(deref->mode == nir_var_shader_in);
+      assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared));
       return nir_imm_intN_t(b, deref->var->data.driver_location,
                             deref->dest.ssa.bit_size);