r600/sfn: Add lowering pass for shared IO
authorGert Wollny <gert.wollny@collabora.com>
Mon, 18 May 2020 18:36:16 +0000 (20:36 +0200)
committerMarge Bot <eric+marge@anholt.net>
Mon, 22 Jun 2020 12:01:17 +0000 (12:01 +0000)
Lower shared load and store to use the r600 specific intrinsics.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5575>

src/gallium/drivers/r600/sfn/sfn_nir.cpp

index 8ae92fd09ebe8805930cb589b8cb6c5f1e53f2c4..0a4246fb0675065e9f4d291ba67e7cc86d54759a 100644 (file)
@@ -512,6 +512,94 @@ r600_get_natural_size_align_bytes(const struct glsl_type *type,
    }
 }
 
+static bool
+r600_lower_shared_io_impl(nir_function *func)
+{
+   nir_builder b;
+   nir_builder_init(&b, func->impl);
+
+   bool progress = false;
+   nir_foreach_block(block, func->impl) {
+      nir_foreach_instr_safe(instr, block) {
+
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
+         if (op->intrinsic != nir_intrinsic_load_shared &&
+             op->intrinsic != nir_intrinsic_store_shared)
+            continue;
+
+         b.cursor = nir_before_instr(instr);
+
+         if (op->intrinsic == nir_intrinsic_load_shared) {
+            nir_ssa_def *addr = op->src[0].ssa;
+
+            switch (nir_dest_num_components(op->dest)) {
+            case 2: {
+               auto addr2 = nir_iadd_imm(&b, addr, 4);
+               addr = nir_vec2(&b, addr, addr2);
+               break;
+            }
+            case 3: {
+               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
+               addr = nir_vec3(&b, addr,
+                               nir_channel(&b, addr2, 0),
+                               nir_channel(&b, addr2, 1));
+               break;
+            }
+            case 4: {
+               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
+               break;
+            }
+            }
+
+            auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
+            load->num_components = nir_dest_num_components(op->dest);
+            load->src[0] = nir_src_for_ssa(addr);
+            nir_ssa_dest_init(&load->instr, &load->dest,
+                              load->num_components, 32, NULL);
+            nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
+            nir_builder_instr_insert(&b, &load->instr);
+         } else {
+            nir_ssa_def *addr = op->src[1].ssa;
+            for (int i = 0; i < 2; ++i) {
+               unsigned test_mask = (0x3 << 2 * i);
+               if (!(nir_intrinsic_write_mask(op) & test_mask))
+                  continue;
+
+               auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
+               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
+               nir_intrinsic_set_write_mask(store, writemask);
+               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
+               store->num_components = store->src[0].ssa->num_components;
+               bool start_even = (writemask & (1u << (2 * i)));
+
+               auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
+               store->src[1] = nir_src_for_ssa(addr2);
+
+               nir_builder_instr_insert(&b, &store->instr);
+            }
+         }
+         nir_instr_remove(instr);
+         progress = true;
+      }
+   }
+   return progress;
+}
+
+static bool
+r600_lower_shared_io(nir_shader *nir)
+{
+       bool progress=false;
+       nir_foreach_function(function, nir) {
+               if (function->impl &&
+                        r600_lower_shared_io_impl(function))
+                       progress = true;
+       }
+       return progress;
+}
+
 static bool
 optimize_once(nir_shader *shader)
 {
@@ -575,6 +663,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
    NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
    NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
 
+   NIR_PASS_V(sel->nir, r600_lower_shared_io);
+
    static const struct nir_lower_tex_options lower_tex_options = {
       .lower_txp = ~0u,
    };