}
}
+/**
+ * Replace every load_shared / store_shared intrinsic in one function with
+ * the r600-specific load_local_shared_r600 / store_local_shared_r600 forms.
+ *
+ * Loads: the single source address is expanded to one address per
+ * destination component (offsets 0, 4, 8, 12 from src[0]; presumably bytes
+ * per 32-bit component) for 2, 3 and 4 components. Any other component
+ * count keeps the address unchanged. The replacement load always gets a
+ * 32-bit destination.
+ *
+ * Stores: the write mask is split into the component pairs xy and zw. For
+ * each pair with at least one enabled bit a separate store is emitted whose
+ * address is src[1] + 8 * pair, plus 4 when the first component of the pair
+ * is not written.
+ *
+ * \param func  function whose implementation is rewritten in place
+ * \return true if at least one intrinsic was replaced
+ */
+static bool
+r600_lower_shared_io_impl(nir_function *func)
+{
+   nir_builder b;
+   nir_builder_init(&b, func->impl);
+
+   bool progress = false;
+   nir_foreach_block(block, func->impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         const bool is_load = intr->intrinsic == nir_intrinsic_load_shared;
+         const bool is_store = intr->intrinsic == nir_intrinsic_store_shared;
+         if (!is_load && !is_store)
+            continue;
+
+         /* All replacement code goes right in front of the old intrinsic. */
+         b.cursor = nir_before_instr(instr);
+
+         if (is_load) {
+            const unsigned ncomp = nir_dest_num_components(intr->dest);
+            nir_ssa_def *address = intr->src[0].ssa;
+
+            /* Build one address per loaded component. */
+            if (ncomp == 2) {
+               nir_ssa_def *second = nir_iadd_imm(&b, address, 4);
+               address = nir_vec2(&b, address, second);
+            } else if (ncomp == 3) {
+               nir_ssa_def *tail = nir_iadd(&b, address,
+                                            nir_imm_ivec2(&b, 4, 8));
+               address = nir_vec3(&b, address,
+                                  nir_channel(&b, tail, 0),
+                                  nir_channel(&b, tail, 1));
+            } else if (ncomp == 4) {
+               address = nir_iadd(&b, address,
+                                  nir_imm_ivec4(&b, 0, 4, 8, 12));
+            }
+
+            nir_intrinsic_instr *load =
+               nir_intrinsic_instr_create(b.shader,
+                                          nir_intrinsic_load_local_shared_r600);
+            load->num_components = ncomp;
+            load->src[0] = nir_src_for_ssa(address);
+            nir_ssa_dest_init(&load->instr, &load->dest,
+                              load->num_components, 32, NULL);
+
+            /* Redirect users of the old result before inserting the new load. */
+            nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+                                     nir_src_for_ssa(&load->dest.ssa));
+            nir_builder_instr_insert(&b, &load->instr);
+         } else {
+            const unsigned full_mask = nir_intrinsic_write_mask(intr);
+            nir_ssa_def *value = intr->src[0].ssa;
+            nir_ssa_def *base = intr->src[1].ssa;
+
+            /* pair 0 covers components xy, pair 1 covers zw. */
+            for (int pair = 0; pair < 2; ++pair) {
+               const unsigned pair_mask = full_mask & (0x3u << (2 * pair));
+               if (!pair_mask)
+                  continue;
+
+               nir_intrinsic_instr *store =
+                  nir_intrinsic_instr_create(b.shader,
+                                             nir_intrinsic_store_local_shared_r600);
+               nir_intrinsic_set_write_mask(store, pair_mask);
+               store->src[0] = nir_src_for_ssa(value);
+               store->num_components = value->num_components;
+
+               /* If only the second component of the pair is written, the
+                * store starts one slot (4) further in. */
+               const bool start_even = pair_mask & (1u << (2 * pair));
+               const int offset = 8 * pair + (start_even ? 0 : 4);
+
+               nir_ssa_def *pair_addr = nir_iadd(&b, base,
+                                                 nir_imm_int(&b, offset));
+               store->src[1] = nir_src_for_ssa(pair_addr);
+
+               nir_builder_instr_insert(&b, &store->instr);
+            }
+         }
+
+         nir_instr_remove(instr);
+         progress = true;
+      }
+   }
+   return progress;
+}
+
+/**
+ * Shader-level entry point: run r600_lower_shared_io_impl() on every
+ * function of the shader that has an implementation.
+ *
+ * \param nir  shader to process
+ * \return true if any function was changed
+ */
+static bool
+r600_lower_shared_io(nir_shader *nir)
+{
+   bool progress = false;
+
+   nir_foreach_function(function, nir) {
+      /* Functions without an implementation have nothing to lower. */
+      if (!function->impl)
+         continue;
+
+      if (r600_lower_shared_io_impl(function))
+         progress = true;
+   }
+
+   return progress;
+}
+
static bool
optimize_once(nir_shader *shader)
{
NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+ NIR_PASS_V(sel->nir, r600_lower_shared_io);
+
static const struct nir_lower_tex_options lower_tex_options = {
.lower_txp = ~0u,
};